xref: /openbmc/linux/arch/arm64/include/asm/tlbflush.h (revision 55fd7e02)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/tlbflush.h
 *
 * Copyright (C) 1999-2003 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H

#ifndef __ASSEMBLY__

#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
#include <asm/mmu.h>

/*
 * Raw TLBI operations.
 *
 * Where necessary, use the __tlbi() macro to avoid asm()
 * boilerplate. Drivers and most kernel code should use the TLB
 * management routines in preference to the macro below.
 *
 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
 * on whether a particular TLBI operation takes an argument or
 * not. The macro handles invoking the asm with or without the
 * register argument as appropriate.
 */
#define __TLBI_0(op, arg) asm ("tlbi " #op "\n"				       \
		   ALTERNATIVE("nop\n			nop",		       \
			       "dsb ish\n		tlbi " #op,	       \
			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
			    : : )

#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n"			       \
		   ALTERNATIVE("nop\n			nop",		       \
			       "dsb ish\n		tlbi " #op ", %0",     \
			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
			    : : "r" (arg))

#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)

#define __tlbi_user(op, arg) do {						\
	if (arm64_kernel_unmapped_at_el0())					\
		__tlbi(op, (arg) | USER_ASID_FLAG);				\
} while (0)
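
/*
 * Illustrative uses of the raw macros above (a non-authoritative sketch;
 * most code should prefer the flush_tlb_*() helpers later in this file,
 * and 'addr' here stands for a value built with __TLBI_VADDR()):
 *
 *	__tlbi(vmalle1is);		// operation with no register argument
 *	__tlbi(vale1is, addr);		// operation taking a VA/ASID operand
 *	__tlbi_user(vale1is, addr);	// repeats the TLBI with USER_ASID_FLAG
 *					// set, but only when kpti has split
 *					// the kernel and user ASIDs
 */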

/* This macro creates a properly formatted VA operand for the TLBI */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})
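
/*
 * The resulting operand follows the architected TLBI VA layout: bits [43:0]
 * hold the virtual address shifted right by 12 (i.e. VA[55:12]), bits [47:44]
 * are left as zero and bits [63:48] hold the ASID. Passing an ASID of 0, as
 * the kernel-range helpers below do, leaves the ASID field clear for
 * operations that ignore it (e.g. vaale1is/vaae1is).
 */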

/*
 *	TLB Invalidation
 *	================
 *
 *	This header file implements the low-level TLB invalidation routines
 *	(sometimes referred to as "flushing" in the kernel) for arm64.
 *
 *	Every invalidation operation uses the following template:
 *
 *	DSB ISHST	// Ensure prior page-table updates have completed
 *	TLBI ...	// Invalidate the TLB
 *	DSB ISH		// Ensure the TLB invalidation has completed
 *	if (invalidated kernel mappings)
 *		ISB	// Discard any instructions fetched from the old mapping
 *
 *
 *	The following functions form part of the "core" TLB invalidation API,
 *	as documented in Documentation/core-api/cachetlb.rst:
 *
 *	flush_tlb_all()
 *		Invalidate the entire TLB (kernel + user) on all CPUs
 *
 *	flush_tlb_mm(mm)
 *		Invalidate an entire user address space on all CPUs.
 *		The 'mm' argument identifies the ASID to invalidate.
 *
 *	flush_tlb_range(vma, start, end)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		Note that this operation also invalidates any walk-cache
 *		entries associated with translations for the specified address
 *		range.
 *
 *	flush_tlb_kernel_range(start, end)
 *		Same as flush_tlb_range(..., start, end), but applies to
 *		kernel mappings rather than a particular user address space.
 *		Whilst not explicitly documented, this function is used when
 *		unmapping pages from vmalloc/io space.
 *
 *	flush_tlb_page(vma, addr)
 *		Invalidate a single user mapping for address 'addr' in the
 *		address space corresponding to 'vma->mm'.  Note that this
 *		operation only invalidates a single, last-level page-table
 *		entry and therefore does not affect any walk-caches.
 *
 *
 *	Next, we have some undocumented invalidation routines that you probably
 *	don't want to call unless you know what you're doing:
 *
 *	local_flush_tlb_all()
 *		Same as flush_tlb_all(), but only applies to the calling CPU.
 *
 *	__flush_tlb_kernel_pgtable(addr)
 *		Invalidate a single kernel mapping for address 'addr' on all
 *		CPUs, ensuring that any walk-cache entries associated with the
 *		translation are also invalidated.
 *
 *	__flush_tlb_range(vma, start, end, stride, last_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride' and only affect walk-cache entries
 *		if 'last_level' is false.
 *
 *
 *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
 *	on top of these routines, since that is our interface to the mmu_gather
 *	API as used by munmap() and friends.
 */
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}

static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}

static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid = __TLBI_VADDR(0, ASID(mm));

	dsb(ishst);
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
}

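/*
 * Issue the broadcast invalidation for a single user address without waiting
 * for it to complete: flush_tlb_page() below supplies the trailing DSB ISH,
 * and callers of the _nosync variant are expected to provide their own
 * synchronisation before relying on the invalidation.
 */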
static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));

	dsb(ishst);
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}

/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges,
 * rather than being a performance improvement in itself.
 */
#define MAX_TLBI_OPS	PTRS_PER_PTE
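
/*
 * For scale, assuming 4K pages: PTRS_PER_PTE is 512, so a range of
 * 512 * stride bytes or more (2MB for a PAGE_SIZE stride) takes the
 * flush_tlb_mm()/flush_tlb_all() fallback below instead of iterating.
 */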

static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level)
{
	unsigned long asid = ASID(vma->vm_mm);
	unsigned long addr;

	start = round_down(start, stride);
	end = round_up(end, stride);

	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	/* Convert the stride into units of 4k */
	stride >>= 12;

	start = __TLBI_VADDR(start, asid);
	end = __TLBI_VADDR(end, asid);

	dsb(ishst);
	for (addr = start; addr < end; addr += stride) {
		if (last_level) {
			__tlbi(vale1is, addr);
			__tlbi_user(vale1is, addr);
		} else {
			__tlbi(vae1is, addr);
			__tlbi_user(vae1is, addr);
		}
	}
	dsb(ish);
}
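
/*
 * A hypothetical caller that has just rewritten PMD-level mappings could
 * invalidate at PMD granularity, issuing one TLBI per 2MB block (with 4K
 * pages) rather than one per base page, and keep last_level == false since
 * table entries may have changed as well:
 *
 *	__flush_tlb_range(vma, start, end, PMD_SIZE, false);
 */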

static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be invalidating
	 * table entries as part of collapsing hugepages or moving page tables.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false);
}

static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
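	/*
	 * The VA operand counts in units of 4K (see __TLBI_VADDR), so advance
	 * by the page size expressed in those units: 1 for 4K pages, 4 for
	 * 16K pages, 16 for 64K pages.
	 */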
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}

/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
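
/*
 * A sketch of a hypothetical caller, along the lines of the arm64 page-table
 * teardown code: clear the intermediate entry, drop any cached partial walks,
 * and only then free the old table page.
 *
 *	pmd_clear(pmdp);
 *	__flush_tlb_kernel_pgtable(addr);
 *	// now it is safe to free the pte table that pmdp pointed to
 */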
#endif

#endif