/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/tlbflush.h
 *
 * Copyright (C) 1999-2003 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H

#ifndef __ASSEMBLY__

#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
#include <asm/mmu.h>

/*
 * Raw TLBI operations.
 *
 * Where necessary, use the __tlbi() macro to avoid asm()
 * boilerplate. Drivers and most kernel code should use the TLB
 * management routines in preference to the macro below.
 *
 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
 * on whether a particular TLBI operation takes an argument or
 * not. The macro handles invoking the asm with or without the
 * register argument as appropriate.
 */
#define __TLBI_0(op, arg)	asm ("tlbi " #op "\n"			       \
			ALTERNATIVE("nop\n nop",			       \
				    "dsb ish\n tlbi " #op,		       \
				    ARM64_WORKAROUND_REPEAT_TLBI,	       \
				    CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)       \
			    : : )

#define __TLBI_1(op, arg)	asm ("tlbi " #op ", %0\n"		       \
			ALTERNATIVE("nop\n nop",			       \
				    "dsb ish\n tlbi " #op ", %0",	       \
				    ARM64_WORKAROUND_REPEAT_TLBI,	       \
				    CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)       \
			    : : "r" (arg))

#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)

#define __tlbi_user(op, arg) do {				\
	if (arm64_kernel_unmapped_at_el0())			\
		__tlbi(op, (arg) | USER_ASID_FLAG);		\
} while (0)

/* This macro creates a properly formatted VA operand for the TLBI */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})

/*
 * Get the translation granule of the system, which is decided by
 * PAGE_SIZE. Used by TTL.
 *  - 4KB  : 1
 *  - 16KB : 2
 *  - 64KB : 3
 */
#define TLBI_TTL_TG_4K		1
#define TLBI_TTL_TG_16K		2
#define TLBI_TTL_TG_64K		3

static inline unsigned long get_trans_granule(void)
{
	switch (PAGE_SIZE) {
	case SZ_4K:
		return TLBI_TTL_TG_4K;
	case SZ_16K:
		return TLBI_TTL_TG_16K;
	case SZ_64K:
		return TLBI_TTL_TG_64K;
	default:
		return 0;
	}
}

/*
 * Level-based TLBI operations.
 *
 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
 * the level at which the invalidation must take place. If the level is
 * wrong, no invalidation may take place. In the case where the level
 * cannot be easily determined, a 0 value for the level parameter will
 * perform a non-hinted invalidation.
 *
 * For Stage-2 invalidation, use the level values provided to that effect
 * in asm/stage2_pgtable.h.
 */
#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)

#define __tlbi_level(op, addr, level) do {			\
	u64 arg = addr;						\
								\
	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&	\
	    level) {						\
		u64 ttl = level & 3;				\
		ttl |= get_trans_granule() << 2;		\
		arg &= ~TLBI_TTL_MASK;				\
		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);		\
	}							\
								\
	__tlbi(op, arg);					\
} while (0)

#define __tlbi_user_level(op, arg, level) do {			\
	if (arm64_kernel_unmapped_at_el0())			\
		__tlbi_level(op, (arg | USER_ASID_FLAG), level);\
} while (0)
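
/*
 * Illustrative sketch only (not an upstream helper; the name
 * '__example_tlbi_user_page' is hypothetical): shows how the raw macros
 * above compose into the invalidation template documented later in this
 * file (DSB ISHST; TLBI; DSB ISH), here for a single last-level user
 * mapping with a level-3 (PTE) hint. Real callers should use the
 * flush_tlb_*() routines further down instead.
 */
static inline void __example_tlbi_user_page(unsigned long uaddr,
					    unsigned long asid)
{
	unsigned long addr = __TLBI_VADDR(uaddr, asid);

	dsb(ishst);				/* complete prior PTE updates */
	__tlbi_level(vale1is, addr, 3);		/* leaf-only, inner-shareable */
	__tlbi_user_level(vale1is, addr, 3);	/* mirror for the KPTI user ASID */
	dsb(ish);				/* wait for the broadcast TLBI */
	/* No ISB: only user mappings were invalidated. */
}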

/*
 * This macro creates a properly formatted VA operand for the TLB RANGE.
 * The value bit assignments are:
 *
 *	+----------+------+-------+-------+-------+----------------------+
 *	|   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
 *	+----------+------+-------+-------+-------+----------------------+
 *	|63      48|47  46|45   44|43   39|38   37|36                   0|
 *	+----------+------+-------+-------+-------+----------------------+
 *
 * The address range is determined by the formula below:
 * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
 */
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
	({							\
		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
		__ta &= GENMASK_ULL(36, 0);			\
		__ta |= (unsigned long)(ttl) << 37;		\
		__ta |= (unsigned long)(num) << 39;		\
		__ta |= (unsigned long)(scale) << 44;		\
		__ta |= get_trans_granule() << 46;		\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})

/* These macros are used by the TLBI RANGE feature. */
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)

/*
 * Generate 'num' values from -1 to 30 with -1 rejected by the
 * __flush_tlb_range() loop below.
 */
#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
#define __TLBI_RANGE_NUM(pages, scale)	\
	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
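
/*
 * Worked example (illustrative): splitting a 513-page range with 4KB pages.
 * __flush_tlb_range() below first flushes the odd page with a non-range
 * TLBI, leaving 512 pages:
 *
 *	scale = 0: __TLBI_RANGE_NUM(512, 0) = ((512 >> 1) & 0x1f) - 1 = -1,
 *		   so this scale is skipped;
 *	scale = 1: __TLBI_RANGE_NUM(512, 1) = ((512 >> 6) & 0x1f) - 1 = 7,
 *		   and __TLBI_RANGE_PAGES(7, 1) = 8 << 6 = 512 pages, covered
 *		   by a single range TLBI (RVAE1IS/RVALE1IS).
 */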

/*
 * TLB Invalidation
 * ================
 *
 * This header file implements the low-level TLB invalidation routines
 * (sometimes referred to as "flushing" in the kernel) for arm64.
 *
 * Every invalidation operation uses the following template:
 *
 *	DSB ISHST	// Ensure prior page-table updates have completed
 *	TLBI ...	// Invalidate the TLB
 *	DSB ISH		// Ensure the TLB invalidation has completed
 *	if (invalidated kernel mappings)
 *		ISB	// Discard any instructions fetched from the old mapping
 *
 *
 * The following functions form part of the "core" TLB invalidation API,
 * as documented in Documentation/core-api/cachetlb.rst:
 *
 *	flush_tlb_all()
 *		Invalidate the entire TLB (kernel + user) on all CPUs
 *
 *	flush_tlb_mm(mm)
 *		Invalidate an entire user address space on all CPUs.
 *		The 'mm' argument identifies the ASID to invalidate.
 *
 *	flush_tlb_range(vma, start, end)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		Note that this operation also invalidates any walk-cache
 *		entries associated with translations for the specified address
 *		range.
 *
 *	flush_tlb_kernel_range(start, end)
 *		Same as flush_tlb_range(..., start, end), but applies to
 *		kernel mappings rather than a particular user address space.
 *		Whilst not explicitly documented, this function is used when
 *		unmapping pages from vmalloc/io space.
 *
 *	flush_tlb_page(vma, addr)
 *		Invalidate a single user mapping for address 'addr' in the
 *		address space corresponding to 'vma->mm'. Note that this
 *		operation only invalidates a single, last-level page-table
 *		entry and therefore does not affect any walk-caches.
 *
 *
 * Next, we have some undocumented invalidation routines that you probably
 * don't want to call unless you know what you're doing:
 *
 *	local_flush_tlb_all()
 *		Same as flush_tlb_all(), but only applies to the calling CPU.
 *
 *	__flush_tlb_kernel_pgtable(addr)
 *		Invalidate a single kernel mapping for address 'addr' on all
 *		CPUs, ensuring that any walk-cache entries associated with the
 *		translation are also invalidated.
 *
 *	__flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride' and only affect walk-cache entries
 *		if 'last_level' is false. 'tlb_level' hints at the page-table
 *		level holding the entries being invalidated, or is 0 if the
 *		level is unknown.
 *
 *
 * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
 * on top of these routines, since that is our interface to the mmu_gather
 * API as used by munmap() and friends.
 */
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}

static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}

static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid = __TLBI_VADDR(0, ASID(mm));

	dsb(ishst);
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
}

static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));

	dsb(ishst);
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}
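
/*
 * Illustrative sketch only (not an upstream helper; the name
 * '__example_flush_two_user_pages' is hypothetical): the "nosync" variant
 * above exists so that a caller invalidating several individual pages can
 * issue a single completion barrier at the end rather than one per page.
 */
static inline void __example_flush_two_user_pages(struct vm_area_struct *vma,
						  unsigned long a,
						  unsigned long b)
{
	flush_tlb_page_nosync(vma, a);	/* DSB ISHST + TLBI VALE1IS */
	flush_tlb_page_nosync(vma, b);
	dsb(ish);			/* one barrier covers both TLBIs */
}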

/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
 * necessarily a performance improvement.
 */
#define MAX_TLBI_OPS	PTRS_PER_PTE

static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	int num = 0;
	int scale = 0;
	unsigned long asid = ASID(vma->vm_mm);
	unsigned long addr;
	unsigned long pages;

	start = round_down(start, stride);
	end = round_up(end, stride);
	pages = (end - start) >> PAGE_SHIFT;

	/*
	 * Without TLB range ops, we can handle up to
	 * (MAX_TLBI_OPS - 1) pages;
	 * with TLB range ops, we can handle up to
	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
	    pages >= MAX_TLBI_RANGE_PAGES) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	dsb(ishst);

	/*
	 * When the CPU does not support TLB range operations, flush the TLB
	 * entries one by one at the granularity of 'stride'. If the TLB
	 * range ops are supported, then:
	 *
	 * 1. If 'pages' is odd, flush the first page through non-range
	 *    operations;
	 *
	 * 2. For remaining pages: the minimum range granularity is decided
	 *    by 'scale', so multiple range TLBI operations may be required.
	 *    Start from scale = 0, flush the corresponding number of pages
	 *    ((num+1)*2^(5*scale+1), starting from 'addr'), then increase
	 *    the scale until no pages are left.
	 *
	 * Note that certain ranges can be represented by either num = 31 and
	 * scale or num = 0 and scale + 1. The loop below favours the latter
	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
	 */
	while (pages > 0) {
		if (!system_supports_tlb_range() ||
		    pages % 2 == 1) {
			addr = __TLBI_VADDR(start, asid);
			if (last_level) {
				__tlbi_level(vale1is, addr, tlb_level);
				__tlbi_user_level(vale1is, addr, tlb_level);
			} else {
				__tlbi_level(vae1is, addr, tlb_level);
				__tlbi_user_level(vae1is, addr, tlb_level);
			}
			start += stride;
			pages -= stride >> PAGE_SHIFT;
			continue;
		}

		num = __TLBI_RANGE_NUM(pages, scale);
		if (num >= 0) {
			addr = __TLBI_VADDR_RANGE(start, asid, scale,
						  num, tlb_level);
			if (last_level) {
				__tlbi(rvale1is, addr);
				__tlbi_user(rvale1is, addr);
			} else {
				__tlbi(rvae1is, addr);
				__tlbi_user(rvae1is, addr);
			}
			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
			pages -= __TLBI_RANGE_PAGES(num, scale);
		}
		scale++;
	}
	dsb(ish);
}

static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be
	 * invalidating table entries as part of collapsing hugepages or
	 * moving page tables. Pass 0 for tlb_level because we cannot
	 * determine the level here.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}

static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}

/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
#endif /* !__ASSEMBLY__ */

#endif /* __ASM_TLBFLUSH_H */
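
/*
 * Usage sketch (illustrative only, not part of this header): a caller that
 * frees an intermediate level of a kernel page table, e.g. a hypothetical
 * huge-vmap teardown path, is expected to clear the table entry first and
 * only then call __flush_tlb_kernel_pgtable(), so that stale walk-cache
 * entries are dropped before the backing page is reused:
 *
 *	pmd_clear(pmdp);			// hypothetical caller context
 *	__flush_tlb_kernel_pgtable(addr);	// TLBI VAAE1IS + DSB + ISB
 */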