/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/tlbflush.h
 *
 * Copyright (C) 1999-2003 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H

#ifndef __ASSEMBLY__

#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
#include <asm/mmu.h>

/*
 * Raw TLBI operations.
 *
 * Where necessary, use the __tlbi() macro to avoid asm()
 * boilerplate. Drivers and most kernel code should use the TLB
 * management routines in preference to the macro below.
 *
 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
 * on whether a particular TLBI operation takes an argument or
 * not. The macro handles invoking the asm with or without the
 * register argument as appropriate.
 */
#define __TLBI_0(op, arg)	asm (ARM64_ASM_PREAMBLE			\
				     "tlbi " #op "\n"			\
			ALTERNATIVE("nop\n nop",			\
				    "dsb ish\n tlbi " #op,		\
				    ARM64_WORKAROUND_REPEAT_TLBI,	\
				    CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
			: : )

#define __TLBI_1(op, arg)	asm (ARM64_ASM_PREAMBLE			\
				     "tlbi " #op ", %0\n"		\
			ALTERNATIVE("nop\n nop",			\
				    "dsb ish\n tlbi " #op ", %0",	\
				    ARM64_WORKAROUND_REPEAT_TLBI,	\
				    CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
			: : "r" (arg))

#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)

#define __tlbi_user(op, arg) do {				\
	if (arm64_kernel_unmapped_at_el0())			\
		__tlbi(op, (arg) | USER_ASID_FLAG);		\
} while (0)

/* This macro creates a properly formatted VA operand for the TLBI */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})

/*
 * Get translation granule of the system, which is decided by
 * PAGE_SIZE. Used by TTL.
 *  - 4KB	: 1
 *  - 16KB	: 2
 *  - 64KB	: 3
 */
#define TLBI_TTL_TG_4K		1
#define TLBI_TTL_TG_16K		2
#define TLBI_TTL_TG_64K		3

static inline unsigned long get_trans_granule(void)
{
	switch (PAGE_SIZE) {
	case SZ_4K:
		return TLBI_TTL_TG_4K;
	case SZ_16K:
		return TLBI_TTL_TG_16K;
	case SZ_64K:
		return TLBI_TTL_TG_64K;
	default:
		return 0;
	}
}

/*
 * Level-based TLBI operations.
 *
 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
 * the level at which the invalidation must take place. If the level is
 * wrong, no invalidation may take place. In the case where the level
 * cannot be easily determined, a 0 value for the level parameter will
 * perform a non-hinted invalidation.
 *
 * For Stage-2 invalidation, use the level values provided to that effect
 * in asm/stage2_pgtable.h.
 */
#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)

#define __tlbi_level(op, addr, level) do {				\
	u64 arg = addr;							\
									\
	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&		\
	    level) {							\
		u64 ttl = level & 3;					\
		ttl |= get_trans_granule() << 2;			\
		arg &= ~TLBI_TTL_MASK;					\
		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);			\
	}								\
									\
	__tlbi(op, arg);						\
} while (0)

#define __tlbi_user_level(op, arg, level) do {				\
	if (arm64_kernel_unmapped_at_el0())				\
		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
} while (0)
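/*
 * Illustrative sketch only, not part of the kernel API: shows how the raw
 * helpers above compose for a single last-level user entry. The function
 * name and the level-3 TTL hint are hypothetical; real callers should use
 * the flush_tlb_*() routines further down instead of open-coding this
 * sequence.
 */
static inline void __example_invalidate_user_page(unsigned long uaddr,
						  unsigned long asid)
{
	/* Pack bits [55:12] of the VA and the ASID into one TLBI operand. */
	unsigned long ta = __TLBI_VADDR(uaddr, asid);

	dsb(ishst);				/* complete prior PTE updates */
	__tlbi_level(vale1is, ta, 3);		/* last-level entry, TTL hint */
	__tlbi_user_level(vale1is, ta, 3);	/* mirror to the KPTI user ASID */
	dsb(ish);				/* wait for completion on all CPUs */
}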
/*
 * This macro creates a properly formatted VA operand for the TLB RANGE.
 * The value bit assignments are:
 *
 * +----------+------+-------+-------+-------+----------------------+
 * |   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
 * +----------+------+-------+-------+-------+----------------------+
 * |63      48|47  46|45   44|43   39|38   37|36                   0|
 *
 * The address range is determined by the formula below:
 * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
 */
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
	({							\
		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
		__ta &= GENMASK_ULL(36, 0);			\
		__ta |= (unsigned long)(ttl) << 37;		\
		__ta |= (unsigned long)(num) << 39;		\
		__ta |= (unsigned long)(scale) << 44;		\
		__ta |= get_trans_granule() << 46;		\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})

/* These macros are used by the TLBI RANGE feature. */
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)

/*
 * Generate 'num' values from -1 to 30 with -1 rejected by the
 * __flush_tlb_range() loop below.
 */
#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
#define __TLBI_RANGE_NUM(pages, scale)	\
	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
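/*
 * Illustrative sketch only (hypothetical helper, not part of the kernel API):
 * builds the operand for a single range TLBI covering 16384 pages starting
 * at 'start' for 'asid', i.e. scale = 2 and num = 7, since
 * __TLBI_RANGE_PAGES(7, 2) = (7 + 1) << (5 * 2 + 1) = 16384.
 */
static inline unsigned long __example_range_operand(unsigned long start,
						    unsigned long asid)
{
	/* TTL hint of 0: the level of the leaf entries is unknown. */
	unsigned long ta = __TLBI_VADDR_RANGE(start, asid, 2, 7, 0);

	/* A caller would hand this to a range op, e.g. __tlbi(rvale1is, ta). */
	return ta;
}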
/*
 * TLB Invalidation
 * ================
 *
 *	This header file implements the low-level TLB invalidation routines
 *	(sometimes referred to as "flushing" in the kernel) for arm64.
 *
 *	Every invalidation operation uses the following template:
 *
 *	DSB ISHST	// Ensure prior page-table updates have completed
 *	TLBI ...	// Invalidate the TLB
 *	DSB ISH		// Ensure the TLB invalidation has completed
 *	if (invalidated kernel mappings)
 *		ISB	// Discard any instructions fetched from the old mapping
 *
 *
 *	The following functions form part of the "core" TLB invalidation API,
 *	as documented in Documentation/core-api/cachetlb.rst:
 *
 *	flush_tlb_all()
 *		Invalidate the entire TLB (kernel + user) on all CPUs
 *
 *	flush_tlb_mm(mm)
 *		Invalidate an entire user address space on all CPUs.
 *		The 'mm' argument identifies the ASID to invalidate.
 *
 *	flush_tlb_range(vma, start, end)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		Note that this operation also invalidates any walk-cache
 *		entries associated with translations for the specified address
 *		range.
 *
 *	flush_tlb_kernel_range(start, end)
 *		Same as flush_tlb_range(..., start, end), but applies to
 *		kernel mappings rather than a particular user address space.
 *		Whilst not explicitly documented, this function is used when
 *		unmapping pages from vmalloc/io space.
 *
 *	flush_tlb_page(vma, addr)
 *		Invalidate a single user mapping for address 'addr' in the
 *		address space corresponding to 'vma->mm'. Note that this
 *		operation only invalidates a single, last-level page-table
 *		entry and therefore does not affect any walk-caches.
 *
 *
 *	Next, we have some undocumented invalidation routines that you probably
 *	don't want to call unless you know what you're doing:
 *
 *	local_flush_tlb_all()
 *		Same as flush_tlb_all(), but only applies to the calling CPU.
 *
 *	__flush_tlb_kernel_pgtable(addr)
 *		Invalidate a single kernel mapping for address 'addr' on all
 *		CPUs, ensuring that any walk-cache entries associated with the
 *		translation are also invalidated.
 *
 *	__flush_tlb_range(vma, start, end, stride, last_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride' and only affect any walk-cache entries
 *		if 'last_level' is false.
 *
 *
 *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
 *	on top of these routines, since that is our interface to the mmu_gather
 *	API as used by munmap() and friends.
 */
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}

static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}

static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid;

	dsb(ishst);
	asid = __TLBI_VADDR(0, ASID(mm));
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
					   unsigned long uaddr)
{
	unsigned long addr;

	dsb(ishst);
	addr = __TLBI_VADDR(uaddr, ASID(mm));
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
						    (uaddr & PAGE_MASK) + PAGE_SIZE);
}

static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}

static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
	/*
	 * TLB flush deferral is not required on systems which are affected by
	 * ARM64_WORKAROUND_REPEAT_TLBI, as the __tlbi()/__tlbi_user()
	 * implementation will issue two consecutive TLBI instructions with a
	 * dsb(ish) in between, defeating the purpose (i.e. saving the overall
	 * 'dsb ish' cost).
	 */
	if (unlikely(cpus_have_const_cap(ARM64_WORKAROUND_REPEAT_TLBI)))
		return false;
#endif
	return true;
}

static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
					     struct mm_struct *mm,
					     unsigned long uaddr)
{
	__flush_tlb_page_nosync(mm, uaddr);
}

/*
 * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
 * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
 * flush_tlb_batched_pending().
 */
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
	dsb(ish);
}
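/*
 * Illustrative sketch only (hypothetical caller, loosely modelled on how the
 * generic code batches unmaps): each page queues its own TLBI via
 * arch_tlbbatch_add_pending() above, and the single synchronising DSB is
 * deferred to arch_tlbbatch_flush() below, rather than paying a full
 * TLBI; DSB round-trip per page.
 */
static inline void __example_batched_unmap(struct arch_tlbflush_unmap_batch *batch,
					   struct mm_struct *mm,
					   unsigned long *uaddrs, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		arch_tlbbatch_add_pending(batch, mm, uaddrs[i]);

	/* The single wait is then done by arch_tlbbatch_flush(batch). */
}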
/*
 * To support TLB batched flush for multiple pages unmapping, we only send
 * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the
 * completion at the end in arch_tlbbatch_flush(). Since we've already issued
 * a TLBI for each page, only a DSB is needed to synchronise its effect on
 * the other CPUs.
 *
 * This saves the time spent waiting on the DSB compared with issuing a
 * TLBI; DSB sequence for each page.
 */
static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	dsb(ish);
}

/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
 * necessarily a performance improvement.
 */
#define MAX_TLBI_OPS	PTRS_PER_PTE

/*
 * __flush_tlb_range_op - Perform TLBI operation upon a range
 *
 * @op:		TLBI instruction that operates on a range (has 'r' prefix)
 * @start:	The start address of the range
 * @pages:	Range as the number of pages from 'start'
 * @stride:	Flush granularity
 * @asid:	The ASID of the task (0 for IPA instructions)
 * @tlb_level:	Translation Table level hint, if known
 * @tlbi_user:	If 'true', call an additional __tlbi_user()
 *		(typically for user ASIDs). 'false' for IPA instructions
 *
 * When the CPU does not support TLB range operations, flush the TLB
 * entries one by one at the granularity of 'stride'. If the TLB
 * range ops are supported, then:
 *
 * 1. If 'pages' is odd, flush the first page through non-range
 *    operations;
 *
 * 2. For remaining pages: the minimum range granularity is decided
 *    by 'scale', so multiple range TLBI operations may be required.
 *    Start from scale = 0, flush the corresponding number of pages
 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
 *    until no pages are left.
 *
 * Note that certain ranges can be represented by either num = 31 and
 * scale or num = 0 and scale + 1. The loop below favours the latter
 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
 */
#define __flush_tlb_range_op(op, start, pages, stride,			\
				asid, tlb_level, tlbi_user)		\
do {									\
	int num = 0;							\
	int scale = 0;							\
	unsigned long addr;						\
									\
	while (pages > 0) {						\
		if (!system_supports_tlb_range() ||			\
		    pages % 2 == 1) {					\
			addr = __TLBI_VADDR(start, asid);		\
			__tlbi_level(op, addr, tlb_level);		\
			if (tlbi_user)					\
				__tlbi_user_level(op, addr, tlb_level);	\
			start += stride;				\
			pages -= stride >> PAGE_SHIFT;			\
			continue;					\
		}							\
									\
		num = __TLBI_RANGE_NUM(pages, scale);			\
		if (num >= 0) {						\
			addr = __TLBI_VADDR_RANGE(start, asid, scale,	\
						  num, tlb_level);	\
			__tlbi(r##op, addr);				\
			if (tlbi_user)					\
				__tlbi_user(r##op, addr);		\
			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
			pages -= __TLBI_RANGE_PAGES(num, scale);	\
		}							\
		scale++;						\
	}								\
} while (0)

#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level)	\
	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
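/*
 * Illustrative sketch only (hypothetical helper, not part of the kernel API):
 * invalidates 'pages' pages of stage-1 user mappings for 'asid' starting at
 * 'start', at PAGE_SIZE stride and with no level hint. On a CPU with range
 * TLBI support, 7 pages decompose into one single-page TLBI (the odd page)
 * followed by one range TLBI with scale = 0 and num = 2, covering the
 * remaining (2 + 1) << 1 = 6 pages.
 */
static inline void __example_range_flush(unsigned long start,
					 unsigned long pages,
					 unsigned long asid)
{
	dsb(ishst);
	__flush_tlb_range_op(vale1is, start, pages, PAGE_SIZE, asid, 0, true);
	dsb(ish);
}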
static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	unsigned long asid, pages;

	start = round_down(start, stride);
	end = round_up(end, stride);
	pages = (end - start) >> PAGE_SHIFT;

	/*
	 * When not using TLB range operations, we can handle up to
	 * (MAX_TLBI_OPS - 1) pages;
	 * when using TLB range operations, we can handle up to
	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
	    pages >= MAX_TLBI_RANGE_PAGES) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	dsb(ishst);
	asid = ASID(vma->vm_mm);

	if (last_level)
		__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
	else
		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);

	dsb(ish);
	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}

static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be
	 * invalidating table entries as part of collapsing hugepages or
	 * moving page tables. Set the tlb_level to 0 because we cannot get
	 * enough information here.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}

static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}

/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
#endif

#endif
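/*
 * Illustrative usage sketch (hypothetical caller, not part of this header):
 * after the kernel page-table entries for an unmapped vmalloc/io region have
 * been cleared, the stale TLB entries are removed with:
 *
 *	flush_tlb_kernel_range(start, start + size);
 *
 * For user mappings torn down via the mmu_gather API, see how tlb_flush() in
 * asm/tlb.h picks between flush_tlb_mm() and __flush_tlb_range() on top of
 * the routines above.
 */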