/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/tlbflush.h
 *
 * Copyright (C) 1999-2003 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H

#ifndef __ASSEMBLY__

#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
#include <asm/mmu.h>

/*
 * Raw TLBI operations.
 *
 * Where necessary, use the __tlbi() macro to avoid asm()
 * boilerplate. Drivers and most kernel code should use the TLB
 * management routines in preference to the macro below.
 *
 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
 * on whether a particular TLBI operation takes an argument or
 * not. The macro handles invoking the asm with or without the
 * register argument as appropriate.
 */
#define __TLBI_0(op, arg)	asm (ARM64_ASM_PREAMBLE			\
				     "tlbi " #op "\n"			\
			ALTERNATIVE("nop\n nop",			\
				    "dsb ish\n tlbi " #op,		\
				    ARM64_WORKAROUND_REPEAT_TLBI,	\
				    CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
			: : )

#define __TLBI_1(op, arg)	asm (ARM64_ASM_PREAMBLE			\
				     "tlbi " #op ", %0\n"		\
			ALTERNATIVE("nop\n nop",			\
				    "dsb ish\n tlbi " #op ", %0",	\
				    ARM64_WORKAROUND_REPEAT_TLBI,	\
				    CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
			: : "r" (arg))

#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)

#define __tlbi_user(op, arg) do {				\
	if (arm64_kernel_unmapped_at_el0())			\
		__tlbi(op, (arg) | USER_ASID_FLAG);		\
} while (0)

/* This macro creates a properly formatted VA operand for the TLBI */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})

/*
 * Get the translation granule of the system, which is decided by
 * PAGE_SIZE. Used by TTL.
 *  - 4KB	: 1
 *  - 16KB	: 2
 *  - 64KB	: 3
 */
#define TLBI_TTL_TG_4K		1
#define TLBI_TTL_TG_16K		2
#define TLBI_TTL_TG_64K		3

static inline unsigned long get_trans_granule(void)
{
	switch (PAGE_SIZE) {
	case SZ_4K:
		return TLBI_TTL_TG_4K;
	case SZ_16K:
		return TLBI_TTL_TG_16K;
	case SZ_64K:
		return TLBI_TTL_TG_64K;
	default:
		return 0;
	}
}

/*
 * Level-based TLBI operations.
 *
 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
 * the level at which the invalidation must take place. If the level is
 * wrong, no invalidation may take place. In the case where the level
 * cannot be easily determined, a 0 value for the level parameter will
 * perform a non-hinted invalidation.
 *
 * For Stage-2 invalidation, use the level values provided to that effect
 * in asm/stage2_pgtable.h.
 */
#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)

#define __tlbi_level(op, addr, level) do {				\
	u64 arg = addr;							\
									\
	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&		\
	    level) {							\
		u64 ttl = level & 3;					\
		ttl |= get_trans_granule() << 2;			\
		arg &= ~TLBI_TTL_MASK;					\
		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);			\
	}								\
									\
	__tlbi(op, arg);						\
} while (0)

#define __tlbi_user_level(op, arg, level) do {				\
	if (arm64_kernel_unmapped_at_el0())				\
		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
} while (0)
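/*
 * Illustrative sketch (not part of the upstream header): invalidating a
 * single last-level entry for user address 'uaddr' under ASID 'asid',
 * with a level-3 hint, combines the macros above roughly as follows.
 * The caller remains responsible for the surrounding DSB barriers:
 *
 *	unsigned long va = __TLBI_VADDR(uaddr, asid);
 *
 *	__tlbi_level(vale1is, va, 3);		// native ASID
 *	__tlbi_user_level(vale1is, va, 3);	// KPTI user ASID, if any
 */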
/*
 * This macro creates a properly formatted VA operand for the TLB RANGE.
 * The value bit assignments are:
 *
 *	+----------+------+-------+-------+-------+----------------------+
 *	|   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
 *	+----------+------+-------+-------+-------+----------------------+
 *	|63      48|47  46|45   44|43   39|38   37|36                   0|
 *
 * The address range is determined by the formula below:
 *	[BADDR, BADDR + (NUM + 1) * 2^(5 * SCALE + 1) * PAGESIZE)
 */
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
	({							\
		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
		__ta &= GENMASK_ULL(36, 0);			\
		__ta |= (unsigned long)(ttl) << 37;		\
		__ta |= (unsigned long)(num) << 39;		\
		__ta |= (unsigned long)(scale) << 44;		\
		__ta |= get_trans_granule() << 46;		\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})

/* These macros are used by the TLBI RANGE feature. */
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)

/*
 * Generate 'num' values from -1 to 30 with -1 rejected by the
 * __flush_tlb_range() loop below.
 */
#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
#define __TLBI_RANGE_NUM(pages, scale)	\
	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
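/*
 * Worked example (illustrative): with 4KB pages, the largest encodable
 * range uses num = 31 and scale = 3:
 *
 *	__TLBI_RANGE_PAGES(31, 3) = 32 << 16 = 2097152 pages
 *
 * i.e. MAX_TLBI_RANGE_PAGES covers 8GB of virtual address space with a
 * single ranged TLBI.
 */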
/*
 * TLB Invalidation
 * ================
 *
 * This header file implements the low-level TLB invalidation routines
 * (sometimes referred to as "flushing" in the kernel) for arm64.
 *
 * Every invalidation operation uses the following template:
 *
 *	DSB ISHST	// Ensure prior page-table updates have completed
 *	TLBI ...	// Invalidate the TLB
 *	DSB ISH		// Ensure the TLB invalidation has completed
 *	if (invalidated kernel mappings)
 *		ISB	// Discard any instructions fetched from the old mapping
 *
 *
 * The following functions form part of the "core" TLB invalidation API,
 * as documented in Documentation/core-api/cachetlb.rst:
 *
 *	flush_tlb_all()
 *		Invalidate the entire TLB (kernel + user) on all CPUs
 *
 *	flush_tlb_mm(mm)
 *		Invalidate an entire user address space on all CPUs.
 *		The 'mm' argument identifies the ASID to invalidate.
 *
 *	flush_tlb_range(vma, start, end)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		Note that this operation also invalidates any walk-cache
 *		entries associated with translations for the specified address
 *		range.
 *
 *	flush_tlb_kernel_range(start, end)
 *		Same as flush_tlb_range(..., start, end), but applies to
 *		kernel mappings rather than a particular user address space.
 *		Whilst not explicitly documented, this function is used when
 *		unmapping pages from vmalloc/io space.
 *
 *	flush_tlb_page(vma, addr)
 *		Invalidate a single user mapping for address 'addr' in the
 *		address space corresponding to 'vma->mm'. Note that this
 *		operation only invalidates a single, last-level page-table
 *		entry and therefore does not affect any walk-caches.
 *
 *
 * Next, we have some undocumented invalidation routines that you probably
 * don't want to call unless you know what you're doing:
 *
 *	local_flush_tlb_all()
 *		Same as flush_tlb_all(), but only applies to the calling CPU.
 *
 *	__flush_tlb_kernel_pgtable(addr)
 *		Invalidate a single kernel mapping for address 'addr' on all
 *		CPUs, ensuring that any walk-cache entries associated with the
 *		translation are also invalidated.
 *
 *	__flush_tlb_range(vma, start, end, stride, last_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride' and only affect walk-cache entries
 *		if 'last_level' is false.
 *
 *
 * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
 * on top of these routines, since that is our interface to the mmu_gather
 * API as used by munmap() and friends.
 */
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}

static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}

static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid;

	dsb(ishst);
	asid = __TLBI_VADDR(0, ASID(mm));
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
					   unsigned long uaddr)
{
	unsigned long addr;

	dsb(ishst);
	addr = __TLBI_VADDR(uaddr, ASID(mm));
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
						    (uaddr & PAGE_MASK) + PAGE_SIZE);
}

static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}

static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
	/*
	 * TLB flush deferral is not required on systems which are affected by
	 * ARM64_WORKAROUND_REPEAT_TLBI, as the __tlbi()/__tlbi_user()
	 * implementation will have two consecutive TLBI instructions with a
	 * dsb(ish) in between, defeating the purpose (i.e. saving the overall
	 * 'dsb ish' cost).
	 */
	if (unlikely(cpus_have_const_cap(ARM64_WORKAROUND_REPEAT_TLBI)))
		return false;
#endif
	return true;
}

static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
					     struct mm_struct *mm,
					     unsigned long uaddr)
{
	__flush_tlb_page_nosync(mm, uaddr);
}

static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
	dsb(ish);
}

static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	dsb(ish);
}
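/*
 * Illustrative flow (a simplified sketch of how the core mm reclaim path
 * drives the batching hooks above; see mm/rmap.c for the real caller):
 *
 *	if (arch_tlbbatch_should_defer(mm))
 *		arch_tlbbatch_add_pending(&batch->arch, mm, uaddr);
 *	...
 *	arch_tlbbatch_flush(&batch->arch);	// one dsb(ish) for the batch
 */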
/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
 * necessarily a performance improvement.
 */
#define MAX_TLBI_OPS	PTRS_PER_PTE

static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	int num = 0;
	int scale = 0;
	unsigned long asid, addr, pages;

	start = round_down(start, stride);
	end = round_up(end, stride);
	pages = (end - start) >> PAGE_SHIFT;

	/*
	 * Without TLB range ops, we can handle up to (MAX_TLBI_OPS - 1)
	 * pages; with TLB range ops, up to (MAX_TLBI_RANGE_PAGES - 1)
	 * pages. Beyond that, fall back to invalidating the whole ASID.
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
	    pages >= MAX_TLBI_RANGE_PAGES) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	dsb(ishst);
	asid = ASID(vma->vm_mm);

	/*
	 * When the CPU does not support TLB range operations, flush the TLB
	 * entries one by one at the granularity of 'stride'. If the TLB
	 * range ops are supported, then:
	 *
	 * 1. If 'pages' is odd, flush the first page through non-range
	 *    operations;
	 *
	 * 2. For remaining pages: the minimum range granularity is decided
	 *    by 'scale', so multiple range TLBI operations may be required.
	 *    Start from scale = 0, flush the corresponding number of pages
	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
	 *    until no pages are left.
	 *
	 * Note that certain ranges can be represented by either num = 31 and
	 * scale or num = 0 and scale + 1. The loop below favours the latter
	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
	 */
	while (pages > 0) {
		if (!system_supports_tlb_range() ||
		    pages % 2 == 1) {
			addr = __TLBI_VADDR(start, asid);
			if (last_level) {
				__tlbi_level(vale1is, addr, tlb_level);
				__tlbi_user_level(vale1is, addr, tlb_level);
			} else {
				__tlbi_level(vae1is, addr, tlb_level);
				__tlbi_user_level(vae1is, addr, tlb_level);
			}
			start += stride;
			pages -= stride >> PAGE_SHIFT;
			continue;
		}

		num = __TLBI_RANGE_NUM(pages, scale);
		if (num >= 0) {
			addr = __TLBI_VADDR_RANGE(start, asid, scale,
						  num, tlb_level);
			if (last_level) {
				__tlbi(rvale1is, addr);
				__tlbi_user(rvale1is, addr);
			} else {
				__tlbi(rvae1is, addr);
				__tlbi_user(rvae1is, addr);
			}
			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
			pages -= __TLBI_RANGE_PAGES(num, scale);
		}
		scale++;
	}
	dsb(ish);
	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
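/*
 * Worked example (illustrative): for pages = 7, stride = PAGE_SIZE and
 * range ops available, the loop above issues:
 *
 *	pages = 7 (odd)	-> one VALE1IS/VAE1IS for a single page, pages = 6
 *	scale = 0	-> __TLBI_RANGE_NUM(6, 0) = 2, so one ranged
 *			   RVALE1IS/RVAE1IS covering
 *			   __TLBI_RANGE_PAGES(2, 0) = 6 pages, pages = 0
 *
 * i.e. two TLBI instructions instead of seven.
 */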
static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be
	 * invalidating table entries as part of collapsing hugepages or
	 * moving page tables. Set the tlb_level to 0 because we cannot get
	 * enough information here.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}

static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}

/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
#endif

#endif