/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/tlbflush.h
 *
 * Copyright (C) 1999-2003 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H

#ifndef __ASSEMBLY__

#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
#include <asm/mmu.h>

/*
 * Raw TLBI operations.
 *
 * Where necessary, use the __tlbi() macro to avoid asm()
 * boilerplate. Drivers and most kernel code should use the TLB
 * management routines in preference to the macro below.
 *
 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
 * on whether a particular TLBI operation takes an argument or
 * not. The macro handles invoking the asm with or without the
 * register argument as appropriate.
 */

/*
 * __TLBI_0: emit "tlbi <op>" for an operation without a register operand.
 * 'arg' is accepted only so that both variants share one signature; it is
 * not used in the expansion. The ALTERNATIVE() that follows is two NOPs by
 * default and "dsb ish; tlbi <op>" (i.e. the TLBI is issued a second time)
 * when selected by the ARM64_WORKAROUND_REPEAT_TLBI capability.
 */
#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE			\
			       "tlbi " #op "\n"				\
		   ALTERNATIVE("nop\n nop",				\
			       "dsb ish\n tlbi " #op,			\
			       ARM64_WORKAROUND_REPEAT_TLBI,		\
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	\
			    : : )

/*
 * __TLBI_1: same as __TLBI_0, but for operations that take a register
 * operand; 'arg' is passed to the asm as input operand %0.
 */
#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE			\
			       "tlbi " #op ", %0\n"			\
		   ALTERNATIVE("nop\n nop",				\
			       "dsb ish\n tlbi " #op ", %0",		\
			       ARM64_WORKAROUND_REPEAT_TLBI,		\
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	\
			    : : "r" (arg))

/* Select __TLBI_0 or __TLBI_1 by pasting the third argument onto the name. */
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

/*
 * With an argument, the trailing "1, 0" makes 'n' expand to 1; without one,
 * ##__VA_ARGS__ swallows the comma, everything shifts left and 'n' is 0.
 */
#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)

/*
 * Issue the same operation again with USER_ASID_FLAG set in the operand,
 * but only when arm64_kernel_unmapped_at_el0() is true.
 */
#define __tlbi_user(op, arg) do {					\
	if (arm64_kernel_unmapped_at_el0())				\
		__tlbi(op, (arg) | USER_ASID_FLAG);			\
} while (0)

/*
 * This macro creates a properly formatted VA operand for the TLBI:
 * bits [43:0] hold 'addr' >> 12 and 'asid' is placed from bit 48 upwards.
 */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})

/*
 * Get translation granule of the system, which is decided by
 * PAGE_SIZE. Used by TTL.
68 * - 4KB : 1 69 * - 16KB : 2 70 * - 64KB : 3 71 */ 72 #define TLBI_TTL_TG_4K 1 73 #define TLBI_TTL_TG_16K 2 74 #define TLBI_TTL_TG_64K 3 75 76 static inline unsigned long get_trans_granule(void) 77 { 78 switch (PAGE_SIZE) { 79 case SZ_4K: 80 return TLBI_TTL_TG_4K; 81 case SZ_16K: 82 return TLBI_TTL_TG_16K; 83 case SZ_64K: 84 return TLBI_TTL_TG_64K; 85 default: 86 return 0; 87 } 88 } 89 90 /* 91 * Level-based TLBI operations. 92 * 93 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for 94 * the level at which the invalidation must take place. If the level is 95 * wrong, no invalidation may take place. In the case where the level 96 * cannot be easily determined, a 0 value for the level parameter will 97 * perform a non-hinted invalidation. 98 * 99 * For Stage-2 invalidation, use the level values provided to that effect 100 * in asm/stage2_pgtable.h. 101 */ 102 #define TLBI_TTL_MASK GENMASK_ULL(47, 44) 103 104 #define __tlbi_level(op, addr, level) do { \ 105 u64 arg = addr; \ 106 \ 107 if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \ 108 level) { \ 109 u64 ttl = level & 3; \ 110 ttl |= get_trans_granule() << 2; \ 111 arg &= ~TLBI_TTL_MASK; \ 112 arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \ 113 } \ 114 \ 115 __tlbi(op, arg); \ 116 } while(0) 117 118 #define __tlbi_user_level(op, arg, level) do { \ 119 if (arm64_kernel_unmapped_at_el0()) \ 120 __tlbi_level(op, (arg | USER_ASID_FLAG), level); \ 121 } while (0) 122 123 /* 124 * This macro creates a properly formatted VA operand for the TLB RANGE. 
125 * The value bit assignments are: 126 * 127 * +----------+------+-------+-------+-------+----------------------+ 128 * | ASID | TG | SCALE | NUM | TTL | BADDR | 129 * +-----------------+-------+-------+-------+----------------------+ 130 * |63 48|47 46|45 44|43 39|38 37|36 0| 131 * 132 * The address range is determined by below formula: 133 * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) 134 * 135 */ 136 #define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ 137 ({ \ 138 unsigned long __ta = (addr) >> PAGE_SHIFT; \ 139 __ta &= GENMASK_ULL(36, 0); \ 140 __ta |= (unsigned long)(ttl) << 37; \ 141 __ta |= (unsigned long)(num) << 39; \ 142 __ta |= (unsigned long)(scale) << 44; \ 143 __ta |= get_trans_granule() << 46; \ 144 __ta |= (unsigned long)(asid) << 48; \ 145 __ta; \ 146 }) 147 148 /* These macros are used by the TLBI RANGE feature. */ 149 #define __TLBI_RANGE_PAGES(num, scale) \ 150 ((unsigned long)((num) + 1) << (5 * (scale) + 1)) 151 #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) 152 153 /* 154 * Generate 'num' values from -1 to 30 with -1 rejected by the 155 * __flush_tlb_range() loop below. 156 */ 157 #define TLBI_RANGE_MASK GENMASK_ULL(4, 0) 158 #define __TLBI_RANGE_NUM(pages, scale) \ 159 ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) 160 161 /* 162 * TLB Invalidation 163 * ================ 164 * 165 * This header file implements the low-level TLB invalidation routines 166 * (sometimes referred to as "flushing" in the kernel) for arm64. 167 * 168 * Every invalidation operation uses the following template: 169 * 170 * DSB ISHST // Ensure prior page-table updates have completed 171 * TLBI ... 
// Invalidate the TLB 172 * DSB ISH // Ensure the TLB invalidation has completed 173 * if (invalidated kernel mappings) 174 * ISB // Discard any instructions fetched from the old mapping 175 * 176 * 177 * The following functions form part of the "core" TLB invalidation API, 178 * as documented in Documentation/core-api/cachetlb.rst: 179 * 180 * flush_tlb_all() 181 * Invalidate the entire TLB (kernel + user) on all CPUs 182 * 183 * flush_tlb_mm(mm) 184 * Invalidate an entire user address space on all CPUs. 185 * The 'mm' argument identifies the ASID to invalidate. 186 * 187 * flush_tlb_range(vma, start, end) 188 * Invalidate the virtual-address range '[start, end)' on all 189 * CPUs for the user address space corresponding to 'vma->mm'. 190 * Note that this operation also invalidates any walk-cache 191 * entries associated with translations for the specified address 192 * range. 193 * 194 * flush_tlb_kernel_range(start, end) 195 * Same as flush_tlb_range(..., start, end), but applies to 196 * kernel mappings rather than a particular user address space. 197 * Whilst not explicitly documented, this function is used when 198 * unmapping pages from vmalloc/io space. 199 * 200 * flush_tlb_page(vma, addr) 201 * Invalidate a single user mapping for address 'addr' in the 202 * address space corresponding to 'vma->mm'. Note that this 203 * operation only invalidates a single, last-level page-table 204 * entry and therefore does not affect any walk-caches. 205 * 206 * 207 * Next, we have some undocumented invalidation routines that you probably 208 * don't want to call unless you know what you're doing: 209 * 210 * local_flush_tlb_all() 211 * Same as flush_tlb_all(), but only applies to the calling CPU. 212 * 213 * __flush_tlb_kernel_pgtable(addr) 214 * Invalidate a single kernel mapping for address 'addr' on all 215 * CPUs, ensuring that any walk-cache entries associated with the 216 * translation are also invalidated. 
 *
 *	__flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->vm_mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride' and only affect any walk-cache entries
 *		if 'last_level' is equal to false. 'tlb_level' is the
 *		page-table level hint handed to the TLBI operations; pass 0
 *		when the level is not known.
 *
 *
 *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
 *	on top of these routines, since that is our interface to the mmu_gather
 *	API as used by munmap() and friends.
 */

/*
 * Invalidate the whole TLB with the non-broadcast operation (vmalle1),
 * using the non-shareable barrier variants (nshst/nsh) of the template.
 */
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}

/*
 * Invalidate the whole TLB with the inner-shareable operation (vmalle1is),
 * followed by DSB ISH and an ISB.
 */
static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}

/*
 * Invalidate by ASID: the operand is built from address 0 and ASID(mm).
 * The operation is repeated through __tlbi_user(), which only acts when
 * the kernel is unmapped at EL0. No ISB follows the final DSB.
 */
static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid;

	dsb(ishst);
	asid = __TLBI_VADDR(0, ASID(mm));
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
}

/*
 * Issue the last-level invalidation (vale1is) for 'uaddr' in the ASID of
 * vma->vm_mm, without the completing DSB ISH; flush_tlb_page() below is
 * the variant that adds it.
 */
static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	unsigned long addr;

	dsb(ishst);
	addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
}

/* flush_tlb_page_nosync() followed by DSB ISH. */
static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}

/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
 * necessarily a performance improvement.
 */
#define MAX_TLBI_OPS	PTRS_PER_PTE

/*
 * Invalidate '[start, end)' for the ASID of vma->vm_mm.
 *
 * @vma:	VMA whose vm_mm supplies the ASID
 * @start:	start of the range; rounded down to a multiple of 'stride'
 * @end:	end of the range (exclusive); rounded up to a multiple of 'stride'
 * @stride:	step between two single-entry invalidations
 * @last_level:	if true use the leaf-only operations (vale1is/rvale1is),
 *		otherwise vae1is/rvae1is
 * @tlb_level:	level hint passed to __tlbi_level() and, as the TTL field,
 *		to __TLBI_VADDR_RANGE(); callers pass 0 when it is unknown
 *
 * Ranges that are too large are handled by flush_tlb_mm() instead.
 */
static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	int num = 0;
	int scale = 0;
	unsigned long asid, addr, pages;

	start = round_down(start, stride);
	end = round_up(end, stride);
	pages = (end - start) >> PAGE_SHIFT;

	/*
	 * Without TLB range ops we issue at most (MAX_TLBI_OPS - 1)
	 * 'stride'-sized invalidations; with TLB range ops we can handle
	 * up to (MAX_TLBI_RANGE_PAGES - 1) pages. Anything larger falls
	 * back to invalidating the whole address space.
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
	    pages >= MAX_TLBI_RANGE_PAGES) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	dsb(ishst);
	asid = ASID(vma->vm_mm);

	/*
	 * When the CPU does not support TLB range operations, flush the TLB
	 * entries one by one at the granularity of 'stride'. If the TLB
	 * range ops are supported, then:
	 *
	 * 1. If 'pages' is odd, flush the first page through non-range
	 *    operations;
	 *
	 * 2. For remaining pages: the minimum range granularity is decided
	 *    by 'scale', so multiple range TLBI operations may be required.
	 *    Start from scale = 0, flush the corresponding number of pages
	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
	 *    until no pages left.
	 *
	 * Note that certain ranges can be represented by either num = 31 and
	 * scale or num = 0 and scale + 1. The loop below favours the latter
	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
	 */
	while (pages > 0) {
		if (!system_supports_tlb_range() ||
		    pages % 2 == 1) {
			/* Single-entry invalidation of one 'stride' at 'start'. */
			addr = __TLBI_VADDR(start, asid);
			if (last_level) {
				__tlbi_level(vale1is, addr, tlb_level);
				__tlbi_user_level(vale1is, addr, tlb_level);
			} else {
				__tlbi_level(vae1is, addr, tlb_level);
				__tlbi_user_level(vae1is, addr, tlb_level);
			}
			start += stride;
			pages -= stride >> PAGE_SHIFT;
			continue;
		}

		/* num < 0 means nothing to flush at this scale; try the next. */
		num = __TLBI_RANGE_NUM(pages, scale);
		if (num >= 0) {
			addr = __TLBI_VADDR_RANGE(start, asid, scale,
						  num, tlb_level);
			if (last_level) {
				__tlbi(rvale1is, addr);
				__tlbi_user(rvale1is, addr);
			} else {
				__tlbi(rvae1is, addr);
				__tlbi_user(rvae1is, addr);
			}
			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
			pages -= __TLBI_RANGE_PAGES(num, scale);
		}
		scale++;
	}
	dsb(ish);
}

static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be invalidating
	 * table entries as part of collapsing hugepages or moving page tables.
	 * Set the tlb_level to 0 because we can not get enough information here.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}

/*
 * Invalidate the kernel range '[start, end)' page by page with vaale1is.
 * Ranges longer than MAX_TLBI_OPS pages are handled by flush_tlb_all().
 */
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	/* From here on 'start' and 'end' are TLBI operands, not addresses. */
	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
	/* __TLBI_VADDR() shifts by 12, so one page is 1 << (PAGE_SHIFT - 12). */
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}

/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 *
 * Issues vaae1is (the non-leaf-only operation) on the operand built from
 * 'kaddr' with ASID 0, bracketed by the usual DSB ISHST / DSB ISH / ISB
 * sequence.
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
#endif /* !__ASSEMBLY__ */

#endif /* __ASM_TLBFLUSH_H */