/* SPDX-License-Identifier: GPL-2.0-or-later */
/* include/asm-generic/tlb.h
 *
 * Generic TLB shootdown code
 *
 * Copyright 2001 Red Hat, Inc.
 * Based on code from mm/memory.c Copyright Linus Torvalds and others.
 *
 * Copyright 2011 Red Hat, Inc., Peter Zijlstra
 */
#ifndef _ASM_GENERIC__TLB_H
#define _ASM_GENERIC__TLB_H

#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/hugetlb_inline.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>

/*
 * Blindly accessing user memory from NMI context can be dangerous
 * if we're in the middle of switching the current user task or
 * switching the loaded mm.
 */
#ifndef nmi_uaccess_okay
# define nmi_uaccess_okay() true
#endif

#ifdef CONFIG_MMU

/*
 * Generic MMU-gather implementation.
 *
 * The mmu_gather data structure is used by the mm code to implement the
 * correct and efficient ordering of freeing pages and TLB invalidations.
 *
 * This correct ordering is:
 *
 *  1) unhook page
 *  2) TLB invalidate page
 *  3) free page
 *
 * That is, we must never free a page before we have ensured there are no live
 * translations left to it. Otherwise it might be possible to observe (or
 * worse, change) the page content after it has been reused.
 *
 * The mmu_gather API consists of:
 *
 *  - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather
 *
 *    Finish in particular will issue a (final) TLB invalidate and free
 *    all (remaining) queued pages.
 *
 *  - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA
 *
 *    Defaults to flushing at tlb_end_vma() to reset the range; helps when
 *    there are large holes between the VMAs.
 *
 *  - tlb_remove_table()
 *
 *    tlb_remove_table() is the basic primitive to free page-table directories
 *    (__p*_free_tlb()).  In its most primitive form it is an alias for
 *    tlb_remove_page() below, for when page directories are pages and have no
 *    additional constraints.
 *
 *    See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE.
 *
 *  - tlb_remove_page() / __tlb_remove_page()
 *  - tlb_remove_page_size() / __tlb_remove_page_size()
 *
 *    __tlb_remove_page_size() is the basic primitive that queues a page for
 *    freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
 *    boolean indicating if the queue is (now) full and a call to
 *    tlb_flush_mmu() is required.
 *
 *    tlb_remove_page() and tlb_remove_page_size() imply the call to
 *    tlb_flush_mmu() when required and have no return value.
 *
 *  - tlb_change_page_size()
 *
 *    call before __tlb_remove_page*() to set the current page-size; implies a
 *    possible tlb_flush_mmu() call.
 *
 *  - tlb_flush_mmu() / tlb_flush_mmu_tlbonly()
 *
 *    tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets
 *                              related state, like the range)
 *
 *    tlb_flush_mmu() - in addition to the above TLB invalidate, also frees
 *                      whatever pages are still batched.
 *
 *  - mmu_gather::fullmm
 *
 *    A flag set by tlb_gather_mmu() to indicate we're going to free
 *    the entire mm; this allows a number of optimizations.
 *
 *    - We can ignore tlb_{start,end}_vma(); because we don't
 *      care about ranges. Everything will be shot down.
 *
 *    - (RISC) architectures that use ASIDs can cycle to a new ASID
 *      and delay the invalidation until ASID space runs out.
 *
 *  - mmu_gather::need_flush_all
 *
 *    A flag that can be set by the arch code if it wants to force
 *    flush the entire TLB irrespective of the range. For instance
 *    x86-PAE needs this when changing top-level entries.
 *
 * And allows the architecture to provide and implement tlb_flush():
 *
 * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make
 * use of:
 *
 *  - mmu_gather::start / mmu_gather::end
 *
 *    which provides the range that needs to be flushed to cover the pages to
 *    be freed.
 *
 *  - mmu_gather::freed_tables
 *
 *    set when we freed page table pages
 *
 *  - tlb_get_unmap_shift() / tlb_get_unmap_size()
 *
 *    returns the smallest TLB entry size unmapped in this range.
 *
 * If an architecture does not provide tlb_flush() a default implementation
 * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is
 * specified, in which case we'll default to flush_tlb_mm().
 *
 * Additionally there are a few opt-in features:
 *
 *  MMU_GATHER_PAGE_SIZE
 *
 *  This ensures we call tlb_flush() every time tlb_change_page_size() actually
 *  changes the size and provides mmu_gather::page_size to tlb_flush().
 *
 *  This might be useful if your architecture has size specific TLB
 *  invalidation instructions.
 *
 *  MMU_GATHER_TABLE_FREE
 *
 *  This provides tlb_remove_table(), to be used instead of tlb_remove_page()
 *  for page directories (__p*_free_tlb()).
 *
 *  Useful if your architecture has non-page page directories.
 *
 *  When used, an architecture is expected to provide __tlb_remove_table()
 *  which does the actual freeing of these pages.
 *
 *  MMU_GATHER_RCU_TABLE_FREE
 *
 *  Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see
 *  comment below).
 *
 *  Useful if your architecture doesn't use IPIs for remote TLB invalidates
 *  and therefore doesn't naturally serialize with software page-table walkers.
 *
 *  MMU_GATHER_NO_RANGE
 *
 *  Use this if your architecture lacks an efficient flush_tlb_range().
 *
 *  MMU_GATHER_NO_GATHER
 *
 *  If this option is set, the mmu_gather does not track individual pages for
 *  delayed freeing.  A platform that enables the option must provide its own
 *  implementation of __tlb_remove_page_size() to free the pages.
 *
 *  This is useful if your architecture already flushes TLB entries in the
 *  various ptep_get_and_clear() functions.
 */
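
/*
 * A minimal usage sketch of the ordering above (illustrative only; the real
 * callers live in mm/memory.c, and the exact tlb_gather_mmu()/tlb_finish_mmu()
 * signatures vary between kernel versions):
 *
 *	struct mmu_gather tlb;
 *
 *	tlb_gather_mmu(&tlb, mm, start, end);
 *	tlb_start_vma(&tlb, vma);
 *	for_each_pte(...) {				// pseudo-code pte walk
 *		pte_t ptent = ptep_get_and_clear_full(mm, addr, pte,
 *						      tlb.fullmm);	// 1) unhook page
 *		tlb_remove_tlb_entry(&tlb, pte, addr);	// remember it for the invalidate
 *		tlb_remove_page(&tlb, pte_page(ptent));	// queue the page for 2) + 3)
 *	}
 *	tlb_end_vma(&tlb, vma);
 *	tlb_finish_mmu(&tlb, start, end);		// 2) invalidate, then 3) free
 *
 * tlb_finish_mmu() (and any intermediate tlb_flush_mmu()) performs the TLB
 * invalidate strictly before freeing the queued pages.
 */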

#ifdef CONFIG_MMU_GATHER_TABLE_FREE

struct mmu_table_batch {
#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
	struct rcu_head		rcu;
#endif
	unsigned int		nr;
	void			*tables[0];
};

#define MAX_TABLE_BATCH		\
	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))

extern void tlb_remove_table(struct mmu_gather *tlb, void *table);

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

/*
 * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based
 * page directories and we can use the normal page batching to free them.
 */
#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))

#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
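
/*
 * Illustrative sketch (not part of this header; details are architecture
 * specific): with MMU_GATHER_TABLE_FREE an architecture's __p*_free_tlb()
 * helpers stop going through tlb_remove_page() and instead hand the table to
 * tlb_remove_table(), e.g. roughly:
 *
 *	#define __pte_free_tlb(tlb, pte, address)		\
 *	do {							\
 *		pgtable_pte_page_dtor(pte);			\
 *		tlb_remove_table((tlb), (pte));			\
 *	} while (0)
 *
 * Whether the struct page or its virtual address is passed, and what the
 * destructor/accounting work looks like, differs per architecture; the
 * arch-provided __tlb_remove_table() then does the actual free once it is
 * safe to do so.
 */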

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
/*
 * This allows an architecture that does not use the Linux page tables for
 * hardware to skip the TLB invalidate (TLBI) when freeing page tables.
 */
#ifndef tlb_needs_table_invalidate
#define tlb_needs_table_invalidate() (true)
#endif

#else

#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */


#ifndef CONFIG_MMU_GATHER_NO_GATHER
/*
 * If we can't allocate a page to make a big batch of page pointers
 * to work on, then just handle a few from the on-stack structure.
 */
#define MMU_GATHER_BUNDLE	8

struct mmu_gather_batch {
	struct mmu_gather_batch	*next;
	unsigned int		nr;
	unsigned int		max;
	struct page		*pages[0];
};

#define MAX_GATHER_BATCH	\
	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))

/*
 * Limit the maximum number of mmu_gather batches to reduce a risk of soft
 * lockups for non-preemptible kernels on huge machines when a lot of memory
 * is zapped during unmapping.
 * 10K pages freed at once should be safe even without a preemption point.
 */
#define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)

extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
				   int page_size);
#endif

/*
 * struct mmu_gather is an opaque type used by the mm code for passing around
 * any data needed by arch-specific code for tlb_remove_page.
 */
struct mmu_gather {
	struct mm_struct	*mm;

#ifdef CONFIG_MMU_GATHER_TABLE_FREE
	struct mmu_table_batch	*batch;
#endif

	unsigned long		start;
	unsigned long		end;
	/*
	 * we are in the middle of an operation to clear
	 * a full mm and can make some optimizations
	 */
	unsigned int		fullmm : 1;

	/*
	 * we have performed an operation which
	 * requires a complete flush of the tlb
	 */
	unsigned int		need_flush_all : 1;

	/*
	 * we have removed page directories
	 */
	unsigned int		freed_tables : 1;

	/*
	 * at which levels have we cleared entries?
	 */
	unsigned int		cleared_ptes : 1;
	unsigned int		cleared_pmds : 1;
	unsigned int		cleared_puds : 1;
	unsigned int		cleared_p4ds : 1;

	/*
	 * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma
	 */
	unsigned int		vma_exec : 1;
	unsigned int		vma_huge : 1;

	unsigned int		batch_count;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
	struct mmu_gather_batch *active;
	struct mmu_gather_batch	local;
	struct page		*__pages[MMU_GATHER_BUNDLE];

#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	unsigned int		page_size;
#endif
#endif
};

void tlb_flush_mmu(struct mmu_gather *tlb);

static inline void __tlb_adjust_range(struct mmu_gather *tlb,
				      unsigned long address,
				      unsigned int range_size)
{
	tlb->start = min(tlb->start, address);
	tlb->end = max(tlb->end, address + range_size);
}

static inline void __tlb_reset_range(struct mmu_gather *tlb)
{
	if (tlb->fullmm) {
		tlb->start = tlb->end = ~0;
	} else {
		tlb->start = TASK_SIZE;
		tlb->end = 0;
	}
	tlb->freed_tables = 0;
	tlb->cleared_ptes = 0;
	tlb->cleared_pmds = 0;
	tlb->cleared_puds = 0;
	tlb->cleared_p4ds = 0;
	/*
	 * Do not reset mmu_gather::vma_* fields here, we do not
	 * call into tlb_start_vma() again to set them if there is an
	 * intermediate flush.
	 */
}
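
/*
 * Worked example (illustrative): on a freshly reset, non-fullmm gather
 * (start = TASK_SIZE, end = 0), the two calls
 *
 *	__tlb_adjust_range(tlb, 0x1000, PAGE_SIZE);
 *	__tlb_adjust_range(tlb, 0x5000, PAGE_SIZE);
 *
 * leave tlb->start = 0x1000 and tlb->end = 0x6000 (assuming 4K pages), i.e.
 * the smallest range covering everything unmapped since the last flush,
 * including any holes in between.
 */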

#ifdef CONFIG_MMU_GATHER_NO_RANGE

#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
#endif

/*
 * When an architecture does not have an efficient means of flushing a range
 * of TLB entries there is no point in doing intermediate flushes on
 * tlb_end_vma() to keep the range small. We equally don't have to worry
 * about page granularity or other things.
 *
 * All we need to do is issue a full flush for any !0 range.
 */
static inline void tlb_flush(struct mmu_gather *tlb)
{
	if (tlb->end)
		flush_tlb_mm(tlb->mm);
}

static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }

#define tlb_end_vma tlb_end_vma
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }

#else /* CONFIG_MMU_GATHER_NO_RANGE */

#ifndef tlb_flush

#if defined(tlb_start_vma) || defined(tlb_end_vma)
#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
#endif

/*
 * When an architecture does not provide its own tlb_flush() implementation
 * but does have a reasonably efficient flush_tlb_range() implementation,
 * use that.
 */
static inline void tlb_flush(struct mmu_gather *tlb)
{
	if (tlb->fullmm || tlb->need_flush_all) {
		flush_tlb_mm(tlb->mm);
	} else if (tlb->end) {
		struct vm_area_struct vma = {
			.vm_mm = tlb->mm,
			.vm_flags = (tlb->vma_exec ? VM_EXEC : 0) |
				    (tlb->vma_huge ? VM_HUGETLB : 0),
		};

		flush_tlb_range(&vma, tlb->start, tlb->end);
	}
}

static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	/*
	 * flush_tlb_range() implementations that look at VM_HUGETLB (tile,
	 * mips-4k) flush only large pages.
	 *
	 * flush_tlb_range() implementations that flush I-TLB also flush D-TLB
	 * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing
	 * range.
	 *
	 * We rely on tlb_end_vma() to issue a flush, such that when we reset
	 * these values the batch is empty.
	 */
	tlb->vma_huge = is_vm_hugetlb_page(vma);
	tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
}

#else

static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }

#endif

#endif /* CONFIG_MMU_GATHER_NO_RANGE */
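
/*
 * Sketch of an architecture override (hypothetical, not part of this header):
 * an arch with its own ranged invalidate defines the tlb_flush macro before
 * including this file and honours the fields described at the top, e.g.
 * roughly:
 *
 *	#define tlb_flush tlb_flush
 *	static inline void tlb_flush(struct mmu_gather *tlb)
 *	{
 *		if (tlb->fullmm || tlb->need_flush_all)
 *			flush_tlb_mm(tlb->mm);
 *		else if (tlb->end)
 *			my_arch_flush_range(tlb->mm, tlb->start, tlb->end);
 *	}
 *
 * my_arch_flush_range() is a made-up stand-in; a real implementation would
 * typically also look at tlb->freed_tables and the tlb->cleared_* bits (or
 * tlb_get_unmap_size()) to pick the invalidation granule.
 */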

static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
	/*
	 * Anything calling __tlb_adjust_range() also sets at least one of
	 * these bits.
	 */
	if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
	      tlb->cleared_puds || tlb->cleared_p4ds))
		return;

	tlb_flush(tlb);
	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
	__tlb_reset_range(tlb);
}

static inline void tlb_remove_page_size(struct mmu_gather *tlb,
					struct page *page, int page_size)
{
	if (__tlb_remove_page_size(tlb, page, page_size))
		tlb_flush_mmu(tlb);
}

static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
	return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
}

/* tlb_remove_page
 *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
 *	required.
 */
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
	return tlb_remove_page_size(tlb, page, PAGE_SIZE);
}
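
/*
 * Sketch (illustrative, loosely modelled on architectures that select
 * MMU_GATHER_NO_GATHER): when the TLB entry is already gone by the time the
 * pte is cleared, the arch-provided __tlb_remove_page_size() can free the
 * page immediately instead of batching it, roughly:
 *
 *	bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 *				    int page_size)
 *	{
 *		free_page_and_swap_cache(page);
 *		return false;	// never force a tlb_flush_mmu()
 *	}
 */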

static inline void tlb_change_page_size(struct mmu_gather *tlb,
					unsigned int page_size)
{
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	if (tlb->page_size && tlb->page_size != page_size) {
		if (!tlb->fullmm && !tlb->need_flush_all)
			tlb_flush_mmu(tlb);
	}

	tlb->page_size = page_size;
#endif
}

static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
{
	if (tlb->cleared_ptes)
		return PAGE_SHIFT;
	if (tlb->cleared_pmds)
		return PMD_SHIFT;
	if (tlb->cleared_puds)
		return PUD_SHIFT;
	if (tlb->cleared_p4ds)
		return P4D_SHIFT;

	return PAGE_SHIFT;
}

static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
{
	return 1UL << tlb_get_unmap_shift(tlb);
}

/*
 * For TLB VMA handling we can optimise these away when we're doing a full MM
 * flush. When we're doing a munmap, the vmas are adjusted to only cover the
 * region to be torn down.
 */
#ifndef tlb_start_vma
static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	if (tlb->fullmm)
		return;

	tlb_update_vma_flags(tlb, vma);
	flush_cache_range(vma, vma->vm_start, vma->vm_end);
}
#endif

#ifndef tlb_end_vma
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	if (tlb->fullmm)
		return;

	/*
	 * Do a TLB flush and reset the range at VMA boundaries; this avoids
	 * the ranges growing with the unused space between consecutive VMAs,
	 * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
	 * this.
	 */
	tlb_flush_mmu_tlbonly(tlb);
}
#endif

#ifndef __tlb_remove_tlb_entry
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#endif

/**
 * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
 *
 * Record the fact that PTEs were really unmapped by updating the range,
 * so we can later optimise away the tlb invalidate.  This helps when
 * userspace is unmapping already-unmapped pages, which happens quite a lot.
 */
#define tlb_remove_tlb_entry(tlb, ptep, address)		\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->cleared_ptes = 1;				\
		__tlb_remove_tlb_entry(tlb, ptep, address);	\
	} while (0)

#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
	do {							\
		unsigned long _sz = huge_page_size(h);		\
		__tlb_adjust_range(tlb, address, _sz);		\
		if (_sz == PMD_SIZE)				\
			tlb->cleared_pmds = 1;			\
		else if (_sz == PUD_SIZE)			\
			tlb->cleared_puds = 1;			\
		__tlb_remove_tlb_entry(tlb, ptep, address);	\
	} while (0)

/**
 * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
 * This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pmd_tlb_entry
#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
#endif

#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)			\
	do {								\
		__tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);	\
		tlb->cleared_pmds = 1;					\
		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);		\
	} while (0)

/**
 * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
 * invalidation. This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pud_tlb_entry
#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
#endif

#define tlb_remove_pud_tlb_entry(tlb, pudp, address)			\
	do {								\
		__tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);	\
		tlb->cleared_puds = 1;					\
		__tlb_remove_pud_tlb_entry(tlb, pudp, address);		\
	} while (0)

/*
 * For things like page table caches (i.e. caching addresses "inside" the
 * page tables, like x86 does), for legacy reasons, flushing an
 * individual page had better flush the page table caches behind it.  This
 * is definitely how x86 works, for example.  And if you have an
 * architected non-legacy page table cache (which I'm not aware of
 * anybody actually doing), you're going to have some architecturally
 * explicit flushing for that, likely *separate* from a regular TLB entry
 * flush, and thus you'd need more than just some range expansion..
 *
 * So if we ever find an architecture
 * that would want something that odd, I think it is up to that
 * architecture to do its own odd thing, not cause pain for others
 * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
 *
 * For now, w.r.t. the page table cache, mark the range_size as PAGE_SIZE.
 */

#ifndef pte_free_tlb
#define pte_free_tlb(tlb, ptep, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		tlb->cleared_pmds = 1;				\
		__pte_free_tlb(tlb, ptep, address);		\
	} while (0)
#endif

#ifndef pmd_free_tlb
#define pmd_free_tlb(tlb, pmdp, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		tlb->cleared_puds = 1;				\
		__pmd_free_tlb(tlb, pmdp, address);		\
	} while (0)
#endif

#ifndef pud_free_tlb
#define pud_free_tlb(tlb, pudp, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		tlb->cleared_p4ds = 1;				\
		__pud_free_tlb(tlb, pudp, address);		\
	} while (0)
#endif

#ifndef p4d_free_tlb
#define p4d_free_tlb(tlb, p4dp, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		__p4d_free_tlb(tlb, p4dp, address);		\
	} while (0)
#endif

#endif /* CONFIG_MMU */

#endif /* _ASM_GENERIC__TLB_H */