/* include/asm-generic/tlb.h
 *
 * Generic TLB shootdown code
 *
 * Copyright 2001 Red Hat, Inc.
 * Based on code from mm/memory.c Copyright Linus Torvalds and others.
 *
 * Copyright 2011 Red Hat, Inc., Peter Zijlstra
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#ifndef _ASM_GENERIC__TLB_H
#define _ASM_GENERIC__TLB_H

#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>

/*
 * Blindly accessing user memory from NMI context can be dangerous
 * if we're in the middle of switching the current user task or switching
 * the loaded mm.
 */
#ifndef nmi_uaccess_okay
# define nmi_uaccess_okay() true
#endif

#ifdef CONFIG_MMU

/*
 * Generic MMU-gather implementation.
 *
 * The mmu_gather data structure is used by the mm code to implement the
 * correct and efficient ordering of freeing pages and TLB invalidations.
 *
 * This correct ordering is:
 *
 *  1) unhook page
 *  2) TLB invalidate page
 *  3) free page
 *
 * That is, we must never free a page before we have ensured there are no live
 * translations left to it. Otherwise it might be possible to observe (or
 * worse, change) the page content after it has been reused.
 *
 * The mmu_gather API consists of:
 *
 *  - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather
 *
 *    Finish in particular will issue a (final) TLB invalidate and free
 *    all (remaining) queued pages.
 *
 *  - tlb_start_vma() / tlb_end_vma(); mark the start / end of a VMA
 *
 *    Defaults to flushing at tlb_end_vma() to reset the range; helps when
 *    there are large holes between the VMAs.
 *
 *  - tlb_remove_page() / __tlb_remove_page()
 *  - tlb_remove_page_size() / __tlb_remove_page_size()
 *
 *    __tlb_remove_page_size() is the basic primitive that queues a page for
 *    freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
 *    boolean indicating if the queue is (now) full and a call to
 *    tlb_flush_mmu() is required.
 *
 *    tlb_remove_page() and tlb_remove_page_size() imply the call to
 *    tlb_flush_mmu() when required and have no return value.
 *
 *  - tlb_change_page_size()
 *
 *    Call before __tlb_remove_page*() to set the current page-size; implies a
 *    possible tlb_flush_mmu() call.
 *
 *  - tlb_flush_mmu() / tlb_flush_mmu_tlbonly()
 *
 *    tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets
 *                              related state, like the range)
 *
 *    tlb_flush_mmu() - in addition to the above TLB invalidate, also frees
 *                      whatever pages are still batched.
 *
 *  - mmu_gather::fullmm
 *
 *    A flag set by tlb_gather_mmu() to indicate we're going to free
 *    the entire mm; this allows a number of optimizations.
 *
 *    - We can ignore tlb_{start,end}_vma(); because we don't
 *      care about ranges. Everything will be shot down.
 *
 *    - (RISC) architectures that use ASIDs can cycle to a new ASID
 *      and delay the invalidation until ASID space runs out.
 *
 *  - mmu_gather::need_flush_all
 *
 *    A flag that can be set by the arch code if it wants to force-flush
 *    the entire TLB irrespective of the range. For instance x86-PAE needs
 *    this when changing top-level entries.
 *
 * And allows the architecture to provide and implement tlb_flush():
 *
 * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make
 * use of:
 *
 *  - mmu_gather::start / mmu_gather::end
 *
 *    which provide the range that needs to be flushed to cover the pages to
 *    be freed.
 *
 *  - mmu_gather::freed_tables
 *
 *    set when we freed page table pages
 *
 *  - tlb_get_unmap_shift() / tlb_get_unmap_size()
 *
 *    return the smallest TLB entry size unmapped in this range.
 *
 * If an architecture does not provide tlb_flush() a default implementation
 * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is
 * specified, in which case we'll default to flush_tlb_mm().
 *
 * Additionally there are a few opt-in features:
 *
 *  HAVE_MMU_GATHER_PAGE_SIZE
 *
 *  This ensures we call tlb_flush() every time tlb_change_page_size() actually
 *  changes the size and provides mmu_gather::page_size to tlb_flush().
 *
 *  HAVE_RCU_TABLE_FREE
 *
 *  This provides tlb_remove_table(), to be used instead of tlb_remove_page()
 *  for page directories (__p*_free_tlb()). This provides separate freeing of
 *  the page-table pages themselves in a semi-RCU fashion (see comment below).
 *  Useful if your architecture doesn't use IPIs for remote TLB invalidates
 *  and therefore doesn't naturally serialize with software page-table walkers.
 *
 *  When used, an architecture is expected to provide __tlb_remove_table(),
 *  which does the actual freeing of these pages (see the sketch further
 *  below).
 *
 *  HAVE_RCU_TABLE_NO_INVALIDATE
 *
 *  This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
 *  freeing the page-table pages. That extra invalidate can be avoided if you
 *  use HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
 *  page-tables natively.
 *
 *  MMU_GATHER_NO_RANGE
 *
 *  Use this if your architecture lacks an efficient flush_tlb_range().
 *
 * An illustrative usage sketch follows this comment.
 */
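
/*
 * A minimal usage sketch (illustrative only, kept in a comment so it is not
 * compiled): the caller-side shape of an unmap operation. The PTE walk is
 * elided, pte/page stand in for whatever the walk produced, and
 * tlb_gather_mmu()/tlb_finish_mmu() are declared in linux/mm.h.
 *
 *	struct mmu_gather tlb;
 *
 *	tlb_gather_mmu(&tlb, vma->vm_mm, start, end);
 *	tlb_start_vma(&tlb, vma);
 *	for (addr = start; addr < end; addr += PAGE_SIZE) {
 *		// 1) unhook the page: clear the PTE (walk elided), then
 *		// 2) remember it for invalidation and queue it for freeing
 *		tlb_remove_tlb_entry(&tlb, pte, addr);
 *		tlb_remove_page(&tlb, page);	// may flush if the batch fills
 *	}
 *	tlb_end_vma(&tlb, vma);			// flush the accumulated range
 *	tlb_finish_mmu(&tlb, start, end);	// final invalidate, free leftovers
 */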

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 * IRQs delays the completion of the TLB flush we can never observe an already
 * freed page.
 *
 * Architectures that do not have this (e.g. PPC) need to delay the freeing by
 * some other means; this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail under memory
 * pressure. To guarantee progress we fall back to single-table freeing; see
 * the implementation of tlb_remove_table_one().
 */
struct mmu_table_batch {
	struct rcu_head		rcu;
	unsigned int		nr;
	void			*tables[0];
};

#define MAX_TABLE_BATCH		\
	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))

extern void tlb_remove_table(struct mmu_gather *tlb, void *table);

#endif
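
/*
 * A minimal sketch (an assumption, not a requirement of this header) of the
 * architecture side when CONFIG_HAVE_RCU_TABLE_FREE is selected: the
 * __p*_free_tlb() hooks hand page-table pages to tlb_remove_table() instead
 * of tlb_remove_page(), and __tlb_remove_table() does the actual free once
 * the semi-RCU grace period has passed:
 *
 *	static inline void __tlb_remove_table(void *table)
 *	{
 *		free_page_and_swap_cache((struct page *)table);
 *	}
 *
 * What exactly __tlb_remove_table() must undo (dtors, accounting) is arch
 * specific; the above is only the simplest plausible form.
 */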

#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
/*
 * If we can't allocate a page to make a big batch of page pointers
 * to work on, then just handle a few from the on-stack structure.
 */
#define MMU_GATHER_BUNDLE	8

struct mmu_gather_batch {
	struct mmu_gather_batch	*next;
	unsigned int		nr;
	unsigned int		max;
	struct page		*pages[0];
};

#define MAX_GATHER_BATCH	\
	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))

/*
 * Limit the maximum number of mmu_gather batches to reduce a risk of soft
 * lockups for non-preemptible kernels on huge machines when a lot of memory
 * is zapped during unmapping.
 * 10K pages freed at once should be safe even without a preemption point.
 */
#define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)

extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
				   int page_size);
#endif

/*
 * struct mmu_gather is an opaque type used by the mm code for passing around
 * any data needed by arch specific code for tlb_remove_page.
 */
struct mmu_gather {
	struct mm_struct	*mm;

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
	struct mmu_table_batch	*batch;
#endif

	unsigned long		start;
	unsigned long		end;
	/*
	 * we are in the middle of an operation to clear
	 * a full mm and can make some optimizations
	 */
	unsigned int		fullmm : 1;

	/*
	 * we have performed an operation which
	 * requires a complete flush of the tlb
	 */
	unsigned int		need_flush_all : 1;

	/*
	 * we have removed page directories
	 */
	unsigned int		freed_tables : 1;

	/*
	 * at which levels have we cleared entries?
	 */
	unsigned int		cleared_ptes : 1;
	unsigned int		cleared_pmds : 1;
	unsigned int		cleared_puds : 1;
	unsigned int		cleared_p4ds : 1;

	/*
	 * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma
	 */
	unsigned int		vma_exec : 1;
	unsigned int		vma_huge : 1;

	unsigned int		batch_count;

#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
	struct mmu_gather_batch *active;
	struct mmu_gather_batch	local;
	struct page		*__pages[MMU_GATHER_BUNDLE];

#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
	unsigned int		page_size;
#endif
#endif
};

void arch_tlb_gather_mmu(struct mmu_gather *tlb,
	struct mm_struct *mm, unsigned long start, unsigned long end);
void tlb_flush_mmu(struct mmu_gather *tlb);
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
			 unsigned long start, unsigned long end, bool force);

static inline void __tlb_adjust_range(struct mmu_gather *tlb,
				      unsigned long address,
				      unsigned int range_size)
{
	tlb->start = min(tlb->start, address);
	tlb->end = max(tlb->end, address + range_size);
}

static inline void __tlb_reset_range(struct mmu_gather *tlb)
{
	if (tlb->fullmm) {
		tlb->start = tlb->end = ~0;
	} else {
		tlb->start = TASK_SIZE;
		tlb->end = 0;
	}
	tlb->freed_tables = 0;
	tlb->cleared_ptes = 0;
	tlb->cleared_pmds = 0;
	tlb->cleared_puds = 0;
	tlb->cleared_p4ds = 0;
	/*
	 * Do not reset mmu_gather::vma_* fields here, we do not
	 * call into tlb_start_vma() again to set them if there is an
	 * intermediate flush.
	 */
}
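
/*
 * Worked example (illustrative, assuming 4K pages): after __tlb_reset_range()
 * the range is "empty" (start = TASK_SIZE, end = 0), so tlb->end == 0 doubles
 * as the "nothing to flush" test. Every removed entry then grows the range
 * via min/max:
 *
 *	__tlb_reset_range(tlb);				// start = TASK_SIZE, end = 0
 *	__tlb_adjust_range(tlb, 0x1000, PAGE_SIZE);	// range is [0x1000, 0x2000)
 *	__tlb_adjust_range(tlb, 0x5000, PAGE_SIZE);	// range is [0x1000, 0x6000)
 *
 * Note the accumulated range covers the hole at [0x2000, 0x5000) too; the
 * flush in tlb_end_vma() exists to keep that over-estimation bounded to a
 * single VMA.
 */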

#ifdef CONFIG_MMU_GATHER_NO_RANGE

#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
#endif

/*
 * When an architecture does not have an efficient means of flushing a range
 * of TLB entries, there is no point in doing intermediate flushes on
 * tlb_end_vma() to keep the range small. We equally don't have to worry about
 * page granularity or other things.
 *
 * All we need to do is issue a full flush for any !0 range.
 */
static inline void tlb_flush(struct mmu_gather *tlb)
{
	if (tlb->end)
		flush_tlb_mm(tlb->mm);
}

static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }

#define tlb_end_vma tlb_end_vma
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }

#else /* CONFIG_MMU_GATHER_NO_RANGE */

#ifndef tlb_flush

#if defined(tlb_start_vma) || defined(tlb_end_vma)
#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
#endif

/*
 * When an architecture does not provide its own tlb_flush() implementation
 * but does have a reasonably efficient flush_tlb_range() implementation, use
 * that.
 */
static inline void tlb_flush(struct mmu_gather *tlb)
{
	if (tlb->fullmm || tlb->need_flush_all) {
		flush_tlb_mm(tlb->mm);
	} else if (tlb->end) {
		struct vm_area_struct vma = {
			.vm_mm = tlb->mm,
			.vm_flags = (tlb->vma_exec ? VM_EXEC    : 0) |
				    (tlb->vma_huge ? VM_HUGETLB : 0),
		};

		flush_tlb_range(&vma, tlb->start, tlb->end);
	}
}

static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	/*
	 * flush_tlb_range() implementations that look at VM_HUGETLB (tile,
	 * mips-4k) flush only large pages.
	 *
	 * flush_tlb_range() implementations that flush I-TLB also flush D-TLB
	 * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing
	 * range.
	 *
	 * We rely on tlb_end_vma() to issue a flush, such that when we reset
	 * these values the batch is empty.
	 */
	tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB);
	tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
}

#else

static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }

#endif

#endif /* CONFIG_MMU_GATHER_NO_RANGE */
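
/*
 * A sketch of an architecture-provided tlb_flush() (this pattern is an
 * assumption, not mandated by this header): forward-declare it and define
 * the tlb_flush macro before including this file, then define the body
 * afterwards so it can use helpers such as tlb_get_unmap_size() below.
 * my_flush_range() is hypothetical; the point is which mmu_gather fields a
 * tlb_flush() implementation may consult.
 *
 *	static inline void tlb_flush(struct mmu_gather *tlb);
 *	#define tlb_flush tlb_flush
 *	#include <asm-generic/tlb.h>
 *
 *	static inline void tlb_flush(struct mmu_gather *tlb)
 *	{
 *		if (tlb->fullmm || tlb->need_flush_all)
 *			flush_tlb_mm(tlb->mm);
 *		else if (tlb->end)
 *			my_flush_range(tlb->mm, tlb->start, tlb->end,
 *				       tlb_get_unmap_size(tlb),	// stride
 *				       tlb->freed_tables);	// walk caches?
 *	}
 */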

static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
	if (!tlb->end)
		return;

	tlb_flush(tlb);
	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
	__tlb_reset_range(tlb);
}

static inline void tlb_remove_page_size(struct mmu_gather *tlb,
					struct page *page, int page_size)
{
	if (__tlb_remove_page_size(tlb, page, page_size))
		tlb_flush_mmu(tlb);
}

static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
	return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
}

/* tlb_remove_page
 *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
 *	required.
 */
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
	return tlb_remove_page_size(tlb, page, PAGE_SIZE);
}

static inline void tlb_change_page_size(struct mmu_gather *tlb,
					unsigned int page_size)
{
#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
	if (tlb->page_size && tlb->page_size != page_size) {
		if (!tlb->fullmm)
			tlb_flush_mmu(tlb);
	}

	tlb->page_size = page_size;
#endif
}

static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
{
	if (tlb->cleared_ptes)
		return PAGE_SHIFT;
	if (tlb->cleared_pmds)
		return PMD_SHIFT;
	if (tlb->cleared_puds)
		return PUD_SHIFT;
	if (tlb->cleared_p4ds)
		return P4D_SHIFT;

	return PAGE_SHIFT;
}

static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
{
	return 1UL << tlb_get_unmap_shift(tlb);
}

/*
 * In the case of tlb vma handling, we can optimise these away in the
 * case where we're doing a full MM flush. When we're doing a munmap,
 * the vmas are adjusted to only cover the region to be torn down.
 */
#ifndef tlb_start_vma
static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	if (tlb->fullmm)
		return;

	tlb_update_vma_flags(tlb, vma);
	flush_cache_range(vma, vma->vm_start, vma->vm_end);
}
#endif

#ifndef tlb_end_vma
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	if (tlb->fullmm)
		return;

	/*
	 * Do a TLB flush and reset the range at VMA boundaries; this avoids
	 * the ranges growing with the unused space between consecutive VMAs,
	 * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
	 * this.
	 */
	tlb_flush_mmu_tlbonly(tlb);
}
#endif
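
/*
 * Illustrative sketch: callers that may queue pages of more than one size
 * announce the size before queueing, so that a size change can force a flush
 * on architectures that select HAVE_MMU_GATHER_PAGE_SIZE (elsewhere the call
 * is a no-op). E.g. when zapping a PMD-sized THP (pmd/page set up by the
 * caller):
 *
 *	tlb_change_page_size(tlb, HPAGE_PMD_SIZE);	// may flush a mixed batch
 *	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);	// range tracking, see below
 *	tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
 */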

#ifndef __tlb_remove_tlb_entry
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#endif

/**
 * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
 *
 * Record the fact that PTEs were really unmapped by updating the range,
 * so we can later optimise away the tlb invalidate. This helps when
 * userspace is unmapping already-unmapped pages, which happens quite a lot.
 */
#define tlb_remove_tlb_entry(tlb, ptep, address)		\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->cleared_ptes = 1;				\
		__tlb_remove_tlb_entry(tlb, ptep, address);	\
	} while (0)

#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
	do {							\
		unsigned long _sz = huge_page_size(h);		\
		__tlb_adjust_range(tlb, address, _sz);		\
		if (_sz == PMD_SIZE)				\
			tlb->cleared_pmds = 1;			\
		else if (_sz == PUD_SIZE)			\
			tlb->cleared_puds = 1;			\
		__tlb_remove_tlb_entry(tlb, ptep, address);	\
	} while (0)

/**
 * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
 * This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pmd_tlb_entry
#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
#endif

#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)			\
	do {								\
		__tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);	\
		tlb->cleared_pmds = 1;					\
		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);		\
	} while (0)

/**
 * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
 * invalidation. This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pud_tlb_entry
#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
#endif

#define tlb_remove_pud_tlb_entry(tlb, pudp, address)			\
	do {								\
		__tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);	\
		tlb->cleared_puds = 1;					\
		__tlb_remove_pud_tlb_entry(tlb, pudp, address);		\
	} while (0)

/*
 * For things like page table caches (i.e. caching addresses "inside" the
 * page tables, like x86 does), for legacy reasons, flushing an
 * individual page had better flush the page table caches behind it. This
 * is definitely how x86 works, for example. And if you have an
 * architected non-legacy page table cache (which I'm not aware of
 * anybody actually doing), you're going to have some architecturally
 * explicit flushing for that, likely *separate* from a regular TLB entry
 * flush, and thus you'd need more than just some range expansion..
 *
 * So if we ever find an architecture
 * that would want something that odd, I think it is up to that
 * architecture to do its own odd thing, not cause pain for others
 * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
 *
 * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
 */
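
/*
 * Note on the cleared_* bits below: freeing a page-table page means the entry
 * pointing to it (one level up) was cleared, so pte_free_tlb() marks
 * cleared_pmds, pmd_free_tlb() marks cleared_puds, and so on; all of them
 * also set freed_tables so tlb_flush() can invalidate page-walk caches.
 *
 * A minimal sketch (an assumption, not a requirement) of the matching arch
 * hook on a simple IPI-based architecture, where pgtable_t is a struct page
 * pointer:
 *
 *	#define __pte_free_tlb(tlb, pte, address)	\
 *	do {						\
 *		pgtable_page_dtor(pte);			\
 *		tlb_remove_page((tlb), (pte));		\
 *	} while (0)
 */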

#ifndef pte_free_tlb
#define pte_free_tlb(tlb, ptep, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		tlb->cleared_pmds = 1;				\
		__pte_free_tlb(tlb, ptep, address);		\
	} while (0)
#endif

#ifndef pmd_free_tlb
#define pmd_free_tlb(tlb, pmdp, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		tlb->cleared_puds = 1;				\
		__pmd_free_tlb(tlb, pmdp, address);		\
	} while (0)
#endif

#ifndef __ARCH_HAS_4LEVEL_HACK
#ifndef pud_free_tlb
#define pud_free_tlb(tlb, pudp, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		tlb->cleared_p4ds = 1;				\
		__pud_free_tlb(tlb, pudp, address);		\
	} while (0)
#endif
#endif

#ifndef __ARCH_HAS_5LEVEL_HACK
#ifndef p4d_free_tlb
#define p4d_free_tlb(tlb, pudp, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		tlb->freed_tables = 1;				\
		__p4d_free_tlb(tlb, pudp, address);		\
	} while (0)
#endif
#endif

#endif /* CONFIG_MMU */

#endif /* _ASM_GENERIC__TLB_H */