#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>

/*
 *	TLB flushing, formerly SMP-only
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway.)
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *	More scalable flush, from Andi Kleen
 *
 *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

/*
 * We get here when we do something requiring a TLB invalidation
 * but could not go invalidate all of the contexts.  We do the
 * necessary invalidation by clearing out the 'ctx_id' which
 * forces a TLB flush when the context is loaded.
 */
static void clear_asid_other(void)
{
	u16 asid;

	/*
	 * This is only expected to be set if we have disabled
	 * kernel _PAGE_GLOBAL pages.
	 */
	if (!static_cpu_has(X86_FEATURE_PTI)) {
		WARN_ON_ONCE(1);
		return;
	}

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		/* Do not need to flush the current asid */
		if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
			continue;
		/*
		 * Make sure the next time we go to switch to
		 * this asid, we do a flush:
		 */
		this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
	}
	this_cpu_write(cpu_tlbstate.invalidate_other, false);
}

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);


static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
			    u16 *new_asid, bool *need_flush)
{
	u16 asid;

	if (!static_cpu_has(X86_FEATURE_PCID)) {
		*new_asid = 0;
		*need_flush = true;
		return;
	}

	if (this_cpu_read(cpu_tlbstate.invalidate_other))
		clear_asid_other();

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
		    next->context.ctx_id)
			continue;

		*new_asid = asid;
		*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
			       next_tlb_gen);
		return;
	}

	/*
	 * We don't currently own an ASID slot on this CPU.
	 * Allocate a slot.
	 */
	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
	if (*new_asid >= TLB_NR_DYN_ASIDS) {
		*new_asid = 0;
		this_cpu_write(cpu_tlbstate.next_asid, 1);
	}
	*need_flush = true;
}

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
	unsigned long new_mm_cr3;

	if (need_flush) {
		invalidate_user_asid(new_asid);
		new_mm_cr3 = build_cr3(pgdir, new_asid);
	} else {
		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
	}

	/*
	 * Caution: many callers of this function expect
	 * that load_cr3() is serializing and orders TLB
	 * fills with respect to the mm_cpumask writes.
	 */
	write_cr3(new_mm_cr3);
}

void leave_mm(int cpu)
{
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	/*
	 * It's plausible that we're in lazy TLB mode while our mm is init_mm.
	 * If so, our callers still expect us to flush the TLB, but there
	 * aren't any user TLB entries in init_mm to worry about.
	 *
	 * This needs to happen before any other sanity checks due to
	 * intel_idle's shenanigans.
	 */
	if (loaded_mm == &init_mm)
		return;

	/* Warn if we're not lazy. */
	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));

	switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}

static void sync_current_stack_to_mm(struct mm_struct *mm)
{
	unsigned long sp = current_stack_pointer;
	pgd_t *pgd = pgd_offset(mm, sp);

	if (pgtable_l5_enabled()) {
		if (unlikely(pgd_none(*pgd))) {
			pgd_t *pgd_ref = pgd_offset_k(sp);

			set_pgd(pgd, *pgd_ref);
		}
	} else {
		/*
		 * "pgd" is faked.  The top level entries are "p4d"s, so sync
		 * the p4d.  This compiles to approximately the same code as
		 * the 5-level case.
		 */
		p4d_t *p4d = p4d_offset(pgd, sp);

		if (unlikely(p4d_none(*p4d))) {
			pgd_t *pgd_ref = pgd_offset_k(sp);
			p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);

			set_p4d(p4d, *p4d_ref);
		}
	}
}

void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;
	bool need_flush;
	u16 new_asid;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func_* to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 *
		 * (This is far from being a fully correct recovery.
		 *  Architecturally, the CPU could prefetch something
		 *  back into an incorrect ASID slot and leave it there
		 *  to cause trouble down the road.  It's better than
		 *  nothing, though.)
		 */
		__flush_tlb_all();
	}
#endif
	this_cpu_write(cpu_tlbstate.is_lazy, false);

	/*
	 * The membarrier system call requires a full memory barrier and
	 * core serialization before returning to user-space, after
	 * storing to rq->curr. Writing to CR3 provides that full
	 * memory barrier and core serializing instruction.
	 */
	if (real_prev == next) {
		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			   next->context.ctx_id);

		/*
		 * We don't currently support having a real mm loaded without
		 * our cpu set in mm_cpumask().  We have all the bookkeeping
		 * in place to figure out whether we would need to flush
		 * if our cpu were cleared in mm_cpumask(), but we don't
		 * currently use it.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		return;
	} else {
		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

		/*
		 * Avoid user/user BTB poisoning by flushing the branch
		 * predictor when switching between processes. This stops
		 * one process from doing Spectre-v2 attacks on another.
		 *
		 * As an optimization, flush indirect branches only when
		 * switching into processes that disable dumping. This
		 * protects high value processes like gpg, without having
		 * too high performance overhead. IBPB is *expensive*!
		 *
		 * This will not flush branches when switching into kernel
		 * threads. It will also not flush if we switch to idle
		 * thread and back to the same process. It will flush if we
		 * switch to a different non-dumpable process.
		 */
		if (tsk && tsk->mm &&
		    tsk->mm->context.ctx_id != last_ctx_id &&
		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
			indirect_branch_prediction_barrier();

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Forcibly
			 * map it.
			 */
			sync_current_stack_to_mm(next);
		}

		/*
		 * Stop remote flushes for the previous mm.
		 * Skip kernel threads; we never send init_mm TLB flushing IPIs,
		 * but the bitmap manipulation can cause cache line contention.
		 */
		if (real_prev != &init_mm) {
			VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
						mm_cpumask(real_prev)));
			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
		}

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		if (next != &init_mm)
			cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		/* Let nmi_uaccess_okay() know that we're changing CR3. */
		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
		barrier();
	}

	if (need_flush) {
		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
		load_new_mm_cr3(next->pgd, new_asid, true);

		/*
		 * NB: This gets called via leave_mm() in the idle path
		 * where RCU functions differently.  Tracing normally
		 * uses RCU, so we need to use the _rcuidle variant.
		 *
		 * (There is no good reason for this.  The idle code should
		 *  be rearranged to call this before rcu_idle_enter().)
		 */
		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
	} else {
		/* The new ASID is already up to date. */
		load_new_mm_cr3(next->pgd, new_asid, false);

		/* See above wrt _rcuidle. */
		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
	}

	/*
	 * Record last user mm's context id, so we can avoid
	 * flushing branch buffer with IBPB if we switch back
	 * to the same user.
	 */
	if (next != &init_mm)
		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);

	/* Make sure we write CR3 before loaded_mm. */
	barrier();

	this_cpu_write(cpu_tlbstate.loaded_mm, next);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);

	load_mm_cr4(next);
	switch_ldt(real_prev, next);
}

/*
 * Please ignore the name of this function.  It should be called
 * switch_to_kernel_thread().
 *
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * include doing nothing whatsoever, switching to init_mm, or various clever
 * lazy tricks to try to minimize TLB flushes.
 *
 * The scheduler reserves the right to call enter_lazy_tlb() several times
 * in a row.  It will notify us that we're going back to a real mm by
 * calling switch_mm_irqs_off().
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

	this_cpu_write(cpu_tlbstate.is_lazy, true);
}

/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear).  CPU hotplug can do this.
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB
 *   flush.
 */
void initialize_tlbstate_and_flush(void)
{
	int i;
	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
	unsigned long cr3 = __read_cr3();

	/* Assert that CR3 already references the right mm. */
	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

	/*
	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
	 * doesn't work like other CR4 bits because it can only be set from
	 * long mode.)
	 */
	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
		!(cr4_read_shadow() & X86_CR4_PCIDE));

	/* Force ASID 0 and force a TLB flush. */
	write_cr3(build_cr3(mm->pgd, 0));

	/* Reinitialize tlbstate. */
	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
	this_cpu_write(cpu_tlbstate.next_asid, 1);
	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

/*
 * flush_tlb_func_common()'s memory ordering requirement is that any
 * TLB fills that happen after we flush the TLB are ordered after we
 * read active_mm's tlb_gen.  We don't need any explicit barriers
 * because all x86 flush operations are serializing and the
 * atomic64_read operation won't be reordered by the compiler.
 */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
				  bool local, enum tlb_flush_reason reason)
{
	/*
	 * We have three different tlb_gen values in here.  They are:
	 *
	 * - mm_tlb_gen:     the latest generation.
	 * - local_tlb_gen:  the generation that this CPU has already caught
	 *                   up to.
	 * - f->new_tlb_gen: the generation that the requester of the flush
	 *                   wants us to catch up to.
	 */
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

	/* This code cannot presently handle being reentered. */
	VM_WARN_ON(!irqs_disabled());

	if (unlikely(loaded_mm == &init_mm))
		return;

	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
		   loaded_mm->context.ctx_id);

	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
		/*
		 * We're in lazy mode.  We need to at least flush our
		 * paging-structure cache to avoid speculatively reading
		 * garbage into our TLB.  Since switching to init_mm is barely
		 * slower than a minimal flush, just switch to init_mm.
		 */
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
	}

	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
		/*
		 * There's nothing to do: we're already up to date.  This can
		 * happen if two concurrent flushes happen -- the first flush to
		 * be handled can catch us all the way up, leaving no work for
		 * the second flush.
		 */
		trace_tlb_flush(reason, 0);
		return;
	}

	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

	/*
	 * If we get to this point, we know that our TLB is out of date.
	 * This does not strictly imply that we need to flush (it's
	 * possible that f->new_tlb_gen <= local_tlb_gen), but we're
	 * going to need to flush in the very near future, so we might
	 * as well get it over with.
	 *
	 * The only question is whether to do a full or partial flush.
	 *
	 * We do a partial flush if requested and two extra conditions
	 * are met:
	 *
	 * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
	 *    we've always done all needed flushes to catch up to
	 *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
	 *    f->new_tlb_gen == 3, then we know that the flush needed to bring
	 *    us up to date for tlb_gen 3 is the partial flush we're
	 *    processing.
	 *
	 *    As an example of why this check is needed, suppose that there
	 *    are two concurrent flushes.  The first is a full flush that
	 *    changes context.tlb_gen from 1 to 2.  The second is a partial
	 *    flush that changes context.tlb_gen from 2 to 3.  If they get
	 *    processed on this CPU in reverse order, we'll see
	 *    local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
	 *    If we were to use __flush_tlb_one_user() and set local_tlb_gen to
	 *    3, we'd break the invariant: we'd update local_tlb_gen above
	 *    1 without the full flush that's needed for tlb_gen 2.
	 *
	 * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization.
	 *    Partial TLB flushes are not all that much cheaper than full TLB
	 *    flushes, so it seems unlikely that it would be a performance win
	 *    to do a partial flush if that won't bring our TLB fully up to
	 *    date.  By doing a full flush instead, we can increase
	 *    local_tlb_gen all the way to mm_tlb_gen and we can probably
	 *    avoid another flush in the very near future.
	 */
	if (f->end != TLB_FLUSH_ALL &&
	    f->new_tlb_gen == local_tlb_gen + 1 &&
	    f->new_tlb_gen == mm_tlb_gen) {
		/* Partial flush */
		unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
		unsigned long addr = f->start;

		while (addr < f->end) {
			__flush_tlb_one_user(addr);
			addr += 1UL << f->stride_shift;
		}
		if (local)
			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
		trace_tlb_flush(reason, nr_invalidate);
	} else {
		/* Full flush. */
		local_flush_tlb();
		if (local)
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
		trace_tlb_flush(reason, TLB_FLUSH_ALL);
	}

	/* Both paths above update our state to mm_tlb_gen. */
	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}

static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
{
	const struct flush_tlb_info *f = info;

	flush_tlb_func_common(f, true, reason);
}

static void flush_tlb_func_remote(void *info)
{
	const struct flush_tlb_info *f = info;

	inc_irq_stat(irq_tlb_count);

	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
		return;

	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}

void native_flush_tlb_others(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	if (info->end == TLB_FLUSH_ALL)
		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
	else
		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
				(info->end - info->start) >> PAGE_SHIFT);

	if (is_uv_system()) {
		/*
		 * This whole special case is confused.  UV has a "Broadcast
		 * Assist Unit", which seems to be a fancy way to send IPIs.
		 * Back when x86 used an explicit TLB flush IPI, UV was
		 * optimized to use its own mechanism.  These days, x86 uses
		 * smp_call_function_many(), but UV still uses a manual IPI,
		 * and that IPI's action is out of date -- it does a manual
		 * flush instead of calling flush_tlb_func_remote().  This
		 * means that the percpu tlb_gen variables won't be updated
		 * and we'll do pointless flushes on future context switches.
		 *
		 * Rather than hooking native_flush_tlb_others() here, I think
		 * that UV should be updated so that smp_call_function_many(),
		 * etc, are optimal on UV.
		 */
		unsigned int cpu;

		cpu = smp_processor_id();
		cpumask = uv_flush_tlb_others(cpumask, info);
		if (cpumask)
			smp_call_function_many(cpumask, flush_tlb_func_remote,
					       (void *)info, 1);
		return;
	}
	smp_call_function_many(cpumask, flush_tlb_func_remote,
			       (void *)info, 1);
}

/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned int stride_shift)
{
	int cpu;

	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
		.mm = mm,
		.stride_shift = stride_shift,
	};

	cpu = get_cpu();

	/* This is also a barrier that synchronizes with switch_mm(). */
	info.new_tlb_gen = inc_mm_tlb_gen(mm);

	/* Should we flush just the requested range? */
	if ((end != TLB_FLUSH_ALL) &&
	    ((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) {
		info.start = start;
		info.end = end;
	} else {
		info.start = 0UL;
		info.end = TLB_FLUSH_ALL;
	}

	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
		flush_tlb_others(mm_cpumask(mm), &info);

	put_cpu();
}


static void do_flush_tlb_all(void *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	__flush_tlb_all();
}

void flush_tlb_all(void)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* Flush the range one page at a time with 'invlpg'. */
	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
		__flush_tlb_one_kernel(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{

	/* Balance as user space task's flush, a bit conservative */
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	} else {
		struct flush_tlb_info info;
		info.start = start;
		info.end = end;
		on_each_cpu(do_kernel_range_flush, &info, 1);
	}
}

void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	struct flush_tlb_info info = {
		.mm = NULL,
		.start = 0UL,
		.end = TLB_FLUSH_ALL,
	};

	int cpu = get_cpu();

	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
		flush_tlb_others(&batch->cpumask, &info);

	cpumask_clear(&batch->cpumask);

	put_cpu();
}

static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
			     size_t count, loff_t *ppos)
{
	char buf[32];
	unsigned int len;

	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
		 const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	ssize_t len;
	int ceiling;

	len = min(count, sizeof(buf) - 1);
	if (copy_from_user(buf, user_buf, len))
		return -EFAULT;

	buf[len] = '\0';
	if (kstrtoint(buf, 0, &ceiling))
		return -EINVAL;

	if (ceiling < 0)
		return -EINVAL;

	tlb_single_page_flush_ceiling = ceiling;
	return count;
}

static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
			    arch_debugfs_dir, NULL, &fops_tlbflush);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);