#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/cpu.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
#include <linux/debugfs.h>

/*
 *	Smarter SMP flushing macros.
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway.)
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *	More scalable flush, from Andi Kleen
 *
 *	Flush IPIs implemented via CALL_FUNCTION_VECTOR, Alex Shi
 */

#ifdef CONFIG_SMP

struct flush_tlb_info {
        struct mm_struct *flush_mm;
        unsigned long flush_start;
        unsigned long flush_end;
};

/*
 * We cannot call mmdrop() because we are in interrupt context;
 * instead, update mm->cpu_vm_mask.
 */
void leave_mm(int cpu)
{
        struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);

        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
                BUG();
        if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
                cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
                load_cr3(swapper_pg_dir);
                /*
                 * This gets called in the idle path where RCU
                 * functions differently.  Tracing normally
                 * uses RCU, so we have to call the tracepoint
                 * specially here.
                 */
                trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
        }
}
EXPORT_SYMBOL_GPL(leave_mm);

#endif /* CONFIG_SMP */

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
               struct task_struct *tsk)
{
        unsigned cpu = smp_processor_id();

        if (likely(prev != next)) {
#ifdef CONFIG_SMP
                this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
                this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
                cpumask_set_cpu(cpu, mm_cpumask(next));

                /*
                 * Re-load page tables.
                 *
                 * This logic has an ordering constraint:
                 *
                 *  CPU 0: Write to a PTE for 'next'
                 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
                 *  CPU 1: set bit 1 in next's mm_cpumask
                 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
                 *
                 * We need to prevent an outcome in which CPU 1 observes
                 * the new PTE value and CPU 0 observes bit 1 clear in
                 * mm_cpumask.  (If that occurs, then the IPI will never
                 * be sent, and CPU 0's TLB will contain a stale entry.)
                 *
                 * The bad outcome can occur if either CPU's load is
                 * reordered before that CPU's store, so both CPUs must
                 * execute full barriers to prevent this from happening.
                 *
                 * Thus, switch_mm needs a full barrier between the
                 * store to mm_cpumask and any operation that could load
                 * from next->pgd.  TLB fills are special and can happen
                 * due to instruction fetches or for no reason at all,
                 * and neither LOCK nor MFENCE orders them.
                 * Fortunately, load_cr3() is serializing and gives the
                 * ordering guarantee we need.
                 */
                load_cr3(next->pgd);

                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);

                /* Stop flush ipis for the previous mm */
                cpumask_clear_cpu(cpu, mm_cpumask(prev));

                /* Load per-mm CR4 state */
                load_mm_cr4(next);

#ifdef CONFIG_MODIFY_LDT_SYSCALL
                /*
                 * Load the LDT, if the LDT is different.
                 *
                 * It's possible that prev->context.ldt doesn't match
                 * the LDT register.  This can happen if leave_mm(prev)
                 * was called and then modify_ldt changed
                 * prev->context.ldt but suppressed an IPI to this CPU.
                 * In this case, prev->context.ldt != NULL, because we
                 * never set context.ldt to NULL while the mm still
                 * exists.  That means that next->context.ldt !=
                 * prev->context.ldt, because mms never share an LDT.
                 */
                if (unlikely(prev->context.ldt != next->context.ldt))
                        load_mm_ldt(next);
#endif
        }
#ifdef CONFIG_SMP
        else {
                this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
                BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);

                if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
                        /*
                         * On established mms, the mm_cpumask is only changed
                         * from irq context, from ptep_clear_flush() while in
                         * lazy tlb mode, and here.  Irqs are blocked during
                         * schedule, protecting us from simultaneous changes.
                         */
                        cpumask_set_cpu(cpu, mm_cpumask(next));

                        /*
                         * We were in lazy tlb mode and leave_mm disabled
                         * tlb flush IPI delivery.  We must reload CR3
                         * to make sure we don't use freed page tables.
                         *
                         * As above, load_cr3() is serializing and orders TLB
                         * fills with respect to the mm_cpumask write.
                         */
                        load_cr3(next->pgd);
                        trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                        load_mm_cr4(next);
                        load_mm_ldt(next);
                }
        }
#endif
}

#ifdef CONFIG_SMP

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) set cpu_tlbstate to TLBSTATE_OK
 *	Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
 *	if cpu0 was in lazy tlb mode.
 * 1a2) update cpu active_mm
 *	Now cpu0 accepts tlb flushes for the new mm.
 * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
 *	Now the other cpus will send tlb flush ipis.
 * 1a4) change cr3.
 * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *	Stop ipi delivery for the old mm.  This is not synchronized with
 *	the other cpus, but flush_tlb_func ignores flush ipis for the wrong
 *	mm, and in the worst case we perform a superfluous tlb flush.
 * 1b) thread switch without mm change
 *	cpu active_mm is correct, cpu0 already handles flush ipis.
 * 1b1) set cpu_tlbstate to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *	Atomically set the bit [other cpus will start sending flush ipis],
 *	and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, i.e. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, i.e. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu_tlbstate is local to each cpu, so there are
 * no write/read ordering problems.
 */

/*
 * TLB flush function:
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
 */
static void flush_tlb_func(void *info)
{
        struct flush_tlb_info *f = info;

        inc_irq_stat(irq_tlb_count);

        if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
                return;

        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
                if (f->flush_end == TLB_FLUSH_ALL) {
                        local_flush_tlb();
                        trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
                } else {
                        unsigned long addr;
                        unsigned long nr_pages =
                                (f->flush_end - f->flush_start) / PAGE_SIZE;

                        addr = f->flush_start;
                        while (addr < f->flush_end) {
                                __flush_tlb_single(addr);
                                addr += PAGE_SIZE;
                        }
                        trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
                }
        } else
                leave_mm(smp_processor_id());
}

void native_flush_tlb_others(const struct cpumask *cpumask,
                             struct mm_struct *mm, unsigned long start,
                             unsigned long end)
{
        struct flush_tlb_info info;

        if (end == 0)
                end = start + PAGE_SIZE;
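        /*
         * Package the flush parameters so a single pointer can be handed
         * to flush_tlb_func() on the remote CPUs.  The call below waits
         * for completion, so passing the on-stack info is safe.
         */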
        info.flush_mm = mm;
        info.flush_start = start;
        info.flush_end = end;

        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        if (end == TLB_FLUSH_ALL)
                trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
        else
                trace_tlb_flush(TLB_REMOTE_SEND_IPI,
                                (end - start) >> PAGE_SHIFT);

        if (is_uv_system()) {
                unsigned int cpu;

                cpu = smp_processor_id();
                cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
                if (cpumask)
                        smp_call_function_many(cpumask, flush_tlb_func,
                                               &info, 1);
                return;
        }
        smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
}

void flush_tlb_current_task(void)
{
        struct mm_struct *mm = current->mm;

        preempt_disable();

        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);

        /* This is an implicit full barrier that synchronizes with switch_mm. */
        local_flush_tlb();

        trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
        preempt_enable();
}

/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end, unsigned long vmflag)
{
        unsigned long addr;
        /* do a global flush by default */
        unsigned long base_pages_to_flush = TLB_FLUSH_ALL;

        preempt_disable();
        if (current->active_mm != mm) {
                /* Synchronize with switch_mm. */
                smp_mb();

                goto out;
        }

        if (!current->mm) {
                leave_mm(smp_processor_id());

                /* Synchronize with switch_mm. */
                smp_mb();

                goto out;
        }

        if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
                base_pages_to_flush = (end - start) >> PAGE_SHIFT;

        /*
         * Both branches below are implicit full barriers (MOV to CR or
         * INVLPG) that synchronize with switch_mm.
         */
        if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
                base_pages_to_flush = TLB_FLUSH_ALL;
                count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
                local_flush_tlb();
        } else {
                /* flush the range one page at a time with 'invlpg' */
                for (addr = start; addr < end; addr += PAGE_SIZE) {
                        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
                        __flush_tlb_single(addr);
                }
        }
        trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
out:
        if (base_pages_to_flush == TLB_FLUSH_ALL) {
                start = 0UL;
                end = TLB_FLUSH_ALL;
        }
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, start, end);
        preempt_enable();
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
{
        struct mm_struct *mm = vma->vm_mm;

        preempt_disable();

        if (current->active_mm == mm) {
                if (current->mm) {
                        /*
                         * Implicit full barrier (INVLPG) that synchronizes
                         * with switch_mm.
                         */
                        __flush_tlb_one(start);
                } else {
                        leave_mm(smp_processor_id());

                        /* Synchronize with switch_mm. */
                        smp_mb();
                }
        }

        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);

        preempt_enable();
}

static void do_flush_tlb_all(void *info)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        __flush_tlb_all();
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
                leave_mm(smp_processor_id());
}

void flush_tlb_all(void)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
        struct flush_tlb_info *f = info;
        unsigned long addr;

        /* flush the range one page at a time with 'invlpg' */
        for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
                __flush_tlb_single(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        /* Balance this like a user space task's flush; a bit conservative. */
        if (end == TLB_FLUSH_ALL ||
            (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
                on_each_cpu(do_flush_tlb_all, NULL, 1);
        } else {
                struct flush_tlb_info info;

                info.flush_start = start;
                info.flush_end = end;
                on_each_cpu(do_kernel_range_flush, &info, 1);
        }
}

static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
                                  size_t count, loff_t *ppos)
{
        char buf[32];
        unsigned int len;

        len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
        return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
                                   const char __user *user_buf,
                                   size_t count, loff_t *ppos)
{
        char buf[32];
        ssize_t len;
        int ceiling;

        len = min(count, sizeof(buf) - 1);
        if (copy_from_user(buf, user_buf, len))
                return -EFAULT;

        buf[len] = '\0';
        if (kstrtoint(buf, 0, &ceiling))
                return -EINVAL;

        if (ceiling < 0)
                return -EINVAL;

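        /*
         * Store the validated ceiling; it takes effect for subsequent
         * flush_tlb_mm_range() and flush_tlb_kernel_range() calls.
         */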
        tlb_single_page_flush_ceiling = ceiling;
        return count;
}

static const struct file_operations fops_tlbflush = {
        .read = tlbflush_read_file,
        .write = tlbflush_write_file,
        .llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
        debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
                            arch_debugfs_dir, NULL, &fops_tlbflush);
        return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);

#endif /* CONFIG_SMP */