xref: /openbmc/linux/arch/x86/mm/tlb.c (revision 69c0319aabba45bcf33178916a2f06967b4adede)
155f4949fSIngo Molnar #include <linux/init.h>
255f4949fSIngo Molnar 
355f4949fSIngo Molnar #include <linux/mm.h>
455f4949fSIngo Molnar #include <linux/spinlock.h>
555f4949fSIngo Molnar #include <linux/smp.h>
655f4949fSIngo Molnar #include <linux/interrupt.h>
755f4949fSIngo Molnar #include <linux/module.h>
893296720SShaohua Li #include <linux/cpu.h>
955f4949fSIngo Molnar 
1055f4949fSIngo Molnar #include <asm/tlbflush.h>
1155f4949fSIngo Molnar #include <asm/mmu_context.h>
12350f8f56SJan Beulich #include <asm/cache.h>
1355f4949fSIngo Molnar #include <asm/apic.h>
1455f4949fSIngo Molnar #include <asm/uv/uv.h>
153df3212fSAlex Shi #include <linux/debugfs.h>
1655f4949fSIngo Molnar 
1755f4949fSIngo Molnar /*
1855f4949fSIngo Molnar  *	Smarter SMP flushing macros.
1955f4949fSIngo Molnar  *		c/o Linus Torvalds.
2055f4949fSIngo Molnar  *
2155f4949fSIngo Molnar  *	These mean you can really definitely utterly forget about
2255f4949fSIngo Molnar  *	writing to user space from interrupts. (It's not allowed anyway.)
2355f4949fSIngo Molnar  *
2455f4949fSIngo Molnar  *	Optimizations Manfred Spraul <manfred@colorfullife.com>
2555f4949fSIngo Molnar  *
2655f4949fSIngo Molnar  *	More scalable flush, from Andi Kleen
2755f4949fSIngo Molnar  *
2852aec330SAlex Shi  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
2955f4949fSIngo Molnar  */
3055f4949fSIngo Molnar 
31e1074888SAndy Lutomirski #ifdef CONFIG_SMP
32e1074888SAndy Lutomirski 
3352aec330SAlex Shi struct flush_tlb_info {
3455f4949fSIngo Molnar 	struct mm_struct *flush_mm;
35e7b52ffdSAlex Shi 	unsigned long flush_start;
36e7b52ffdSAlex Shi 	unsigned long flush_end;
3755f4949fSIngo Molnar };
3893296720SShaohua Li 
3955f4949fSIngo Molnar /*
4055f4949fSIngo Molnar  * We cannot call mmdrop() because we are in interrupt context;
4155f4949fSIngo Molnar  * instead we update mm->cpu_vm_mask.
4255f4949fSIngo Molnar  */
4355f4949fSIngo Molnar void leave_mm(int cpu)
4455f4949fSIngo Molnar {
4502171b4aSLinus Torvalds 	struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
46c6ae41e7SAlex Shi 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
4755f4949fSIngo Molnar 		BUG();
48a6fca40fSSuresh Siddha 	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
49a6fca40fSSuresh Siddha 		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
5055f4949fSIngo Molnar 		load_cr3(swapper_pg_dir);
517c7f1547SDave Hansen 		/*
527c7f1547SDave Hansen 		 * This gets called in the idle path where RCU
537c7f1547SDave Hansen 		 * functions differently.  Tracing normally
547c7f1547SDave Hansen 		 * uses RCU, so we have to call the tracepoint
557c7f1547SDave Hansen 		 * specially here.
567c7f1547SDave Hansen 		 */
577c7f1547SDave Hansen 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
5855f4949fSIngo Molnar 	}
59a6fca40fSSuresh Siddha }
6055f4949fSIngo Molnar EXPORT_SYMBOL_GPL(leave_mm);
6155f4949fSIngo Molnar 
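/*
 * Usage note (a hedged sketch, not taken from this file): leave_mm() is
 * exported because a CPU about to enter a deep sleep state may want to
 * stop borrowing the previous task's mm so it no longer receives TLB
 * flush IPIs while idle; cpuidle-style drivers are the assumed external
 * callers.  The call pattern used elsewhere in this file is simply:
 *
 *	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 *		leave_mm(smp_processor_id());
 *
 * i.e. the CPU must already be in lazy TLB mode, otherwise the BUG()
 * above fires.
 */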
62*69c0319aSAndy Lutomirski #endif /* CONFIG_SMP */
63*69c0319aSAndy Lutomirski 
64*69c0319aSAndy Lutomirski void switch_mm(struct mm_struct *prev, struct mm_struct *next,
65*69c0319aSAndy Lutomirski 	       struct task_struct *tsk)
66*69c0319aSAndy Lutomirski {
67*69c0319aSAndy Lutomirski 	unsigned cpu = smp_processor_id();
68*69c0319aSAndy Lutomirski 
69*69c0319aSAndy Lutomirski 	if (likely(prev != next)) {
70*69c0319aSAndy Lutomirski #ifdef CONFIG_SMP
71*69c0319aSAndy Lutomirski 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
72*69c0319aSAndy Lutomirski 		this_cpu_write(cpu_tlbstate.active_mm, next);
73*69c0319aSAndy Lutomirski #endif
74*69c0319aSAndy Lutomirski 		cpumask_set_cpu(cpu, mm_cpumask(next));
75*69c0319aSAndy Lutomirski 
76*69c0319aSAndy Lutomirski 		/*
77*69c0319aSAndy Lutomirski 		 * Re-load page tables.
78*69c0319aSAndy Lutomirski 		 *
79*69c0319aSAndy Lutomirski 		 * This logic has an ordering constraint:
80*69c0319aSAndy Lutomirski 		 *
81*69c0319aSAndy Lutomirski 		 *  CPU 0: Write to a PTE for 'next'
82*69c0319aSAndy Lutomirski 		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
83*69c0319aSAndy Lutomirski 		 *  CPU 1: set bit 1 in next's mm_cpumask
84*69c0319aSAndy Lutomirski 		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
85*69c0319aSAndy Lutomirski 		 *
 86*69c0319aSAndy Lutomirski 		 * We need to prevent an outcome in which CPU 1 observes
 87*69c0319aSAndy Lutomirski 		 * the stale PTE value and CPU 0 observes bit 1 clear in
 88*69c0319aSAndy Lutomirski 		 * mm_cpumask.  (If that occurs, then the IPI will never
 89*69c0319aSAndy Lutomirski 		 * be sent, and CPU 1's TLB will contain a stale entry.)
90*69c0319aSAndy Lutomirski 		 *
91*69c0319aSAndy Lutomirski 		 * The bad outcome can occur if either CPU's load is
92*69c0319aSAndy Lutomirski 		 * reordered before that CPU's store, so both CPUs must
93*69c0319aSAndy Lutomirski 		 * execute full barriers to prevent this from happening.
94*69c0319aSAndy Lutomirski 		 *
95*69c0319aSAndy Lutomirski 		 * Thus, switch_mm needs a full barrier between the
96*69c0319aSAndy Lutomirski 		 * store to mm_cpumask and any operation that could load
97*69c0319aSAndy Lutomirski 		 * from next->pgd.  TLB fills are special and can happen
98*69c0319aSAndy Lutomirski 		 * due to instruction fetches or for no reason at all,
99*69c0319aSAndy Lutomirski 		 * and neither LOCK nor MFENCE orders them.
100*69c0319aSAndy Lutomirski 		 * Fortunately, load_cr3() is serializing and gives the
101*69c0319aSAndy Lutomirski 		 * ordering guarantee we need.
102*69c0319aSAndy Lutomirski 		 *
103*69c0319aSAndy Lutomirski 		 */
104*69c0319aSAndy Lutomirski 		load_cr3(next->pgd);
105*69c0319aSAndy Lutomirski 
106*69c0319aSAndy Lutomirski 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
107*69c0319aSAndy Lutomirski 
108*69c0319aSAndy Lutomirski 		/* Stop flush ipis for the previous mm */
109*69c0319aSAndy Lutomirski 		cpumask_clear_cpu(cpu, mm_cpumask(prev));
110*69c0319aSAndy Lutomirski 
111*69c0319aSAndy Lutomirski 		/* Load per-mm CR4 state */
112*69c0319aSAndy Lutomirski 		load_mm_cr4(next);
113*69c0319aSAndy Lutomirski 
114*69c0319aSAndy Lutomirski #ifdef CONFIG_MODIFY_LDT_SYSCALL
115*69c0319aSAndy Lutomirski 		/*
116*69c0319aSAndy Lutomirski 		 * Load the LDT, if the LDT is different.
117*69c0319aSAndy Lutomirski 		 *
118*69c0319aSAndy Lutomirski 		 * It's possible that prev->context.ldt doesn't match
119*69c0319aSAndy Lutomirski 		 * the LDT register.  This can happen if leave_mm(prev)
120*69c0319aSAndy Lutomirski 		 * was called and then modify_ldt changed
121*69c0319aSAndy Lutomirski 		 * prev->context.ldt but suppressed an IPI to this CPU.
122*69c0319aSAndy Lutomirski 		 * In this case, prev->context.ldt != NULL, because we
123*69c0319aSAndy Lutomirski 		 * never set context.ldt to NULL while the mm still
124*69c0319aSAndy Lutomirski 		 * exists.  That means that next->context.ldt !=
125*69c0319aSAndy Lutomirski 		 * prev->context.ldt, because mms never share an LDT.
126*69c0319aSAndy Lutomirski 		 */
127*69c0319aSAndy Lutomirski 		if (unlikely(prev->context.ldt != next->context.ldt))
128*69c0319aSAndy Lutomirski 			load_mm_ldt(next);
129*69c0319aSAndy Lutomirski #endif
130*69c0319aSAndy Lutomirski 	}
131*69c0319aSAndy Lutomirski #ifdef CONFIG_SMP
132*69c0319aSAndy Lutomirski 	  else {
133*69c0319aSAndy Lutomirski 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
134*69c0319aSAndy Lutomirski 		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
135*69c0319aSAndy Lutomirski 
136*69c0319aSAndy Lutomirski 		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
137*69c0319aSAndy Lutomirski 			/*
138*69c0319aSAndy Lutomirski 			 * On established mms, the mm_cpumask is only changed
139*69c0319aSAndy Lutomirski 			 * from irq context, from ptep_clear_flush() while in
140*69c0319aSAndy Lutomirski 			 * lazy tlb mode, and here. Irqs are blocked during
141*69c0319aSAndy Lutomirski 			 * schedule, protecting us from simultaneous changes.
142*69c0319aSAndy Lutomirski 			 */
143*69c0319aSAndy Lutomirski 			cpumask_set_cpu(cpu, mm_cpumask(next));
144*69c0319aSAndy Lutomirski 
145*69c0319aSAndy Lutomirski 			/*
146*69c0319aSAndy Lutomirski 			 * We were in lazy tlb mode and leave_mm disabled
147*69c0319aSAndy Lutomirski 			 * tlb flush IPI delivery. We must reload CR3
148*69c0319aSAndy Lutomirski 			 * to make sure we do not use freed page tables.
149*69c0319aSAndy Lutomirski 			 *
150*69c0319aSAndy Lutomirski 			 * As above, load_cr3() is serializing and orders TLB
151*69c0319aSAndy Lutomirski 			 * fills with respect to the mm_cpumask write.
152*69c0319aSAndy Lutomirski 			 */
153*69c0319aSAndy Lutomirski 			load_cr3(next->pgd);
154*69c0319aSAndy Lutomirski 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
155*69c0319aSAndy Lutomirski 			load_mm_cr4(next);
156*69c0319aSAndy Lutomirski 			load_mm_ldt(next);
157*69c0319aSAndy Lutomirski 		}
158*69c0319aSAndy Lutomirski 	}
159*69c0319aSAndy Lutomirski #endif
160*69c0319aSAndy Lutomirski }
161*69c0319aSAndy Lutomirski 
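/*
 * The ordering argument in switch_mm() above is the classic
 * store-buffering pattern.  A compact restatement (illustrative layout
 * only; the flusher's PTE write itself lives outside this file):
 *
 *	switcher (switch_mm)                flusher (flush_tlb_mm_range)
 *	--------------------                ----------------------------
 *	cpumask_set_cpu(cpu, mm_cpumask);   write the PTE for 'next'
 *	load_cr3(next->pgd);   [barrier]    local_flush_tlb()/INVLPG/smp_mb()
 *	...implicit TLB fills...            cpumask_any_but(mm_cpumask, ...)
 *	                                    -> send flush IPI if the bit is set
 *
 * The full barriers on both sides forbid the outcome where the flusher
 * misses the bit *and* the switcher's TLB fill misses the new PTE.
 */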
162*69c0319aSAndy Lutomirski #ifdef CONFIG_SMP
163*69c0319aSAndy Lutomirski 
16455f4949fSIngo Molnar /*
16555f4949fSIngo Molnar  * The flush IPI assumes that a thread switch happens in this order:
16655f4949fSIngo Molnar  * [cpu0: the cpu that switches]
16755f4949fSIngo Molnar  * 1) switch_mm() either 1a) or 1b)
16855f4949fSIngo Molnar  * 1a) thread switch to a different mm
16952aec330SAlex Shi  * 1a1) set cpu_tlbstate to TLBSTATE_OK
17052aec330SAlex Shi  *	Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
17152aec330SAlex Shi  *	if cpu0 was in lazy tlb mode.
17252aec330SAlex Shi  * 1a2) update cpu active_mm
17355f4949fSIngo Molnar  *	Now cpu0 accepts tlb flushes for the new mm.
17452aec330SAlex Shi  * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
17555f4949fSIngo Molnar  *	Now the other cpus will send tlb flush ipis.
17655f4949fSIngo Molnar  * 1a4) change cr3.
17752aec330SAlex Shi  * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
17852aec330SAlex Shi  *	Stop ipi delivery for the old mm. This is not synchronized with
17952aec330SAlex Shi  *	the other cpus, but flush_tlb_func ignores flush ipis for the wrong
18052aec330SAlex Shi  *	mm, and in the worst case we perform a superfluous tlb flush.
18155f4949fSIngo Molnar  * 1b) thread switch without mm change
18252aec330SAlex Shi  *	cpu active_mm is correct, cpu0 already handles flush ipis.
18352aec330SAlex Shi  * 1b1) set cpu_tlbstate to TLBSTATE_OK
18455f4949fSIngo Molnar  * 1b2) test_and_set the cpu bit in cpu_vm_mask.
18555f4949fSIngo Molnar  *	Atomically set the bit [other cpus will start sending flush ipis],
18655f4949fSIngo Molnar  *	and test the bit.
18755f4949fSIngo Molnar  * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
18855f4949fSIngo Molnar  * 2) switch %%esp, i.e. current
18955f4949fSIngo Molnar  *
19055f4949fSIngo Molnar  * The interrupt must handle 2 special cases:
19155f4949fSIngo Molnar  * - cr3 is changed before %%esp, i.e. it cannot use current->{active_,}mm.
19255f4949fSIngo Molnar  * - the cpu performs speculative tlb reads, i.e. even if the cpu only
19355f4949fSIngo Molnar  *   runs in kernel space, the cpu could load tlb entries for user space
19455f4949fSIngo Molnar  *   pages.
19555f4949fSIngo Molnar  *
19652aec330SAlex Shi  * The good news is that cpu_tlbstate is local to each cpu, so there are no
19755f4949fSIngo Molnar  * write/read ordering problems.
19855f4949fSIngo Molnar  */
19955f4949fSIngo Molnar 
20055f4949fSIngo Molnar /*
20152aec330SAlex Shi  * TLB flush function:
20255f4949fSIngo Molnar  * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
20355f4949fSIngo Molnar  * 2) Leave the mm if we are in lazy tlb mode.
20455f4949fSIngo Molnar  */
20552aec330SAlex Shi static void flush_tlb_func(void *info)
20655f4949fSIngo Molnar {
20752aec330SAlex Shi 	struct flush_tlb_info *f = info;
20855f4949fSIngo Molnar 
209fd0f5869STomoki Sekiyama 	inc_irq_stat(irq_tlb_count);
210fd0f5869STomoki Sekiyama 
211858eaaa7SNadav Amit 	if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
21252aec330SAlex Shi 		return;
21355f4949fSIngo Molnar 
214ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
215c6ae41e7SAlex Shi 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
216d17d8f9dSDave Hansen 		if (f->flush_end == TLB_FLUSH_ALL) {
21755f4949fSIngo Molnar 			local_flush_tlb();
218d17d8f9dSDave Hansen 			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
219d17d8f9dSDave Hansen 		} else {
220e7b52ffdSAlex Shi 			unsigned long addr;
221d17d8f9dSDave Hansen 			unsigned long nr_pages =
222bbc03778SDave Hansen 				(f->flush_end - f->flush_start) / PAGE_SIZE;
223e7b52ffdSAlex Shi 			addr = f->flush_start;
224e7b52ffdSAlex Shi 			while (addr < f->flush_end) {
225e7b52ffdSAlex Shi 				__flush_tlb_single(addr);
226e7b52ffdSAlex Shi 				addr += PAGE_SIZE;
227e7b52ffdSAlex Shi 			}
228d17d8f9dSDave Hansen 			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
229e7b52ffdSAlex Shi 		}
23055f4949fSIngo Molnar 	} else
23152aec330SAlex Shi 		leave_mm(smp_processor_id());
23255f4949fSIngo Molnar 
23355f4949fSIngo Molnar }
23455f4949fSIngo Molnar 
23555f4949fSIngo Molnar void native_flush_tlb_others(const struct cpumask *cpumask,
236e7b52ffdSAlex Shi 				 struct mm_struct *mm, unsigned long start,
237e7b52ffdSAlex Shi 				 unsigned long end)
23855f4949fSIngo Molnar {
23952aec330SAlex Shi 	struct flush_tlb_info info;
24018c98243SNadav Amit 
24118c98243SNadav Amit 	if (end == 0)
24218c98243SNadav Amit 		end = start + PAGE_SIZE;
24352aec330SAlex Shi 	info.flush_mm = mm;
24452aec330SAlex Shi 	info.flush_start = start;
24552aec330SAlex Shi 	info.flush_end = end;
24652aec330SAlex Shi 
247ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
24818c98243SNadav Amit 	if (end == TLB_FLUSH_ALL)
24918c98243SNadav Amit 		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
25018c98243SNadav Amit 	else
25118c98243SNadav Amit 		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
25218c98243SNadav Amit 				(end - start) >> PAGE_SHIFT);
25318c98243SNadav Amit 
25455f4949fSIngo Molnar 	if (is_uv_system()) {
25555f4949fSIngo Molnar 		unsigned int cpu;
25655f4949fSIngo Molnar 
25725542c64SXiao Guangrong 		cpu = smp_processor_id();
258e7b52ffdSAlex Shi 		cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
25955f4949fSIngo Molnar 		if (cpumask)
26052aec330SAlex Shi 			smp_call_function_many(cpumask, flush_tlb_func,
26152aec330SAlex Shi 								&info, 1);
26255f4949fSIngo Molnar 		return;
26355f4949fSIngo Molnar 	}
26452aec330SAlex Shi 	smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
26555f4949fSIngo Molnar }
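/*
 * Note on the end == 0 special case above: single-page flushes reach
 * this function as a degenerate range.  flush_tlb_page() below does
 *
 *	flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
 *
 * and the 0UL end is widened here to (start, start + PAGE_SIZE), so
 * flush_tlb_func() on the remote CPUs walks exactly one page with
 * __flush_tlb_single().
 */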
26655f4949fSIngo Molnar 
26755f4949fSIngo Molnar void flush_tlb_current_task(void)
26855f4949fSIngo Molnar {
26955f4949fSIngo Molnar 	struct mm_struct *mm = current->mm;
27055f4949fSIngo Molnar 
27155f4949fSIngo Molnar 	preempt_disable();
27255f4949fSIngo Molnar 
273ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
27471b3c126SAndy Lutomirski 
27571b3c126SAndy Lutomirski 	/* This is an implicit full barrier that synchronizes with switch_mm. */
27655f4949fSIngo Molnar 	local_flush_tlb();
27771b3c126SAndy Lutomirski 
278d17d8f9dSDave Hansen 	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
27978f1c4d6SRusty Russell 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
280e7b52ffdSAlex Shi 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
28155f4949fSIngo Molnar 	preempt_enable();
28255f4949fSIngo Molnar }
28355f4949fSIngo Molnar 
284a5102476SDave Hansen /*
285a5102476SDave Hansen  * See Documentation/x86/tlb.txt for details.  We choose 33
286a5102476SDave Hansen  * because it is large enough to cover the vast majority (at
287a5102476SDave Hansen  * least 95%) of allocations, and is small enough that we are
288a5102476SDave Hansen  * confident it will not cause too much overhead.  Each single
289a5102476SDave Hansen  * flush is about 100 ns, so this caps the maximum overhead at
290a5102476SDave Hansen  * _about_ 3,000 ns.
291a5102476SDave Hansen  *
292a5102476SDave Hansen  * This is in units of pages.
293a5102476SDave Hansen  */
29486426851SJeremiah Mahler static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
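/*
 * Back-of-the-envelope numbers behind the ceiling (using the rough
 * estimates from the comment above): 33 single-page flushes cost about
 * 33 * 100 ns = 3,300 ns.  A 2MB region is 512 4k pages, i.e. roughly
 * 51,200 ns of INVLPGs, which is why anything above the ceiling is
 * handled with one full TLB flush instead.
 */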
295e9f4e0a9SDave Hansen 
296611ae8e3SAlex Shi void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
297611ae8e3SAlex Shi 				unsigned long end, unsigned long vmflag)
298611ae8e3SAlex Shi {
299611ae8e3SAlex Shi 	unsigned long addr;
3009dfa6deeSDave Hansen 	/* do a global flush by default */
3019dfa6deeSDave Hansen 	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
302e7b52ffdSAlex Shi 
303611ae8e3SAlex Shi 	preempt_disable();
30471b3c126SAndy Lutomirski 	if (current->active_mm != mm) {
30571b3c126SAndy Lutomirski 		/* Synchronize with switch_mm. */
30671b3c126SAndy Lutomirski 		smp_mb();
30771b3c126SAndy Lutomirski 
3084995ab9cSDave Hansen 		goto out;
30971b3c126SAndy Lutomirski 	}
310611ae8e3SAlex Shi 
311611ae8e3SAlex Shi 	if (!current->mm) {
312611ae8e3SAlex Shi 		leave_mm(smp_processor_id());
31371b3c126SAndy Lutomirski 
31471b3c126SAndy Lutomirski 		/* Synchronize with switch_mm. */
31571b3c126SAndy Lutomirski 		smp_mb();
31671b3c126SAndy Lutomirski 
3174995ab9cSDave Hansen 		goto out;
318611ae8e3SAlex Shi 	}
319611ae8e3SAlex Shi 
3209dfa6deeSDave Hansen 	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
3219dfa6deeSDave Hansen 		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
322611ae8e3SAlex Shi 
32371b3c126SAndy Lutomirski 	/*
32471b3c126SAndy Lutomirski 	 * Both branches below are implicit full barriers (MOV to CR or
32571b3c126SAndy Lutomirski 	 * INVLPG) that synchronize with switch_mm.
32671b3c126SAndy Lutomirski 	 */
3279dfa6deeSDave Hansen 	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
3289dfa6deeSDave Hansen 		base_pages_to_flush = TLB_FLUSH_ALL;
329ec659934SMel Gorman 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
330e7b52ffdSAlex Shi 		local_flush_tlb();
3319824cf97SDave Hansen 	} else {
332611ae8e3SAlex Shi 		/* flush the range one page at a time with 'invlpg' */
3339824cf97SDave Hansen 		for (addr = start; addr < end;	addr += PAGE_SIZE) {
334ec659934SMel Gorman 			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
335e7b52ffdSAlex Shi 			__flush_tlb_single(addr);
3369824cf97SDave Hansen 		}
337e7b52ffdSAlex Shi 	}
338d17d8f9dSDave Hansen 	trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
3394995ab9cSDave Hansen out:
3409dfa6deeSDave Hansen 	if (base_pages_to_flush == TLB_FLUSH_ALL) {
3414995ab9cSDave Hansen 		start = 0UL;
3424995ab9cSDave Hansen 		end = TLB_FLUSH_ALL;
3434995ab9cSDave Hansen 	}
344e7b52ffdSAlex Shi 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
3454995ab9cSDave Hansen 		flush_tlb_others(mm_cpumask(mm), mm, start, end);
346e7b52ffdSAlex Shi 	preempt_enable();
347e7b52ffdSAlex Shi }
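/*
 * A rough caller sketch (assuming the usual flush_tlb_range() wrapper
 * in asm/tlbflush.h; check that header before relying on the exact
 * form): flushing a VMA-backed range typically arrives here as
 *
 *	flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags);
 *
 * Passing vm_flags lets VM_HUGETLB ranges keep the "global flush by
 * default" behaviour above instead of issuing per-4k INVLPGs.
 */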
348e7b52ffdSAlex Shi 
349e7b52ffdSAlex Shi void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
35055f4949fSIngo Molnar {
35155f4949fSIngo Molnar 	struct mm_struct *mm = vma->vm_mm;
35255f4949fSIngo Molnar 
35355f4949fSIngo Molnar 	preempt_disable();
35455f4949fSIngo Molnar 
35555f4949fSIngo Molnar 	if (current->active_mm == mm) {
35671b3c126SAndy Lutomirski 		if (current->mm) {
35771b3c126SAndy Lutomirski 			/*
35871b3c126SAndy Lutomirski 			 * Implicit full barrier (INVLPG) that synchronizes
35971b3c126SAndy Lutomirski 			 * with switch_mm.
36071b3c126SAndy Lutomirski 			 */
361e7b52ffdSAlex Shi 			__flush_tlb_one(start);
36271b3c126SAndy Lutomirski 		} else {
36355f4949fSIngo Molnar 			leave_mm(smp_processor_id());
36471b3c126SAndy Lutomirski 
36571b3c126SAndy Lutomirski 			/* Synchronize with switch_mm. */
36671b3c126SAndy Lutomirski 			smp_mb();
36771b3c126SAndy Lutomirski 		}
36855f4949fSIngo Molnar 	}
36955f4949fSIngo Molnar 
37078f1c4d6SRusty Russell 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
371e7b52ffdSAlex Shi 		flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
37255f4949fSIngo Molnar 
37355f4949fSIngo Molnar 	preempt_enable();
37455f4949fSIngo Molnar }
37555f4949fSIngo Molnar 
37655f4949fSIngo Molnar static void do_flush_tlb_all(void *info)
37755f4949fSIngo Molnar {
378ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
37955f4949fSIngo Molnar 	__flush_tlb_all();
380c6ae41e7SAlex Shi 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
3813f8afb77SBorislav Petkov 		leave_mm(smp_processor_id());
38255f4949fSIngo Molnar }
38355f4949fSIngo Molnar 
38455f4949fSIngo Molnar void flush_tlb_all(void)
38555f4949fSIngo Molnar {
386ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
38755f4949fSIngo Molnar 	on_each_cpu(do_flush_tlb_all, NULL, 1);
38855f4949fSIngo Molnar }
3893df3212fSAlex Shi 
390effee4b9SAlex Shi static void do_kernel_range_flush(void *info)
391effee4b9SAlex Shi {
392effee4b9SAlex Shi 	struct flush_tlb_info *f = info;
393effee4b9SAlex Shi 	unsigned long addr;
394effee4b9SAlex Shi 
395effee4b9SAlex Shi 	/* flush the range one page at a time with 'invlpg' */
3966df46865SDave Hansen 	for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
397effee4b9SAlex Shi 		__flush_tlb_single(addr);
398effee4b9SAlex Shi }
399effee4b9SAlex Shi 
400effee4b9SAlex Shi void flush_tlb_kernel_range(unsigned long start, unsigned long end)
401effee4b9SAlex Shi {
402effee4b9SAlex Shi 
403effee4b9SAlex Shi 	/* Balance this like a user space task's flush; err a bit on the conservative side */
404e9f4e0a9SDave Hansen 	if (end == TLB_FLUSH_ALL ||
405e9f4e0a9SDave Hansen 	    (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
406effee4b9SAlex Shi 		on_each_cpu(do_flush_tlb_all, NULL, 1);
407e9f4e0a9SDave Hansen 	} else {
408e9f4e0a9SDave Hansen 		struct flush_tlb_info info;
409effee4b9SAlex Shi 		info.flush_start = start;
410effee4b9SAlex Shi 		info.flush_end = end;
411effee4b9SAlex Shi 		on_each_cpu(do_kernel_range_flush, &info, 1);
412effee4b9SAlex Shi 	}
413effee4b9SAlex Shi }
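/*
 * A rough usage sketch (hedged; the callers live outside this file):
 * code tearing down kernel mappings, e.g. the vmalloc/vfree paths,
 * clears the PTEs first and then issues
 *
 *	flush_tlb_kernel_range(addr, addr + size);
 *
 * Ranges up to tlb_single_page_flush_ceiling pages take the per-page
 * INVLPG path above; anything larger falls back to a global flush on
 * every CPU via do_flush_tlb_all().
 */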
4142d040a1cSDave Hansen 
4152d040a1cSDave Hansen static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
4162d040a1cSDave Hansen 			     size_t count, loff_t *ppos)
4172d040a1cSDave Hansen {
4182d040a1cSDave Hansen 	char buf[32];
4192d040a1cSDave Hansen 	unsigned int len;
4202d040a1cSDave Hansen 
4212d040a1cSDave Hansen 	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
4222d040a1cSDave Hansen 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
4232d040a1cSDave Hansen }
4242d040a1cSDave Hansen 
4252d040a1cSDave Hansen static ssize_t tlbflush_write_file(struct file *file,
4262d040a1cSDave Hansen 		 const char __user *user_buf, size_t count, loff_t *ppos)
4272d040a1cSDave Hansen {
4282d040a1cSDave Hansen 	char buf[32];
4292d040a1cSDave Hansen 	ssize_t len;
4302d040a1cSDave Hansen 	int ceiling;
4312d040a1cSDave Hansen 
4322d040a1cSDave Hansen 	len = min(count, sizeof(buf) - 1);
4332d040a1cSDave Hansen 	if (copy_from_user(buf, user_buf, len))
4342d040a1cSDave Hansen 		return -EFAULT;
4352d040a1cSDave Hansen 
4362d040a1cSDave Hansen 	buf[len] = '\0';
4372d040a1cSDave Hansen 	if (kstrtoint(buf, 0, &ceiling))
4382d040a1cSDave Hansen 		return -EINVAL;
4392d040a1cSDave Hansen 
4402d040a1cSDave Hansen 	if (ceiling < 0)
4412d040a1cSDave Hansen 		return -EINVAL;
4422d040a1cSDave Hansen 
4432d040a1cSDave Hansen 	tlb_single_page_flush_ceiling = ceiling;
4442d040a1cSDave Hansen 	return count;
4452d040a1cSDave Hansen }
4462d040a1cSDave Hansen 
4472d040a1cSDave Hansen static const struct file_operations fops_tlbflush = {
4482d040a1cSDave Hansen 	.read = tlbflush_read_file,
4492d040a1cSDave Hansen 	.write = tlbflush_write_file,
4502d040a1cSDave Hansen 	.llseek = default_llseek,
4512d040a1cSDave Hansen };
4522d040a1cSDave Hansen 
4532d040a1cSDave Hansen static int __init create_tlb_single_page_flush_ceiling(void)
4542d040a1cSDave Hansen {
4552d040a1cSDave Hansen 	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
4562d040a1cSDave Hansen 			    arch_debugfs_dir, NULL, &fops_tlbflush);
4572d040a1cSDave Hansen 	return 0;
4582d040a1cSDave Hansen }
4592d040a1cSDave Hansen late_initcall(create_tlb_single_page_flush_ceiling);
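/*
 * Usage sketch (assuming debugfs is mounted at /sys/kernel/debug and
 * arch_debugfs_dir is the usual "x86" directory):
 *
 *	# cat /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *	33
 *	# echo 64 > /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *
 * Writing 0 pushes every flush_tlb_mm_range() call onto the full-flush
 * path; see Documentation/x86/tlb.txt for the tuning discussion.
 */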
460e1074888SAndy Lutomirski 
461e1074888SAndy Lutomirski #endif /* CONFIG_SMP */
462