xref: /openbmc/linux/arch/x86/mm/tlb.c (revision bc0d5a89fbe3c83ac45438d7ba88309f4713615d)
155f4949fSIngo Molnar #include <linux/init.h>
255f4949fSIngo Molnar 
355f4949fSIngo Molnar #include <linux/mm.h>
455f4949fSIngo Molnar #include <linux/spinlock.h>
555f4949fSIngo Molnar #include <linux/smp.h>
655f4949fSIngo Molnar #include <linux/interrupt.h>
74b599fedSPaul Gortmaker #include <linux/export.h>
893296720SShaohua Li #include <linux/cpu.h>
955f4949fSIngo Molnar 
1055f4949fSIngo Molnar #include <asm/tlbflush.h>
1155f4949fSIngo Molnar #include <asm/mmu_context.h>
12350f8f56SJan Beulich #include <asm/cache.h>
1355f4949fSIngo Molnar #include <asm/apic.h>
1455f4949fSIngo Molnar #include <asm/uv/uv.h>
153df3212fSAlex Shi #include <linux/debugfs.h>
1655f4949fSIngo Molnar 
1755f4949fSIngo Molnar /*
18ce4a4e56SAndy Lutomirski  *	TLB flushing, formerly SMP-only
1955f4949fSIngo Molnar  *		c/o Linus Torvalds.
2055f4949fSIngo Molnar  *
2155f4949fSIngo Molnar  *	These mean you can really definitely utterly forget about
2255f4949fSIngo Molnar  *	writing to user space from interrupts. (It's not allowed anyway).
2355f4949fSIngo Molnar  *
2455f4949fSIngo Molnar  *	Optimizations Manfred Spraul <manfred@colorfullife.com>
2555f4949fSIngo Molnar  *
2655f4949fSIngo Molnar  *	More scalable flush, from Andi Kleen
2755f4949fSIngo Molnar  *
2852aec330SAlex Shi  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
2955f4949fSIngo Molnar  */
3055f4949fSIngo Molnar 
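/*
 * leave_mm() stops this CPU from servicing TLB flush requests for its
 * previously loaded user mm: it switches to init_mm, which also drops
 * the CPU from the old mm's cpumask so no further flush IPIs are sent.
 */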
3155f4949fSIngo Molnar void leave_mm(int cpu)
3255f4949fSIngo Molnar {
333d28ebceSAndy Lutomirski 	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
343d28ebceSAndy Lutomirski 
353d28ebceSAndy Lutomirski 	/*
363d28ebceSAndy Lutomirski 	 * It's plausible that we're in lazy TLB mode while our mm is init_mm.
373d28ebceSAndy Lutomirski 	 * If so, our callers still expect us to flush the TLB, but there
383d28ebceSAndy Lutomirski 	 * aren't any user TLB entries in init_mm to worry about.
393d28ebceSAndy Lutomirski 	 *
403d28ebceSAndy Lutomirski 	 * This needs to happen before any other sanity checks due to
413d28ebceSAndy Lutomirski 	 * intel_idle's shenanigans.
423d28ebceSAndy Lutomirski 	 */
433d28ebceSAndy Lutomirski 	if (loaded_mm == &init_mm)
443d28ebceSAndy Lutomirski 		return;
453d28ebceSAndy Lutomirski 
46c6ae41e7SAlex Shi 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
4755f4949fSIngo Molnar 		BUG();
483d28ebceSAndy Lutomirski 
493d28ebceSAndy Lutomirski 	switch_mm(NULL, &init_mm, NULL);
50a6fca40fSSuresh Siddha }
5155f4949fSIngo Molnar EXPORT_SYMBOL_GPL(leave_mm);
5255f4949fSIngo Molnar 
5369c0319aSAndy Lutomirski void switch_mm(struct mm_struct *prev, struct mm_struct *next,
5469c0319aSAndy Lutomirski 	       struct task_struct *tsk)
5569c0319aSAndy Lutomirski {
56078194f8SAndy Lutomirski 	unsigned long flags;
57078194f8SAndy Lutomirski 
58078194f8SAndy Lutomirski 	local_irq_save(flags);
59078194f8SAndy Lutomirski 	switch_mm_irqs_off(prev, next, tsk);
60078194f8SAndy Lutomirski 	local_irq_restore(flags);
61078194f8SAndy Lutomirski }
62078194f8SAndy Lutomirski 
63078194f8SAndy Lutomirski void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
64078194f8SAndy Lutomirski 			struct task_struct *tsk)
65078194f8SAndy Lutomirski {
6669c0319aSAndy Lutomirski 	unsigned cpu = smp_processor_id();
673d28ebceSAndy Lutomirski 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
6869c0319aSAndy Lutomirski 
693d28ebceSAndy Lutomirski 	/*
703d28ebceSAndy Lutomirski 	 * NB: The scheduler will call us with prev == next when
713d28ebceSAndy Lutomirski 	 * switching from lazy TLB mode to normal mode if active_mm
723d28ebceSAndy Lutomirski 	 * isn't changing.  When this happens, there is no guarantee
733d28ebceSAndy Lutomirski 	 * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
743d28ebceSAndy Lutomirski 	 *
753d28ebceSAndy Lutomirski 	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
763d28ebceSAndy Lutomirski 	 */
773d28ebceSAndy Lutomirski 
783d28ebceSAndy Lutomirski 	this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
793d28ebceSAndy Lutomirski 
803d28ebceSAndy Lutomirski 	if (real_prev == next) {
813d28ebceSAndy Lutomirski 		/*
823d28ebceSAndy Lutomirski 		 * There's nothing to do: we always keep the per-mm control
833d28ebceSAndy Lutomirski 		 * regs in sync with cpu_tlbstate.loaded_mm.  Just
843d28ebceSAndy Lutomirski 		 * sanity-check mm_cpumask.
853d28ebceSAndy Lutomirski 		 */
863d28ebceSAndy Lutomirski 		if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
873d28ebceSAndy Lutomirski 			cpumask_set_cpu(cpu, mm_cpumask(next));
883d28ebceSAndy Lutomirski 		return;
893d28ebceSAndy Lutomirski 	}
903d28ebceSAndy Lutomirski 
91e37e43a4SAndy Lutomirski 	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
92e37e43a4SAndy Lutomirski 		/*
93e37e43a4SAndy Lutomirski 		 * If our current stack is in vmalloc space and isn't
94e37e43a4SAndy Lutomirski 		 * mapped in the new pgd, we'll double-fault.  Forcibly
95e37e43a4SAndy Lutomirski 		 * map it.
96e37e43a4SAndy Lutomirski 		 */
97e37e43a4SAndy Lutomirski 		unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
98e37e43a4SAndy Lutomirski 
99e37e43a4SAndy Lutomirski 		pgd_t *pgd = next->pgd + stack_pgd_index;
100e37e43a4SAndy Lutomirski 
101e37e43a4SAndy Lutomirski 		if (unlikely(pgd_none(*pgd)))
102e37e43a4SAndy Lutomirski 			set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
103e37e43a4SAndy Lutomirski 	}
104e37e43a4SAndy Lutomirski 
1053d28ebceSAndy Lutomirski 	this_cpu_write(cpu_tlbstate.loaded_mm, next);
106e37e43a4SAndy Lutomirski 
1073d28ebceSAndy Lutomirski 	WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
10869c0319aSAndy Lutomirski 	cpumask_set_cpu(cpu, mm_cpumask(next));
10969c0319aSAndy Lutomirski 
11069c0319aSAndy Lutomirski 	/*
11169c0319aSAndy Lutomirski 	 * Re-load page tables.
11269c0319aSAndy Lutomirski 	 *
11369c0319aSAndy Lutomirski 	 * This logic has an ordering constraint:
11469c0319aSAndy Lutomirski 	 *
11569c0319aSAndy Lutomirski 	 *  CPU 0: Write to a PTE for 'next'
11669c0319aSAndy Lutomirski 	 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
11769c0319aSAndy Lutomirski 	 *  CPU 1: set bit 1 in next's mm_cpumask
11869c0319aSAndy Lutomirski 	 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
11969c0319aSAndy Lutomirski 	 *
12069c0319aSAndy Lutomirski 	 * We need to prevent an outcome in which CPU 1 observes
12169c0319aSAndy Lutomirski 	 * the new PTE value and CPU 0 observes bit 1 clear in
12269c0319aSAndy Lutomirski 	 * mm_cpumask.  (If that occurs, then the IPI will never
12369c0319aSAndy Lutomirski 	 * be sent, and CPU 0's TLB will contain a stale entry.)
12469c0319aSAndy Lutomirski 	 *
12569c0319aSAndy Lutomirski 	 * The bad outcome can occur if either CPU's load is
12669c0319aSAndy Lutomirski 	 * reordered before that CPU's store, so both CPUs must
12769c0319aSAndy Lutomirski 	 * execute full barriers to prevent this from happening.
12869c0319aSAndy Lutomirski 	 *
12969c0319aSAndy Lutomirski 	 * Thus, switch_mm needs a full barrier between the
13069c0319aSAndy Lutomirski 	 * store to mm_cpumask and any operation that could load
13169c0319aSAndy Lutomirski 	 * from next->pgd.  TLB fills are special and can happen
13269c0319aSAndy Lutomirski 	 * due to instruction fetches or for no reason at all,
13369c0319aSAndy Lutomirski 	 * and neither LOCK nor MFENCE orders them.
13469c0319aSAndy Lutomirski 	 * Fortunately, load_cr3() is serializing and gives the
13569c0319aSAndy Lutomirski 	 * ordering guarantee we need.
13669c0319aSAndy Lutomirski 	 */
13769c0319aSAndy Lutomirski 	load_cr3(next->pgd);
13869c0319aSAndy Lutomirski 
1393d28ebceSAndy Lutomirski 	/*
1403d28ebceSAndy Lutomirski 	 * This gets called via leave_mm() in the idle path where RCU
1413d28ebceSAndy Lutomirski 	 * functions differently.  Tracing normally uses RCU, so we have to
1423d28ebceSAndy Lutomirski 	 * call the tracepoint specially here.
1433d28ebceSAndy Lutomirski 	 */
1443d28ebceSAndy Lutomirski 	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
14569c0319aSAndy Lutomirski 
14669c0319aSAndy Lutomirski 	/* Stop flush ipis for the previous mm */
1473d28ebceSAndy Lutomirski 	WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
1483d28ebceSAndy Lutomirski 		     real_prev != &init_mm);
1493d28ebceSAndy Lutomirski 	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
15069c0319aSAndy Lutomirski 
15173534258SAndy Lutomirski 	/* Load per-mm CR4 and LDTR state */
15269c0319aSAndy Lutomirski 	load_mm_cr4(next);
15373534258SAndy Lutomirski 	switch_ldt(real_prev, next);
15469c0319aSAndy Lutomirski }
15569c0319aSAndy Lutomirski 
15655f4949fSIngo Molnar /*
15755f4949fSIngo Molnar  * The flush IPI assumes that a thread switch happens in this order:
15855f4949fSIngo Molnar  * [cpu0: the cpu that switches]
15955f4949fSIngo Molnar  * 1) switch_mm() either 1a) or 1b)
16055f4949fSIngo Molnar  * 1a) thread switch to a different mm
16152aec330SAlex Shi  * 1a1) set cpu_tlbstate to TLBSTATE_OK
16252aec330SAlex Shi  *	Now the tlb flush IPI handler (flush_tlb_func_remote) won't call
16352aec330SAlex Shi  *	leave_mm if cpu0 was in lazy tlb mode.
16452aec330SAlex Shi  * 1a2) update cpu active_mm
16555f4949fSIngo Molnar  *	Now cpu0 accepts tlb flushes for the new mm.
16652aec330SAlex Shi  * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
16755f4949fSIngo Molnar  *	Now the other cpus will send tlb flush ipis.
16855f4949fSIngo Molnar  * 1a4) change cr3.
16952aec330SAlex Shi  * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
17052aec330SAlex Shi  *	Stop ipi delivery for the old mm. This is not synchronized with
17152aec330SAlex Shi  *	the other cpus, but the flush IPI handler ignores flushes for the wrong
17252aec330SAlex Shi  *	mm, and in the worst case we perform a superfluous tlb flush.
17355f4949fSIngo Molnar  * 1b) thread switch without mm change
17452aec330SAlex Shi  *	cpu active_mm is correct, cpu0 already handles flush ipis.
17552aec330SAlex Shi  * 1b1) set cpu_tlbstate to TLBSTATE_OK
17655f4949fSIngo Molnar  * 1b2) test_and_set the cpu bit in cpu_vm_mask.
17755f4949fSIngo Molnar  *	Atomically set the bit [other cpus will start sending flush ipis],
17855f4949fSIngo Molnar  *	and test the bit.
17955f4949fSIngo Molnar  * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
18055f4949fSIngo Molnar  * 2) switch %%esp, i.e. current
18155f4949fSIngo Molnar  *
18255f4949fSIngo Molnar  * The interrupt must handle 2 special cases:
18355f4949fSIngo Molnar  * - cr3 is changed before %%esp, i.e. it cannot use current->{active_,}mm.
18455f4949fSIngo Molnar  * - the cpu performs speculative tlb reads, i.e. even if the cpu only
18555f4949fSIngo Molnar  *   runs in kernel space, the cpu could load tlb entries for user space
18655f4949fSIngo Molnar  *   pages.
18755f4949fSIngo Molnar  *
18852aec330SAlex Shi  * The good news is that cpu_tlbstate is local to each cpu, so there are
18955f4949fSIngo Molnar  * no write/read ordering problems.
19055f4949fSIngo Molnar  */
19155f4949fSIngo Molnar 
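/*
 * flush_tlb_func_common() is the body shared by the local and remote
 * flush paths.  It must be called with interrupts disabled.  A CPU in
 * lazy TLB mode just leaves the mm; otherwise it performs either a
 * full TLB flush or a page-by-page flush of the requested range.
 */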
192454bbad9SAndy Lutomirski static void flush_tlb_func_common(const struct flush_tlb_info *f,
193454bbad9SAndy Lutomirski 				  bool local, enum tlb_flush_reason reason)
19455f4949fSIngo Molnar {
195*bc0d5a89SAndy Lutomirski 	/* This code cannot presently handle being reentered. */
196*bc0d5a89SAndy Lutomirski 	VM_WARN_ON(!irqs_disabled());
197*bc0d5a89SAndy Lutomirski 
198b3b90e5aSAndy Lutomirski 	if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
199b3b90e5aSAndy Lutomirski 		leave_mm(smp_processor_id());
200b3b90e5aSAndy Lutomirski 		return;
201b3b90e5aSAndy Lutomirski 	}
202b3b90e5aSAndy Lutomirski 
203a2055abeSAndy Lutomirski 	if (f->end == TLB_FLUSH_ALL) {
20455f4949fSIngo Molnar 		local_flush_tlb();
205454bbad9SAndy Lutomirski 		if (local)
206454bbad9SAndy Lutomirski 			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
207454bbad9SAndy Lutomirski 		trace_tlb_flush(reason, TLB_FLUSH_ALL);
208d17d8f9dSDave Hansen 	} else {
209e7b52ffdSAlex Shi 		unsigned long addr;
210be4ffc0dSAndy Lutomirski 		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
211a2055abeSAndy Lutomirski 		addr = f->start;
212a2055abeSAndy Lutomirski 		while (addr < f->end) {
213e7b52ffdSAlex Shi 			__flush_tlb_single(addr);
214e7b52ffdSAlex Shi 			addr += PAGE_SIZE;
215e7b52ffdSAlex Shi 		}
216454bbad9SAndy Lutomirski 		if (local)
217454bbad9SAndy Lutomirski 			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
218454bbad9SAndy Lutomirski 		trace_tlb_flush(reason, nr_pages);
219e7b52ffdSAlex Shi 	}
22055f4949fSIngo Molnar }
22155f4949fSIngo Molnar 
222454bbad9SAndy Lutomirski static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
223454bbad9SAndy Lutomirski {
224454bbad9SAndy Lutomirski 	const struct flush_tlb_info *f = info;
225454bbad9SAndy Lutomirski 
226454bbad9SAndy Lutomirski 	flush_tlb_func_common(f, true, reason);
227454bbad9SAndy Lutomirski }
228454bbad9SAndy Lutomirski 
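/*
 * flush_tlb_func_remote() runs on the target CPUs of a flush IPI.  A
 * request for a specific mm that is no longer loaded on this CPU is
 * simply ignored.
 */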
229454bbad9SAndy Lutomirski static void flush_tlb_func_remote(void *info)
230454bbad9SAndy Lutomirski {
231454bbad9SAndy Lutomirski 	const struct flush_tlb_info *f = info;
232454bbad9SAndy Lutomirski 
233454bbad9SAndy Lutomirski 	inc_irq_stat(irq_tlb_count);
234454bbad9SAndy Lutomirski 
2353d28ebceSAndy Lutomirski 	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
236454bbad9SAndy Lutomirski 		return;
237454bbad9SAndy Lutomirski 
238454bbad9SAndy Lutomirski 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
239454bbad9SAndy Lutomirski 	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
240454bbad9SAndy Lutomirski }
241454bbad9SAndy Lutomirski 
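/*
 * native_flush_tlb_others() asks every CPU in @cpumask to run
 * flush_tlb_func_remote() for @info.  On UV systems the hardware
 * broadcast mechanism may satisfy part of the request; whatever
 * cpumask uv_flush_tlb_others() returns still needs an ordinary
 * function-call IPI.
 */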
24255f4949fSIngo Molnar void native_flush_tlb_others(const struct cpumask *cpumask,
243a2055abeSAndy Lutomirski 			     const struct flush_tlb_info *info)
24455f4949fSIngo Molnar {
245ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
246a2055abeSAndy Lutomirski 	if (info->end == TLB_FLUSH_ALL)
24718c98243SNadav Amit 		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
24818c98243SNadav Amit 	else
24918c98243SNadav Amit 		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
250a2055abeSAndy Lutomirski 				(info->end - info->start) >> PAGE_SHIFT);
25118c98243SNadav Amit 
25255f4949fSIngo Molnar 	if (is_uv_system()) {
25355f4949fSIngo Molnar 		unsigned int cpu;
25455f4949fSIngo Molnar 
25525542c64SXiao Guangrong 		cpu = smp_processor_id();
256a2055abeSAndy Lutomirski 		cpumask = uv_flush_tlb_others(cpumask, info);
25755f4949fSIngo Molnar 		if (cpumask)
258454bbad9SAndy Lutomirski 			smp_call_function_many(cpumask, flush_tlb_func_remote,
259a2055abeSAndy Lutomirski 					       (void *)info, 1);
26055f4949fSIngo Molnar 		return;
26155f4949fSIngo Molnar 	}
262454bbad9SAndy Lutomirski 	smp_call_function_many(cpumask, flush_tlb_func_remote,
263a2055abeSAndy Lutomirski 			       (void *)info, 1);
26455f4949fSIngo Molnar }
26555f4949fSIngo Molnar 
266a5102476SDave Hansen /*
267a5102476SDave Hansen  * See Documentation/x86/tlb.txt for details.  We choose 33
268a5102476SDave Hansen  * because it is large enough to cover the vast majority (at
269a5102476SDave Hansen  * least 95%) of allocations, and is small enough that we are
270a5102476SDave Hansen  * confident it will not cause too much overhead.  Each single
271a5102476SDave Hansen  * flush is about 100 ns, so this caps the maximum overhead at
272a5102476SDave Hansen  * _about_ 3,300 ns.
273a5102476SDave Hansen  *
274a5102476SDave Hansen  * This is in units of pages.
275a5102476SDave Hansen  */
27686426851SJeremiah Mahler static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
277e9f4e0a9SDave Hansen 
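/*
 * flush_tlb_mm_range() flushes a range of user addresses in @mm on
 * every CPU that may be caching them.  Ranges of up to
 * tlb_single_page_flush_ceiling pages are invalidated page by page;
 * larger ranges, and hugetlb mappings (signalled via @vmflag), get a
 * full TLB flush instead.
 */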
278611ae8e3SAlex Shi void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
279611ae8e3SAlex Shi 				unsigned long end, unsigned long vmflag)
280611ae8e3SAlex Shi {
281454bbad9SAndy Lutomirski 	int cpu;
282e7b52ffdSAlex Shi 
283454bbad9SAndy Lutomirski 	struct flush_tlb_info info = {
284454bbad9SAndy Lutomirski 		.mm = mm,
285454bbad9SAndy Lutomirski 	};
286ce27374fSAndy Lutomirski 
287454bbad9SAndy Lutomirski 	cpu = get_cpu();
28871b3c126SAndy Lutomirski 
28971b3c126SAndy Lutomirski 	/* Synchronize with switch_mm. */
29071b3c126SAndy Lutomirski 	smp_mb();
29171b3c126SAndy Lutomirski 
292454bbad9SAndy Lutomirski 	/* Should we flush just the requested range? */
293454bbad9SAndy Lutomirski 	if ((end != TLB_FLUSH_ALL) &&
294454bbad9SAndy Lutomirski 	    !(vmflag & VM_HUGETLB) &&
295454bbad9SAndy Lutomirski 	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
296a2055abeSAndy Lutomirski 		info.start = start;
297a2055abeSAndy Lutomirski 		info.end = end;
298454bbad9SAndy Lutomirski 	} else {
299454bbad9SAndy Lutomirski 		info.start = 0UL;
300454bbad9SAndy Lutomirski 		info.end = TLB_FLUSH_ALL;
3014995ab9cSDave Hansen 	}
302454bbad9SAndy Lutomirski 
303*bc0d5a89SAndy Lutomirski 	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
304*bc0d5a89SAndy Lutomirski 		VM_WARN_ON(irqs_disabled());
305*bc0d5a89SAndy Lutomirski 		local_irq_disable();
306454bbad9SAndy Lutomirski 		flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
307*bc0d5a89SAndy Lutomirski 		local_irq_enable();
308*bc0d5a89SAndy Lutomirski 	}
309*bc0d5a89SAndy Lutomirski 
310454bbad9SAndy Lutomirski 	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
311a2055abeSAndy Lutomirski 		flush_tlb_others(mm_cpumask(mm), &info);
312454bbad9SAndy Lutomirski 	put_cpu();
313e7b52ffdSAlex Shi }
314e7b52ffdSAlex Shi 
315a2055abeSAndy Lutomirski 
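/*
 * do_flush_tlb_all() runs on each CPU during a global flush: flush
 * the local TLB completely and, if this CPU was in lazy TLB mode,
 * leave the stale mm as well.
 */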
31655f4949fSIngo Molnar static void do_flush_tlb_all(void *info)
31755f4949fSIngo Molnar {
318ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
31955f4949fSIngo Molnar 	__flush_tlb_all();
320c6ae41e7SAlex Shi 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
3213f8afb77SBorislav Petkov 		leave_mm(smp_processor_id());
32255f4949fSIngo Molnar }
32355f4949fSIngo Molnar 
32455f4949fSIngo Molnar void flush_tlb_all(void)
32555f4949fSIngo Molnar {
326ec659934SMel Gorman 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
32755f4949fSIngo Molnar 	on_each_cpu(do_flush_tlb_all, NULL, 1);
32855f4949fSIngo Molnar }
3293df3212fSAlex Shi 
330effee4b9SAlex Shi static void do_kernel_range_flush(void *info)
331effee4b9SAlex Shi {
332effee4b9SAlex Shi 	struct flush_tlb_info *f = info;
333effee4b9SAlex Shi 	unsigned long addr;
334effee4b9SAlex Shi 
335effee4b9SAlex Shi 	/* Flush the range one page at a time with 'invlpg' */
336a2055abeSAndy Lutomirski 	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
337effee4b9SAlex Shi 		__flush_tlb_single(addr);
338effee4b9SAlex Shi }
339effee4b9SAlex Shi 
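/*
 * flush_tlb_kernel_range() applies the same ceiling as the user-space
 * path: small kernel ranges are invalidated page by page on every
 * CPU, larger ones fall back to a full flush via do_flush_tlb_all().
 */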
340effee4b9SAlex Shi void flush_tlb_kernel_range(unsigned long start, unsigned long end)
341effee4b9SAlex Shi {
342effee4b9SAlex Shi 
343effee4b9SAlex Shi 	/* Balance with user-space task flushes; be a bit conservative */
344e9f4e0a9SDave Hansen 	if (end == TLB_FLUSH_ALL ||
345be4ffc0dSAndy Lutomirski 	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
346effee4b9SAlex Shi 		on_each_cpu(do_flush_tlb_all, NULL, 1);
347e9f4e0a9SDave Hansen 	} else {
348e9f4e0a9SDave Hansen 		struct flush_tlb_info info;
349a2055abeSAndy Lutomirski 		info.start = start;
350a2055abeSAndy Lutomirski 		info.end = end;
351effee4b9SAlex Shi 		on_each_cpu(do_kernel_range_flush, &info, 1);
352effee4b9SAlex Shi 	}
353effee4b9SAlex Shi }
3542d040a1cSDave Hansen 
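/*
 * arch_tlbbatch_flush() services the deferred flushes accumulated by
 * the page-reclaim unmap batching code: flush locally if this CPU is
 * in the batch's cpumask, IPI the remaining CPUs, then clear the mask
 * for reuse.
 */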
355e73ad5ffSAndy Lutomirski void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
356e73ad5ffSAndy Lutomirski {
357a2055abeSAndy Lutomirski 	struct flush_tlb_info info = {
358a2055abeSAndy Lutomirski 		.mm = NULL,
359a2055abeSAndy Lutomirski 		.start = 0UL,
360a2055abeSAndy Lutomirski 		.end = TLB_FLUSH_ALL,
361a2055abeSAndy Lutomirski 	};
362a2055abeSAndy Lutomirski 
363e73ad5ffSAndy Lutomirski 	int cpu = get_cpu();
364e73ad5ffSAndy Lutomirski 
365*bc0d5a89SAndy Lutomirski 	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
366*bc0d5a89SAndy Lutomirski 		VM_WARN_ON(irqs_disabled());
367*bc0d5a89SAndy Lutomirski 		local_irq_disable();
3683f79e4c7SAndy Lutomirski 		flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
369*bc0d5a89SAndy Lutomirski 		local_irq_enable();
370*bc0d5a89SAndy Lutomirski 	}
371*bc0d5a89SAndy Lutomirski 
372e73ad5ffSAndy Lutomirski 	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
373a2055abeSAndy Lutomirski 		flush_tlb_others(&batch->cpumask, &info);
374e73ad5ffSAndy Lutomirski 	cpumask_clear(&batch->cpumask);
375e73ad5ffSAndy Lutomirski 
376e73ad5ffSAndy Lutomirski 	put_cpu();
377e73ad5ffSAndy Lutomirski }
378e73ad5ffSAndy Lutomirski 
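/*
 * tlb_single_page_flush_ceiling is tunable at run time through the
 * debugfs file created below (normally visible as
 * /sys/kernel/debug/x86/tlb_single_page_flush_ceiling), e.g.:
 *
 *	echo 64 > /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *
 * Any non-negative integer is accepted.
 */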
3792d040a1cSDave Hansen static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
3802d040a1cSDave Hansen 			     size_t count, loff_t *ppos)
3812d040a1cSDave Hansen {
3822d040a1cSDave Hansen 	char buf[32];
3832d040a1cSDave Hansen 	unsigned int len;
3842d040a1cSDave Hansen 
3852d040a1cSDave Hansen 	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
3862d040a1cSDave Hansen 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
3872d040a1cSDave Hansen }
3882d040a1cSDave Hansen 
3892d040a1cSDave Hansen static ssize_t tlbflush_write_file(struct file *file,
3902d040a1cSDave Hansen 		 const char __user *user_buf, size_t count, loff_t *ppos)
3912d040a1cSDave Hansen {
3922d040a1cSDave Hansen 	char buf[32];
3932d040a1cSDave Hansen 	ssize_t len;
3942d040a1cSDave Hansen 	int ceiling;
3952d040a1cSDave Hansen 
3962d040a1cSDave Hansen 	len = min(count, sizeof(buf) - 1);
3972d040a1cSDave Hansen 	if (copy_from_user(buf, user_buf, len))
3982d040a1cSDave Hansen 		return -EFAULT;
3992d040a1cSDave Hansen 
4002d040a1cSDave Hansen 	buf[len] = '\0';
4012d040a1cSDave Hansen 	if (kstrtoint(buf, 0, &ceiling))
4022d040a1cSDave Hansen 		return -EINVAL;
4032d040a1cSDave Hansen 
4042d040a1cSDave Hansen 	if (ceiling < 0)
4052d040a1cSDave Hansen 		return -EINVAL;
4062d040a1cSDave Hansen 
4072d040a1cSDave Hansen 	tlb_single_page_flush_ceiling = ceiling;
4082d040a1cSDave Hansen 	return count;
4092d040a1cSDave Hansen }
4102d040a1cSDave Hansen 
4112d040a1cSDave Hansen static const struct file_operations fops_tlbflush = {
4122d040a1cSDave Hansen 	.read = tlbflush_read_file,
4132d040a1cSDave Hansen 	.write = tlbflush_write_file,
4142d040a1cSDave Hansen 	.llseek = default_llseek,
4152d040a1cSDave Hansen };
4162d040a1cSDave Hansen 
4172d040a1cSDave Hansen static int __init create_tlb_single_page_flush_ceiling(void)
4182d040a1cSDave Hansen {
4192d040a1cSDave Hansen 	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
4202d040a1cSDave Hansen 			    arch_debugfs_dir, NULL, &fops_tlbflush);
4212d040a1cSDave Hansen 	return 0;
4222d040a1cSDave Hansen }
4232d040a1cSDave Hansen late_initcall(create_tlb_single_page_flush_ceiling);
424