#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H

#include <linux/mm.h>
#include <linux/sched.h>

#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>

static inline void __invpcid(unsigned long pcid, unsigned long addr,
			     unsigned long type)
{
	struct { u64 d[2]; } desc = { { pcid, addr } };

	/*
	 * The memory clobber is because the whole point is to invalidate
	 * stale TLB entries and, especially if we're flushing global
	 * mappings, we don't want the compiler to reorder any subsequent
	 * memory accesses before the TLB flush.
	 *
	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
	 * invpcid (%rcx), %rax in long mode.
	 */
	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
}

#define INVPCID_TYPE_INDIV_ADDR		0
#define INVPCID_TYPE_SINGLE_CTXT	1
#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
#define INVPCID_TYPE_ALL_NON_GLOBAL	3

/* Flush all mappings for a given PCID and addr, not including globals. */
static inline void invpcid_flush_one(unsigned long pcid,
				     unsigned long addr)
{
	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
}

/* Flush all mappings for a given PCID, not including globals. */
static inline void invpcid_flush_single_context(unsigned long pcid)
{
	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
}

/* Flush all mappings, including globals, for all PCIDs. */
static inline void invpcid_flush_all(void)
{
	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
}

/* Flush all mappings for all PCIDs except globals. */
static inline void invpcid_flush_all_nonglobals(void)
{
	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
}
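
/*
 * Illustrative sketch only (it mirrors __native_flush_tlb_global() further
 * down, which is the in-tree user of this pattern): a caller that wants to
 * drop every TLB entry, globals included, might do:
 *
 *	if (static_cpu_has(X86_FEATURE_INVPCID)) {
 *		invpcid_flush_all();
 *	} else {
 *		raw_local_irq_save(flags);
 *		__native_flush_tlb_global_irq_disabled();
 *		raw_local_irq_restore(flags);
 *	}
 */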

static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
	u64 new_tlb_gen;

	/*
	 * Bump the generation count.  This also serves as a full barrier
	 * that synchronizes with switch_mm(): callers are required to order
	 * their read of mm_cpumask after their writes to the paging
	 * structures.
	 */
	smp_mb__before_atomic();
	new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
	smp_mb__after_atomic();

	return new_tlb_gen;
}
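
/*
 * Illustrative ordering sketch (an assumption drawn from the comment in
 * inc_mm_tlb_gen(), not a fixed recipe): a flusher updates the paging
 * structures first, then bumps the generation, and only then samples
 * mm_cpumask() to decide which CPUs need to be told about the flush:
 *
 *	... modify the page tables ...
 *	new_tlb_gen = inc_mm_tlb_gen(mm);	(full barrier)
 *	... read mm_cpumask(mm) and send flush IPIs ...
 */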

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define __flush_tlb() __native_flush_tlb()
#define __flush_tlb_global() __native_flush_tlb_global()
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif
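
/*
 * Note (assumption about the paravirt side, see asm/paravirt.h): with
 * CONFIG_PARAVIRT the three __flush_tlb*() helpers above are routed
 * through pv_mmu_ops hooks rather than the native implementations
 * defined below.
 */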

struct tlb_context {
	u64 ctx_id;
	u64 tlb_gen;
};

struct tlb_state {
	/*
	 * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
	 * are on.  This means that it may not match current->active_mm,
	 * which will contain the previous user mm when we're in lazy TLB
	 * mode even if we've already switched back to swapper_pg_dir.
	 */
	struct mm_struct *loaded_mm;
	int state;

	/*
	 * Access to this CR4 shadow and to H/W CR4 is protected by
	 * disabling interrupts when modifying either one.
	 */
	unsigned long cr4;

	/*
	 * This is a list of all contexts that might exist in the TLB.
	 * Since we don't yet use PCID, there is only one context.
	 *
	 * For each context, ctx_id indicates which mm the TLB's user
	 * entries came from.  As an invariant, the TLB will never
	 * contain entries that were already stale at the point when
	 * that mm reached the tlb_gen recorded in this list.
	 *
	 * To be clear, this means that it's legal for the TLB code to
	 * flush the TLB without updating tlb_gen.  This can happen
	 * (for now, at least) due to paravirt remote flushes.
	 */
	struct tlb_context ctxs[1];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
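
/*
 * Example (illustrative only): the per-CPU TLB state is normally accessed
 * with the this_cpu_*() helpers, e.g.
 *
 *	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 *	u64 local_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
 */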

/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}

/* Set in this CPU's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
	unsigned long cr4;

	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	if ((cr4 | mask) != cr4) {
		cr4 |= mask;
		this_cpu_write(cpu_tlbstate.cr4, cr4);
		__write_cr4(cr4);
	}
}

/* Clear in this CPU's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
	unsigned long cr4;

	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	if ((cr4 & ~mask) != cr4) {
		cr4 &= ~mask;
		this_cpu_write(cpu_tlbstate.cr4, cr4);
		__write_cr4(cr4);
	}
}
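
/*
 * Usage sketch (illustrative; the bit choices are examples, not a list of
 * in-tree callers): feature setup code flips CR4 bits through these
 * wrappers so the per-CPU shadow stays coherent with the hardware:
 *
 *	cr4_set_bits(X86_CR4_SMEP);
 *	...
 *	cr4_clear_bits(X86_CR4_PCE);
 */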

static inline void cr4_toggle_bits(unsigned long mask)
{
	unsigned long cr4;

	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	cr4 ^= mask;
	this_cpu_write(cpu_tlbstate.cr4, cr4);
	__write_cr4(cr4);
}

/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
	return this_cpu_read(cpu_tlbstate.cr4);
}

/*
 * Save some of the cr4 feature set that we're using (e.g. the Pentium 4MB
 * enable and the PPro Global page enable), so that any CPUs that boot up
 * after us can get the correct flags.  This should only be used during
 * boot on the boot CPU.
 */
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;

static inline void cr4_set_bits_and_update_boot(unsigned long mask)
{
	mmu_cr4_features |= mask;
	if (trampoline_cr4_features)
		*trampoline_cr4_features = mmu_cr4_features;
	cr4_set_bits(mask);
}
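
/*
 * Illustrative only (assumed caller, along the lines of the early MMU
 * setup code): enabling a boot-time CR4 feature this way also records it
 * for CPUs that come up later via the trampoline:
 *
 *	if (boot_cpu_has(X86_FEATURE_PGE))
 *		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 */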

static inline void __native_flush_tlb(void)
{
	/*
	 * If current->mm == NULL then we borrow an mm which may change
	 * during a task switch, so we must not be preempted while we
	 * write CR3 back:
	 */
	preempt_disable();
	native_write_cr3(__native_read_cr3());
	preempt_enable();
}

static inline void __native_flush_tlb_global_irq_disabled(void)
{
	unsigned long cr4;

	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	/* clear PGE: a CR4 write that changes PGE flushes all TLB entries */
	native_write_cr4(cr4 & ~X86_CR4_PGE);
	/* write old PGE again and flush TLBs, globals included */
	native_write_cr4(cr4);
}

static inline void __native_flush_tlb_global(void)
{
	unsigned long flags;

	if (static_cpu_has(X86_FEATURE_INVPCID)) {
		/*
		 * Using INVPCID is considerably faster than a pair of writes
		 * to CR4 sandwiched inside an IRQ flag save/restore.
		 */
		invpcid_flush_all();
		return;
	}

	/*
	 * Read-modify-write to CR4 - protect it from preemption and
	 * from interrupts. (Use the raw variant because this code can
	 * be called from deep inside debugging code.)
	 */
	raw_local_irq_save(flags);

	__native_flush_tlb_global_irq_disabled();

	raw_local_irq_restore(flags);
}

static inline void __native_flush_tlb_single(unsigned long addr)
{
	/* INVLPG drops the TLB entries for the page containing addr. */
	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}

static inline void __flush_tlb_all(void)
{
	if (boot_cpu_has(X86_FEATURE_PGE))
		__flush_tlb_global();
	else
		__flush_tlb();
}

static inline void __flush_tlb_one(unsigned long addr)
{
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
	__flush_tlb_single(addr);
}

#define TLB_FLUSH_ALL	-1UL

/*
 * TLB flushing:
 *
 *  - flush_tlb_all() flushes all processes' TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context's TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
 *
 * ...but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */
struct flush_tlb_info {
	/*
	 * We support several kinds of flushes.
	 *
	 * - Fully flush a single mm.  .mm will be set, .end will be
	 *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
	 *   which the IPI sender is trying to catch us up.
	 *
	 * - Partially flush a single mm.  .mm will be set, .start and
	 *   .end will indicate the range, and .new_tlb_gen will be set
	 *   such that the changes between generation .new_tlb_gen-1 and
	 *   .new_tlb_gen are entirely contained in the indicated range.
	 *
	 * - Fully flush all mms whose tlb_gens have been updated.  .mm
	 *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
	 *   will be zero.
	 */
	struct mm_struct	*mm;
	unsigned long		start;
	unsigned long		end;
	u64			new_tlb_gen;
};
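
/*
 * Illustrative only (the real construction lives in mm/tlb.c and may
 * differ in detail): a partial flush of one mm might be described roughly
 * like this before being handed to flush_tlb_others():
 *
 *	struct flush_tlb_info info = {
 *		.mm = mm,
 *		.start = start,
 *		.end = end,
 *		.new_tlb_gen = inc_mm_tlb_gen(mm),
 *	};
 *
 *	flush_tlb_others(mm_cpumask(mm), &info);
 */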

#define local_flush_tlb() __flush_tlb()

#define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)

#define flush_tlb_range(vma, start, end)	\
		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)

extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned long vmflag);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
{
	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
}

void native_flush_tlb_others(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info);

#define TLBSTATE_OK	1
#define TLBSTATE_LAZY	2

static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
					struct mm_struct *mm)
{
	inc_mm_tlb_gen(mm);
	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}
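
/*
 * Sketch of the intended batching flow (an assumption drawn from the
 * generic unmap-batching callers, not a definition made here): with a
 * struct arch_tlbflush_unmap_batch *batch, the caller accumulates the
 * CPUs that may hold stale entries for each mm it unmaps from, then
 * issues a single flush at the end via arch_tlbbatch_flush(), declared
 * just below:
 *
 *	arch_tlbbatch_add_mm(batch, mm);
 *	... unmap more pages ...
 *	arch_tlbbatch_flush(batch);
 */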

extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);

#ifndef CONFIG_PARAVIRT
#define flush_tlb_others(mask, info)	\
	native_flush_tlb_others(mask, info)
#endif

#endif /* _ASM_X86_TLBFLUSH_H */