/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_

#include <linux/static_key.h>
#include <linux/objtool.h>
#include <linux/linkage.h>

#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/current.h>

/*
 * Call depth tracking for Intel SKL CPUs to address the RSB underflow
 * issue in software.
 *
 * The tracking does not use a counter. It uses arithmetic shift
 * right on call entry and logical shift left on return.
 *
 * The depth tracking variable is initialized to 0x8000.... when the call
 * depth is zero. The arithmetic shift right sign extends the MSB and
 * saturates after the 12th call. The shift count is 5 for both directions
 * so the tracking covers 12 nested calls.
 *
 *  Call
 *  0: 0x8000000000000000	0x0000000000000000
 *  1: 0xfc00000000000000	0xf000000000000000
 * ...
 * 11: 0xfffffffffffffff8	0xfffffffffffffc00
 * 12: 0xffffffffffffffff	0xffffffffffffffe0
 *
 * After a return buffer fill the depth is credited 12 calls before the
 * next stuffing has to take place.
 *
 * There is an inaccuracy for situations like this:
 *
 *  10 calls
 *   5 returns
 *   3 calls
 *   4 returns
 *   3 calls
 *   ....
 *
 * The shift count might cause this to be off by one in either direction,
 * but there is still a cushion vs. the RSB depth. The algorithm does not
 * claim to be perfect and it can be speculated around by the CPU, but it
 * is considered to obfuscate the problem enough to make exploitation
 * extremely difficult.
 */
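/*
 * Worked example of one tracking step (illustration only, a plain C model
 * of the asm macros below, not part of the kernel interface):
 *
 *	u64 depth = 0x8000000000000000ULL;	// call depth 0
 *	depth = (u64)((s64)depth >> 5);		// call entry: sarq $5 -> 0xfc00000000000000
 *	depth <<= 5;				// return:     shlq $5 -> 0x8000000000000000
 *
 * Because the arithmetic shift keeps replicating the sign bit, the value
 * saturates at all-ones after 12 call-entry shifts, which is the
 * saturation property the table above relies on.
 */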
#define RET_DEPTH_SHIFT			5
#define RSB_RET_STUFF_LOOPS		16
#define RET_DEPTH_INIT			0x8000000000000000ULL
#define RET_DEPTH_INIT_FROM_CALL	0xfc00000000000000ULL
#define RET_DEPTH_CREDIT		0xffffffffffffffffULL

#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS				\
	incq	%gs:__x86_call_count;
# define CALL_THUNKS_DEBUG_INC_RETS				\
	incq	%gs:__x86_ret_count;
# define CALL_THUNKS_DEBUG_INC_STUFFS				\
	incq	%gs:__x86_stuffs_count;
# define CALL_THUNKS_DEBUG_INC_CTXSW				\
	incq	%gs:__x86_ctxsw_count;
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
# define CALL_THUNKS_DEBUG_INC_STUFFS
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif

#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)

#include <asm/asm-offsets.h>

#define CREDIT_CALL_DEPTH					\
	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define ASM_CREDIT_CALL_DEPTH					\
	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define RESET_CALL_DEPTH					\
	xor	%eax, %eax;					\
	bts	$63, %rax;					\
	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define RESET_CALL_DEPTH_FROM_CALL				\
	movb	$0xfc, %al;					\
	shl	$56, %rax;					\
	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
	CALL_THUNKS_DEBUG_INC_CALLS

#define INCREMENT_CALL_DEPTH					\
	sarq	$5, %gs:pcpu_hot + X86_call_depth;		\
	CALL_THUNKS_DEBUG_INC_CALLS

#define ASM_INCREMENT_CALL_DEPTH				\
	sarq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
	CALL_THUNKS_DEBUG_INC_CALLS

#else
#define CREDIT_CALL_DEPTH
#define ASM_CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
#define INCREMENT_CALL_DEPTH
#define ASM_INCREMENT_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
#endif

/*
 * Fill the CPU return stack buffer.
 *
 * Each entry in the RSB, if used for a speculative 'ret', points at a
 * harmless speculation trap (an 'int3' in the current helpers below;
 * earlier versions used an infinite 'pause; lfence; jmp' loop) to capture
 * speculative execution.
 *
 * This is required in various cases for retpoline and IBRS-based
 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
 * eliminate potentially bogus entries from the RSB, and sometimes
 * purely to ensure that it doesn't get empty, which on some CPUs would
 * allow predictions from other (unwanted!) sources to be used.
 *
 * We define a CPP macro such that it can be used from both .S files and
 * inline assembly. It's possible to do a .macro and then include that
 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
 */

#define RETPOLINE_THUNK_SIZE	32
#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */

/*
 * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
 */
#define __FILL_RETURN_SLOT			\
	ANNOTATE_INTRA_FUNCTION_CALL;		\
	call	772f;				\
	int3;					\
772:

/*
 * Stuff the entire RSB.
 *
 * Google experimented with loop-unrolling and this turned out to be
 * the optimal version - two calls, each with its own speculation
 * trap should its return address end up getting used, in a loop.
 */
#ifdef CONFIG_X86_64
#define __FILL_RETURN_BUFFER(reg, nr)			\
	mov	$(nr/2), reg;				\
771:							\
	__FILL_RETURN_SLOT				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	\
	dec	reg;					\
	jnz	771b;					\
	/* barrier for jnz misprediction */		\
	lfence;						\
	ASM_CREDIT_CALL_DEPTH				\
	CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
 * i386 doesn't unconditionally have LFENCE, as such it can't
 * do a loop.
 */
#define __FILL_RETURN_BUFFER(reg, nr)			\
	.rept nr;					\
	__FILL_RETURN_SLOT;				\
	.endr;						\
	add	$(BITS_PER_LONG/8) * nr, %_ASM_SP;
#endif
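
/*
 * How the stuffing above works (explanatory note): each __FILL_RETURN_SLOT
 * performs an intra-function CALL, which pushes a return address pointing
 * at the following int3 onto both the stack and the RSB. After nr such
 * calls every RSB entry points at a harmless speculation trap, and the
 * add of (BITS_PER_LONG/8) * 2 per loop iteration (or * nr in the i386
 * variant) unwinds the stack pointer again.
 */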

/*
 * Stuff a single RSB slot.
 *
 * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
 * forced to retire before letting a RET instruction execute.
 *
 * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
 * before this point.
 */
#define __FILL_ONE_RETURN				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8), %_ASM_SP;		\
	lfence;

#ifdef __ASSEMBLY__

/*
 * This should be used immediately before an indirect jump/call. It tells
 * objtool the subsequent indirect jump/call is vouched safe for retpoline
 * builds.
 */
.macro ANNOTATE_RETPOLINE_SAFE
.Lhere_\@:
	.pushsection .discard.retpoline_safe
	.long .Lhere_\@
	.popsection
.endm

/*
 * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
 * vs RETBleed validation.
 */
#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE

2099bb2ec60SPeter Zijlstra /*
210a09a6e23SPeter Zijlstra  * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
211a09a6e23SPeter Zijlstra  * eventually turn into it's own annotation.
212a09a6e23SPeter Zijlstra  */
2134708ea14SJosh Poimboeuf .macro VALIDATE_UNRET_END
214fb3bd914SBorislav Petkov (AMD) #if defined(CONFIG_NOINSTR_VALIDATION) && \
215fb3bd914SBorislav Petkov (AMD) 	(defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
216a09a6e23SPeter Zijlstra 	ANNOTATE_RETPOLINE_SAFE
217a09a6e23SPeter Zijlstra 	nop
218a09a6e23SPeter Zijlstra #endif
219a09a6e23SPeter Zijlstra .endm
220a09a6e23SPeter Zijlstra 
/*
 * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
 * to the retpoline thunk with a CS prefix when the register requires
 * a REX prefix byte to encode. Also see apply_retpolines().
 */
.macro __CS_PREFIX reg:req
	.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
	.ifc \reg,\rs
	.byte 0x2e
	.endif
	.endr
.endm
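
/*
 * Example (illustrative, assuming the usual thunk patching flow): for
 * \reg = r11 this emits "2e e8 <rel32>", i.e. a 6 byte CS-prefixed call
 * to __x86_indirect_thunk_r11, which leaves the call site large enough
 * to later be rewritten in place to e.g. "lfence; call *%r11" by
 * apply_retpolines().
 */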

/*
 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
 * indirect jmp/call which may be susceptible to the Spectre variant 2
 * attack.
 *
 * NOTE: these do not take kCFI into account and are thus not comparable to C
 * indirect calls, take care when using. The target of these should be an ENDBR
 * instruction irrespective of kCFI.
 */
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
	__CS_PREFIX \reg
	jmp	__x86_indirect_thunk_\reg
#else
	jmp	*%\reg
	int3
#endif
.endm

.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
	__CS_PREFIX \reg
	call	__x86_indirect_thunk_\reg
#else
	call	*%\reg
#endif
.endm
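
/*
 * Usage sketch (hypothetical call site, shown for illustration): with the
 * branch target loaded into %r11, "CALL_NOSPEC r11" expands to either a
 * CS-prefixed call to __x86_indirect_thunk_r11 (retpoline builds) or a
 * plain "call *%r11".
 */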

/*
 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
 * monstrosity above manually.
 */
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
		__stringify(nop;nop;__FILL_ONE_RETURN), \ftr2

.Lskip_rsb_\@:
.endm

/*
 * The CALL to srso_alias_untrain_ret() must be patched in directly at
 * the spot where untraining must be done, i.e., srso_alias_untrain_ret()
 * must be the target of a CALL instruction instead of indirectly
 * jumping to a wrapper which then calls it. Therefore, this macro is
 * called outside of __UNTRAIN_RET below, for the time being, before the
 * kernel can support nested alternatives with arbitrary nesting.
 */
.macro CALL_UNTRAIN_RET
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
	ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
		          "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
.endm

/*
 * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
 * return thunk isn't mapped into the userspace tables (then again, AMD
 * typically has NO_MELTDOWN).
 *
 * While retbleed_untrain_ret() doesn't clobber anything but requires a stack,
 * entry_ibpb() will clobber AX, CX, DX.
 *
 * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
 * where we have a stack but before any RET instruction.
 */
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY)
	VALIDATE_UNRET_END
	CALL_UNTRAIN_RET
	ALTERNATIVE_2 "",						\
		      "call entry_ibpb", \ibpb_feature,			\
		     __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
.endm

#define UNTRAIN_RET \
	__UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)

#define UNTRAIN_RET_VM \
	__UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)

#define UNTRAIN_RET_FROM_CALL \
	__UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)


.macro CALL_DEPTH_ACCOUNT
#ifdef CONFIG_CALL_DEPTH_TRACKING
	ALTERNATIVE "",							\
		    __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm

/*
 * Macro to execute VERW instruction that mitigates transient data sampling
 * attacks such as MDS. On affected systems a microcode update overloaded the
 * VERW instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
 *
 * Note: Only the memory operand variant of VERW clears the CPU buffers.
 */
.macro CLEAR_CPU_BUFFERS
	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
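
/*
 * Usage note (hedged, callers live outside this header): entry and VM-entry
 * code is expected to place CLEAR_CPU_BUFFERS right before the transition
 * back to userspace or into the guest, so no kernel data lingers in the
 * CPU buffers after the switch.
 */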

#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
.endm

.macro CLEAR_BRANCH_HISTORY_VMEXIT
	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
.endm
#else
#define CLEAR_BRANCH_HISTORY
#define CLEAR_BRANCH_HISTORY_VMEXIT
#endif

#else /* __ASSEMBLY__ */

#define ANNOTATE_RETPOLINE_SAFE					\
	"999:\n\t"						\
	".pushsection .discard.retpoline_safe\n\t"		\
	".long 999b\n\t"					\
	".popsection\n\t"

typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

#ifdef CONFIG_RETHUNK
extern void __x86_return_thunk(void);
#else
static inline void __x86_return_thunk(void) {}
#endif

#ifdef CONFIG_CPU_UNRET_ENTRY
extern void retbleed_return_thunk(void);
#else
static inline void retbleed_return_thunk(void) {}
#endif

extern void srso_alias_untrain_ret(void);

#ifdef CONFIG_CPU_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
#else
static inline void srso_return_thunk(void) {}
static inline void srso_alias_return_thunk(void) {}
#endif

extern void retbleed_untrain_ret(void);
extern void srso_untrain_ret(void);

extern void entry_untrain_ret(void);
extern void entry_ibpb(void);

#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
#endif

extern void (*x86_return_thunk)(void);

#ifdef CONFIG_CALL_DEPTH_TRACKING
extern void __x86_return_skl(void);

static inline void x86_set_skl_return_thunk(void)
{
	x86_return_thunk = &__x86_return_skl;
}

#define CALL_DEPTH_ACCOUNT					\
	ALTERNATIVE("",						\
		    __stringify(INCREMENT_CALL_DEPTH),		\
		    X86_FEATURE_CALL_DEPTH)

#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else
static inline void x86_set_skl_return_thunk(void) {}

#define CALL_DEPTH_ACCOUNT ""

#endif

#ifdef CONFIG_RETPOLINE

#define GEN(reg) \
	extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
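
/*
 * For reference: each GEN() block above expands once per general purpose
 * register named in <asm/GEN-for-each-reg.h>, e.g. declaring
 * __x86_indirect_thunk_rax, __x86_indirect_thunk_rbx, ... and likewise
 * for the call/jump thunk variants.
 */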

#ifdef CONFIG_X86_64

/*
 * Inline asm uses the %V modifier which is only available in newer GCC,
 * which is ensured when CONFIG_RETPOLINE is defined.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)

# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

#else /* CONFIG_X86_32 */
/*
 * For i386 we use the original ret-equivalent retpoline, because
 * otherwise we'll run out of registers. We don't care about CET
 * here, anyway.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	"       jmp    904f;\n"					\
	"       .align 16\n"					\
	"901:	call   903f;\n"					\
	"902:	pause;\n"					\
	"    	lfence;\n"					\
	"       jmp    902b;\n"					\
	"       .align 16\n"					\
	"903:	lea    4(%%esp), %%esp;\n"			\
	"       pushl  %[thunk_target];\n"			\
	"       ret;\n"						\
	"       .align 16\n"					\
	"904:	call   901b;\n",				\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)
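
/*
 * How the i386 sequence above works (explanatory note): the jump to 904
 * CALLs 901, which in turn CALLs 903; 903 discards the just-pushed return
 * address, pushes the real thunk_target and RETs to it, while any
 * speculation down the predicted return path is caught in the
 * pause/lfence loop at 902.
 */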

# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif

/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
	SPECTRE_V2_NONE,
	SPECTRE_V2_RETPOLINE,
	SPECTRE_V2_LFENCE,
	SPECTRE_V2_EIBRS,
	SPECTRE_V2_EIBRS_RETPOLINE,
	SPECTRE_V2_EIBRS_LFENCE,
	SPECTRE_V2_IBRS,
};

/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
	SPECTRE_V2_USER_NONE,
	SPECTRE_V2_USER_STRICT,
	SPECTRE_V2_USER_STRICT_PREFERRED,
	SPECTRE_V2_USER_PRCTL,
	SPECTRE_V2_USER_SECCOMP,
};

/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
	SPEC_STORE_BYPASS_NONE,
	SPEC_STORE_BYPASS_DISABLE,
	SPEC_STORE_BYPASS_PRCTL,
	SPEC_STORE_BYPASS_SECCOMP,
};

static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
		: : "c" (msr),
		    "a" ((u32)val),
		    "d" ((u32)(val >> 32)),
		    [feature] "i" (feature)
		: "memory");
}

extern u64 x86_pred_cmd;

static inline void indirect_branch_prediction_barrier(void)
{
	alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
}

/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);

/*
 * With retpoline, we must use IBRS to restrict branch prediction
 * before calling into firmware.
 *
 * (Implemented as CPP macros due to header hell.)
 */
#define firmware_restrict_branch_speculation_start()			\
do {									\
	preempt_disable();						\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current() | SPEC_CTRL_IBRS,	\
			      X86_FEATURE_USE_IBRS_FW);			\
	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,		\
			      X86_FEATURE_USE_IBPB_FW);			\
} while (0)

#define firmware_restrict_branch_speculation_end()			\
do {									\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current(),			\
			      X86_FEATURE_USE_IBRS_FW);			\
	preempt_enable();						\
} while (0)
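
/*
 * Usage note (illustrative, callers live outside this header): the two
 * macros above are meant to bracket calls into firmware such as EFI
 * runtime services, roughly as:
 *
 *	firmware_restrict_branch_speculation_start();
 *	ret = firmware_call(...);	// placeholder for the actual call
 *	firmware_restrict_branch_speculation_end();
 */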

DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

DECLARE_STATIC_KEY_FALSE(mds_idle_clear);

DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);

DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);

extern u16 mds_verw_sel;

#include <asm/segment.h>

/**
 * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
 *
 * This uses the otherwise unused and obsolete VERW instruction in
 * combination with microcode which triggers a CPU buffer flush when the
 * instruction is executed.
 */
static __always_inline void mds_clear_cpu_buffers(void)
{
	static const u16 ds = __KERNEL_DS;

	/*
	 * Has to be the memory-operand variant because only that
	 * guarantees the CPU buffer flush functionality according to
	 * documentation. The register-operand variant does not.
	 * Works with any segment selector, but a valid writable
	 * data segment is the fastest variant.
	 *
	 * "cc" clobber is required because VERW modifies ZF.
	 */
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

/**
 * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
 *
 * Clear CPU buffers if the corresponding static key is enabled
 */
static __always_inline void mds_idle_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_idle_clear))
		mds_clear_cpu_buffers();
}

#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */