/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/frame.h>
#include <asm/nops.h>

	.section .text..__x86.indirect_thunk


.macro POLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call    .Ldo_rop_\@
	int3
.Ldo_rop_\@:
	mov     %\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
.endm

.macro RETPOLINE reg
	POLINE \reg
	RET
.endm
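/*
 * For illustration (annotations omitted), "RETPOLINE rax" roughly expands
 * to:
 *
 *	call	.Ldo_rop	# return address (the int3 below) is pushed
 *	int3			# and becomes the RSB prediction for the RET
 * .Ldo_rop:
 *	mov	%rax, (%rsp)	# overwrite the return address with the real
 *	ret			# target: architecturally this RET goes to
 *				# *%rax, speculation is trapped at the int3
 */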

.macro THUNK reg

	.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_UNDEFINED
	ANNOTATE_NOENDBR

	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)

.endm
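/*
 * At runtime the ALTERNATIVE_2 above leaves (sketching the %rax thunk):
 *
 *   - the full RETPOLINE sequence when X86_FEATURE_RETPOLINE is set,
 *   - "lfence; jmp *%rax; int3" when X86_FEATURE_RETPOLINE_LFENCE is set,
 *   - a bare "jmp *%rax" when retpolines are not enabled at all.
 */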

/*
 * Despite being an assembler file we can't just use .irp here
 * because __KSYM_DEPS__ only uses the C preprocessor and would
 * only see one instance of "__x86_indirect_thunk_\reg" rather
 * than one per register with the correct names. So we do it
 * the simple and nasty way...
 *
 * Worse, you can only have a single EXPORT_SYMBOL per line,
 * and CPP can't insert newlines, so we have to repeat everything
 * at least twice.
 */

#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)

#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
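/*
 * For reference, <asm/GEN-for-each-reg.h> invokes GEN() once per usable
 * general purpose register, so the two GEN blocks above roughly expand to
 *
 *	THUNK rax
 *	THUNK rcx
 *	...
 *
 * and
 *
 *	_ASM_NOKPROBE(__x86_indirect_thunk_rax); EXPORT_SYMBOL(__x86_indirect_thunk_rax)
 *	_ASM_NOKPROBE(__x86_indirect_thunk_rcx); EXPORT_SYMBOL(__x86_indirect_thunk_rcx)
 *	...
 */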

#ifdef CONFIG_CALL_DEPTH_TRACKING
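/*
 * Call depth tracking variants of the indirect thunks: CALL_THUNK is the
 * POLINE gadget with CALL_DEPTH_ACCOUNT in front, so a tracked indirect
 * call updates the per-CPU call depth counter before dispatching to the
 * target held in \reg.
 */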
.macro CALL_THUNK reg
	.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_UNDEFINED
	ANNOTATE_NOENDBR

	CALL_DEPTH_ACCOUNT
	POLINE \reg
	ANNOTATE_UNRET_SAFE
	ret
	int3
.endm

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_call_thunk_array)

#define GEN(reg) CALL_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_call_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

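/*
 * JUMP_THUNK below is CALL_THUNK without the depth accounting: an indirect
 * JMP neither pushes a return address nor consumes an RSB entry, so there
 * is no call depth to account for.
 */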
.macro JUMP_THUNK reg
	.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_UNDEFINED
	ANNOTATE_NOENDBR
	POLINE \reg
	ANNOTATE_UNRET_SAFE
	ret
	int3
.endm

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_jump_thunk_array)

#define GEN(reg) JUMP_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_jump_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif
/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
 */
#ifdef CONFIG_RETHUNK

/*
 * srso_untrain_ret_alias() and srso_safe_ret_alias() are placed at
 * special addresses:
 *
 * - srso_untrain_ret_alias() is 2M aligned
 * - srso_safe_ret_alias() is also in the same 2M page but bits 2, 8, 14
 * and 20 in its virtual address are set (while those bits in the
 * srso_untrain_ret_alias() function are cleared).
 *
 * This guarantees that those two addresses will alias in the branch
 * target buffer of Zen3/4 generations, causing any potentially
 * poisoned entries at that BTB slot to be evicted.
 *
 * As a result, srso_safe_ret_alias() becomes a safe return.
 */
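/*
 * As an illustration (the actual placement is done by the linker script):
 * with srso_untrain_ret_alias() sitting exactly on a 2M boundary A,
 * srso_safe_ret_alias() ends up at A + 0x104104, since setting bits 2, 8,
 * 14 and 20 adds 0x4 + 0x100 + 0x4000 + 0x100000.
 */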
#ifdef CONFIG_CPU_SRSO
	.section .text..__x86.rethunk_untrain

SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
	ANNOTATE_NOENDBR
	ASM_NOP2
	lfence
	jmp __x86_return_thunk
SYM_FUNC_END(srso_untrain_ret_alias)
__EXPORT_THUNK(srso_untrain_ret_alias)

	.section .text..__x86.rethunk_safe
#endif

/* Needs a definition for the __x86_return_thunk alternative below. */
SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
#ifdef CONFIG_CPU_SRSO
	lea 8(%_ASM_SP), %_ASM_SP
	UNWIND_HINT_FUNC
#endif
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_safe_ret_alias)
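/*
 * Note: the stack adjustment above (and in srso_safe_ret() below) uses LEA
 * rather than ADD, presumably so that RFLAGS is left untouched on the
 * return path.
 */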

	.section .text..__x86.return_thunk

/*
 * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
 * 1) The RET at __ret must be on a 64 byte boundary, for
 *    alignment within the BTB.
 * 2) The instruction at zen_untrain_ret must contain, and not
 *    end with, the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
 */
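/*
 * The .align/.skip pair below arranges for zen_untrain_ret to start one
 * byte (the 0xf6 opcode) before a 64 byte boundary, so that the RET byte
 * at __ret lands exactly on that boundary, satisfying rule 1) above.
 */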
	.align 64
	.skip 64 - (__ret - zen_untrain_ret), 0xcc
SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	ANNOTATE_NOENDBR
	/*
	 * As executed from zen_untrain_ret, this is:
	 *
	 *   TEST $0xcc, %bl
	 *   LFENCE
	 *   JMP __ret
	 *
	 * Executing the TEST instruction has a side effect of evicting any BTB
	 * prediction (potentially attacker controlled) attached to the RET, as
	 * __ret + 1 isn't an instruction boundary at the moment.
	 */
	.byte	0xf6

	/*
	 * As executed from __x86_return_thunk, this is a plain RET.
	 *
	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
	 *
	 * We subsequently jump backwards and architecturally execute the RET.
	 * This creates a correct BTB prediction (type=ret), but in the
	 * meantime we suffer Straight Line Speculation (because the predicted
	 * type was not a branch) which is halted by the INT3.
	 *
	 * With SMT enabled and STIBP active, a sibling thread cannot poison
	 * RET's prediction to a type of its choice, but can evict the
	 * prediction due to competitive sharing. If the prediction is
	 * evicted, __x86_return_thunk will suffer Straight Line Speculation
	 * which will be contained safely by the INT3.
	 */
SYM_INNER_LABEL(__ret, SYM_L_GLOBAL)
	ret
	int3
SYM_CODE_END(__ret)

	/*
	 * Ensure the TEST decoding / BTB invalidation is complete.
	 */
	lfence

	/*
	 * Jump back and execute the RET in the middle of the TEST instruction.
	 * INT3 is for SLS protection.
	 */
	jmp __ret
	int3
SYM_FUNC_END(zen_untrain_ret)
__EXPORT_THUNK(zen_untrain_ret)

/*
 * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret()
 * above. On kernel entry, srso_untrain_ret() is executed, which is a
 *
 * movabs $0xccccc30824648d48,%rax
 *
 * and when the return thunk later executes the inner label
 * srso_safe_ret(), that is just a stack adjustment plus a RET, which is
 * mispredicted and thus a "safe" one to use.
 */
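/*
 * Byte-level view of the trick: the two bytes 0x48 0xb8 emitted below are
 * the MOVABS opcode, and its 8 byte immediate is the little-endian view of
 * the srso_safe_ret() body itself:
 *
 *	48 8d 64 24 08		lea    0x8(%rsp),%rsp
 *	c3			ret
 *	cc cc			int3; int3
 *
 * which reads back as 0xccccc30824648d48, matching the movabs quoted above.
 */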
	.align 64
	.skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	ANNOTATE_NOENDBR
	.byte 0x48, 0xb8

SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
	lea 8(%_ASM_SP), %_ASM_SP
	ret
	int3
	int3
	lfence
	call srso_safe_ret
	ud2
SYM_CODE_END(srso_safe_ret)
SYM_FUNC_END(srso_untrain_ret)
__EXPORT_THUNK(srso_untrain_ret)

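/*
 * __x86_return_thunk is the magic symbol every compiler generated RET
 * jumps to (see the -mfunction-return=thunk-extern note above).  By
 * default it just forwards to the plain RET at __ret; when
 * X86_FEATURE_SRSO or X86_FEATURE_SRSO_ALIAS is set, alternatives
 * patching turns it into a call to the corresponding SRSO safe return
 * sequence instead.
 */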
SYM_CODE_START(__x86_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
			"call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
	ud2
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */

#ifdef CONFIG_CALL_DEPTH_TRACKING

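/*
 * Return thunk for call depth tracking; a rough sketch of the scheme (see
 * arch/x86/kernel/callthunks.c and <asm/nospec-branch.h> for the details):
 * the call-side accounting shifts the per-CPU depth counter one way and
 * every return shifts it back (the shlq $5 below).  Once the counter
 * reaches zero, more returns than tracked calls have executed and the RSB
 * may underflow, so the slow path refills it with 16 harmless
 * intra-function calls, drops the 16 pushed return addresses from the
 * stack again and resets the counter via CREDIT_CALL_DEPTH before
 * actually returning.
 */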
	.align 64
SYM_FUNC_START(__x86_return_skl)
	ANNOTATE_NOENDBR
	/*
	 * Keep the hotpath in a 16 byte I-fetch window for the non-debug
	 * case.
	 */
	CALL_THUNKS_DEBUG_INC_RETS
	shlq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
	jz	1f
	ANNOTATE_UNRET_SAFE
	ret
	int3
1:
	CALL_THUNKS_DEBUG_INC_STUFFS
	.rept	16
	ANNOTATE_INTRA_FUNCTION_CALL
	call	2f
	int3
2:
	.endr
	add	$(8*16), %rsp

	CREDIT_CALL_DEPTH

	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(__x86_return_skl)

#endif /* CONFIG_CALL_DEPTH_TRACKING */
310