/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_

#include <linux/static_key.h>
#include <linux/objtool.h>
#include <linux/linkage.h>

#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>

/* Size in bytes of one retpoline_thunk_t entry (see the typedef below). */
#define RETPOLINE_THUNK_SIZE	32

/*
 * Fill the CPU return stack buffer.
 *
 * Each entry in the RSB, if used for a speculative 'ret', contains an
 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
 *
 * This is required in various cases for retpoline and IBRS-based
 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
 * eliminate potentially bogus entries from the RSB, and sometimes
 * purely to ensure that it doesn't get empty, which on some CPUs would
 * allow predictions from other (unwanted!) sources to be used.
 *
 * We define a CPP macro such that it can be used from both .S files and
 * inline assembly. It's possible to do a .macro and then include that
 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
 */

#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */

/*
 * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
 *
 * Emits a CALL to the local label 772 placed right after an INT3, so the
 * return address pushed by the CALL points at that INT3. The users of this
 * helper adjust the stack pointer afterwards to drop the pushed return
 * address(es) again.
 */
#define __FILL_RETURN_SLOT			\
	ANNOTATE_INTRA_FUNCTION_CALL;		\
	call	772f;				\
	int3;					\
772:

/*
 * Stuff the entire RSB.
 *
 * Google experimented with loop-unrolling and this turned out to be
 * the optimal version - two calls, each with their own speculation
 * trap should their return address end up getting used, in a loop.
 *
 * @reg is loaded with nr/2 and used as the loop counter; every iteration
 * fills two slots and then pops both return addresses off the stack.
 */
#ifdef CONFIG_X86_64
#define __FILL_RETURN_BUFFER(reg, nr)			\
	mov	$(nr/2), reg;				\
771:							\
	__FILL_RETURN_SLOT				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	\
	dec	reg;					\
	jnz	771b;					\
	/* barrier for jnz misprediction */		\
	lfence;
#else
/*
 * i386 doesn't unconditionally have LFENCE, as such it can't
 * do a loop.
 *
 * Instead the slot sequence is repeated @nr times via .rept and the stack
 * pointer is fixed up once at the end; @reg is unused in this variant.
 */
#define __FILL_RETURN_BUFFER(reg, nr)			\
	.rept nr;					\
	__FILL_RETURN_SLOT;				\
	.endr;						\
	add	$(BITS_PER_LONG/8) * nr, %_ASM_SP;
#endif

/*
 * Stuff a single RSB slot.
 *
 * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
 * forced to retire before letting a RET instruction execute.
 *
 * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
 * before this point.
 */
#define __FILL_ONE_RETURN				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8), %_ASM_SP;		\
	lfence;

#ifdef __ASSEMBLY__

/*
 * This should be used immediately before an indirect jump/call. It tells
 * objtool the subsequent indirect jump/call is vouched safe for retpoline
 * builds.
 *
 * The address of the annotated location is recorded in the
 * .discard.retpoline_safe section.
 */
.macro ANNOTATE_RETPOLINE_SAFE
	.Lannotate_\@:
	.pushsection .discard.retpoline_safe
	_ASM_PTR .Lannotate_\@
	.popsection
.endm

/*
 * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
 * vs RETBleed validation.
 */
#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE

/*
 * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
 * eventually turn into its own annotation.
 *
 * Expands to nothing unless CONFIG_DEBUG_ENTRY is enabled.
 */
.macro ANNOTATE_UNRET_END
#ifdef CONFIG_DEBUG_ENTRY
	ANNOTATE_RETPOLINE_SAFE
	nop
#endif
.endm

/*
 * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
 * to the retpoline thunk with a CS prefix when the register requires
 * a REX prefix byte to encode. Also see apply_retpolines().
 *
 * Emits the 0x2e byte only when \reg is one of r8-r15; for any other
 * register nothing is emitted.
 */
.macro __CS_PREFIX reg:req
	.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
	.ifc \reg,\rs
	.byte 0x2e
	.endif
	.endr
.endm

/*
 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
 * indirect jmp/call which may be susceptible to the Spectre variant 2
 * attack.
 *
 * \reg is the bare register name (no '%'): it is pasted into the thunk
 * symbol name with CONFIG_RETPOLINE, and used as '*%\reg' without it.
 */
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
	__CS_PREFIX \reg
	jmp	__x86_indirect_thunk_\reg
#else
	jmp	*%\reg
	int3
#endif
.endm

.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
	__CS_PREFIX \reg
	call	__x86_indirect_thunk_\reg
#else
	call	*%\reg
#endif
.endm

 /*
  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
  * monstrosity above, manually.
  *
  * reg:  scratch register handed to __FILL_RETURN_BUFFER
  * nr:   number of RSB entries handed to __FILL_RETURN_BUFFER
  * ftr:  feature selecting the full __FILL_RETURN_BUFFER sequence
  * ftr2: feature selecting the single-slot __FILL_ONE_RETURN sequence
  *       (defaults to ALT_NOT(X86_FEATURE_ALWAYS))
  *
  * The unpatched default is a jump to the skip label at the end of the macro.
  */
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
		__stringify(__FILL_ONE_RETURN), \ftr2

.Lskip_rsb_\@:
.endm

/*
 * Replacement text used by UNTRAIN_RET for X86_FEATURE_UNRET: the call to
 * zen_untrain_ret when CONFIG_CPU_UNRET_ENTRY is enabled, an empty string
 * otherwise.
 */
#ifdef CONFIG_CPU_UNRET_ENTRY
#define CALL_ZEN_UNTRAIN_RET	"call zen_untrain_ret"
#else
#define CALL_ZEN_UNTRAIN_RET	""
#endif

/*
 * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
 * return thunk isn't mapped into the userspace tables (then again, AMD
 * typically has NO_MELTDOWN).
 *
 * While zen_untrain_ret() doesn't clobber anything but requires stack,
 * entry_ibpb() will clobber AX, CX, DX.
 *
 * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
 * where we have a stack but before any RET instruction.
 */
.macro UNTRAIN_RET
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
	ANNOTATE_UNRET_END
	ALTERNATIVE_2 "",						\
		      CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,		\
		      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
#endif
.endm

#else /* __ASSEMBLY__ */

/*
 * Inline-asm string form of the assembler macro of the same name: records
 * the address of the local label 999 in the .discard.retpoline_safe section.
 */
#define ANNOTATE_RETPOLINE_SAFE					\
	"999:\n\t"						\
	".pushsection .discard.retpoline_safe\n\t"		\
	_ASM_PTR " 999b\n\t"					\
	".popsection\n\t"

/* One thunk is RETPOLINE_THUNK_SIZE bytes; the array holds them back to back. */
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];

extern void __x86_return_thunk(void);
extern void zen_untrain_ret(void);
extern void entry_ibpb(void);

#ifdef CONFIG_RETPOLINE

/* Declare one __x86_indirect_thunk_<reg> symbol per GEN() invocation. */
#define GEN(reg) \
	extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_X86_64

/*
 * Inline asm uses the %V modifier which is only in newer GCC
 * which is ensured when CONFIG_RETPOLINE is defined.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)

/* Input operand binding @addr to the [thunk_target] name CALL_NOSPEC uses. */
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

#else /* CONFIG_X86_32 */
/*
 * For i386 we use the original ret-equivalent retpoline, because
 * otherwise we'll run out of registers. We don't care about CET
 * here, anyway.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	" jmp 904f;\n"						\
	" .align 16\n"						\
	"901: call 903f;\n"					\
	"902: pause;\n"						\
	" lfence;\n"						\
	" jmp 902b;\n"						\
	" .align 16\n"						\
	"903: lea 4(%%esp), %%esp;\n"				\
	" pushl %[thunk_target];\n"				\
	" ret;\n"						\
	" .align 16\n"						\
	"904: call 901b;\n",					\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)

/* Here the target may also live in memory ("rm" instead of "r"). */
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif

/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
	SPECTRE_V2_NONE,
	SPECTRE_V2_RETPOLINE,
	SPECTRE_V2_LFENCE,
	SPECTRE_V2_EIBRS,
	SPECTRE_V2_EIBRS_RETPOLINE,
	SPECTRE_V2_EIBRS_LFENCE,
	SPECTRE_V2_IBRS,
};

/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
	SPECTRE_V2_USER_NONE,
	SPECTRE_V2_USER_STRICT,
	SPECTRE_V2_USER_STRICT_PREFERRED,
	SPECTRE_V2_USER_PRCTL,
	SPECTRE_V2_USER_SECCOMP,
};

/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
	SPEC_STORE_BYPASS_NONE,
	SPEC_STORE_BYPASS_DISABLE,
	SPEC_STORE_BYPASS_PRCTL,
	SPEC_STORE_BYPASS_SECCOMP,
};

extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];

/*
 * alternative_msr_write - WRMSR that is only present when @feature selects it
 * @msr:     MSR index, placed in ECX ("c" constraint)
 * @val:     64-bit value; low half goes to EAX, high half to EDX
 * @feature: feature bit, passed to ALTERNATIVE() as an immediate
 *
 * The default instruction sequence is empty; "wrmsr" is the replacement
 * ALTERNATIVE() is given for @feature. The "memory" clobber keeps the
 * compiler from moving memory accesses across the statement.
 */
static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
		: : "c" (msr),
		    "a" ((u32)val),
		    "d" ((u32)(val >> 32)),
		    [feature] "i" (feature)
		: "memory");
}

/*
 * Write PRED_CMD_IBPB to MSR_IA32_PRED_CMD, gated on X86_FEATURE_USE_IBPB
 * through alternative_msr_write().
 */
static inline void indirect_branch_prediction_barrier(void)
{
	u64 val = PRED_CMD_IBPB;

	alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
}

/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
extern void write_spec_ctrl_current(u64 val, bool force);
extern u64 spec_ctrl_current(void);

/*
 * With retpoline, we must use IBRS to restrict branch prediction
 * before calling into firmware.
 *
 * (Implemented as CPP macros due to header hell.)
 *
 * The _start() macro disables preemption and the _end() macro re-enables
 * it, so the two must always be used as a pair.
 */
#define firmware_restrict_branch_speculation_start()			\
do {									\
	preempt_disable();						\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current() | SPEC_CTRL_IBRS,	\
			      X86_FEATURE_USE_IBRS_FW);			\
	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,		\
			      X86_FEATURE_USE_IBPB_FW);			\
} while (0)

#define firmware_restrict_branch_speculation_end()			\
do {									\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current(),			\
			      X86_FEATURE_USE_IBRS_FW);			\
	preempt_enable();						\
} while (0)

/*
 * Static keys declared for users of this header. Only mds_user_clear and
 * mds_idle_clear are tested in this file (by the MDS helpers at the end).
 */
DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);

DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);

DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);

/* NOTE(review): presumably included here for __KERNEL_DS used below - confirm. */
#include <asm/segment.h>

/**
 * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
 *
 * This uses the otherwise unused and obsolete VERW instruction in
 * combination with microcode which triggers a CPU buffer flush when the
 * instruction is executed.
 */
static __always_inline void mds_clear_cpu_buffers(void)
{
	static const u16 ds = __KERNEL_DS;

	/*
	 * Has to be the memory-operand variant because only that
	 * guarantees the CPU buffer flush functionality according to
	 * documentation. The register-operand variant does not.
	 * Works with any segment selector, but a valid writable
	 * data segment is the fastest variant.
	 *
	 * "cc" clobber is required because VERW modifies ZF.
	 */
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

/**
 * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
 *
 * Clear CPU buffers if the corresponding static key is enabled
 */
static __always_inline void mds_user_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_user_clear))
		mds_clear_cpu_buffers();
}

/**
 * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
 *
 * Clear CPU buffers if the corresponding static key is enabled
 */
static inline void mds_idle_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_idle_clear))
		mds_clear_cpu_buffers();
}

#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */