xref: /openbmc/linux/arch/x86/kernel/cpu/bugs.c (revision 9e3bd0f6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Copyright (C) 1994  Linus Torvalds
4  *
5  *  Cyrix stuff, June 1998 by:
6  *	- Rafael R. Reilova (moved everything from head.S),
7  *        <rreilova@ececs.uc.edu>
8  *	- Channing Corn (tests & fixes),
9  *	- Andrew D. Balsa (code cleanup).
10  */
11 #include <linux/init.h>
12 #include <linux/utsname.h>
13 #include <linux/cpu.h>
14 #include <linux/module.h>
15 #include <linux/nospec.h>
16 #include <linux/prctl.h>
17 #include <linux/sched/smt.h>
18 
19 #include <asm/spec-ctrl.h>
20 #include <asm/cmdline.h>
21 #include <asm/bugs.h>
22 #include <asm/processor.h>
23 #include <asm/processor-flags.h>
24 #include <asm/fpu/internal.h>
25 #include <asm/msr.h>
26 #include <asm/vmx.h>
27 #include <asm/paravirt.h>
28 #include <asm/alternative.h>
29 #include <asm/pgtable.h>
30 #include <asm/set_memory.h>
31 #include <asm/intel-family.h>
32 #include <asm/e820/api.h>
33 #include <asm/hypervisor.h>
34 
35 #include "cpu.h"
36 
37 static void __init spectre_v1_select_mitigation(void);
38 static void __init spectre_v2_select_mitigation(void);
39 static void __init ssb_select_mitigation(void);
40 static void __init l1tf_select_mitigation(void);
41 static void __init mds_select_mitigation(void);
42 
43 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
44 u64 x86_spec_ctrl_base;
45 EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
46 static DEFINE_MUTEX(spec_ctrl_mutex);
47 
48 /*
49  * The vendor and possibly platform specific bits which can be modified in
50  * x86_spec_ctrl_base.
51  */
52 static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
53 
54 /*
55  * AMD specific MSR info for Speculative Store Bypass control.
56  * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
57  */
58 u64 __ro_after_init x86_amd_ls_cfg_base;
59 u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
60 
61 /* Control conditional STIBP in switch_to() */
62 DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
63 /* Control conditional IBPB in switch_mm() */
64 DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
65 /* Control unconditional IBPB in switch_mm() */
66 DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
67 
68 /* Control MDS CPU buffer clear before returning to user space */
69 DEFINE_STATIC_KEY_FALSE(mds_user_clear);
70 EXPORT_SYMBOL_GPL(mds_user_clear);
71 /* Control MDS CPU buffer clear before idling (halt, mwait) */
72 DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
73 EXPORT_SYMBOL_GPL(mds_idle_clear);
74 
75 void __init check_bugs(void)
76 {
77 	identify_boot_cpu();
78 
79 	/*
80 	 * identify_boot_cpu() initialized SMT support information, let the
81 	 * core code know.
82 	 */
83 	cpu_smt_check_topology();
84 
85 	if (!IS_ENABLED(CONFIG_SMP)) {
86 		pr_info("CPU: ");
87 		print_cpu_info(&boot_cpu_data);
88 	}
89 
90 	/*
91 	 * Read the SPEC_CTRL MSR to account for reserved bits which may
92 	 * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
93 	 * init code as it is not enumerated and depends on the family.
94 	 */
95 	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
96 		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
97 
98 	/* Allow STIBP in MSR_SPEC_CTRL if supported */
99 	if (boot_cpu_has(X86_FEATURE_STIBP))
100 		x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
101 
102 	/* Select the proper CPU mitigations before patching alternatives: */
103 	spectre_v1_select_mitigation();
104 	spectre_v2_select_mitigation();
105 	ssb_select_mitigation();
106 	l1tf_select_mitigation();
107 	mds_select_mitigation();
108 
109 	arch_smt_update();
110 
111 #ifdef CONFIG_X86_32
112 	/*
113 	 * Check whether we are able to run this kernel safely on SMP.
114 	 *
115 	 * - i386 is no longer supported.
116 	 * - In order to run on anything without a TSC, we need to be
117 	 *   compiled for an i486.
118 	 */
119 	if (boot_cpu_data.x86 < 4)
120 		panic("Kernel requires i486+ for 'invlpg' and other features");
121 
122 	init_utsname()->machine[1] =
123 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
124 	alternative_instructions();
125 
126 	fpu__init_check_bugs();
127 #else /* CONFIG_X86_64 */
128 	alternative_instructions();
129 
130 	/*
131 	 * Make sure the first 2MB area is not mapped by huge pages.
132 	 * There are typically fixed-size MTRRs in there, and overlapping
133 	 * MTRRs with large pages causes slowdowns.
134 	 *
135 	 * Right now we don't do that with gbpages because there seems to be
136 	 * very little benefit in that case.
137 	 */
138 	if (!direct_gbpages)
139 		set_memory_4k((unsigned long)__va(0), 1);
140 #endif
141 }
142 
143 void
144 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
145 {
146 	u64 msrval, guestval, hostval = x86_spec_ctrl_base;
147 	struct thread_info *ti = current_thread_info();
148 
149 	/* Is MSR_SPEC_CTRL implemented ? */
150 	if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
151 		/*
152 		 * Restrict guest_spec_ctrl to supported values. Clear the
153 		 * modifiable bits in the host base value and OR in the
154 		 * modifiable bits from the guest value.
155 		 */
156 		guestval = hostval & ~x86_spec_ctrl_mask;
157 		guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
158 
159 		/* SSBD controlled in MSR_SPEC_CTRL */
160 		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
161 		    static_cpu_has(X86_FEATURE_AMD_SSBD))
162 			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
163 
164 		/* Conditional STIBP enabled? */
165 		if (static_branch_unlikely(&switch_to_cond_stibp))
166 			hostval |= stibp_tif_to_spec_ctrl(ti->flags);
167 
168 		if (hostval != guestval) {
169 			msrval = setguest ? guestval : hostval;
170 			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
171 		}
172 	}
173 
174 	/*
175 	 * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
176 	 * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
177 	 */
178 	if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
179 	    !static_cpu_has(X86_FEATURE_VIRT_SSBD))
180 		return;
181 
182 	/*
183 	 * If the host has SSBD mitigation enabled, force it in the host's
184 	 * virtual MSR value. If it's not permanently enabled, evaluate
185 	 * current's TIF_SSBD thread flag.
186 	 */
187 	if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
188 		hostval = SPEC_CTRL_SSBD;
189 	else
190 		hostval = ssbd_tif_to_spec_ctrl(ti->flags);
191 
192 	/* Sanitize the guest value */
193 	guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
194 
195 	if (hostval != guestval) {
196 		unsigned long tif;
197 
198 		tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
199 				 ssbd_spec_ctrl_to_tif(hostval);
200 
201 		speculation_ctrl_update(tif);
202 	}
203 }
204 EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
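/*
 * Illustrative use of the export above: a hypervisor caller (KVM's SVM
 * code, for instance) brackets guest execution roughly like this, using
 * the guest's SPEC_CTRL and VIRT_SPEC_CTRL shadow values:
 *
 *	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
 *	... enter and run the guest ...
 *	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
 *
 * The 'setguest' argument selects whether the guest or the host view of
 * the speculation controls gets programmed into the hardware.
 */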
205 
206 static void x86_amd_ssb_disable(void)
207 {
208 	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
209 
210 	if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
211 		wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
212 	else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
213 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
214 }
215 
216 #undef pr_fmt
217 #define pr_fmt(fmt)	"MDS: " fmt
218 
219 /* Default mitigation for MDS-affected CPUs */
220 static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL;
221 static bool mds_nosmt __ro_after_init = false;
222 
223 static const char * const mds_strings[] = {
224 	[MDS_MITIGATION_OFF]	= "Vulnerable",
225 	[MDS_MITIGATION_FULL]	= "Mitigation: Clear CPU buffers",
226 	[MDS_MITIGATION_VMWERV]	= "Vulnerable: Clear CPU buffers attempted, no microcode",
227 };
228 
229 static void __init mds_select_mitigation(void)
230 {
231 	if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) {
232 		mds_mitigation = MDS_MITIGATION_OFF;
233 		return;
234 	}
235 
236 	if (mds_mitigation == MDS_MITIGATION_FULL) {
237 		if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
238 			mds_mitigation = MDS_MITIGATION_VMWERV;
239 
240 		static_branch_enable(&mds_user_clear);
241 
242 		if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
243 		    (mds_nosmt || cpu_mitigations_auto_nosmt()))
244 			cpu_smt_disable(false);
245 	}
246 
247 	pr_info("%s\n", mds_strings[mds_mitigation]);
248 }
249 
250 static int __init mds_cmdline(char *str)
251 {
252 	if (!boot_cpu_has_bug(X86_BUG_MDS))
253 		return 0;
254 
255 	if (!str)
256 		return -EINVAL;
257 
258 	if (!strcmp(str, "off"))
259 		mds_mitigation = MDS_MITIGATION_OFF;
260 	else if (!strcmp(str, "full"))
261 		mds_mitigation = MDS_MITIGATION_FULL;
262 	else if (!strcmp(str, "full,nosmt")) {
263 		mds_mitigation = MDS_MITIGATION_FULL;
264 		mds_nosmt = true;
265 	}
266 
267 	return 0;
268 }
269 early_param("mds", mds_cmdline);
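/*
 * Example boot command lines accepted by the parser above:
 *
 *	mds=off		- no MDS mitigation
 *	mds=full	- clear CPU buffers (the default)
 *	mds=full,nosmt	- clear CPU buffers and disable SMT
 */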
270 
271 #undef pr_fmt
272 #define pr_fmt(fmt)     "Spectre V1 : " fmt
273 
274 enum spectre_v1_mitigation {
275 	SPECTRE_V1_MITIGATION_NONE,
276 	SPECTRE_V1_MITIGATION_AUTO,
277 };
278 
279 static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init =
280 	SPECTRE_V1_MITIGATION_AUTO;
281 
282 static const char * const spectre_v1_strings[] = {
283 	[SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers",
284 	[SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
285 };
286 
287 /*
288  * Does SMAP provide full mitigation against speculative kernel access to
289  * userspace?
290  */
291 static bool smap_works_speculatively(void)
292 {
293 	if (!boot_cpu_has(X86_FEATURE_SMAP))
294 		return false;
295 
296 	/*
297 	 * On CPUs which are vulnerable to Meltdown, SMAP does not
298 	 * prevent speculative access to user data in the L1 cache.
299 	 * Consider SMAP to be non-functional as a mitigation on these
300 	 * CPUs.
301 	 */
302 	if (boot_cpu_has(X86_BUG_CPU_MELTDOWN))
303 		return false;
304 
305 	return true;
306 }
307 
308 static void __init spectre_v1_select_mitigation(void)
309 {
310 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) {
311 		spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
312 		return;
313 	}
314 
315 	if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) {
316 		/*
317 		 * With Spectre v1, a user can speculatively control either
318 		 * path of a conditional swapgs with a user-controlled GS
319 		 * value.  The mitigation is to add lfences to both code paths.
320 		 *
321 		 * If FSGSBASE is enabled, the user can put a kernel address in
322 		 * GS, in which case SMAP provides no protection.
323 		 *
324 		 * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the
325 		 *	   FSGSBASE enablement patches have been merged. ]
326 		 *
327 		 * If FSGSBASE is disabled, the user can only put a user space
328 		 * address in GS.  That makes an attack harder, but still
329 		 * possible if there's no SMAP protection.
330 		 */
331 		if (!smap_works_speculatively()) {
332 			/*
333 			 * Mitigation can be provided from SWAPGS itself or
334 			 * PTI as the CR3 write in the Meltdown mitigation
335 			 * is serializing.
336 			 *
337 			 * If neither is there, mitigate with an LFENCE to
338 			 * stop speculation through swapgs.
339 			 */
340 			if (boot_cpu_has_bug(X86_BUG_SWAPGS) &&
341 			    !boot_cpu_has(X86_FEATURE_PTI))
342 				setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);
343 
344 			/*
345 			 * Enable lfences in the kernel entry (non-swapgs)
346 			 * paths, to prevent user entry from speculatively
347 			 * skipping swapgs.
348 			 */
349 			setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL);
350 		}
351 	}
352 
353 	pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
354 }
355 
356 static int __init nospectre_v1_cmdline(char *str)
357 {
358 	spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
359 	return 0;
360 }
361 early_param("nospectre_v1", nospectre_v1_cmdline);
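/*
 * The "__user pointer sanitization" mentioned in spectre_v1_strings[] is
 * the usual bounds-check hardening from <linux/nospec.h>; a minimal
 * sketch of the pattern looks like:
 *
 *	if (index < size) {
 *		index = array_index_nospec(index, size);
 *		val = array[index];
 *	}
 *
 * array_index_nospec() forces 'index' to 0 when the bounds check is
 * mispredicted, so the dependent load cannot be steered out of bounds
 * under speculation.
 */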
362 
363 #undef pr_fmt
364 #define pr_fmt(fmt)     "Spectre V2 : " fmt
365 
366 static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
367 	SPECTRE_V2_NONE;
368 
369 static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
370 	SPECTRE_V2_USER_NONE;
371 
372 #ifdef CONFIG_RETPOLINE
373 static bool spectre_v2_bad_module;
374 
375 bool retpoline_module_ok(bool has_retpoline)
376 {
377 	if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
378 		return true;
379 
380 	pr_err("System may be vulnerable to spectre v2\n");
381 	spectre_v2_bad_module = true;
382 	return false;
383 }
384 
385 static inline const char *spectre_v2_module_string(void)
386 {
387 	return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
388 }
389 #else
390 static inline const char *spectre_v2_module_string(void) { return ""; }
391 #endif
392 
393 static inline bool match_option(const char *arg, int arglen, const char *opt)
394 {
395 	int len = strlen(opt);
396 
397 	return len == arglen && !strncmp(arg, opt, len);
398 }
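/*
 * match_option() requires an exact, full-length match of the parsed
 * token: match_option("retpoline,amd", 13, "retpoline,amd") is true,
 * while the prefix match match_option("retpoline", 9, "retpoline,amd")
 * is not.
 */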
399 
400 /* The kernel command line selection for spectre v2 */
401 enum spectre_v2_mitigation_cmd {
402 	SPECTRE_V2_CMD_NONE,
403 	SPECTRE_V2_CMD_AUTO,
404 	SPECTRE_V2_CMD_FORCE,
405 	SPECTRE_V2_CMD_RETPOLINE,
406 	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
407 	SPECTRE_V2_CMD_RETPOLINE_AMD,
408 };
409 
410 enum spectre_v2_user_cmd {
411 	SPECTRE_V2_USER_CMD_NONE,
412 	SPECTRE_V2_USER_CMD_AUTO,
413 	SPECTRE_V2_USER_CMD_FORCE,
414 	SPECTRE_V2_USER_CMD_PRCTL,
415 	SPECTRE_V2_USER_CMD_PRCTL_IBPB,
416 	SPECTRE_V2_USER_CMD_SECCOMP,
417 	SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
418 };
419 
420 static const char * const spectre_v2_user_strings[] = {
421 	[SPECTRE_V2_USER_NONE]			= "User space: Vulnerable",
422 	[SPECTRE_V2_USER_STRICT]		= "User space: Mitigation: STIBP protection",
423 	[SPECTRE_V2_USER_STRICT_PREFERRED]	= "User space: Mitigation: STIBP always-on protection",
424 	[SPECTRE_V2_USER_PRCTL]			= "User space: Mitigation: STIBP via prctl",
425 	[SPECTRE_V2_USER_SECCOMP]		= "User space: Mitigation: STIBP via seccomp and prctl",
426 };
427 
428 static const struct {
429 	const char			*option;
430 	enum spectre_v2_user_cmd	cmd;
431 	bool				secure;
432 } v2_user_options[] __initconst = {
433 	{ "auto",		SPECTRE_V2_USER_CMD_AUTO,		false },
434 	{ "off",		SPECTRE_V2_USER_CMD_NONE,		false },
435 	{ "on",			SPECTRE_V2_USER_CMD_FORCE,		true  },
436 	{ "prctl",		SPECTRE_V2_USER_CMD_PRCTL,		false },
437 	{ "prctl,ibpb",		SPECTRE_V2_USER_CMD_PRCTL_IBPB,		false },
438 	{ "seccomp",		SPECTRE_V2_USER_CMD_SECCOMP,		false },
439 	{ "seccomp,ibpb",	SPECTRE_V2_USER_CMD_SECCOMP_IBPB,	false },
440 };
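/*
 * Example usage on the kernel command line, matching the table above:
 *
 *	spectre_v2_user=prctl
 *	spectre_v2_user=seccomp,ibpb
 */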
441 
442 static void __init spec_v2_user_print_cond(const char *reason, bool secure)
443 {
444 	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
445 		pr_info("spectre_v2_user=%s forced on command line.\n", reason);
446 }
447 
448 static enum spectre_v2_user_cmd __init
449 spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
450 {
451 	char arg[20];
452 	int ret, i;
453 
454 	switch (v2_cmd) {
455 	case SPECTRE_V2_CMD_NONE:
456 		return SPECTRE_V2_USER_CMD_NONE;
457 	case SPECTRE_V2_CMD_FORCE:
458 		return SPECTRE_V2_USER_CMD_FORCE;
459 	default:
460 		break;
461 	}
462 
463 	ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
464 				  arg, sizeof(arg));
465 	if (ret < 0)
466 		return SPECTRE_V2_USER_CMD_AUTO;
467 
468 	for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
469 		if (match_option(arg, ret, v2_user_options[i].option)) {
470 			spec_v2_user_print_cond(v2_user_options[i].option,
471 						v2_user_options[i].secure);
472 			return v2_user_options[i].cmd;
473 		}
474 	}
475 
476 	pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
477 	return SPECTRE_V2_USER_CMD_AUTO;
478 }
479 
480 static void __init
481 spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
482 {
483 	enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
484 	bool smt_possible = IS_ENABLED(CONFIG_SMP);
485 	enum spectre_v2_user_cmd cmd;
486 
487 	if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
488 		return;
489 
490 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
491 	    cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
492 		smt_possible = false;
493 
494 	cmd = spectre_v2_parse_user_cmdline(v2_cmd);
495 	switch (cmd) {
496 	case SPECTRE_V2_USER_CMD_NONE:
497 		goto set_mode;
498 	case SPECTRE_V2_USER_CMD_FORCE:
499 		mode = SPECTRE_V2_USER_STRICT;
500 		break;
501 	case SPECTRE_V2_USER_CMD_PRCTL:
502 	case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
503 		mode = SPECTRE_V2_USER_PRCTL;
504 		break;
505 	case SPECTRE_V2_USER_CMD_AUTO:
506 	case SPECTRE_V2_USER_CMD_SECCOMP:
507 	case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
508 		if (IS_ENABLED(CONFIG_SECCOMP))
509 			mode = SPECTRE_V2_USER_SECCOMP;
510 		else
511 			mode = SPECTRE_V2_USER_PRCTL;
512 		break;
513 	}
514 
515 	/*
516 	 * At this point, an STIBP mode other than "off" has been set.
517 	 * If STIBP support is not being forced, check if STIBP always-on
518 	 * is preferred.
519 	 */
520 	if (mode != SPECTRE_V2_USER_STRICT &&
521 	    boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
522 		mode = SPECTRE_V2_USER_STRICT_PREFERRED;
523 
524 	/* Initialize Indirect Branch Prediction Barrier */
525 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
526 		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
527 
528 		switch (cmd) {
529 		case SPECTRE_V2_USER_CMD_FORCE:
530 		case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
531 		case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
532 			static_branch_enable(&switch_mm_always_ibpb);
533 			break;
534 		case SPECTRE_V2_USER_CMD_PRCTL:
535 		case SPECTRE_V2_USER_CMD_AUTO:
536 		case SPECTRE_V2_USER_CMD_SECCOMP:
537 			static_branch_enable(&switch_mm_cond_ibpb);
538 			break;
539 		default:
540 			break;
541 		}
542 
543 		pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
544 			static_key_enabled(&switch_mm_always_ibpb) ?
545 			"always-on" : "conditional");
546 	}
547 
548 	/* If enhanced IBRS is enabled, no STIBP is required */
549 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
550 		return;
551 
552 	/*
553 	 * If SMT is not possible or STIBP is not available, clear the STIBP
554 	 * mode.
555 	 */
556 	if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
557 		mode = SPECTRE_V2_USER_NONE;
558 set_mode:
559 	spectre_v2_user = mode;
560 	/* Only print the STIBP mode when SMT possible */
561 	if (smt_possible)
562 		pr_info("%s\n", spectre_v2_user_strings[mode]);
563 }
564 
565 static const char * const spectre_v2_strings[] = {
566 	[SPECTRE_V2_NONE]			= "Vulnerable",
567 	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
568 	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
569 	[SPECTRE_V2_IBRS_ENHANCED]		= "Mitigation: Enhanced IBRS",
570 };
571 
572 static const struct {
573 	const char *option;
574 	enum spectre_v2_mitigation_cmd cmd;
575 	bool secure;
576 } mitigation_options[] __initconst = {
577 	{ "off",		SPECTRE_V2_CMD_NONE,		  false },
578 	{ "on",			SPECTRE_V2_CMD_FORCE,		  true  },
579 	{ "retpoline",		SPECTRE_V2_CMD_RETPOLINE,	  false },
580 	{ "retpoline,amd",	SPECTRE_V2_CMD_RETPOLINE_AMD,	  false },
581 	{ "retpoline,generic",	SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
582 	{ "auto",		SPECTRE_V2_CMD_AUTO,		  false },
583 };
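/*
 * Example usage on the kernel command line, matching the table above:
 *
 *	spectre_v2=retpoline,generic
 *	spectre_v2=off		(equivalent to nospectre_v2)
 */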
584 
585 static void __init spec_v2_print_cond(const char *reason, bool secure)
586 {
587 	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
588 		pr_info("%s selected on command line.\n", reason);
589 }
590 
591 static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
592 {
593 	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
594 	char arg[20];
595 	int ret, i;
596 
597 	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
598 	    cpu_mitigations_off())
599 		return SPECTRE_V2_CMD_NONE;
600 
601 	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
602 	if (ret < 0)
603 		return SPECTRE_V2_CMD_AUTO;
604 
605 	for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
606 		if (!match_option(arg, ret, mitigation_options[i].option))
607 			continue;
608 		cmd = mitigation_options[i].cmd;
609 		break;
610 	}
611 
612 	if (i >= ARRAY_SIZE(mitigation_options)) {
613 		pr_err("unknown option (%s). Switching to AUTO select\n", arg);
614 		return SPECTRE_V2_CMD_AUTO;
615 	}
616 
617 	if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
618 	     cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
619 	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
620 	    !IS_ENABLED(CONFIG_RETPOLINE)) {
621 		pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
622 		return SPECTRE_V2_CMD_AUTO;
623 	}
624 
625 	if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
626 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON &&
627 	    boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
628 		pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
629 		return SPECTRE_V2_CMD_AUTO;
630 	}
631 
632 	spec_v2_print_cond(mitigation_options[i].option,
633 			   mitigation_options[i].secure);
634 	return cmd;
635 }
636 
637 static void __init spectre_v2_select_mitigation(void)
638 {
639 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
640 	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
641 
642 	/*
643 	 * If the CPU is not affected and the command line mode is NONE or AUTO
644 	 * then nothing to do.
645 	 */
646 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
647 	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
648 		return;
649 
650 	switch (cmd) {
651 	case SPECTRE_V2_CMD_NONE:
652 		return;
653 
654 	case SPECTRE_V2_CMD_FORCE:
655 	case SPECTRE_V2_CMD_AUTO:
656 		if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
657 			mode = SPECTRE_V2_IBRS_ENHANCED;
658 			/* Force it so VMEXIT will restore correctly */
659 			x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
660 			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
661 			goto specv2_set_mode;
662 		}
663 		if (IS_ENABLED(CONFIG_RETPOLINE))
664 			goto retpoline_auto;
665 		break;
666 	case SPECTRE_V2_CMD_RETPOLINE_AMD:
667 		if (IS_ENABLED(CONFIG_RETPOLINE))
668 			goto retpoline_amd;
669 		break;
670 	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
671 		if (IS_ENABLED(CONFIG_RETPOLINE))
672 			goto retpoline_generic;
673 		break;
674 	case SPECTRE_V2_CMD_RETPOLINE:
675 		if (IS_ENABLED(CONFIG_RETPOLINE))
676 			goto retpoline_auto;
677 		break;
678 	}
679 	pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
680 	return;
681 
682 retpoline_auto:
683 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
684 	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
685 	retpoline_amd:
686 		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
687 			pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
688 			goto retpoline_generic;
689 		}
690 		mode = SPECTRE_V2_RETPOLINE_AMD;
691 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
692 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
693 	} else {
694 	retpoline_generic:
695 		mode = SPECTRE_V2_RETPOLINE_GENERIC;
696 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
697 	}
698 
699 specv2_set_mode:
700 	spectre_v2_enabled = mode;
701 	pr_info("%s\n", spectre_v2_strings[mode]);
702 
703 	/*
704 	 * If spectre v2 protection has been enabled, unconditionally fill
705 	 * RSB during a context switch; this protects against two independent
706 	 * issues:
707 	 *
708 	 *	- RSB underflow (and switch to BTB) on Skylake+
709 	 *	- SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
710 	 */
711 	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
712 	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
713 
714 	/*
715 	 * Retpoline means the kernel is safe because it has no indirect
716 	 * branches. Enhanced IBRS protects firmware too, so, enable restricted
717 	 * speculation around firmware calls only when Enhanced IBRS isn't
718 	 * supported.
719 	 *
720 	 * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
721 	 * the user might select retpoline on the kernel command line and, if
722 	 * the CPU supports Enhanced IBRS, the kernel might unintentionally not
723 	 * enable IBRS around firmware calls.
724 	 */
725 	if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
726 		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
727 		pr_info("Enabling Restricted Speculation for firmware calls\n");
728 	}
729 
730 	/* Set up IBPB and STIBP depending on the general spectre V2 command */
731 	spectre_v2_user_select_mitigation(cmd);
732 }
733 
734 static void update_stibp_msr(void * __unused)
735 {
736 	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
737 }
738 
739 /* Update x86_spec_ctrl_base in case SMT state changed. */
740 static void update_stibp_strict(void)
741 {
742 	u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
743 
744 	if (sched_smt_active())
745 		mask |= SPEC_CTRL_STIBP;
746 
747 	if (mask == x86_spec_ctrl_base)
748 		return;
749 
750 	pr_info("Update user space SMT mitigation: STIBP %s\n",
751 		mask & SPEC_CTRL_STIBP ? "always-on" : "off");
752 	x86_spec_ctrl_base = mask;
753 	on_each_cpu(update_stibp_msr, NULL, 1);
754 }
755 
756 /* Update the static key controlling the evaluation of TIF_SPEC_IB */
757 static void update_indir_branch_cond(void)
758 {
759 	if (sched_smt_active())
760 		static_branch_enable(&switch_to_cond_stibp);
761 	else
762 		static_branch_disable(&switch_to_cond_stibp);
763 }
764 
765 #undef pr_fmt
766 #define pr_fmt(fmt) fmt
767 
768 /* Update the static key controlling the MDS CPU buffer clear in idle */
769 static void update_mds_branch_idle(void)
770 {
771 	/*
772 	 * Enable the idle clearing if SMT is active on CPUs which are
773 	 * affected only by MSBDS and not any other MDS variant.
774 	 *
775 	 * The other variants cannot be mitigated when SMT is enabled, so
776 	 * clearing the buffers on idle just to prevent the Store Buffer
777 	 * repartitioning leak would be a window dressing exercise.
778 	 */
779 	if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
780 		return;
781 
782 	if (sched_smt_active())
783 		static_branch_enable(&mds_idle_clear);
784 	else
785 		static_branch_disable(&mds_idle_clear);
786 }
787 
788 #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
789 
790 void cpu_bugs_smt_update(void)
791 {
792 	/* Enhanced IBRS implies STIBP. No update required. */
793 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
794 		return;
795 
796 	mutex_lock(&spec_ctrl_mutex);
797 
798 	switch (spectre_v2_user) {
799 	case SPECTRE_V2_USER_NONE:
800 		break;
801 	case SPECTRE_V2_USER_STRICT:
802 	case SPECTRE_V2_USER_STRICT_PREFERRED:
803 		update_stibp_strict();
804 		break;
805 	case SPECTRE_V2_USER_PRCTL:
806 	case SPECTRE_V2_USER_SECCOMP:
807 		update_indir_branch_cond();
808 		break;
809 	}
810 
811 	switch (mds_mitigation) {
812 	case MDS_MITIGATION_FULL:
813 	case MDS_MITIGATION_VMWERV:
814 		if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
815 			pr_warn_once(MDS_MSG_SMT);
816 		update_mds_branch_idle();
817 		break;
818 	case MDS_MITIGATION_OFF:
819 		break;
820 	}
821 
822 	mutex_unlock(&spec_ctrl_mutex);
823 }
824 
825 #undef pr_fmt
826 #define pr_fmt(fmt)	"Speculative Store Bypass: " fmt
827 
828 static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
829 
830 /* The kernel command line selection */
831 enum ssb_mitigation_cmd {
832 	SPEC_STORE_BYPASS_CMD_NONE,
833 	SPEC_STORE_BYPASS_CMD_AUTO,
834 	SPEC_STORE_BYPASS_CMD_ON,
835 	SPEC_STORE_BYPASS_CMD_PRCTL,
836 	SPEC_STORE_BYPASS_CMD_SECCOMP,
837 };
838 
839 static const char * const ssb_strings[] = {
840 	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
841 	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
842 	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl",
843 	[SPEC_STORE_BYPASS_SECCOMP]	= "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
844 };
845 
846 static const struct {
847 	const char *option;
848 	enum ssb_mitigation_cmd cmd;
849 } ssb_mitigation_options[]  __initconst = {
850 	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
851 	{ "on",		SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
852 	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
853 	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL },   /* Disable Speculative Store Bypass via prctl */
854 	{ "seccomp",	SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
855 };
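/*
 * Example usage on the kernel command line, matching the table above:
 *
 *	spec_store_bypass_disable=prctl
 *	spec_store_bypass_disable=seccomp
 *	nospec_store_bypass_disable	(same effect as "off")
 */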
856 
857 static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
858 {
859 	enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
860 	char arg[20];
861 	int ret, i;
862 
863 	if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
864 	    cpu_mitigations_off()) {
865 		return SPEC_STORE_BYPASS_CMD_NONE;
866 	} else {
867 		ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
868 					  arg, sizeof(arg));
869 		if (ret < 0)
870 			return SPEC_STORE_BYPASS_CMD_AUTO;
871 
872 		for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
873 			if (!match_option(arg, ret, ssb_mitigation_options[i].option))
874 				continue;
875 
876 			cmd = ssb_mitigation_options[i].cmd;
877 			break;
878 		}
879 
880 		if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
881 			pr_err("unknown option (%s). Switching to AUTO select\n", arg);
882 			return SPEC_STORE_BYPASS_CMD_AUTO;
883 		}
884 	}
885 
886 	return cmd;
887 }
888 
889 static enum ssb_mitigation __init __ssb_select_mitigation(void)
890 {
891 	enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
892 	enum ssb_mitigation_cmd cmd;
893 
894 	if (!boot_cpu_has(X86_FEATURE_SSBD))
895 		return mode;
896 
897 	cmd = ssb_parse_cmdline();
898 	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
899 	    (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
900 	     cmd == SPEC_STORE_BYPASS_CMD_AUTO))
901 		return mode;
902 
903 	switch (cmd) {
904 	case SPEC_STORE_BYPASS_CMD_AUTO:
905 	case SPEC_STORE_BYPASS_CMD_SECCOMP:
906 		/*
907 		 * Choose prctl+seccomp as the default mode if seccomp is
908 		 * enabled.
909 		 */
910 		if (IS_ENABLED(CONFIG_SECCOMP))
911 			mode = SPEC_STORE_BYPASS_SECCOMP;
912 		else
913 			mode = SPEC_STORE_BYPASS_PRCTL;
914 		break;
915 	case SPEC_STORE_BYPASS_CMD_ON:
916 		mode = SPEC_STORE_BYPASS_DISABLE;
917 		break;
918 	case SPEC_STORE_BYPASS_CMD_PRCTL:
919 		mode = SPEC_STORE_BYPASS_PRCTL;
920 		break;
921 	case SPEC_STORE_BYPASS_CMD_NONE:
922 		break;
923 	}
924 
925 	/*
926 	 * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
927 	 * bit in the mask to allow guests to use the mitigation even in the
928 	 * case where the host does not enable it.
929 	 */
930 	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
931 	    static_cpu_has(X86_FEATURE_AMD_SSBD)) {
932 		x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
933 	}
934 
935 	/*
936 	 * We have three CPU feature flags that are in play here:
937 	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
938 	 *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
939 	 *  - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
940 	 */
941 	if (mode == SPEC_STORE_BYPASS_DISABLE) {
942 		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
943 		/*
944 		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
945 		 * use a completely different MSR and bit dependent on family.
946 		 */
947 		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
948 		    !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
949 			x86_amd_ssb_disable();
950 		} else {
951 			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
952 			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
953 		}
954 	}
955 
956 	return mode;
957 }
958 
959 static void ssb_select_mitigation(void)
960 {
961 	ssb_mode = __ssb_select_mitigation();
962 
963 	if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
964 		pr_info("%s\n", ssb_strings[ssb_mode]);
965 }
966 
967 #undef pr_fmt
968 #define pr_fmt(fmt)     "Speculation prctl: " fmt
969 
970 static void task_update_spec_tif(struct task_struct *tsk)
971 {
972 	/* Force the update of the real TIF bits */
973 	set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);
974 
975 	/*
976 	 * Immediately update the speculation control MSRs for the current
977 	 * task, but for a non-current task delay setting the CPU
978 	 * mitigation until it is scheduled next.
979 	 *
980 	 * This can only happen for SECCOMP mitigation. For PRCTL it's
981 	 * always the current task.
982 	 */
983 	if (tsk == current)
984 		speculation_ctrl_update_current();
985 }
986 
987 static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
988 {
989 	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
990 	    ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
991 		return -ENXIO;
992 
993 	switch (ctrl) {
994 	case PR_SPEC_ENABLE:
995 		/* If speculation is force disabled, enable is not allowed */
996 		if (task_spec_ssb_force_disable(task))
997 			return -EPERM;
998 		task_clear_spec_ssb_disable(task);
999 		task_clear_spec_ssb_noexec(task);
1000 		task_update_spec_tif(task);
1001 		break;
1002 	case PR_SPEC_DISABLE:
1003 		task_set_spec_ssb_disable(task);
1004 		task_clear_spec_ssb_noexec(task);
1005 		task_update_spec_tif(task);
1006 		break;
1007 	case PR_SPEC_FORCE_DISABLE:
1008 		task_set_spec_ssb_disable(task);
1009 		task_set_spec_ssb_force_disable(task);
1010 		task_clear_spec_ssb_noexec(task);
1011 		task_update_spec_tif(task);
1012 		break;
1013 	case PR_SPEC_DISABLE_NOEXEC:
1014 		if (task_spec_ssb_force_disable(task))
1015 			return -EPERM;
1016 		task_set_spec_ssb_disable(task);
1017 		task_set_spec_ssb_noexec(task);
1018 		task_update_spec_tif(task);
1019 		break;
1020 	default:
1021 		return -ERANGE;
1022 	}
1023 	return 0;
1024 }
1025 
1026 static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
1027 {
1028 	switch (ctrl) {
1029 	case PR_SPEC_ENABLE:
1030 		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
1031 			return 0;
1032 		/*
1033 		 * Indirect branch speculation is always disabled in strict
1034 		 * mode.
1035 		 */
1036 		if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
1037 		    spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
1038 			return -EPERM;
1039 		task_clear_spec_ib_disable(task);
1040 		task_update_spec_tif(task);
1041 		break;
1042 	case PR_SPEC_DISABLE:
1043 	case PR_SPEC_FORCE_DISABLE:
1044 		/*
1045 		 * Indirect branch speculation is always allowed when
1046 		 * mitigation is force disabled.
1047 		 */
1048 		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
1049 			return -EPERM;
1050 		if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
1051 		    spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
1052 			return 0;
1053 		task_set_spec_ib_disable(task);
1054 		if (ctrl == PR_SPEC_FORCE_DISABLE)
1055 			task_set_spec_ib_force_disable(task);
1056 		task_update_spec_tif(task);
1057 		break;
1058 	default:
1059 		return -ERANGE;
1060 	}
1061 	return 0;
1062 }
1063 
1064 int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
1065 			     unsigned long ctrl)
1066 {
1067 	switch (which) {
1068 	case PR_SPEC_STORE_BYPASS:
1069 		return ssb_prctl_set(task, ctrl);
1070 	case PR_SPEC_INDIRECT_BRANCH:
1071 		return ib_prctl_set(task, ctrl);
1072 	default:
1073 		return -ENODEV;
1074 	}
1075 }
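/*
 * Illustrative userspace call into the handler above (see also
 * Documentation/userspace-api/spec_ctrl.rst):
 *
 *	prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
 *	      PR_SPEC_DISABLE, 0, 0);
 *
 * The call fails with ENXIO when the current mitigation mode offers no
 * per-task control, and with EPERM when a force-disabled control is
 * being re-enabled.
 */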
1076 
1077 #ifdef CONFIG_SECCOMP
1078 void arch_seccomp_spec_mitigate(struct task_struct *task)
1079 {
1080 	if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
1081 		ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
1082 	if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
1083 		ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
1084 }
1085 #endif
1086 
1087 static int ssb_prctl_get(struct task_struct *task)
1088 {
1089 	switch (ssb_mode) {
1090 	case SPEC_STORE_BYPASS_DISABLE:
1091 		return PR_SPEC_DISABLE;
1092 	case SPEC_STORE_BYPASS_SECCOMP:
1093 	case SPEC_STORE_BYPASS_PRCTL:
1094 		if (task_spec_ssb_force_disable(task))
1095 			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
1096 		if (task_spec_ssb_noexec(task))
1097 			return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC;
1098 		if (task_spec_ssb_disable(task))
1099 			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
1100 		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
1101 	default:
1102 		if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1103 			return PR_SPEC_ENABLE;
1104 		return PR_SPEC_NOT_AFFECTED;
1105 	}
1106 }
1107 
1108 static int ib_prctl_get(struct task_struct *task)
1109 {
1110 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1111 		return PR_SPEC_NOT_AFFECTED;
1112 
1113 	switch (spectre_v2_user) {
1114 	case SPECTRE_V2_USER_NONE:
1115 		return PR_SPEC_ENABLE;
1116 	case SPECTRE_V2_USER_PRCTL:
1117 	case SPECTRE_V2_USER_SECCOMP:
1118 		if (task_spec_ib_force_disable(task))
1119 			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
1120 		if (task_spec_ib_disable(task))
1121 			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
1122 		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
1123 	case SPECTRE_V2_USER_STRICT:
1124 	case SPECTRE_V2_USER_STRICT_PREFERRED:
1125 		return PR_SPEC_DISABLE;
1126 	default:
1127 		return PR_SPEC_NOT_AFFECTED;
1128 	}
1129 }
1130 
1131 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
1132 {
1133 	switch (which) {
1134 	case PR_SPEC_STORE_BYPASS:
1135 		return ssb_prctl_get(task);
1136 	case PR_SPEC_INDIRECT_BRANCH:
1137 		return ib_prctl_get(task);
1138 	default:
1139 		return -ENODEV;
1140 	}
1141 }
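/*
 * The corresponding query, again illustrative:
 *
 *	ret = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
 *		    0, 0, 0);
 *
 * returns either PR_SPEC_NOT_AFFECTED or a bitmask of PR_SPEC_* flags,
 * e.g. PR_SPEC_PRCTL | PR_SPEC_ENABLE when the per-task control is
 * available and speculation is currently allowed for the task.
 */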
1142 
1143 void x86_spec_ctrl_setup_ap(void)
1144 {
1145 	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
1146 		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
1147 
1148 	if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
1149 		x86_amd_ssb_disable();
1150 }
1151 
1152 #undef pr_fmt
1153 #define pr_fmt(fmt)	"L1TF: " fmt
1154 
1155 /* Default mitigation for L1TF-affected CPUs */
1156 enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH;
1157 #if IS_ENABLED(CONFIG_KVM_INTEL)
1158 EXPORT_SYMBOL_GPL(l1tf_mitigation);
1159 #endif
1160 enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
1161 EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
1162 
1163 /*
1164  * These CPUs all support a 44-bit physical address space internally in the
1165  * cache, but CPUID can report a smaller number of physical address bits.
1166  *
1167  * The L1TF mitigation uses the topmost address bit for the inversion of
1168  * non-present PTEs. When the installed memory reaches into the topmost
1169  * address bit due to memory holes, which has been observed on machines
1170  * that report 36 physical address bits and have 32GB of RAM installed,
1171  * the mitigation range check in l1tf_select_mitigation() triggers.
1172  * This is a false positive because the mitigation is still possible, as
1173  * the cache uses 44 bits internally. Use the cache bits instead of the
1174  * reported physical bits and adjust them to 44 on the affected machines
1175  * when the reported value is lower.
1176  */
1177 static void override_cache_bits(struct cpuinfo_x86 *c)
1178 {
1179 	if (c->x86 != 6)
1180 		return;
1181 
1182 	switch (c->x86_model) {
1183 	case INTEL_FAM6_NEHALEM:
1184 	case INTEL_FAM6_WESTMERE:
1185 	case INTEL_FAM6_SANDYBRIDGE:
1186 	case INTEL_FAM6_IVYBRIDGE:
1187 	case INTEL_FAM6_HASWELL:
1188 	case INTEL_FAM6_HASWELL_L:
1189 	case INTEL_FAM6_HASWELL_G:
1190 	case INTEL_FAM6_BROADWELL:
1191 	case INTEL_FAM6_BROADWELL_G:
1192 	case INTEL_FAM6_SKYLAKE_L:
1193 	case INTEL_FAM6_SKYLAKE:
1194 	case INTEL_FAM6_KABYLAKE_L:
1195 	case INTEL_FAM6_KABYLAKE:
1196 		if (c->x86_cache_bits < 44)
1197 			c->x86_cache_bits = 44;
1198 		break;
1199 	}
1200 }
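/*
 * As an illustration of why this matters: half_pa below is
 * l1tf_pfn_limit() << PAGE_SHIFT, i.e. 2^(x86_cache_bits - 1) bytes.
 * With 36 reported address bits that is 2^35 = 32GB, so a 32GB machine
 * with a memory hole already trips the MAX_PA/2 check; treating the
 * cache as 44 bits wide raises the limit to 2^43 = 8TB.
 */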
1201 
1202 static void __init l1tf_select_mitigation(void)
1203 {
1204 	u64 half_pa;
1205 
1206 	if (!boot_cpu_has_bug(X86_BUG_L1TF))
1207 		return;
1208 
1209 	if (cpu_mitigations_off())
1210 		l1tf_mitigation = L1TF_MITIGATION_OFF;
1211 	else if (cpu_mitigations_auto_nosmt())
1212 		l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
1213 
1214 	override_cache_bits(&boot_cpu_data);
1215 
1216 	switch (l1tf_mitigation) {
1217 	case L1TF_MITIGATION_OFF:
1218 	case L1TF_MITIGATION_FLUSH_NOWARN:
1219 	case L1TF_MITIGATION_FLUSH:
1220 		break;
1221 	case L1TF_MITIGATION_FLUSH_NOSMT:
1222 	case L1TF_MITIGATION_FULL:
1223 		cpu_smt_disable(false);
1224 		break;
1225 	case L1TF_MITIGATION_FULL_FORCE:
1226 		cpu_smt_disable(true);
1227 		break;
1228 	}
1229 
1230 #if CONFIG_PGTABLE_LEVELS == 2
1231 	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
1232 	return;
1233 #endif
1234 
1235 	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
1236 	if (l1tf_mitigation != L1TF_MITIGATION_OFF &&
1237 			e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
1238 		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
1239 		pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
1240 				half_pa);
1241 		pr_info("However, doing so will make a part of your RAM unusable.\n");
1242 		pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n");
1243 		return;
1244 	}
1245 
1246 	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
1247 }
1248 
1249 static int __init l1tf_cmdline(char *str)
1250 {
1251 	if (!boot_cpu_has_bug(X86_BUG_L1TF))
1252 		return 0;
1253 
1254 	if (!str)
1255 		return -EINVAL;
1256 
1257 	if (!strcmp(str, "off"))
1258 		l1tf_mitigation = L1TF_MITIGATION_OFF;
1259 	else if (!strcmp(str, "flush,nowarn"))
1260 		l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOWARN;
1261 	else if (!strcmp(str, "flush"))
1262 		l1tf_mitigation = L1TF_MITIGATION_FLUSH;
1263 	else if (!strcmp(str, "flush,nosmt"))
1264 		l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
1265 	else if (!strcmp(str, "full"))
1266 		l1tf_mitigation = L1TF_MITIGATION_FULL;
1267 	else if (!strcmp(str, "full,force"))
1268 		l1tf_mitigation = L1TF_MITIGATION_FULL_FORCE;
1269 
1270 	return 0;
1271 }
1272 early_param("l1tf", l1tf_cmdline);
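/*
 * Example boot command lines accepted by the parser above:
 *
 *	l1tf=flush,nosmt
 *	l1tf=full,force
 *
 * See Documentation/admin-guide/hw-vuln/l1tf.rst for what each mode
 * implies for SMT and the VMX L1D flush.
 */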
1273 
1274 #undef pr_fmt
1275 #define pr_fmt(fmt) fmt
1276 
1277 #ifdef CONFIG_SYSFS
1278 
1279 #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
1280 
1281 #if IS_ENABLED(CONFIG_KVM_INTEL)
1282 static const char * const l1tf_vmx_states[] = {
1283 	[VMENTER_L1D_FLUSH_AUTO]		= "auto",
1284 	[VMENTER_L1D_FLUSH_NEVER]		= "vulnerable",
1285 	[VMENTER_L1D_FLUSH_COND]		= "conditional cache flushes",
1286 	[VMENTER_L1D_FLUSH_ALWAYS]		= "cache flushes",
1287 	[VMENTER_L1D_FLUSH_EPT_DISABLED]	= "EPT disabled",
1288 	[VMENTER_L1D_FLUSH_NOT_REQUIRED]	= "flush not necessary"
1289 };
1290 
1291 static ssize_t l1tf_show_state(char *buf)
1292 {
1293 	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO)
1294 		return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
1295 
1296 	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
1297 	    (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
1298 	     sched_smt_active())) {
1299 		return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
1300 			       l1tf_vmx_states[l1tf_vmx_mitigation]);
1301 	}
1302 
1303 	return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
1304 		       l1tf_vmx_states[l1tf_vmx_mitigation],
1305 		       sched_smt_active() ? "vulnerable" : "disabled");
1306 }
1307 #else
1308 static ssize_t l1tf_show_state(char *buf)
1309 {
1310 	return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
1311 }
1312 #endif
1313 
1314 static ssize_t mds_show_state(char *buf)
1315 {
1316 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
1317 		return sprintf(buf, "%s; SMT Host state unknown\n",
1318 			       mds_strings[mds_mitigation]);
1319 	}
1320 
1321 	if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) {
1322 		return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
1323 			       (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
1324 			        sched_smt_active() ? "mitigated" : "disabled"));
1325 	}
1326 
1327 	return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
1328 		       sched_smt_active() ? "vulnerable" : "disabled");
1329 }
1330 
1331 static char *stibp_state(void)
1332 {
1333 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
1334 		return "";
1335 
1336 	switch (spectre_v2_user) {
1337 	case SPECTRE_V2_USER_NONE:
1338 		return ", STIBP: disabled";
1339 	case SPECTRE_V2_USER_STRICT:
1340 		return ", STIBP: forced";
1341 	case SPECTRE_V2_USER_STRICT_PREFERRED:
1342 		return ", STIBP: always-on";
1343 	case SPECTRE_V2_USER_PRCTL:
1344 	case SPECTRE_V2_USER_SECCOMP:
1345 		if (static_key_enabled(&switch_to_cond_stibp))
1346 			return ", STIBP: conditional";
1347 	}
1348 	return "";
1349 }
1350 
1351 static char *ibpb_state(void)
1352 {
1353 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
1354 		if (static_key_enabled(&switch_mm_always_ibpb))
1355 			return ", IBPB: always-on";
1356 		if (static_key_enabled(&switch_mm_cond_ibpb))
1357 			return ", IBPB: conditional";
1358 		return ", IBPB: disabled";
1359 	}
1360 	return "";
1361 }
1362 
1363 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
1364 			       char *buf, unsigned int bug)
1365 {
1366 	if (!boot_cpu_has_bug(bug))
1367 		return sprintf(buf, "Not affected\n");
1368 
1369 	switch (bug) {
1370 	case X86_BUG_CPU_MELTDOWN:
1371 		if (boot_cpu_has(X86_FEATURE_PTI))
1372 			return sprintf(buf, "Mitigation: PTI\n");
1373 
1374 		if (hypervisor_is_type(X86_HYPER_XEN_PV))
1375 			return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
1376 
1377 		break;
1378 
1379 	case X86_BUG_SPECTRE_V1:
1380 		return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
1381 
1382 	case X86_BUG_SPECTRE_V2:
1383 		return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
1384 			       ibpb_state(),
1385 			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
1386 			       stibp_state(),
1387 			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
1388 			       spectre_v2_module_string());
1389 
1390 	case X86_BUG_SPEC_STORE_BYPASS:
1391 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
1392 
1393 	case X86_BUG_L1TF:
1394 		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
1395 			return l1tf_show_state(buf);
1396 		break;
1397 
1398 	case X86_BUG_MDS:
1399 		return mds_show_state(buf);
1400 
1401 	default:
1402 		break;
1403 	}
1404 
1405 	return sprintf(buf, "Vulnerable\n");
1406 }
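/*
 * The cpu_show_*() helpers below back the files in
 * /sys/devices/system/cpu/vulnerabilities/.  Illustrative output,
 * built from the strings assembled above:
 *
 *	$ cd /sys/devices/system/cpu/vulnerabilities
 *	$ grep . meltdown mds spectre_v2
 *	meltdown:Mitigation: PTI
 *	mds:Mitigation: Clear CPU buffers; SMT vulnerable
 *	spectre_v2:Mitigation: Full generic retpoline, IBPB: conditional, IBRS_FW, STIBP: conditional, RSB filling
 */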
1407 
1408 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
1409 {
1410 	return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
1411 }
1412 
1413 ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
1414 {
1415 	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
1416 }
1417 
1418 ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
1419 {
1420 	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
1421 }
1422 
1423 ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
1424 {
1425 	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
1426 }
1427 
1428 ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
1429 {
1430 	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
1431 }
1432 
1433 ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf)
1434 {
1435 	return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
1436 }
1437 #endif
1438