xref: /openbmc/linux/arch/x86/kernel/fpu/xstate.c (revision 1acbca93)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
262784854SIngo Molnar /*
362784854SIngo Molnar  * xsave/xrstor support.
462784854SIngo Molnar  *
562784854SIngo Molnar  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
662784854SIngo Molnar  */
7ffd3e504SThomas Gleixner #include <linux/bitops.h>
862784854SIngo Molnar #include <linux/compat.h>
962784854SIngo Molnar #include <linux/cpu.h>
10e8c24d3aSDave Hansen #include <linux/mman.h>
11db8268dfSChang S. Bae #include <linux/nospec.h>
1284594296SDave Hansen #include <linux/pkeys.h>
130c608dadSAubrey Li #include <linux/seq_file.h>
140c608dadSAubrey Li #include <linux/proc_fs.h>
15500afbf6SChang S. Bae #include <linux/vmalloc.h>
1659a36d16SIngo Molnar 
1762784854SIngo Molnar #include <asm/fpu/api.h>
1859a36d16SIngo Molnar #include <asm/fpu/regset.h>
19d9d005f3SThomas Gleixner #include <asm/fpu/signal.h>
20d9d005f3SThomas Gleixner #include <asm/fpu/xcr.h>
21b992c660SIngo Molnar 
2262784854SIngo Molnar #include <asm/tlbflush.h>
23db8268dfSChang S. Bae #include <asm/prctl.h>
24db8268dfSChang S. Bae #include <asm/elf.h>
25126fe040SThomas Gleixner 
26500afbf6SChang S. Bae #include "context.h"
2796034455SThomas Gleixner #include "internal.h"
2834002571SThomas Gleixner #include "legacy.h"
29126fe040SThomas Gleixner #include "xstate.h"
3062784854SIngo Molnar 
31ffd3e504SThomas Gleixner #define for_each_extended_xfeature(bit, mask)				\
32ffd3e504SThomas Gleixner 	(bit) = FIRST_EXTENDED_XFEATURE;				\
33ffd3e504SThomas Gleixner 	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
34ffd3e504SThomas Gleixner 
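/*
 * Usage sketch (hypothetical caller, not from this file): iterate over the
 * extended (non-legacy) features enabled in a mask. The macro skips bits 0
 * and 1 (FP and SSE) by starting at FIRST_EXTENDED_XFEATURE:
 *
 *	int i;
 *
 *	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features)
 *		pr_info("x86/fpu: extended xfeature %d enabled\n", i);
 */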
351f96b1efSDave Hansen /*
361f96b1efSDave Hansen  * Although we spell it out in here, the Processor Trace
371f96b1efSDave Hansen  * xfeature is completely unused.  We use other mechanisms
381f96b1efSDave Hansen  * to save/restore PT state in Linux.
391f96b1efSDave Hansen  */
405b073430SIngo Molnar static const char *xfeature_names[] =
415b073430SIngo Molnar {
425b073430SIngo Molnar 	"x87 floating point registers",
435b073430SIngo Molnar 	"SSE registers",
445b073430SIngo Molnar 	"AVX registers",
455b073430SIngo Molnar 	"MPX bounds registers",
465b073430SIngo Molnar 	"MPX CSR",
475b073430SIngo Molnar 	"AVX-512 opmask",
485b073430SIngo Molnar 	"AVX-512 Hi256",
495b073430SIngo Molnar 	"AVX-512 ZMM_Hi256",
501f96b1efSDave Hansen 	"Processor Trace (unused)",
51c8df4009SDave Hansen 	"Protection Keys User registers",
52b454feb9SYu-cheng Yu 	"PASID state",
538970ef02SRick Edgecombe 	"Control-flow User registers",
548970ef02SRick Edgecombe 	"Control-flow Kernel registers (unused)",
55eec2113eSChang S. Bae 	"unknown xstate feature",
56eec2113eSChang S. Bae 	"unknown xstate feature",
57eec2113eSChang S. Bae 	"unknown xstate feature",
58eec2113eSChang S. Bae 	"unknown xstate feature",
59eec2113eSChang S. Bae 	"AMX Tile config",
60eec2113eSChang S. Bae 	"AMX Tile data",
61eec2113eSChang S. Bae 	"unknown xstate feature",
625b073430SIngo Molnar };
635b073430SIngo Molnar 
6470c3f167SChang S. Bae static unsigned short xsave_cpuid_features[] __initdata = {
6570c3f167SChang S. Bae 	[XFEATURE_FP]				= X86_FEATURE_FPU,
6670c3f167SChang S. Bae 	[XFEATURE_SSE]				= X86_FEATURE_XMM,
6770c3f167SChang S. Bae 	[XFEATURE_YMM]				= X86_FEATURE_AVX,
6870c3f167SChang S. Bae 	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
6970c3f167SChang S. Bae 	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
7070c3f167SChang S. Bae 	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
7170c3f167SChang S. Bae 	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
7270c3f167SChang S. Bae 	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
7370c3f167SChang S. Bae 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
7418032b47SJim Mattson 	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
7570c3f167SChang S. Bae 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
768970ef02SRick Edgecombe 	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
77eec2113eSChang S. Bae 	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
78eec2113eSChang S. Bae 	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
79ccb18db2SAndi Kleen };
80ccb18db2SAndi Kleen 
81ce578f16SThomas Gleixner static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
82ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
83ce578f16SThomas Gleixner static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
84ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
856afbb58cSThomas Gleixner static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
866afbb58cSThomas Gleixner 
876afbb58cSThomas Gleixner #define XSTATE_FLAG_SUPERVISOR	BIT(0)
886afbb58cSThomas Gleixner #define XSTATE_FLAG_ALIGNED64	BIT(1)
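/*
 * Both flags are cached from CPUID.(EAX=0xD,ECX=n):ECX in
 * setup_xstate_cache(): bit 0 is set for supervisor states (managed
 * through IA32_XSS rather than XCR0) and bit 1 requests 64-byte
 * alignment of the component in the compacted format.
 */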
8962784854SIngo Molnar 
9062784854SIngo Molnar /*
915b073430SIngo Molnar  * Return whether the system supports a given xfeature.
925b073430SIngo Molnar  *
935b073430SIngo Molnar  * Also return the name of the (most advanced) feature that the caller requested:
945b073430SIngo Molnar  */
955b073430SIngo Molnar int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
965b073430SIngo Molnar {
971c253ff2SThomas Gleixner 	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
985b073430SIngo Molnar 
995b073430SIngo Molnar 	if (unlikely(feature_name)) {
1005b073430SIngo Molnar 		long xfeature_idx, max_idx;
1015b073430SIngo Molnar 		u64 xfeatures_print;
1025b073430SIngo Molnar 		/*
1035b073430SIngo Molnar 		 * We use fls64() here so that we can print the most advanced
1045b073430SIngo Molnar 		 * feature that was requested but is missing. So if a driver
105d91cab78SDave Hansen 		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM", we'll print the
1065b073430SIngo Molnar 		 * missing AVX feature - this is the most informative message
1075b073430SIngo Molnar 		 * to users:
1085b073430SIngo Molnar 		 */
1095b073430SIngo Molnar 		if (xfeatures_missing)
1105b073430SIngo Molnar 			xfeatures_print = xfeatures_missing;
1115b073430SIngo Molnar 		else
1125b073430SIngo Molnar 			xfeatures_print = xfeatures_needed;
1135b073430SIngo Molnar 
1145b073430SIngo Molnar 		xfeature_idx = fls64(xfeatures_print)-1;
1155b073430SIngo Molnar 		max_idx = ARRAY_SIZE(xfeature_names)-1;
1165b073430SIngo Molnar 		xfeature_idx = min(xfeature_idx, max_idx);
1175b073430SIngo Molnar 
1185b073430SIngo Molnar 		*feature_name = xfeature_names[xfeature_idx];
1195b073430SIngo Molnar 	}
1205b073430SIngo Molnar 
1215b073430SIngo Molnar 	if (xfeatures_missing)
1225b073430SIngo Molnar 		return 0;
1235b073430SIngo Molnar 
1245b073430SIngo Molnar 	return 1;
1255b073430SIngo Molnar }
1265b073430SIngo Molnar EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
1275b073430SIngo Molnar 
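/*
 * Example (hypothetical driver code, not from this file): refuse to use
 * AVX-512 state unless every required component is supported, and report
 * the most advanced missing feature by name:
 *
 *	const char *name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 |
 *			       XFEATURE_MASK_Hi16_ZMM, &name)) {
 *		pr_info("AVX-512 xstate '%s' not supported\n", name);
 *		return -ENODEV;
 *	}
 */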
1286afbb58cSThomas Gleixner static bool xfeature_is_aligned64(int xfeature_nr)
1296afbb58cSThomas Gleixner {
1306afbb58cSThomas Gleixner 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
1316afbb58cSThomas Gleixner }
1326afbb58cSThomas Gleixner 
133158e2ee6SYu-cheng Yu static bool xfeature_is_supervisor(int xfeature_nr)
1341499ce2dSYu-cheng Yu {
1356afbb58cSThomas Gleixner 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
1361499ce2dSYu-cheng Yu }
1371499ce2dSYu-cheng Yu 
1387aa5128bSThomas Gleixner static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
1397aa5128bSThomas Gleixner {
1407aa5128bSThomas Gleixner 	unsigned int offs, i;
1417aa5128bSThomas Gleixner 
1427aa5128bSThomas Gleixner 	/*
1437aa5128bSThomas Gleixner 	 * Non-compacted format and legacy features use the cached fixed
1447aa5128bSThomas Gleixner 	 * offsets.
1457aa5128bSThomas Gleixner 	 */
1468ad7e8f6SThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
1478ad7e8f6SThomas Gleixner 	    xfeature <= XFEATURE_SSE)
1487aa5128bSThomas Gleixner 		return xstate_offsets[xfeature];
1497aa5128bSThomas Gleixner 
1507aa5128bSThomas Gleixner 	/*
1517aa5128bSThomas Gleixner 	 * Compacted format offsets depend on the actual content of the
1527aa5128bSThomas Gleixner 	 * compacted xsave area which is determined by the xcomp_bv header
1537aa5128bSThomas Gleixner 	 * field.
1547aa5128bSThomas Gleixner 	 */
1557aa5128bSThomas Gleixner 	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
1567aa5128bSThomas Gleixner 	for_each_extended_xfeature(i, xcomp_bv) {
1577aa5128bSThomas Gleixner 		if (xfeature_is_aligned64(i))
1587aa5128bSThomas Gleixner 			offs = ALIGN(offs, 64);
1597aa5128bSThomas Gleixner 		if (i == xfeature)
1607aa5128bSThomas Gleixner 			break;
1617aa5128bSThomas Gleixner 		offs += xstate_sizes[i];
1627aa5128bSThomas Gleixner 	}
1637aa5128bSThomas Gleixner 	return offs;
1647aa5128bSThomas Gleixner }
1657aa5128bSThomas Gleixner 
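/*
 * Worked example of the compacted walk above (illustrative combination):
 * with xcomp_bv whose extended bits enable only YMM (256 bytes, not
 * 64-byte aligned) and PKRU, xfeature_get_offset(xcomp_bv, XFEATURE_PKRU)
 * computes:
 *
 *	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;	// 512 + 64 = 576
 *	offs += xstate_sizes[XFEATURE_YMM];	// + 256 -> 832
 *	// PKRU does not set XSTATE_FLAG_ALIGNED64, so no ALIGN(offs, 64)
 *	return 832;
 */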
1665b073430SIngo Molnar /*
16762784854SIngo Molnar  * Enable the extended processor state save/restore feature.
16862784854SIngo Molnar  * Called once per CPU onlining.
16962784854SIngo Molnar  */
17062784854SIngo Molnar void fpu__init_cpu_xstate(void)
17162784854SIngo Molnar {
1721c253ff2SThomas Gleixner 	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
17362784854SIngo Molnar 		return;
17462784854SIngo Molnar 
17562784854SIngo Molnar 	cr4_set_bits(X86_CR4_OSXSAVE);
17662784854SIngo Molnar 
17762784854SIngo Molnar 	/*
17867236547SChang S. Bae 	 * Must happen after CR4 setup and before xsetbv() to allow KVM
17967236547SChang S. Bae 	 * lazy passthrough.  Write independent of the dynamic state static
18067236547SChang S. Bae 	 * key as that does not work on the boot CPU. This also ensures
1811acbca93SAdamos Ttofari 	 * that any stale state is wiped out from XFD. Reset the per CPU
1821acbca93SAdamos Ttofari 	 * xfd cache too.
18367236547SChang S. Bae 	 */
18467236547SChang S. Bae 	if (cpu_feature_enabled(X86_FEATURE_XFD))
1851acbca93SAdamos Ttofari 		xfd_set_state(init_fpstate.xfd);
18667236547SChang S. Bae 
18767236547SChang S. Bae 	/*
188524bb73bSYu-cheng Yu 	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
189524bb73bSYu-cheng Yu 	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
190524bb73bSYu-cheng Yu 	 * states can be set here.
191e6e888f9SDave Hansen 	 */
1921c253ff2SThomas Gleixner 	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
19362784854SIngo Molnar 
19462784854SIngo Molnar 	/*
19571581eefSYu-cheng Yu 	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
196e6e888f9SDave Hansen 	 */
197f0dccc9dSKan Liang 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
198f0dccc9dSKan Liang 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
19901707b66SAndy Lutomirski 				     xfeatures_mask_independent());
200f0dccc9dSKan Liang 	}
20171581eefSYu-cheng Yu }
20271581eefSYu-cheng Yu 
203524bb73bSYu-cheng Yu static bool xfeature_enabled(enum xfeature xfeature)
204e6e888f9SDave Hansen {
2051c253ff2SThomas Gleixner 	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
206e6e888f9SDave Hansen }
207e6e888f9SDave Hansen 
208e6e888f9SDave Hansen /*
20939f1acd2SIngo Molnar  * Record the offsets and sizes of various xstates contained
21039f1acd2SIngo Molnar  * in the XSAVE state memory layout.
21162784854SIngo Molnar  */
21235a77d45SThomas Gleixner static void __init setup_xstate_cache(void)
21362784854SIngo Molnar {
214ee9ae257SDave Hansen 	u32 eax, ebx, ecx, edx, i;
215d9f6e12fSIngo Molnar 	/* start at the beginning of the "extended state" */
216e6e888f9SDave Hansen 	unsigned int last_good_offset = offsetof(struct xregs_state,
217e6e888f9SDave Hansen 						 extended_state_area);
218ac73b27aSYu-cheng Yu 	/*
219ac73b27aSYu-cheng Yu 	 * The FP xstates and SSE xstates are legacy states. They are always
220ac73b27aSYu-cheng Yu 	 * at fixed offsets in the xsave area, in either compacted or
221ac73b27aSYu-cheng Yu 	 * standard form.
222ac73b27aSYu-cheng Yu 	 */
223446e693cSCyrill Gorcunov 	xstate_offsets[XFEATURE_FP]	= 0;
224446e693cSCyrill Gorcunov 	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
225446e693cSCyrill Gorcunov 						   xmm_space);
226446e693cSCyrill Gorcunov 
227446e693cSCyrill Gorcunov 	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
228c593642cSPankaj Bharadiya 	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
229446e693cSCyrill Gorcunov 						       xmm_space);
23062784854SIngo Molnar 
2311c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
232e6e888f9SDave Hansen 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
2331499ce2dSYu-cheng Yu 
234c12e13dcSYu-cheng Yu 		xstate_sizes[i] = eax;
2356afbb58cSThomas Gleixner 		xstate_flags[i] = ecx;
236c12e13dcSYu-cheng Yu 
2371499ce2dSYu-cheng Yu 		/*
238c12e13dcSYu-cheng Yu 		 * If an xfeature is supervisor state, the offset in EBX is
239c12e13dcSYu-cheng Yu 		 * invalid, leave it to -1.
2401499ce2dSYu-cheng Yu 		 */
241c12e13dcSYu-cheng Yu 		if (xfeature_is_supervisor(i))
242c12e13dcSYu-cheng Yu 			continue;
243c12e13dcSYu-cheng Yu 
244ee9ae257SDave Hansen 		xstate_offsets[i] = ebx;
2451499ce2dSYu-cheng Yu 
246e6e888f9SDave Hansen 		/*
247c12e13dcSYu-cheng Yu 		 * In our xstate size checks, we assume that the highest-numbered
248c12e13dcSYu-cheng Yu 		 * xstate feature has the highest offset in the buffer.  Ensure
249c12e13dcSYu-cheng Yu 		 * it does.
250e6e888f9SDave Hansen 		 */
251e6e888f9SDave Hansen 		WARN_ONCE(last_good_offset > xstate_offsets[i],
252e6e888f9SDave Hansen 			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
253c12e13dcSYu-cheng Yu 
254e6e888f9SDave Hansen 		last_good_offset = xstate_offsets[i];
25539f1acd2SIngo Molnar 	}
25662784854SIngo Molnar }
25762784854SIngo Molnar 
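/*
 * For example, on a CPU with AVX, CPUID.(EAX=0xD,ECX=2) typically reports
 * eax = 256 (size of the YMM state) and ebx = 576 (its standard-format
 * offset, right behind the 512-byte legacy area and the 64-byte header),
 * so xstate_sizes[2] = 256 and xstate_offsets[2] = 576 after this runs.
 */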
25832231879SIngo Molnar static void __init print_xstate_feature(u64 xstate_mask)
25962784854SIngo Molnar {
26033588b52SIngo Molnar 	const char *feature_name;
26162784854SIngo Molnar 
26233588b52SIngo Molnar 	if (cpu_has_xfeatures(xstate_mask, &feature_name))
263c8df4009SDave Hansen 		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
26462784854SIngo Molnar }
26562784854SIngo Molnar 
26662784854SIngo Molnar /*
26762784854SIngo Molnar  * Print out all the supported xstate features:
26862784854SIngo Molnar  */
26932231879SIngo Molnar static void __init print_xstate_features(void)
27062784854SIngo Molnar {
271d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_FP);
272d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_SSE);
273d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_YMM);
274d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_BNDREGS);
275d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_BNDCSR);
276d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_OPMASK);
277d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
278d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
279c8df4009SDave Hansen 	print_xstate_feature(XFEATURE_MASK_PKRU);
280b454feb9SYu-cheng Yu 	print_xstate_feature(XFEATURE_MASK_PASID);
2818970ef02SRick Edgecombe 	print_xstate_feature(XFEATURE_MASK_CET_USER);
282eec2113eSChang S. Bae 	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
283eec2113eSChang S. Bae 	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
28462784854SIngo Molnar }
28562784854SIngo Molnar 
28662784854SIngo Molnar /*
28703482e08SYu-cheng Yu  * This check is important because it is easy to get XSTATE_*
28803482e08SYu-cheng Yu  * confused with XSTATE_BIT_*.
28903482e08SYu-cheng Yu  */
29003482e08SYu-cheng Yu #define CHECK_XFEATURE(nr) do {		\
29103482e08SYu-cheng Yu 	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
29203482e08SYu-cheng Yu 	WARN_ON(nr >= XFEATURE_MAX);	\
29303482e08SYu-cheng Yu } while (0)
29403482e08SYu-cheng Yu 
29503482e08SYu-cheng Yu /*
296996952e0SYu-cheng Yu  * Print out xstate component offsets and sizes
297996952e0SYu-cheng Yu  */
298996952e0SYu-cheng Yu static void __init print_xstate_offset_size(void)
299996952e0SYu-cheng Yu {
300996952e0SYu-cheng Yu 	int i;
301996952e0SYu-cheng Yu 
3021c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
303996952e0SYu-cheng Yu 		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
3047aa5128bSThomas Gleixner 			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
3057aa5128bSThomas Gleixner 			i, xstate_sizes[i]);
306996952e0SYu-cheng Yu 	}
307996952e0SYu-cheng Yu }
308996952e0SYu-cheng Yu 
309996952e0SYu-cheng Yu /*
310b579d0c3SThomas Gleixner  * This function is called only during boot time when x86 caps are not set
311b579d0c3SThomas Gleixner  * up and alternatives cannot be used yet.
312b579d0c3SThomas Gleixner  */
313b579d0c3SThomas Gleixner static __init void os_xrstor_booting(struct xregs_state *xstate)
314b579d0c3SThomas Gleixner {
315eda32f4fSThomas Gleixner 	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
316b579d0c3SThomas Gleixner 	u32 lmask = mask;
317b579d0c3SThomas Gleixner 	u32 hmask = mask >> 32;
318b579d0c3SThomas Gleixner 	int err;
319b579d0c3SThomas Gleixner 
320b579d0c3SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
321b579d0c3SThomas Gleixner 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
322b579d0c3SThomas Gleixner 	else
323b579d0c3SThomas Gleixner 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
324b579d0c3SThomas Gleixner 
325b579d0c3SThomas Gleixner 	/*
326b579d0c3SThomas Gleixner 	 * We should never fault when copying from a kernel buffer, and the FPU
327b579d0c3SThomas Gleixner 	 * state we set at boot time should be valid.
328b579d0c3SThomas Gleixner 	 */
329b579d0c3SThomas Gleixner 	WARN_ON_FPU(err);
330b579d0c3SThomas Gleixner }
331b579d0c3SThomas Gleixner 
332b579d0c3SThomas Gleixner /*
333f9dfb5e3SThomas Gleixner  * All supported features have either init state all zeros or are
334f9dfb5e3SThomas Gleixner  * handled in setup_init_fpu() individually. This is an explicit
335f9dfb5e3SThomas Gleixner  * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
336f9dfb5e3SThomas Gleixner  * newly added supported features at build time and make people
337f9dfb5e3SThomas Gleixner  * actually look at the init state for the new feature.
338f9dfb5e3SThomas Gleixner  */
339f9dfb5e3SThomas Gleixner #define XFEATURES_INIT_FPSTATE_HANDLED		\
340f9dfb5e3SThomas Gleixner 	(XFEATURE_MASK_FP |			\
341f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_SSE |			\
342f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_YMM |			\
343f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_OPMASK |			\
344f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_ZMM_Hi256 |		\
345f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_Hi16_ZMM	 |		\
346f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_PKRU |			\
347f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_BNDREGS |		\
348f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_BNDCSR |			\
3492308ee57SChang S. Bae 	 XFEATURE_MASK_PASID |			\
3508970ef02SRick Edgecombe 	 XFEATURE_MASK_CET_USER |		\
3512308ee57SChang S. Bae 	 XFEATURE_MASK_XTILE)
352f9dfb5e3SThomas Gleixner 
353f9dfb5e3SThomas Gleixner /*
35462784854SIngo Molnar  * Set up the xstate image representing the init state.
35562784854SIngo Molnar  */
35632231879SIngo Molnar static void __init setup_init_fpu_buf(void)
35762784854SIngo Molnar {
358f9dfb5e3SThomas Gleixner 	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
359f9dfb5e3SThomas Gleixner 		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
360f9dfb5e3SThomas Gleixner 		     XFEATURES_INIT_FPSTATE_HANDLED);
361f9dfb5e3SThomas Gleixner 
362d366bf7eSBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
36362784854SIngo Molnar 		return;
36462784854SIngo Molnar 
36562784854SIngo Molnar 	print_xstate_features();
36662784854SIngo Molnar 
367c32d7cabSChang S. Bae 	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
36862784854SIngo Molnar 
36962784854SIngo Molnar 	/*
3707d937060SFenghua Yu 	 * Init all the features state with header.xfeatures being 0x0
37162784854SIngo Molnar 	 */
372f83ac56aSThomas Gleixner 	os_xrstor_booting(&init_fpstate.regs.xsave);
37362784854SIngo Molnar 
37462784854SIngo Molnar 	/*
375f9dfb5e3SThomas Gleixner 	 * All components are now in init state. Read the state back so
376f9dfb5e3SThomas Gleixner 	 * that init_fpstate contains all non-zero init state. This only
3778ad7e8f6SThomas Gleixner 	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
378f9dfb5e3SThomas Gleixner 	 * those use the init optimization which skips writing data for
379f9dfb5e3SThomas Gleixner 	 * components in init state.
380f9dfb5e3SThomas Gleixner 	 *
381f9dfb5e3SThomas Gleixner 	 * XSAVE could be used, but that would require to reshuffle the
3828ad7e8f6SThomas Gleixner 	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
383f9dfb5e3SThomas Gleixner 	 * compaction. But doing so is a pointless exercise because most
384f9dfb5e3SThomas Gleixner 	 * components have an all zeros init state except for the legacy
385f9dfb5e3SThomas Gleixner 	 * ones (FP and SSE). Those can be saved with FXSAVE into the
386f9dfb5e3SThomas Gleixner 	 * legacy area. Adding new features requires to ensure that init
387f9dfb5e3SThomas Gleixner 	 * state is all zeroes or if not to add the necessary handling
388f9dfb5e3SThomas Gleixner 	 * here.
38962784854SIngo Molnar 	 */
390f83ac56aSThomas Gleixner 	fxsave(&init_fpstate.regs.fxsave);
39162784854SIngo Molnar }
39262784854SIngo Molnar 
393ce711ea3SKan Liang int xfeature_size(int xfeature_nr)
39465ac2e9bSDave Hansen {
39565ac2e9bSDave Hansen 	u32 eax, ebx, ecx, edx;
39665ac2e9bSDave Hansen 
39765ac2e9bSDave Hansen 	CHECK_XFEATURE(xfeature_nr);
39865ac2e9bSDave Hansen 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
39965ac2e9bSDave Hansen 	return eax;
40065ac2e9bSDave Hansen }
40165ac2e9bSDave Hansen 
402e63e5d5cSEric Biggers /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
40349e4eb41SThomas Gleixner static int validate_user_xstate_header(const struct xstate_header *hdr,
40449e4eb41SThomas Gleixner 				       struct fpstate *fpstate)
405e63e5d5cSEric Biggers {
406e63e5d5cSEric Biggers 	/* No unknown or supervisor features may be set */
40749e4eb41SThomas Gleixner 	if (hdr->xfeatures & ~fpstate->user_xfeatures)
408e63e5d5cSEric Biggers 		return -EINVAL;
409e63e5d5cSEric Biggers 
410e63e5d5cSEric Biggers 	/* Userspace must use the uncompacted format */
411e63e5d5cSEric Biggers 	if (hdr->xcomp_bv)
412e63e5d5cSEric Biggers 		return -EINVAL;
413e63e5d5cSEric Biggers 
414e63e5d5cSEric Biggers 	/*
415e63e5d5cSEric Biggers 	 * If 'reserved' is shrunken to add a new field, make sure to validate
416e63e5d5cSEric Biggers 	 * that new field here!
417e63e5d5cSEric Biggers 	 */
418e63e5d5cSEric Biggers 	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
419e63e5d5cSEric Biggers 
420e63e5d5cSEric Biggers 	/* No reserved bits may be set */
421e63e5d5cSEric Biggers 	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
422e63e5d5cSEric Biggers 		return -EINVAL;
423e63e5d5cSEric Biggers 
424e63e5d5cSEric Biggers 	return 0;
425e63e5d5cSEric Biggers }
426e63e5d5cSEric Biggers 
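/*
 * Example (illustrative): a signal frame whose header sets a supervisor
 * bit such as XFEATURE_MASK_PASID fails the first check, and a frame
 * written in compacted form (non-zero xcomp_bv) fails the second; both
 * make the caller reject the user-supplied buffer with -EINVAL.
 */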
42763cf05a1SThomas Gleixner static void __init __xstate_dump_leaves(void)
42865ac2e9bSDave Hansen {
42965ac2e9bSDave Hansen 	int i;
43065ac2e9bSDave Hansen 	u32 eax, ebx, ecx, edx;
43165ac2e9bSDave Hansen 	static int should_dump = 1;
43265ac2e9bSDave Hansen 
43365ac2e9bSDave Hansen 	if (!should_dump)
43465ac2e9bSDave Hansen 		return;
43565ac2e9bSDave Hansen 	should_dump = 0;
43665ac2e9bSDave Hansen 	/*
43765ac2e9bSDave Hansen 	 * Dump out a few leaves past the ones that we support
43865ac2e9bSDave Hansen 	 * just in case there are some goodies up there
43965ac2e9bSDave Hansen 	 */
44065ac2e9bSDave Hansen 	for (i = 0; i < XFEATURE_MAX + 10; i++) {
44165ac2e9bSDave Hansen 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
44265ac2e9bSDave Hansen 		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
44365ac2e9bSDave Hansen 			XSTATE_CPUID, i, eax, ebx, ecx, edx);
44465ac2e9bSDave Hansen 	}
44565ac2e9bSDave Hansen }
44665ac2e9bSDave Hansen 
44748280042SAndrew Cooper #define XSTATE_WARN_ON(x, fmt, ...) do {					\
44848280042SAndrew Cooper 	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
44965ac2e9bSDave Hansen 		__xstate_dump_leaves();						\
45065ac2e9bSDave Hansen 	}									\
45165ac2e9bSDave Hansen } while (0)
45265ac2e9bSDave Hansen 
4538970ef02SRick Edgecombe #define XCHECK_SZ(sz, nr, __struct) ({					\
4548970ef02SRick Edgecombe 	if (WARN_ONCE(sz != sizeof(__struct),				\
4558970ef02SRick Edgecombe 	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
4568970ef02SRick Edgecombe 	    xfeature_names[nr], sizeof(__struct), sz)) {		\
457ef78f2a4SDave Hansen 		__xstate_dump_leaves();					\
458ef78f2a4SDave Hansen 	}								\
4598970ef02SRick Edgecombe 	true;								\
4608970ef02SRick Edgecombe })
4618970ef02SRick Edgecombe 
462ef78f2a4SDave Hansen 
463eec2113eSChang S. Bae /**
464eec2113eSChang S. Bae  * check_xtile_data_against_struct - Check tile data state size.
465eec2113eSChang S. Bae  *
466eec2113eSChang S. Bae  * Calculate the state size by multiplying the single-tile size, which is
467eec2113eSChang S. Bae  * recorded in a C struct, by the number of tiles reported by the CPU.
468eec2113eSChang S. Bae  * Compare the provided size with that calculation.
469eec2113eSChang S. Bae  *
470eec2113eSChang S. Bae  * @size:	The tile data state size
471eec2113eSChang S. Bae  *
472eec2113eSChang S. Bae  * Returns:	0 on success, -EINVAL on mismatch.
473eec2113eSChang S. Bae  */
474eec2113eSChang S. Bae static int __init check_xtile_data_against_struct(int size)
475eec2113eSChang S. Bae {
476eec2113eSChang S. Bae 	u32 max_palid, palid, state_size;
477eec2113eSChang S. Bae 	u32 eax, ebx, ecx, edx;
478eec2113eSChang S. Bae 	u16 max_tile;
479eec2113eSChang S. Bae 
480eec2113eSChang S. Bae 	/*
481eec2113eSChang S. Bae 	 * Check the maximum palette id:
482eec2113eSChang S. Bae 	 *   eax: the highest numbered palette subleaf.
483eec2113eSChang S. Bae 	 */
484eec2113eSChang S. Bae 	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
485eec2113eSChang S. Bae 
486eec2113eSChang S. Bae 	/*
487eec2113eSChang S. Bae 	 * Cross-check each tile size and find the maximum number of
488eec2113eSChang S. Bae 	 * supported tiles.
489eec2113eSChang S. Bae 	 */
490eec2113eSChang S. Bae 	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
491eec2113eSChang S. Bae 		u16 tile_size, max;
492eec2113eSChang S. Bae 
493eec2113eSChang S. Bae 		/*
494eec2113eSChang S. Bae 		 * Check the tile size info:
495eec2113eSChang S. Bae 		 *   eax[31:16]:  bytes per tile
496eec2113eSChang S. Bae 		 *   ebx[31:16]:  the max names (or max number of tiles)
497eec2113eSChang S. Bae 		 */
498eec2113eSChang S. Bae 		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &ecx, &edx);
499eec2113eSChang S. Bae 		tile_size = eax >> 16;
500eec2113eSChang S. Bae 		max = ebx >> 16;
501eec2113eSChang S. Bae 
502eec2113eSChang S. Bae 		if (tile_size != sizeof(struct xtile_data)) {
503eec2113eSChang S. Bae 			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
504eec2113eSChang S. Bae 			       __stringify(XFEATURE_XTILE_DATA),
505eec2113eSChang S. Bae 			       sizeof(struct xtile_data), tile_size);
506eec2113eSChang S. Bae 			__xstate_dump_leaves();
507eec2113eSChang S. Bae 			return -EINVAL;
508eec2113eSChang S. Bae 		}
509eec2113eSChang S. Bae 
510eec2113eSChang S. Bae 		if (max > max_tile)
511eec2113eSChang S. Bae 			max_tile = max;
512eec2113eSChang S. Bae 	}
513eec2113eSChang S. Bae 
514eec2113eSChang S. Bae 	state_size = sizeof(struct xtile_data) * max_tile;
515eec2113eSChang S. Bae 	if (size != state_size) {
516eec2113eSChang S. Bae 		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
517eec2113eSChang S. Bae 		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
518eec2113eSChang S. Bae 		__xstate_dump_leaves();
519eec2113eSChang S. Bae 		return -EINVAL;
520eec2113eSChang S. Bae 	}
521eec2113eSChang S. Bae 	return 0;
522eec2113eSChang S. Bae }
523eec2113eSChang S. Bae 
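/*
 * Worked example (first-generation AMX numbers): palette 1 describes
 * 8 tiles of 1024 bytes each, so sizeof(struct xtile_data) must be 1024
 * and the expected XTILE_DATA state size is 8 * 1024 = 8192 bytes, which
 * is checked against the size enumerated in CPUID.(EAX=0xD,ECX=18):EAX.
 */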
524ef78f2a4SDave Hansen /*
525ef78f2a4SDave Hansen  * We have a C struct for each 'xstate'.  We need to ensure
526ef78f2a4SDave Hansen  * that our software representation matches what the CPU
527ef78f2a4SDave Hansen  * tells us about the state's size.
528ef78f2a4SDave Hansen  */
529cd9ae761SThomas Gleixner static bool __init check_xstate_against_struct(int nr)
530ef78f2a4SDave Hansen {
531ef78f2a4SDave Hansen 	/*
532ef78f2a4SDave Hansen 	 * Ask the CPU for the size of the state.
533ef78f2a4SDave Hansen 	 */
534ef78f2a4SDave Hansen 	int sz = xfeature_size(nr);
5358970ef02SRick Edgecombe 
536ef78f2a4SDave Hansen 	/*
537ef78f2a4SDave Hansen 	 * Match each CPU state with the corresponding software
538ef78f2a4SDave Hansen 	 * structure.
539ef78f2a4SDave Hansen 	 */
5408970ef02SRick Edgecombe 	switch (nr) {
5418970ef02SRick Edgecombe 	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
5428970ef02SRick Edgecombe 	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
5438970ef02SRick Edgecombe 	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
5448970ef02SRick Edgecombe 	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
5458970ef02SRick Edgecombe 	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
5468970ef02SRick Edgecombe 	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
5478970ef02SRick Edgecombe 	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
5488970ef02SRick Edgecombe 	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
5498970ef02SRick Edgecombe 	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
5508970ef02SRick Edgecombe 	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
5518970ef02SRick Edgecombe 	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
5528970ef02SRick Edgecombe 	default:
55348280042SAndrew Cooper 		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
554cd9ae761SThomas Gleixner 		return false;
555ef78f2a4SDave Hansen 	}
5568970ef02SRick Edgecombe 
557cd9ae761SThomas Gleixner 	return true;
558ef78f2a4SDave Hansen }
559ef78f2a4SDave Hansen 
56084e4dcccSChang S. Bae static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
56184e4dcccSChang S. Bae {
562d6d6d50fSThomas Gleixner 	unsigned int topmost = fls64(xfeatures) -  1;
563d6d6d50fSThomas Gleixner 	unsigned int offset = xstate_offsets[topmost];
56484e4dcccSChang S. Bae 
565d6d6d50fSThomas Gleixner 	if (topmost <= XFEATURE_SSE)
566d6d6d50fSThomas Gleixner 		return sizeof(struct xregs_state);
567d6d6d50fSThomas Gleixner 
568d6d6d50fSThomas Gleixner 	if (compacted)
569d6d6d50fSThomas Gleixner 		offset = xfeature_get_offset(xfeatures, topmost);
570d6d6d50fSThomas Gleixner 	return offset + xstate_sizes[topmost];
57184e4dcccSChang S. Bae }
57284e4dcccSChang S. Bae 
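/*
 * Worked example (typical non-compacted numbers): if the topmost enabled
 * feature is PKRU, which many CPUs place at standard-format offset 2688
 * with a size of 8 bytes, the uncompacted buffer size is 2688 + 8 = 2696
 * bytes. In compacted mode the offset is recomputed from @xfeatures via
 * xfeature_get_offset() instead.
 */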
57365ac2e9bSDave Hansen /*
57465ac2e9bSDave Hansen  * This essentially double-checks what the cpu told us about
57565ac2e9bSDave Hansen  * how large the XSAVE buffer needs to be.  We are recalculating
57665ac2e9bSDave Hansen  * it to be safe.
57776d10256SKan Liang  *
57801707b66SAndy Lutomirski  * Independent XSAVE features allocate their own buffers and are not
57976d10256SKan Liang  * covered by these checks. Only the size of the buffer for task->fpu
58076d10256SKan Liang  * is checked here.
58165ac2e9bSDave Hansen  */
582cd9ae761SThomas Gleixner static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
58365ac2e9bSDave Hansen {
5848ad7e8f6SThomas Gleixner 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
5858ad7e8f6SThomas Gleixner 	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
586cd9ae761SThomas Gleixner 	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
58765ac2e9bSDave Hansen 	int i;
58865ac2e9bSDave Hansen 
5891c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
590cd9ae761SThomas Gleixner 		if (!check_xstate_against_struct(i))
591cd9ae761SThomas Gleixner 			return false;
59265ac2e9bSDave Hansen 		/*
59365ac2e9bSDave Hansen 		 * Supervisor state components can be managed only by
59402b93c0bSThomas Gleixner 		 * XSAVES.
59565ac2e9bSDave Hansen 		 */
5968ad7e8f6SThomas Gleixner 		if (!xsaves && xfeature_is_supervisor(i)) {
59748280042SAndrew Cooper 			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
598cd9ae761SThomas Gleixner 			return false;
599cd9ae761SThomas Gleixner 		}
60065ac2e9bSDave Hansen 	}
60184e4dcccSChang S. Bae 	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
60248280042SAndrew Cooper 	XSTATE_WARN_ON(size != kernel_size,
60348280042SAndrew Cooper 		       "size %u != kernel_size %u\n", size, kernel_size);
604cd9ae761SThomas Gleixner 	return size == kernel_size;
60565ac2e9bSDave Hansen }
60665ac2e9bSDave Hansen 
60762784854SIngo Molnar /*
608524bb73bSYu-cheng Yu  * Get total size of enabled xstates in XCR0 | IA32_XSS.
60965ac2e9bSDave Hansen  *
61065ac2e9bSDave Hansen  * Note the SDM's wording here.  "sub-function 0" only enumerates
61165ac2e9bSDave Hansen  * the size of the *user* states.  If we use it to size a buffer
61265ac2e9bSDave Hansen  * that we use 'XSAVES' on, we could potentially overflow the
61365ac2e9bSDave Hansen  * buffer because 'XSAVES' saves system states too.
6148ad7e8f6SThomas Gleixner  *
6158ad7e8f6SThomas Gleixner  * This also takes compaction into account. So this works for
6168ad7e8f6SThomas Gleixner  * XSAVEC as well.
61762784854SIngo Molnar  */
6188ad7e8f6SThomas Gleixner static unsigned int __init get_compacted_size(void)
61962784854SIngo Molnar {
62062784854SIngo Molnar 	unsigned int eax, ebx, ecx, edx;
62165ac2e9bSDave Hansen 	/*
62265ac2e9bSDave Hansen 	 * - CPUID function 0DH, sub-function 1:
62365ac2e9bSDave Hansen 	 *    EBX enumerates the size (in bytes) required by
62465ac2e9bSDave Hansen 	 *    the XSAVES instruction for an XSAVE area
62565ac2e9bSDave Hansen 	 *    containing all the state components
62665ac2e9bSDave Hansen 	 *    corresponding to bits currently set in
62765ac2e9bSDave Hansen 	 *    XCR0 | IA32_XSS.
6288ad7e8f6SThomas Gleixner 	 *
6298ad7e8f6SThomas Gleixner 	 * When XSAVES is not available but XSAVEC is (virt), then there
6308ad7e8f6SThomas Gleixner 	 * are no supervisor states, but XSAVEC still uses compacted
6318ad7e8f6SThomas Gleixner 	 * format.
63265ac2e9bSDave Hansen 	 */
63365ac2e9bSDave Hansen 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
634a1141e0bSFenghua Yu 	return ebx;
63562784854SIngo Molnar }
636a1141e0bSFenghua Yu 
63776d10256SKan Liang /*
63801707b66SAndy Lutomirski  * Get the total size of the enabled xstates without the independent supervisor
63976d10256SKan Liang  * features.
64076d10256SKan Liang  */
6418ad7e8f6SThomas Gleixner static unsigned int __init get_xsave_compacted_size(void)
64276d10256SKan Liang {
64301707b66SAndy Lutomirski 	u64 mask = xfeatures_mask_independent();
64476d10256SKan Liang 	unsigned int size;
64576d10256SKan Liang 
64676d10256SKan Liang 	if (!mask)
6478ad7e8f6SThomas Gleixner 		return get_compacted_size();
64876d10256SKan Liang 
64901707b66SAndy Lutomirski 	/* Disable independent features. */
65076d10256SKan Liang 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
65176d10256SKan Liang 
65276d10256SKan Liang 	/*
65376d10256SKan Liang 	 * Ask the hardware what size is required of the buffer.
65476d10256SKan Liang 	 * This is the size required for the task->fpu buffer.
65576d10256SKan Liang 	 */
6568ad7e8f6SThomas Gleixner 	size = get_compacted_size();
65776d10256SKan Liang 
65801707b66SAndy Lutomirski 	/* Re-enable independent features so XSAVES will work on them again. */
65976d10256SKan Liang 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
66076d10256SKan Liang 
66176d10256SKan Liang 	return size;
66276d10256SKan Liang }
66376d10256SKan Liang 
664cd9ae761SThomas Gleixner static unsigned int __init get_xsave_size_user(void)
665a1141e0bSFenghua Yu {
666a1141e0bSFenghua Yu 	unsigned int eax, ebx, ecx, edx;
667a1141e0bSFenghua Yu 	/*
668a1141e0bSFenghua Yu 	 * - CPUID function 0DH, sub-function 0:
669a1141e0bSFenghua Yu 	 *    EBX enumerates the size (in bytes) required by
670a1141e0bSFenghua Yu 	 *    the XSAVE instruction for an XSAVE area
671a1141e0bSFenghua Yu 	 *    containing all the *user* state components
672a1141e0bSFenghua Yu 	 *    corresponding to bits currently set in XCR0.
673a1141e0bSFenghua Yu 	 */
674a1141e0bSFenghua Yu 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
675a1141e0bSFenghua Yu 	return ebx;
6764109ca06SDave Hansen }
6774109ca06SDave Hansen 
678653a561bSSergey Senozhatsky static int __init init_xstate_size(void)
6794109ca06SDave Hansen {
6804109ca06SDave Hansen 	/* Recompute the context size for enabled features: */
6812ae996e0SChang S. Bae 	unsigned int user_size, kernel_size, kernel_default_size;
6828ad7e8f6SThomas Gleixner 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
683a1141e0bSFenghua Yu 
684cd9ae761SThomas Gleixner 	/* Uncompacted user space size */
685cd9ae761SThomas Gleixner 	user_size = get_xsave_size_user();
686a1141e0bSFenghua Yu 
687cd9ae761SThomas Gleixner 	/*
6888ad7e8f6SThomas Gleixner 	 * XSAVES kernel size includes supervisor states and uses compacted
6898ad7e8f6SThomas Gleixner 	 * format. XSAVEC uses compacted format, but does not save
6908ad7e8f6SThomas Gleixner 	 * supervisor states.
691cd9ae761SThomas Gleixner 	 *
6928ad7e8f6SThomas Gleixner 	 * XSAVE[OPT] do not support supervisor states so kernel and user
6938ad7e8f6SThomas Gleixner 	 * size is identical.
694cd9ae761SThomas Gleixner 	 */
6952ae996e0SChang S. Bae 	if (compacted)
6968ad7e8f6SThomas Gleixner 		kernel_size = get_xsave_compacted_size();
697a1141e0bSFenghua Yu 	else
698cd9ae761SThomas Gleixner 		kernel_size = user_size;
6994109ca06SDave Hansen 
7002ae996e0SChang S. Bae 	kernel_default_size =
7012ae996e0SChang S. Bae 		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
7022ae996e0SChang S. Bae 
703cd9ae761SThomas Gleixner 	if (!paranoid_xstate_size_valid(kernel_size))
704cd9ae761SThomas Gleixner 		return -EINVAL;
705a1141e0bSFenghua Yu 
7062bd264bcSThomas Gleixner 	fpu_kernel_cfg.max_size = kernel_size;
7072bd264bcSThomas Gleixner 	fpu_user_cfg.max_size = user_size;
7082ae996e0SChang S. Bae 
7092ae996e0SChang S. Bae 	fpu_kernel_cfg.default_size = kernel_default_size;
7102ae996e0SChang S. Bae 	fpu_user_cfg.default_size =
7112ae996e0SChang S. Bae 		xstate_calculate_size(fpu_user_cfg.default_features, false);
712cd9ae761SThomas Gleixner 
7134109ca06SDave Hansen 	return 0;
7144109ca06SDave Hansen }
7154109ca06SDave Hansen 
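/*
 * Illustrative relation between the sizes computed above: with XSAVES
 * available and dynamic AMX state supported but not enabled by default,
 * fpu_kernel_cfg.max_size covers every enabled feature including
 * XTILE_DATA, fpu_kernel_cfg.default_size omits the dynamic states, and
 * the fpu_user_cfg sizes additionally drop the supervisor components.
 */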
716d91cab78SDave Hansen /*
717d91cab78SDave Hansen  * We enabled the XSAVE hardware, but something went wrong and
718d91cab78SDave Hansen  * we cannot use it.  Disable it.
719d91cab78SDave Hansen  */
7202bd264bcSThomas Gleixner static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
7214109ca06SDave Hansen {
7221c253ff2SThomas Gleixner 	fpu_kernel_cfg.max_features = 0;
7234109ca06SDave Hansen 	cr4_clear_bits(X86_CR4_OSXSAVE);
7247891bc0aSSebastian Andrzej Siewior 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
7252bd264bcSThomas Gleixner 
7262bd264bcSThomas Gleixner 	/* Restore the legacy size.*/
7272bd264bcSThomas Gleixner 	/* Restore the legacy size. */
7282bd264bcSThomas Gleixner 	fpu_kernel_cfg.default_size = legacy_size;
7292bd264bcSThomas Gleixner 	fpu_user_cfg.max_size = legacy_size;
7302bd264bcSThomas Gleixner 	fpu_user_cfg.default_size = legacy_size;
7312bd264bcSThomas Gleixner 
732db3e7321SChang S. Bae 	/*
733db3e7321SChang S. Bae 	 * Prevent enabling the static branch which enables writes to the
734db3e7321SChang S. Bae 	 * XFD MSR.
735db3e7321SChang S. Bae 	 */
736db3e7321SChang S. Bae 	init_fpstate.xfd = 0;
737db3e7321SChang S. Bae 
738248452ceSThomas Gleixner 	fpstate_reset(&current->thread.fpu);
73962784854SIngo Molnar }
74062784854SIngo Molnar 
74162784854SIngo Molnar /*
74262784854SIngo Molnar  * Enable and initialize the xsave feature.
74362784854SIngo Molnar  * Called once per system bootup.
74462784854SIngo Molnar  */
7452bd264bcSThomas Gleixner void __init fpu__init_system_xstate(unsigned int legacy_size)
74662784854SIngo Molnar {
74762784854SIngo Molnar 	unsigned int eax, ebx, ecx, edx;
7484e8e4313SThomas Gleixner 	u64 xfeatures;
7494109ca06SDave Hansen 	int err;
750ccb18db2SAndi Kleen 	int i;
751e97131a8SIngo Molnar 
7529170fb40SAndy Lutomirski 	if (!boot_cpu_has(X86_FEATURE_FPU)) {
7539170fb40SAndy Lutomirski 		pr_info("x86/fpu: No FPU detected\n");
7549170fb40SAndy Lutomirski 		return;
7559170fb40SAndy Lutomirski 	}
7569170fb40SAndy Lutomirski 
757d366bf7eSBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
7589170fb40SAndy Lutomirski 		pr_info("x86/fpu: x87 FPU will use %s\n",
7599170fb40SAndy Lutomirski 			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
76062784854SIngo Molnar 		return;
76162784854SIngo Molnar 	}
76262784854SIngo Molnar 
76362784854SIngo Molnar 	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
764e97131a8SIngo Molnar 		WARN_ON_FPU(1);
76562784854SIngo Molnar 		return;
76662784854SIngo Molnar 	}
76762784854SIngo Molnar 
768524bb73bSYu-cheng Yu 	/*
769524bb73bSYu-cheng Yu 	 * Find user xstates supported by the processor.
770524bb73bSYu-cheng Yu 	 */
77162784854SIngo Molnar 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
7721c253ff2SThomas Gleixner 	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
77362784854SIngo Molnar 
77471581eefSYu-cheng Yu 	/*
77571581eefSYu-cheng Yu 	 * Find supervisor xstates supported by the processor.
77671581eefSYu-cheng Yu 	 */
77771581eefSYu-cheng Yu 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
7781c253ff2SThomas Gleixner 	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
77971581eefSYu-cheng Yu 
780daddee24SThomas Gleixner 	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
781ec3ed4a2SDave Hansen 		/*
782ec3ed4a2SDave Hansen 		 * This indicates that something really unexpected happened
783ec3ed4a2SDave Hansen 		 * with the enumeration.  Disable XSAVE and try to continue
784ec3ed4a2SDave Hansen 		 * booting without it.  This is too early to BUG().
785ec3ed4a2SDave Hansen 		 */
786524bb73bSYu-cheng Yu 		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
7871c253ff2SThomas Gleixner 		       fpu_kernel_cfg.max_features);
788ec3ed4a2SDave Hansen 		goto out_disable;
78962784854SIngo Molnar 	}
79062784854SIngo Molnar 
791ccb18db2SAndi Kleen 	/*
792ccb18db2SAndi Kleen 	 * Clear XSAVE features that are disabled in the normal CPUID.
793ccb18db2SAndi Kleen 	 */
794ccb18db2SAndi Kleen 	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
79570c3f167SChang S. Bae 		unsigned short cid = xsave_cpuid_features[i];
79670c3f167SChang S. Bae 
79770c3f167SChang S. Bae 		/* Careful: X86_FEATURE_FPU is 0! */
79870c3f167SChang S. Bae 		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
7991c253ff2SThomas Gleixner 			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
800ccb18db2SAndi Kleen 	}
801ccb18db2SAndi Kleen 
8022ae996e0SChang S. Bae 	if (!cpu_feature_enabled(X86_FEATURE_XFD))
8032ae996e0SChang S. Bae 		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
8042ae996e0SChang S. Bae 
8058ad7e8f6SThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
8068ad7e8f6SThomas Gleixner 		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
8078ad7e8f6SThomas Gleixner 	else
8081c253ff2SThomas Gleixner 		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
809ce38f038SThomas Gleixner 					XFEATURE_MASK_SUPERVISOR_SUPPORTED;
810ce38f038SThomas Gleixner 
8111c253ff2SThomas Gleixner 	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
8121c253ff2SThomas Gleixner 	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
8131c253ff2SThomas Gleixner 
8142ae996e0SChang S. Bae 	/* Clean out dynamic features from default */
8151c253ff2SThomas Gleixner 	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
8162ae996e0SChang S. Bae 	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
8172ae996e0SChang S. Bae 
8181c253ff2SThomas Gleixner 	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
8192ae996e0SChang S. Bae 	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
8201c253ff2SThomas Gleixner 
8214e8e4313SThomas Gleixner 	/* Store it for paranoia check at the end */
8221c253ff2SThomas Gleixner 	xfeatures = fpu_kernel_cfg.max_features;
82362784854SIngo Molnar 
824db3e7321SChang S. Bae 	/*
825db3e7321SChang S. Bae 	 * Initialize the default XFD state in init_fpstate and enable the
826db3e7321SChang S. Bae 	 * dynamic sizing mechanism if dynamic states are available.  The
827db3e7321SChang S. Bae 	 * static key cannot be enabled here because this runs before
828db3e7321SChang S. Bae 	 * jump_label_init(). This is delayed to an initcall.
829db3e7321SChang S. Bae 	 */
830db3e7321SChang S. Bae 	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
831db3e7321SChang S. Bae 
8328ad7e8f6SThomas Gleixner 	/* Set up compaction feature bit */
8338ad7e8f6SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
8348ad7e8f6SThomas Gleixner 	    cpu_feature_enabled(X86_FEATURE_XSAVES))
8358ad7e8f6SThomas Gleixner 		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
8368ad7e8f6SThomas Gleixner 
83762784854SIngo Molnar 	/* Enable xstate instructions to be able to continue with initialization: */
83862784854SIngo Molnar 	fpu__init_cpu_xstate();
83935a77d45SThomas Gleixner 
84035a77d45SThomas Gleixner 	/* Cache size, offset and flags for initialization */
84135a77d45SThomas Gleixner 	setup_xstate_cache();
84235a77d45SThomas Gleixner 
8434109ca06SDave Hansen 	err = init_xstate_size();
844ec3ed4a2SDave Hansen 	if (err)
845ec3ed4a2SDave Hansen 		goto out_disable;
84662784854SIngo Molnar 
8472ae996e0SChang S. Bae 	/* Reset the state for the current task */
848248452ceSThomas Gleixner 	fpstate_reset(&current->thread.fpu);
849248452ceSThomas Gleixner 
85091c3dba7SYu-cheng Yu 	/*
85191c3dba7SYu-cheng Yu 	 * Update info used for ptrace frames; use standard-format size and no
85291c3dba7SYu-cheng Yu 	 * supervisor xstates:
85391c3dba7SYu-cheng Yu 	 */
8542bd264bcSThomas Gleixner 	update_regset_xstate_info(fpu_user_cfg.max_size,
855daddee24SThomas Gleixner 				  fpu_user_cfg.max_features);
85691c3dba7SYu-cheng Yu 
857a401f45eSChang S. Bae 	/*
858a401f45eSChang S. Bae 	 * init_fpstate excludes dynamic states as they are large but init
859a401f45eSChang S. Bae 	 * state is zero.
860a401f45eSChang S. Bae 	 */
861a401f45eSChang S. Bae 	init_fpstate.size		= fpu_kernel_cfg.default_size;
862a401f45eSChang S. Bae 	init_fpstate.xfeatures		= fpu_kernel_cfg.default_features;
863c32d7cabSChang S. Bae 
864d3e021adSChang S. Bae 	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
865d3e021adSChang S. Bae 		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
866d3e021adSChang S. Bae 			sizeof(init_fpstate.regs), init_fpstate.size);
867d3e021adSChang S. Bae 		goto out_disable;
868d3e021adSChang S. Bae 	}
869d3e021adSChang S. Bae 
87062784854SIngo Molnar 	setup_init_fpu_buf();
87162784854SIngo Molnar 
8724e8e4313SThomas Gleixner 	/*
8734e8e4313SThomas Gleixner 	 * Paranoia check whether something in the setup modified the
8744e8e4313SThomas Gleixner 	 * xfeatures mask.
8754e8e4313SThomas Gleixner 	 */
8761c253ff2SThomas Gleixner 	if (xfeatures != fpu_kernel_cfg.max_features) {
8774e8e4313SThomas Gleixner 		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
8781c253ff2SThomas Gleixner 		       xfeatures, fpu_kernel_cfg.max_features);
8794e8e4313SThomas Gleixner 		goto out_disable;
8804e8e4313SThomas Gleixner 	}
8814e8e4313SThomas Gleixner 
8822c66ca39SFeng Tang 	/*
8832c66ca39SFeng Tang 	 * CPU capabilities initialization runs before FPU init. So
8842c66ca39SFeng Tang 	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
8852c66ca39SFeng Tang 	 * functional, set the feature bit so that dependent code works.
8862c66ca39SFeng Tang 	 */
8872c66ca39SFeng Tang 	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
8882c66ca39SFeng Tang 
8894e8e4313SThomas Gleixner 	print_xstate_offset_size();
890b0815359SDave Hansen 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
8911c253ff2SThomas Gleixner 		fpu_kernel_cfg.max_features,
8922bd264bcSThomas Gleixner 		fpu_kernel_cfg.max_size,
8938ad7e8f6SThomas Gleixner 		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
894ec3ed4a2SDave Hansen 	return;
895ec3ed4a2SDave Hansen 
896ec3ed4a2SDave Hansen out_disable:
897ec3ed4a2SDave Hansen 	/* something went wrong, try to boot without any XSAVE support */
8982bd264bcSThomas Gleixner 	fpu__init_disable_system_xstate(legacy_size);
89962784854SIngo Molnar }
90062784854SIngo Molnar 
90162784854SIngo Molnar /*
90262784854SIngo Molnar  * Restore minimal FPU state after suspend:
90362784854SIngo Molnar  */
90462784854SIngo Molnar void fpu__resume_cpu(void)
90562784854SIngo Molnar {
90662784854SIngo Molnar 	/*
90762784854SIngo Molnar 	 * Restore XCR0 on xsave capable CPUs:
90862784854SIngo Molnar 	 */
90965e95210SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
910daddee24SThomas Gleixner 		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
91171581eefSYu-cheng Yu 
91271581eefSYu-cheng Yu 	/*
91371581eefSYu-cheng Yu 	 * Restore IA32_XSS. The same CPUID bit enumerates support
91471581eefSYu-cheng Yu 	 * of XSAVES and MSR_IA32_XSS.
91571581eefSYu-cheng Yu 	 */
91665e95210SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
917f0dccc9dSKan Liang 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
91801707b66SAndy Lutomirski 				     xfeatures_mask_independent());
919f0dccc9dSKan Liang 	}
92067236547SChang S. Bae 
92167236547SChang S. Bae 	if (fpu_state_size_dynamic())
92267236547SChang S. Bae 		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
92362784854SIngo Molnar }
92462784854SIngo Molnar 
92562784854SIngo Molnar /*
92607baeb04SSebastian Andrzej Siewior  * Given an xstate feature nr, calculate where in the xsave
927b8b9b6baSDave Hansen  * buffer the state is.  Callers should ensure that the buffer
928b8b9b6baSDave Hansen  * is valid.
929b8b9b6baSDave Hansen  */
93007baeb04SSebastian Andrzej Siewior static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
931b8b9b6baSDave Hansen {
9327aa5128bSThomas Gleixner 	u64 xcomp_bv = xsave->header.xcomp_bv;
9337aa5128bSThomas Gleixner 
9347aa5128bSThomas Gleixner 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
9357aa5128bSThomas Gleixner 		return NULL;
9367aa5128bSThomas Gleixner 
9378ad7e8f6SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
9387aa5128bSThomas Gleixner 		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
9395060b915SYu-cheng Yu 			return NULL;
9405060b915SYu-cheng Yu 	}
9415060b915SYu-cheng Yu 
9427aa5128bSThomas Gleixner 	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
943b8b9b6baSDave Hansen }
9447aa5128bSThomas Gleixner 
945b8b9b6baSDave Hansen /*
94662784854SIngo Molnar  * Given the xsave area and a state inside, this function returns the
94762784854SIngo Molnar  * address of the state.
94862784854SIngo Molnar  *
94962784854SIngo Molnar  * This is the API that is called to get the address of a state in either
95062784854SIngo Molnar  * the standard or the compacted format of the xsave area.
95162784854SIngo Molnar  *
9520c4109beSDave Hansen  * Note that if there is no data for the field in the xsave buffer
9530c4109beSDave Hansen  * this will return NULL.
9540c4109beSDave Hansen  *
95562784854SIngo Molnar  * Inputs:
9560c4109beSDave Hansen  *	xstate: the thread's storage area for all FPU data
957abd16d68SSebastian Andrzej Siewior  *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
958abd16d68SSebastian Andrzej Siewior  *	XFEATURE_SSE, etc...)
95962784854SIngo Molnar  * Output:
9600c4109beSDave Hansen  *	address of the state in the xsave area, or NULL if the
9610c4109beSDave Hansen  *	field is not present in the xsave buffer.
96262784854SIngo Molnar  */
963abd16d68SSebastian Andrzej Siewior void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
96462784854SIngo Molnar {
9650c4109beSDave Hansen 	/*
9660c4109beSDave Hansen 	 * Do we even *have* xsave state?
9670c4109beSDave Hansen 	 */
9680c4109beSDave Hansen 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
96962784854SIngo Molnar 		return NULL;
97062784854SIngo Molnar 
9710c4109beSDave Hansen 	/*
9720c4109beSDave Hansen 	 * We should not ever be requesting features that we
973524bb73bSYu-cheng Yu 	 * have not enabled.
9740c4109beSDave Hansen 	 */
9757aa5128bSThomas Gleixner 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
9767aa5128bSThomas Gleixner 		return NULL;
9777aa5128bSThomas Gleixner 
9780c4109beSDave Hansen 	/*
9790c4109beSDave Hansen 	 * This assumes that the last 'xsave*' instruction
980abd16d68SSebastian Andrzej Siewior 	 * requested that 'xfeature_nr' be saved.
9810c4109beSDave Hansen 	 * If it did not, we might be seeing an old value
9820c4109beSDave Hansen 	 * of the field in the buffer.
9830c4109beSDave Hansen 	 *
9840c4109beSDave Hansen 	 * This can happen because the last 'xsave' did not
9850c4109beSDave Hansen 	 * request that this feature be saved (unlikely)
9860c4109beSDave Hansen 	 * or because the "init optimization" caused it
9870c4109beSDave Hansen 	 * to not be saved.
9880c4109beSDave Hansen 	 */
989abd16d68SSebastian Andrzej Siewior 	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
9900c4109beSDave Hansen 		return NULL;
9910c4109beSDave Hansen 
99207baeb04SSebastian Andrzej Siewior 	return __raw_xsave_addr(xsave, xfeature_nr);
99362784854SIngo Molnar }
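/*
 * Example (hypothetical caller): read a task's PKRU value from its xsave
 * buffer, treating a NULL return as "component is in its init state",
 * whose XSAVE init value for PKRU is 0:
 *
 *	struct pkru_state *pk = get_xsave_addr(xsave, XFEATURE_PKRU);
 *	u32 pkru = pk ? pk->pkru : 0;
 */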
99404cd027bSDave Hansen 
995e8c24d3aSDave Hansen #ifdef CONFIG_ARCH_HAS_PKEYS
996e8c24d3aSDave Hansen 
99784594296SDave Hansen /*
998b79daf85SDave Hansen  * This will go out and modify the PKRU register to set the access
999b79daf85SDave Hansen  * rights for @pkey to @init_val.
100084594296SDave Hansen  */
100184594296SDave Hansen int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
100284594296SDave Hansen 			      unsigned long init_val)
100384594296SDave Hansen {
10049fe8a6f5SThomas Gleixner 	u32 old_pkru, new_pkru_bits = 0;
10059fe8a6f5SThomas Gleixner 	int pkey_shift;
100684594296SDave Hansen 
100784594296SDave Hansen 	/*
100884594296SDave Hansen 	 * This check implies XSAVE support.  OSPKE only gets
100984594296SDave Hansen 	 * set if we enable XSAVE and we enable PKU in XCR0.
101084594296SDave Hansen 	 */
10118a1dc55aSThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
101284594296SDave Hansen 		return -EINVAL;
101384594296SDave Hansen 
101416171bffSDave Hansen 	/*
101516171bffSDave Hansen 	 * This code should only be called with valid 'pkey'
101616171bffSDave Hansen 	 * values originating from in-kernel users.  Complain
101716171bffSDave Hansen 	 * if a bad value is observed.
101816171bffSDave Hansen 	 */
10199fe8a6f5SThomas Gleixner 	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
10209fe8a6f5SThomas Gleixner 		return -EINVAL;
102116171bffSDave Hansen 
102291c3dba7SYu-cheng Yu 	/* Set the bits we need in PKRU:  */
102384594296SDave Hansen 	if (init_val & PKEY_DISABLE_ACCESS)
102484594296SDave Hansen 		new_pkru_bits |= PKRU_AD_BIT;
102584594296SDave Hansen 	if (init_val & PKEY_DISABLE_WRITE)
102684594296SDave Hansen 		new_pkru_bits |= PKRU_WD_BIT;
102784594296SDave Hansen 
102891c3dba7SYu-cheng Yu 	/* Shift the bits in to the correct place in PKRU for pkey: */
10299fe8a6f5SThomas Gleixner 	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
103084594296SDave Hansen 	new_pkru_bits <<= pkey_shift;
103184594296SDave Hansen 
1032b79daf85SDave Hansen 	/* Get old PKRU and mask off any old bits in place: */
1033b79daf85SDave Hansen 	old_pkru = read_pkru();
1034b79daf85SDave Hansen 	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
103584594296SDave Hansen 
1036b79daf85SDave Hansen 	/* Write old part along with new part: */
1037b79daf85SDave Hansen 	write_pkru(old_pkru | new_pkru_bits);
103891c3dba7SYu-cheng Yu 
103991c3dba7SYu-cheng Yu 	return 0;
104091c3dba7SYu-cheng Yu }
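/*
 * Editor's sketch: the update above is plain bit arithmetic. With
 * PKRU_BITS_PER_PKEY == 2, PKRU_AD_BIT == 0x1 and PKRU_WD_BIT == 0x2,
 * disabling writes for pkey 3 on an all-permissive PKRU looks like:
 */
#if 0
	u32 pkru = 0;				/* read_pkru(): all access allowed */
	int pkey_shift = 3 * PKRU_BITS_PER_PKEY;	/* 6 */

	pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << pkey_shift);
	pkru |= PKRU_WD_BIT << pkey_shift;	/* pkru == 0x80 */
#endif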
1041e8c24d3aSDave Hansen #endif /* CONFIG_ARCH_HAS_PKEYS */
104291c3dba7SYu-cheng Yu 
104396258950SThomas Gleixner static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
104496258950SThomas Gleixner 			 void *init_xstate, unsigned int size)
1045f0d4f30aSIngo Molnar {
104696258950SThomas Gleixner 	membuf_write(to, from_xstate ? xstate : init_xstate, size);
1047f0d4f30aSIngo Molnar }
1048f0d4f30aSIngo Molnar 
1049eb6f5172SThomas Gleixner /**
1050ca834defSThomas Gleixner  * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1051eb6f5172SThomas Gleixner  * @to:		membuf descriptor
10523ac8d757SThomas Gleixner  * @fpstate:	The fpstate buffer from which to copy
105318164f66SSean Christopherson  * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
1054ca834defSThomas Gleixner  * @pkru_val:	The PKRU value to store in the PKRU component
1055eb6f5172SThomas Gleixner  * @copy_mode:	The requested copy mode
1056f0d4f30aSIngo Molnar  *
1057eb6f5172SThomas Gleixner  * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1058eb6f5172SThomas Gleixner  * format, i.e. from the kernel internal hardware dependent storage format
1059eb6f5172SThomas Gleixner  * to the requested @copy_mode. UABI XSTATE is always uncompacted!
1060eb6f5172SThomas Gleixner  *
1061eb6f5172SThomas Gleixner  * It supports partial copy but @to.pos always starts from zero.
1062f0d4f30aSIngo Molnar  */
10633ac8d757SThomas Gleixner void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
106418164f66SSean Christopherson 			       u64 xfeatures, u32 pkru_val,
106518164f66SSean Christopherson 			       enum xstate_copy_mode copy_mode)
1066f0d4f30aSIngo Molnar {
106796258950SThomas Gleixner 	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
1068f83ac56aSThomas Gleixner 	struct xregs_state *xinit = &init_fpstate.regs.xsave;
10693ac8d757SThomas Gleixner 	struct xregs_state *xsave = &fpstate->regs.xsave;
1070f0d4f30aSIngo Molnar 	struct xstate_header header;
107196258950SThomas Gleixner 	unsigned int zerofrom;
1072ffd3e504SThomas Gleixner 	u64 mask;
10738c0817f4SIngo Molnar 	int i;
1074f0d4f30aSIngo Molnar 
107593c2cdc9SThomas Gleixner 	memset(&header, 0, sizeof(header));
1076f0d4f30aSIngo Molnar 	header.xfeatures = xsave->header.xfeatures;
1077eb6f5172SThomas Gleixner 
1078eb6f5172SThomas Gleixner 	/* Mask out the feature bits depending on copy mode */
1079eb6f5172SThomas Gleixner 	switch (copy_mode) {
1080eb6f5172SThomas Gleixner 	case XSTATE_COPY_FP:
1081eb6f5172SThomas Gleixner 		header.xfeatures &= XFEATURE_MASK_FP;
1082eb6f5172SThomas Gleixner 		break;
1083eb6f5172SThomas Gleixner 
1084eb6f5172SThomas Gleixner 	case XSTATE_COPY_FX:
1085eb6f5172SThomas Gleixner 		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
1086eb6f5172SThomas Gleixner 		break;
1087eb6f5172SThomas Gleixner 
1088eb6f5172SThomas Gleixner 	case XSTATE_COPY_XSAVE:
108918164f66SSean Christopherson 		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
1090eb6f5172SThomas Gleixner 		break;
1091eb6f5172SThomas Gleixner 	}
1092f0d4f30aSIngo Molnar 
109396258950SThomas Gleixner 	/* Copy FP state up to MXCSR */
109496258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
109596258950SThomas Gleixner 		     &xinit->i387, off_mxcsr);
109696258950SThomas Gleixner 
109796258950SThomas Gleixner 	/* Copy MXCSR when SSE or YMM are set in the feature mask */
109896258950SThomas Gleixner 	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
109996258950SThomas Gleixner 		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
110096258950SThomas Gleixner 		     MXCSR_AND_FLAGS_SIZE);
110196258950SThomas Gleixner 
110296258950SThomas Gleixner 	/* Copy the remaining FP state */
110396258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
110496258950SThomas Gleixner 		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
110596258950SThomas Gleixner 		     sizeof(xsave->i387.st_space));
110696258950SThomas Gleixner 
110796258950SThomas Gleixner 	/* Copy the SSE state - shared with YMM, but independently managed */
110896258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
110996258950SThomas Gleixner 		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
111096258950SThomas Gleixner 		     sizeof(xsave->i387.xmm_space));
111196258950SThomas Gleixner 
1112eb6f5172SThomas Gleixner 	if (copy_mode != XSTATE_COPY_XSAVE)
1113eb6f5172SThomas Gleixner 		goto out;
1114eb6f5172SThomas Gleixner 
111596258950SThomas Gleixner 	/* Zero the padding area */
111696258950SThomas Gleixner 	membuf_zero(&to, sizeof(xsave->i387.padding));
111796258950SThomas Gleixner 
111896258950SThomas Gleixner 	/* Copy xsave->i387.sw_reserved */
111996258950SThomas Gleixner 	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
112096258950SThomas Gleixner 
112196258950SThomas Gleixner 	/* Copy the user space relevant state of @xsave->header */
112296258950SThomas Gleixner 	membuf_write(&to, &header, sizeof(header));
112396258950SThomas Gleixner 
112496258950SThomas Gleixner 	zerofrom = offsetof(struct xregs_state, extended_state_area);
1125f0d4f30aSIngo Molnar 
1126f0d4f30aSIngo Molnar 	/*
1127b1588884SChang S. Bae 	 * This 'mask' indicates which states to copy from fpstate.
1128b1588884SChang S. Bae 	 * Those extended states that are not present in fpstate are
1129b1588884SChang S. Bae 	 * either disabled or initialized:
1130b1588884SChang S. Bae 	 *
1131b1588884SChang S. Bae 	 * In non-compacted format, disabled features still occupy
1132b1588884SChang S. Bae 	 * state space but there is no state to copy from in the
1133b1588884SChang S. Bae 	 * compacted init_fpstate. The gap tracking will zero these
1134b1588884SChang S. Bae 	 * states.
1135b1588884SChang S. Bae 	 *
1136b1588884SChang S. Bae 	 * The extended features have an all zeroes init state. Thus,
1137b1588884SChang S. Bae 	 * remove them from 'mask' to zero those features in the user
1138b1588884SChang S. Bae 	 * buffer instead of retrieving them from init_fpstate.
1139f0d4f30aSIngo Molnar 	 */
1140b1588884SChang S. Bae 	mask = header.xfeatures;
1141471f0aa7SChang S. Bae 
1142ffd3e504SThomas Gleixner 	for_each_extended_xfeature(i, mask) {
114396258950SThomas Gleixner 		/*
114496258950SThomas Gleixner 		 * If there was a feature or alignment gap, zero the space
114596258950SThomas Gleixner 		 * in the destination buffer.
114696258950SThomas Gleixner 		 */
114796258950SThomas Gleixner 		if (zerofrom < xstate_offsets[i])
114896258950SThomas Gleixner 			membuf_zero(&to, xstate_offsets[i] - zerofrom);
114996258950SThomas Gleixner 
1150e84ba47eSDave Hansen 		if (i == XFEATURE_PKRU) {
1151e84ba47eSDave Hansen 			struct pkru_state pkru = {0};
1152e84ba47eSDave Hansen 			/*
1153e84ba47eSDave Hansen 			 * PKRU is not necessarily up to date in the
1154ca834defSThomas Gleixner 			 * XSAVE buffer. Use the provided value.
1155e84ba47eSDave Hansen 			 */
1156ca834defSThomas Gleixner 			pkru.pkru = pkru_val;
1157e84ba47eSDave Hansen 			membuf_write(&to, &pkru, sizeof(pkru));
1158e84ba47eSDave Hansen 		} else {
1159b1588884SChang S. Bae 			membuf_write(&to,
116096258950SThomas Gleixner 				     __raw_xsave_addr(xsave, i),
116196258950SThomas Gleixner 				     xstate_sizes[i]);
1162e84ba47eSDave Hansen 		}
116396258950SThomas Gleixner 		/*
116496258950SThomas Gleixner 		 * Keep track of the last copied state in the non-compacted
116596258950SThomas Gleixner 		 * target buffer for gap zeroing.
116696258950SThomas Gleixner 		 */
116796258950SThomas Gleixner 		zerofrom = xstate_offsets[i] + xstate_sizes[i];
1168f0d4f30aSIngo Molnar 	}
1169f0d4f30aSIngo Molnar 
1170eb6f5172SThomas Gleixner out:
117196258950SThomas Gleixner 	if (to.left)
117296258950SThomas Gleixner 		membuf_zero(&to, to.left);
117391c3dba7SYu-cheng Yu }
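/*
 * Editor's sketch: one concrete pass of the gap-zeroing loop above,
 * using the non-compacted offsets CPUID typically reports (the real
 * values always come from xstate_offsets[] and xstate_sizes[]):
 */
#if 0
	unsigned int zerofrom = 576;	/* 512-byte legacy area + 64-byte header */

	/* XFEATURE_YMM at offset 576, size 256: no gap to zero */
	membuf_write(&to, __raw_xsave_addr(xsave, XFEATURE_YMM), 256);
	zerofrom = 576 + 256;					/* 832 */

	/* XFEATURE_BNDREGS at offset 960: zero the 832..960 gap first */
	membuf_zero(&to, 960 - zerofrom);
	membuf_write(&to, __raw_xsave_addr(xsave, XFEATURE_BNDREGS), 64);
#endif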
117491c3dba7SYu-cheng Yu 
1175ca834defSThomas Gleixner /**
1176ca834defSThomas Gleixner  * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1177ca834defSThomas Gleixner  * @to:		membuf descriptor
1178ca834defSThomas Gleixner  * @tsk:	The task from which to copy the saved xstate
1179ca834defSThomas Gleixner  * @copy_mode:	The requested copy mode
1180ca834defSThomas Gleixner  *
1181ca834defSThomas Gleixner  * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1182ca834defSThomas Gleixner  * format, i.e. from the kernel internal hardware dependent storage format
1183ca834defSThomas Gleixner  * to the requested @copy_mode. UABI XSTATE is always uncompacted!
1184ca834defSThomas Gleixner  *
1185ca834defSThomas Gleixner  * It supports partial copy but @to.pos always starts from zero.
1186ca834defSThomas Gleixner  */
1187ca834defSThomas Gleixner void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
1188ca834defSThomas Gleixner 			     enum xstate_copy_mode copy_mode)
1189ca834defSThomas Gleixner {
11903ac8d757SThomas Gleixner 	__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
119118164f66SSean Christopherson 				  tsk->thread.fpu.fpstate->user_xfeatures,
1192ca834defSThomas Gleixner 				  tsk->thread.pkru, copy_mode);
1193ca834defSThomas Gleixner }
1194ca834defSThomas Gleixner 
1195522e9274SThomas Gleixner static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
1196522e9274SThomas Gleixner 			    const void *kbuf, const void __user *ubuf)
1197947f4947SThomas Gleixner {
1198522e9274SThomas Gleixner 	if (kbuf) {
1199522e9274SThomas Gleixner 		memcpy(dst, kbuf + offset, size);
1200522e9274SThomas Gleixner 	} else {
1201522e9274SThomas Gleixner 		if (copy_from_user(dst, ubuf + offset, size))
1202522e9274SThomas Gleixner 			return -EFAULT;
1203947f4947SThomas Gleixner 	}
1204522e9274SThomas Gleixner 	return 0;
1205947f4947SThomas Gleixner }
1206947f4947SThomas Gleixner 
1207522e9274SThomas Gleixner 
12082c87767cSKyle Huey /**
12092c87767cSKyle Huey  * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
12102c87767cSKyle Huey  * @fpstate:	The fpstate buffer to copy to
12112c87767cSKyle Huey  * @kbuf:	The UABI format buffer, if it comes from the kernel
12122c87767cSKyle Huey  * @ubuf:	The UABI format buffer, if it comes from userspace
12134a804c4fSKyle Huey  * @pkru:	The location to write the PKRU value to
12142c87767cSKyle Huey  *
12152c87767cSKyle Huey  * Converts from the UABI format into the kernel internal hardware
12162c87767cSKyle Huey  * dependent format.
12174a804c4fSKyle Huey  *
12184a804c4fSKyle Huey  * This function ultimately has three different callers with distinct PKRU
12194a804c4fSKyle Huey  * behavior.
12204a804c4fSKyle Huey  * 1.	When called from sigreturn the PKRU register will be restored from
12214a804c4fSKyle Huey  *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
12224a804c4fSKyle Huey  *	@fpstate is sufficient to cover this case, but the caller will also
12234a804c4fSKyle Huey  *	pass a pointer to the thread_struct's pkru field in @pkru and updating
12244a804c4fSKyle Huey  *	it is harmless.
12254a804c4fSKyle Huey  * 2.	When called from ptrace the PKRU register will be restored from the
12264a804c4fSKyle Huey  *	thread_struct's pkru field. A pointer to that is passed in @pkru.
1227d7e5aceaSKyle Huey  *	The kernel will restore it manually, so the XRSTOR behavior that resets
1228d7e5aceaSKyle Huey  *	the PKRU register to the hardware init value (0) if the corresponding
1229d7e5aceaSKyle Huey  *	xfeatures bit is not set is emulated here.
12304a804c4fSKyle Huey  * 3.	When called from KVM the PKRU register will be restored from the vcpu's
1231d7e5aceaSKyle Huey  *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1232d7e5aceaSKyle Huey  *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
1233d7e5aceaSKyle Huey  *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1234d7e5aceaSKyle Huey  *	bit is not set.
12352c87767cSKyle Huey  */
123649e4eb41SThomas Gleixner static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
12372c87767cSKyle Huey 			       const void __user *ubuf, u32 *pkru)
123879fecc2bSIngo Molnar {
123949e4eb41SThomas Gleixner 	struct xregs_state *xsave = &fpstate->regs.xsave;
124079fecc2bSIngo Molnar 	unsigned int offset, size;
124180d8ae86SEric Biggers 	struct xstate_header hdr;
1242522e9274SThomas Gleixner 	u64 mask;
1243522e9274SThomas Gleixner 	int i;
124479fecc2bSIngo Molnar 
124579fecc2bSIngo Molnar 	offset = offsetof(struct xregs_state, header);
1246522e9274SThomas Gleixner 	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
1247522e9274SThomas Gleixner 		return -EFAULT;
124879fecc2bSIngo Molnar 
124949e4eb41SThomas Gleixner 	if (validate_user_xstate_header(&hdr, fpstate))
125079fecc2bSIngo Molnar 		return -EINVAL;
125179fecc2bSIngo Molnar 
1252522e9274SThomas Gleixner 	/* Validate MXCSR when any of the related features is in use */
1253522e9274SThomas Gleixner 	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
1254522e9274SThomas Gleixner 	if (hdr.xfeatures & mask) {
1255522e9274SThomas Gleixner 		u32 mxcsr[2];
1256522e9274SThomas Gleixner 
1257522e9274SThomas Gleixner 		offset = offsetof(struct fxregs_state, mxcsr);
1258522e9274SThomas Gleixner 		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
1259522e9274SThomas Gleixner 			return -EFAULT;
1260522e9274SThomas Gleixner 
1261522e9274SThomas Gleixner 		/* Reserved bits in MXCSR must be zero. */
1262522e9274SThomas Gleixner 		if (mxcsr[0] & ~mxcsr_feature_mask)
1263947f4947SThomas Gleixner 			return -EINVAL;
1264947f4947SThomas Gleixner 
1265522e9274SThomas Gleixner 		/* SSE and YMM require MXCSR even when FP is not in use. */
1266522e9274SThomas Gleixner 		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
1267522e9274SThomas Gleixner 			xsave->i387.mxcsr = mxcsr[0];
1268522e9274SThomas Gleixner 			xsave->i387.mxcsr_mask = mxcsr[1];
1269522e9274SThomas Gleixner 		}
1270522e9274SThomas Gleixner 	}
1271522e9274SThomas Gleixner 
127279fecc2bSIngo Molnar 	for (i = 0; i < XFEATURE_MAX; i++) {
1273b91c0922SThomas Gleixner 		mask = BIT_ULL(i);
127479fecc2bSIngo Molnar 
1275b89eda48SEric Biggers 		if (hdr.xfeatures & mask) {
127607baeb04SSebastian Andrzej Siewior 			void *dst = __raw_xsave_addr(xsave, i);
127779fecc2bSIngo Molnar 
127879fecc2bSIngo Molnar 			offset = xstate_offsets[i];
127979fecc2bSIngo Molnar 			size = xstate_sizes[i];
128079fecc2bSIngo Molnar 
1281522e9274SThomas Gleixner 			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
1282522e9274SThomas Gleixner 				return -EFAULT;
128379fecc2bSIngo Molnar 		}
128479fecc2bSIngo Molnar 	}
128579fecc2bSIngo Molnar 
12864a804c4fSKyle Huey 	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
12874a804c4fSKyle Huey 		struct pkru_state *xpkru;
12884a804c4fSKyle Huey 
12894a804c4fSKyle Huey 		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
12904a804c4fSKyle Huey 		*pkru = xpkru->pkru;
1291d7e5aceaSKyle Huey 	} else {
1292d7e5aceaSKyle Huey 		/*
1293d7e5aceaSKyle Huey 		 * KVM may pass NULL here to indicate that it does not need
1294d7e5aceaSKyle Huey 		 * PKRU updated.
1295d7e5aceaSKyle Huey 		 */
1296d7e5aceaSKyle Huey 		if (pkru)
1297d7e5aceaSKyle Huey 			*pkru = 0;
12984a804c4fSKyle Huey 	}
12994a804c4fSKyle Huey 
130079fecc2bSIngo Molnar 	/*
130179fecc2bSIngo Molnar 	 * The state that came in from userspace was user-state only.
130279fecc2bSIngo Molnar 	 * Mask all the user states out of 'xfeatures':
130379fecc2bSIngo Molnar 	 */
13048ab22804SFenghua Yu 	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
130579fecc2bSIngo Molnar 
130679fecc2bSIngo Molnar 	/*
130779fecc2bSIngo Molnar 	 * Add back in the features that came in from userspace:
130879fecc2bSIngo Molnar 	 */
1309b89eda48SEric Biggers 	xsave->header.xfeatures |= hdr.xfeatures;
131079fecc2bSIngo Molnar 
131179fecc2bSIngo Molnar 	return 0;
131279fecc2bSIngo Molnar }
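/*
 * Editor's sketch of caller case 2 above, modeled on the ptrace regset
 * code: the target's thread_struct pkru field is handed in so that the
 * XRSTOR init behavior (PKRU reset to 0) is emulated when the buffer
 * does not carry PKRU state:
 */
#if 0
	ret = copy_uabi_from_kernel_to_xstate(target->thread.fpu.fpstate,
					      kbuf, &target->thread.pkru);
#endif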
131379fecc2bSIngo Molnar 
131479fecc2bSIngo Molnar /*
1315522e9274SThomas Gleixner  * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
1316ea4d6938SThomas Gleixner  * format and copy to the target thread. Used by ptrace and KVM.
1317522e9274SThomas Gleixner  */
13181c813ce0SKyle Huey int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
1319522e9274SThomas Gleixner {
13202c87767cSKyle Huey 	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
1321522e9274SThomas Gleixner }
1322522e9274SThomas Gleixner 
1323522e9274SThomas Gleixner /*
132443be46e8SThomas Gleixner  * Convert from a sigreturn standard-format user-space buffer to kernel
132543be46e8SThomas Gleixner  * XSAVE[S] format and copy to the target thread. This is called from the
132643be46e8SThomas Gleixner  * sigreturn() and rt_sigreturn() system calls.
132791c3dba7SYu-cheng Yu  */
13286a877d24SKyle Huey int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
13291cc34413SThomas Gleixner 				      const void __user *ubuf)
133091c3dba7SYu-cheng Yu {
13312c87767cSKyle Huey 	return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
133284594296SDave Hansen }
13330c608dadSAubrey Li 
1334f5daf836SThomas Gleixner static bool validate_independent_components(u64 mask)
133550f408d9SKan Liang {
1336a75c5289SThomas Gleixner 	u64 xchk;
133750f408d9SKan Liang 
1338a75c5289SThomas Gleixner 	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1339a75c5289SThomas Gleixner 		return false;
1340f5daf836SThomas Gleixner 
1341a75c5289SThomas Gleixner 	xchk = ~xfeatures_mask_independent();
134250f408d9SKan Liang 
1343a75c5289SThomas Gleixner 	if (WARN_ON_ONCE(!mask || mask & xchk))
1344a75c5289SThomas Gleixner 		return false;
134550f408d9SKan Liang 
1346a75c5289SThomas Gleixner 	return true;
134750f408d9SKan Liang }
134850f408d9SKan Liang 
134950f408d9SKan Liang /**
1350a75c5289SThomas Gleixner  * xsaves - Save selected components to a kernel xstate buffer
1351a75c5289SThomas Gleixner  * @xstate:	Pointer to the buffer
1352a75c5289SThomas Gleixner  * @mask:	Feature mask to select the components to save
135350f408d9SKan Liang  *
1354a75c5289SThomas Gleixner  * The @xstate buffer must be 64 byte aligned and correctly initialized as
1355a75c5289SThomas Gleixner  * XSAVES does not write the full xstate header. Before first use the
1356a75c5289SThomas Gleixner  * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1356a75c5289SThomas Gleixner  * buffer should be zeroed, otherwise a subsequent XRSTORS from that buffer
135850f408d9SKan Liang  *
1359f5daf836SThomas Gleixner  * The feature mask must be a subset of the independent features.
136050f408d9SKan Liang  */
1361a75c5289SThomas Gleixner void xsaves(struct xregs_state *xstate, u64 mask)
136250f408d9SKan Liang {
136350f408d9SKan Liang 	int err;
136450f408d9SKan Liang 
1365f5daf836SThomas Gleixner 	if (!validate_independent_components(mask))
136650f408d9SKan Liang 		return;
136750f408d9SKan Liang 
1368a75c5289SThomas Gleixner 	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
1369a75c5289SThomas Gleixner 	WARN_ON_ONCE(err);
1370a75c5289SThomas Gleixner }
1371a75c5289SThomas Gleixner 
1372a75c5289SThomas Gleixner /**
1373a75c5289SThomas Gleixner  * xrstors - Restore selected components from a kernel xstate buffer
1374a75c5289SThomas Gleixner  * @xstate:	Pointer to the buffer
1375a75c5289SThomas Gleixner  * @mask:	Feature mask to select the components to restore
1376a75c5289SThomas Gleixner  *
1377a75c5289SThomas Gleixner  * The @xstate buffer must be 64 byte aligned and correctly initialized
1378a75c5289SThomas Gleixner  * otherwise XRSTORS from that buffer can #GP.
1379a75c5289SThomas Gleixner  *
1380a75c5289SThomas Gleixner  * Proper usage is to restore the state which was saved with
1381a75c5289SThomas Gleixner  * xsaves() into @xstate.
1382a75c5289SThomas Gleixner  *
1383f5daf836SThomas Gleixner  * The feature mask must be a subset of the independent features.
1384a75c5289SThomas Gleixner  */
1385a75c5289SThomas Gleixner void xrstors(struct xregs_state *xstate, u64 mask)
1386a75c5289SThomas Gleixner {
1387a75c5289SThomas Gleixner 	int err;
1388a75c5289SThomas Gleixner 
1389f5daf836SThomas Gleixner 	if (!validate_independent_components(mask))
139050f408d9SKan Liang 		return;
139150f408d9SKan Liang 
1392a75c5289SThomas Gleixner 	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
1393a75c5289SThomas Gleixner 	WARN_ON_ONCE(err);
139450f408d9SKan Liang }
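/*
 * Editor's sketch of the intended pairing, modeled on the arch LBR
 * code (currently the only user of the independent-feature API). The
 * buffer is assumed zeroed and 64-byte aligned before the first use:
 */
#if 0
	struct xregs_state *xs;			/* zeroed, 64-byte aligned */

	xsaves(xs, XFEATURE_MASK_LBR);		/* snapshot LBRs on sched out */
	/* ... another task runs ... */
	xrstors(xs, XFEATURE_MASK_LBR);		/* restore the same subset */
#endif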
139550f408d9SKan Liang 
1396087df48cSThomas Gleixner #if IS_ENABLED(CONFIG_KVM)
1397087df48cSThomas Gleixner void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1398087df48cSThomas Gleixner {
1399087df48cSThomas Gleixner 	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1400087df48cSThomas Gleixner 
1401087df48cSThomas Gleixner 	if (addr)
1402087df48cSThomas Gleixner 		memset(addr, 0, xstate_sizes[xfeature]);
1403087df48cSThomas Gleixner }
1404087df48cSThomas Gleixner EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1405087df48cSThomas Gleixner #endif
1406087df48cSThomas Gleixner 
1407db8268dfSChang S. Bae #ifdef CONFIG_X86_64
14085529acf4SThomas Gleixner 
14095529acf4SThomas Gleixner #ifdef CONFIG_X86_DEBUG_FPU
14105529acf4SThomas Gleixner /*
14115529acf4SThomas Gleixner  * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
14125529acf4SThomas Gleixner  * can safely operate on the @fpstate buffer.
14135529acf4SThomas Gleixner  */
14145529acf4SThomas Gleixner static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
14155529acf4SThomas Gleixner {
14165529acf4SThomas Gleixner 	u64 xfd = __this_cpu_read(xfd_state);
14175529acf4SThomas Gleixner 
14185529acf4SThomas Gleixner 	if (fpstate->xfd == xfd)
14195529acf4SThomas Gleixner 		return true;
14205529acf4SThomas Gleixner 
14215529acf4SThomas Gleixner 	 /*
14225529acf4SThomas Gleixner 	  * The XFD MSR does not match fpstate->xfd. That's invalid when
14235529acf4SThomas Gleixner 	  * the passed-in fpstate is current's fpstate.
14245529acf4SThomas Gleixner 	  */
14255529acf4SThomas Gleixner 	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
14265529acf4SThomas Gleixner 		return false;
14275529acf4SThomas Gleixner 
14285529acf4SThomas Gleixner 	/*
14295529acf4SThomas Gleixner 	 * XRSTOR(S) from init_fpstate are always correct as it will just
14305529acf4SThomas Gleixner 	 * bring all components into init state and not read from the
14315529acf4SThomas Gleixner 	 * buffer. XSAVE(S) raises #PF after init.
14325529acf4SThomas Gleixner 	 */
14335529acf4SThomas Gleixner 	if (fpstate == &init_fpstate)
14345529acf4SThomas Gleixner 		return rstor;
14355529acf4SThomas Gleixner 
14365529acf4SThomas Gleixner 	/*
14375529acf4SThomas Gleixner 	 * XSAVE(S): clone(), fpu_swap_kvm_fpu()
14385529acf4SThomas Gleixner 	 * XRSTOR(S): fpu_swap_kvm_fpu()
14395529acf4SThomas Gleixner 	 */
14405529acf4SThomas Gleixner 
14415529acf4SThomas Gleixner 	/*
14425529acf4SThomas Gleixner 	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
14435529acf4SThomas Gleixner 	 * the buffer area for XFD-disabled state components.
14445529acf4SThomas Gleixner 	 */
14455529acf4SThomas Gleixner 	mask &= ~xfd;
14465529acf4SThomas Gleixner 
14475529acf4SThomas Gleixner 	/*
14485529acf4SThomas Gleixner 	 * Remove features which are valid in fpstate. They
14495529acf4SThomas Gleixner 	 * have space allocated in fpstate.
14505529acf4SThomas Gleixner 	 */
14515529acf4SThomas Gleixner 	mask &= ~fpstate->xfeatures;
14525529acf4SThomas Gleixner 
14535529acf4SThomas Gleixner 	/*
14545529acf4SThomas Gleixner 	 * Any remaining state components in 'mask' might be written
14555529acf4SThomas Gleixner 	 * by XSAVE/XRSTOR. Fail validation it found.
14565529acf4SThomas Gleixner 	 * by XSAVE/XRSTOR. Fail validation if any are found.
14575529acf4SThomas Gleixner 	return !mask;
14585529acf4SThomas Gleixner }
14595529acf4SThomas Gleixner 
14605529acf4SThomas Gleixner void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
14615529acf4SThomas Gleixner {
14625529acf4SThomas Gleixner 	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
14635529acf4SThomas Gleixner }
14645529acf4SThomas Gleixner #endif /* CONFIG_X86_DEBUG_FPU */
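/*
 * Editor's sketch of the invariant checked above when the XFD MSR and
 * fpstate->xfd disagree: a requested component is only safe if either
 * the hardware will not touch its buffer area (XFD armed) or fpstate
 * has space allocated for it; anything left over could overrun:
 */
#if 0
	mask &= ~xfd;			/* XFD-armed components are not accessed */
	mask &= ~fpstate->xfeatures;	/* these have buffer space allocated */
	valid = !mask;			/* leftovers could overrun the buffer */
#endif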
14655529acf4SThomas Gleixner 
1466db3e7321SChang S. Bae static int __init xfd_update_static_branch(void)
1467db3e7321SChang S. Bae {
1468db3e7321SChang S. Bae 	/*
1469db3e7321SChang S. Bae 	 * If init_fpstate.xfd has bits set then dynamic features are
1470db3e7321SChang S. Bae 	 * available and the dynamic sizing must be enabled.
1471db3e7321SChang S. Bae 	 */
1472db3e7321SChang S. Bae 	if (init_fpstate.xfd)
1473db3e7321SChang S. Bae 		static_branch_enable(&__fpu_state_size_dynamic);
1474db3e7321SChang S. Bae 	return 0;
1475db3e7321SChang S. Bae }
1476db3e7321SChang S. Bae arch_initcall(xfd_update_static_branch)
1477db3e7321SChang S. Bae 
1478500afbf6SChang S. Bae void fpstate_free(struct fpu *fpu)
1479500afbf6SChang S. Bae {
1480db3e7321SChang S. Bae 	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1481500afbf6SChang S. Bae 		vfree(fpu->fpstate);
1482500afbf6SChang S. Bae }
1483500afbf6SChang S. Bae 
1484500afbf6SChang S. Bae /**
1485500afbf6SChang S. Bae  * fpstate_realloc - Reallocate struct fpstate for the requested new features
1486500afbf6SChang S. Bae  *
1487500afbf6SChang S. Bae  * @xfeatures:	A bitmap of xstate features which extend the enabled features
1488500afbf6SChang S. Bae  *		of that task
1489500afbf6SChang S. Bae  * @ksize:	The required size for the kernel buffer
1490500afbf6SChang S. Bae  * @usize:	The required size for user space buffers
1491c270ce39SThomas Gleixner  * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
1492500afbf6SChang S. Bae  *
1493500afbf6SChang S. Bae  * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
1494500afbf6SChang S. Bae  * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
1495500afbf6SChang S. Bae  * with large states are likely to live longer.
1496500afbf6SChang S. Bae  *
1497500afbf6SChang S. Bae  * Returns: 0 on success, -ENOMEM on allocation error.
1498500afbf6SChang S. Bae  */
1499500afbf6SChang S. Bae static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
1500c270ce39SThomas Gleixner 			   unsigned int usize, struct fpu_guest *guest_fpu)
1501500afbf6SChang S. Bae {
1502500afbf6SChang S. Bae 	struct fpu *fpu = &current->thread.fpu;
1503500afbf6SChang S. Bae 	struct fpstate *curfps, *newfps = NULL;
1504500afbf6SChang S. Bae 	unsigned int fpsize;
1505c270ce39SThomas Gleixner 	bool in_use;
1506500afbf6SChang S. Bae 
1507500afbf6SChang S. Bae 	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
1508500afbf6SChang S. Bae 
1509500afbf6SChang S. Bae 	newfps = vzalloc(fpsize);
1510500afbf6SChang S. Bae 	if (!newfps)
1511500afbf6SChang S. Bae 		return -ENOMEM;
1512500afbf6SChang S. Bae 	newfps->size = ksize;
1513500afbf6SChang S. Bae 	newfps->user_size = usize;
1514500afbf6SChang S. Bae 	newfps->is_valloc = true;
1515500afbf6SChang S. Bae 
1516c270ce39SThomas Gleixner 	/*
1517c270ce39SThomas Gleixner 	 * When a guest FPU is supplied, use @guest_fpu->fpstate
1518c270ce39SThomas Gleixner 	 * as the reference, independent of whether it is in use or not.
1519c270ce39SThomas Gleixner 	 */
1520c270ce39SThomas Gleixner 	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
1521c270ce39SThomas Gleixner 
1522c270ce39SThomas Gleixner 	/* Determine whether @curfps is the active fpstate */
1523c270ce39SThomas Gleixner 	in_use = fpu->fpstate == curfps;
1524c270ce39SThomas Gleixner 
1525c270ce39SThomas Gleixner 	if (guest_fpu) {
1526c270ce39SThomas Gleixner 		newfps->is_guest = true;
1527c270ce39SThomas Gleixner 		newfps->is_confidential = curfps->is_confidential;
1528c270ce39SThomas Gleixner 		newfps->in_use = curfps->in_use;
1529c270ce39SThomas Gleixner 		guest_fpu->xfeatures |= xfeatures;
1530c60427ddSThomas Gleixner 		guest_fpu->uabi_size = usize;
1531c270ce39SThomas Gleixner 	}
1532c270ce39SThomas Gleixner 
1533500afbf6SChang S. Bae 	fpregs_lock();
1534500afbf6SChang S. Bae 	/*
1535c270ce39SThomas Gleixner 	 * If @curfps is in use, ensure that the current state is in the
1536c270ce39SThomas Gleixner 	 * registers before swapping fpstate as that might invalidate it
1537c270ce39SThomas Gleixner 	 * due to layout changes.
1538500afbf6SChang S. Bae 	 */
1539c270ce39SThomas Gleixner 	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
1540500afbf6SChang S. Bae 		fpregs_restore_userregs();
1541500afbf6SChang S. Bae 
1542500afbf6SChang S. Bae 	newfps->xfeatures = curfps->xfeatures | xfeatures;
1543500afbf6SChang S. Bae 	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
1544500afbf6SChang S. Bae 	newfps->xfd = curfps->xfd & ~xfeatures;
1545500afbf6SChang S. Bae 
1546500afbf6SChang S. Bae 	/* Do the final updates within the locked region */
1547500afbf6SChang S. Bae 	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
1548500afbf6SChang S. Bae 
1549c270ce39SThomas Gleixner 	if (guest_fpu) {
1550c270ce39SThomas Gleixner 		guest_fpu->fpstate = newfps;
1551c270ce39SThomas Gleixner 		/* If curfps is active, update the FPU fpstate pointer */
1552c270ce39SThomas Gleixner 		if (in_use)
1553c270ce39SThomas Gleixner 			fpu->fpstate = newfps;
1554c270ce39SThomas Gleixner 	} else {
1555c270ce39SThomas Gleixner 		fpu->fpstate = newfps;
1556c270ce39SThomas Gleixner 	}
1557c270ce39SThomas Gleixner 
1558c270ce39SThomas Gleixner 	if (in_use)
1559c270ce39SThomas Gleixner 		xfd_update_state(fpu->fpstate);
1560500afbf6SChang S. Bae 	fpregs_unlock();
1561500afbf6SChang S. Bae 
1562c270ce39SThomas Gleixner 	/* Only free valloc'ed state */
1563c270ce39SThomas Gleixner 	if (curfps && curfps->is_valloc)
1564500afbf6SChang S. Bae 		vfree(curfps);
1565c270ce39SThomas Gleixner 
1566500afbf6SChang S. Bae 	return 0;
1567500afbf6SChang S. Bae }
1568500afbf6SChang S. Bae 
1569db8268dfSChang S. Bae static int validate_sigaltstack(unsigned int usize)
1570db8268dfSChang S. Bae {
1571db8268dfSChang S. Bae 	struct task_struct *thread, *leader = current->group_leader;
1572db8268dfSChang S. Bae 	unsigned long framesize = get_sigframe_size();
1573db8268dfSChang S. Bae 
1574db8268dfSChang S. Bae 	lockdep_assert_held(&current->sighand->siglock);
1575db8268dfSChang S. Bae 
1576db8268dfSChang S. Bae 	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1577db8268dfSChang S. Bae 	framesize -= fpu_user_cfg.max_size;
1578db8268dfSChang S. Bae 	framesize += usize;
1579db8268dfSChang S. Bae 	for_each_thread(leader, thread) {
1580db8268dfSChang S. Bae 		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1581db8268dfSChang S. Bae 			return -ENOSPC;
1582db8268dfSChang S. Bae 	}
1583db8268dfSChang S. Bae 	return 0;
1584db8268dfSChang S. Bae }
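/*
 * Editor's sketch with illustrative numbers: if get_sigframe_size() is
 * 12 KiB while fpu_user_cfg.max_size is 8 KiB, growing the user state
 * to usize == 11 KiB implies 15 KiB signal frames. Any thread whose
 * registered sigaltstack is smaller than that must fail the request
 * with -ENOSPC rather than overflow the stack at signal delivery:
 */
#if 0
	framesize = 12288 - 8192 + 11264;	/* 15360 bytes required */
#endif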
1585db8268dfSChang S. Bae 
1586980fe2fdSThomas Gleixner static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
1587db8268dfSChang S. Bae {
1588db8268dfSChang S. Bae 	/*
1589db8268dfSChang S. Bae 	 * This deliberately does not exclude !XSAVES as we still might
1590db8268dfSChang S. Bae 	 * decide to optionally context switch XCR0 or talk the silicon
1591500afbf6SChang S. Bae 	 * vendors into extending XFD for the pre AMX states, especially
1592500afbf6SChang S. Bae 	 * AVX512.
1593db8268dfSChang S. Bae 	 */
15948ad7e8f6SThomas Gleixner 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
1595db8268dfSChang S. Bae 	struct fpu *fpu = &current->group_leader->thread.fpu;
1596980fe2fdSThomas Gleixner 	struct fpu_state_perm *perm;
1597db8268dfSChang S. Bae 	unsigned int ksize, usize;
1598db8268dfSChang S. Bae 	u64 mask;
1599980fe2fdSThomas Gleixner 	int ret = 0;
1600db8268dfSChang S. Bae 
1601db8268dfSChang S. Bae 	/* Check whether fully enabled */
1602db8268dfSChang S. Bae 	if ((permitted & requested) == requested)
1603db8268dfSChang S. Bae 		return 0;
1604db8268dfSChang S. Bae 
1605db8268dfSChang S. Bae 	/* Calculate the resulting kernel state size */
1606db8268dfSChang S. Bae 	mask = permitted | requested;
1607781c64bfSThomas Gleixner 	/* Take supervisor states into account on the host */
1608781c64bfSThomas Gleixner 	if (!guest)
1609781c64bfSThomas Gleixner 		mask |= xfeatures_mask_supervisor();
1610db8268dfSChang S. Bae 	ksize = xstate_calculate_size(mask, compacted);
1611db8268dfSChang S. Bae 
1612db8268dfSChang S. Bae 	/* Calculate the resulting user state size */
1613db8268dfSChang S. Bae 	mask &= XFEATURE_MASK_USER_SUPPORTED;
1614db8268dfSChang S. Bae 	usize = xstate_calculate_size(mask, false);
1615db8268dfSChang S. Bae 
1616980fe2fdSThomas Gleixner 	if (!guest) {
1617db8268dfSChang S. Bae 		ret = validate_sigaltstack(usize);
1618db8268dfSChang S. Bae 		if (ret)
1619db8268dfSChang S. Bae 			return ret;
1620980fe2fdSThomas Gleixner 	}
1621db8268dfSChang S. Bae 
1622980fe2fdSThomas Gleixner 	perm = guest ? &fpu->guest_perm : &fpu->perm;
1623db8268dfSChang S. Bae 	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
1624063452fdSYang Zhong 	WRITE_ONCE(perm->__state_perm, mask);
1625db8268dfSChang S. Bae 	/* Protected by sighand lock */
1626980fe2fdSThomas Gleixner 	perm->__state_size = ksize;
1627980fe2fdSThomas Gleixner 	perm->__user_state_size = usize;
1628db8268dfSChang S. Bae 	return ret;
1629db8268dfSChang S. Bae }
1630db8268dfSChang S. Bae 
1631db8268dfSChang S. Bae /*
1632db8268dfSChang S. Bae  * Permissions array to map facilities with more than one component
1633db8268dfSChang S. Bae  */
1634db8268dfSChang S. Bae static const u64 xstate_prctl_req[XFEATURE_MAX] = {
16352308ee57SChang S. Bae 	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
1636db8268dfSChang S. Bae };
1637db8268dfSChang S. Bae 
1638980fe2fdSThomas Gleixner static int xstate_request_perm(unsigned long idx, bool guest)
1639db8268dfSChang S. Bae {
1640db8268dfSChang S. Bae 	u64 permitted, requested;
1641db8268dfSChang S. Bae 	int ret;
1642db8268dfSChang S. Bae 
1643db8268dfSChang S. Bae 	if (idx >= XFEATURE_MAX)
1644db8268dfSChang S. Bae 		return -EINVAL;
1645db8268dfSChang S. Bae 
1646db8268dfSChang S. Bae 	/*
1647db8268dfSChang S. Bae 	 * Look up the facility mask which can require more than
1648db8268dfSChang S. Bae 	 * one xstate component.
1649db8268dfSChang S. Bae 	 */
1650db8268dfSChang S. Bae 	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
1651db8268dfSChang S. Bae 	requested = xstate_prctl_req[idx];
1652db8268dfSChang S. Bae 	if (!requested)
1653db8268dfSChang S. Bae 		return -EOPNOTSUPP;
1654db8268dfSChang S. Bae 
1655db8268dfSChang S. Bae 	if ((fpu_user_cfg.max_features & requested) != requested)
1656db8268dfSChang S. Bae 		return -EOPNOTSUPP;
1657db8268dfSChang S. Bae 
1658db8268dfSChang S. Bae 	/* Lockless quick check */
1659980fe2fdSThomas Gleixner 	permitted = xstate_get_group_perm(guest);
1660db8268dfSChang S. Bae 	if ((permitted & requested) == requested)
1661db8268dfSChang S. Bae 		return 0;
1662db8268dfSChang S. Bae 
1663db8268dfSChang S. Bae 	/* Protect against concurrent modifications */
1664db8268dfSChang S. Bae 	spin_lock_irq(&current->sighand->siglock);
1665980fe2fdSThomas Gleixner 	permitted = xstate_get_group_perm(guest);
1666980fe2fdSThomas Gleixner 
1667980fe2fdSThomas Gleixner 	/* First vCPU allocation locks the permissions. */
1668980fe2fdSThomas Gleixner 	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
1669980fe2fdSThomas Gleixner 		ret = -EBUSY;
1670980fe2fdSThomas Gleixner 	else
1671980fe2fdSThomas Gleixner 		ret = __xstate_request_perm(permitted, requested, guest);
1672db8268dfSChang S. Bae 	spin_unlock_irq(&current->sighand->siglock);
1673db8268dfSChang S. Bae 	return ret;
1674db8268dfSChang S. Bae }
1675783e87b4SChang S. Bae 
1676c270ce39SThomas Gleixner int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
1677783e87b4SChang S. Bae {
1678783e87b4SChang S. Bae 	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
1679c270ce39SThomas Gleixner 	struct fpu_state_perm *perm;
1680783e87b4SChang S. Bae 	unsigned int ksize, usize;
1681783e87b4SChang S. Bae 	struct fpu *fpu;
1682783e87b4SChang S. Bae 
1683783e87b4SChang S. Bae 	if (!xfd_event) {
1684c270ce39SThomas Gleixner 		if (!guest_fpu)
1685783e87b4SChang S. Bae 			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
1686783e87b4SChang S. Bae 		return 0;
1687783e87b4SChang S. Bae 	}
1688783e87b4SChang S. Bae 
1689783e87b4SChang S. Bae 	/* Protect against concurrent modifications */
1690783e87b4SChang S. Bae 	spin_lock_irq(&current->sighand->siglock);
1691783e87b4SChang S. Bae 
1692783e87b4SChang S. Bae 	/* If not permitted let it die */
1693c270ce39SThomas Gleixner 	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
1694783e87b4SChang S. Bae 		spin_unlock_irq(&current->sighand->siglock);
1695783e87b4SChang S. Bae 		return -EPERM;
1696783e87b4SChang S. Bae 	}
1697783e87b4SChang S. Bae 
1698783e87b4SChang S. Bae 	fpu = &current->group_leader->thread.fpu;
1699c270ce39SThomas Gleixner 	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
1700c270ce39SThomas Gleixner 	ksize = perm->__state_size;
1701c270ce39SThomas Gleixner 	usize = perm->__user_state_size;
1702c270ce39SThomas Gleixner 
1703783e87b4SChang S. Bae 	/*
1704783e87b4SChang S. Bae 	 * The feature is permitted and the state size is sufficient. Dropping
1705783e87b4SChang S. Bae 	 * the lock is safe here: even if more features are added from
1706783e87b4SChang S. Bae 	 * another task, the retrieved buffer sizes remain valid for the
1707783e87b4SChang S. Bae 	 * currently requested feature(s).
1708783e87b4SChang S. Bae 	 */
1709783e87b4SChang S. Bae 	spin_unlock_irq(&current->sighand->siglock);
1710783e87b4SChang S. Bae 
1711783e87b4SChang S. Bae 	/*
1712783e87b4SChang S. Bae 	 * Try to allocate a new fpstate. If that fails there is no way
1713783e87b4SChang S. Bae 	 * out.
1714783e87b4SChang S. Bae 	 */
1715c270ce39SThomas Gleixner 	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
1716783e87b4SChang S. Bae 		return -EFAULT;
1717783e87b4SChang S. Bae 	return 0;
1718783e87b4SChang S. Bae }
1719c270ce39SThomas Gleixner 
1720c270ce39SThomas Gleixner int xfd_enable_feature(u64 xfd_err)
1721c270ce39SThomas Gleixner {
1722c270ce39SThomas Gleixner 	return __xfd_enable_feature(xfd_err, NULL);
1723c270ce39SThomas Gleixner }
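/*
 * Editor's sketch of the consumer side, loosely modeled on the #NM
 * handler (handle_xfd_event() in traps.c; details abbreviated): the
 * first touch of an XFD-armed feature traps, the error MSR names the
 * feature, and a denied request kills the task:
 */
#if 0
	rdmsrl(MSR_IA32_XFD_ERR, xfd_err);
	wrmsrl(MSR_IA32_XFD_ERR, 0);

	if (xfd_enable_feature(xfd_err) == -EPERM)
		force_sig_fault(SIGILL, ILL_ILLOPC, error_get_trap_addr(regs));
#endif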
1724c270ce39SThomas Gleixner 
1725db8268dfSChang S. Bae #else /* CONFIG_X86_64 */
1726980fe2fdSThomas Gleixner static inline int xstate_request_perm(unsigned long idx, bool guest)
1727db8268dfSChang S. Bae {
1728db8268dfSChang S. Bae 	return -EPERM;
1729db8268dfSChang S. Bae }
1730db8268dfSChang S. Bae #endif  /* !CONFIG_X86_64 */
1731db8268dfSChang S. Bae 
1732c862dcd1SYang Zhong u64 xstate_get_guest_group_perm(void)
1733980fe2fdSThomas Gleixner {
1734980fe2fdSThomas Gleixner 	return xstate_get_group_perm(true);
1735980fe2fdSThomas Gleixner }
1736980fe2fdSThomas Gleixner EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1737980fe2fdSThomas Gleixner 
1738db8268dfSChang S. Bae /**
1739db8268dfSChang S. Bae  * fpu_xstate_prctl - xstate permission operations
1741db8268dfSChang S. Bae  * @option:	A subfunction of arch_prctl()
1742db8268dfSChang S. Bae  * @arg2:	option argument
1743db8268dfSChang S. Bae  * Return:	0 if successful; otherwise, an error code
1744db8268dfSChang S. Bae  *
1745db8268dfSChang S. Bae  * Option arguments:
1746db8268dfSChang S. Bae  *
1747db8268dfSChang S. Bae  * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1748db8268dfSChang S. Bae  * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1749db8268dfSChang S. Bae  * ARCH_REQ_XCOMP_PERM: Facility number requested
1750db8268dfSChang S. Bae  *
1751db8268dfSChang S. Bae  * For facilities which require more than one XSTATE component, the request
1752db8268dfSChang S. Bae  * must be the highest state component number related to that facility,
1753db8268dfSChang S. Bae  * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1754db8268dfSChang S. Bae  * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1755db8268dfSChang S. Bae  */
1756f5c0b4f3SThomas Gleixner long fpu_xstate_prctl(int option, unsigned long arg2)
1757db8268dfSChang S. Bae {
1758db8268dfSChang S. Bae 	u64 __user *uptr = (u64 __user *)arg2;
1759db8268dfSChang S. Bae 	u64 permitted, supported;
1760db8268dfSChang S. Bae 	unsigned long idx = arg2;
1761980fe2fdSThomas Gleixner 	bool guest = false;
1762db8268dfSChang S. Bae 
1763db8268dfSChang S. Bae 	switch (option) {
1764db8268dfSChang S. Bae 	case ARCH_GET_XCOMP_SUPP:
1765db8268dfSChang S. Bae 		supported = fpu_user_cfg.max_features |	fpu_user_cfg.legacy_features;
1766db8268dfSChang S. Bae 		return put_user(supported, uptr);
1767db8268dfSChang S. Bae 
1768db8268dfSChang S. Bae 	case ARCH_GET_XCOMP_PERM:
1769db8268dfSChang S. Bae 		/*
1770db8268dfSChang S. Bae 		 * Lockless snapshot as it can also change right after
1771db8268dfSChang S. Bae 		 * dropping the lock.
1772db8268dfSChang S. Bae 		 */
1773db8268dfSChang S. Bae 		permitted = xstate_get_host_group_perm();
1774db8268dfSChang S. Bae 		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1775db8268dfSChang S. Bae 		return put_user(permitted, uptr);
1776db8268dfSChang S. Bae 
1777980fe2fdSThomas Gleixner 	case ARCH_GET_XCOMP_GUEST_PERM:
1778980fe2fdSThomas Gleixner 		permitted = xstate_get_guest_group_perm();
1779980fe2fdSThomas Gleixner 		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1780980fe2fdSThomas Gleixner 		return put_user(permitted, uptr);
1781980fe2fdSThomas Gleixner 
1782980fe2fdSThomas Gleixner 	case ARCH_REQ_XCOMP_GUEST_PERM:
1783980fe2fdSThomas Gleixner 		guest = true;
1784980fe2fdSThomas Gleixner 		fallthrough;
1785980fe2fdSThomas Gleixner 
1786db8268dfSChang S. Bae 	case ARCH_REQ_XCOMP_PERM:
1787db8268dfSChang S. Bae 		if (!IS_ENABLED(CONFIG_X86_64))
1788db8268dfSChang S. Bae 			return -EOPNOTSUPP;
1789db8268dfSChang S. Bae 
1790980fe2fdSThomas Gleixner 		return xstate_request_perm(idx, guest);
1791db8268dfSChang S. Bae 
1792db8268dfSChang S. Bae 	default:
1793db8268dfSChang S. Bae 		return -EINVAL;
1794db8268dfSChang S. Bae 	}
1795db8268dfSChang S. Bae }
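/*
 * Editor's sketch of the user-space side of this handshake (not kernel
 * code; prctl constants from <asm/prctl.h>, AMX tile data is xstate
 * component 18). Error handling abbreviated:
 */
#if 0
#include <sys/syscall.h>
#include <unistd.h>
#include <asm/prctl.h>

static int request_amx(void)
{
	unsigned long long supported = 0;

	if (syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &supported))
		return -1;
	if (!(supported & (1ULL << 18)))	/* XFEATURE_XTILE_DATA */
		return -1;
	return syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, 18);
}
#endif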
1796db8268dfSChang S. Bae 
17970c608dadSAubrey Li #ifdef CONFIG_PROC_PID_ARCH_STATUS
17980c608dadSAubrey Li /*
17990c608dadSAubrey Li  * Report the amount of time elapsed in milliseconds since the last AVX512
18000c608dadSAubrey Li  * use in the task.
18010c608dadSAubrey Li  */
18020c608dadSAubrey Li static void avx512_status(struct seq_file *m, struct task_struct *task)
18030c608dadSAubrey Li {
18040c608dadSAubrey Li 	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
18050c608dadSAubrey Li 	long delta;
18060c608dadSAubrey Li 
18070c608dadSAubrey Li 	if (!timestamp) {
18080c608dadSAubrey Li 		/*
18090c608dadSAubrey Li 		 * Report -1 if no AVX512 usage
18100c608dadSAubrey Li 		 */
18110c608dadSAubrey Li 		delta = -1;
18120c608dadSAubrey Li 	} else {
18130c608dadSAubrey Li 		delta = (long)(jiffies - timestamp);
18140c608dadSAubrey Li 		/*
18150c608dadSAubrey Li 		 * Cap to LONG_MAX if time difference > LONG_MAX
18160c608dadSAubrey Li 		 */
18170c608dadSAubrey Li 		if (delta < 0)
18180c608dadSAubrey Li 			delta = LONG_MAX;
18190c608dadSAubrey Li 		delta = jiffies_to_msecs(delta);
18200c608dadSAubrey Li 	}
18210c608dadSAubrey Li 
18220c608dadSAubrey Li 	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
18230c608dadSAubrey Li 	seq_putc(m, '\n');
18240c608dadSAubrey Li }
18250c608dadSAubrey Li 
18260c608dadSAubrey Li /*
18270c608dadSAubrey Li  * Report architecture specific information
18280c608dadSAubrey Li  */
18290c608dadSAubrey Li int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
18300c608dadSAubrey Li 			struct pid *pid, struct task_struct *task)
18310c608dadSAubrey Li {
18320c608dadSAubrey Li 	/*
18330c608dadSAubrey Li 	 * Report AVX512 state if the processor and the build option support it.
18340c608dadSAubrey Li 	 */
18350c608dadSAubrey Li 	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
18360c608dadSAubrey Li 		avx512_status(m, task);
18370c608dadSAubrey Li 
18380c608dadSAubrey Li 	return 0;
18390c608dadSAubrey Li }
18400c608dadSAubrey Li #endif /* CONFIG_PROC_PID_ARCH_STATUS */
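/*
 * Editor's sketch of the consumer side (user-space C, not kernel code):
 * AVX512_elapsed_ms is -1 when the task never used AVX-512, otherwise
 * the milliseconds since use was last observed at a context switch, so
 * the value is inherently approximate:
 */
#if 0
#include <stdio.h>
#include <string.h>

static void print_avx512_elapsed(void)
{
	char line[128];
	FILE *f = fopen("/proc/self/arch_status", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "AVX512_elapsed_ms:", 18))
			fputs(line, stdout);
	}
	fclose(f);
}
#endif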
1841