xref: /openbmc/linux/arch/x86/kernel/fpu/xstate.c (revision ad856280)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
262784854SIngo Molnar /*
362784854SIngo Molnar  * xsave/xrstor support.
462784854SIngo Molnar  *
562784854SIngo Molnar  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
662784854SIngo Molnar  */
7ffd3e504SThomas Gleixner #include <linux/bitops.h>
862784854SIngo Molnar #include <linux/compat.h>
962784854SIngo Molnar #include <linux/cpu.h>
10e8c24d3aSDave Hansen #include <linux/mman.h>
11db8268dfSChang S. Bae #include <linux/nospec.h>
1284594296SDave Hansen #include <linux/pkeys.h>
130c608dadSAubrey Li #include <linux/seq_file.h>
140c608dadSAubrey Li #include <linux/proc_fs.h>
15500afbf6SChang S. Bae #include <linux/vmalloc.h>
1659a36d16SIngo Molnar 
1762784854SIngo Molnar #include <asm/fpu/api.h>
1859a36d16SIngo Molnar #include <asm/fpu/regset.h>
19d9d005f3SThomas Gleixner #include <asm/fpu/signal.h>
20d9d005f3SThomas Gleixner #include <asm/fpu/xcr.h>
21b992c660SIngo Molnar 
2262784854SIngo Molnar #include <asm/tlbflush.h>
23db8268dfSChang S. Bae #include <asm/prctl.h>
24db8268dfSChang S. Bae #include <asm/elf.h>
25126fe040SThomas Gleixner 
26500afbf6SChang S. Bae #include "context.h"
2796034455SThomas Gleixner #include "internal.h"
2834002571SThomas Gleixner #include "legacy.h"
29126fe040SThomas Gleixner #include "xstate.h"
3062784854SIngo Molnar 
/*
 * Iterate @bit over every extended xfeature number set in @mask, i.e.
 * starting at FIRST_EXTENDED_XFEATURE and thereby skipping the legacy
 * FP and SSE states.
 *
 * NOTE(review): this expands to an assignment statement followed by a
 * for-loop header (no do/while(0) wrapper), so it must only be used in
 * for-statement position with a body attached.
 */
31ffd3e504SThomas Gleixner #define for_each_extended_xfeature(bit, mask)				\
32ffd3e504SThomas Gleixner 	(bit) = FIRST_EXTENDED_XFEATURE;				\
33ffd3e504SThomas Gleixner 	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
34ffd3e504SThomas Gleixner 
351f96b1efSDave Hansen /*
361f96b1efSDave Hansen  * Although we spell it out in here, the Processor Trace
371f96b1efSDave Hansen  * xfeature is completely unused.  We use other mechanisms
381f96b1efSDave Hansen  * to save/restore PT state in Linux.
391f96b1efSDave Hansen  */
/*
 * Human-readable feature names, indexed by XFEATURE_* number.
 * cpu_has_xfeatures() maps the highest set mask bit to an entry here via
 * fls64()-1, clamped to the last ("unknown") entry for out-of-range bits.
 */
405b073430SIngo Molnar static const char *xfeature_names[] =
415b073430SIngo Molnar {
425b073430SIngo Molnar 	"x87 floating point registers"	,
435b073430SIngo Molnar 	"SSE registers"			,
445b073430SIngo Molnar 	"AVX registers"			,
455b073430SIngo Molnar 	"MPX bounds registers"		,
465b073430SIngo Molnar 	"MPX CSR"			,
475b073430SIngo Molnar 	"AVX-512 opmask"		,
485b073430SIngo Molnar 	"AVX-512 Hi256"			,
495b073430SIngo Molnar 	"AVX-512 ZMM_Hi256"		,
501f96b1efSDave Hansen 	"Processor Trace (unused)"	,
51c8df4009SDave Hansen 	"Protection Keys User registers",
52b454feb9SYu-cheng Yu 	"PASID state",
535b073430SIngo Molnar 	"unknown xstate feature"	,
54eec2113eSChang S. Bae 	"unknown xstate feature"	,
55eec2113eSChang S. Bae 	"unknown xstate feature"	,
56eec2113eSChang S. Bae 	"unknown xstate feature"	,
57eec2113eSChang S. Bae 	"unknown xstate feature"	,
58eec2113eSChang S. Bae 	"unknown xstate feature"	,
59eec2113eSChang S. Bae 	"AMX Tile config"		,
60eec2113eSChang S. Bae 	"AMX Tile data"			,
61eec2113eSChang S. Bae 	"unknown xstate feature"	,
625b073430SIngo Molnar };
635b073430SIngo Molnar 
/*
 * The X86_FEATURE_* CPUID bit that must be present for each xfeature to
 * be usable. __initdata: only consulted during boot-time xstate setup.
 *
 * NOTE(review): index gaps default to 0, which numerically equals
 * X86_FEATURE_FPU — presumably consumers only index with the features
 * listed here; verify against the setup code.
 */
6470c3f167SChang S. Bae static unsigned short xsave_cpuid_features[] __initdata = {
6570c3f167SChang S. Bae 	[XFEATURE_FP]				= X86_FEATURE_FPU,
6670c3f167SChang S. Bae 	[XFEATURE_SSE]				= X86_FEATURE_XMM,
6770c3f167SChang S. Bae 	[XFEATURE_YMM]				= X86_FEATURE_AVX,
6870c3f167SChang S. Bae 	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
6970c3f167SChang S. Bae 	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
7070c3f167SChang S. Bae 	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
7170c3f167SChang S. Bae 	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
7270c3f167SChang S. Bae 	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
7370c3f167SChang S. Bae 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
7470c3f167SChang S. Bae 	[XFEATURE_PKRU]				= X86_FEATURE_PKU,
7570c3f167SChang S. Bae 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
76eec2113eSChang S. Bae 	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
77eec2113eSChang S. Bae 	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
78ccb18db2SAndi Kleen };
79ccb18db2SAndi Kleen 
/*
 * Per-xfeature offset and size caches, filled in at boot by the
 * setup_*() functions below. -1 marks entries that are unknown or
 * invalid (e.g. supervisor states have no fixed uncompacted offset).
 */
80ce578f16SThomas Gleixner static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
81ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
82ce578f16SThomas Gleixner static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
83ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
84ce578f16SThomas Gleixner static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
85ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
86ce578f16SThomas Gleixner static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init =
87ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
8862784854SIngo Molnar 
8962784854SIngo Molnar /*
905b073430SIngo Molnar  * Return whether the system supports a given xfeature.
915b073430SIngo Molnar  *
925b073430SIngo Molnar  * Also return the name of the (most advanced) feature that the caller requested:
935b073430SIngo Molnar  */
945b073430SIngo Molnar int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
955b073430SIngo Molnar {
961c253ff2SThomas Gleixner 	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
975b073430SIngo Molnar 
985b073430SIngo Molnar 	if (unlikely(feature_name)) {
995b073430SIngo Molnar 		long xfeature_idx, max_idx;
1005b073430SIngo Molnar 		u64 xfeatures_print;
1015b073430SIngo Molnar 		/*
1025b073430SIngo Molnar 		 * So we use FLS here to be able to print the most advanced
1035b073430SIngo Molnar 		 * feature that was requested but is missing. So if a driver
104d91cab78SDave Hansen 		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
1055b073430SIngo Molnar 		 * missing AVX feature - this is the most informative message
1065b073430SIngo Molnar 		 * to users:
1075b073430SIngo Molnar 		 */
1085b073430SIngo Molnar 		if (xfeatures_missing)
1095b073430SIngo Molnar 			xfeatures_print = xfeatures_missing;
1105b073430SIngo Molnar 		else
1115b073430SIngo Molnar 			xfeatures_print = xfeatures_needed;
1125b073430SIngo Molnar 
1135b073430SIngo Molnar 		xfeature_idx = fls64(xfeatures_print)-1;
1145b073430SIngo Molnar 		max_idx = ARRAY_SIZE(xfeature_names)-1;
1155b073430SIngo Molnar 		xfeature_idx = min(xfeature_idx, max_idx);
1165b073430SIngo Molnar 
1175b073430SIngo Molnar 		*feature_name = xfeature_names[xfeature_idx];
1185b073430SIngo Molnar 	}
1195b073430SIngo Molnar 
1205b073430SIngo Molnar 	if (xfeatures_missing)
1215b073430SIngo Molnar 		return 0;
1225b073430SIngo Molnar 
1235b073430SIngo Molnar 	return 1;
1245b073430SIngo Molnar }
1255b073430SIngo Molnar EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
1265b073430SIngo Molnar 
127158e2ee6SYu-cheng Yu static bool xfeature_is_supervisor(int xfeature_nr)
1281499ce2dSYu-cheng Yu {
1291499ce2dSYu-cheng Yu 	/*
1308c9e6073SYu-cheng Yu 	 * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
1318c9e6073SYu-cheng Yu 	 * returns ECX[0] set to (1) for a supervisor state, and cleared (0)
1328c9e6073SYu-cheng Yu 	 * for a user state.
1331499ce2dSYu-cheng Yu 	 */
1341499ce2dSYu-cheng Yu 	u32 eax, ebx, ecx, edx;
1351499ce2dSYu-cheng Yu 
1361499ce2dSYu-cheng Yu 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
137158e2ee6SYu-cheng Yu 	return ecx & 1;
1381499ce2dSYu-cheng Yu }
1391499ce2dSYu-cheng Yu 
1405b073430SIngo Molnar /*
14162784854SIngo Molnar  * Enable the extended processor state save/restore feature.
14262784854SIngo Molnar  * Called once per CPU onlining.
14362784854SIngo Molnar  */
14462784854SIngo Molnar void fpu__init_cpu_xstate(void)
14562784854SIngo Molnar {
1461c253ff2SThomas Gleixner 	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
14762784854SIngo Molnar 		return;
14862784854SIngo Molnar 
	/* Enable the XSAVE feature set (XGETBV/XSETBV, XSAVE/XRSTOR): */
14962784854SIngo Molnar 	cr4_set_bits(X86_CR4_OSXSAVE);
15062784854SIngo Molnar 
15162784854SIngo Molnar 	/*
15267236547SChang S. Bae 	 * Must happen after CR4 setup and before xsetbv() to allow KVM
15367236547SChang S. Bae 	 * lazy passthrough.  Write independent of the dynamic state static
15467236547SChang S. Bae 	 * key as that does not work on the boot CPU. This also ensures
15567236547SChang S. Bae 	 * that any stale state is wiped out from XFD.
15667236547SChang S. Bae 	 */
15767236547SChang S. Bae 	if (cpu_feature_enabled(X86_FEATURE_XFD))
15867236547SChang S. Bae 		wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
15967236547SChang S. Bae 
16067236547SChang S. Bae 	/*
161524bb73bSYu-cheng Yu 	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
162524bb73bSYu-cheng Yu 	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
163524bb73bSYu-cheng Yu 	 * states can be set here.
164e6e888f9SDave Hansen 	 */
1651c253ff2SThomas Gleixner 	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
16662784854SIngo Molnar 
16762784854SIngo Molnar 	/*
16871581eefSYu-cheng Yu 	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
169e6e888f9SDave Hansen 	 */
170f0dccc9dSKan Liang 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
171f0dccc9dSKan Liang 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
17201707b66SAndy Lutomirski 				     xfeatures_mask_independent());
173f0dccc9dSKan Liang 	}
17471581eefSYu-cheng Yu }
17571581eefSYu-cheng Yu 
176524bb73bSYu-cheng Yu static bool xfeature_enabled(enum xfeature xfeature)
177e6e888f9SDave Hansen {
1781c253ff2SThomas Gleixner 	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
179e6e888f9SDave Hansen }
180e6e888f9SDave Hansen 
181e6e888f9SDave Hansen /*
18239f1acd2SIngo Molnar  * Record the offsets and sizes of various xstates contained
18339f1acd2SIngo Molnar  * in the XSAVE state memory layout.
18462784854SIngo Molnar  */
18562784854SIngo Molnar static void __init setup_xstate_features(void)
18662784854SIngo Molnar {
187ee9ae257SDave Hansen 	u32 eax, ebx, ecx, edx, i;
188d9f6e12fSIngo Molnar 	/* start at the beginning of the "extended state" */
189e6e888f9SDave Hansen 	unsigned int last_good_offset = offsetof(struct xregs_state,
190e6e888f9SDave Hansen 						 extended_state_area);
191ac73b27aSYu-cheng Yu 	/*
192ac73b27aSYu-cheng Yu 	 * The FP xstates and SSE xstates are legacy states. They are always
193ac73b27aSYu-cheng Yu 	 * in the fixed offsets in the xsave area in either compacted form
194ac73b27aSYu-cheng Yu 	 * or standard form.
195ac73b27aSYu-cheng Yu 	 */
196446e693cSCyrill Gorcunov 	xstate_offsets[XFEATURE_FP]	= 0;
197446e693cSCyrill Gorcunov 	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
198446e693cSCyrill Gorcunov 						   xmm_space);
199446e693cSCyrill Gorcunov 
200446e693cSCyrill Gorcunov 	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
201c593642cSPankaj Bharadiya 	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
202446e693cSCyrill Gorcunov 						       xmm_space);
20362784854SIngo Molnar 
2041c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
205e6e888f9SDave Hansen 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
2061499ce2dSYu-cheng Yu 
207c12e13dcSYu-cheng Yu 		xstate_sizes[i] = eax;
208c12e13dcSYu-cheng Yu 
2091499ce2dSYu-cheng Yu 		/*
210c12e13dcSYu-cheng Yu 		 * If an xfeature is supervisor state, the offset in EBX is
211c12e13dcSYu-cheng Yu 		 * invalid, leave it to -1.
2121499ce2dSYu-cheng Yu 		 */
213c12e13dcSYu-cheng Yu 		if (xfeature_is_supervisor(i))
214c12e13dcSYu-cheng Yu 			continue;
215c12e13dcSYu-cheng Yu 
216ee9ae257SDave Hansen 		xstate_offsets[i] = ebx;
2171499ce2dSYu-cheng Yu 
218e6e888f9SDave Hansen 		/*
219c12e13dcSYu-cheng Yu 		 * In our xstate size checks, we assume that the highest-numbered
220c12e13dcSYu-cheng Yu 		 * xstate feature has the highest offset in the buffer.  Ensure
221c12e13dcSYu-cheng Yu 		 * it does.
222e6e888f9SDave Hansen 		 */
223e6e888f9SDave Hansen 		WARN_ONCE(last_good_offset > xstate_offsets[i],
224e6e888f9SDave Hansen 			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
225c12e13dcSYu-cheng Yu 
226e6e888f9SDave Hansen 		last_good_offset = xstate_offsets[i];
22739f1acd2SIngo Molnar 	}
22862784854SIngo Molnar }
22962784854SIngo Molnar 
23032231879SIngo Molnar static void __init print_xstate_feature(u64 xstate_mask)
23162784854SIngo Molnar {
23233588b52SIngo Molnar 	const char *feature_name;
23362784854SIngo Molnar 
23433588b52SIngo Molnar 	if (cpu_has_xfeatures(xstate_mask, &feature_name))
235c8df4009SDave Hansen 		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
23662784854SIngo Molnar }
23762784854SIngo Molnar 
23862784854SIngo Molnar /*
23962784854SIngo Molnar  * Print out all the supported xstate features:
24062784854SIngo Molnar  */
24132231879SIngo Molnar static void __init print_xstate_features(void)
24262784854SIngo Molnar {
243d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_FP);
244d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_SSE);
245d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_YMM);
246d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_BNDREGS);
247d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_BNDCSR);
248d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_OPMASK);
249d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
250d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
251c8df4009SDave Hansen 	print_xstate_feature(XFEATURE_MASK_PKRU);
252b454feb9SYu-cheng Yu 	print_xstate_feature(XFEATURE_MASK_PASID);
253eec2113eSChang S. Bae 	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
254eec2113eSChang S. Bae 	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
25562784854SIngo Molnar }
25662784854SIngo Molnar 
25762784854SIngo Molnar /*
25803482e08SYu-cheng Yu  * This check is important because it is easy to get XSTATE_*
25903482e08SYu-cheng Yu  * confused with XSTATE_BIT_*.
26003482e08SYu-cheng Yu  */
/* Warn if @nr is outside the extended-xfeature range [FIRST_EXTENDED_XFEATURE, XFEATURE_MAX). */
26103482e08SYu-cheng Yu #define CHECK_XFEATURE(nr) do {		\
26203482e08SYu-cheng Yu 	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
26303482e08SYu-cheng Yu 	WARN_ON(nr >= XFEATURE_MAX);	\
26403482e08SYu-cheng Yu } while (0)
26503482e08SYu-cheng Yu 
26603482e08SYu-cheng Yu /*
26703482e08SYu-cheng Yu  * We could cache this like xstate_size[], but we only use
26803482e08SYu-cheng Yu  * it here, so it would be a waste of space.
26903482e08SYu-cheng Yu  */
27003482e08SYu-cheng Yu static int xfeature_is_aligned(int xfeature_nr)
27103482e08SYu-cheng Yu {
27203482e08SYu-cheng Yu 	u32 eax, ebx, ecx, edx;
27303482e08SYu-cheng Yu 
27403482e08SYu-cheng Yu 	CHECK_XFEATURE(xfeature_nr);
275e70b1008SYu-cheng Yu 
276e70b1008SYu-cheng Yu 	if (!xfeature_enabled(xfeature_nr)) {
277e70b1008SYu-cheng Yu 		WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
278e70b1008SYu-cheng Yu 			  xfeature_nr);
279e70b1008SYu-cheng Yu 		return 0;
280e70b1008SYu-cheng Yu 	}
281e70b1008SYu-cheng Yu 
28203482e08SYu-cheng Yu 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
28303482e08SYu-cheng Yu 	/*
28403482e08SYu-cheng Yu 	 * The value returned by ECX[1] indicates the alignment
28503482e08SYu-cheng Yu 	 * of state component 'i' when the compacted format
28603482e08SYu-cheng Yu 	 * of the extended region of an XSAVE area is used:
28703482e08SYu-cheng Yu 	 */
28803482e08SYu-cheng Yu 	return !!(ecx & 2);
28903482e08SYu-cheng Yu }
29003482e08SYu-cheng Yu 
29103482e08SYu-cheng Yu /*
29262784854SIngo Molnar  * This function sets up offsets and sizes of all extended states in
29362784854SIngo Molnar  * xsave area. This supports both standard format and compacted format
29449a91d61SYu-cheng Yu  * of the xsave area.
29562784854SIngo Molnar  */
29649a91d61SYu-cheng Yu static void __init setup_xstate_comp_offsets(void)
29762784854SIngo Molnar {
29849a91d61SYu-cheng Yu 	unsigned int next_offset;
29962784854SIngo Molnar 	int i;
30062784854SIngo Molnar 
30162784854SIngo Molnar 	/*
30262784854SIngo Molnar 	 * The FP xstates and SSE xstates are legacy states. They are always
30362784854SIngo Molnar 	 * in the fixed offsets in the xsave area in either compacted form
30462784854SIngo Molnar 	 * or standard form.
30562784854SIngo Molnar 	 */
306446e693cSCyrill Gorcunov 	xstate_comp_offsets[XFEATURE_FP] = 0;
307446e693cSCyrill Gorcunov 	xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
308446e693cSCyrill Gorcunov 						     xmm_space);
30962784854SIngo Molnar 
310ffd3e504SThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) {
3111c253ff2SThomas Gleixner 		for_each_extended_xfeature(i, fpu_kernel_cfg.max_features)
31262784854SIngo Molnar 			xstate_comp_offsets[i] = xstate_offsets[i];
31362784854SIngo Molnar 		return;
31462784854SIngo Molnar 	}
31562784854SIngo Molnar 
31649a91d61SYu-cheng Yu 	next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
31762784854SIngo Molnar 
3181c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
31903482e08SYu-cheng Yu 		if (xfeature_is_aligned(i))
32049a91d61SYu-cheng Yu 			next_offset = ALIGN(next_offset, 64);
32149a91d61SYu-cheng Yu 
32249a91d61SYu-cheng Yu 		xstate_comp_offsets[i] = next_offset;
32349a91d61SYu-cheng Yu 		next_offset += xstate_sizes[i];
32462784854SIngo Molnar 	}
32562784854SIngo Molnar }
32662784854SIngo Molnar 
32762784854SIngo Molnar /*
328eeedf153SYu-cheng Yu  * Setup offsets of a supervisor-state-only XSAVES buffer:
329eeedf153SYu-cheng Yu  *
330eeedf153SYu-cheng Yu  * The offsets stored in xstate_comp_offsets[] only work for one specific
331eeedf153SYu-cheng Yu  * value of the Requested Feature BitMap (RFBM).  In cases where a different
332eeedf153SYu-cheng Yu  * RFBM value is used, a different set of offsets is required.  This set of
333eeedf153SYu-cheng Yu  * offsets is for when RFBM=xfeatures_mask_supervisor().
334eeedf153SYu-cheng Yu  */
335eeedf153SYu-cheng Yu static void __init setup_supervisor_only_offsets(void)
336eeedf153SYu-cheng Yu {
337eeedf153SYu-cheng Yu 	unsigned int next_offset;
338eeedf153SYu-cheng Yu 	int i;
339eeedf153SYu-cheng Yu 
340eeedf153SYu-cheng Yu 	next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
341eeedf153SYu-cheng Yu 
3421c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
343ffd3e504SThomas Gleixner 		if (!xfeature_is_supervisor(i))
344eeedf153SYu-cheng Yu 			continue;
345eeedf153SYu-cheng Yu 
346eeedf153SYu-cheng Yu 		if (xfeature_is_aligned(i))
347eeedf153SYu-cheng Yu 			next_offset = ALIGN(next_offset, 64);
348eeedf153SYu-cheng Yu 
349eeedf153SYu-cheng Yu 		xstate_supervisor_only_offsets[i] = next_offset;
350eeedf153SYu-cheng Yu 		next_offset += xstate_sizes[i];
351eeedf153SYu-cheng Yu 	}
352eeedf153SYu-cheng Yu }
353eeedf153SYu-cheng Yu 
354eeedf153SYu-cheng Yu /*
355996952e0SYu-cheng Yu  * Print out xstate component offsets and sizes
356996952e0SYu-cheng Yu  */
357996952e0SYu-cheng Yu static void __init print_xstate_offset_size(void)
358996952e0SYu-cheng Yu {
359996952e0SYu-cheng Yu 	int i;
360996952e0SYu-cheng Yu 
3611c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
362996952e0SYu-cheng Yu 		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
363996952e0SYu-cheng Yu 			 i, xstate_comp_offsets[i], i, xstate_sizes[i]);
364996952e0SYu-cheng Yu 	}
365996952e0SYu-cheng Yu }
366996952e0SYu-cheng Yu 
367996952e0SYu-cheng Yu /*
368b579d0c3SThomas Gleixner  * This function is called only during boot time when x86 caps are not set
369b579d0c3SThomas Gleixner  * up and alternative can not be used yet.
370b579d0c3SThomas Gleixner  */
371b579d0c3SThomas Gleixner static __init void os_xrstor_booting(struct xregs_state *xstate)
372b579d0c3SThomas Gleixner {
373eda32f4fSThomas Gleixner 	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
	/* The instruction takes the requested-feature bitmap as EDX:EAX: */
374b579d0c3SThomas Gleixner 	u32 lmask = mask;
375b579d0c3SThomas Gleixner 	u32 hmask = mask >> 32;
376b579d0c3SThomas Gleixner 	int err;
377b579d0c3SThomas Gleixner 
	/* Use XRSTORS when the CPU supports XSAVES, plain XRSTOR otherwise: */
378b579d0c3SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
379b579d0c3SThomas Gleixner 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
380b579d0c3SThomas Gleixner 	else
381b579d0c3SThomas Gleixner 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
382b579d0c3SThomas Gleixner 
383b579d0c3SThomas Gleixner 	/*
384b579d0c3SThomas Gleixner 	 * We should never fault when copying from a kernel buffer, and the FPU
385b579d0c3SThomas Gleixner 	 * state we set at boot time should be valid.
386b579d0c3SThomas Gleixner 	 */
387b579d0c3SThomas Gleixner 	WARN_ON_FPU(err);
388b579d0c3SThomas Gleixner }
389b579d0c3SThomas Gleixner 
390b579d0c3SThomas Gleixner /*
391f9dfb5e3SThomas Gleixner  * All supported features have either init state all zeros or are
392f9dfb5e3SThomas Gleixner  * handled in setup_init_fpu() individually. This is an explicit
393f9dfb5e3SThomas Gleixner  * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
394f9dfb5e3SThomas Gleixner  * newly added supported features at build time and make people
395f9dfb5e3SThomas Gleixner  * actually look at the init state for the new feature.
396f9dfb5e3SThomas Gleixner  */
/* Checked against the supported-feature masks by setup_init_fpu_buf()'s BUILD_BUG_ON(). */
397f9dfb5e3SThomas Gleixner #define XFEATURES_INIT_FPSTATE_HANDLED		\
398f9dfb5e3SThomas Gleixner 	(XFEATURE_MASK_FP |			\
399f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_SSE |			\
400f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_YMM |			\
401f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_OPMASK |			\
402f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_ZMM_Hi256 |		\
403f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_Hi16_ZMM	 |		\
404f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_PKRU |			\
405f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_BNDREGS |		\
406f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_BNDCSR |			\
4072308ee57SChang S. Bae 	 XFEATURE_MASK_PASID |			\
4082308ee57SChang S. Bae 	 XFEATURE_MASK_XTILE)
409f9dfb5e3SThomas Gleixner 
410f9dfb5e3SThomas Gleixner /*
41162784854SIngo Molnar  * setup the xstate image representing the init state
41262784854SIngo Molnar  */
41332231879SIngo Molnar static void __init setup_init_fpu_buf(void)
41462784854SIngo Molnar {
415f9dfb5e3SThomas Gleixner 	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
416f9dfb5e3SThomas Gleixner 		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
417f9dfb5e3SThomas Gleixner 		     XFEATURES_INIT_FPSTATE_HANDLED);
418f9dfb5e3SThomas Gleixner 
419d366bf7eSBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
42062784854SIngo Molnar 		return;
42162784854SIngo Molnar 
42262784854SIngo Molnar 	setup_xstate_features();
42362784854SIngo Molnar 	print_xstate_features();
42462784854SIngo Molnar 
4251c253ff2SThomas Gleixner 	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features);
42662784854SIngo Molnar 
42762784854SIngo Molnar 	/*
4287d937060SFenghua Yu 	 * Init all the features state with header.xfeatures being 0x0
42962784854SIngo Molnar 	 */
430f83ac56aSThomas Gleixner 	os_xrstor_booting(&init_fpstate.regs.xsave);
43162784854SIngo Molnar 
43262784854SIngo Molnar 	/*
433f9dfb5e3SThomas Gleixner 	 * All components are now in init state. Read the state back so
434f9dfb5e3SThomas Gleixner 	 * that init_fpstate contains all non-zero init state. This only
435f9dfb5e3SThomas Gleixner 	 * works with XSAVE, but not with XSAVEOPT and XSAVES because
436f9dfb5e3SThomas Gleixner 	 * those use the init optimization which skips writing data for
437f9dfb5e3SThomas Gleixner 	 * components in init state.
438f9dfb5e3SThomas Gleixner 	 *
439f9dfb5e3SThomas Gleixner 	 * XSAVE could be used, but that would require to reshuffle the
440f9dfb5e3SThomas Gleixner 	 * data when XSAVES is available because XSAVES uses xstate
441f9dfb5e3SThomas Gleixner 	 * compaction. But doing so is a pointless exercise because most
442f9dfb5e3SThomas Gleixner 	 * components have an all zeros init state except for the legacy
443f9dfb5e3SThomas Gleixner 	 * ones (FP and SSE). Those can be saved with FXSAVE into the
444f9dfb5e3SThomas Gleixner 	 * legacy area. Adding new features requires to ensure that init
445f9dfb5e3SThomas Gleixner 	 * state is all zeroes or if not to add the necessary handling
446f9dfb5e3SThomas Gleixner 	 * here.
44762784854SIngo Molnar 	 */
448f83ac56aSThomas Gleixner 	fxsave(&init_fpstate.regs.fxsave);
44962784854SIngo Molnar }
45062784854SIngo Molnar 
45165ac2e9bSDave Hansen static int xfeature_uncompacted_offset(int xfeature_nr)
45265ac2e9bSDave Hansen {
45365ac2e9bSDave Hansen 	u32 eax, ebx, ecx, edx;
45465ac2e9bSDave Hansen 
4551499ce2dSYu-cheng Yu 	/*
4561499ce2dSYu-cheng Yu 	 * Only XSAVES supports supervisor states and it uses compacted
4571499ce2dSYu-cheng Yu 	 * format. Checking a supervisor state's uncompacted offset is
4581499ce2dSYu-cheng Yu 	 * an error.
4591499ce2dSYu-cheng Yu 	 */
4608ab22804SFenghua Yu 	if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
4611499ce2dSYu-cheng Yu 		WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
4621499ce2dSYu-cheng Yu 		return -1;
4631499ce2dSYu-cheng Yu 	}
4641499ce2dSYu-cheng Yu 
46565ac2e9bSDave Hansen 	CHECK_XFEATURE(xfeature_nr);
46665ac2e9bSDave Hansen 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
46765ac2e9bSDave Hansen 	return ebx;
46865ac2e9bSDave Hansen }
46965ac2e9bSDave Hansen 
470ce711ea3SKan Liang int xfeature_size(int xfeature_nr)
47165ac2e9bSDave Hansen {
47265ac2e9bSDave Hansen 	u32 eax, ebx, ecx, edx;
47365ac2e9bSDave Hansen 
47465ac2e9bSDave Hansen 	CHECK_XFEATURE(xfeature_nr);
47565ac2e9bSDave Hansen 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
47665ac2e9bSDave Hansen 	return eax;
47765ac2e9bSDave Hansen }
47865ac2e9bSDave Hansen 
479e63e5d5cSEric Biggers /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
48049e4eb41SThomas Gleixner static int validate_user_xstate_header(const struct xstate_header *hdr,
48149e4eb41SThomas Gleixner 				       struct fpstate *fpstate)
482e63e5d5cSEric Biggers {
483e63e5d5cSEric Biggers 	/* No unknown or supervisor features may be set */
48449e4eb41SThomas Gleixner 	if (hdr->xfeatures & ~fpstate->user_xfeatures)
485e63e5d5cSEric Biggers 		return -EINVAL;
486e63e5d5cSEric Biggers 
487e63e5d5cSEric Biggers 	/* Userspace must use the uncompacted format */
488e63e5d5cSEric Biggers 	if (hdr->xcomp_bv)
489e63e5d5cSEric Biggers 		return -EINVAL;
490e63e5d5cSEric Biggers 
491e63e5d5cSEric Biggers 	/*
492e63e5d5cSEric Biggers 	 * If 'reserved' is shrunken to add a new field, make sure to validate
493e63e5d5cSEric Biggers 	 * that new field here!
494e63e5d5cSEric Biggers 	 */
495e63e5d5cSEric Biggers 	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
496e63e5d5cSEric Biggers 
497e63e5d5cSEric Biggers 	/* No reserved bits may be set */
498e63e5d5cSEric Biggers 	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
499e63e5d5cSEric Biggers 		return -EINVAL;
500e63e5d5cSEric Biggers 
501e63e5d5cSEric Biggers 	return 0;
502e63e5d5cSEric Biggers }
503e63e5d5cSEric Biggers 
50463cf05a1SThomas Gleixner static void __init __xstate_dump_leaves(void)
50565ac2e9bSDave Hansen {
50665ac2e9bSDave Hansen 	int i;
50765ac2e9bSDave Hansen 	u32 eax, ebx, ecx, edx;
50865ac2e9bSDave Hansen 	static int should_dump = 1;
50965ac2e9bSDave Hansen 
51065ac2e9bSDave Hansen 	if (!should_dump)
51165ac2e9bSDave Hansen 		return;
51265ac2e9bSDave Hansen 	should_dump = 0;
51365ac2e9bSDave Hansen 	/*
51465ac2e9bSDave Hansen 	 * Dump out a few leaves past the ones that we support
51565ac2e9bSDave Hansen 	 * just in case there are some goodies up there
51665ac2e9bSDave Hansen 	 */
51765ac2e9bSDave Hansen 	for (i = 0; i < XFEATURE_MAX + 10; i++) {
51865ac2e9bSDave Hansen 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
51965ac2e9bSDave Hansen 		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
52065ac2e9bSDave Hansen 			XSTATE_CPUID, i, eax, ebx, ecx, edx);
52165ac2e9bSDave Hansen 	}
52265ac2e9bSDave Hansen }
52365ac2e9bSDave Hansen 
/* Warn once and dump the XSTATE CPUID leaves when a consistency check @x fires. */
52465ac2e9bSDave Hansen #define XSTATE_WARN_ON(x) do {							\
52565ac2e9bSDave Hansen 	if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) {	\
52665ac2e9bSDave Hansen 		__xstate_dump_leaves();						\
52765ac2e9bSDave Hansen 	}									\
52865ac2e9bSDave Hansen } while (0)
52965ac2e9bSDave Hansen 
/*
 * If @nr equals @nr_macro, verify that the CPU-reported size @sz matches
 * sizeof(@__struct); warn once and dump the CPUID leaves on a mismatch.
 */
530ef78f2a4SDave Hansen #define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
531ef78f2a4SDave Hansen 	if ((nr == nr_macro) &&						\
532ef78f2a4SDave Hansen 	    WARN_ONCE(sz != sizeof(__struct),				\
533ef78f2a4SDave Hansen 		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
534ef78f2a4SDave Hansen 		__stringify(nr_macro), sizeof(__struct), sz)) {		\
535ef78f2a4SDave Hansen 		__xstate_dump_leaves();					\
536ef78f2a4SDave Hansen 	}								\
537ef78f2a4SDave Hansen } while (0)
538ef78f2a4SDave Hansen 
539eec2113eSChang S. Bae /**
540eec2113eSChang S. Bae  * check_xtile_data_against_struct - Check tile data state size.
541eec2113eSChang S. Bae  *
542eec2113eSChang S. Bae  * Calculate the state size by multiplying the single tile size which is
543eec2113eSChang S. Bae  * recorded in a C struct, and the number of tiles that the CPU informs.
544eec2113eSChang S. Bae  * Compare the provided size with the calculation.
545eec2113eSChang S. Bae  *
546eec2113eSChang S. Bae  * @size:	The tile data state size
547eec2113eSChang S. Bae  *
548eec2113eSChang S. Bae  * Returns:	0 on success, -EINVAL on mismatch.
549eec2113eSChang S. Bae  */
550eec2113eSChang S. Bae static int __init check_xtile_data_against_struct(int size)
551eec2113eSChang S. Bae {
552eec2113eSChang S. Bae 	u32 max_palid, palid, state_size;
553eec2113eSChang S. Bae 	u32 eax, ebx, ecx, edx;
554eec2113eSChang S. Bae 	u16 max_tile;
555eec2113eSChang S. Bae 
556eec2113eSChang S. Bae 	/*
557eec2113eSChang S. Bae 	 * Check the maximum palette id:
558eec2113eSChang S. Bae 	 *   eax: the highest numbered palette subleaf.
559eec2113eSChang S. Bae 	 */
560eec2113eSChang S. Bae 	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
561eec2113eSChang S. Bae 
562eec2113eSChang S. Bae 	/*
563eec2113eSChang S. Bae 	 * Cross-check each tile size and find the maximum number of
564eec2113eSChang S. Bae 	 * supported tiles.
565eec2113eSChang S. Bae 	 */
566eec2113eSChang S. Bae 	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
567eec2113eSChang S. Bae 		u16 tile_size, max;
568eec2113eSChang S. Bae 
569eec2113eSChang S. Bae 		/*
570eec2113eSChang S. Bae 		 * Check the tile size info:
571eec2113eSChang S. Bae 		 *   eax[31:16]:  bytes per title
572eec2113eSChang S. Bae 		 *   ebx[31:16]:  the max names (or max number of tiles)
573eec2113eSChang S. Bae 		 */
574eec2113eSChang S. Bae 		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
575eec2113eSChang S. Bae 		tile_size = eax >> 16;
576eec2113eSChang S. Bae 		max = ebx >> 16;
577eec2113eSChang S. Bae 
578eec2113eSChang S. Bae 		if (tile_size != sizeof(struct xtile_data)) {
579eec2113eSChang S. Bae 			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
580eec2113eSChang S. Bae 			       __stringify(XFEATURE_XTILE_DATA),
581eec2113eSChang S. Bae 			       sizeof(struct xtile_data), tile_size);
582eec2113eSChang S. Bae 			__xstate_dump_leaves();
583eec2113eSChang S. Bae 			return -EINVAL;
584eec2113eSChang S. Bae 		}
585eec2113eSChang S. Bae 
586eec2113eSChang S. Bae 		if (max > max_tile)
587eec2113eSChang S. Bae 			max_tile = max;
588eec2113eSChang S. Bae 	}
589eec2113eSChang S. Bae 
590eec2113eSChang S. Bae 	state_size = sizeof(struct xtile_data) * max_tile;
591eec2113eSChang S. Bae 	if (size != state_size) {
592eec2113eSChang S. Bae 		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
593eec2113eSChang S. Bae 		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
594eec2113eSChang S. Bae 		__xstate_dump_leaves();
595eec2113eSChang S. Bae 		return -EINVAL;
596eec2113eSChang S. Bae 	}
597eec2113eSChang S. Bae 	return 0;
598eec2113eSChang S. Bae }
599eec2113eSChang S. Bae 
/*
 * We have a C struct for each 'xstate'.  We need to ensure
 * that our software representation matches what the CPU
 * tells us about the state's size.
 *
 * Returns true when a C structure is known for component @nr,
 * false (plus a one-time warning) for holes/reserved components.
 * Size mismatches for the known components are reported by the
 * XCHECK_SZ() macro / the tile-data helper, not by the return value.
 */
static bool __init check_xstate_against_struct(int nr)
{
	/*
	 * Ask the CPU for the size of the state.
	 */
	int sz = xfeature_size(nr);
	/*
	 * Match each CPU state with the corresponding software
	 * structure.  XCHECK_SZ() is defined elsewhere in this file;
	 * presumably it compares @sz against sizeof() of the listed
	 * struct when @nr matches — confirm against its definition.
	 */
	XCHECK_SZ(sz, nr, XFEATURE_YMM,       struct ymmh_struct);
	XCHECK_SZ(sz, nr, XFEATURE_BNDREGS,   struct mpx_bndreg_state);
	XCHECK_SZ(sz, nr, XFEATURE_BNDCSR,    struct mpx_bndcsr_state);
	XCHECK_SZ(sz, nr, XFEATURE_OPMASK,    struct avx_512_opmask_state);
	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
	XCHECK_SZ(sz, nr, XFEATURE_PASID,     struct ia32_pasid_state);
	XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);

	/* The tile data size varies between implementations. */
	if (nr == XFEATURE_XTILE_DATA)
		check_xtile_data_against_struct(sz);
	/*
	 * NOTE(review): the -EINVAL result of the tile-data check is
	 * ignored here; a mismatch is only logged by that helper.
	 */

	/*
	 * Make *SURE* to add any feature numbers in below if
	 * there are "holes" in the xsave state component
	 * numbers.
	 */
	if ((nr < XFEATURE_YMM) ||
	    (nr >= XFEATURE_MAX) ||
	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
	    ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
		XSTATE_WARN_ON(1);
		return false;
	}
	return true;
}
644ef78f2a4SDave Hansen 
64584e4dcccSChang S. Bae static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
64684e4dcccSChang S. Bae {
64784e4dcccSChang S. Bae 	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
64884e4dcccSChang S. Bae 	int i;
64984e4dcccSChang S. Bae 
65084e4dcccSChang S. Bae 	for_each_extended_xfeature(i, xfeatures) {
65184e4dcccSChang S. Bae 		/* Align from the end of the previous feature */
65284e4dcccSChang S. Bae 		if (xfeature_is_aligned(i))
65384e4dcccSChang S. Bae 			size = ALIGN(size, 64);
65484e4dcccSChang S. Bae 		/*
65584e4dcccSChang S. Bae 		 * In compacted format the enabled features are packed,
65684e4dcccSChang S. Bae 		 * i.e. disabled features do not occupy space.
65784e4dcccSChang S. Bae 		 *
65884e4dcccSChang S. Bae 		 * In non-compacted format the offsets are fixed and
65984e4dcccSChang S. Bae 		 * disabled states still occupy space in the memory buffer.
66084e4dcccSChang S. Bae 		 */
66184e4dcccSChang S. Bae 		if (!compacted)
66284e4dcccSChang S. Bae 			size = xfeature_uncompacted_offset(i);
66384e4dcccSChang S. Bae 		/*
66484e4dcccSChang S. Bae 		 * Add the feature size even for non-compacted format
66584e4dcccSChang S. Bae 		 * to make the end result correct
66684e4dcccSChang S. Bae 		 */
66784e4dcccSChang S. Bae 		size += xfeature_size(i);
66884e4dcccSChang S. Bae 	}
66984e4dcccSChang S. Bae 	return size;
67084e4dcccSChang S. Bae }
67184e4dcccSChang S. Bae 
67265ac2e9bSDave Hansen /*
67365ac2e9bSDave Hansen  * This essentially double-checks what the cpu told us about
67465ac2e9bSDave Hansen  * how large the XSAVE buffer needs to be.  We are recalculating
67565ac2e9bSDave Hansen  * it to be safe.
67676d10256SKan Liang  *
67701707b66SAndy Lutomirski  * Independent XSAVE features allocate their own buffers and are not
67876d10256SKan Liang  * covered by these checks. Only the size of the buffer for task->fpu
67976d10256SKan Liang  * is checked here.
68065ac2e9bSDave Hansen  */
681cd9ae761SThomas Gleixner static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
68265ac2e9bSDave Hansen {
683cd9ae761SThomas Gleixner 	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
684cd9ae761SThomas Gleixner 	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
68565ac2e9bSDave Hansen 	int i;
68665ac2e9bSDave Hansen 
6871c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
688cd9ae761SThomas Gleixner 		if (!check_xstate_against_struct(i))
689cd9ae761SThomas Gleixner 			return false;
69065ac2e9bSDave Hansen 		/*
69165ac2e9bSDave Hansen 		 * Supervisor state components can be managed only by
69202b93c0bSThomas Gleixner 		 * XSAVES.
69365ac2e9bSDave Hansen 		 */
694cd9ae761SThomas Gleixner 		if (!compacted && xfeature_is_supervisor(i)) {
695cd9ae761SThomas Gleixner 			XSTATE_WARN_ON(1);
696cd9ae761SThomas Gleixner 			return false;
697cd9ae761SThomas Gleixner 		}
69865ac2e9bSDave Hansen 	}
69984e4dcccSChang S. Bae 	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
700cd9ae761SThomas Gleixner 	XSTATE_WARN_ON(size != kernel_size);
701cd9ae761SThomas Gleixner 	return size == kernel_size;
70265ac2e9bSDave Hansen }
70365ac2e9bSDave Hansen 
70462784854SIngo Molnar /*
705524bb73bSYu-cheng Yu  * Get total size of enabled xstates in XCR0 | IA32_XSS.
70665ac2e9bSDave Hansen  *
70765ac2e9bSDave Hansen  * Note the SDM's wording here.  "sub-function 0" only enumerates
70865ac2e9bSDave Hansen  * the size of the *user* states.  If we use it to size a buffer
70965ac2e9bSDave Hansen  * that we use 'XSAVES' on, we could potentially overflow the
71065ac2e9bSDave Hansen  * buffer because 'XSAVES' saves system states too.
71162784854SIngo Molnar  */
712a1141e0bSFenghua Yu static unsigned int __init get_xsaves_size(void)
71362784854SIngo Molnar {
71462784854SIngo Molnar 	unsigned int eax, ebx, ecx, edx;
71565ac2e9bSDave Hansen 	/*
71665ac2e9bSDave Hansen 	 * - CPUID function 0DH, sub-function 1:
71765ac2e9bSDave Hansen 	 *    EBX enumerates the size (in bytes) required by
71865ac2e9bSDave Hansen 	 *    the XSAVES instruction for an XSAVE area
71965ac2e9bSDave Hansen 	 *    containing all the state components
72065ac2e9bSDave Hansen 	 *    corresponding to bits currently set in
72165ac2e9bSDave Hansen 	 *    XCR0 | IA32_XSS.
72265ac2e9bSDave Hansen 	 */
72365ac2e9bSDave Hansen 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
724a1141e0bSFenghua Yu 	return ebx;
72562784854SIngo Molnar }
726a1141e0bSFenghua Yu 
72776d10256SKan Liang /*
72801707b66SAndy Lutomirski  * Get the total size of the enabled xstates without the independent supervisor
72976d10256SKan Liang  * features.
73076d10256SKan Liang  */
73101707b66SAndy Lutomirski static unsigned int __init get_xsaves_size_no_independent(void)
73276d10256SKan Liang {
73301707b66SAndy Lutomirski 	u64 mask = xfeatures_mask_independent();
73476d10256SKan Liang 	unsigned int size;
73576d10256SKan Liang 
73676d10256SKan Liang 	if (!mask)
73776d10256SKan Liang 		return get_xsaves_size();
73876d10256SKan Liang 
73901707b66SAndy Lutomirski 	/* Disable independent features. */
74076d10256SKan Liang 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
74176d10256SKan Liang 
74276d10256SKan Liang 	/*
74376d10256SKan Liang 	 * Ask the hardware what size is required of the buffer.
74476d10256SKan Liang 	 * This is the size required for the task->fpu buffer.
74576d10256SKan Liang 	 */
74676d10256SKan Liang 	size = get_xsaves_size();
74776d10256SKan Liang 
74801707b66SAndy Lutomirski 	/* Re-enable independent features so XSAVES will work on them again. */
74976d10256SKan Liang 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
75076d10256SKan Liang 
75176d10256SKan Liang 	return size;
75276d10256SKan Liang }
75376d10256SKan Liang 
754cd9ae761SThomas Gleixner static unsigned int __init get_xsave_size_user(void)
755a1141e0bSFenghua Yu {
756a1141e0bSFenghua Yu 	unsigned int eax, ebx, ecx, edx;
757a1141e0bSFenghua Yu 	/*
758a1141e0bSFenghua Yu 	 * - CPUID function 0DH, sub-function 0:
759a1141e0bSFenghua Yu 	 *    EBX enumerates the size (in bytes) required by
760a1141e0bSFenghua Yu 	 *    the XSAVE instruction for an XSAVE area
761a1141e0bSFenghua Yu 	 *    containing all the *user* state components
762a1141e0bSFenghua Yu 	 *    corresponding to bits currently set in XCR0.
763a1141e0bSFenghua Yu 	 */
764a1141e0bSFenghua Yu 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
765a1141e0bSFenghua Yu 	return ebx;
7664109ca06SDave Hansen }
7674109ca06SDave Hansen 
7684109ca06SDave Hansen /*
7694109ca06SDave Hansen  * Will the runtime-enumerated 'xstate_size' fit in the init
7704109ca06SDave Hansen  * task's statically-allocated buffer?
7714109ca06SDave Hansen  */
77263cf05a1SThomas Gleixner static bool __init is_supported_xstate_size(unsigned int test_xstate_size)
7734109ca06SDave Hansen {
774f83ac56aSThomas Gleixner 	if (test_xstate_size <= sizeof(init_fpstate.regs))
7754109ca06SDave Hansen 		return true;
7764109ca06SDave Hansen 
7774109ca06SDave Hansen 	pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
778f83ac56aSThomas Gleixner 			sizeof(init_fpstate.regs), test_xstate_size);
7794109ca06SDave Hansen 	return false;
7804109ca06SDave Hansen }
7814109ca06SDave Hansen 
782653a561bSSergey Senozhatsky static int __init init_xstate_size(void)
7834109ca06SDave Hansen {
7844109ca06SDave Hansen 	/* Recompute the context size for enabled features: */
7852ae996e0SChang S. Bae 	unsigned int user_size, kernel_size, kernel_default_size;
7862ae996e0SChang S. Bae 	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
787a1141e0bSFenghua Yu 
788cd9ae761SThomas Gleixner 	/* Uncompacted user space size */
789cd9ae761SThomas Gleixner 	user_size = get_xsave_size_user();
790a1141e0bSFenghua Yu 
791cd9ae761SThomas Gleixner 	/*
792cd9ae761SThomas Gleixner 	 * XSAVES kernel size includes supervisor states and
7932ae996e0SChang S. Bae 	 * uses compacted format when available.
794cd9ae761SThomas Gleixner 	 *
795cd9ae761SThomas Gleixner 	 * XSAVE does not support supervisor states so
796cd9ae761SThomas Gleixner 	 * kernel and user size is identical.
797cd9ae761SThomas Gleixner 	 */
7982ae996e0SChang S. Bae 	if (compacted)
799cd9ae761SThomas Gleixner 		kernel_size = get_xsaves_size_no_independent();
800a1141e0bSFenghua Yu 	else
801cd9ae761SThomas Gleixner 		kernel_size = user_size;
8024109ca06SDave Hansen 
8032ae996e0SChang S. Bae 	kernel_default_size =
8042ae996e0SChang S. Bae 		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
8052ae996e0SChang S. Bae 
8062ae996e0SChang S. Bae 	/* Ensure we have the space to store all default enabled features. */
8072ae996e0SChang S. Bae 	if (!is_supported_xstate_size(kernel_default_size))
8084109ca06SDave Hansen 		return -EINVAL;
8094109ca06SDave Hansen 
810cd9ae761SThomas Gleixner 	if (!paranoid_xstate_size_valid(kernel_size))
811cd9ae761SThomas Gleixner 		return -EINVAL;
812a1141e0bSFenghua Yu 
8132bd264bcSThomas Gleixner 	fpu_kernel_cfg.max_size = kernel_size;
8142bd264bcSThomas Gleixner 	fpu_user_cfg.max_size = user_size;
8152ae996e0SChang S. Bae 
8162ae996e0SChang S. Bae 	fpu_kernel_cfg.default_size = kernel_default_size;
8172ae996e0SChang S. Bae 	fpu_user_cfg.default_size =
8182ae996e0SChang S. Bae 		xstate_calculate_size(fpu_user_cfg.default_features, false);
819cd9ae761SThomas Gleixner 
8204109ca06SDave Hansen 	return 0;
8214109ca06SDave Hansen }
8224109ca06SDave Hansen 
823d91cab78SDave Hansen /*
824d91cab78SDave Hansen  * We enabled the XSAVE hardware, but something went wrong and
825d91cab78SDave Hansen  * we can not use it.  Disable it.
826d91cab78SDave Hansen  */
8272bd264bcSThomas Gleixner static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
8284109ca06SDave Hansen {
8291c253ff2SThomas Gleixner 	fpu_kernel_cfg.max_features = 0;
8304109ca06SDave Hansen 	cr4_clear_bits(X86_CR4_OSXSAVE);
8317891bc0aSSebastian Andrzej Siewior 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
8322bd264bcSThomas Gleixner 
8332bd264bcSThomas Gleixner 	/* Restore the legacy size.*/
8342bd264bcSThomas Gleixner 	fpu_kernel_cfg.max_size = legacy_size;
8352bd264bcSThomas Gleixner 	fpu_kernel_cfg.default_size = legacy_size;
8362bd264bcSThomas Gleixner 	fpu_user_cfg.max_size = legacy_size;
8372bd264bcSThomas Gleixner 	fpu_user_cfg.default_size = legacy_size;
8382bd264bcSThomas Gleixner 
839db3e7321SChang S. Bae 	/*
840db3e7321SChang S. Bae 	 * Prevent enabling the static branch which enables writes to the
841db3e7321SChang S. Bae 	 * XFD MSR.
842db3e7321SChang S. Bae 	 */
843db3e7321SChang S. Bae 	init_fpstate.xfd = 0;
844db3e7321SChang S. Bae 
845248452ceSThomas Gleixner 	fpstate_reset(&current->thread.fpu);
84662784854SIngo Molnar }
84762784854SIngo Molnar 
/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 *
 * On any failure the boot continues without XSAVE support: the
 * out_disable path reverts all configuration to @legacy_size.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
		WARN_ON_FPU(1);
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 * CPUID.0DH.0: EAX/EDX enumerate the XCR0-manageable features.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 * CPUID.0DH.1: ECX/EDX enumerate the IA32_XSS-manageable features.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration.  Disable XSAVE and try to continue
		 * booting without it.  This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/* Careful: X86_FEATURE_FPU is 0! */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	/* Without XFD the dynamically-enabled features cannot be managed */
	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Restrict to the features this kernel actually supports */
	fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
			      XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/* Clean out dynamic features from default */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available.  The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();
	err = init_xstate_size();
	if (err)
		goto out_disable;

	/* Reset the state for the current task */
	fpstate_reset(&current->thread.fpu);

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	setup_init_fpu_buf();
	setup_xstate_comp_offsets();
	setup_supervisor_only_offsets();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}
97762784854SIngo Molnar 
/*
 * Restore minimal FPU state after suspend:
 * re-establish XCR0, IA32_XSS and (if in use) the XFD MSR, which are
 * not preserved across a suspend/resume cycle.
 */
void fpu__resume_cpu(void)
{
	/*
	 * Restore XCR0 on xsave capable CPUs:
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);

	/*
	 * Restore IA32_XSS. The same CPUID bit enumerates support
	 * of XSAVES and MSR_IA32_XSS.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
				     xfeatures_mask_independent());
	}

	/* Restore the current task's XFD value when dynamic states exist */
	if (fpu_state_size_dynamic())
		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
}
100162784854SIngo Molnar 
/*
 * Given an xstate feature nr, calculate where in the xsave
 * buffer the state is.  Callers should ensure that the buffer
 * is valid.
 */
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
	/*
	 * Asking for a feature that is not enabled is a caller bug:
	 * warn (WARN_ON_FPU() may be config-dependent — see its
	 * definition) and return NULL.
	 */
	if (!xfeature_enabled(xfeature_nr)) {
		WARN_ON_FPU(1);
		return NULL;
	}

	/*
	 * Offset table indexed by feature number; presumably the
	 * compacted-format offsets — confirm against the setup code.
	 */
	return (void *)xsave + xstate_comp_offsets[xfeature_nr];
}
1016b8b9b6baSDave Hansen /*
101762784854SIngo Molnar  * Given the xsave area and a state inside, this function returns the
101862784854SIngo Molnar  * address of the state.
101962784854SIngo Molnar  *
102062784854SIngo Molnar  * This is the API that is called to get xstate address in either
102162784854SIngo Molnar  * standard format or compacted format of xsave area.
102262784854SIngo Molnar  *
10230c4109beSDave Hansen  * Note that if there is no data for the field in the xsave buffer
10240c4109beSDave Hansen  * this will return NULL.
10250c4109beSDave Hansen  *
102662784854SIngo Molnar  * Inputs:
10270c4109beSDave Hansen  *	xstate: the thread's storage area for all FPU data
1028abd16d68SSebastian Andrzej Siewior  *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
1029abd16d68SSebastian Andrzej Siewior  *	XFEATURE_SSE, etc...)
103062784854SIngo Molnar  * Output:
10310c4109beSDave Hansen  *	address of the state in the xsave area, or NULL if the
10320c4109beSDave Hansen  *	field is not present in the xsave buffer.
103362784854SIngo Molnar  */
1034abd16d68SSebastian Andrzej Siewior void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
103562784854SIngo Molnar {
10360c4109beSDave Hansen 	/*
10370c4109beSDave Hansen 	 * Do we even *have* xsave state?
10380c4109beSDave Hansen 	 */
10390c4109beSDave Hansen 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
104062784854SIngo Molnar 		return NULL;
104162784854SIngo Molnar 
10420c4109beSDave Hansen 	/*
10430c4109beSDave Hansen 	 * We should not ever be requesting features that we
1044524bb73bSYu-cheng Yu 	 * have not enabled.
10450c4109beSDave Hansen 	 */
10461c253ff2SThomas Gleixner 	WARN_ONCE(!(fpu_kernel_cfg.max_features & BIT_ULL(xfeature_nr)),
10470c4109beSDave Hansen 		  "get of unsupported state");
10480c4109beSDave Hansen 	/*
10490c4109beSDave Hansen 	 * This assumes the last 'xsave*' instruction to
1050abd16d68SSebastian Andrzej Siewior 	 * have requested that 'xfeature_nr' be saved.
10510c4109beSDave Hansen 	 * If it did not, we might be seeing and old value
10520c4109beSDave Hansen 	 * of the field in the buffer.
10530c4109beSDave Hansen 	 *
10540c4109beSDave Hansen 	 * This can happen because the last 'xsave' did not
10550c4109beSDave Hansen 	 * request that this feature be saved (unlikely)
10560c4109beSDave Hansen 	 * or because the "init optimization" caused it
10570c4109beSDave Hansen 	 * to not be saved.
10580c4109beSDave Hansen 	 */
1059abd16d68SSebastian Andrzej Siewior 	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
10600c4109beSDave Hansen 		return NULL;
10610c4109beSDave Hansen 
106207baeb04SSebastian Andrzej Siewior 	return __raw_xsave_addr(xsave, xfeature_nr);
106362784854SIngo Molnar }
106404cd027bSDave Hansen 
1065e8c24d3aSDave Hansen #ifdef CONFIG_ARCH_HAS_PKEYS
1066e8c24d3aSDave Hansen 
106784594296SDave Hansen /*
1068b79daf85SDave Hansen  * This will go out and modify PKRU register to set the access
1069b79daf85SDave Hansen  * rights for @pkey to @init_val.
107084594296SDave Hansen  */
107184594296SDave Hansen int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
107284594296SDave Hansen 			      unsigned long init_val)
107384594296SDave Hansen {
10749fe8a6f5SThomas Gleixner 	u32 old_pkru, new_pkru_bits = 0;
10759fe8a6f5SThomas Gleixner 	int pkey_shift;
107684594296SDave Hansen 
107784594296SDave Hansen 	/*
107884594296SDave Hansen 	 * This check implies XSAVE support.  OSPKE only gets
107984594296SDave Hansen 	 * set if we enable XSAVE and we enable PKU in XCR0.
108084594296SDave Hansen 	 */
10818a1dc55aSThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
108284594296SDave Hansen 		return -EINVAL;
108384594296SDave Hansen 
108416171bffSDave Hansen 	/*
108516171bffSDave Hansen 	 * This code should only be called with valid 'pkey'
108616171bffSDave Hansen 	 * values originating from in-kernel users.  Complain
108716171bffSDave Hansen 	 * if a bad value is observed.
108816171bffSDave Hansen 	 */
10899fe8a6f5SThomas Gleixner 	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
10909fe8a6f5SThomas Gleixner 		return -EINVAL;
109116171bffSDave Hansen 
109291c3dba7SYu-cheng Yu 	/* Set the bits we need in PKRU:  */
109384594296SDave Hansen 	if (init_val & PKEY_DISABLE_ACCESS)
109484594296SDave Hansen 		new_pkru_bits |= PKRU_AD_BIT;
109584594296SDave Hansen 	if (init_val & PKEY_DISABLE_WRITE)
109684594296SDave Hansen 		new_pkru_bits |= PKRU_WD_BIT;
109784594296SDave Hansen 
109891c3dba7SYu-cheng Yu 	/* Shift the bits in to the correct place in PKRU for pkey: */
10999fe8a6f5SThomas Gleixner 	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
110084594296SDave Hansen 	new_pkru_bits <<= pkey_shift;
110184594296SDave Hansen 
1102b79daf85SDave Hansen 	/* Get old PKRU and mask off any old bits in place: */
1103b79daf85SDave Hansen 	old_pkru = read_pkru();
1104b79daf85SDave Hansen 	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
110584594296SDave Hansen 
1106b79daf85SDave Hansen 	/* Write old part along with new part: */
1107b79daf85SDave Hansen 	write_pkru(old_pkru | new_pkru_bits);
110891c3dba7SYu-cheng Yu 
110991c3dba7SYu-cheng Yu 	return 0;
111091c3dba7SYu-cheng Yu }
1111e8c24d3aSDave Hansen #endif /* ! CONFIG_ARCH_HAS_PKEYS */
111291c3dba7SYu-cheng Yu 
111396258950SThomas Gleixner static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
111496258950SThomas Gleixner 			 void *init_xstate, unsigned int size)
1115f0d4f30aSIngo Molnar {
111696258950SThomas Gleixner 	membuf_write(to, from_xstate ? xstate : init_xstate, size);
1117f0d4f30aSIngo Molnar }
1118f0d4f30aSIngo Molnar 
1119eb6f5172SThomas Gleixner /**
1120ca834defSThomas Gleixner  * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1121eb6f5172SThomas Gleixner  * @to:		membuf descriptor
11223ac8d757SThomas Gleixner  * @fpstate:	The fpstate buffer from which to copy
1123ca834defSThomas Gleixner  * @pkru_val:	The PKRU value to store in the PKRU component
1124eb6f5172SThomas Gleixner  * @copy_mode:	The requested copy mode
1125f0d4f30aSIngo Molnar  *
1126eb6f5172SThomas Gleixner  * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1127eb6f5172SThomas Gleixner  * format, i.e. from the kernel internal hardware dependent storage format
1128eb6f5172SThomas Gleixner  * to the requested @mode. UABI XSTATE is always uncompacted!
1129eb6f5172SThomas Gleixner  *
1130eb6f5172SThomas Gleixner  * It supports partial copy but @to.pos always starts from zero.
1131f0d4f30aSIngo Molnar  */
11323ac8d757SThomas Gleixner void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
1133ca834defSThomas Gleixner 			       u32 pkru_val, enum xstate_copy_mode copy_mode)
1134f0d4f30aSIngo Molnar {
113596258950SThomas Gleixner 	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
1136f83ac56aSThomas Gleixner 	struct xregs_state *xinit = &init_fpstate.regs.xsave;
11373ac8d757SThomas Gleixner 	struct xregs_state *xsave = &fpstate->regs.xsave;
1138f0d4f30aSIngo Molnar 	struct xstate_header header;
113996258950SThomas Gleixner 	unsigned int zerofrom;
1140ffd3e504SThomas Gleixner 	u64 mask;
11418c0817f4SIngo Molnar 	int i;
1142f0d4f30aSIngo Molnar 
114393c2cdc9SThomas Gleixner 	memset(&header, 0, sizeof(header));
1144f0d4f30aSIngo Molnar 	header.xfeatures = xsave->header.xfeatures;
1145eb6f5172SThomas Gleixner 
1146eb6f5172SThomas Gleixner 	/* Mask out the feature bits depending on copy mode */
1147eb6f5172SThomas Gleixner 	switch (copy_mode) {
1148eb6f5172SThomas Gleixner 	case XSTATE_COPY_FP:
1149eb6f5172SThomas Gleixner 		header.xfeatures &= XFEATURE_MASK_FP;
1150eb6f5172SThomas Gleixner 		break;
1151eb6f5172SThomas Gleixner 
1152eb6f5172SThomas Gleixner 	case XSTATE_COPY_FX:
1153eb6f5172SThomas Gleixner 		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
1154eb6f5172SThomas Gleixner 		break;
1155eb6f5172SThomas Gleixner 
1156eb6f5172SThomas Gleixner 	case XSTATE_COPY_XSAVE:
11573ac8d757SThomas Gleixner 		header.xfeatures &= fpstate->user_xfeatures;
1158eb6f5172SThomas Gleixner 		break;
1159eb6f5172SThomas Gleixner 	}
1160f0d4f30aSIngo Molnar 
116196258950SThomas Gleixner 	/* Copy FP state up to MXCSR */
116296258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
116396258950SThomas Gleixner 		     &xinit->i387, off_mxcsr);
116496258950SThomas Gleixner 
116596258950SThomas Gleixner 	/* Copy MXCSR when SSE or YMM are set in the feature mask */
116696258950SThomas Gleixner 	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
116796258950SThomas Gleixner 		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
116896258950SThomas Gleixner 		     MXCSR_AND_FLAGS_SIZE);
116996258950SThomas Gleixner 
117096258950SThomas Gleixner 	/* Copy the remaining FP state */
117196258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
117296258950SThomas Gleixner 		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
117396258950SThomas Gleixner 		     sizeof(xsave->i387.st_space));
117496258950SThomas Gleixner 
117596258950SThomas Gleixner 	/* Copy the SSE state - shared with YMM, but independently managed */
117696258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
117796258950SThomas Gleixner 		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
117896258950SThomas Gleixner 		     sizeof(xsave->i387.xmm_space));
117996258950SThomas Gleixner 
1180eb6f5172SThomas Gleixner 	if (copy_mode != XSTATE_COPY_XSAVE)
1181eb6f5172SThomas Gleixner 		goto out;
1182eb6f5172SThomas Gleixner 
118396258950SThomas Gleixner 	/* Zero the padding area */
118496258950SThomas Gleixner 	membuf_zero(&to, sizeof(xsave->i387.padding));
118596258950SThomas Gleixner 
118696258950SThomas Gleixner 	/* Copy xsave->i387.sw_reserved */
118796258950SThomas Gleixner 	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
118896258950SThomas Gleixner 
118996258950SThomas Gleixner 	/* Copy the user space relevant state of @xsave->header */
119096258950SThomas Gleixner 	membuf_write(&to, &header, sizeof(header));
119196258950SThomas Gleixner 
119296258950SThomas Gleixner 	zerofrom = offsetof(struct xregs_state, extended_state_area);
1193f0d4f30aSIngo Molnar 
1194f0d4f30aSIngo Molnar 	/*
1195ffd3e504SThomas Gleixner 	 * The ptrace buffer is in non-compacted XSAVE format.  In
1196ffd3e504SThomas Gleixner 	 * non-compacted format disabled features still occupy state space,
1197ffd3e504SThomas Gleixner 	 * but there is no state to copy from in the compacted
1198ffd3e504SThomas Gleixner 	 * init_fpstate. The gap tracking will zero these states.
1199f0d4f30aSIngo Molnar 	 */
12003ac8d757SThomas Gleixner 	mask = fpstate->user_xfeatures;
1201f0d4f30aSIngo Molnar 
1202ffd3e504SThomas Gleixner 	for_each_extended_xfeature(i, mask) {
120396258950SThomas Gleixner 		/*
120496258950SThomas Gleixner 		 * If there was a feature or alignment gap, zero the space
120596258950SThomas Gleixner 		 * in the destination buffer.
120696258950SThomas Gleixner 		 */
120796258950SThomas Gleixner 		if (zerofrom < xstate_offsets[i])
120896258950SThomas Gleixner 			membuf_zero(&to, xstate_offsets[i] - zerofrom);
120996258950SThomas Gleixner 
1210e84ba47eSDave Hansen 		if (i == XFEATURE_PKRU) {
1211e84ba47eSDave Hansen 			struct pkru_state pkru = {0};
1212e84ba47eSDave Hansen 			/*
1213e84ba47eSDave Hansen 			 * PKRU is not necessarily up to date in the
1214ca834defSThomas Gleixner 			 * XSAVE buffer. Use the provided value.
1215e84ba47eSDave Hansen 			 */
1216ca834defSThomas Gleixner 			pkru.pkru = pkru_val;
1217e84ba47eSDave Hansen 			membuf_write(&to, &pkru, sizeof(pkru));
1218e84ba47eSDave Hansen 		} else {
121996258950SThomas Gleixner 			copy_feature(header.xfeatures & BIT_ULL(i), &to,
122096258950SThomas Gleixner 				     __raw_xsave_addr(xsave, i),
122196258950SThomas Gleixner 				     __raw_xsave_addr(xinit, i),
122296258950SThomas Gleixner 				     xstate_sizes[i]);
1223e84ba47eSDave Hansen 		}
122496258950SThomas Gleixner 		/*
122596258950SThomas Gleixner 		 * Keep track of the last copied state in the non-compacted
122696258950SThomas Gleixner 		 * target buffer for gap zeroing.
122796258950SThomas Gleixner 		 */
122896258950SThomas Gleixner 		zerofrom = xstate_offsets[i] + xstate_sizes[i];
1229f0d4f30aSIngo Molnar 	}
1230f0d4f30aSIngo Molnar 
1231eb6f5172SThomas Gleixner out:
123296258950SThomas Gleixner 	if (to.left)
123396258950SThomas Gleixner 		membuf_zero(&to, to.left);
123491c3dba7SYu-cheng Yu }
123591c3dba7SYu-cheng Yu 
/**
 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @tsk:	The task from which to copy the saved xstate
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
{
	/*
	 * PKRU is passed in separately from thread_struct as the value in
	 * the XSAVE buffer is not necessarily up to date.
	 */
	__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
				  tsk->thread.pkru, copy_mode);
}
1254ca834defSThomas Gleixner 
1255522e9274SThomas Gleixner static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
1256522e9274SThomas Gleixner 			    const void *kbuf, const void __user *ubuf)
1257947f4947SThomas Gleixner {
1258522e9274SThomas Gleixner 	if (kbuf) {
1259522e9274SThomas Gleixner 		memcpy(dst, kbuf + offset, size);
1260522e9274SThomas Gleixner 	} else {
1261522e9274SThomas Gleixner 		if (copy_from_user(dst, ubuf + offset, size))
1262522e9274SThomas Gleixner 			return -EFAULT;
1263947f4947SThomas Gleixner 	}
1264522e9274SThomas Gleixner 	return 0;
1265947f4947SThomas Gleixner }
1266947f4947SThomas Gleixner 
1267522e9274SThomas Gleixner 
/*
 * Restore user supplied xstate in UABI (non-compacted) format from either
 * a kernel buffer (@kbuf) or a user space buffer (@ubuf) into @fpstate.
 * Exactly one of @kbuf/@ubuf is expected to be non-NULL.
 *
 * Returns: 0 on success, -EFAULT on a faulting user space copy, -EINVAL
 * when the header or MXCSR content is invalid.
 */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf)
{
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int offset, size;
	struct xstate_header hdr;
	u64 mask;
	int i;

	/* Fetch and validate the xstate header before touching anything */
	offset = offsetof(struct xregs_state, header);
	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
		return -EFAULT;

	if (validate_user_xstate_header(&hdr, fpstate))
		return -EINVAL;

	/* Validate MXCSR when any of the related features is in use */
	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
	if (hdr.xfeatures & mask) {
		u32 mxcsr[2];

		offset = offsetof(struct fxregs_state, mxcsr);
		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
			return -EFAULT;

		/* Reserved bits in MXCSR must be zero. */
		if (mxcsr[0] & ~mxcsr_feature_mask)
			return -EINVAL;

		/*
		 * SSE and YMM require MXCSR even when FP is not in use.
		 * Store it here only for the !FP case; presumably the FP
		 * feature copy below covers the MXCSR area as well.
		 */
		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
			xsave->i387.mxcsr = mxcsr[0];
			xsave->i387.mxcsr_mask = mxcsr[1];
		}
	}

	/* Copy in every feature component which is set in the new header */
	for (i = 0; i < XFEATURE_MAX; i++) {
		u64 mask = ((u64)1 << i);

		if (hdr.xfeatures & mask) {
			void *dst = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
				return -EFAULT;
		}
	}

	/*
	 * The state that came in from userspace was user-state only.
	 * Mask all the user states out of 'xfeatures':
	 */
	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;

	/*
	 * Add back in the features that came in from userspace:
	 */
	xsave->header.xfeatures |= hdr.xfeatures;

	return 0;
}
133179fecc2bSIngo Molnar 
/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
 * format and copy to the target thread. Used by ptrace and KVM.
 *
 * Returns: 0 on success or a negative error code (see copy_uabi_to_xstate()).
 */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
{
	return copy_uabi_to_xstate(fpstate, kbuf, NULL);
}
1340522e9274SThomas Gleixner 
/*
 * Convert from a sigreturn standard-format user-space buffer to kernel
 * XSAVE[S] format and copy to the target thread. This is called from the
 * sigreturn() and rt_sigreturn() system calls.
 *
 * Returns: 0 on success or a negative error code (see copy_uabi_to_xstate()).
 */
int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate,
				      const void __user *ubuf)
{
	return copy_uabi_to_xstate(fpstate, NULL, ubuf);
}
13510c608dadSAubrey Li 
1352f5daf836SThomas Gleixner static bool validate_independent_components(u64 mask)
135350f408d9SKan Liang {
1354a75c5289SThomas Gleixner 	u64 xchk;
135550f408d9SKan Liang 
1356a75c5289SThomas Gleixner 	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1357a75c5289SThomas Gleixner 		return false;
1358f5daf836SThomas Gleixner 
1359a75c5289SThomas Gleixner 	xchk = ~xfeatures_mask_independent();
136050f408d9SKan Liang 
1361a75c5289SThomas Gleixner 	if (WARN_ON_ONCE(!mask || mask & xchk))
1362a75c5289SThomas Gleixner 		return false;
136350f408d9SKan Liang 
1364a75c5289SThomas Gleixner 	return true;
136550f408d9SKan Liang }
136650f408d9SKan Liang 
/**
 * xsaves - Save selected components to a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to save
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized as
 * XSAVES does not write the full xstate header. Before first use the
 * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
 * can #GP.
 *
 * The feature mask must be a subset of the independent features.
 */
void xsaves(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	/* The 64-bit mask is handed to the instruction as two 32-bit halves */
	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
	/* A validated mask is not expected to make XSAVES fault */
	WARN_ON_ONCE(err);
}
1389a75c5289SThomas Gleixner 
/**
 * xrstors - Restore selected components from a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to restore
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized
 * otherwise XRSTORS from that buffer can #GP.
 *
 * Proper usage is to restore the state which was saved with
 * xsaves() into @xstate.
 *
 * The feature mask must be a subset of the independent features.
 */
void xrstors(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	/* The 64-bit mask is handed to the instruction as two 32-bit halves */
	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
	/* A validated mask is not expected to make XRSTORS fault */
	WARN_ON_ONCE(err);
}
141350f408d9SKan Liang 
1414087df48cSThomas Gleixner #if IS_ENABLED(CONFIG_KVM)
1415087df48cSThomas Gleixner void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1416087df48cSThomas Gleixner {
1417087df48cSThomas Gleixner 	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1418087df48cSThomas Gleixner 
1419087df48cSThomas Gleixner 	if (addr)
1420087df48cSThomas Gleixner 		memset(addr, 0, xstate_sizes[xfeature]);
1421087df48cSThomas Gleixner }
1422087df48cSThomas Gleixner EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1423087df48cSThomas Gleixner #endif
1424087df48cSThomas Gleixner 
1425db8268dfSChang S. Bae #ifdef CONFIG_X86_64
14265529acf4SThomas Gleixner 
14275529acf4SThomas Gleixner #ifdef CONFIG_X86_DEBUG_FPU
/*
 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
 * can safely operate on the @fpstate buffer.
 */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
{
	u64 xfd = __this_cpu_read(xfd_state);

	/* Matching XFD: the buffer layout is consistent with the hardware */
	if (fpstate->xfd == xfd)
		return true;

	/*
	 * The XFD MSR does not match fpstate->xfd. That's invalid when
	 * the passed in fpstate is current's fpstate.
	 */
	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
		return false;

	/*
	 * XRSTOR(S) from init_fpstate are always correct as it will just
	 * bring all components into init state and not read from the
	 * buffer. XSAVE(S) raises #PF after init.
	 */
	if (fpstate == &init_fpstate)
		return rstor;

	/*
	 * Remaining callers which reach this point:
	 * XSAVE(S): clone(), fpu_swap_kvm_fpu()
	 * XRSTORS(S): fpu_swap_kvm_fpu()
	 */

	/*
	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
	 * the buffer area for XFD-disabled state components.
	 */
	mask &= ~xfd;

	/*
	 * Remove features which are valid in fpstate. They
	 * have space allocated in fpstate.
	 */
	mask &= ~fpstate->xfeatures;

	/*
	 * Any remaining state components in 'mask' might be written
	 * by XSAVE/XRSTOR. Fail validation if any are found.
	 */
	return !mask;
}
14775529acf4SThomas Gleixner 
/* Debug check: warn once when an XSAVE*/XRSTOR* with @mask would be unsafe */
void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
}
14825529acf4SThomas Gleixner #endif /* CONFIG_X86_DEBUG_FPU */
14835529acf4SThomas Gleixner 
1484db3e7321SChang S. Bae static int __init xfd_update_static_branch(void)
1485db3e7321SChang S. Bae {
1486db3e7321SChang S. Bae 	/*
1487db3e7321SChang S. Bae 	 * If init_fpstate.xfd has bits set then dynamic features are
1488db3e7321SChang S. Bae 	 * available and the dynamic sizing must be enabled.
1489db3e7321SChang S. Bae 	 */
1490db3e7321SChang S. Bae 	if (init_fpstate.xfd)
1491db3e7321SChang S. Bae 		static_branch_enable(&__fpu_state_size_dynamic);
1492db3e7321SChang S. Bae 	return 0;
1493db3e7321SChang S. Bae }
1494db3e7321SChang S. Bae arch_initcall(xfd_update_static_branch)
1495db3e7321SChang S. Bae 
1496500afbf6SChang S. Bae void fpstate_free(struct fpu *fpu)
1497500afbf6SChang S. Bae {
1498db3e7321SChang S. Bae 	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1499500afbf6SChang S. Bae 		vfree(fpu->fpstate);
1500500afbf6SChang S. Bae }
1501500afbf6SChang S. Bae 
/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = &current->thread.fpu;
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	/* Register buffer size plus the 64-byte aligned header offset */
	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	/* vzalloc() ensures the XSAVE header starts out zeroed */
	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as reference independent whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	newfps->xfeatures = curfps->xfeatures | xfeatures;

	/* user_xfeatures is only extended for host fpstates */
	if (!guest_fpu)
		newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;

	/* The newly enabled features are no longer XFD-disarmed */
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}
1589500afbf6SChang S. Bae 
/*
 * Check that enlarging the user visible FPU state to @usize keeps the
 * resulting signal frame within every thread's sigaltstack in the
 * current thread group. Caller must hold sighand->siglock.
 *
 * Returns: 0 when all stacks are large enough, -ENOSPC otherwise.
 */
static int validate_sigaltstack(unsigned int usize)
{
	struct task_struct *thread, *leader = current->group_leader;
	unsigned long framesize = get_sigframe_size();

	lockdep_assert_held(&current->sighand->siglock);

	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
	framesize -= fpu_user_cfg.max_size;
	framesize += usize;
	for_each_thread(leader, thread) {
		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
			return -ENOSPC;
	}
	return 0;
}
1606db8268dfSChang S. Bae 
/*
 * Grant the current thread group permission to use the @requested
 * features (host permissions, or guest permissions when @guest is true).
 * The permission and size fields are protected by sighand lock, which
 * the caller holds.
 */
static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
	struct fpu *fpu = &current->group_leader->thread.fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret = 0;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/* Calculate the resulting kernel state size */
	mask = permitted | requested;
	ksize = xstate_calculate_size(mask, compacted);

	/* Calculate the resulting user state size */
	mask &= XFEATURE_MASK_USER_SUPPORTED;
	usize = xstate_calculate_size(mask, false);

	/* Guest permissions do not affect host signal frames */
	if (!guest) {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
	}

	perm = guest ? &fpu->guest_perm : &fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, requested);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;
	return ret;
}
1648db8268dfSChang S. Bae 
/*
 * Permissions array to map facilities with more than one component
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	/* Requesting XTILE_DATA maps to the XTILE_DATA feature mask */
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
1655db8268dfSChang S. Bae 
/*
 * Request permission for a facility given by its highest xstate component
 * number @idx, for the host or, when @guest is true, for guest FPU usage.
 *
 * Returns: 0 on success, -EINVAL/-EOPNOTSUPP for invalid or unsupported
 * requests, -EBUSY when guest permissions are already locked.
 */
static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];
	if (!requested)
		return -EOPNOTSUPP;

	if ((fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
		ret = -EBUSY;
	else
		ret = __xstate_request_perm(permitted, requested, guest);
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}
1693783e87b4SChang S. Bae 
/*
 * Enable the dynamic user features indicated by @xfd_err for the current
 * task, or for @guest_fpu when supplied, after checking the thread group
 * permissions. Reallocates the fpstate buffer to the permitted size.
 *
 * Returns: 0 on success (or for a spurious event), -EPERM when the
 * feature is not permitted, -EFAULT when reallocation fails.
 */
int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	/* No dynamic user feature bits: nothing to enable */
	if (!xfd_event) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = &current->group_leader->thread.fpu;
	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted. State size is sufficient.  Dropping
	 * the lock is safe here even if more features are added from
	 * another task, the retrieved buffer sizes are valid for the
	 * currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
		return -EFAULT;
	return 0;
}
1737c270ce39SThomas Gleixner 
/* Host variant of __xfd_enable_feature(): no guest FPU container involved */
int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}
1742c270ce39SThomas Gleixner 
#else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	/* Dynamic xstate permission management is 64-bit only */
	return -EPERM;
}
#endif  /* !CONFIG_X86_64 */
1749db8268dfSChang S. Bae 
/* Lockless snapshot of the guest xstate permissions of current's group */
u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1755980fe2fdSThomas Gleixner 
/**
 * fpu_xstate_prctl - xstate permission operations
 * @tsk:	Redundant pointer to current
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_GUEST_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 * ARCH_REQ_XCOMP_GUEST_PERM: Facility number requested for guest FPUs
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
 */
long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
{
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	if (tsk != current)
		return -EPERM;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features |	fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after the
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}
1817db8268dfSChang S. Bae 
18180c608dadSAubrey Li #ifdef CONFIG_PROC_PID_ARCH_STATUS
18190c608dadSAubrey Li /*
18200c608dadSAubrey Li  * Report the amount of time elapsed in millisecond since last AVX512
18210c608dadSAubrey Li  * use in the task.
18220c608dadSAubrey Li  */
18230c608dadSAubrey Li static void avx512_status(struct seq_file *m, struct task_struct *task)
18240c608dadSAubrey Li {
18250c608dadSAubrey Li 	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
18260c608dadSAubrey Li 	long delta;
18270c608dadSAubrey Li 
18280c608dadSAubrey Li 	if (!timestamp) {
18290c608dadSAubrey Li 		/*
18300c608dadSAubrey Li 		 * Report -1 if no AVX512 usage
18310c608dadSAubrey Li 		 */
18320c608dadSAubrey Li 		delta = -1;
18330c608dadSAubrey Li 	} else {
18340c608dadSAubrey Li 		delta = (long)(jiffies - timestamp);
18350c608dadSAubrey Li 		/*
18360c608dadSAubrey Li 		 * Cap to LONG_MAX if time difference > LONG_MAX
18370c608dadSAubrey Li 		 */
18380c608dadSAubrey Li 		if (delta < 0)
18390c608dadSAubrey Li 			delta = LONG_MAX;
18400c608dadSAubrey Li 		delta = jiffies_to_msecs(delta);
18410c608dadSAubrey Li 	}
18420c608dadSAubrey Li 
18430c608dadSAubrey Li 	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
18440c608dadSAubrey Li 	seq_putc(m, '\n');
18450c608dadSAubrey Li }
18460c608dadSAubrey Li 
18470c608dadSAubrey Li /*
18480c608dadSAubrey Li  * Report architecture specific information
18490c608dadSAubrey Li  */
18500c608dadSAubrey Li int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
18510c608dadSAubrey Li 			struct pid *pid, struct task_struct *task)
18520c608dadSAubrey Li {
18530c608dadSAubrey Li 	/*
18540c608dadSAubrey Li 	 * Report AVX512 state if the processor and build option supported.
18550c608dadSAubrey Li 	 */
18560c608dadSAubrey Li 	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
18570c608dadSAubrey Li 		avx512_status(m, task);
18580c608dadSAubrey Li 
18590c608dadSAubrey Li 	return 0;
18600c608dadSAubrey Li }
18610c608dadSAubrey Li #endif /* CONFIG_PROC_PID_ARCH_STATUS */
1862