xref: /openbmc/linux/arch/x86/kernel/fpu/xstate.c (revision 2c66ca39)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
262784854SIngo Molnar /*
362784854SIngo Molnar  * xsave/xrstor support.
462784854SIngo Molnar  *
562784854SIngo Molnar  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
662784854SIngo Molnar  */
7ffd3e504SThomas Gleixner #include <linux/bitops.h>
862784854SIngo Molnar #include <linux/compat.h>
962784854SIngo Molnar #include <linux/cpu.h>
10e8c24d3aSDave Hansen #include <linux/mman.h>
11db8268dfSChang S. Bae #include <linux/nospec.h>
1284594296SDave Hansen #include <linux/pkeys.h>
130c608dadSAubrey Li #include <linux/seq_file.h>
140c608dadSAubrey Li #include <linux/proc_fs.h>
15500afbf6SChang S. Bae #include <linux/vmalloc.h>
1659a36d16SIngo Molnar 
1762784854SIngo Molnar #include <asm/fpu/api.h>
1859a36d16SIngo Molnar #include <asm/fpu/regset.h>
19d9d005f3SThomas Gleixner #include <asm/fpu/signal.h>
20d9d005f3SThomas Gleixner #include <asm/fpu/xcr.h>
21b992c660SIngo Molnar 
2262784854SIngo Molnar #include <asm/tlbflush.h>
23db8268dfSChang S. Bae #include <asm/prctl.h>
24db8268dfSChang S. Bae #include <asm/elf.h>
25126fe040SThomas Gleixner 
26500afbf6SChang S. Bae #include "context.h"
2796034455SThomas Gleixner #include "internal.h"
2834002571SThomas Gleixner #include "legacy.h"
29126fe040SThomas Gleixner #include "xstate.h"
3062784854SIngo Molnar 
31ffd3e504SThomas Gleixner #define for_each_extended_xfeature(bit, mask)				\
32ffd3e504SThomas Gleixner 	(bit) = FIRST_EXTENDED_XFEATURE;				\
33ffd3e504SThomas Gleixner 	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
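
/*
 * Illustrative use of the iterator above, assuming a hypothetical
 * enabled-feature mask of 0x207 (FP | SSE | YMM | PKRU): the legacy
 * bits 0 and 1 are skipped and the loop visits bit 2 (YMM) and
 * bit 9 (PKRU) only:
 *
 *	u64 mask = 0x207;
 *	unsigned int i;
 *
 *	for_each_extended_xfeature(i, mask)
 *		pr_info("extended xfeature %u is enabled\n", i);
 */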
34ffd3e504SThomas Gleixner 
351f96b1efSDave Hansen /*
361f96b1efSDave Hansen  * Although we spell it out in here, the Processor Trace
371f96b1efSDave Hansen  * xfeature is completely unused.  We use other mechanisms
381f96b1efSDave Hansen  * to save/restore PT state in Linux.
391f96b1efSDave Hansen  */
405b073430SIngo Molnar static const char *xfeature_names[] =
415b073430SIngo Molnar {
425b073430SIngo Molnar 	"x87 floating point registers"	,
435b073430SIngo Molnar 	"SSE registers"			,
445b073430SIngo Molnar 	"AVX registers"			,
455b073430SIngo Molnar 	"MPX bounds registers"		,
465b073430SIngo Molnar 	"MPX CSR"			,
475b073430SIngo Molnar 	"AVX-512 opmask"		,
485b073430SIngo Molnar 	"AVX-512 Hi256"			,
495b073430SIngo Molnar 	"AVX-512 ZMM_Hi256"		,
501f96b1efSDave Hansen 	"Processor Trace (unused)"	,
51c8df4009SDave Hansen 	"Protection Keys User registers",
52b454feb9SYu-cheng Yu 	"PASID state",
535b073430SIngo Molnar 	"unknown xstate feature"	,
54eec2113eSChang S. Bae 	"unknown xstate feature"	,
55eec2113eSChang S. Bae 	"unknown xstate feature"	,
56eec2113eSChang S. Bae 	"unknown xstate feature"	,
57eec2113eSChang S. Bae 	"unknown xstate feature"	,
58eec2113eSChang S. Bae 	"unknown xstate feature"	,
59eec2113eSChang S. Bae 	"AMX Tile config"		,
60eec2113eSChang S. Bae 	"AMX Tile data"			,
61eec2113eSChang S. Bae 	"unknown xstate feature"	,
625b073430SIngo Molnar };
635b073430SIngo Molnar 
6470c3f167SChang S. Bae static unsigned short xsave_cpuid_features[] __initdata = {
6570c3f167SChang S. Bae 	[XFEATURE_FP]				= X86_FEATURE_FPU,
6670c3f167SChang S. Bae 	[XFEATURE_SSE]				= X86_FEATURE_XMM,
6770c3f167SChang S. Bae 	[XFEATURE_YMM]				= X86_FEATURE_AVX,
6870c3f167SChang S. Bae 	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
6970c3f167SChang S. Bae 	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
7070c3f167SChang S. Bae 	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
7170c3f167SChang S. Bae 	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
7270c3f167SChang S. Bae 	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
7370c3f167SChang S. Bae 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
7470c3f167SChang S. Bae 	[XFEATURE_PKRU]				= X86_FEATURE_PKU,
7570c3f167SChang S. Bae 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
76eec2113eSChang S. Bae 	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
77eec2113eSChang S. Bae 	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
78ccb18db2SAndi Kleen };
79ccb18db2SAndi Kleen 
80ce578f16SThomas Gleixner static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
81ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
82ce578f16SThomas Gleixner static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
83ce578f16SThomas Gleixner 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
846afbb58cSThomas Gleixner static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
856afbb58cSThomas Gleixner 
866afbb58cSThomas Gleixner #define XSTATE_FLAG_SUPERVISOR	BIT(0)
876afbb58cSThomas Gleixner #define XSTATE_FLAG_ALIGNED64	BIT(1)
8862784854SIngo Molnar 
8962784854SIngo Molnar /*
905b073430SIngo Molnar  * Return whether the system supports a given xfeature.
915b073430SIngo Molnar  *
925b073430SIngo Molnar  * Also return the name of the (most advanced) feature that the caller requested:
935b073430SIngo Molnar  */
945b073430SIngo Molnar int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
955b073430SIngo Molnar {
961c253ff2SThomas Gleixner 	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
975b073430SIngo Molnar 
985b073430SIngo Molnar 	if (unlikely(feature_name)) {
995b073430SIngo Molnar 		long xfeature_idx, max_idx;
1005b073430SIngo Molnar 		u64 xfeatures_print;
1015b073430SIngo Molnar 		/*
1025b073430SIngo Molnar 		 * We use fls64() here so that we print the most advanced
1035b073430SIngo Molnar 		 * feature that was requested but is missing. If a driver
104d91cab78SDave Hansen 		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
1055b073430SIngo Molnar 		 * missing AVX feature - this is the most informative message
1065b073430SIngo Molnar 		 * to users:
1075b073430SIngo Molnar 		 */
1085b073430SIngo Molnar 		if (xfeatures_missing)
1095b073430SIngo Molnar 			xfeatures_print = xfeatures_missing;
1105b073430SIngo Molnar 		else
1115b073430SIngo Molnar 			xfeatures_print = xfeatures_needed;
1125b073430SIngo Molnar 
1135b073430SIngo Molnar 		xfeature_idx = fls64(xfeatures_print)-1;
1145b073430SIngo Molnar 		max_idx = ARRAY_SIZE(xfeature_names)-1;
1155b073430SIngo Molnar 		xfeature_idx = min(xfeature_idx, max_idx);
1165b073430SIngo Molnar 
1175b073430SIngo Molnar 		*feature_name = xfeature_names[xfeature_idx];
1185b073430SIngo Molnar 	}
1195b073430SIngo Molnar 
1205b073430SIngo Molnar 	if (xfeatures_missing)
1215b073430SIngo Molnar 		return 0;
1225b073430SIngo Molnar 
1235b073430SIngo Molnar 	return 1;
1245b073430SIngo Molnar }
1255b073430SIngo Molnar EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
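
/*
 * Typical usage, sketched with a hypothetical caller: probe for a
 * feature combination and report the most advanced missing piece by
 * name on failure:
 *
 *	const char *name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &name))
 *		pr_info("AVX unusable: '%s' is not supported\n", name);
 */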
1265b073430SIngo Molnar 
1276afbb58cSThomas Gleixner static bool xfeature_is_aligned64(int xfeature_nr)
1286afbb58cSThomas Gleixner {
1296afbb58cSThomas Gleixner 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
1306afbb58cSThomas Gleixner }
1316afbb58cSThomas Gleixner 
132158e2ee6SYu-cheng Yu static bool xfeature_is_supervisor(int xfeature_nr)
1331499ce2dSYu-cheng Yu {
1346afbb58cSThomas Gleixner 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
1351499ce2dSYu-cheng Yu }
1361499ce2dSYu-cheng Yu 
1377aa5128bSThomas Gleixner static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
1387aa5128bSThomas Gleixner {
1397aa5128bSThomas Gleixner 	unsigned int offs, i;
1407aa5128bSThomas Gleixner 
1417aa5128bSThomas Gleixner 	/*
1427aa5128bSThomas Gleixner 	 * Non-compacted format and legacy features use the cached fixed
1437aa5128bSThomas Gleixner 	 * offsets.
1447aa5128bSThomas Gleixner 	 */
1458ad7e8f6SThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
1468ad7e8f6SThomas Gleixner 	    xfeature <= XFEATURE_SSE)
1477aa5128bSThomas Gleixner 		return xstate_offsets[xfeature];
1487aa5128bSThomas Gleixner 
1497aa5128bSThomas Gleixner 	/*
1507aa5128bSThomas Gleixner 	 * Compacted format offsets depend on the actual content of the
1517aa5128bSThomas Gleixner 	 * compacted xsave area which is determined by the xcomp_bv header
1527aa5128bSThomas Gleixner 	 * field.
1537aa5128bSThomas Gleixner 	 */
1547aa5128bSThomas Gleixner 	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
1557aa5128bSThomas Gleixner 	for_each_extended_xfeature(i, xcomp_bv) {
1567aa5128bSThomas Gleixner 		if (xfeature_is_aligned64(i))
1577aa5128bSThomas Gleixner 			offs = ALIGN(offs, 64);
1587aa5128bSThomas Gleixner 		if (i == xfeature)
1597aa5128bSThomas Gleixner 			break;
1607aa5128bSThomas Gleixner 		offs += xstate_sizes[i];
1617aa5128bSThomas Gleixner 	}
1627aa5128bSThomas Gleixner 	return offs;
1637aa5128bSThomas Gleixner }
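
/*
 * Worked example for the compacted path above, assuming a hypothetical
 * xcomp_bv which enables YMM (bit 2, 256 bytes), OPMASK (bit 5, 64
 * bytes) and the 64-byte aligned XTILE_DATA (bit 18):
 *
 *	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576
 *	YMM:        offs = 576, then offs += 256 -> 832
 *	OPMASK:     offs = 832, then offs += 64  -> 896
 *	XTILE_DATA: offs = ALIGN(896, 64)        =  896
 *
 * Features absent from xcomp_bv consume no space in the buffer.
 */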
1647aa5128bSThomas Gleixner 
1655b073430SIngo Molnar /*
16662784854SIngo Molnar  * Enable the extended processor state save/restore feature.
16762784854SIngo Molnar  * Called once per CPU onlining.
16862784854SIngo Molnar  */
16962784854SIngo Molnar void fpu__init_cpu_xstate(void)
17062784854SIngo Molnar {
1711c253ff2SThomas Gleixner 	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
17262784854SIngo Molnar 		return;
17362784854SIngo Molnar 
17462784854SIngo Molnar 	cr4_set_bits(X86_CR4_OSXSAVE);
17562784854SIngo Molnar 
17662784854SIngo Molnar 	/*
17767236547SChang S. Bae 	 * Must happen after CR4 setup and before xsetbv() to allow KVM
17867236547SChang S. Bae 	 * lazy passthrough.  Write independent of the dynamic state static
17967236547SChang S. Bae 	 * key as that does not work on the boot CPU. This also ensures
18067236547SChang S. Bae 	 * that any stale state is wiped out from XFD.
18167236547SChang S. Bae 	 */
18267236547SChang S. Bae 	if (cpu_feature_enabled(X86_FEATURE_XFD))
18367236547SChang S. Bae 		wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
18467236547SChang S. Bae 
18567236547SChang S. Bae 	/*
186524bb73bSYu-cheng Yu 	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
187524bb73bSYu-cheng Yu 	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
188524bb73bSYu-cheng Yu 	 * states can be set here.
189e6e888f9SDave Hansen 	 */
1901c253ff2SThomas Gleixner 	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
19162784854SIngo Molnar 
19262784854SIngo Molnar 	/*
19371581eefSYu-cheng Yu 	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
194e6e888f9SDave Hansen 	 */
195f0dccc9dSKan Liang 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
196f0dccc9dSKan Liang 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
19701707b66SAndy Lutomirski 				     xfeatures_mask_independent());
198f0dccc9dSKan Liang 	}
19971581eefSYu-cheng Yu }
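
/*
 * After this has run, the feature split might look like this on a
 * hypothetical AVX + PKU machine with architectural LBR (illustrative
 * values only):
 *
 *	XCR0     = 0x207	(FP | SSE | YMM | PKRU: user states)
 *	IA32_XSS = 0x8000	(LBR: independent supervisor state)
 *
 * The XSAVE instructions then operate on XCR0 | IA32_XSS, filtered by
 * the per-instruction requested-feature bitmap.
 */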
20071581eefSYu-cheng Yu 
201524bb73bSYu-cheng Yu static bool xfeature_enabled(enum xfeature xfeature)
202e6e888f9SDave Hansen {
2031c253ff2SThomas Gleixner 	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
204e6e888f9SDave Hansen }
205e6e888f9SDave Hansen 
206e6e888f9SDave Hansen /*
20739f1acd2SIngo Molnar  * Record the offsets and sizes of various xstates contained
20839f1acd2SIngo Molnar  * in the XSAVE state memory layout.
20962784854SIngo Molnar  */
21035a77d45SThomas Gleixner static void __init setup_xstate_cache(void)
21162784854SIngo Molnar {
212ee9ae257SDave Hansen 	u32 eax, ebx, ecx, edx, i;
213d9f6e12fSIngo Molnar 	/* start at the beginning of the "extended state" */
214e6e888f9SDave Hansen 	unsigned int last_good_offset = offsetof(struct xregs_state,
215e6e888f9SDave Hansen 						 extended_state_area);
216ac73b27aSYu-cheng Yu 	/*
217ac73b27aSYu-cheng Yu 	 * The FP xstates and SSE xstates are legacy states. They are always
218ac73b27aSYu-cheng Yu 	 * in the fixed offsets in the xsave area in either compacted form
219ac73b27aSYu-cheng Yu 	 * or standard form.
220ac73b27aSYu-cheng Yu 	 */
221446e693cSCyrill Gorcunov 	xstate_offsets[XFEATURE_FP]	= 0;
222446e693cSCyrill Gorcunov 	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
223446e693cSCyrill Gorcunov 						   xmm_space);
224446e693cSCyrill Gorcunov 
225446e693cSCyrill Gorcunov 	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
226c593642cSPankaj Bharadiya 	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
227446e693cSCyrill Gorcunov 						       xmm_space);
22862784854SIngo Molnar 
2291c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
230e6e888f9SDave Hansen 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
2311499ce2dSYu-cheng Yu 
232c12e13dcSYu-cheng Yu 		xstate_sizes[i] = eax;
2336afbb58cSThomas Gleixner 		xstate_flags[i] = ecx;
234c12e13dcSYu-cheng Yu 
2351499ce2dSYu-cheng Yu 		/*
236c12e13dcSYu-cheng Yu 		 * If an xfeature is supervisor state, the offset in EBX is
237c12e13dcSYu-cheng Yu 		 * invalid; leave it at -1.
2381499ce2dSYu-cheng Yu 		 */
239c12e13dcSYu-cheng Yu 		if (xfeature_is_supervisor(i))
240c12e13dcSYu-cheng Yu 			continue;
241c12e13dcSYu-cheng Yu 
242ee9ae257SDave Hansen 		xstate_offsets[i] = ebx;
2431499ce2dSYu-cheng Yu 
244e6e888f9SDave Hansen 		/*
245c12e13dcSYu-cheng Yu 		 * In our xstate size checks, we assume that the highest-numbered
246c12e13dcSYu-cheng Yu 		 * xstate feature has the highest offset in the buffer.  Ensure
247c12e13dcSYu-cheng Yu 		 * it does.
248e6e888f9SDave Hansen 		 */
249e6e888f9SDave Hansen 		WARN_ONCE(last_good_offset > xstate_offsets[i],
250e6e888f9SDave Hansen 			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
251c12e13dcSYu-cheng Yu 
252e6e888f9SDave Hansen 		last_good_offset = xstate_offsets[i];
25339f1acd2SIngo Molnar 	}
25462784854SIngo Molnar }
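
/*
 * For reference, CPUID leaf 0xD enumerates each extended feature as in
 * this sketch (the numbers are what common AVX hardware reports for
 * sub-leaf 2, i.e. YMM, and are illustrative only):
 *
 *	cpuid_count(XSTATE_CPUID, 2, &eax, &ebx, &ecx, &edx);
 *	eax == 256	(state size in bytes)
 *	ebx == 576	(offset in the non-compacted buffer)
 *	ecx == 0	(bit 0: supervisor state, bit 1: 64-byte aligned)
 */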
25562784854SIngo Molnar 
25632231879SIngo Molnar static void __init print_xstate_feature(u64 xstate_mask)
25762784854SIngo Molnar {
25833588b52SIngo Molnar 	const char *feature_name;
25962784854SIngo Molnar 
26033588b52SIngo Molnar 	if (cpu_has_xfeatures(xstate_mask, &feature_name))
261c8df4009SDave Hansen 		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
26262784854SIngo Molnar }
26362784854SIngo Molnar 
26462784854SIngo Molnar /*
26562784854SIngo Molnar  * Print out all the supported xstate features:
26662784854SIngo Molnar  */
26732231879SIngo Molnar static void __init print_xstate_features(void)
26862784854SIngo Molnar {
269d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_FP);
270d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_SSE);
271d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_YMM);
272d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_BNDREGS);
273d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_BNDCSR);
274d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_OPMASK);
275d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
276d91cab78SDave Hansen 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
277c8df4009SDave Hansen 	print_xstate_feature(XFEATURE_MASK_PKRU);
278b454feb9SYu-cheng Yu 	print_xstate_feature(XFEATURE_MASK_PASID);
279eec2113eSChang S. Bae 	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
280eec2113eSChang S. Bae 	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
28162784854SIngo Molnar }
28262784854SIngo Molnar 
28362784854SIngo Molnar /*
28403482e08SYu-cheng Yu  * This check is important because it is easy to get XSTATE_*
28503482e08SYu-cheng Yu  * confused with XSTATE_BIT_*.
28603482e08SYu-cheng Yu  */
28703482e08SYu-cheng Yu #define CHECK_XFEATURE(nr) do {		\
28803482e08SYu-cheng Yu 	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
28903482e08SYu-cheng Yu 	WARN_ON(nr >= XFEATURE_MAX);	\
29003482e08SYu-cheng Yu } while (0)
29103482e08SYu-cheng Yu 
29203482e08SYu-cheng Yu /*
293996952e0SYu-cheng Yu  * Print out xstate component offsets and sizes
294996952e0SYu-cheng Yu  */
295996952e0SYu-cheng Yu static void __init print_xstate_offset_size(void)
296996952e0SYu-cheng Yu {
297996952e0SYu-cheng Yu 	int i;
298996952e0SYu-cheng Yu 
2991c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
300996952e0SYu-cheng Yu 		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
3017aa5128bSThomas Gleixner 			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
3027aa5128bSThomas Gleixner 			i, xstate_sizes[i]);
303996952e0SYu-cheng Yu 	}
304996952e0SYu-cheng Yu }
305996952e0SYu-cheng Yu 
306996952e0SYu-cheng Yu /*
307b579d0c3SThomas Gleixner  * This function is called only during boot time when x86 caps are not set
308b579d0c3SThomas Gleixner  * up and alternatives cannot be used yet.
309b579d0c3SThomas Gleixner  */
310b579d0c3SThomas Gleixner static __init void os_xrstor_booting(struct xregs_state *xstate)
311b579d0c3SThomas Gleixner {
312eda32f4fSThomas Gleixner 	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
313b579d0c3SThomas Gleixner 	u32 lmask = mask;
314b579d0c3SThomas Gleixner 	u32 hmask = mask >> 32;
315b579d0c3SThomas Gleixner 	int err;
316b579d0c3SThomas Gleixner 
317b579d0c3SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
318b579d0c3SThomas Gleixner 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
319b579d0c3SThomas Gleixner 	else
320b579d0c3SThomas Gleixner 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
321b579d0c3SThomas Gleixner 
322b579d0c3SThomas Gleixner 	/*
323b579d0c3SThomas Gleixner 	 * We should never fault when copying from a kernel buffer, and the FPU
324b579d0c3SThomas Gleixner 	 * state we set at boot time should be valid.
325b579d0c3SThomas Gleixner 	 */
326b579d0c3SThomas Gleixner 	WARN_ON_FPU(err);
327b579d0c3SThomas Gleixner }
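
/*
 * The lmask/hmask split above mirrors the instruction encoding: the
 * XRSTOR family takes its 64-bit requested-feature bitmap in EDX:EAX.
 * A minimal sketch with a hypothetical mask:
 *
 *	u64 mask  = 0x0000000400000007ULL;
 *	u32 lmask = mask;		// 0x00000007, goes into EAX
 *	u32 hmask = mask >> 32;		// 0x00000004, goes into EDX
 */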
328b579d0c3SThomas Gleixner 
329b579d0c3SThomas Gleixner /*
330f9dfb5e3SThomas Gleixner  * All supported features have either init state all zeros or are
331f9dfb5e3SThomas Gleixner  * handled in setup_init_fpu_buf() individually. This is an explicit
332f9dfb5e3SThomas Gleixner  * feature list which deliberately does not use XFEATURE_MASK*SUPPORTED,
333f9dfb5e3SThomas Gleixner  * so that newly added features trip the build-time check below and
334f9dfb5e3SThomas Gleixner  * make people actually look at the init state for the new feature.
335f9dfb5e3SThomas Gleixner  */
336f9dfb5e3SThomas Gleixner #define XFEATURES_INIT_FPSTATE_HANDLED		\
337f9dfb5e3SThomas Gleixner 	(XFEATURE_MASK_FP |			\
338f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_SSE |			\
339f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_YMM |			\
340f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_OPMASK |			\
341f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_ZMM_Hi256 |		\
342f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_Hi16_ZMM	 |		\
343f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_PKRU |			\
344f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_BNDREGS |		\
345f9dfb5e3SThomas Gleixner 	 XFEATURE_MASK_BNDCSR |			\
3462308ee57SChang S. Bae 	 XFEATURE_MASK_PASID |			\
3472308ee57SChang S. Bae 	 XFEATURE_MASK_XTILE)
348f9dfb5e3SThomas Gleixner 
349f9dfb5e3SThomas Gleixner /*
35062784854SIngo Molnar  * setup the xstate image representing the init state
35162784854SIngo Molnar  */
35232231879SIngo Molnar static void __init setup_init_fpu_buf(void)
35362784854SIngo Molnar {
354f9dfb5e3SThomas Gleixner 	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
355f9dfb5e3SThomas Gleixner 		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
356f9dfb5e3SThomas Gleixner 		     XFEATURES_INIT_FPSTATE_HANDLED);
357f9dfb5e3SThomas Gleixner 
358d366bf7eSBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
35962784854SIngo Molnar 		return;
36062784854SIngo Molnar 
36162784854SIngo Molnar 	print_xstate_features();
36262784854SIngo Molnar 
363c32d7cabSChang S. Bae 	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
36462784854SIngo Molnar 
36562784854SIngo Molnar 	/*
3667d937060SFenghua Yu 	 * Init the state of all features with header.xfeatures being 0x0
36762784854SIngo Molnar 	 */
368f83ac56aSThomas Gleixner 	os_xrstor_booting(&init_fpstate.regs.xsave);
36962784854SIngo Molnar 
37062784854SIngo Molnar 	/*
371f9dfb5e3SThomas Gleixner 	 * All components are now in init state. Read the state back so
372f9dfb5e3SThomas Gleixner 	 * that init_fpstate contains all non-zero init state. This only
3738ad7e8f6SThomas Gleixner 	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
374f9dfb5e3SThomas Gleixner 	 * those use the init optimization which skips writing data for
375f9dfb5e3SThomas Gleixner 	 * components in init state.
376f9dfb5e3SThomas Gleixner 	 *
377f9dfb5e3SThomas Gleixner 	 * XSAVE could be used, but that would require to reshuffle the
3788ad7e8f6SThomas Gleixner 	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
379f9dfb5e3SThomas Gleixner 	 * compaction. But doing so is a pointless exercise because most
380f9dfb5e3SThomas Gleixner 	 * components have an all zeros init state except for the legacy
381f9dfb5e3SThomas Gleixner 	 * ones (FP and SSE). Those can be saved with FXSAVE into the
382f9dfb5e3SThomas Gleixner 	 * legacy area. Adding new features requires to ensure that init
383f9dfb5e3SThomas Gleixner 	 * state is all zeroes or if not to add the necessary handling
384f9dfb5e3SThomas Gleixner 	 * here.
38562784854SIngo Molnar 	 */
386f83ac56aSThomas Gleixner 	fxsave(&init_fpstate.regs.fxsave);
38762784854SIngo Molnar }
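
/*
 * The FXSAVE at the end captures the non-zero legacy init values; the
 * architectural reset defaults, for example:
 *
 *	init_fpstate.regs.fxsave.cwd   == 0x037f  (x87 control word)
 *	init_fpstate.regs.fxsave.mxcsr == 0x1f80  (SSE control/status)
 *
 * Everything else in init_fpstate stays zero, matching the init state
 * of all other enabled components.
 */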
38862784854SIngo Molnar 
389ce711ea3SKan Liang int xfeature_size(int xfeature_nr)
39065ac2e9bSDave Hansen {
39165ac2e9bSDave Hansen 	u32 eax, ebx, ecx, edx;
39265ac2e9bSDave Hansen 
39365ac2e9bSDave Hansen 	CHECK_XFEATURE(xfeature_nr);
39465ac2e9bSDave Hansen 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
39565ac2e9bSDave Hansen 	return eax;
39665ac2e9bSDave Hansen }
39765ac2e9bSDave Hansen 
398e63e5d5cSEric Biggers /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
39949e4eb41SThomas Gleixner static int validate_user_xstate_header(const struct xstate_header *hdr,
40049e4eb41SThomas Gleixner 				       struct fpstate *fpstate)
401e63e5d5cSEric Biggers {
402e63e5d5cSEric Biggers 	/* No unknown or supervisor features may be set */
40349e4eb41SThomas Gleixner 	if (hdr->xfeatures & ~fpstate->user_xfeatures)
404e63e5d5cSEric Biggers 		return -EINVAL;
405e63e5d5cSEric Biggers 
406e63e5d5cSEric Biggers 	/* Userspace must use the uncompacted format */
407e63e5d5cSEric Biggers 	if (hdr->xcomp_bv)
408e63e5d5cSEric Biggers 		return -EINVAL;
409e63e5d5cSEric Biggers 
410e63e5d5cSEric Biggers 	/*
411e63e5d5cSEric Biggers 	 * If 'reserved' is shrunk to add a new field, make sure to validate
412e63e5d5cSEric Biggers 	 * that new field here!
413e63e5d5cSEric Biggers 	 */
414e63e5d5cSEric Biggers 	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
415e63e5d5cSEric Biggers 
416e63e5d5cSEric Biggers 	/* No reserved bits may be set */
417e63e5d5cSEric Biggers 	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
418e63e5d5cSEric Biggers 		return -EINVAL;
419e63e5d5cSEric Biggers 
420e63e5d5cSEric Biggers 	return 0;
421e63e5d5cSEric Biggers }
422e63e5d5cSEric Biggers 
42363cf05a1SThomas Gleixner static void __init __xstate_dump_leaves(void)
42465ac2e9bSDave Hansen {
42565ac2e9bSDave Hansen 	int i;
42665ac2e9bSDave Hansen 	u32 eax, ebx, ecx, edx;
42765ac2e9bSDave Hansen 	static int should_dump = 1;
42865ac2e9bSDave Hansen 
42965ac2e9bSDave Hansen 	if (!should_dump)
43065ac2e9bSDave Hansen 		return;
43165ac2e9bSDave Hansen 	should_dump = 0;
43265ac2e9bSDave Hansen 	/*
43365ac2e9bSDave Hansen 	 * Dump out a few leaves past the ones that we support
43465ac2e9bSDave Hansen 	 * just in case there are some goodies up there
43565ac2e9bSDave Hansen 	 */
43665ac2e9bSDave Hansen 	for (i = 0; i < XFEATURE_MAX + 10; i++) {
43765ac2e9bSDave Hansen 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
43865ac2e9bSDave Hansen 		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
43965ac2e9bSDave Hansen 			XSTATE_CPUID, i, eax, ebx, ecx, edx);
44065ac2e9bSDave Hansen 	}
44165ac2e9bSDave Hansen }
44265ac2e9bSDave Hansen 
44348280042SAndrew Cooper #define XSTATE_WARN_ON(x, fmt, ...) do {					\
44448280042SAndrew Cooper 	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
44565ac2e9bSDave Hansen 		__xstate_dump_leaves();						\
44665ac2e9bSDave Hansen 	}									\
44765ac2e9bSDave Hansen } while (0)
44865ac2e9bSDave Hansen 
449ef78f2a4SDave Hansen #define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
450ef78f2a4SDave Hansen 	if ((nr == nr_macro) &&						\
451ef78f2a4SDave Hansen 	    WARN_ONCE(sz != sizeof(__struct),				\
452ef78f2a4SDave Hansen 		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
453ef78f2a4SDave Hansen 		__stringify(nr_macro), sizeof(__struct), sz)) {		\
454ef78f2a4SDave Hansen 		__xstate_dump_leaves();					\
455ef78f2a4SDave Hansen 	}								\
456ef78f2a4SDave Hansen } while (0)
457ef78f2a4SDave Hansen 
458eec2113eSChang S. Bae /**
459eec2113eSChang S. Bae  * check_xtile_data_against_struct - Check tile data state size.
460eec2113eSChang S. Bae  *
461eec2113eSChang S. Bae  * Calculate the state size by multiplying the single tile size, which is
462eec2113eSChang S. Bae  * recorded in a C struct, by the number of tiles reported by the CPU.
463eec2113eSChang S. Bae  * Compare the provided size with the calculation.
464eec2113eSChang S. Bae  *
465eec2113eSChang S. Bae  * @size:	The tile data state size
466eec2113eSChang S. Bae  *
467eec2113eSChang S. Bae  * Returns:	0 on success, -EINVAL on mismatch.
468eec2113eSChang S. Bae  */
469eec2113eSChang S. Bae static int __init check_xtile_data_against_struct(int size)
470eec2113eSChang S. Bae {
471eec2113eSChang S. Bae 	u32 max_palid, palid, state_size;
472eec2113eSChang S. Bae 	u32 eax, ebx, ecx, edx;
473eec2113eSChang S. Bae 	u16 max_tile;
474eec2113eSChang S. Bae 
475eec2113eSChang S. Bae 	/*
476eec2113eSChang S. Bae 	 * Check the maximum palette id:
477eec2113eSChang S. Bae 	 *   eax: the highest numbered palette subleaf.
478eec2113eSChang S. Bae 	 */
479eec2113eSChang S. Bae 	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
480eec2113eSChang S. Bae 
481eec2113eSChang S. Bae 	/*
482eec2113eSChang S. Bae 	 * Cross-check each tile size and find the maximum number of
483eec2113eSChang S. Bae 	 * supported tiles.
484eec2113eSChang S. Bae 	 */
485eec2113eSChang S. Bae 	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
486eec2113eSChang S. Bae 		u16 tile_size, max;
487eec2113eSChang S. Bae 
488eec2113eSChang S. Bae 		/*
489eec2113eSChang S. Bae 		 * Check the tile size info:
490eec2113eSChang S. Bae 		 *   eax[31:16]:  bytes per tile
491eec2113eSChang S. Bae 		 *   ebx[31:16]:  the max names (or max number of tiles)
492eec2113eSChang S. Bae 		 */
493eec2113eSChang S. Bae 		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &ecx, &edx);
494eec2113eSChang S. Bae 		tile_size = eax >> 16;
495eec2113eSChang S. Bae 		max = ebx >> 16;
496eec2113eSChang S. Bae 
497eec2113eSChang S. Bae 		if (tile_size != sizeof(struct xtile_data)) {
498eec2113eSChang S. Bae 			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
499eec2113eSChang S. Bae 			       __stringify(XFEATURE_XTILE_DATA),
500eec2113eSChang S. Bae 			       sizeof(struct xtile_data), tile_size);
501eec2113eSChang S. Bae 			__xstate_dump_leaves();
502eec2113eSChang S. Bae 			return -EINVAL;
503eec2113eSChang S. Bae 		}
504eec2113eSChang S. Bae 
505eec2113eSChang S. Bae 		if (max > max_tile)
506eec2113eSChang S. Bae 			max_tile = max;
507eec2113eSChang S. Bae 	}
508eec2113eSChang S. Bae 
509eec2113eSChang S. Bae 	state_size = sizeof(struct xtile_data) * max_tile;
510eec2113eSChang S. Bae 	if (size != state_size) {
511eec2113eSChang S. Bae 		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
512eec2113eSChang S. Bae 		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
513eec2113eSChang S. Bae 		__xstate_dump_leaves();
514eec2113eSChang S. Bae 		return -EINVAL;
515eec2113eSChang S. Bae 	}
516eec2113eSChang S. Bae 	return 0;
517eec2113eSChang S. Bae }
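
/*
 * Sanity check of the math above against the initial AMX implementation
 * (illustrative numbers): palette 1 enumerates 8 tiles of 1024 bytes
 * each, so sizeof(struct xtile_data) must be 1024 and the expected
 * XTILE_DATA state size is 8 * 1024 = 8192 bytes.
 */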
518eec2113eSChang S. Bae 
519ef78f2a4SDave Hansen /*
520ef78f2a4SDave Hansen  * We have a C struct for each 'xstate'.  We need to ensure
521ef78f2a4SDave Hansen  * that our software representation matches what the CPU
522ef78f2a4SDave Hansen  * tells us about the state's size.
523ef78f2a4SDave Hansen  */
524cd9ae761SThomas Gleixner static bool __init check_xstate_against_struct(int nr)
525ef78f2a4SDave Hansen {
526ef78f2a4SDave Hansen 	/*
527ef78f2a4SDave Hansen 	 * Ask the CPU for the size of the state.
528ef78f2a4SDave Hansen 	 */
529ef78f2a4SDave Hansen 	int sz = xfeature_size(nr);
530ef78f2a4SDave Hansen 	/*
531ef78f2a4SDave Hansen 	 * Match each CPU state with the corresponding software
532ef78f2a4SDave Hansen 	 * structure.
533ef78f2a4SDave Hansen 	 */
534ef78f2a4SDave Hansen 	XCHECK_SZ(sz, nr, XFEATURE_YMM,       struct ymmh_struct);
535ef78f2a4SDave Hansen 	XCHECK_SZ(sz, nr, XFEATURE_BNDREGS,   struct mpx_bndreg_state);
536ef78f2a4SDave Hansen 	XCHECK_SZ(sz, nr, XFEATURE_BNDCSR,    struct mpx_bndcsr_state);
537ef78f2a4SDave Hansen 	XCHECK_SZ(sz, nr, XFEATURE_OPMASK,    struct avx_512_opmask_state);
538ef78f2a4SDave Hansen 	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
539ef78f2a4SDave Hansen 	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
540c8df4009SDave Hansen 	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
541b454feb9SYu-cheng Yu 	XCHECK_SZ(sz, nr, XFEATURE_PASID,     struct ia32_pasid_state);
542eec2113eSChang S. Bae 	XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
543eec2113eSChang S. Bae 
544eec2113eSChang S. Bae 	/* The tile data size varies between implementations. */
545eec2113eSChang S. Bae 	if (nr == XFEATURE_XTILE_DATA)
546eec2113eSChang S. Bae 		check_xtile_data_against_struct(sz);
547ef78f2a4SDave Hansen 
548ef78f2a4SDave Hansen 	/*
549ef78f2a4SDave Hansen 	 * Make *SURE* to add any feature numbers below if
550ef78f2a4SDave Hansen 	 * there are "holes" in the xsave state component
551ef78f2a4SDave Hansen 	 * numbers.
552ef78f2a4SDave Hansen 	 */
553ef78f2a4SDave Hansen 	if ((nr < XFEATURE_YMM) ||
5541f96b1efSDave Hansen 	    (nr >= XFEATURE_MAX) ||
555f0dccc9dSKan Liang 	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
556eec2113eSChang S. Bae 	    ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
55748280042SAndrew Cooper 		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
558cd9ae761SThomas Gleixner 		return false;
559ef78f2a4SDave Hansen 	}
560cd9ae761SThomas Gleixner 	return true;
561ef78f2a4SDave Hansen }
562ef78f2a4SDave Hansen 
56384e4dcccSChang S. Bae static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
56484e4dcccSChang S. Bae {
565d6d6d50fSThomas Gleixner 	unsigned int topmost = fls64(xfeatures) -  1;
566d6d6d50fSThomas Gleixner 	unsigned int offset = xstate_offsets[topmost];
56784e4dcccSChang S. Bae 
568d6d6d50fSThomas Gleixner 	if (topmost <= XFEATURE_SSE)
569d6d6d50fSThomas Gleixner 		return sizeof(struct xregs_state);
570d6d6d50fSThomas Gleixner 
571d6d6d50fSThomas Gleixner 	if (compacted)
572d6d6d50fSThomas Gleixner 		offset = xfeature_get_offset(xfeatures, topmost);
573d6d6d50fSThomas Gleixner 	return offset + xstate_sizes[topmost];
57484e4dcccSChang S. Bae }
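
/*
 * Example, assuming an enabled feature set of FP | SSE | YMM (0x7):
 *
 *	topmost = fls64(0x7) - 1 = 2	(YMM)
 *	non-compacted: offset = 576, size = 256 -> 832 bytes total
 *
 * In the compacted case the offset is recomputed against the actual
 * feature set, so holes below the topmost feature do not inflate the
 * size.
 */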
57584e4dcccSChang S. Bae 
57665ac2e9bSDave Hansen /*
57765ac2e9bSDave Hansen  * This essentially double-checks what the cpu told us about
57865ac2e9bSDave Hansen  * how large the XSAVE buffer needs to be.  We are recalculating
57965ac2e9bSDave Hansen  * it to be safe.
58076d10256SKan Liang  *
58101707b66SAndy Lutomirski  * Independent XSAVE features allocate their own buffers and are not
58276d10256SKan Liang  * covered by these checks. Only the size of the buffer for task->fpu
58376d10256SKan Liang  * is checked here.
58465ac2e9bSDave Hansen  */
585cd9ae761SThomas Gleixner static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
58665ac2e9bSDave Hansen {
5878ad7e8f6SThomas Gleixner 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
5888ad7e8f6SThomas Gleixner 	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
589cd9ae761SThomas Gleixner 	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
59065ac2e9bSDave Hansen 	int i;
59165ac2e9bSDave Hansen 
5921c253ff2SThomas Gleixner 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
593cd9ae761SThomas Gleixner 		if (!check_xstate_against_struct(i))
594cd9ae761SThomas Gleixner 			return false;
59565ac2e9bSDave Hansen 		/*
59665ac2e9bSDave Hansen 		 * Supervisor state components can be managed only by
59702b93c0bSThomas Gleixner 		 * XSAVES.
59865ac2e9bSDave Hansen 		 */
5998ad7e8f6SThomas Gleixner 		if (!xsaves && xfeature_is_supervisor(i)) {
60048280042SAndrew Cooper 			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
601cd9ae761SThomas Gleixner 			return false;
602cd9ae761SThomas Gleixner 		}
60365ac2e9bSDave Hansen 	}
60484e4dcccSChang S. Bae 	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
60548280042SAndrew Cooper 	XSTATE_WARN_ON(size != kernel_size,
60648280042SAndrew Cooper 		       "size %u != kernel_size %u\n", size, kernel_size);
607cd9ae761SThomas Gleixner 	return size == kernel_size;
60865ac2e9bSDave Hansen }
60965ac2e9bSDave Hansen 
61062784854SIngo Molnar /*
611524bb73bSYu-cheng Yu  * Get total size of enabled xstates in XCR0 | IA32_XSS.
61265ac2e9bSDave Hansen  *
61365ac2e9bSDave Hansen  * Note the SDM's wording here.  "sub-function 0" only enumerates
61465ac2e9bSDave Hansen  * the size of the *user* states.  If we use it to size a buffer
61565ac2e9bSDave Hansen  * that we use 'XSAVES' on, we could potentially overflow the
61665ac2e9bSDave Hansen  * buffer because 'XSAVES' saves system states too.
6178ad7e8f6SThomas Gleixner  *
6188ad7e8f6SThomas Gleixner  * This also takes compaction into account. So this works for
6198ad7e8f6SThomas Gleixner  * XSAVEC as well.
62062784854SIngo Molnar  */
6218ad7e8f6SThomas Gleixner static unsigned int __init get_compacted_size(void)
62262784854SIngo Molnar {
62362784854SIngo Molnar 	unsigned int eax, ebx, ecx, edx;
62465ac2e9bSDave Hansen 	/*
62565ac2e9bSDave Hansen 	 * - CPUID function 0DH, sub-function 1:
62665ac2e9bSDave Hansen 	 *    EBX enumerates the size (in bytes) required by
62765ac2e9bSDave Hansen 	 *    the XSAVES instruction for an XSAVE area
62865ac2e9bSDave Hansen 	 *    containing all the state components
62965ac2e9bSDave Hansen 	 *    corresponding to bits currently set in
63065ac2e9bSDave Hansen 	 *    XCR0 | IA32_XSS.
6318ad7e8f6SThomas Gleixner 	 *
6328ad7e8f6SThomas Gleixner 	 * When XSAVES is not available but XSAVEC is (virt), then there
6338ad7e8f6SThomas Gleixner 	 * are no supervisor states, but XSAVEC still uses compacted
6348ad7e8f6SThomas Gleixner 	 * format.
63565ac2e9bSDave Hansen 	 */
63665ac2e9bSDave Hansen 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
637a1141e0bSFenghua Yu 	return ebx;
63862784854SIngo Molnar }
639a1141e0bSFenghua Yu 
64076d10256SKan Liang /*
64101707b66SAndy Lutomirski  * Get the total size of the enabled xstates without the independent supervisor
64276d10256SKan Liang  * features.
64376d10256SKan Liang  */
6448ad7e8f6SThomas Gleixner static unsigned int __init get_xsave_compacted_size(void)
64576d10256SKan Liang {
64601707b66SAndy Lutomirski 	u64 mask = xfeatures_mask_independent();
64776d10256SKan Liang 	unsigned int size;
64876d10256SKan Liang 
64976d10256SKan Liang 	if (!mask)
6508ad7e8f6SThomas Gleixner 		return get_compacted_size();
65176d10256SKan Liang 
65201707b66SAndy Lutomirski 	/* Disable independent features. */
65376d10256SKan Liang 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
65476d10256SKan Liang 
65576d10256SKan Liang 	/*
65676d10256SKan Liang 	 * Ask the hardware what size is required of the buffer.
65776d10256SKan Liang 	 * This is the size required for the task->fpu buffer.
65876d10256SKan Liang 	 */
6598ad7e8f6SThomas Gleixner 	size = get_compacted_size();
66076d10256SKan Liang 
66101707b66SAndy Lutomirski 	/* Re-enable independent features so XSAVES will work on them again. */
66276d10256SKan Liang 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
66376d10256SKan Liang 
66476d10256SKan Liang 	return size;
66576d10256SKan Liang }
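
/*
 * Sketch of the dance above with hypothetical masks: if IA32_XSS holds
 * the supervisor bit 0x800 plus the independent bit 0x8000, the
 * sequence is:
 *
 *	wrmsrl(MSR_IA32_XSS, 0x800);	// drop the independent bit
 *	size = get_compacted_size();	// now excludes its space
 *	wrmsrl(MSR_IA32_XSS, 0x8800);	// restore it
 */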
66676d10256SKan Liang 
667cd9ae761SThomas Gleixner static unsigned int __init get_xsave_size_user(void)
668a1141e0bSFenghua Yu {
669a1141e0bSFenghua Yu 	unsigned int eax, ebx, ecx, edx;
670a1141e0bSFenghua Yu 	/*
671a1141e0bSFenghua Yu 	 * - CPUID function 0DH, sub-function 0:
672a1141e0bSFenghua Yu 	 *    EBX enumerates the size (in bytes) required by
673a1141e0bSFenghua Yu 	 *    the XSAVE instruction for an XSAVE area
674a1141e0bSFenghua Yu 	 *    containing all the *user* state components
675a1141e0bSFenghua Yu 	 *    corresponding to bits currently set in XCR0.
676a1141e0bSFenghua Yu 	 */
677a1141e0bSFenghua Yu 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
678a1141e0bSFenghua Yu 	return ebx;
6794109ca06SDave Hansen }
6804109ca06SDave Hansen 
681653a561bSSergey Senozhatsky static int __init init_xstate_size(void)
6824109ca06SDave Hansen {
6834109ca06SDave Hansen 	/* Recompute the context size for enabled features: */
6842ae996e0SChang S. Bae 	unsigned int user_size, kernel_size, kernel_default_size;
6858ad7e8f6SThomas Gleixner 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
686a1141e0bSFenghua Yu 
687cd9ae761SThomas Gleixner 	/* Uncompacted user space size */
688cd9ae761SThomas Gleixner 	user_size = get_xsave_size_user();
689a1141e0bSFenghua Yu 
690cd9ae761SThomas Gleixner 	/*
6918ad7e8f6SThomas Gleixner 	 * XSAVES kernel size includes supervisor states and uses compacted
6928ad7e8f6SThomas Gleixner 	 * format. XSAVEC uses compacted format, but does not save
6938ad7e8f6SThomas Gleixner 	 * supervisor states.
694cd9ae761SThomas Gleixner 	 *
6958ad7e8f6SThomas Gleixner 	 * XSAVE[OPT] do not support supervisor states so kernel and user
6968ad7e8f6SThomas Gleixner 	 * size is identical.
697cd9ae761SThomas Gleixner 	 */
6982ae996e0SChang S. Bae 	if (compacted)
6998ad7e8f6SThomas Gleixner 		kernel_size = get_xsave_compacted_size();
700a1141e0bSFenghua Yu 	else
701cd9ae761SThomas Gleixner 		kernel_size = user_size;
7024109ca06SDave Hansen 
7032ae996e0SChang S. Bae 	kernel_default_size =
7042ae996e0SChang S. Bae 		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
7052ae996e0SChang S. Bae 
706cd9ae761SThomas Gleixner 	if (!paranoid_xstate_size_valid(kernel_size))
707cd9ae761SThomas Gleixner 		return -EINVAL;
708a1141e0bSFenghua Yu 
7092bd264bcSThomas Gleixner 	fpu_kernel_cfg.max_size = kernel_size;
7102bd264bcSThomas Gleixner 	fpu_user_cfg.max_size = user_size;
7112ae996e0SChang S. Bae 
7122ae996e0SChang S. Bae 	fpu_kernel_cfg.default_size = kernel_default_size;
7132ae996e0SChang S. Bae 	fpu_user_cfg.default_size =
7142ae996e0SChang S. Bae 		xstate_calculate_size(fpu_user_cfg.default_features, false);
715cd9ae761SThomas Gleixner 
7164109ca06SDave Hansen 	return 0;
7174109ca06SDave Hansen }
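
/*
 * Example of what this can resolve to, assuming a hypothetical machine
 * with only FP, SSE, YMM and PKRU enabled (PKRU's fixed non-compacted
 * offset is typically 2688, its size 8):
 *
 *	user (non-compacted) size:  2688 + 8       = 2696
 *	kernel (compacted) size:    576 + 256 + 8  =  840
 */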
7184109ca06SDave Hansen 
719d91cab78SDave Hansen /*
720d91cab78SDave Hansen  * We enabled the XSAVE hardware, but something went wrong and
721d91cab78SDave Hansen  * we cannot use it.  Disable it.
722d91cab78SDave Hansen  */
7232bd264bcSThomas Gleixner static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
7244109ca06SDave Hansen {
7251c253ff2SThomas Gleixner 	fpu_kernel_cfg.max_features = 0;
7264109ca06SDave Hansen 	cr4_clear_bits(X86_CR4_OSXSAVE);
7277891bc0aSSebastian Andrzej Siewior 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
7282bd264bcSThomas Gleixner 
7292bd264bcSThomas Gleixner 	/* Restore the legacy size.*/
7302bd264bcSThomas Gleixner 	/* Restore the legacy size. */
7312bd264bcSThomas Gleixner 	fpu_kernel_cfg.default_size = legacy_size;
7322bd264bcSThomas Gleixner 	fpu_user_cfg.max_size = legacy_size;
7332bd264bcSThomas Gleixner 	fpu_user_cfg.default_size = legacy_size;
7342bd264bcSThomas Gleixner 
735db3e7321SChang S. Bae 	/*
736db3e7321SChang S. Bae 	 * Prevent enabling the static branch which enables writes to the
737db3e7321SChang S. Bae 	 * XFD MSR.
738db3e7321SChang S. Bae 	 */
739db3e7321SChang S. Bae 	init_fpstate.xfd = 0;
740db3e7321SChang S. Bae 
741248452ceSThomas Gleixner 	fpstate_reset(&current->thread.fpu);
74262784854SIngo Molnar }
74362784854SIngo Molnar 
74462784854SIngo Molnar /*
74562784854SIngo Molnar  * Enable and initialize the xsave feature.
74662784854SIngo Molnar  * Called once per system bootup.
74762784854SIngo Molnar  */
7482bd264bcSThomas Gleixner void __init fpu__init_system_xstate(unsigned int legacy_size)
74962784854SIngo Molnar {
75062784854SIngo Molnar 	unsigned int eax, ebx, ecx, edx;
7514e8e4313SThomas Gleixner 	u64 xfeatures;
7524109ca06SDave Hansen 	int err;
753ccb18db2SAndi Kleen 	int i;
754e97131a8SIngo Molnar 
7559170fb40SAndy Lutomirski 	if (!boot_cpu_has(X86_FEATURE_FPU)) {
7569170fb40SAndy Lutomirski 		pr_info("x86/fpu: No FPU detected\n");
7579170fb40SAndy Lutomirski 		return;
7589170fb40SAndy Lutomirski 	}
7599170fb40SAndy Lutomirski 
760d366bf7eSBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
7619170fb40SAndy Lutomirski 		pr_info("x86/fpu: x87 FPU will use %s\n",
7629170fb40SAndy Lutomirski 			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
76362784854SIngo Molnar 		return;
76462784854SIngo Molnar 	}
76562784854SIngo Molnar 
76662784854SIngo Molnar 	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
767e97131a8SIngo Molnar 		WARN_ON_FPU(1);
76862784854SIngo Molnar 		return;
76962784854SIngo Molnar 	}
77062784854SIngo Molnar 
771524bb73bSYu-cheng Yu 	/*
772524bb73bSYu-cheng Yu 	 * Find user xstates supported by the processor.
773524bb73bSYu-cheng Yu 	 */
77462784854SIngo Molnar 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
7751c253ff2SThomas Gleixner 	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
77662784854SIngo Molnar 
77771581eefSYu-cheng Yu 	/*
77871581eefSYu-cheng Yu 	 * Find supervisor xstates supported by the processor.
77971581eefSYu-cheng Yu 	 */
78071581eefSYu-cheng Yu 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
7811c253ff2SThomas Gleixner 	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
78271581eefSYu-cheng Yu 
783daddee24SThomas Gleixner 	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
784ec3ed4a2SDave Hansen 		/*
785ec3ed4a2SDave Hansen 		 * This indicates that something really unexpected happened
786ec3ed4a2SDave Hansen 		 * with the enumeration.  Disable XSAVE and try to continue
787ec3ed4a2SDave Hansen 		 * booting without it.  This is too early to BUG().
788ec3ed4a2SDave Hansen 		 */
789524bb73bSYu-cheng Yu 		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
7901c253ff2SThomas Gleixner 		       fpu_kernel_cfg.max_features);
791ec3ed4a2SDave Hansen 		goto out_disable;
79262784854SIngo Molnar 	}
79362784854SIngo Molnar 
794ccb18db2SAndi Kleen 	/*
795ccb18db2SAndi Kleen 	 * Clear XSAVE features that are disabled in the normal CPUID.
796ccb18db2SAndi Kleen 	 */
797ccb18db2SAndi Kleen 	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
79870c3f167SChang S. Bae 		unsigned short cid = xsave_cpuid_features[i];
79970c3f167SChang S. Bae 
80070c3f167SChang S. Bae 		/* Careful: X86_FEATURE_FPU is 0! */
80170c3f167SChang S. Bae 		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
8021c253ff2SThomas Gleixner 			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
803ccb18db2SAndi Kleen 	}
804ccb18db2SAndi Kleen 
8052ae996e0SChang S. Bae 	if (!cpu_feature_enabled(X86_FEATURE_XFD))
8062ae996e0SChang S. Bae 		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
8072ae996e0SChang S. Bae 
8088ad7e8f6SThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
8098ad7e8f6SThomas Gleixner 		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
8108ad7e8f6SThomas Gleixner 	else
8111c253ff2SThomas Gleixner 		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
812ce38f038SThomas Gleixner 					XFEATURE_MASK_SUPERVISOR_SUPPORTED;
813ce38f038SThomas Gleixner 
8141c253ff2SThomas Gleixner 	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
8151c253ff2SThomas Gleixner 	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
8161c253ff2SThomas Gleixner 
8172ae996e0SChang S. Bae 	/* Clean out dynamic features from default */
8181c253ff2SThomas Gleixner 	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
8192ae996e0SChang S. Bae 	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
8202ae996e0SChang S. Bae 
8211c253ff2SThomas Gleixner 	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
8222ae996e0SChang S. Bae 	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
8231c253ff2SThomas Gleixner 
8244e8e4313SThomas Gleixner 	/* Store it for paranoia check at the end */
8251c253ff2SThomas Gleixner 	xfeatures = fpu_kernel_cfg.max_features;
82662784854SIngo Molnar 
827db3e7321SChang S. Bae 	/*
828db3e7321SChang S. Bae 	 * Initialize the default XFD state in init_fpstate and enable the
829db3e7321SChang S. Bae 	 * dynamic sizing mechanism if dynamic states are available.  The
830db3e7321SChang S. Bae 	 * static key cannot be enabled here because this runs before
831db3e7321SChang S. Bae 	 * jump_label_init(). This is delayed to an initcall.
832db3e7321SChang S. Bae 	 */
833db3e7321SChang S. Bae 	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
834db3e7321SChang S. Bae 
8358ad7e8f6SThomas Gleixner 	/* Set up compaction feature bit */
8368ad7e8f6SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
8378ad7e8f6SThomas Gleixner 	    cpu_feature_enabled(X86_FEATURE_XSAVES))
8388ad7e8f6SThomas Gleixner 		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
8398ad7e8f6SThomas Gleixner 
84062784854SIngo Molnar 	/* Enable xstate instructions to be able to continue with initialization: */
84162784854SIngo Molnar 	fpu__init_cpu_xstate();
84235a77d45SThomas Gleixner 
84335a77d45SThomas Gleixner 	/* Cache size, offset and flags for initialization */
84435a77d45SThomas Gleixner 	setup_xstate_cache();
84535a77d45SThomas Gleixner 
8464109ca06SDave Hansen 	err = init_xstate_size();
847ec3ed4a2SDave Hansen 	if (err)
848ec3ed4a2SDave Hansen 		goto out_disable;
84962784854SIngo Molnar 
8502ae996e0SChang S. Bae 	/* Reset the state for the current task */
851248452ceSThomas Gleixner 	fpstate_reset(&current->thread.fpu);
852248452ceSThomas Gleixner 
85391c3dba7SYu-cheng Yu 	/*
85491c3dba7SYu-cheng Yu 	 * Update info used for ptrace frames; use standard-format size and no
85591c3dba7SYu-cheng Yu 	 * supervisor xstates:
85691c3dba7SYu-cheng Yu 	 */
8572bd264bcSThomas Gleixner 	update_regset_xstate_info(fpu_user_cfg.max_size,
858daddee24SThomas Gleixner 				  fpu_user_cfg.max_features);
85991c3dba7SYu-cheng Yu 
860a401f45eSChang S. Bae 	/*
861a401f45eSChang S. Bae 	 * init_fpstate excludes dynamic states as they are large but their
862a401f45eSChang S. Bae 	 * init state is all zeros.
863a401f45eSChang S. Bae 	 */
864a401f45eSChang S. Bae 	init_fpstate.size		= fpu_kernel_cfg.default_size;
865a401f45eSChang S. Bae 	init_fpstate.xfeatures		= fpu_kernel_cfg.default_features;
866c32d7cabSChang S. Bae 
867d3e021adSChang S. Bae 	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
868d3e021adSChang S. Bae 		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
869d3e021adSChang S. Bae 			sizeof(init_fpstate.regs), init_fpstate.size);
870d3e021adSChang S. Bae 		goto out_disable;
871d3e021adSChang S. Bae 	}
872d3e021adSChang S. Bae 
87362784854SIngo Molnar 	setup_init_fpu_buf();
87462784854SIngo Molnar 
8754e8e4313SThomas Gleixner 	/*
8764e8e4313SThomas Gleixner 	 * Paranoia check whether something in the setup modified the
8774e8e4313SThomas Gleixner 	 * xfeatures mask.
8784e8e4313SThomas Gleixner 	 */
8791c253ff2SThomas Gleixner 	if (xfeatures != fpu_kernel_cfg.max_features) {
8804e8e4313SThomas Gleixner 		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
8811c253ff2SThomas Gleixner 		       xfeatures, fpu_kernel_cfg.max_features);
8824e8e4313SThomas Gleixner 		goto out_disable;
8834e8e4313SThomas Gleixner 	}
8844e8e4313SThomas Gleixner 
885*2c66ca39SFeng Tang 	/*
886*2c66ca39SFeng Tang 	 * CPU capabilities initialization runs before FPU init. So
887*2c66ca39SFeng Tang 	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
888*2c66ca39SFeng Tang  * functional, set the feature bit so code depending on it works.
889*2c66ca39SFeng Tang 	 */
890*2c66ca39SFeng Tang 	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
891*2c66ca39SFeng Tang 
8924e8e4313SThomas Gleixner 	print_xstate_offset_size();
893b0815359SDave Hansen 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
8941c253ff2SThomas Gleixner 		fpu_kernel_cfg.max_features,
8952bd264bcSThomas Gleixner 		fpu_kernel_cfg.max_size,
8968ad7e8f6SThomas Gleixner 		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
897ec3ed4a2SDave Hansen 	return;
898ec3ed4a2SDave Hansen 
899ec3ed4a2SDave Hansen out_disable:
900ec3ed4a2SDave Hansen 	/* something went wrong, try to boot without any XSAVE support */
9012bd264bcSThomas Gleixner 	fpu__init_disable_system_xstate(legacy_size);
90262784854SIngo Molnar }
90362784854SIngo Molnar 
90462784854SIngo Molnar /*
90562784854SIngo Molnar  * Restore minimal FPU state after suspend:
90662784854SIngo Molnar  */
90762784854SIngo Molnar void fpu__resume_cpu(void)
90862784854SIngo Molnar {
90962784854SIngo Molnar 	/*
91062784854SIngo Molnar 	 * Restore XCR0 on xsave capable CPUs:
91162784854SIngo Molnar 	 */
91265e95210SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
913daddee24SThomas Gleixner 		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
91471581eefSYu-cheng Yu 
91571581eefSYu-cheng Yu 	/*
91671581eefSYu-cheng Yu 	 * Restore IA32_XSS. The same CPUID bit enumerates support
91771581eefSYu-cheng Yu 	 * of XSAVES and MSR_IA32_XSS.
91871581eefSYu-cheng Yu 	 */
91965e95210SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
920f0dccc9dSKan Liang 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
92101707b66SAndy Lutomirski 				     xfeatures_mask_independent());
922f0dccc9dSKan Liang 	}
92367236547SChang S. Bae 
92467236547SChang S. Bae 	if (fpu_state_size_dynamic())
92567236547SChang S. Bae 		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
92662784854SIngo Molnar }
92762784854SIngo Molnar 
92862784854SIngo Molnar /*
92907baeb04SSebastian Andrzej Siewior  * Given an xstate feature nr, calculate where in the xsave
930b8b9b6baSDave Hansen  * buffer the state is.  Callers should ensure that the buffer
931b8b9b6baSDave Hansen  * is valid.
932b8b9b6baSDave Hansen  */
93307baeb04SSebastian Andrzej Siewior static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
934b8b9b6baSDave Hansen {
9357aa5128bSThomas Gleixner 	u64 xcomp_bv = xsave->header.xcomp_bv;
9367aa5128bSThomas Gleixner 
9377aa5128bSThomas Gleixner 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
9387aa5128bSThomas Gleixner 		return NULL;
9397aa5128bSThomas Gleixner 
9408ad7e8f6SThomas Gleixner 	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
9417aa5128bSThomas Gleixner 		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
9425060b915SYu-cheng Yu 			return NULL;
9435060b915SYu-cheng Yu 	}
9445060b915SYu-cheng Yu 
9457aa5128bSThomas Gleixner 	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
946b8b9b6baSDave Hansen }
9477aa5128bSThomas Gleixner 
948b8b9b6baSDave Hansen /*
94962784854SIngo Molnar  * Given the xsave area and a state inside, this function returns the
95062784854SIngo Molnar  * address of the state.
95162784854SIngo Molnar  *
95262784854SIngo Molnar  * This is the API that is called to get xstate address in either
95362784854SIngo Molnar  * standard format or compacted format of xsave area.
95462784854SIngo Molnar  *
9550c4109beSDave Hansen  * Note that if there is no data for the field in the xsave buffer
9560c4109beSDave Hansen  * this will return NULL.
9570c4109beSDave Hansen  *
95862784854SIngo Molnar  * Inputs:
9590c4109beSDave Hansen  *	xstate: the thread's storage area for all FPU data
960abd16d68SSebastian Andrzej Siewior  *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
961abd16d68SSebastian Andrzej Siewior  *	XFEATURE_SSE, etc...)
96262784854SIngo Molnar  * Output:
9630c4109beSDave Hansen  *	address of the state in the xsave area, or NULL if the
9640c4109beSDave Hansen  *	field is not present in the xsave buffer.
96562784854SIngo Molnar  */
966abd16d68SSebastian Andrzej Siewior void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
96762784854SIngo Molnar {
9680c4109beSDave Hansen 	/*
9690c4109beSDave Hansen 	 * Do we even *have* xsave state?
9700c4109beSDave Hansen 	 */
9710c4109beSDave Hansen 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
97262784854SIngo Molnar 		return NULL;
97362784854SIngo Molnar 
9740c4109beSDave Hansen 	/*
9750c4109beSDave Hansen 	 * We should not ever be requesting features that we
976524bb73bSYu-cheng Yu 	 * have not enabled.
9770c4109beSDave Hansen 	 */
9787aa5128bSThomas Gleixner 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
9797aa5128bSThomas Gleixner 		return NULL;
9807aa5128bSThomas Gleixner 
9810c4109beSDave Hansen 	/*
9820c4109beSDave Hansen 	 * This assumes the last 'xsave*' instruction to
983abd16d68SSebastian Andrzej Siewior 	 * have requested that 'xfeature_nr' be saved.
9840c4109beSDave Hansen 	 * If it did not, we might be seeing an old value
9850c4109beSDave Hansen 	 * of the field in the buffer.
9860c4109beSDave Hansen 	 *
9870c4109beSDave Hansen 	 * This can happen because the last 'xsave' did not
9880c4109beSDave Hansen 	 * request that this feature be saved (unlikely)
9890c4109beSDave Hansen 	 * or because the "init optimization" caused it
9900c4109beSDave Hansen 	 * to not be saved.
9910c4109beSDave Hansen 	 */
992abd16d68SSebastian Andrzej Siewior 	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
9930c4109beSDave Hansen 		return NULL;
9940c4109beSDave Hansen 
99507baeb04SSebastian Andrzej Siewior 	return __raw_xsave_addr(xsave, xfeature_nr);
99662784854SIngo Molnar }
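
/*
 * Hypothetical caller, sketched: read a task's PKRU value from its last
 * saved xstate, falling back to zero (the assumed init value here) when
 * the component was in init state and therefore not written:
 *
 *	struct pkru_state *pk = get_xsave_addr(xsave, XFEATURE_PKRU);
 *	u32 pkru = pk ? pk->pkru : 0;
 */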
99704cd027bSDave Hansen 
998e8c24d3aSDave Hansen #ifdef CONFIG_ARCH_HAS_PKEYS
999e8c24d3aSDave Hansen 
100084594296SDave Hansen /*
1001b79daf85SDave Hansen  * This will go out and modify the PKRU register to set the access
1002b79daf85SDave Hansen  * rights for @pkey to @init_val.
100384594296SDave Hansen  */
100484594296SDave Hansen int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
100584594296SDave Hansen 			      unsigned long init_val)
100684594296SDave Hansen {
10079fe8a6f5SThomas Gleixner 	u32 old_pkru, new_pkru_bits = 0;
10089fe8a6f5SThomas Gleixner 	int pkey_shift;
100984594296SDave Hansen 
101084594296SDave Hansen 	/*
101184594296SDave Hansen 	 * This check implies XSAVE support.  OSPKE only gets
101284594296SDave Hansen 	 * set if we enable XSAVE and we enable PKU in XCR0.
101384594296SDave Hansen 	 */
10148a1dc55aSThomas Gleixner 	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
101584594296SDave Hansen 		return -EINVAL;
101684594296SDave Hansen 
101716171bffSDave Hansen 	/*
101816171bffSDave Hansen 	 * This code should only be called with valid 'pkey'
101916171bffSDave Hansen 	 * values originating from in-kernel users.  Complain
102016171bffSDave Hansen 	 * if a bad value is observed.
102116171bffSDave Hansen 	 */
10229fe8a6f5SThomas Gleixner 	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
10239fe8a6f5SThomas Gleixner 		return -EINVAL;
102416171bffSDave Hansen 
102591c3dba7SYu-cheng Yu 	/* Set the bits we need in PKRU:  */
102684594296SDave Hansen 	if (init_val & PKEY_DISABLE_ACCESS)
102784594296SDave Hansen 		new_pkru_bits |= PKRU_AD_BIT;
102884594296SDave Hansen 	if (init_val & PKEY_DISABLE_WRITE)
102984594296SDave Hansen 		new_pkru_bits |= PKRU_WD_BIT;
103084594296SDave Hansen 
103191c3dba7SYu-cheng Yu 	/* Shift the bits in to the correct place in PKRU for pkey: */
10329fe8a6f5SThomas Gleixner 	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
103384594296SDave Hansen 	new_pkru_bits <<= pkey_shift;
103484594296SDave Hansen 
1035b79daf85SDave Hansen 	/* Get old PKRU and mask off any old bits in place: */
1036b79daf85SDave Hansen 	old_pkru = read_pkru();
1037b79daf85SDave Hansen 	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
103884594296SDave Hansen 
1039b79daf85SDave Hansen 	/* Write old part along with new part: */
1040b79daf85SDave Hansen 	write_pkru(old_pkru | new_pkru_bits);
104191c3dba7SYu-cheng Yu 
104291c3dba7SYu-cheng Yu 	return 0;
104391c3dba7SYu-cheng Yu }
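
/*
 * PKRU layout refresher for the shifting above: two bits per key, with
 * access-disable in the low bit and write-disable in the high bit.
 * E.g. for pkey 5 with init_val == PKEY_DISABLE_WRITE:
 *
 *	pkey_shift    = 5 * PKRU_BITS_PER_PKEY  = 10
 *	new_pkru_bits = PKRU_WD_BIT << 10       = 0x800
 */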
1044e8c24d3aSDave Hansen #endif /* CONFIG_ARCH_HAS_PKEYS */
104591c3dba7SYu-cheng Yu 
104696258950SThomas Gleixner static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
104796258950SThomas Gleixner 			 void *init_xstate, unsigned int size)
1048f0d4f30aSIngo Molnar {
104996258950SThomas Gleixner 	membuf_write(to, from_xstate ? xstate : init_xstate, size);
1050f0d4f30aSIngo Molnar }
1051f0d4f30aSIngo Molnar 
1052eb6f5172SThomas Gleixner /**
1053ca834defSThomas Gleixner  * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1054eb6f5172SThomas Gleixner  * @to:		membuf descriptor
10553ac8d757SThomas Gleixner  * @fpstate:	The fpstate buffer from which to copy
1056ca834defSThomas Gleixner  * @pkru_val:	The PKRU value to store in the PKRU component
1057eb6f5172SThomas Gleixner  * @copy_mode:	The requested copy mode
1058f0d4f30aSIngo Molnar  *
1059eb6f5172SThomas Gleixner  * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1060eb6f5172SThomas Gleixner  * format, i.e. from the kernel internal hardware dependent storage format
1061eb6f5172SThomas Gleixner  * to the requested @copy_mode. UABI XSTATE is always uncompacted!
1062eb6f5172SThomas Gleixner  *
1063eb6f5172SThomas Gleixner  * It supports partial copy but @to.pos always starts from zero.
1064f0d4f30aSIngo Molnar  */
10653ac8d757SThomas Gleixner void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
1066ca834defSThomas Gleixner 			       u32 pkru_val, enum xstate_copy_mode copy_mode)
1067f0d4f30aSIngo Molnar {
106896258950SThomas Gleixner 	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
1069f83ac56aSThomas Gleixner 	struct xregs_state *xinit = &init_fpstate.regs.xsave;
10703ac8d757SThomas Gleixner 	struct xregs_state *xsave = &fpstate->regs.xsave;
1071f0d4f30aSIngo Molnar 	struct xstate_header header;
107296258950SThomas Gleixner 	unsigned int zerofrom;
1073ffd3e504SThomas Gleixner 	u64 mask;
10748c0817f4SIngo Molnar 	int i;
1075f0d4f30aSIngo Molnar 
107693c2cdc9SThomas Gleixner 	memset(&header, 0, sizeof(header));
1077f0d4f30aSIngo Molnar 	header.xfeatures = xsave->header.xfeatures;
1078eb6f5172SThomas Gleixner 
1079eb6f5172SThomas Gleixner 	/* Mask out the feature bits depending on copy mode */
1080eb6f5172SThomas Gleixner 	switch (copy_mode) {
1081eb6f5172SThomas Gleixner 	case XSTATE_COPY_FP:
1082eb6f5172SThomas Gleixner 		header.xfeatures &= XFEATURE_MASK_FP;
1083eb6f5172SThomas Gleixner 		break;
1084eb6f5172SThomas Gleixner 
1085eb6f5172SThomas Gleixner 	case XSTATE_COPY_FX:
1086eb6f5172SThomas Gleixner 		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
1087eb6f5172SThomas Gleixner 		break;
1088eb6f5172SThomas Gleixner 
1089eb6f5172SThomas Gleixner 	case XSTATE_COPY_XSAVE:
10903ac8d757SThomas Gleixner 		header.xfeatures &= fpstate->user_xfeatures;
1091eb6f5172SThomas Gleixner 		break;
1092eb6f5172SThomas Gleixner 	}
1093f0d4f30aSIngo Molnar 
109496258950SThomas Gleixner 	/* Copy FP state up to MXCSR */
109596258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
109696258950SThomas Gleixner 		     &xinit->i387, off_mxcsr);
109796258950SThomas Gleixner 
109896258950SThomas Gleixner 	/* Copy MXCSR when SSE or YMM are set in the feature mask */
109996258950SThomas Gleixner 	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
110096258950SThomas Gleixner 		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
110196258950SThomas Gleixner 		     MXCSR_AND_FLAGS_SIZE);
110296258950SThomas Gleixner 
110396258950SThomas Gleixner 	/* Copy the remaining FP state */
110496258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
110596258950SThomas Gleixner 		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
110696258950SThomas Gleixner 		     sizeof(xsave->i387.st_space));
110796258950SThomas Gleixner 
110896258950SThomas Gleixner 	/* Copy the SSE state - shared with YMM, but independently managed */
110996258950SThomas Gleixner 	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
111096258950SThomas Gleixner 		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
111196258950SThomas Gleixner 		     sizeof(xsave->i387.xmm_space));
111296258950SThomas Gleixner 
1113eb6f5172SThomas Gleixner 	if (copy_mode != XSTATE_COPY_XSAVE)
1114eb6f5172SThomas Gleixner 		goto out;
1115eb6f5172SThomas Gleixner 
111696258950SThomas Gleixner 	/* Zero the padding area */
111796258950SThomas Gleixner 	membuf_zero(&to, sizeof(xsave->i387.padding));
111896258950SThomas Gleixner 
111996258950SThomas Gleixner 	/* Copy xsave->i387.sw_reserved */
112096258950SThomas Gleixner 	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
112196258950SThomas Gleixner 
112296258950SThomas Gleixner 	/* Copy the user space relevant state of @xsave->header */
112396258950SThomas Gleixner 	membuf_write(&to, &header, sizeof(header));
112496258950SThomas Gleixner 
112596258950SThomas Gleixner 	zerofrom = offsetof(struct xregs_state, extended_state_area);
1126f0d4f30aSIngo Molnar 
1127f0d4f30aSIngo Molnar 	/*
1128b1588884SChang S. Bae 	 * This 'mask' indicates which states to copy from fpstate.
1129b1588884SChang S. Bae 	 * Those extended states that are not present in fpstate are
1130b1588884SChang S. Bae 	 * either disabled or initialized:
1131b1588884SChang S. Bae 	 *
1132b1588884SChang S. Bae 	 * In non-compacted format, disabled features still occupy
1133b1588884SChang S. Bae 	 * state space but there is no state to copy from in the
1134b1588884SChang S. Bae 	 * compacted init_fpstate. The gap tracking will zero these
1135b1588884SChang S. Bae 	 * states.
1136b1588884SChang S. Bae 	 *
1137b1588884SChang S. Bae 	 * The extended features have an all zeroes init state. Thus,
1138b1588884SChang S. Bae 	 * remove them from 'mask' to zero those features in the user
1139b1588884SChang S. Bae 	 * buffer instead of retrieving them from init_fpstate.
1140f0d4f30aSIngo Molnar 	 */
1141b1588884SChang S. Bae 	mask = header.xfeatures;
1142471f0aa7SChang S. Bae 
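	/*
	 * Worked example (editor's addition, typical non-compacted
	 * layout): after copying YMM (offset 576, size 256), zerofrom
	 * is 832. If the next feature set in 'mask' is OPMASK at
	 * offset 1088, the 256 bytes of MPX space in between are
	 * zeroed in the destination before OPMASK is written.
	 */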
1143ffd3e504SThomas Gleixner 	for_each_extended_xfeature(i, mask) {
114496258950SThomas Gleixner 		/*
114596258950SThomas Gleixner 		 * If there was a feature or alignment gap, zero the space
114696258950SThomas Gleixner 		 * in the destination buffer.
114796258950SThomas Gleixner 		 */
114896258950SThomas Gleixner 		if (zerofrom < xstate_offsets[i])
114996258950SThomas Gleixner 			membuf_zero(&to, xstate_offsets[i] - zerofrom);
115096258950SThomas Gleixner 
1151e84ba47eSDave Hansen 		if (i == XFEATURE_PKRU) {
1152e84ba47eSDave Hansen 			struct pkru_state pkru = {0};
1153e84ba47eSDave Hansen 			/*
1154e84ba47eSDave Hansen 			 * PKRU is not necessarily up to date in the
1155ca834defSThomas Gleixner 			 * XSAVE buffer. Use the provided value.
1156e84ba47eSDave Hansen 			 */
1157ca834defSThomas Gleixner 			pkru.pkru = pkru_val;
1158e84ba47eSDave Hansen 			membuf_write(&to, &pkru, sizeof(pkru));
1159e84ba47eSDave Hansen 		} else {
1160b1588884SChang S. Bae 			membuf_write(&to,
116196258950SThomas Gleixner 				     __raw_xsave_addr(xsave, i),
116296258950SThomas Gleixner 				     xstate_sizes[i]);
1163e84ba47eSDave Hansen 		}
116496258950SThomas Gleixner 		/*
116596258950SThomas Gleixner 		 * Keep track of the last copied state in the non-compacted
116696258950SThomas Gleixner 		 * target buffer for gap zeroing.
116796258950SThomas Gleixner 		 */
116896258950SThomas Gleixner 		zerofrom = xstate_offsets[i] + xstate_sizes[i];
1169f0d4f30aSIngo Molnar 	}
1170f0d4f30aSIngo Molnar 
1171eb6f5172SThomas Gleixner out:
117296258950SThomas Gleixner 	if (to.left)
117396258950SThomas Gleixner 		membuf_zero(&to, to.left);
117491c3dba7SYu-cheng Yu }
117591c3dba7SYu-cheng Yu 
1176ca834defSThomas Gleixner /**
1177ca834defSThomas Gleixner  * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1178ca834defSThomas Gleixner  * @to:		membuf descriptor
1179ca834defSThomas Gleixner  * @tsk:	The task from which to copy the saved xstate
1180ca834defSThomas Gleixner  * @copy_mode:	The requested copy mode
1181ca834defSThomas Gleixner  *
1182ca834defSThomas Gleixner  * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1183ca834defSThomas Gleixner  * format, i.e. from the kernel internal hardware dependent storage format
1184ca834defSThomas Gleixner  * to the requested @copy_mode. UABI XSTATE is always uncompacted!
1185ca834defSThomas Gleixner  *
1186ca834defSThomas Gleixner  * It supports partial copy but @to.pos always starts from zero.
1187ca834defSThomas Gleixner  */
1188ca834defSThomas Gleixner void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
1189ca834defSThomas Gleixner 			     enum xstate_copy_mode copy_mode)
1190ca834defSThomas Gleixner {
11913ac8d757SThomas Gleixner 	__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
1192ca834defSThomas Gleixner 				  tsk->thread.pkru, copy_mode);
1193ca834defSThomas Gleixner }
1194ca834defSThomas Gleixner 
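/*
 * Illustrative sketch (editor's addition): a minimal caller that dumps
 * a task's xstate in UABI format into a caller-provided kernel buffer;
 * 'buf' and 'size' are hypothetical. struct membuf comes from
 * <linux/regset.h>.
 */
static void __maybe_unused example_dump_uabi_xstate(struct task_struct *tsk,
						    void *buf, size_t size)
{
	struct membuf to = { .p = buf, .left = size };

	/* Space not covered by the task's features is zeroed. */
	copy_xstate_to_uabi_buf(to, tsk, XSTATE_COPY_XSAVE);
}
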
1195522e9274SThomas Gleixner static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
1196522e9274SThomas Gleixner 			    const void *kbuf, const void __user *ubuf)
1197947f4947SThomas Gleixner {
1198522e9274SThomas Gleixner 	if (kbuf) {
1199522e9274SThomas Gleixner 		memcpy(dst, kbuf + offset, size);
1200522e9274SThomas Gleixner 	} else {
1201522e9274SThomas Gleixner 		if (copy_from_user(dst, ubuf + offset, size))
1202522e9274SThomas Gleixner 			return -EFAULT;
1203947f4947SThomas Gleixner 	}
1204522e9274SThomas Gleixner 	return 0;
1205947f4947SThomas Gleixner }
1206947f4947SThomas Gleixner 
12082c87767cSKyle Huey /**
12092c87767cSKyle Huey  * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
12102c87767cSKyle Huey  * @fpstate:	The fpstate buffer to copy to
12112c87767cSKyle Huey  * @kbuf:	The UABI format buffer, if it comes from the kernel
12122c87767cSKyle Huey  * @ubuf:	The UABI format buffer, if it comes from userspace
12134a804c4fSKyle Huey  * @pkru:	The location to write the PKRU value to
12142c87767cSKyle Huey  *
12152c87767cSKyle Huey  * Converts from the UABI format into the kernel internal hardware
12162c87767cSKyle Huey  * dependent format.
12174a804c4fSKyle Huey  *
12184a804c4fSKyle Huey  * This function ultimately has three different callers with distinct PKRU
12194a804c4fSKyle Huey  * behavior.
12204a804c4fSKyle Huey  * 1.	When called from sigreturn the PKRU register will be restored from
12214a804c4fSKyle Huey  *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
12224a804c4fSKyle Huey  *	@fpstate is sufficient to cover this case, but the caller will also
12234a804c4fSKyle Huey  *	pass a pointer to the thread_struct's pkru field in @pkru and updating
12244a804c4fSKyle Huey  *	it is harmless.
12254a804c4fSKyle Huey  * 2.	When called from ptrace the PKRU register will be restored from the
12264a804c4fSKyle Huey  *	thread_struct's pkru field. A pointer to that is passed in @pkru.
1227d7e5aceaSKyle Huey  *	The kernel will restore it manually, so the XRSTOR behavior that resets
1228d7e5aceaSKyle Huey  *	the PKRU register to the hardware init value (0) if the corresponding
1229d7e5aceaSKyle Huey  *	xfeatures bit is not set is emulated here.
12304a804c4fSKyle Huey  * 3.	When called from KVM the PKRU register will be restored from the vcpu's
1231d7e5aceaSKyle Huey  *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1232d7e5aceaSKyle Huey  *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
1233d7e5aceaSKyle Huey  *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1234d7e5aceaSKyle Huey  *	bit is not set.
12352c87767cSKyle Huey  */
123649e4eb41SThomas Gleixner static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
12372c87767cSKyle Huey 			       const void __user *ubuf, u32 *pkru)
123879fecc2bSIngo Molnar {
123949e4eb41SThomas Gleixner 	struct xregs_state *xsave = &fpstate->regs.xsave;
124079fecc2bSIngo Molnar 	unsigned int offset, size;
124180d8ae86SEric Biggers 	struct xstate_header hdr;
1242522e9274SThomas Gleixner 	u64 mask;
1243522e9274SThomas Gleixner 	int i;
124479fecc2bSIngo Molnar 
124579fecc2bSIngo Molnar 	offset = offsetof(struct xregs_state, header);
1246522e9274SThomas Gleixner 	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
1247522e9274SThomas Gleixner 		return -EFAULT;
124879fecc2bSIngo Molnar 
124949e4eb41SThomas Gleixner 	if (validate_user_xstate_header(&hdr, fpstate))
125079fecc2bSIngo Molnar 		return -EINVAL;
125179fecc2bSIngo Molnar 
1252522e9274SThomas Gleixner 	/* Validate MXCSR when any of the related features is in use */
1253522e9274SThomas Gleixner 	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
1254522e9274SThomas Gleixner 	if (hdr.xfeatures & mask) {
1255522e9274SThomas Gleixner 		u32 mxcsr[2];
1256522e9274SThomas Gleixner 
1257522e9274SThomas Gleixner 		offset = offsetof(struct fxregs_state, mxcsr);
1258522e9274SThomas Gleixner 		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
1259522e9274SThomas Gleixner 			return -EFAULT;
1260522e9274SThomas Gleixner 
1261522e9274SThomas Gleixner 		/* Reserved bits in MXCSR must be zero. */
1262522e9274SThomas Gleixner 		if (mxcsr[0] & ~mxcsr_feature_mask)
1263947f4947SThomas Gleixner 			return -EINVAL;
1264947f4947SThomas Gleixner 
1265522e9274SThomas Gleixner 		/* SSE and YMM require MXCSR even when FP is not in use. */
1266522e9274SThomas Gleixner 		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
1267522e9274SThomas Gleixner 			xsave->i387.mxcsr = mxcsr[0];
1268522e9274SThomas Gleixner 			xsave->i387.mxcsr_mask = mxcsr[1];
1269522e9274SThomas Gleixner 		}
1270522e9274SThomas Gleixner 	}
1271522e9274SThomas Gleixner 
127279fecc2bSIngo Molnar 	for (i = 0; i < XFEATURE_MAX; i++) {
1273b91c0922SThomas Gleixner 		mask = BIT_ULL(i);
127479fecc2bSIngo Molnar 
1275b89eda48SEric Biggers 		if (hdr.xfeatures & mask) {
127607baeb04SSebastian Andrzej Siewior 			void *dst = __raw_xsave_addr(xsave, i);
127779fecc2bSIngo Molnar 
127879fecc2bSIngo Molnar 			offset = xstate_offsets[i];
127979fecc2bSIngo Molnar 			size = xstate_sizes[i];
128079fecc2bSIngo Molnar 
1281522e9274SThomas Gleixner 			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
1282522e9274SThomas Gleixner 				return -EFAULT;
128379fecc2bSIngo Molnar 		}
128479fecc2bSIngo Molnar 	}
128579fecc2bSIngo Molnar 
12864a804c4fSKyle Huey 	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
12874a804c4fSKyle Huey 		struct pkru_state *xpkru;
12884a804c4fSKyle Huey 
12894a804c4fSKyle Huey 		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
12904a804c4fSKyle Huey 		*pkru = xpkru->pkru;
1291d7e5aceaSKyle Huey 	} else {
1292d7e5aceaSKyle Huey 		/*
1293d7e5aceaSKyle Huey 		 * KVM may pass NULL here to indicate that it does not need
1294d7e5aceaSKyle Huey 		 * PKRU updated.
1295d7e5aceaSKyle Huey 		 */
1296d7e5aceaSKyle Huey 		if (pkru)
1297d7e5aceaSKyle Huey 			*pkru = 0;
12984a804c4fSKyle Huey 	}
12994a804c4fSKyle Huey 
130079fecc2bSIngo Molnar 	/*
130179fecc2bSIngo Molnar 	 * The state that came in from userspace was user-state only.
130279fecc2bSIngo Molnar 	 * Mask all the user states out of 'xfeatures':
130379fecc2bSIngo Molnar 	 */
13048ab22804SFenghua Yu 	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
130579fecc2bSIngo Molnar 
130679fecc2bSIngo Molnar 	/*
130779fecc2bSIngo Molnar 	 * Add back in the features that came in from userspace:
130879fecc2bSIngo Molnar 	 */
1309b89eda48SEric Biggers 	xsave->header.xfeatures |= hdr.xfeatures;
131079fecc2bSIngo Molnar 
131179fecc2bSIngo Molnar 	return 0;
131279fecc2bSIngo Molnar }
131379fecc2bSIngo Molnar 
131479fecc2bSIngo Molnar /*
1315522e9274SThomas Gleixner  * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
1316ea4d6938SThomas Gleixner  * format and copy to the target thread. Used by ptrace and KVM.
1317522e9274SThomas Gleixner  */
13181c813ce0SKyle Huey int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
1319522e9274SThomas Gleixner {
13202c87767cSKyle Huey 	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
1321522e9274SThomas Gleixner }
1322522e9274SThomas Gleixner 
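/*
 * Illustrative sketch (editor's addition): a ptrace-style caller (case
 * 2 in the comment above copy_uabi_to_xstate()). PKRU comes back
 * through the pointer and is made effective by the kernel later, as no
 * XRSTOR of the user features happens on this path.
 */
static int __maybe_unused example_set_xstate_from_ptrace(struct task_struct *tsk,
							 const void *uabi_buf)
{
	return copy_uabi_from_kernel_to_xstate(tsk->thread.fpu.fpstate,
					       uabi_buf, &tsk->thread.pkru);
}
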
1323522e9274SThomas Gleixner /*
132443be46e8SThomas Gleixner  * Convert from a sigreturn standard-format user-space buffer to kernel
132543be46e8SThomas Gleixner  * XSAVE[S] format and copy to the target thread. This is called from the
132643be46e8SThomas Gleixner  * sigreturn() and rt_sigreturn() system calls.
132791c3dba7SYu-cheng Yu  */
13286a877d24SKyle Huey int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
13291cc34413SThomas Gleixner 				      const void __user *ubuf)
133091c3dba7SYu-cheng Yu {
13312c87767cSKyle Huey 	return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
133284594296SDave Hansen }
13330c608dadSAubrey Li 
1334f5daf836SThomas Gleixner static bool validate_independent_components(u64 mask)
133550f408d9SKan Liang {
1336a75c5289SThomas Gleixner 	u64 xchk;
133750f408d9SKan Liang 
1338a75c5289SThomas Gleixner 	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1339a75c5289SThomas Gleixner 		return false;
1340f5daf836SThomas Gleixner 
1341a75c5289SThomas Gleixner 	xchk = ~xfeatures_mask_independent();
134250f408d9SKan Liang 
1343a75c5289SThomas Gleixner 	if (WARN_ON_ONCE(!mask || mask & xchk))
1344a75c5289SThomas Gleixner 		return false;
134550f408d9SKan Liang 
1346a75c5289SThomas Gleixner 	return true;
134750f408d9SKan Liang }
134850f408d9SKan Liang 
134950f408d9SKan Liang /**
1350a75c5289SThomas Gleixner  * xsaves - Save selected components to a kernel xstate buffer
1351a75c5289SThomas Gleixner  * @xstate:	Pointer to the buffer
1352a75c5289SThomas Gleixner  * @mask:	Feature mask to select the components to save
135350f408d9SKan Liang  *
1354a75c5289SThomas Gleixner  * The @xstate buffer must be 64 byte aligned and correctly initialized as
1355a75c5289SThomas Gleixner  * XSAVES does not write the full xstate header. Before first use the
1356a75c5289SThomas Gleixner  * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1357a75c5289SThomas Gleixner  * can #GP.
135850f408d9SKan Liang  *
1359f5daf836SThomas Gleixner  * The feature mask must be a subset of the independent features.
136050f408d9SKan Liang  */
1361a75c5289SThomas Gleixner void xsaves(struct xregs_state *xstate, u64 mask)
136250f408d9SKan Liang {
136350f408d9SKan Liang 	int err;
136450f408d9SKan Liang 
1365f5daf836SThomas Gleixner 	if (!validate_independent_components(mask))
136650f408d9SKan Liang 		return;
136750f408d9SKan Liang 
1368a75c5289SThomas Gleixner 	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
1369a75c5289SThomas Gleixner 	WARN_ON_ONCE(err);
1370a75c5289SThomas Gleixner }
1371a75c5289SThomas Gleixner 
1372a75c5289SThomas Gleixner /**
1373a75c5289SThomas Gleixner  * xrstors - Restore selected components from a kernel xstate buffer
1374a75c5289SThomas Gleixner  * @xstate:	Pointer to the buffer
1375a75c5289SThomas Gleixner  * @mask:	Feature mask to select the components to restore
1376a75c5289SThomas Gleixner  *
1377a75c5289SThomas Gleixner  * The @xstate buffer must be 64 byte aligned and correctly initialized
1378a75c5289SThomas Gleixner  * otherwise XRSTORS from that buffer can #GP.
1379a75c5289SThomas Gleixner  *
1380a75c5289SThomas Gleixner  * Proper usage is to restore the state which was saved with
1381a75c5289SThomas Gleixner  * xsaves() into @xstate.
1382a75c5289SThomas Gleixner  *
1383f5daf836SThomas Gleixner  * The feature mask must be a subset of the independent features.
1384a75c5289SThomas Gleixner  */
1385a75c5289SThomas Gleixner void xrstors(struct xregs_state *xstate, u64 mask)
1386a75c5289SThomas Gleixner {
1387a75c5289SThomas Gleixner 	int err;
1388a75c5289SThomas Gleixner 
1389f5daf836SThomas Gleixner 	if (!validate_independent_components(mask))
139050f408d9SKan Liang 		return;
139150f408d9SKan Liang 
1392a75c5289SThomas Gleixner 	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
1393a75c5289SThomas Gleixner 	WARN_ON_ONCE(err);
139450f408d9SKan Liang }
139550f408d9SKan Liang 
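/*
 * Illustrative sketch (editor's addition): pairing xsaves()/xrstors()
 * on an independent component. XFEATURE_MASK_LBR is the only
 * independent feature at the time of writing; 'lbr_buf' is assumed to
 * be 64-byte aligned and zeroed before first use, as required above.
 */
static void __maybe_unused example_lbr_save_restore(struct xregs_state *lbr_buf)
{
	xsaves(lbr_buf, XFEATURE_MASK_LBR);
	/* ... the LBR MSRs may be clobbered here ... */
	xrstors(lbr_buf, XFEATURE_MASK_LBR);
}
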
1396087df48cSThomas Gleixner #if IS_ENABLED(CONFIG_KVM)
1397087df48cSThomas Gleixner void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1398087df48cSThomas Gleixner {
1399087df48cSThomas Gleixner 	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1400087df48cSThomas Gleixner 
1401087df48cSThomas Gleixner 	if (addr)
1402087df48cSThomas Gleixner 		memset(addr, 0, xstate_sizes[xfeature]);
1403087df48cSThomas Gleixner }
1404087df48cSThomas Gleixner EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1405087df48cSThomas Gleixner #endif
1406087df48cSThomas Gleixner 
1407db8268dfSChang S. Bae #ifdef CONFIG_X86_64
14085529acf4SThomas Gleixner 
14095529acf4SThomas Gleixner #ifdef CONFIG_X86_DEBUG_FPU
14105529acf4SThomas Gleixner /*
14115529acf4SThomas Gleixner  * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
14125529acf4SThomas Gleixner  * can safely operate on the @fpstate buffer.
14135529acf4SThomas Gleixner  */
14145529acf4SThomas Gleixner static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
14155529acf4SThomas Gleixner {
14165529acf4SThomas Gleixner 	u64 xfd = __this_cpu_read(xfd_state);
14175529acf4SThomas Gleixner 
14185529acf4SThomas Gleixner 	if (fpstate->xfd == xfd)
14195529acf4SThomas Gleixner 		return true;
14205529acf4SThomas Gleixner 
14215529acf4SThomas Gleixner 	/*
14225529acf4SThomas Gleixner 	 * The XFD MSR does not match fpstate->xfd. That's invalid when
14235529acf4SThomas Gleixner 	 * the passed in fpstate is current's fpstate.
14245529acf4SThomas Gleixner 	 */
14255529acf4SThomas Gleixner 	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
14265529acf4SThomas Gleixner 		return false;
14275529acf4SThomas Gleixner 
14285529acf4SThomas Gleixner 	/*
14295529acf4SThomas Gleixner 	 * XRSTOR(S) from init_fpstate is always correct as it will just
14305529acf4SThomas Gleixner 	 * bring all components into init state without reading from the
14315529acf4SThomas Gleixner 	 * buffer. XSAVE(S) raises #PF after boot as init_fpstate is read-only.
14325529acf4SThomas Gleixner 	 */
14335529acf4SThomas Gleixner 	if (fpstate == &init_fpstate)
14345529acf4SThomas Gleixner 		return rstor;
14355529acf4SThomas Gleixner 
14365529acf4SThomas Gleixner 	/*
14375529acf4SThomas Gleixner 	 * XSAVE(S): clone(), fpu_swap_kvm_fpu()
14385529acf4SThomas Gleixner 	 * XRSTOR(S): fpu_swap_kvm_fpu()
14395529acf4SThomas Gleixner 	 */
14405529acf4SThomas Gleixner 
14415529acf4SThomas Gleixner 	/*
14425529acf4SThomas Gleixner 	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
14435529acf4SThomas Gleixner 	 * the buffer area for XFD-disabled state components.
14445529acf4SThomas Gleixner 	 */
14455529acf4SThomas Gleixner 	mask &= ~xfd;
14465529acf4SThomas Gleixner 
14475529acf4SThomas Gleixner 	/*
14485529acf4SThomas Gleixner 	 * Remove features which are valid in fpstate. They
14495529acf4SThomas Gleixner 	 * have space allocated in fpstate.
14505529acf4SThomas Gleixner 	 */
14515529acf4SThomas Gleixner 	mask &= ~fpstate->xfeatures;
14525529acf4SThomas Gleixner 
14535529acf4SThomas Gleixner 	/*
14545529acf4SThomas Gleixner 	 * Any remaining state components in 'mask' might be written
14555529acf4SThomas Gleixner 	 * by XSAVE/XRSTOR. Fail validation if any are found.
14565529acf4SThomas Gleixner 	 */
14575529acf4SThomas Gleixner 	return !mask;
14585529acf4SThomas Gleixner }
14595529acf4SThomas Gleixner 
14605529acf4SThomas Gleixner void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
14615529acf4SThomas Gleixner {
14625529acf4SThomas Gleixner 	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
14635529acf4SThomas Gleixner }
14645529acf4SThomas Gleixner #endif /* CONFIG_X86_DEBUG_FPU */
14655529acf4SThomas Gleixner 
1466db3e7321SChang S. Bae static int __init xfd_update_static_branch(void)
1467db3e7321SChang S. Bae {
1468db3e7321SChang S. Bae 	/*
1469db3e7321SChang S. Bae 	 * If init_fpstate.xfd has bits set then dynamic features are
1470db3e7321SChang S. Bae 	 * available and the dynamic sizing must be enabled.
1471db3e7321SChang S. Bae 	 */
1472db3e7321SChang S. Bae 	if (init_fpstate.xfd)
1473db3e7321SChang S. Bae 		static_branch_enable(&__fpu_state_size_dynamic);
1474db3e7321SChang S. Bae 	return 0;
1475db3e7321SChang S. Bae }
1476db3e7321SChang S. Bae arch_initcall(xfd_update_static_branch);
1477db3e7321SChang S. Bae 
1478500afbf6SChang S. Bae void fpstate_free(struct fpu *fpu)
1479500afbf6SChang S. Bae {
1480db3e7321SChang S. Bae 	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1481500afbf6SChang S. Bae 		vfree(fpu->fpstate);
1482500afbf6SChang S. Bae }
1483500afbf6SChang S. Bae 
1484500afbf6SChang S. Bae /**
1485500afbf6SChang S. Bae  * fpstate_realloc - Reallocate struct fpstate for the requested new features
1486500afbf6SChang S. Bae  *
1487500afbf6SChang S. Bae  * @xfeatures:	A bitmap of xstate features which extend the enabled features
1488500afbf6SChang S. Bae  *		of that task
1489500afbf6SChang S. Bae  * @ksize:	The required size for the kernel buffer
1490500afbf6SChang S. Bae  * @usize:	The required size for user space buffers
1491c270ce39SThomas Gleixner  * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
1492500afbf6SChang S. Bae  *
1493500afbf6SChang S. Bae  * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
1494500afbf6SChang S. Bae  * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
1495500afbf6SChang S. Bae  * with large states are likely to live longer.
1496500afbf6SChang S. Bae  *
1497500afbf6SChang S. Bae  * Returns: 0 on success, -ENOMEM on allocation error.
1498500afbf6SChang S. Bae  */
1499500afbf6SChang S. Bae static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
1500c270ce39SThomas Gleixner 			   unsigned int usize, struct fpu_guest *guest_fpu)
1501500afbf6SChang S. Bae {
1502500afbf6SChang S. Bae 	struct fpu *fpu = &current->thread.fpu;
1503500afbf6SChang S. Bae 	struct fpstate *curfps, *newfps = NULL;
1504500afbf6SChang S. Bae 	unsigned int fpsize;
1505c270ce39SThomas Gleixner 	bool in_use;
1506500afbf6SChang S. Bae 
1507500afbf6SChang S. Bae 	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
1508500afbf6SChang S. Bae 
1509500afbf6SChang S. Bae 	newfps = vzalloc(fpsize);
1510500afbf6SChang S. Bae 	if (!newfps)
1511500afbf6SChang S. Bae 		return -ENOMEM;
1512500afbf6SChang S. Bae 	newfps->size = ksize;
1513500afbf6SChang S. Bae 	newfps->user_size = usize;
1514500afbf6SChang S. Bae 	newfps->is_valloc = true;
1515500afbf6SChang S. Bae 
1516c270ce39SThomas Gleixner 	/*
1517c270ce39SThomas Gleixner 	 * When a guest FPU is supplied, use @guest_fpu->fpstate
1518c270ce39SThomas Gleixner 	 * as the reference, independent of whether it is in use or not.
1519c270ce39SThomas Gleixner 	 */
1520c270ce39SThomas Gleixner 	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
1521c270ce39SThomas Gleixner 
1522c270ce39SThomas Gleixner 	/* Determine whether @curfps is the active fpstate */
1523c270ce39SThomas Gleixner 	in_use = fpu->fpstate == curfps;
1524c270ce39SThomas Gleixner 
1525c270ce39SThomas Gleixner 	if (guest_fpu) {
1526c270ce39SThomas Gleixner 		newfps->is_guest = true;
1527c270ce39SThomas Gleixner 		newfps->is_confidential = curfps->is_confidential;
1528c270ce39SThomas Gleixner 		newfps->in_use = curfps->in_use;
1529c270ce39SThomas Gleixner 		guest_fpu->xfeatures |= xfeatures;
1530c60427ddSThomas Gleixner 		guest_fpu->uabi_size = usize;
1531c270ce39SThomas Gleixner 	}
1532c270ce39SThomas Gleixner 
1533500afbf6SChang S. Bae 	fpregs_lock();
1534500afbf6SChang S. Bae 	/*
1535c270ce39SThomas Gleixner 	 * If @curfps is in use, ensure that the current state is in the
1536c270ce39SThomas Gleixner 	 * registers before swapping fpstate as that might invalidate it
1537c270ce39SThomas Gleixner 	 * due to layout changes.
1538500afbf6SChang S. Bae 	 */
1539c270ce39SThomas Gleixner 	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
1540500afbf6SChang S. Bae 		fpregs_restore_userregs();
1541500afbf6SChang S. Bae 
1542500afbf6SChang S. Bae 	newfps->xfeatures = curfps->xfeatures | xfeatures;
1543ad856280SLeonardo Bras 
1544ad856280SLeonardo Bras 	if (!guest_fpu)
1545500afbf6SChang S. Bae 		newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
1546ad856280SLeonardo Bras 
1547500afbf6SChang S. Bae 	newfps->xfd = curfps->xfd & ~xfeatures;
1548500afbf6SChang S. Bae 
1549500afbf6SChang S. Bae 	/* Do the final updates within the locked region */
1550500afbf6SChang S. Bae 	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
1551500afbf6SChang S. Bae 
1552c270ce39SThomas Gleixner 	if (guest_fpu) {
1553c270ce39SThomas Gleixner 		guest_fpu->fpstate = newfps;
1554c270ce39SThomas Gleixner 		/* If curfps is active, update the FPU fpstate pointer */
1555c270ce39SThomas Gleixner 		if (in_use)
1556c270ce39SThomas Gleixner 			fpu->fpstate = newfps;
1557c270ce39SThomas Gleixner 	} else {
1558c270ce39SThomas Gleixner 		fpu->fpstate = newfps;
1559c270ce39SThomas Gleixner 	}
1560c270ce39SThomas Gleixner 
1561c270ce39SThomas Gleixner 	if (in_use)
1562c270ce39SThomas Gleixner 		xfd_update_state(fpu->fpstate);
1563500afbf6SChang S. Bae 	fpregs_unlock();
1564500afbf6SChang S. Bae 
1565c270ce39SThomas Gleixner 	/* Only free valloc'ed state */
1566c270ce39SThomas Gleixner 	if (curfps && curfps->is_valloc)
1567500afbf6SChang S. Bae 		vfree(curfps);
1568c270ce39SThomas Gleixner 
1569500afbf6SChang S. Bae 	return 0;
1570500afbf6SChang S. Bae }
1571500afbf6SChang S. Bae 
1572db8268dfSChang S. Bae static int validate_sigaltstack(unsigned int usize)
1573db8268dfSChang S. Bae {
1574db8268dfSChang S. Bae 	struct task_struct *thread, *leader = current->group_leader;
1575db8268dfSChang S. Bae 	unsigned long framesize = get_sigframe_size();
1576db8268dfSChang S. Bae 
1577db8268dfSChang S. Bae 	lockdep_assert_held(&current->sighand->siglock);
1578db8268dfSChang S. Bae 
1579db8268dfSChang S. Bae 	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1580db8268dfSChang S. Bae 	framesize -= fpu_user_cfg.max_size;
1581db8268dfSChang S. Bae 	framesize += usize;
1582db8268dfSChang S. Bae 	for_each_thread(leader, thread) {
1583db8268dfSChang S. Bae 		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1584db8268dfSChang S. Bae 			return -ENOSPC;
1585db8268dfSChang S. Bae 	}
1586db8268dfSChang S. Bae 	return 0;
1587db8268dfSChang S. Bae }
1588db8268dfSChang S. Bae 
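/*
 * Worked example (editor's addition, hypothetical numbers): with
 * get_sigframe_size() == 12K derived from fpu_user_cfg.max_size == 8K,
 * a request that raises usize to 11K implies a 15K frame; any thread
 * whose registered sigaltstack is smaller than that fails the
 * permission request with -ENOSPC.
 */
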
1589980fe2fdSThomas Gleixner static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
1590db8268dfSChang S. Bae {
1591db8268dfSChang S. Bae 	/*
1592db8268dfSChang S. Bae 	 * This deliberately does not exclude !XSAVES as we still might
1593db8268dfSChang S. Bae 	 * decide to optionally context switch XCR0 or talk the silicon
1594500afbf6SChang S. Bae 	 * vendors into extending XFD for the pre-AMX states, especially
1595500afbf6SChang S. Bae 	 * AVX512.
1596db8268dfSChang S. Bae 	 */
15978ad7e8f6SThomas Gleixner 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
1598db8268dfSChang S. Bae 	struct fpu *fpu = &current->group_leader->thread.fpu;
1599980fe2fdSThomas Gleixner 	struct fpu_state_perm *perm;
1600db8268dfSChang S. Bae 	unsigned int ksize, usize;
1601db8268dfSChang S. Bae 	u64 mask;
1602980fe2fdSThomas Gleixner 	int ret = 0;
1603db8268dfSChang S. Bae 
1604db8268dfSChang S. Bae 	/* Check whether fully enabled */
1605db8268dfSChang S. Bae 	if ((permitted & requested) == requested)
1606db8268dfSChang S. Bae 		return 0;
1607db8268dfSChang S. Bae 
1608db8268dfSChang S. Bae 	/* Calculate the resulting kernel state size */
1609db8268dfSChang S. Bae 	mask = permitted | requested;
1610781c64bfSThomas Gleixner 	/* Take supervisor states into account on the host */
1611781c64bfSThomas Gleixner 	if (!guest)
1612781c64bfSThomas Gleixner 		mask |= xfeatures_mask_supervisor();
1613db8268dfSChang S. Bae 	ksize = xstate_calculate_size(mask, compacted);
1614db8268dfSChang S. Bae 
1615db8268dfSChang S. Bae 	/* Calculate the resulting user state size */
1616db8268dfSChang S. Bae 	mask &= XFEATURE_MASK_USER_SUPPORTED;
1617db8268dfSChang S. Bae 	usize = xstate_calculate_size(mask, false);
1618db8268dfSChang S. Bae 
1619980fe2fdSThomas Gleixner 	if (!guest) {
1620db8268dfSChang S. Bae 		ret = validate_sigaltstack(usize);
1621db8268dfSChang S. Bae 		if (ret)
1622db8268dfSChang S. Bae 			return ret;
1623980fe2fdSThomas Gleixner 	}
1624db8268dfSChang S. Bae 
1625980fe2fdSThomas Gleixner 	perm = guest ? &fpu->guest_perm : &fpu->perm;
1626db8268dfSChang S. Bae 	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
1627063452fdSYang Zhong 	WRITE_ONCE(perm->__state_perm, mask);
1628db8268dfSChang S. Bae 	/* Protected by sighand lock */
1629980fe2fdSThomas Gleixner 	perm->__state_size = ksize;
1630980fe2fdSThomas Gleixner 	perm->__user_state_size = usize;
1631db8268dfSChang S. Bae 	return ret;
1632db8268dfSChang S. Bae }
1633db8268dfSChang S. Bae 
1634db8268dfSChang S. Bae /*
1635db8268dfSChang S. Bae  * Permissions array to map facilities with more than one component
1636db8268dfSChang S. Bae  */
1637db8268dfSChang S. Bae static const u64 xstate_prctl_req[XFEATURE_MAX] = {
16382308ee57SChang S. Bae 	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
1639db8268dfSChang S. Bae };
1640db8268dfSChang S. Bae 
1641980fe2fdSThomas Gleixner static int xstate_request_perm(unsigned long idx, bool guest)
1642db8268dfSChang S. Bae {
1643db8268dfSChang S. Bae 	u64 permitted, requested;
1644db8268dfSChang S. Bae 	int ret;
1645db8268dfSChang S. Bae 
1646db8268dfSChang S. Bae 	if (idx >= XFEATURE_MAX)
1647db8268dfSChang S. Bae 		return -EINVAL;
1648db8268dfSChang S. Bae 
1649db8268dfSChang S. Bae 	/*
1650db8268dfSChang S. Bae 	 * Look up the facility mask which can require more than
1651db8268dfSChang S. Bae 	 * one xstate component.
1652db8268dfSChang S. Bae 	 */
1653db8268dfSChang S. Bae 	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
1654db8268dfSChang S. Bae 	requested = xstate_prctl_req[idx];
1655db8268dfSChang S. Bae 	if (!requested)
1656db8268dfSChang S. Bae 		return -EOPNOTSUPP;
1657db8268dfSChang S. Bae 
1658db8268dfSChang S. Bae 	if ((fpu_user_cfg.max_features & requested) != requested)
1659db8268dfSChang S. Bae 		return -EOPNOTSUPP;
1660db8268dfSChang S. Bae 
1661db8268dfSChang S. Bae 	/* Lockless quick check */
1662980fe2fdSThomas Gleixner 	permitted = xstate_get_group_perm(guest);
1663db8268dfSChang S. Bae 	if ((permitted & requested) == requested)
1664db8268dfSChang S. Bae 		return 0;
1665db8268dfSChang S. Bae 
1666db8268dfSChang S. Bae 	/* Protect against concurrent modifications */
1667db8268dfSChang S. Bae 	spin_lock_irq(&current->sighand->siglock);
1668980fe2fdSThomas Gleixner 	permitted = xstate_get_group_perm(guest);
1669980fe2fdSThomas Gleixner 
1670980fe2fdSThomas Gleixner 	/* First vCPU allocation locks the permissions. */
1671980fe2fdSThomas Gleixner 	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
1672980fe2fdSThomas Gleixner 		ret = -EBUSY;
1673980fe2fdSThomas Gleixner 	else
1674980fe2fdSThomas Gleixner 		ret = __xstate_request_perm(permitted, requested, guest);
1675db8268dfSChang S. Bae 	spin_unlock_irq(&current->sighand->siglock);
1676db8268dfSChang S. Bae 	return ret;
1677db8268dfSChang S. Bae }
1678783e87b4SChang S. Bae 
1679c270ce39SThomas Gleixner int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
1680783e87b4SChang S. Bae {
1681783e87b4SChang S. Bae 	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
1682c270ce39SThomas Gleixner 	struct fpu_state_perm *perm;
1683783e87b4SChang S. Bae 	unsigned int ksize, usize;
1684783e87b4SChang S. Bae 	struct fpu *fpu;
1685783e87b4SChang S. Bae 
1686783e87b4SChang S. Bae 	if (!xfd_event) {
1687c270ce39SThomas Gleixner 		if (!guest_fpu)
1688783e87b4SChang S. Bae 			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
1689783e87b4SChang S. Bae 		return 0;
1690783e87b4SChang S. Bae 	}
1691783e87b4SChang S. Bae 
1692783e87b4SChang S. Bae 	/* Protect against concurrent modifications */
1693783e87b4SChang S. Bae 	spin_lock_irq(&current->sighand->siglock);
1694783e87b4SChang S. Bae 
1695783e87b4SChang S. Bae 	/* If not permitted, let it die */
1696c270ce39SThomas Gleixner 	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
1697783e87b4SChang S. Bae 		spin_unlock_irq(&current->sighand->siglock);
1698783e87b4SChang S. Bae 		return -EPERM;
1699783e87b4SChang S. Bae 	}
1700783e87b4SChang S. Bae 
1701783e87b4SChang S. Bae 	fpu = &current->group_leader->thread.fpu;
1702c270ce39SThomas Gleixner 	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
1703c270ce39SThomas Gleixner 	ksize = perm->__state_size;
1704c270ce39SThomas Gleixner 	usize = perm->__user_state_size;
1705c270ce39SThomas Gleixner 
1706783e87b4SChang S. Bae 	/*
1707783e87b4SChang S. Bae 	 * The feature is permitted. State size is sufficient.  Dropping
1708783e87b4SChang S. Bae 	 * the lock is safe here even if more features are added from
1709783e87b4SChang S. Bae 	 * another task, the retrieved buffer sizes are valid for the
1710783e87b4SChang S. Bae 	 * currently requested feature(s).
1711783e87b4SChang S. Bae 	 */
1712783e87b4SChang S. Bae 	spin_unlock_irq(&current->sighand->siglock);
1713783e87b4SChang S. Bae 
1714783e87b4SChang S. Bae 	/*
1715783e87b4SChang S. Bae 	 * Try to allocate a new fpstate. If that fails there is no way
1716783e87b4SChang S. Bae 	 * out.
1717783e87b4SChang S. Bae 	 */
1718c270ce39SThomas Gleixner 	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
1719783e87b4SChang S. Bae 		return -EFAULT;
1720783e87b4SChang S. Bae 	return 0;
1721783e87b4SChang S. Bae }
1722c270ce39SThomas Gleixner 
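/*
 * Editor's note on the call path (hedged summary): when a task touches
 * a state component that is disabled in XFD, the CPU raises #NM. The
 * #NM handler reads the XFD error MSR and hands the value to this
 * function; a zero return means fpstate was grown and the task can
 * retry the faulting instruction.
 */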
1723c270ce39SThomas Gleixner int xfd_enable_feature(u64 xfd_err)
1724c270ce39SThomas Gleixner {
1725c270ce39SThomas Gleixner 	return __xfd_enable_feature(xfd_err, NULL);
1726c270ce39SThomas Gleixner }
1727c270ce39SThomas Gleixner 
1728db8268dfSChang S. Bae #else /* CONFIG_X86_64 */
1729980fe2fdSThomas Gleixner static inline int xstate_request_perm(unsigned long idx, bool guest)
1730db8268dfSChang S. Bae {
1731db8268dfSChang S. Bae 	return -EPERM;
1732db8268dfSChang S. Bae }
1733db8268dfSChang S. Bae #endif  /* !CONFIG_X86_64 */
1734db8268dfSChang S. Bae 
1735c862dcd1SYang Zhong u64 xstate_get_guest_group_perm(void)
1736980fe2fdSThomas Gleixner {
1737980fe2fdSThomas Gleixner 	return xstate_get_group_perm(true);
1738980fe2fdSThomas Gleixner }
1739980fe2fdSThomas Gleixner EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1740980fe2fdSThomas Gleixner 
1741db8268dfSChang S. Bae /**
1742db8268dfSChang S. Bae  * fpu_xstate_prctl - xstate permission operations
1744db8268dfSChang S. Bae  * @option:	A subfunction of arch_prctl()
1745db8268dfSChang S. Bae  * @arg2:	option argument
1746db8268dfSChang S. Bae  * Return:	0 if successful; otherwise, an error code
1747db8268dfSChang S. Bae  *
1748db8268dfSChang S. Bae  * Option arguments:
1749db8268dfSChang S. Bae  *
1750db8268dfSChang S. Bae  * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1751db8268dfSChang S. Bae  * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1752db8268dfSChang S. Bae  * ARCH_REQ_XCOMP_PERM: Facility number requested
1753db8268dfSChang S. Bae  *
1754db8268dfSChang S. Bae  * For facilities which require more than one XSTATE component, the request
1755db8268dfSChang S. Bae  * must be the highest state component number related to that facility,
1756db8268dfSChang S. Bae  * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1757db8268dfSChang S. Bae  * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1758db8268dfSChang S. Bae  */
1759f5c0b4f3SThomas Gleixner long fpu_xstate_prctl(int option, unsigned long arg2)
1760db8268dfSChang S. Bae {
1761db8268dfSChang S. Bae 	u64 __user *uptr = (u64 __user *)arg2;
1762db8268dfSChang S. Bae 	u64 permitted, supported;
1763db8268dfSChang S. Bae 	unsigned long idx = arg2;
1764980fe2fdSThomas Gleixner 	bool guest = false;
1765db8268dfSChang S. Bae 
1766db8268dfSChang S. Bae 	switch (option) {
1767db8268dfSChang S. Bae 	case ARCH_GET_XCOMP_SUPP:
1768db8268dfSChang S. Bae 		supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
1769db8268dfSChang S. Bae 		return put_user(supported, uptr);
1770db8268dfSChang S. Bae 
1771db8268dfSChang S. Bae 	case ARCH_GET_XCOMP_PERM:
1772db8268dfSChang S. Bae 		/*
1773db8268dfSChang S. Bae 		 * Lockless snapshot as it can also change right after
1774db8268dfSChang S. Bae 		 * dropping the lock.
1775db8268dfSChang S. Bae 		 */
1776db8268dfSChang S. Bae 		permitted = xstate_get_host_group_perm();
1777db8268dfSChang S. Bae 		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1778db8268dfSChang S. Bae 		return put_user(permitted, uptr);
1779db8268dfSChang S. Bae 
1780980fe2fdSThomas Gleixner 	case ARCH_GET_XCOMP_GUEST_PERM:
1781980fe2fdSThomas Gleixner 		permitted = xstate_get_guest_group_perm();
1782980fe2fdSThomas Gleixner 		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1783980fe2fdSThomas Gleixner 		return put_user(permitted, uptr);
1784980fe2fdSThomas Gleixner 
1785980fe2fdSThomas Gleixner 	case ARCH_REQ_XCOMP_GUEST_PERM:
1786980fe2fdSThomas Gleixner 		guest = true;
1787980fe2fdSThomas Gleixner 		fallthrough;
1788980fe2fdSThomas Gleixner 
1789db8268dfSChang S. Bae 	case ARCH_REQ_XCOMP_PERM:
1790db8268dfSChang S. Bae 		if (!IS_ENABLED(CONFIG_X86_64))
1791db8268dfSChang S. Bae 			return -EOPNOTSUPP;
1792db8268dfSChang S. Bae 
1793980fe2fdSThomas Gleixner 		return xstate_request_perm(idx, guest);
1794db8268dfSChang S. Bae 
1795db8268dfSChang S. Bae 	default:
1796db8268dfSChang S. Bae 		return -EINVAL;
1797db8268dfSChang S. Bae 	}
1798db8268dfSChang S. Bae }
1799db8268dfSChang S. Bae 
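#if 0
/*
 * Illustrative user-space sketch (editor's addition, compiled out):
 * requesting AMX permission before first use of tile state.
 * XFEATURE_XTILE_DATA is component 18, the highest component of the
 * AMX facility, per the rule documented above.
 */
#include <sys/syscall.h>
#include <unistd.h>

static int request_amx_permission(void)
{
	return syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, 18);
}
#endif
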
18000c608dadSAubrey Li #ifdef CONFIG_PROC_PID_ARCH_STATUS
18010c608dadSAubrey Li /*
18020c608dadSAubrey Li  * Report the amount of time elapsed in milliseconds since last AVX512
18030c608dadSAubrey Li  * use in the task.
18040c608dadSAubrey Li  */
18050c608dadSAubrey Li static void avx512_status(struct seq_file *m, struct task_struct *task)
18060c608dadSAubrey Li {
18070c608dadSAubrey Li 	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
18080c608dadSAubrey Li 	long delta;
18090c608dadSAubrey Li 
18100c608dadSAubrey Li 	if (!timestamp) {
18110c608dadSAubrey Li 		/*
18120c608dadSAubrey Li 		 * Report -1 if no AVX512 usage
18130c608dadSAubrey Li 		 */
18140c608dadSAubrey Li 		delta = -1;
18150c608dadSAubrey Li 	} else {
18160c608dadSAubrey Li 		delta = (long)(jiffies - timestamp);
18170c608dadSAubrey Li 		/*
18180c608dadSAubrey Li 		 * Cap to LONG_MAX if time difference > LONG_MAX
18190c608dadSAubrey Li 		 */
18200c608dadSAubrey Li 		if (delta < 0)
18210c608dadSAubrey Li 			delta = LONG_MAX;
18220c608dadSAubrey Li 		delta = jiffies_to_msecs(delta);
18230c608dadSAubrey Li 	}
18240c608dadSAubrey Li 
18250c608dadSAubrey Li 	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
18260c608dadSAubrey Li 	seq_putc(m, '\n');
18270c608dadSAubrey Li }
18280c608dadSAubrey Li 
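/*
 * Worked example (editor's addition): with HZ == 1000 and a task that
 * last executed an AVX-512 instruction 5000 jiffies ago, this prints
 * "AVX512_elapsed_ms:	5000"; a task that never used AVX-512 reports -1.
 */
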
18290c608dadSAubrey Li /*
18300c608dadSAubrey Li  * Report architecture specific information
18310c608dadSAubrey Li  */
18320c608dadSAubrey Li int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
18330c608dadSAubrey Li 			struct pid *pid, struct task_struct *task)
18340c608dadSAubrey Li {
18350c608dadSAubrey Li 	/*
18360c608dadSAubrey Li 	 * Report AVX512 state if the processor and build option supported.
18370c608dadSAubrey Li 	 * Report AVX512 state if the processor and the build option support it.
18380c608dadSAubrey Li 	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
18390c608dadSAubrey Li 		avx512_status(m, task);
18400c608dadSAubrey Li 
18410c608dadSAubrey Li 	return 0;
18420c608dadSAubrey Li }
18430c608dadSAubrey Li #endif /* CONFIG_PROC_PID_ARCH_STATUS */
1844