xref: /openbmc/linux/arch/x86/kernel/fpu/xstate.c (revision 1acbca93)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * xsave/xrstor support.
4  *
5  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
6  */
7 #include <linux/bitops.h>
8 #include <linux/compat.h>
9 #include <linux/cpu.h>
10 #include <linux/mman.h>
11 #include <linux/nospec.h>
12 #include <linux/pkeys.h>
13 #include <linux/seq_file.h>
14 #include <linux/proc_fs.h>
15 #include <linux/vmalloc.h>
16 
17 #include <asm/fpu/api.h>
18 #include <asm/fpu/regset.h>
19 #include <asm/fpu/signal.h>
20 #include <asm/fpu/xcr.h>
21 
22 #include <asm/tlbflush.h>
23 #include <asm/prctl.h>
24 #include <asm/elf.h>
25 
26 #include "context.h"
27 #include "internal.h"
28 #include "legacy.h"
29 #include "xstate.h"
30 
/*
 * Walk @bit over the set bits of @mask, beginning at
 * FIRST_EXTENDED_XFEATURE so that lower-numbered bits are skipped.
 *
 * Note: this expands to an assignment followed by a loop header, i.e.
 * two statements.  Do not use it as the unbraced body of an if/else or
 * of another loop.  @mask must be an lvalue since its address is taken.
 */
#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from((bit), (unsigned long *)&(mask),		\
			      sizeof(mask) * 8)
34 
/*
 * Human readable names of the xfeatures, indexed by xfeature number.
 * The last entry doubles as the name for every feature number beyond
 * the end of the table (see cpu_has_xfeatures()).
 *
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused.  We use other mechanisms
 * to save/restore PT state in Linux.
 *
 * Neither the strings nor the table itself are ever modified, hence
 * both levels are const.
 *
 * NOTE(review): the labels in slots 6 and 7 look shifted by one relative
 * to the XFEATURE_ZMM_Hi256 / XFEATURE_Hi16_ZMM ordering used elsewhere
 * in this file.  They are printed in the boot log, so confirm against
 * the xfeature enum before touching them.
 */
static const char * const xfeature_names[] =
{
	"x87 floating point registers",
	"SSE registers",
	"AVX registers",
	"MPX bounds registers",
	"MPX CSR",
	"AVX-512 opmask",
	"AVX-512 Hi256",
	"AVX-512 ZMM_Hi256",
	"Processor Trace (unused)",
	"Protection Keys User registers",
	"PASID state",
	"Control-flow User registers",
	"Control-flow Kernel registers (unused)",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"AMX Tile config",
	"AMX Tile data",
	"unknown xstate feature",
};
63 
64 static unsigned short xsave_cpuid_features[] __initdata = {
65 	[XFEATURE_FP]				= X86_FEATURE_FPU,
66 	[XFEATURE_SSE]				= X86_FEATURE_XMM,
67 	[XFEATURE_YMM]				= X86_FEATURE_AVX,
68 	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
69 	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
70 	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
71 	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
72 	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
73 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
74 	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
75 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
76 	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
77 	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
78 	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
79 };
80 
81 static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
82 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
83 static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
84 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
85 static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
86 
87 #define XSTATE_FLAG_SUPERVISOR	BIT(0)
88 #define XSTATE_FLAG_ALIGNED64	BIT(1)
89 
90 /*
91  * Return whether the system supports a given xfeature.
92  *
93  * Also return the name of the (most advanced) feature that the caller requested:
94  */
cpu_has_xfeatures(u64 xfeatures_needed,const char ** feature_name)95 int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
96 {
97 	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
98 
99 	if (unlikely(feature_name)) {
100 		long xfeature_idx, max_idx;
101 		u64 xfeatures_print;
102 		/*
103 		 * So we use FLS here to be able to print the most advanced
104 		 * feature that was requested but is missing. So if a driver
105 		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
106 		 * missing AVX feature - this is the most informative message
107 		 * to users:
108 		 */
109 		if (xfeatures_missing)
110 			xfeatures_print = xfeatures_missing;
111 		else
112 			xfeatures_print = xfeatures_needed;
113 
114 		xfeature_idx = fls64(xfeatures_print)-1;
115 		max_idx = ARRAY_SIZE(xfeature_names)-1;
116 		xfeature_idx = min(xfeature_idx, max_idx);
117 
118 		*feature_name = xfeature_names[xfeature_idx];
119 	}
120 
121 	if (xfeatures_missing)
122 		return 0;
123 
124 	return 1;
125 }
126 EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
127 
xfeature_is_aligned64(int xfeature_nr)128 static bool xfeature_is_aligned64(int xfeature_nr)
129 {
130 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
131 }
132 
xfeature_is_supervisor(int xfeature_nr)133 static bool xfeature_is_supervisor(int xfeature_nr)
134 {
135 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
136 }
137 
/*
 * Return the offset of @xfeature inside an xsave buffer.
 *
 * @xcomp_bv:	the xcomp_bv header value describing which components the
 *		(compacted) buffer contains
 * @xfeature:	the xfeature number to locate
 */
static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
	unsigned int pos, nr;

	/* Non-compacted format: the cached fixed offsets apply. */
	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED))
		return xstate_offsets[xfeature];

	/* Legacy features sit at their fixed offsets in either format. */
	if (xfeature <= XFEATURE_SSE)
		return xstate_offsets[xfeature];

	/*
	 * Compacted format: the position depends on which components are
	 * present according to xcomp_bv.  Add up the sizes of all extended
	 * components in front of @xfeature, honouring 64 byte alignment
	 * where a component asks for it.
	 */
	pos = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for_each_extended_xfeature(nr, xcomp_bv) {
		if (xfeature_is_aligned64(nr))
			pos = ALIGN(pos, 64);
		if (nr == xfeature)
			return pos;
		pos += xstate_sizes[nr];
	}

	/* @xfeature is not set in xcomp_bv: end of the walked components. */
	return pos;
}
165 
166 /*
167  * Enable the extended processor state save/restore feature.
168  * Called once per CPU onlining.
169  */
fpu__init_cpu_xstate(void)170 void fpu__init_cpu_xstate(void)
171 {
172 	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
173 		return;
174 
175 	cr4_set_bits(X86_CR4_OSXSAVE);
176 
177 	/*
178 	 * Must happen after CR4 setup and before xsetbv() to allow KVM
179 	 * lazy passthrough.  Write independent of the dynamic state static
180 	 * key as that does not work on the boot CPU. This also ensures
181 	 * that any stale state is wiped out from XFD. Reset the per CPU
182 	 * xfd cache too.
183 	 */
184 	if (cpu_feature_enabled(X86_FEATURE_XFD))
185 		xfd_set_state(init_fpstate.xfd);
186 
187 	/*
188 	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
189 	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
190 	 * states can be set here.
191 	 */
192 	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
193 
194 	/*
195 	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
196 	 */
197 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
198 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
199 				     xfeatures_mask_independent());
200 	}
201 }
202 
xfeature_enabled(enum xfeature xfeature)203 static bool xfeature_enabled(enum xfeature xfeature)
204 {
205 	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
206 }
207 
208 /*
209  * Record the offsets and sizes of various xstates contained
210  * in the XSAVE state memory layout.
211  */
setup_xstate_cache(void)212 static void __init setup_xstate_cache(void)
213 {
214 	u32 eax, ebx, ecx, edx, i;
215 	/* start at the beginning of the "extended state" */
216 	unsigned int last_good_offset = offsetof(struct xregs_state,
217 						 extended_state_area);
218 	/*
219 	 * The FP xstates and SSE xstates are legacy states. They are always
220 	 * in the fixed offsets in the xsave area in either compacted form
221 	 * or standard form.
222 	 */
223 	xstate_offsets[XFEATURE_FP]	= 0;
224 	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
225 						   xmm_space);
226 
227 	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
228 	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
229 						       xmm_space);
230 
231 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
232 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
233 
234 		xstate_sizes[i] = eax;
235 		xstate_flags[i] = ecx;
236 
237 		/*
238 		 * If an xfeature is supervisor state, the offset in EBX is
239 		 * invalid, leave it to -1.
240 		 */
241 		if (xfeature_is_supervisor(i))
242 			continue;
243 
244 		xstate_offsets[i] = ebx;
245 
246 		/*
247 		 * In our xstate size checks, we assume that the highest-numbered
248 		 * xstate feature has the highest offset in the buffer.  Ensure
249 		 * it does.
250 		 */
251 		WARN_ONCE(last_good_offset > xstate_offsets[i],
252 			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
253 
254 		last_good_offset = xstate_offsets[i];
255 	}
256 }
257 
print_xstate_feature(u64 xstate_mask)258 static void __init print_xstate_feature(u64 xstate_mask)
259 {
260 	const char *feature_name;
261 
262 	if (cpu_has_xfeatures(xstate_mask, &feature_name))
263 		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
264 }
265 
266 /*
267  * Print out all the supported xstate features:
268  */
print_xstate_features(void)269 static void __init print_xstate_features(void)
270 {
271 	print_xstate_feature(XFEATURE_MASK_FP);
272 	print_xstate_feature(XFEATURE_MASK_SSE);
273 	print_xstate_feature(XFEATURE_MASK_YMM);
274 	print_xstate_feature(XFEATURE_MASK_BNDREGS);
275 	print_xstate_feature(XFEATURE_MASK_BNDCSR);
276 	print_xstate_feature(XFEATURE_MASK_OPMASK);
277 	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
278 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
279 	print_xstate_feature(XFEATURE_MASK_PKRU);
280 	print_xstate_feature(XFEATURE_MASK_PASID);
281 	print_xstate_feature(XFEATURE_MASK_CET_USER);
282 	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
283 	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
284 }
285 
/*
 * Warn if @nr is not the number of an extended xfeature, i.e. if it is
 * below FIRST_EXTENDED_XFEATURE or not below XFEATURE_MAX.
 *
 * This check is important because it is easy to get XSTATE_*
 * confused with XSTATE_BIT_*.
 *
 * @nr is evaluated twice, so do not pass an expression with side
 * effects.
 */
#define CHECK_XFEATURE(nr) do {				\
	WARN_ON((nr) < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON((nr) >= XFEATURE_MAX);			\
} while (0)
294 
295 /*
296  * Print out xstate component offsets and sizes
297  */
print_xstate_offset_size(void)298 static void __init print_xstate_offset_size(void)
299 {
300 	int i;
301 
302 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
303 		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
304 			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
305 			i, xstate_sizes[i]);
306 	}
307 }
308 
309 /*
310  * This function is called only during boot time when x86 caps are not set
311  * up and alternative can not be used yet.
312  */
os_xrstor_booting(struct xregs_state * xstate)313 static __init void os_xrstor_booting(struct xregs_state *xstate)
314 {
315 	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
316 	u32 lmask = mask;
317 	u32 hmask = mask >> 32;
318 	int err;
319 
320 	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
321 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
322 	else
323 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
324 
325 	/*
326 	 * We should never fault when copying from a kernel buffer, and the FPU
327 	 * state we set at boot time should be valid.
328 	 */
329 	WARN_ON_FPU(err);
330 }
331 
/*
 * The features whose init state is either all zeros or is taken care of
 * individually when the init FPU state is set up.
 *
 * The list is spelled out on purpose instead of using
 * XFEATURE_MASK*SUPPORTED: a newly supported feature then trips the
 * build time check in setup_init_fpu_buf() and forces people to actually
 * look at the init state of the new feature.
 */
#define XFEATURES_INIT_FPSTATE_HANDLED		\
	(XFEATURE_MASK_FP |			\
	 XFEATURE_MASK_SSE |			\
	 XFEATURE_MASK_YMM |			\
	 XFEATURE_MASK_BNDREGS |		\
	 XFEATURE_MASK_BNDCSR |			\
	 XFEATURE_MASK_OPMASK |			\
	 XFEATURE_MASK_ZMM_Hi256 |		\
	 XFEATURE_MASK_Hi16_ZMM |		\
	 XFEATURE_MASK_PKRU |			\
	 XFEATURE_MASK_PASID |			\
	 XFEATURE_MASK_CET_USER |		\
	 XFEATURE_MASK_XTILE)
352 
353 /*
354  * setup the xstate image representing the init state
355  */
setup_init_fpu_buf(void)356 static void __init setup_init_fpu_buf(void)
357 {
358 	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
359 		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
360 		     XFEATURES_INIT_FPSTATE_HANDLED);
361 
362 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
363 		return;
364 
365 	print_xstate_features();
366 
367 	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
368 
369 	/*
370 	 * Init all the features state with header.xfeatures being 0x0
371 	 */
372 	os_xrstor_booting(&init_fpstate.regs.xsave);
373 
374 	/*
375 	 * All components are now in init state. Read the state back so
376 	 * that init_fpstate contains all non-zero init state. This only
377 	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
378 	 * those use the init optimization which skips writing data for
379 	 * components in init state.
380 	 *
381 	 * XSAVE could be used, but that would require to reshuffle the
382 	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
383 	 * compaction. But doing so is a pointless exercise because most
384 	 * components have an all zeros init state except for the legacy
385 	 * ones (FP and SSE). Those can be saved with FXSAVE into the
386 	 * legacy area. Adding new features requires to ensure that init
387 	 * state is all zeroes or if not to add the necessary handling
388 	 * here.
389 	 */
390 	fxsave(&init_fpstate.regs.fxsave);
391 }
392 
xfeature_size(int xfeature_nr)393 int xfeature_size(int xfeature_nr)
394 {
395 	u32 eax, ebx, ecx, edx;
396 
397 	CHECK_XFEATURE(xfeature_nr);
398 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
399 	return eax;
400 }
401 
402 /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
validate_user_xstate_header(const struct xstate_header * hdr,struct fpstate * fpstate)403 static int validate_user_xstate_header(const struct xstate_header *hdr,
404 				       struct fpstate *fpstate)
405 {
406 	/* No unknown or supervisor features may be set */
407 	if (hdr->xfeatures & ~fpstate->user_xfeatures)
408 		return -EINVAL;
409 
410 	/* Userspace must use the uncompacted format */
411 	if (hdr->xcomp_bv)
412 		return -EINVAL;
413 
414 	/*
415 	 * If 'reserved' is shrunken to add a new field, make sure to validate
416 	 * that new field here!
417 	 */
418 	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
419 
420 	/* No reserved bits may be set */
421 	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
422 		return -EINVAL;
423 
424 	return 0;
425 }
426 
__xstate_dump_leaves(void)427 static void __init __xstate_dump_leaves(void)
428 {
429 	int i;
430 	u32 eax, ebx, ecx, edx;
431 	static int should_dump = 1;
432 
433 	if (!should_dump)
434 		return;
435 	should_dump = 0;
436 	/*
437 	 * Dump out a few leaves past the ones that we support
438 	 * just in case there are some goodies up there
439 	 */
440 	for (i = 0; i < XFEATURE_MAX + 10; i++) {
441 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
442 		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
443 			XSTATE_CPUID, i, eax, ebx, ecx, edx);
444 	}
445 }
446 
/*
 * Warn once, with an "XSAVE consistency problem: " prefix, if condition
 * @x is true and dump the XSTATE CPUID leaves whenever it is.
 */
#define XSTATE_WARN_ON(x, fmt, ...) do {				\
	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt,		\
		      ##__VA_ARGS__))					\
		__xstate_dump_leaves();					\
} while (0)
452 
/*
 * Compare the CPU provided size @sz of xfeature @nr with the size of
 * the C structure @__struct.  On a mismatch warn once and dump the
 * XSTATE CPUID leaves.
 *
 * The expression always evaluates to true: a mismatch is reported, but
 * is not treated as a failure by the caller.
 *
 * @sz is evaluated twice, so do not pass an expression with side
 * effects.
 */
#define XCHECK_SZ(sz, nr, __struct) ({					\
	if (WARN_ONCE((sz) != sizeof(__struct),				\
	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
	    xfeature_names[nr], sizeof(__struct), (sz))) {		\
		__xstate_dump_leaves();					\
	}								\
	true;								\
})
461 
462 
463 /**
464  * check_xtile_data_against_struct - Check tile data state size.
465  *
466  * Calculate the state size by multiplying the single tile size which is
467  * recorded in a C struct, and the number of tiles that the CPU informs.
468  * Compare the provided size with the calculation.
469  *
470  * @size:	The tile data state size
471  *
472  * Returns:	0 on success, -EINVAL on mismatch.
473  */
check_xtile_data_against_struct(int size)474 static int __init check_xtile_data_against_struct(int size)
475 {
476 	u32 max_palid, palid, state_size;
477 	u32 eax, ebx, ecx, edx;
478 	u16 max_tile;
479 
480 	/*
481 	 * Check the maximum palette id:
482 	 *   eax: the highest numbered palette subleaf.
483 	 */
484 	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
485 
486 	/*
487 	 * Cross-check each tile size and find the maximum number of
488 	 * supported tiles.
489 	 */
490 	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
491 		u16 tile_size, max;
492 
493 		/*
494 		 * Check the tile size info:
495 		 *   eax[31:16]:  bytes per title
496 		 *   ebx[31:16]:  the max names (or max number of tiles)
497 		 */
498 		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
499 		tile_size = eax >> 16;
500 		max = ebx >> 16;
501 
502 		if (tile_size != sizeof(struct xtile_data)) {
503 			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
504 			       __stringify(XFEATURE_XTILE_DATA),
505 			       sizeof(struct xtile_data), tile_size);
506 			__xstate_dump_leaves();
507 			return -EINVAL;
508 		}
509 
510 		if (max > max_tile)
511 			max_tile = max;
512 	}
513 
514 	state_size = sizeof(struct xtile_data) * max_tile;
515 	if (size != state_size) {
516 		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
517 		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
518 		__xstate_dump_leaves();
519 		return -EINVAL;
520 	}
521 	return 0;
522 }
523 
524 /*
525  * We have a C struct for each 'xstate'.  We need to ensure
526  * that our software representation matches what the CPU
527  * tells us about the state's size.
528  */
check_xstate_against_struct(int nr)529 static bool __init check_xstate_against_struct(int nr)
530 {
531 	/*
532 	 * Ask the CPU for the size of the state.
533 	 */
534 	int sz = xfeature_size(nr);
535 
536 	/*
537 	 * Match each CPU state with the corresponding software
538 	 * structure.
539 	 */
540 	switch (nr) {
541 	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
542 	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
543 	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
544 	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
545 	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
546 	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
547 	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
548 	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
549 	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
550 	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
551 	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
552 	default:
553 		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
554 		return false;
555 	}
556 
557 	return true;
558 }
559 
/*
 * Calculate the size of an xsave buffer which holds @xfeatures.
 *
 * @xfeatures:	mask of the xfeatures the buffer has to hold
 * @compacted:	true to calculate for the compacted format, false for
 *		the standard format
 *
 * Returns: sizeof(struct xregs_state) if @xfeatures contains nothing
 * beyond the legacy FP/SSE features (an empty mask included), otherwise
 * the offset of the topmost feature plus its size.
 */
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
	/* Signed on purpose: an empty mask yields -1 */
	int topmost = fls64(xfeatures) - 1;
	unsigned int offset;

	/*
	 * Must be checked before topmost is used as an array index: for
	 * an empty mask it is not a valid index.
	 */
	if (topmost <= XFEATURE_SSE)
		return sizeof(struct xregs_state);

	if (compacted)
		offset = xfeature_get_offset(xfeatures, topmost);
	else
		offset = xstate_offsets[topmost];

	return offset + xstate_sizes[topmost];
}
572 
573 /*
574  * This essentially double-checks what the cpu told us about
575  * how large the XSAVE buffer needs to be.  We are recalculating
576  * it to be safe.
577  *
578  * Independent XSAVE features allocate their own buffers and are not
579  * covered by these checks. Only the size of the buffer for task->fpu
580  * is checked here.
581  */
paranoid_xstate_size_valid(unsigned int kernel_size)582 static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
583 {
584 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
585 	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
586 	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
587 	int i;
588 
589 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
590 		if (!check_xstate_against_struct(i))
591 			return false;
592 		/*
593 		 * Supervisor state components can be managed only by
594 		 * XSAVES.
595 		 */
596 		if (!xsaves && xfeature_is_supervisor(i)) {
597 			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
598 			return false;
599 		}
600 	}
601 	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
602 	XSTATE_WARN_ON(size != kernel_size,
603 		       "size %u != kernel_size %u\n", size, kernel_size);
604 	return size == kernel_size;
605 }
606 
607 /*
608  * Get total size of enabled xstates in XCR0 | IA32_XSS.
609  *
610  * Note the SDM's wording here.  "sub-function 0" only enumerates
611  * the size of the *user* states.  If we use it to size a buffer
612  * that we use 'XSAVES' on, we could potentially overflow the
613  * buffer because 'XSAVES' saves system states too.
614  *
615  * This also takes compaction into account. So this works for
616  * XSAVEC as well.
617  */
get_compacted_size(void)618 static unsigned int __init get_compacted_size(void)
619 {
620 	unsigned int eax, ebx, ecx, edx;
621 	/*
622 	 * - CPUID function 0DH, sub-function 1:
623 	 *    EBX enumerates the size (in bytes) required by
624 	 *    the XSAVES instruction for an XSAVE area
625 	 *    containing all the state components
626 	 *    corresponding to bits currently set in
627 	 *    XCR0 | IA32_XSS.
628 	 *
629 	 * When XSAVES is not available but XSAVEC is (virt), then there
630 	 * are no supervisor states, but XSAVEC still uses compacted
631 	 * format.
632 	 */
633 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
634 	return ebx;
635 }
636 
637 /*
638  * Get the total size of the enabled xstates without the independent supervisor
639  * features.
640  */
get_xsave_compacted_size(void)641 static unsigned int __init get_xsave_compacted_size(void)
642 {
643 	u64 mask = xfeatures_mask_independent();
644 	unsigned int size;
645 
646 	if (!mask)
647 		return get_compacted_size();
648 
649 	/* Disable independent features. */
650 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
651 
652 	/*
653 	 * Ask the hardware what size is required of the buffer.
654 	 * This is the size required for the task->fpu buffer.
655 	 */
656 	size = get_compacted_size();
657 
658 	/* Re-enable independent features so XSAVES will work on them again. */
659 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
660 
661 	return size;
662 }
663 
get_xsave_size_user(void)664 static unsigned int __init get_xsave_size_user(void)
665 {
666 	unsigned int eax, ebx, ecx, edx;
667 	/*
668 	 * - CPUID function 0DH, sub-function 0:
669 	 *    EBX enumerates the size (in bytes) required by
670 	 *    the XSAVE instruction for an XSAVE area
671 	 *    containing all the *user* state components
672 	 *    corresponding to bits currently set in XCR0.
673 	 */
674 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
675 	return ebx;
676 }
677 
init_xstate_size(void)678 static int __init init_xstate_size(void)
679 {
680 	/* Recompute the context size for enabled features: */
681 	unsigned int user_size, kernel_size, kernel_default_size;
682 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
683 
684 	/* Uncompacted user space size */
685 	user_size = get_xsave_size_user();
686 
687 	/*
688 	 * XSAVES kernel size includes supervisor states and uses compacted
689 	 * format. XSAVEC uses compacted format, but does not save
690 	 * supervisor states.
691 	 *
692 	 * XSAVE[OPT] do not support supervisor states so kernel and user
693 	 * size is identical.
694 	 */
695 	if (compacted)
696 		kernel_size = get_xsave_compacted_size();
697 	else
698 		kernel_size = user_size;
699 
700 	kernel_default_size =
701 		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
702 
703 	if (!paranoid_xstate_size_valid(kernel_size))
704 		return -EINVAL;
705 
706 	fpu_kernel_cfg.max_size = kernel_size;
707 	fpu_user_cfg.max_size = user_size;
708 
709 	fpu_kernel_cfg.default_size = kernel_default_size;
710 	fpu_user_cfg.default_size =
711 		xstate_calculate_size(fpu_user_cfg.default_features, false);
712 
713 	return 0;
714 }
715 
716 /*
717  * We enabled the XSAVE hardware, but something went wrong and
718  * we can not use it.  Disable it.
719  */
fpu__init_disable_system_xstate(unsigned int legacy_size)720 static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
721 {
722 	fpu_kernel_cfg.max_features = 0;
723 	cr4_clear_bits(X86_CR4_OSXSAVE);
724 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
725 
726 	/* Restore the legacy size.*/
727 	fpu_kernel_cfg.max_size = legacy_size;
728 	fpu_kernel_cfg.default_size = legacy_size;
729 	fpu_user_cfg.max_size = legacy_size;
730 	fpu_user_cfg.default_size = legacy_size;
731 
732 	/*
733 	 * Prevent enabling the static branch which enables writes to the
734 	 * XFD MSR.
735 	 */
736 	init_fpstate.xfd = 0;
737 
738 	fpstate_reset(&current->thread.fpu);
739 }
740 
741 /*
742  * Enable and initialize the xsave feature.
743  * Called once per system bootup.
744  */
fpu__init_system_xstate(unsigned int legacy_size)745 void __init fpu__init_system_xstate(unsigned int legacy_size)
746 {
747 	unsigned int eax, ebx, ecx, edx;
748 	u64 xfeatures;
749 	int err;
750 	int i;
751 
752 	if (!boot_cpu_has(X86_FEATURE_FPU)) {
753 		pr_info("x86/fpu: No FPU detected\n");
754 		return;
755 	}
756 
757 	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
758 		pr_info("x86/fpu: x87 FPU will use %s\n",
759 			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
760 		return;
761 	}
762 
763 	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
764 		WARN_ON_FPU(1);
765 		return;
766 	}
767 
768 	/*
769 	 * Find user xstates supported by the processor.
770 	 */
771 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
772 	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
773 
774 	/*
775 	 * Find supervisor xstates supported by the processor.
776 	 */
777 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
778 	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
779 
780 	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
781 		/*
782 		 * This indicates that something really unexpected happened
783 		 * with the enumeration.  Disable XSAVE and try to continue
784 		 * booting without it.  This is too early to BUG().
785 		 */
786 		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
787 		       fpu_kernel_cfg.max_features);
788 		goto out_disable;
789 	}
790 
791 	/*
792 	 * Clear XSAVE features that are disabled in the normal CPUID.
793 	 */
794 	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
795 		unsigned short cid = xsave_cpuid_features[i];
796 
797 		/* Careful: X86_FEATURE_FPU is 0! */
798 		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
799 			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
800 	}
801 
802 	if (!cpu_feature_enabled(X86_FEATURE_XFD))
803 		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
804 
805 	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
806 		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
807 	else
808 		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
809 					XFEATURE_MASK_SUPERVISOR_SUPPORTED;
810 
811 	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
812 	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
813 
814 	/* Clean out dynamic features from default */
815 	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
816 	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
817 
818 	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
819 	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
820 
821 	/* Store it for paranoia check at the end */
822 	xfeatures = fpu_kernel_cfg.max_features;
823 
824 	/*
825 	 * Initialize the default XFD state in initfp_state and enable the
826 	 * dynamic sizing mechanism if dynamic states are available.  The
827 	 * static key cannot be enabled here because this runs before
828 	 * jump_label_init(). This is delayed to an initcall.
829 	 */
830 	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
831 
832 	/* Set up compaction feature bit */
833 	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
834 	    cpu_feature_enabled(X86_FEATURE_XSAVES))
835 		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
836 
837 	/* Enable xstate instructions to be able to continue with initialization: */
838 	fpu__init_cpu_xstate();
839 
840 	/* Cache size, offset and flags for initialization */
841 	setup_xstate_cache();
842 
843 	err = init_xstate_size();
844 	if (err)
845 		goto out_disable;
846 
847 	/* Reset the state for the current task */
848 	fpstate_reset(&current->thread.fpu);
849 
850 	/*
851 	 * Update info used for ptrace frames; use standard-format size and no
852 	 * supervisor xstates:
853 	 */
854 	update_regset_xstate_info(fpu_user_cfg.max_size,
855 				  fpu_user_cfg.max_features);
856 
857 	/*
858 	 * init_fpstate excludes dynamic states as they are large but init
859 	 * state is zero.
860 	 */
861 	init_fpstate.size		= fpu_kernel_cfg.default_size;
862 	init_fpstate.xfeatures		= fpu_kernel_cfg.default_features;
863 
864 	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
865 		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
866 			sizeof(init_fpstate.regs), init_fpstate.size);
867 		goto out_disable;
868 	}
869 
870 	setup_init_fpu_buf();
871 
872 	/*
873 	 * Paranoia check whether something in the setup modified the
874 	 * xfeatures mask.
875 	 */
876 	if (xfeatures != fpu_kernel_cfg.max_features) {
877 		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
878 		       xfeatures, fpu_kernel_cfg.max_features);
879 		goto out_disable;
880 	}
881 
882 	/*
883 	 * CPU capabilities initialization runs before FPU init. So
884 	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
885 	 * functional, set the feature bit so depending code works.
886 	 */
887 	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
888 
889 	print_xstate_offset_size();
890 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
891 		fpu_kernel_cfg.max_features,
892 		fpu_kernel_cfg.max_size,
893 		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
894 	return;
895 
896 out_disable:
897 	/* something went wrong, try to boot without any XSAVE support */
898 	fpu__init_disable_system_xstate(legacy_size);
899 }
900 
901 /*
902  * Restore minimal FPU state after suspend:
903  */
fpu__resume_cpu(void)904 void fpu__resume_cpu(void)
905 {
906 	/*
907 	 * Restore XCR0 on xsave capable CPUs:
908 	 */
909 	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
910 		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
911 
912 	/*
913 	 * Restore IA32_XSS. The same CPUID bit enumerates support
914 	 * of XSAVES and MSR_IA32_XSS.
915 	 */
916 	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
917 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
918 				     xfeatures_mask_independent());
919 	}
920 
921 	if (fpu_state_size_dynamic())
922 		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
923 }
924 
925 /*
926  * Given an xstate feature nr, calculate where in the xsave
927  * buffer the state is.  Callers should ensure that the buffer
928  * is valid.
929  */
__raw_xsave_addr(struct xregs_state * xsave,int xfeature_nr)930 static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
931 {
932 	u64 xcomp_bv = xsave->header.xcomp_bv;
933 
934 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
935 		return NULL;
936 
937 	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
938 		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
939 			return NULL;
940 	}
941 
942 	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
943 }
944 
945 /*
946  * Given the xsave area and a state inside, this function returns the
947  * address of the state.
948  *
949  * This is the API that is called to get xstate address in either
950  * standard format or compacted format of xsave area.
951  *
952  * Note that if there is no data for the field in the xsave buffer
953  * this will return NULL.
954  *
955  * Inputs:
956  *	xstate: the thread's storage area for all FPU data
957  *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
958  *	XFEATURE_SSE, etc...)
959  * Output:
960  *	address of the state in the xsave area, or NULL if the
961  *	field is not present in the xsave buffer.
962  */
get_xsave_addr(struct xregs_state * xsave,int xfeature_nr)963 void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
964 {
965 	/*
966 	 * Do we even *have* xsave state?
967 	 */
968 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
969 		return NULL;
970 
971 	/*
972 	 * We should not ever be requesting features that we
973 	 * have not enabled.
974 	 */
975 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
976 		return NULL;
977 
978 	/*
979 	 * This assumes the last 'xsave*' instruction to
980 	 * have requested that 'xfeature_nr' be saved.
981 	 * If it did not, we might be seeing and old value
982 	 * of the field in the buffer.
983 	 *
984 	 * This can happen because the last 'xsave' did not
985 	 * request that this feature be saved (unlikely)
986 	 * or because the "init optimization" caused it
987 	 * to not be saved.
988 	 */
989 	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
990 		return NULL;
991 
992 	return __raw_xsave_addr(xsave, xfeature_nr);
993 }
994 
#ifdef CONFIG_ARCH_HAS_PKEYS

/*
 * Update the PKRU register so that the access rights of @pkey match
 * @init_val (PKEY_DISABLE_ACCESS and/or PKEY_DISABLE_WRITE).
 *
 * Returns 0 on success, -EINVAL when OSPKE is not enabled or @pkey is
 * out of range.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val)
{
	u32 rights = 0;
	u32 pkru;
	int shift;

	/*
	 * OSPKE is only set once XSAVE is enabled and PKU is enabled in
	 * XCR0, so this check covers XSAVE support as well.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return -EINVAL;

	/*
	 * 'pkey' is expected to come from in-kernel users and to be
	 * valid. Warn when that expectation is violated.
	 */
	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
		return -EINVAL;

	/* Translate the request into the per-key PKRU bits. */
	if (init_val & PKEY_DISABLE_WRITE)
		rights |= PKRU_WD_BIT;
	if (init_val & PKEY_DISABLE_ACCESS)
		rights |= PKRU_AD_BIT;

	/* Position of this key's bit pair inside PKRU. */
	shift = pkey * PKRU_BITS_PER_PKEY;

	/* Read-modify-write: drop the key's old bits, then add the new ones. */
	pkru = read_pkru();
	pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << shift);
	pkru |= rights << shift;
	write_pkru(pkru);

	return 0;
}
#endif /* CONFIG_ARCH_HAS_PKEYS */
1042 
/*
 * Append @size bytes to @to, taken from @xstate when @from_xstate is
 * true and from @init_xstate otherwise.
 */
static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
{
	void *src = init_xstate;

	if (from_xstate)
		src = xstate;

	membuf_write(to, src, size);
}
1048 
1049 /**
1050  * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1051  * @to:		membuf descriptor
1052  * @fpstate:	The fpstate buffer from which to copy
1053  * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
1054  * @pkru_val:	The PKRU value to store in the PKRU component
1055  * @copy_mode:	The requested copy mode
1056  *
1057  * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1058  * format, i.e. from the kernel internal hardware dependent storage format
1059  * to the requested @mode. UABI XSTATE is always uncompacted!
1060  *
1061  * It supports partial copy but @to.pos always starts from zero.
1062  */
__copy_xstate_to_uabi_buf(struct membuf to,struct fpstate * fpstate,u64 xfeatures,u32 pkru_val,enum xstate_copy_mode copy_mode)1063 void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
1064 			       u64 xfeatures, u32 pkru_val,
1065 			       enum xstate_copy_mode copy_mode)
1066 {
1067 	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
1068 	struct xregs_state *xinit = &init_fpstate.regs.xsave;
1069 	struct xregs_state *xsave = &fpstate->regs.xsave;
1070 	struct xstate_header header;
1071 	unsigned int zerofrom;
1072 	u64 mask;
1073 	int i;
1074 
1075 	memset(&header, 0, sizeof(header));
1076 	header.xfeatures = xsave->header.xfeatures;
1077 
1078 	/* Mask out the feature bits depending on copy mode */
1079 	switch (copy_mode) {
1080 	case XSTATE_COPY_FP:
1081 		header.xfeatures &= XFEATURE_MASK_FP;
1082 		break;
1083 
1084 	case XSTATE_COPY_FX:
1085 		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
1086 		break;
1087 
1088 	case XSTATE_COPY_XSAVE:
1089 		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
1090 		break;
1091 	}
1092 
1093 	/* Copy FP state up to MXCSR */
1094 	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
1095 		     &xinit->i387, off_mxcsr);
1096 
1097 	/* Copy MXCSR when SSE or YMM are set in the feature mask */
1098 	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
1099 		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
1100 		     MXCSR_AND_FLAGS_SIZE);
1101 
1102 	/* Copy the remaining FP state */
1103 	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
1104 		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
1105 		     sizeof(xsave->i387.st_space));
1106 
1107 	/* Copy the SSE state - shared with YMM, but independently managed */
1108 	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
1109 		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
1110 		     sizeof(xsave->i387.xmm_space));
1111 
1112 	if (copy_mode != XSTATE_COPY_XSAVE)
1113 		goto out;
1114 
1115 	/* Zero the padding area */
1116 	membuf_zero(&to, sizeof(xsave->i387.padding));
1117 
1118 	/* Copy xsave->i387.sw_reserved */
1119 	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
1120 
1121 	/* Copy the user space relevant state of @xsave->header */
1122 	membuf_write(&to, &header, sizeof(header));
1123 
1124 	zerofrom = offsetof(struct xregs_state, extended_state_area);
1125 
1126 	/*
1127 	 * This 'mask' indicates which states to copy from fpstate.
1128 	 * Those extended states that are not present in fpstate are
1129 	 * either disabled or initialized:
1130 	 *
1131 	 * In non-compacted format, disabled features still occupy
1132 	 * state space but there is no state to copy from in the
1133 	 * compacted init_fpstate. The gap tracking will zero these
1134 	 * states.
1135 	 *
1136 	 * The extended features have an all zeroes init state. Thus,
1137 	 * remove them from 'mask' to zero those features in the user
1138 	 * buffer instead of retrieving them from init_fpstate.
1139 	 */
1140 	mask = header.xfeatures;
1141 
1142 	for_each_extended_xfeature(i, mask) {
1143 		/*
1144 		 * If there was a feature or alignment gap, zero the space
1145 		 * in the destination buffer.
1146 		 */
1147 		if (zerofrom < xstate_offsets[i])
1148 			membuf_zero(&to, xstate_offsets[i] - zerofrom);
1149 
1150 		if (i == XFEATURE_PKRU) {
1151 			struct pkru_state pkru = {0};
1152 			/*
1153 			 * PKRU is not necessarily up to date in the
1154 			 * XSAVE buffer. Use the provided value.
1155 			 */
1156 			pkru.pkru = pkru_val;
1157 			membuf_write(&to, &pkru, sizeof(pkru));
1158 		} else {
1159 			membuf_write(&to,
1160 				     __raw_xsave_addr(xsave, i),
1161 				     xstate_sizes[i]);
1162 		}
1163 		/*
1164 		 * Keep track of the last copied state in the non-compacted
1165 		 * target buffer for gap zeroing.
1166 		 */
1167 		zerofrom = xstate_offsets[i] + xstate_sizes[i];
1168 	}
1169 
1170 out:
1171 	if (to.left)
1172 		membuf_zero(&to, to.left);
1173 }
1174 
1175 /**
1176  * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1177  * @to:		membuf descriptor
1178  * @tsk:	The task from which to copy the saved xstate
1179  * @copy_mode:	The requested copy mode
1180  *
1181  * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1182  * format, i.e. from the kernel internal hardware dependent storage format
1183  * to the requested @mode. UABI XSTATE is always uncompacted!
1184  *
1185  * It supports partial copy but @to.pos always starts from zero.
1186  */
copy_xstate_to_uabi_buf(struct membuf to,struct task_struct * tsk,enum xstate_copy_mode copy_mode)1187 void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
1188 			     enum xstate_copy_mode copy_mode)
1189 {
1190 	__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
1191 				  tsk->thread.fpu.fpstate->user_xfeatures,
1192 				  tsk->thread.pkru, copy_mode);
1193 }
1194 
/*
 * Fetch @size bytes located @offset bytes into the source buffer. The
 * kernel buffer @kbuf is used when it is non-NULL, otherwise the data is
 * read from the user buffer @ubuf.
 *
 * Returns 0 on success, -EFAULT when the user copy fails.
 */
static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (!kbuf)
		return copy_from_user(dst, ubuf + offset, size) ? -EFAULT : 0;

	memcpy(dst, kbuf + offset, size);
	return 0;
}
1206 
1207 
1208 /**
1209  * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
1210  * @fpstate:	The fpstate buffer to copy to
1211  * @kbuf:	The UABI format buffer, if it comes from the kernel
1212  * @ubuf:	The UABI format buffer, if it comes from userspace
1213  * @pkru:	The location to write the PKRU value to
1214  *
1215  * Converts from the UABI format into the kernel internal hardware
1216  * dependent format.
1217  *
1218  * This function ultimately has three different callers with distinct PKRU
1219  * behavior.
1220  * 1.	When called from sigreturn the PKRU register will be restored from
1221  *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
1222  *	@fpstate is sufficient to cover this case, but the caller will also
1223  *	pass a pointer to the thread_struct's pkru field in @pkru and updating
1224  *	it is harmless.
1225  * 2.	When called from ptrace the PKRU register will be restored from the
1226  *	thread_struct's pkru field. A pointer to that is passed in @pkru.
1227  *	The kernel will restore it manually, so the XRSTOR behavior that resets
1228  *	the PKRU register to the hardware init value (0) if the corresponding
1229  *	xfeatures bit is not set is emulated here.
1230  * 3.	When called from KVM the PKRU register will be restored from the vcpu's
1231  *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1232  *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
1233  *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1234  *	bit is not set.
1235  */
copy_uabi_to_xstate(struct fpstate * fpstate,const void * kbuf,const void __user * ubuf,u32 * pkru)1236 static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
1237 			       const void __user *ubuf, u32 *pkru)
1238 {
1239 	struct xregs_state *xsave = &fpstate->regs.xsave;
1240 	unsigned int offset, size;
1241 	struct xstate_header hdr;
1242 	u64 mask;
1243 	int i;
1244 
1245 	offset = offsetof(struct xregs_state, header);
1246 	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
1247 		return -EFAULT;
1248 
1249 	if (validate_user_xstate_header(&hdr, fpstate))
1250 		return -EINVAL;
1251 
1252 	/* Validate MXCSR when any of the related features is in use */
1253 	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
1254 	if (hdr.xfeatures & mask) {
1255 		u32 mxcsr[2];
1256 
1257 		offset = offsetof(struct fxregs_state, mxcsr);
1258 		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
1259 			return -EFAULT;
1260 
1261 		/* Reserved bits in MXCSR must be zero. */
1262 		if (mxcsr[0] & ~mxcsr_feature_mask)
1263 			return -EINVAL;
1264 
1265 		/* SSE and YMM require MXCSR even when FP is not in use. */
1266 		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
1267 			xsave->i387.mxcsr = mxcsr[0];
1268 			xsave->i387.mxcsr_mask = mxcsr[1];
1269 		}
1270 	}
1271 
1272 	for (i = 0; i < XFEATURE_MAX; i++) {
1273 		mask = BIT_ULL(i);
1274 
1275 		if (hdr.xfeatures & mask) {
1276 			void *dst = __raw_xsave_addr(xsave, i);
1277 
1278 			offset = xstate_offsets[i];
1279 			size = xstate_sizes[i];
1280 
1281 			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
1282 				return -EFAULT;
1283 		}
1284 	}
1285 
1286 	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
1287 		struct pkru_state *xpkru;
1288 
1289 		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
1290 		*pkru = xpkru->pkru;
1291 	} else {
1292 		/*
1293 		 * KVM may pass NULL here to indicate that it does not need
1294 		 * PKRU updated.
1295 		 */
1296 		if (pkru)
1297 			*pkru = 0;
1298 	}
1299 
1300 	/*
1301 	 * The state that came in from userspace was user-state only.
1302 	 * Mask all the user states out of 'xfeatures':
1303 	 */
1304 	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
1305 
1306 	/*
1307 	 * Add back in the features that came in from userspace:
1308 	 */
1309 	xsave->header.xfeatures |= hdr.xfeatures;
1310 
1311 	return 0;
1312 }
1313 
1314 /*
1315  * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
1316  * format and copy to the target thread. Used by ptrace and KVM.
1317  */
copy_uabi_from_kernel_to_xstate(struct fpstate * fpstate,const void * kbuf,u32 * pkru)1318 int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
1319 {
1320 	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
1321 }
1322 
1323 /*
1324  * Convert from a sigreturn standard-format user-space buffer to kernel
1325  * XSAVE[S] format and copy to the target thread. This is called from the
1326  * sigreturn() and rt_sigreturn() system calls.
1327  */
copy_sigframe_from_user_to_xstate(struct task_struct * tsk,const void __user * ubuf)1328 int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
1329 				      const void __user *ubuf)
1330 {
1331 	return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
1332 }
1333 
validate_independent_components(u64 mask)1334 static bool validate_independent_components(u64 mask)
1335 {
1336 	u64 xchk;
1337 
1338 	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1339 		return false;
1340 
1341 	xchk = ~xfeatures_mask_independent();
1342 
1343 	if (WARN_ON_ONCE(!mask || mask & xchk))
1344 		return false;
1345 
1346 	return true;
1347 }
1348 
1349 /**
1350  * xsaves - Save selected components to a kernel xstate buffer
1351  * @xstate:	Pointer to the buffer
1352  * @mask:	Feature mask to select the components to save
1353  *
1354  * The @xstate buffer must be 64 byte aligned and correctly initialized as
1355  * XSAVES does not write the full xstate header. Before first use the
1356  * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1357  * can #GP.
1358  *
1359  * The feature mask must be a subset of the independent features.
1360  */
xsaves(struct xregs_state * xstate,u64 mask)1361 void xsaves(struct xregs_state *xstate, u64 mask)
1362 {
1363 	int err;
1364 
1365 	if (!validate_independent_components(mask))
1366 		return;
1367 
1368 	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
1369 	WARN_ON_ONCE(err);
1370 }
1371 
1372 /**
1373  * xrstors - Restore selected components from a kernel xstate buffer
1374  * @xstate:	Pointer to the buffer
1375  * @mask:	Feature mask to select the components to restore
1376  *
1377  * The @xstate buffer must be 64 byte aligned and correctly initialized
1378  * otherwise XRSTORS from that buffer can #GP.
1379  *
1380  * Proper usage is to restore the state which was saved with
1381  * xsaves() into @xstate.
1382  *
1383  * The feature mask must be a subset of the independent features.
1384  */
xrstors(struct xregs_state * xstate,u64 mask)1385 void xrstors(struct xregs_state *xstate, u64 mask)
1386 {
1387 	int err;
1388 
1389 	if (!validate_independent_components(mask))
1390 		return;
1391 
1392 	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
1393 	WARN_ON_ONCE(err);
1394 }
1395 
1396 #if IS_ENABLED(CONFIG_KVM)
fpstate_clear_xstate_component(struct fpstate * fps,unsigned int xfeature)1397 void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1398 {
1399 	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1400 
1401 	if (addr)
1402 		memset(addr, 0, xstate_sizes[xfeature]);
1403 }
1404 EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1405 #endif
1406 
1407 #ifdef CONFIG_X86_64
1408 
1409 #ifdef CONFIG_X86_DEBUG_FPU
1410 /*
1411  * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
1412  * can safely operate on the @fpstate buffer.
1413  */
xstate_op_valid(struct fpstate * fpstate,u64 mask,bool rstor)1414 static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
1415 {
1416 	u64 xfd = __this_cpu_read(xfd_state);
1417 
1418 	if (fpstate->xfd == xfd)
1419 		return true;
1420 
1421 	 /*
1422 	  * The XFD MSR does not match fpstate->xfd. That's invalid when
1423 	  * the passed in fpstate is current's fpstate.
1424 	  */
1425 	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
1426 		return false;
1427 
1428 	/*
1429 	 * XRSTOR(S) from init_fpstate are always correct as it will just
1430 	 * bring all components into init state and not read from the
1431 	 * buffer. XSAVE(S) raises #PF after init.
1432 	 */
1433 	if (fpstate == &init_fpstate)
1434 		return rstor;
1435 
1436 	/*
1437 	 * XSAVE(S): clone(), fpu_swap_kvm_fpu()
1438 	 * XRSTORS(S): fpu_swap_kvm_fpu()
1439 	 */
1440 
1441 	/*
1442 	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
1443 	 * the buffer area for XFD-disabled state components.
1444 	 */
1445 	mask &= ~xfd;
1446 
1447 	/*
1448 	 * Remove features which are valid in fpstate. They
1449 	 * have space allocated in fpstate.
1450 	 */
1451 	mask &= ~fpstate->xfeatures;
1452 
1453 	/*
1454 	 * Any remaining state components in 'mask' might be written
1455 	 * by XSAVE/XRSTOR. Fail validation it found.
1456 	 */
1457 	return !mask;
1458 }
1459 
/* Warn once when xstate_op_valid() rejects the requested operation. */
void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	bool valid = xstate_op_valid(fpstate, mask, rstor);

	WARN_ON_ONCE(!valid);
}
1464 #endif /* CONFIG_X86_DEBUG_FPU */
1465 
xfd_update_static_branch(void)1466 static int __init xfd_update_static_branch(void)
1467 {
1468 	/*
1469 	 * If init_fpstate.xfd has bits set then dynamic features are
1470 	 * available and the dynamic sizing must be enabled.
1471 	 */
1472 	if (init_fpstate.xfd)
1473 		static_branch_enable(&__fpu_state_size_dynamic);
1474 	return 0;
1475 }
arch_initcall(xfd_update_static_branch)1476 arch_initcall(xfd_update_static_branch)
1477 
1478 void fpstate_free(struct fpu *fpu)
1479 {
1480 	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1481 		vfree(fpu->fpstate);
1482 }
1483 
1484 /**
1485  * fpstate_realloc - Reallocate struct fpstate for the requested new features
1486  *
1487  * @xfeatures:	A bitmap of xstate features which extend the enabled features
1488  *		of that task
1489  * @ksize:	The required size for the kernel buffer
1490  * @usize:	The required size for user space buffers
1491  * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
1492  *
1493  * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
1494  * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
1495  * with large states are likely to live longer.
1496  *
1497  * Returns: 0 on success, -ENOMEM on allocation error.
1498  */
fpstate_realloc(u64 xfeatures,unsigned int ksize,unsigned int usize,struct fpu_guest * guest_fpu)1499 static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
1500 			   unsigned int usize, struct fpu_guest *guest_fpu)
1501 {
1502 	struct fpu *fpu = &current->thread.fpu;
1503 	struct fpstate *curfps, *newfps = NULL;
1504 	unsigned int fpsize;
1505 	bool in_use;
1506 
1507 	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
1508 
1509 	newfps = vzalloc(fpsize);
1510 	if (!newfps)
1511 		return -ENOMEM;
1512 	newfps->size = ksize;
1513 	newfps->user_size = usize;
1514 	newfps->is_valloc = true;
1515 
1516 	/*
1517 	 * When a guest FPU is supplied, use @guest_fpu->fpstate
1518 	 * as reference independent whether it is in use or not.
1519 	 */
1520 	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
1521 
1522 	/* Determine whether @curfps is the active fpstate */
1523 	in_use = fpu->fpstate == curfps;
1524 
1525 	if (guest_fpu) {
1526 		newfps->is_guest = true;
1527 		newfps->is_confidential = curfps->is_confidential;
1528 		newfps->in_use = curfps->in_use;
1529 		guest_fpu->xfeatures |= xfeatures;
1530 		guest_fpu->uabi_size = usize;
1531 	}
1532 
1533 	fpregs_lock();
1534 	/*
1535 	 * If @curfps is in use, ensure that the current state is in the
1536 	 * registers before swapping fpstate as that might invalidate it
1537 	 * due to layout changes.
1538 	 */
1539 	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
1540 		fpregs_restore_userregs();
1541 
1542 	newfps->xfeatures = curfps->xfeatures | xfeatures;
1543 	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
1544 	newfps->xfd = curfps->xfd & ~xfeatures;
1545 
1546 	/* Do the final updates within the locked region */
1547 	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
1548 
1549 	if (guest_fpu) {
1550 		guest_fpu->fpstate = newfps;
1551 		/* If curfps is active, update the FPU fpstate pointer */
1552 		if (in_use)
1553 			fpu->fpstate = newfps;
1554 	} else {
1555 		fpu->fpstate = newfps;
1556 	}
1557 
1558 	if (in_use)
1559 		xfd_update_state(fpu->fpstate);
1560 	fpregs_unlock();
1561 
1562 	/* Only free valloc'ed state */
1563 	if (curfps && curfps->is_valloc)
1564 		vfree(curfps);
1565 
1566 	return 0;
1567 }
1568 
validate_sigaltstack(unsigned int usize)1569 static int validate_sigaltstack(unsigned int usize)
1570 {
1571 	struct task_struct *thread, *leader = current->group_leader;
1572 	unsigned long framesize = get_sigframe_size();
1573 
1574 	lockdep_assert_held(&current->sighand->siglock);
1575 
1576 	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1577 	framesize -= fpu_user_cfg.max_size;
1578 	framesize += usize;
1579 	for_each_thread(leader, thread) {
1580 		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1581 			return -ENOSPC;
1582 	}
1583 	return 0;
1584 }
1585 
/*
 * Add @requested to the group leader's host or guest permissions and
 * record the resulting kernel and user state sizes.
 *
 * Returns 0 on success or when everything requested is already
 * permitted, otherwise the error from validate_sigaltstack().
 */
static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * !XSAVES is deliberately not excluded here: we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *leader_fpu = &current->group_leader->thread.fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 features;
	int err;

	/* Nothing to do when everything requested is permitted already */
	if ((permitted & requested) == requested)
		return 0;

	/*
	 * Kernel state size: old plus new features and, on the host,
	 * the supervisor states as well.
	 */
	features = permitted | requested;
	if (!guest)
		features |= xfeatures_mask_supervisor();
	ksize = xstate_calculate_size(features, compacted);

	/* User state size: supported user features, never compacted */
	features &= XFEATURE_MASK_USER_SUPPORTED;
	usize = xstate_calculate_size(features, false);

	if (!guest) {
		err = validate_sigaltstack(usize);
		if (err)
			return err;
	}

	perm = guest ? &leader_fpu->guest_perm : &leader_fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, features);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;

	return 0;
}
1630 
1631 /*
1632  * Permissions array to map facilities with more than one component
1633  */
1634 static const u64 xstate_prctl_req[XFEATURE_MAX] = {
1635 	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
1636 };
1637 
xstate_request_perm(unsigned long idx,bool guest)1638 static int xstate_request_perm(unsigned long idx, bool guest)
1639 {
1640 	u64 permitted, requested;
1641 	int ret;
1642 
1643 	if (idx >= XFEATURE_MAX)
1644 		return -EINVAL;
1645 
1646 	/*
1647 	 * Look up the facility mask which can require more than
1648 	 * one xstate component.
1649 	 */
1650 	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
1651 	requested = xstate_prctl_req[idx];
1652 	if (!requested)
1653 		return -EOPNOTSUPP;
1654 
1655 	if ((fpu_user_cfg.max_features & requested) != requested)
1656 		return -EOPNOTSUPP;
1657 
1658 	/* Lockless quick check */
1659 	permitted = xstate_get_group_perm(guest);
1660 	if ((permitted & requested) == requested)
1661 		return 0;
1662 
1663 	/* Protect against concurrent modifications */
1664 	spin_lock_irq(&current->sighand->siglock);
1665 	permitted = xstate_get_group_perm(guest);
1666 
1667 	/* First vCPU allocation locks the permissions. */
1668 	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
1669 		ret = -EBUSY;
1670 	else
1671 		ret = __xstate_request_perm(permitted, requested, guest);
1672 	spin_unlock_irq(&current->sighand->siglock);
1673 	return ret;
1674 }
1675 
/*
 * Enable the dynamic features flagged in @xfd_err for the host, or for
 * @guest_fpu when it is non-NULL, by reallocating the fpstate.
 *
 * Returns 0 on success or when @xfd_err contains no dynamic user
 * feature, -EPERM when the features are not permitted and -EFAULT when
 * the reallocation fails.
 */
int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 wanted = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *leader_fpu;
	bool permitted;

	if (!wanted) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Serialize against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	permitted = (xstate_get_group_perm(!!guest_fpu) & wanted) == wanted;
	if (permitted) {
		leader_fpu = &current->group_leader->thread.fpu;
		perm = guest_fpu ? &leader_fpu->guest_perm : &leader_fpu->perm;
		ksize = perm->__state_size;
		usize = perm->__user_state_size;
	}

	/*
	 * For a permitted feature the state size is sufficient. Dropping
	 * the lock is safe even if another task adds more features: the
	 * sizes read above stay valid for the feature(s) requested here.
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if (!permitted)
		return -EPERM;

	/* A failed fpstate allocation leaves no way out. */
	if (fpstate_realloc(wanted, ksize, usize, guest_fpu))
		return -EFAULT;

	return 0;
}
1719 
/* Host variant of __xfd_enable_feature(): no guest FPU is involved. */
int xfd_enable_feature(u64 xfd_err)
{
	struct fpu_guest *no_guest = NULL;

	return __xfd_enable_feature(xfd_err, no_guest);
}
1724 
1725 #else /* CONFIG_X86_64 */
/* !CONFIG_X86_64 stub: permission requests always fail with -EPERM. */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
1730 #endif  /* !CONFIG_X86_64 */
1731 
xstate_get_guest_group_perm(void)1732 u64 xstate_get_guest_group_perm(void)
1733 {
1734 	return xstate_get_group_perm(true);
1735 }
1736 EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1737 
1738 /**
1739  * fpu_xstate_prctl - xstate permission operations
1740  * @tsk:	Redundant pointer to current
1741  * @option:	A subfunction of arch_prctl()
1742  * @arg2:	option argument
1743  * Return:	0 if successful; otherwise, an error code
1744  *
1745  * Option arguments:
1746  *
1747  * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1748  * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1749  * ARCH_REQ_XCOMP_PERM: Facility number requested
1750  *
1751  * For facilities which require more than one XSTATE component, the request
1752  * must be the highest state component number related to that facility,
1753  * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1754  * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1755  */
fpu_xstate_prctl(int option,unsigned long arg2)1756 long fpu_xstate_prctl(int option, unsigned long arg2)
1757 {
1758 	u64 __user *uptr = (u64 __user *)arg2;
1759 	u64 permitted, supported;
1760 	unsigned long idx = arg2;
1761 	bool guest = false;
1762 
1763 	switch (option) {
1764 	case ARCH_GET_XCOMP_SUPP:
1765 		supported = fpu_user_cfg.max_features |	fpu_user_cfg.legacy_features;
1766 		return put_user(supported, uptr);
1767 
1768 	case ARCH_GET_XCOMP_PERM:
1769 		/*
1770 		 * Lockless snapshot as it can also change right after the
1771 		 * dropping the lock.
1772 		 */
1773 		permitted = xstate_get_host_group_perm();
1774 		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1775 		return put_user(permitted, uptr);
1776 
1777 	case ARCH_GET_XCOMP_GUEST_PERM:
1778 		permitted = xstate_get_guest_group_perm();
1779 		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1780 		return put_user(permitted, uptr);
1781 
1782 	case ARCH_REQ_XCOMP_GUEST_PERM:
1783 		guest = true;
1784 		fallthrough;
1785 
1786 	case ARCH_REQ_XCOMP_PERM:
1787 		if (!IS_ENABLED(CONFIG_X86_64))
1788 			return -EOPNOTSUPP;
1789 
1790 		return xstate_request_perm(idx, guest);
1791 
1792 	default:
1793 		return -EINVAL;
1794 	}
1795 }
1796 
1797 #ifdef CONFIG_PROC_PID_ARCH_STATUS
1798 /*
1799  * Report the amount of time elapsed in millisecond since last AVX512
1800  * use in the task.
1801  */
avx512_status(struct seq_file * m,struct task_struct * task)1802 static void avx512_status(struct seq_file *m, struct task_struct *task)
1803 {
1804 	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
1805 	long delta;
1806 
1807 	if (!timestamp) {
1808 		/*
1809 		 * Report -1 if no AVX512 usage
1810 		 */
1811 		delta = -1;
1812 	} else {
1813 		delta = (long)(jiffies - timestamp);
1814 		/*
1815 		 * Cap to LONG_MAX if time difference > LONG_MAX
1816 		 */
1817 		if (delta < 0)
1818 			delta = LONG_MAX;
1819 		delta = jiffies_to_msecs(delta);
1820 	}
1821 
1822 	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1823 	seq_putc(m, '\n');
1824 }
1825 
1826 /*
1827  * Report architecture specific information
1828  */
proc_pid_arch_status(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)1829 int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
1830 			struct pid *pid, struct task_struct *task)
1831 {
1832 	/*
1833 	 * Report AVX512 state if the processor and build option supported.
1834 	 */
1835 	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
1836 		avx512_status(m, task);
1837 
1838 	return 0;
1839 }
1840 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
1841