// SPDX-License-Identifier: GPL-2.0-only
/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>

#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/xstate.h>

#include <asm/tlbflush.h>
#include <asm/cpufeature.h>

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
 */
static const char *xfeature_names[] =
{
	"x87 floating point registers"	,
	"SSE registers"			,
	"AVX registers"			,
	"MPX bounds registers"		,
	"MPX CSR"			,
	"AVX-512 opmask"		,
	"AVX-512 Hi256"			,
	"AVX-512 ZMM_Hi256"		,
	"Processor Trace (unused)"	,
	"Protection Keys User registers",
	"unknown xstate feature"	,
};

static short xsave_cpuid_features[] __initdata = {
	X86_FEATURE_FPU,
	X86_FEATURE_XMM,
	X86_FEATURE_AVX,
	X86_FEATURE_MPX,
	X86_FEATURE_MPX,
	X86_FEATURE_AVX512F,
	X86_FEATURE_AVX512F,
	X86_FEATURE_AVX512F,
	X86_FEATURE_INTEL_PT,
	X86_FEATURE_PKU,
};

/*
 * Mask of xstate features supported by the CPU and the kernel:
 */
u64 xfeatures_mask __read_mostly;

static unsigned int xstate_offsets[XFEATURE_MAX] = { [0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX]   = { [0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [0 ... XFEATURE_MAX - 1] = -1};

/*
 * The kernel's XSAVE area can be in standard or compacted format;
 * it is always in standard format for user mode. This is the user
 * mode standard format size used for signal and ptrace frames.
 */
unsigned int fpu_user_xstate_size;

/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * We use FLS here so that we can print the most advanced
		 * feature that was requested but is missing. If a driver
		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll
		 * print the missing AVX feature - this is the most
		 * informative message to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print)-1;
		max_idx = ARRAY_SIZE(xfeature_names)-1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
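
/*
 * Illustrative use only (a hypothetical caller, not part of this file):
 * a driver that needs AVX state could probe for it like this:
 *
 *	const char *name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_YMM, &name))
 *		pr_warn("AVX ('%s') is not supported\n", name);
 */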

static bool xfeature_is_supervisor(int xfeature_nr)
{
	/*
	 * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
	 * return ECX[0] set to (1) for a supervisor state, and cleared (0)
	 * for a user state.
	 */
	u32 eax, ebx, ecx, edx;

	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return ecx & 1;
}

static bool xfeature_is_user(int xfeature_nr)
{
	return !xfeature_is_supervisor(xfeature_nr);
}

/*
 * When executing XSAVEOPT (or other optimized XSAVE instructions), if
 * a processor implementation detects that an FPU state component is still
 * (or is again) in its initialized state, it may clear the corresponding
 * bit in the header.xfeatures field, and can skip the writeout of registers
 * to the corresponding memory layout.
 *
 * This means that when the bit is zero, the state component might still
 * contain stale, non-initialized register state.
 *
 * Before writing xstate information to user-space we sanitize those
 * components, so that the memory layout of a feature is always in the
 * init state if the corresponding header bit is zero. This way user-space
 * doesn't see stale state in the memory layout during signal handling,
 * debugging etc.
 */
void fpstate_sanitize_xstate(struct fpu *fpu)
{
	struct fxregs_state *fx = &fpu->state.fxsave;
	int feature_bit;
	u64 xfeatures;

	if (!use_xsaveopt())
		return;

	xfeatures = fpu->state.xsave.header.xfeatures;

	/*
	 * None of the feature bits are in init state. So nothing else
	 * to do for us, as the memory layout is up to date.
	 */
	if ((xfeatures & xfeatures_mask) == xfeatures_mask)
		return;

	/*
	 * FP is in init state
	 */
	if (!(xfeatures & XFEATURE_MASK_FP)) {
		fx->cwd = 0x37f;
		fx->swd = 0;
		fx->twd = 0;
		fx->fop = 0;
		fx->rip = 0;
		fx->rdp = 0;
		memset(&fx->st_space[0], 0, 128);
	}

	/*
	 * SSE is in init state
	 */
	if (!(xfeatures & XFEATURE_MASK_SSE))
		memset(&fx->xmm_space[0], 0, 256);

	/*
	 * First two features are FPU and SSE, which above we handled
	 * in a special way already:
	 */
	feature_bit = 0x2;
	xfeatures = (xfeatures_mask & ~xfeatures) >> 2;

	/*
	 * Update all the remaining memory layouts according to their
	 * standard xstate layout, if their header bit is in the init
	 * state:
	 */
	while (xfeatures) {
		if (xfeatures & 0x1) {
			int offset = xstate_comp_offsets[feature_bit];
			int size = xstate_sizes[feature_bit];

			memcpy((void *)fx + offset,
			       (void *)&init_fpstate.xsave + offset,
			       size);
		}

		xfeatures >>= 1;
		feature_bit++;
	}
}

/*
 * Enable the extended processor state save/restore feature.
 * Called once per CPU onlining.
 */
void fpu__init_cpu_xstate(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
		return;
	/*
	 * Make it clear that XSAVES supervisor states are not yet
	 * implemented should anyone expect it to work by changing
	 * bits in XFEATURE_MASK_* macros and XCR0.
	 */
	WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
		  "x86/fpu: XSAVES supervisor states are not yet implemented.\n");

	xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;

	cr4_set_bits(X86_CR4_OSXSAVE);
	xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}
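
/*
 * For orientation, the xfeature bit numbering implied by the tables
 * near the top of this file (xfeature_names[] and xsave_cpuid_features[]
 * are indexed by it):
 *
 *	bit 0: XFEATURE_FP	bit 5: XFEATURE_OPMASK
 *	bit 1: XFEATURE_SSE	bit 6: XFEATURE_ZMM_Hi256
 *	bit 2: XFEATURE_YMM	bit 7: XFEATURE_Hi16_ZMM
 *	bit 3: XFEATURE_BNDREGS	bit 8: XFEATURE_PT_UNIMPLEMENTED_SO_FAR
 *	bit 4: XFEATURE_BNDCSR	bit 9: XFEATURE_PKRU
 */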

/*
 * Note that in the future we will likely need a pair of
 * functions here: one for user xstates and the other for
 * system xstates. For now, they are the same.
 */
static int xfeature_enabled(enum xfeature xfeature)
{
	return !!(xfeatures_mask & (1UL << xfeature));
}

/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout.
 */
static void __init setup_xstate_features(void)
{
	u32 eax, ebx, ecx, edx, i;
	/* start at the beginning of the "extended state" */
	unsigned int last_good_offset = offsetof(struct xregs_state,
						 extended_state_area);
	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	xstate_offsets[XFEATURE_FP]	= 0;
	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
						   xmm_space);

	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
						       xmm_space);

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!xfeature_enabled(i))
			continue;

		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);

		/*
		 * If an xfeature is supervisor state, the offset
		 * in EBX is invalid. We leave it at -1.
		 */
		if (xfeature_is_user(i))
			xstate_offsets[i] = ebx;

		xstate_sizes[i] = eax;
		/*
		 * In our xstate size checks, we assume that the
		 * highest-numbered xstate feature has the
		 * highest offset in the buffer. Ensure it does.
		 */
		WARN_ONCE(last_good_offset > xstate_offsets[i],
			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
		last_good_offset = xstate_offsets[i];
	}
}

static void __init print_xstate_feature(u64 xstate_mask)
{
	const char *feature_name;

	if (cpu_has_xfeatures(xstate_mask, &feature_name))
		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
}

/*
 * Print out all the supported xstate features:
 */
static void __init print_xstate_features(void)
{
	print_xstate_feature(XFEATURE_MASK_FP);
	print_xstate_feature(XFEATURE_MASK_SSE);
	print_xstate_feature(XFEATURE_MASK_YMM);
	print_xstate_feature(XFEATURE_MASK_BNDREGS);
	print_xstate_feature(XFEATURE_MASK_BNDCSR);
	print_xstate_feature(XFEATURE_MASK_OPMASK);
	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
	print_xstate_feature(XFEATURE_MASK_PKRU);
}

/*
 * This check is important because it is easy to get XSTATE_*
 * confused with XSTATE_BIT_*.
 */
#define CHECK_XFEATURE(nr) do {			\
	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON(nr >= XFEATURE_MAX);		\
} while (0)

/*
 * We could cache this like xstate_size[], but we only use
 * it here, so it would be a waste of space.
 */
static int xfeature_is_aligned(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	/*
	 * The value returned by ECX[1] indicates the alignment
	 * of state component 'i' when the compacted format
	 * of the extended region of an XSAVE area is used:
	 */
	return !!(ecx & 2);
}
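
/*
 * Worked example of the compacted layout computed by setup_xstate_comp()
 * below, assuming (purely for illustration) that only AVX (256 bytes)
 * and PKRU (8 bytes, 64-byte aligned) are enabled beyond FP/SSE:
 *
 *	AVX:  offset = FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576
 *	PKRU: offset = ALIGN(576 + 256, 64) = 832
 */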

/*
 * This function sets up offsets and sizes of all extended states in
 * the xsave area. This supports both the standard format and the
 * compacted format of the xsave area.
 */
static void __init setup_xstate_comp(void)
{
	unsigned int xstate_comp_sizes[XFEATURE_MAX];
	int i;

	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	xstate_comp_offsets[XFEATURE_FP] = 0;
	xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
						     xmm_space);

	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
			if (xfeature_enabled(i)) {
				xstate_comp_offsets[i] = xstate_offsets[i];
				xstate_comp_sizes[i] = xstate_sizes[i];
			}
		}
		return;
	}

	xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
		FXSAVE_SIZE + XSAVE_HDR_SIZE;

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (xfeature_enabled(i))
			xstate_comp_sizes[i] = xstate_sizes[i];
		else
			xstate_comp_sizes[i] = 0;

		if (i > FIRST_EXTENDED_XFEATURE) {
			xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
					+ xstate_comp_sizes[i-1];

			if (xfeature_is_aligned(i))
				xstate_comp_offsets[i] =
					ALIGN(xstate_comp_offsets[i], 64);
		}
	}
}

/*
 * Print out xstate component offsets and sizes
 */
static void __init print_xstate_offset_size(void)
{
	int i;

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!xfeature_enabled(i))
			continue;
		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
			i, xstate_comp_offsets[i], i, xstate_sizes[i]);
	}
}

/*
 * setup the xstate image representing the init state
 */
static void __init setup_init_fpu_buf(void)
{
	static int on_boot_cpu __initdata = 1;

	WARN_ON_FPU(!on_boot_cpu);
	on_boot_cpu = 0;

	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return;

	setup_xstate_features();
	print_xstate_features();

	if (boot_cpu_has(X86_FEATURE_XSAVES))
		init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
						     xfeatures_mask;

	/*
	 * Init all the features state with header.xfeatures being 0x0
	 */
	copy_kernel_to_xregs_booting(&init_fpstate.xsave);

	/*
	 * Dump the init state again. This is to identify the init state
	 * of any feature which is not represented by all zeros.
	 */
	copy_xregs_to_kernel_booting(&init_fpstate.xsave);
}

static int xfeature_uncompacted_offset(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	/*
	 * Only XSAVES supports supervisor states and it uses compacted
	 * format. Checking a supervisor state's uncompacted offset is
	 * an error.
	 */
	if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) {
		WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
		return -1;
	}

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return ebx;
}

static int xfeature_size(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return eax;
}
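
/*
 * Quick reference for how CPUID leaf 0xD (XSTATE_CPUID) is used in the
 * helpers above and below (see also the SDM's XSAVE chapter):
 *
 *	sub-leaf 0:   EAX/EDX = supported user xfeatures,
 *		      EBX = XSAVE size for the current XCR0
 *	sub-leaf 1:   EBX = XSAVES size for XCR0 | IA32_XSS
 *	sub-leaf n>1: EAX = size of component n,
 *		      EBX = its standard-format offset (user states only),
 *		      ECX[0] = supervisor state, ECX[1] = 64-byte aligned
 */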

/*
 * 'XSAVES' implies two different things:
 * 1. saving of supervisor/system state
 * 2. using the compacted format
 *
 * Use this function when dealing with the compacted format so
 * that it is obvious which aspect of 'XSAVES' is being handled
 * by the calling code.
 */
int using_compacted_format(void)
{
	return boot_cpu_has(X86_FEATURE_XSAVES);
}

/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
int validate_xstate_header(const struct xstate_header *hdr)
{
	/* No unknown or supervisor features may be set */
	if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
		return -EINVAL;

	/* Userspace must use the uncompacted format */
	if (hdr->xcomp_bv)
		return -EINVAL;

	/*
	 * If 'reserved' is shrunken to add a new field, make sure to validate
	 * that new field here!
	 */
	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);

	/* No reserved bits may be set */
	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
		return -EINVAL;

	return 0;
}

static void __xstate_dump_leaves(void)
{
	int i;
	u32 eax, ebx, ecx, edx;
	static int should_dump = 1;

	if (!should_dump)
		return;
	should_dump = 0;
	/*
	 * Dump out a few leaves past the ones that we support
	 * just in case there are some goodies up there
	 */
	for (i = 0; i < XFEATURE_MAX + 10; i++) {
		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
			XSTATE_CPUID, i, eax, ebx, ecx, edx);
	}
}

#define XSTATE_WARN_ON(x) do {							\
	if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)

#define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
	if ((nr == nr_macro) &&						\
	    WARN_ONCE(sz != sizeof(__struct),				\
		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
		__stringify(nr_macro), sizeof(__struct), sz)) {		\
		__xstate_dump_leaves();					\
	}								\
} while (0)

/*
 * We have a C struct for each 'xstate'. We need to ensure
 * that our software representation matches what the CPU
 * tells us about the state's size.
 */
static void check_xstate_against_struct(int nr)
{
	/*
	 * Ask the CPU for the size of the state.
	 */
	int sz = xfeature_size(nr);
	/*
	 * Match each CPU state with the corresponding software
	 * structure.
	 */
	XCHECK_SZ(sz, nr, XFEATURE_YMM,       struct ymmh_struct);
	XCHECK_SZ(sz, nr, XFEATURE_BNDREGS,   struct mpx_bndreg_state);
	XCHECK_SZ(sz, nr, XFEATURE_BNDCSR,    struct mpx_bndcsr_state);
	XCHECK_SZ(sz, nr, XFEATURE_OPMASK,    struct avx_512_opmask_state);
	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);

	/*
	 * Make *SURE* to add any feature numbers in below if
	 * there are "holes" in the xsave state component
	 * numbers.
	 */
	if ((nr < XFEATURE_YMM) ||
	    (nr >= XFEATURE_MAX) ||
	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
		XSTATE_WARN_ON(1);
	}
}
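
/*
 * Sketch of the recalculation done by do_extra_xstate_size_checks()
 * below for a compacted-format buffer: start at FXSAVE_SIZE +
 * XSAVE_HDR_SIZE, then for each enabled component optionally round up
 * to a 64-byte boundary and add its CPUID-enumerated size. The final
 * sum must match the buffer size the CPU enumerated.
 */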

/*
 * This essentially double-checks what the cpu told us about
 * how large the XSAVE buffer needs to be. We are recalculating
 * it to be safe.
 */
static void do_extra_xstate_size_checks(void)
{
	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!xfeature_enabled(i))
			continue;

		check_xstate_against_struct(i);
		/*
		 * Supervisor state components can be managed only by
		 * XSAVES, which is compacted-format only.
		 */
		if (!using_compacted_format())
			XSTATE_WARN_ON(xfeature_is_supervisor(i));

		/* Align from the end of the previous feature */
		if (xfeature_is_aligned(i))
			paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
		/*
		 * The offset of a given state in the non-compacted
		 * format is given to us in a CPUID leaf. We check
		 * them for being ordered (increasing offsets) in
		 * setup_xstate_features().
		 */
		if (!using_compacted_format())
			paranoid_xstate_size = xfeature_uncompacted_offset(i);
		/*
		 * The compacted-format offset always depends on where
		 * the previous state ended.
		 */
		paranoid_xstate_size += xfeature_size(i);
	}
	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
}

/*
 * Get total size of enabled xstates in XCR0/xfeatures_mask.
 *
 * Note the SDM's wording here. "sub-function 0" only enumerates
 * the size of the *user* states. If we use it to size a buffer
 * that we use 'XSAVES' on, we could potentially overflow the
 * buffer because 'XSAVES' saves system states too.
 *
 * Note that we do not currently set any bits on IA32_XSS so
 * 'XCR0 | IA32_XSS == XCR0' for now.
 */
static unsigned int __init get_xsaves_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 1:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVES instruction for an XSAVE area
	 *    containing all the state components
	 *    corresponding to bits currently set in
	 *    XCR0 | IA32_XSS.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	return ebx;
}

static unsigned int __init get_xsave_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 0:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVE instruction for an XSAVE area
	 *    containing all the *user* state components
	 *    corresponding to bits currently set in XCR0.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	return ebx;
}
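
/*
 * Note: init_xstate_size() below sets fpu_kernel_xstate_size to either
 * the XSAVE size (standard format) or the XSAVES size (compacted
 * format), while fpu_user_xstate_size is always the standard-format
 * XSAVE size, since signal and ptrace frames use the standard format.
 */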

/*
 * Will the runtime-enumerated 'xstate_size' fit in the init
 * task's statically-allocated buffer?
 */
static bool is_supported_xstate_size(unsigned int test_xstate_size)
{
	if (test_xstate_size <= sizeof(union fpregs_state))
		return true;

	pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
		sizeof(union fpregs_state), test_xstate_size);
	return false;
}

static int __init init_xstate_size(void)
{
	/* Recompute the context size for enabled features: */
	unsigned int possible_xstate_size;
	unsigned int xsave_size;

	xsave_size = get_xsave_size();

	if (boot_cpu_has(X86_FEATURE_XSAVES))
		possible_xstate_size = get_xsaves_size();
	else
		possible_xstate_size = xsave_size;

	/* Ensure we have the space to store all enabled features: */
	if (!is_supported_xstate_size(possible_xstate_size))
		return -EINVAL;

	/*
	 * The size is OK, we are definitely going to use xsave,
	 * make it known to the world that we need more space.
	 */
	fpu_kernel_xstate_size = possible_xstate_size;
	do_extra_xstate_size_checks();

	/*
	 * User space is always in standard format.
	 */
	fpu_user_xstate_size = xsave_size;
	return 0;
}

/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we cannot use it. Disable it.
 */
static void fpu__init_disable_system_xstate(void)
{
	xfeatures_mask = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
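
/*
 * For orientation, the boot-time setup order implemented by
 * fpu__init_system_xstate() below: enumerate candidate xfeatures from
 * CPUID, filter them by CPUID feature flags and kernel policy, enable
 * OSXSAVE and write XCR0, size the xstate buffers, and finally compute
 * the compacted-format offsets.
 */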

/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 */
void __init fpu__init_system_xstate(void)
{
	unsigned int eax, ebx, ecx, edx;
	static int on_boot_cpu __initdata = 1;
	int err;
	int i;

	WARN_ON_FPU(!on_boot_cpu);
	on_boot_cpu = 0;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
		WARN_ON_FPU(1);
		return;
	}

	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	xfeatures_mask = eax + ((u64)edx << 32);

	if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration. Disable XSAVE and try to continue
		 * booting without it. This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
		goto out_disable;
	}

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		if (!boot_cpu_has(xsave_cpuid_features[i]))
			xfeatures_mask &= ~BIT(i);
	}

	xfeatures_mask &= fpu__get_supported_xfeatures_mask();

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();
	err = init_xstate_size();
	if (err)
		goto out_disable;

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);

	fpu__init_prepare_fx_sw_frame();
	setup_init_fpu_buf();
	setup_xstate_comp();
	print_xstate_offset_size();

	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		xfeatures_mask,
		fpu_kernel_xstate_size,
		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate();
}

/*
 * Restore minimal FPU state after suspend:
 */
void fpu__resume_cpu(void)
{
	/*
	 * Restore XCR0 on xsave capable CPUs:
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVE))
		xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}

/*
 * Given an xstate feature nr, calculate where in the xsave
 * buffer the state is. Callers should ensure that the buffer
 * is valid.
 */
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
	if (!xfeature_enabled(xfeature_nr)) {
		WARN_ON_FPU(1);
		return NULL;
	}

	return (void *)xsave + xstate_comp_offsets[xfeature_nr];
}
/*
 * Given the xsave area and a state inside, this function returns the
 * address of the state.
 *
 * This is the API that is called to get xstate address in either
 * standard format or compacted format of xsave area.
 *
 * Note that if there is no data for the field in the xsave buffer
 * this will return NULL.
 *
 * Inputs:
 *	xstate: the thread's storage area for all FPU data
 *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
 *	XFEATURE_SSE, etc...)
 * Output:
 *	address of the state in the xsave area, or NULL if the
 *	field is not present in the xsave buffer.
 */
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
	/*
	 * Do we even *have* xsave state?
	 */
	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return NULL;

	/*
	 * We should not ever be requesting features that we
	 * have not enabled. Remember that xfeatures_mask is
	 * what we write to the XCR0 register.
	 */
	WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
		  "get of unsupported state");
	/*
	 * This assumes the last 'xsave*' instruction to
	 * have requested that 'xfeature_nr' be saved.
	 * If it did not, we might be seeing an old value
	 * of the field in the buffer.
	 *
	 * This can happen because the last 'xsave' did not
	 * request that this feature be saved (unlikely)
	 * or because the "init optimization" caused it
	 * to not be saved.
	 */
	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
		return NULL;

	return __raw_xsave_addr(xsave, xfeature_nr);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
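
/*
 * Illustrative use only (a hypothetical caller, not part of this file):
 *
 *	struct pkru_state *pk;
 *
 *	pk = get_xsave_addr(&fpu->state.xsave, XFEATURE_PKRU);
 *	if (pk)
 *		... the PKRU component is present; pk->pkru is valid ...
 */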

/*
 * This wraps up the common operations that need to occur when retrieving
 * data from xsave state. It first ensures that the current task was
 * using the FPU and retrieves the data into a buffer. It then calculates
 * the offset of the requested field in the buffer.
 *
 * This function is safe to call whether the FPU is in use or not.
 *
 * Note that this only works on the current task.
 *
 * Inputs:
 *	@xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
 *	XFEATURE_SSE, etc...)
 * Output:
 *	address of the state in the xsave area or NULL if the state
 *	is not present or is in its 'init state'.
 */
const void *get_xsave_field_ptr(int xfeature_nr)
{
	struct fpu *fpu = &current->thread.fpu;

	/*
	 * fpu__save() takes the CPU's xstate registers
	 * and saves them off to the 'fpu' memory buffer.
	 */
	fpu__save(fpu);

	return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
}

#ifdef CONFIG_ARCH_HAS_PKEYS

#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
 * This will go out and modify the PKRU register to set the access
 * rights for @pkey to @init_val.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
		unsigned long init_val)
{
	u32 old_pkru;
	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
	u32 new_pkru_bits = 0;

	/*
	 * This check implies XSAVE support. OSPKE only gets
	 * set if we enable XSAVE and we enable PKU in XCR0.
	 */
	if (!boot_cpu_has(X86_FEATURE_OSPKE))
		return -EINVAL;

	/* Set the bits we need in PKRU: */
	if (init_val & PKEY_DISABLE_ACCESS)
		new_pkru_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_pkru_bits |= PKRU_WD_BIT;

	/* Shift the bits in to the correct place in PKRU for pkey: */
	new_pkru_bits <<= pkey_shift;

	/* Get old PKRU and mask off any old bits in place: */
	old_pkru = read_pkru();
	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);

	/* Write old part along with new part: */
	write_pkru(old_pkru | new_pkru_bits);

	return 0;
}
#endif /* CONFIG_ARCH_HAS_PKEYS */

/*
 * Weird legacy quirk: SSE and YMM states store information in the
 * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
 * area is marked as unused in the xfeatures header, we need to copy
 * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
 */
static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
{
	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
		return false;

	if (xfeatures & XFEATURE_MASK_FP)
		return false;

	return true;
}

/*
 * This is similar to user_regset_copyout(), but will not add offset to
 * the source data pointer or increment pos, count, kbuf, and ubuf.
 */
static inline void
__copy_xstate_to_kernel(void *kbuf, const void *data,
			unsigned int offset, unsigned int size, unsigned int size_total)
{
	if (offset < size_total) {
		unsigned int copy = min(size, size_total - offset);

		memcpy(kbuf + offset, data, copy);
	}
}
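
/*
 * Layout reminder for the conversion helpers below: the standard (user)
 * format places each component at its fixed CPUID-enumerated offset
 * (xstate_offsets[]), while the kernel's compacted XSAVES buffer packs
 * enabled components back-to-back after the 512-byte legacy area and
 * the 64-byte header (xstate_comp_offsets[], via __raw_xsave_addr()).
 */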

/*
 * Convert from kernel XSAVES compacted format to standard format and copy
 * to a kernel-space ptrace buffer.
 *
 * It supports partial copy but pos always starts from zero. This is called
 * from xstateregs_get() and there we check the CPU has XSAVES.
 */
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
{
	unsigned int offset, size;
	struct xstate_header header;
	int i;

	/*
	 * Currently copy_regset_to_user() starts from pos 0:
	 */
	if (unlikely(offset_start != 0))
		return -EFAULT;

	/*
	 * The destination is a ptrace buffer; we put in only user xstates:
	 */
	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;
	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;

	/*
	 * Copy xregs_state->header:
	 */
	offset = offsetof(struct xregs_state, header);
	size = sizeof(header);

	__copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);

	for (i = 0; i < XFEATURE_MAX; i++) {
		/*
		 * Copy only in-use xstates:
		 */
		if ((header.xfeatures >> i) & 1) {
			void *src = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			/* The next component has to fit fully into the output buffer: */
			if (offset + size > size_total)
				break;

			__copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
		}

	}

	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
		offset = offsetof(struct fxregs_state, mxcsr);
		size = MXCSR_AND_FLAGS_SIZE;
		__copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
	}

	/*
	 * Fill xsave->i387.sw_reserved value for ptrace frame:
	 */
	offset = offsetof(struct fxregs_state, sw_reserved);
	size = sizeof(xstate_fx_sw_bytes);

	__copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);

	return 0;
}

static inline int
__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
{
	if (!size)
		return 0;

	if (offset < size_total) {
		unsigned int copy = min(size, size_total - offset);

		if (__copy_to_user(ubuf + offset, data, copy))
			return -EFAULT;
	}
	return 0;
}
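
/*
 * Note on partial copies: copy_xstate_to_kernel() above and
 * copy_xstate_to_user() below stop at the first component that does
 * not fit fully into 'size_total', so a short ptrace buffer receives
 * a clean prefix of the standard-format image rather than a torn
 * component.
 */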

/*
 * Convert from kernel XSAVES compacted format to standard format and copy
 * to a user-space buffer. It supports partial copy but pos always starts
 * from zero. This is called from xstateregs_get() and there we check the
 * CPU has XSAVES.
 */
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
{
	unsigned int offset, size;
	int ret, i;
	struct xstate_header header;

	/*
	 * Currently copy_regset_to_user() starts from pos 0:
	 */
	if (unlikely(offset_start != 0))
		return -EFAULT;

	/*
	 * The destination is a ptrace buffer; we put in only user xstates:
	 */
	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;
	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;

	/*
	 * Copy xregs_state->header:
	 */
	offset = offsetof(struct xregs_state, header);
	size = sizeof(header);

	ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
	if (ret)
		return ret;

	for (i = 0; i < XFEATURE_MAX; i++) {
		/*
		 * Copy only in-use xstates:
		 */
		if ((header.xfeatures >> i) & 1) {
			void *src = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			/* The next component has to fit fully into the output buffer: */
			if (offset + size > size_total)
				break;

			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
			if (ret)
				return ret;
		}

	}

	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
		offset = offsetof(struct fxregs_state, mxcsr);
		size = MXCSR_AND_FLAGS_SIZE;
		ret = __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
		if (ret)
			return ret;
	}

	/*
	 * Fill xsave->i387.sw_reserved value for ptrace frame:
	 */
	offset = offsetof(struct fxregs_state, sw_reserved);
	size = sizeof(xstate_fx_sw_bytes);

	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
	if (ret)
		return ret;

	return 0;
}

/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
 * and copy to the target thread. This is called from xstateregs_set().
 */
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
{
	unsigned int offset, size;
	int i;
	struct xstate_header hdr;

	offset = offsetof(struct xregs_state, header);
	size = sizeof(hdr);

	memcpy(&hdr, kbuf + offset, size);

	if (validate_xstate_header(&hdr))
		return -EINVAL;

	for (i = 0; i < XFEATURE_MAX; i++) {
		u64 mask = ((u64)1 << i);

		if (hdr.xfeatures & mask) {
			void *dst = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			memcpy(dst, kbuf + offset, size);
		}
	}

	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
		offset = offsetof(struct fxregs_state, mxcsr);
		size = MXCSR_AND_FLAGS_SIZE;
		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
	}

	/*
	 * The state that came in from userspace was user-state only.
	 * Mask all the user states out of 'xfeatures':
	 */
	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;

	/*
	 * Add back in the features that came in from userspace:
	 */
	xsave->header.xfeatures |= hdr.xfeatures;

	return 0;
}
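
/*
 * Note: copy_kernel_to_xstate() above and copy_user_to_xstate() below
 * deliberately preserve the supervisor bits of the target's
 * header.xfeatures; validate_xstate_header() has already guaranteed
 * that the incoming header sets none of them, so only user-state bits
 * are replaced.
 */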

/*
 * Convert from a ptrace or sigreturn standard-format user-space buffer to
 * kernel XSAVES format and copy to the target thread. This is called from
 * xstateregs_set(), as well as potentially from the sigreturn() and
 * rt_sigreturn() system calls.
 */
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
{
	unsigned int offset, size;
	int i;
	struct xstate_header hdr;

	offset = offsetof(struct xregs_state, header);
	size = sizeof(hdr);

	if (__copy_from_user(&hdr, ubuf + offset, size))
		return -EFAULT;

	if (validate_xstate_header(&hdr))
		return -EINVAL;

	for (i = 0; i < XFEATURE_MAX; i++) {
		u64 mask = ((u64)1 << i);

		if (hdr.xfeatures & mask) {
			void *dst = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			if (__copy_from_user(dst, ubuf + offset, size))
				return -EFAULT;
		}
	}

	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
		offset = offsetof(struct fxregs_state, mxcsr);
		size = MXCSR_AND_FLAGS_SIZE;
		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
			return -EFAULT;
	}

	/*
	 * The state that came in from userspace was user-state only.
	 * Mask all the user states out of 'xfeatures':
	 */
	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;

	/*
	 * Add back in the features that came in from userspace:
	 */
	xsave->header.xfeatures |= hdr.xfeatures;

	return 0;
}

#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time in milliseconds since the task last used
 * AVX512.
 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
	long delta;

	if (!timestamp) {
		/*
		 * Report -1 if no AVX512 usage
		 */
		delta = -1;
	} else {
		delta = (long)(jiffies - timestamp);
		/*
		 * Cap to LONG_MAX if time difference > LONG_MAX
		 */
		if (delta < 0)
			delta = LONG_MAX;
		delta = jiffies_to_msecs(delta);
	}

	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
	seq_putc(m, '\n');
}

/*
 * Report architecture specific information
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX512 state if the processor and the build options
	 * support it.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */