1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * AMD Encrypted Register State Support 4 * 5 * Author: Joerg Roedel <jroedel@suse.de> 6 * 7 * This file is not compiled stand-alone. It contains code shared 8 * between the pre-decompression boot code and the running Linux kernel 9 * and is included directly into both code-bases. 10 */ 11 12 #ifndef __BOOT_COMPRESSED 13 #define error(v) pr_err(v) 14 #define has_cpuflag(f) boot_cpu_has(f) 15 #endif 16 17 /* I/O parameters for CPUID-related helpers */ 18 struct cpuid_leaf { 19 u32 fn; 20 u32 subfn; 21 u32 eax; 22 u32 ebx; 23 u32 ecx; 24 u32 edx; 25 }; 26 27 /* 28 * Individual entries of the SNP CPUID table, as defined by the SNP 29 * Firmware ABI, Revision 0.9, Section 7.1, Table 14. 30 */ 31 struct snp_cpuid_fn { 32 u32 eax_in; 33 u32 ecx_in; 34 u64 xcr0_in; 35 u64 xss_in; 36 u32 eax; 37 u32 ebx; 38 u32 ecx; 39 u32 edx; 40 u64 __reserved; 41 } __packed; 42 43 /* 44 * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9, 45 * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit 46 * of 64 entries per CPUID table. 47 */ 48 #define SNP_CPUID_COUNT_MAX 64 49 50 struct snp_cpuid_table { 51 u32 count; 52 u32 __reserved1; 53 u64 __reserved2; 54 struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX]; 55 } __packed; 56 57 /* 58 * Since feature negotiation related variables are set early in the boot 59 * process they must reside in the .data section so as not to be zeroed 60 * out when the .bss section is later cleared. 61 * 62 * GHCB protocol version negotiated with the hypervisor. 63 */ 64 static u16 ghcb_version __ro_after_init; 65 66 /* Copy of the SNP firmware's CPUID page. */ 67 static struct snp_cpuid_table cpuid_table_copy __ro_after_init; 68 69 /* 70 * These will be initialized based on CPUID table so that non-present 71 * all-zero leaves (for sparse tables) can be differentiated from 72 * invalid/out-of-range leaves. This is needed since all-zero leaves 73 * still need to be post-processed. 74 */ 75 static u32 cpuid_std_range_max __ro_after_init; 76 static u32 cpuid_hyp_range_max __ro_after_init; 77 static u32 cpuid_ext_range_max __ro_after_init; 78 79 static bool __init sev_es_check_cpu_features(void) 80 { 81 if (!has_cpuflag(X86_FEATURE_RDRAND)) { 82 error("RDRAND instruction not supported - no trusted source of randomness available\n"); 83 return false; 84 } 85 86 return true; 87 } 88 89 static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) 90 { 91 u64 val = GHCB_MSR_TERM_REQ; 92 93 /* Tell the hypervisor what went wrong. */ 94 val |= GHCB_SEV_TERM_REASON(set, reason); 95 96 /* Request Guest Termination from Hypvervisor */ 97 sev_es_wr_ghcb_msr(val); 98 VMGEXIT(); 99 100 while (true) 101 asm volatile("hlt\n" : : : "memory"); 102 } 103 104 /* 105 * The hypervisor features are available from GHCB version 2 onward. 106 */ 107 static u64 get_hv_features(void) 108 { 109 u64 val; 110 111 if (ghcb_version < 2) 112 return 0; 113 114 sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ); 115 VMGEXIT(); 116 117 val = sev_es_rd_ghcb_msr(); 118 if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP) 119 return 0; 120 121 return GHCB_MSR_HV_FT_RESP_VAL(val); 122 } 123 124 static void snp_register_ghcb_early(unsigned long paddr) 125 { 126 unsigned long pfn = paddr >> PAGE_SHIFT; 127 u64 val; 128 129 sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn)); 130 VMGEXIT(); 131 132 val = sev_es_rd_ghcb_msr(); 133 134 /* If the response GPA is not ours then abort the guest */ 135 if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) || 136 (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn)) 137 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER); 138 } 139 140 static bool sev_es_negotiate_protocol(void) 141 { 142 u64 val; 143 144 /* Do the GHCB protocol version negotiation */ 145 sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); 146 VMGEXIT(); 147 val = sev_es_rd_ghcb_msr(); 148 149 if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) 150 return false; 151 152 if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || 153 GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) 154 return false; 155 156 ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX); 157 158 return true; 159 } 160 161 static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) 162 { 163 ghcb->save.sw_exit_code = 0; 164 __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); 165 } 166 167 static bool vc_decoding_needed(unsigned long exit_code) 168 { 169 /* Exceptions don't require to decode the instruction */ 170 return !(exit_code >= SVM_EXIT_EXCP_BASE && 171 exit_code <= SVM_EXIT_LAST_EXCP); 172 } 173 174 static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt, 175 struct pt_regs *regs, 176 unsigned long exit_code) 177 { 178 enum es_result ret = ES_OK; 179 180 memset(ctxt, 0, sizeof(*ctxt)); 181 ctxt->regs = regs; 182 183 if (vc_decoding_needed(exit_code)) 184 ret = vc_decode_insn(ctxt); 185 186 return ret; 187 } 188 189 static void vc_finish_insn(struct es_em_ctxt *ctxt) 190 { 191 ctxt->regs->ip += ctxt->insn.length; 192 } 193 194 static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 195 { 196 u32 ret; 197 198 ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0); 199 if (!ret) 200 return ES_OK; 201 202 if (ret == 1) { 203 u64 info = ghcb->save.sw_exit_info_2; 204 unsigned long v = info & SVM_EVTINJ_VEC_MASK; 205 206 /* Check if exception information from hypervisor is sane. */ 207 if ((info & SVM_EVTINJ_VALID) && 208 ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) && 209 ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) { 210 ctxt->fi.vector = v; 211 212 if (info & SVM_EVTINJ_VALID_ERR) 213 ctxt->fi.error_code = info >> 32; 214 215 return ES_EXCEPTION; 216 } 217 } 218 219 return ES_VMM_ERROR; 220 } 221 222 enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr, 223 struct es_em_ctxt *ctxt, u64 exit_code, 224 u64 exit_info_1, u64 exit_info_2) 225 { 226 /* Fill in protocol and format specifiers */ 227 ghcb->protocol_version = ghcb_version; 228 ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; 229 230 ghcb_set_sw_exit_code(ghcb, exit_code); 231 ghcb_set_sw_exit_info_1(ghcb, exit_info_1); 232 ghcb_set_sw_exit_info_2(ghcb, exit_info_2); 233 234 /* 235 * Hyper-V unenlightened guests use a paravisor for communicating and 236 * GHCB pages are being allocated and set up by that paravisor. Linux 237 * should not change the GHCB page's physical address. 238 */ 239 if (set_ghcb_msr) 240 sev_es_wr_ghcb_msr(__pa(ghcb)); 241 242 VMGEXIT(); 243 244 return verify_exception_info(ghcb, ctxt); 245 } 246 247 static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg) 248 { 249 u64 val; 250 251 sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx)); 252 VMGEXIT(); 253 val = sev_es_rd_ghcb_msr(); 254 if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) 255 return -EIO; 256 257 *reg = (val >> 32); 258 259 return 0; 260 } 261 262 static int sev_cpuid_hv(struct cpuid_leaf *leaf) 263 { 264 int ret; 265 266 /* 267 * MSR protocol does not support fetching non-zero subfunctions, but is 268 * sufficient to handle current early-boot cases. Should that change, 269 * make sure to report an error rather than ignoring the index and 270 * grabbing random values. If this issue arises in the future, handling 271 * can be added here to use GHCB-page protocol for cases that occur late 272 * enough in boot that GHCB page is available. 273 */ 274 if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn) 275 return -EINVAL; 276 277 ret = __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax); 278 ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx); 279 ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx); 280 ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx); 281 282 return ret; 283 } 284 285 /* 286 * This may be called early while still running on the initial identity 287 * mapping. Use RIP-relative addressing to obtain the correct address 288 * while running with the initial identity mapping as well as the 289 * switch-over to kernel virtual addresses later. 290 */ 291 static const struct snp_cpuid_table *snp_cpuid_get_table(void) 292 { 293 void *ptr; 294 295 asm ("lea cpuid_table_copy(%%rip), %0" 296 : "=r" (ptr) 297 : "p" (&cpuid_table_copy)); 298 299 return ptr; 300 } 301 302 /* 303 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of 304 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0 305 * and 1 based on the corresponding features enabled by a particular 306 * combination of XCR0 and XSS registers so that a guest can look up the 307 * version corresponding to the features currently enabled in its XCR0/XSS 308 * registers. The only values that differ between these versions/table 309 * entries is the enabled XSAVE area size advertised via EBX. 310 * 311 * While hypervisors may choose to make use of this support, it is more 312 * robust/secure for a guest to simply find the entry corresponding to the 313 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the 314 * XSAVE area size using subfunctions 2 through 64, as documented in APM 315 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here. 316 * 317 * Since base/legacy XSAVE area size is documented as 0x240, use that value 318 * directly rather than relying on the base size in the CPUID table. 319 * 320 * Return: XSAVE area size on success, 0 otherwise. 321 */ 322 static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) 323 { 324 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 325 u64 xfeatures_found = 0; 326 u32 xsave_size = 0x240; 327 int i; 328 329 for (i = 0; i < cpuid_table->count; i++) { 330 const struct snp_cpuid_fn *e = &cpuid_table->fn[i]; 331 332 if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64)) 333 continue; 334 if (!(xfeatures_en & (BIT_ULL(e->ecx_in)))) 335 continue; 336 if (xfeatures_found & (BIT_ULL(e->ecx_in))) 337 continue; 338 339 xfeatures_found |= (BIT_ULL(e->ecx_in)); 340 341 if (compacted) 342 xsave_size += e->eax; 343 else 344 xsave_size = max(xsave_size, e->eax + e->ebx); 345 } 346 347 /* 348 * Either the guest set unsupported XCR0/XSS bits, or the corresponding 349 * entries in the CPUID table were not present. This is not a valid 350 * state to be in. 351 */ 352 if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2))) 353 return 0; 354 355 return xsave_size; 356 } 357 358 static bool 359 snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) 360 { 361 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 362 int i; 363 364 for (i = 0; i < cpuid_table->count; i++) { 365 const struct snp_cpuid_fn *e = &cpuid_table->fn[i]; 366 367 if (e->eax_in != leaf->fn) 368 continue; 369 370 if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn) 371 continue; 372 373 /* 374 * For 0xD subfunctions 0 and 1, only use the entry corresponding 375 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0). 376 * See the comments above snp_cpuid_calc_xsave_size() for more 377 * details. 378 */ 379 if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1)) 380 if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in) 381 continue; 382 383 leaf->eax = e->eax; 384 leaf->ebx = e->ebx; 385 leaf->ecx = e->ecx; 386 leaf->edx = e->edx; 387 388 return true; 389 } 390 391 return false; 392 } 393 394 static void snp_cpuid_hv(struct cpuid_leaf *leaf) 395 { 396 if (sev_cpuid_hv(leaf)) 397 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); 398 } 399 400 static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) 401 { 402 struct cpuid_leaf leaf_hv = *leaf; 403 404 switch (leaf->fn) { 405 case 0x1: 406 snp_cpuid_hv(&leaf_hv); 407 408 /* initial APIC ID */ 409 leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0)); 410 /* APIC enabled bit */ 411 leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9)); 412 413 /* OSXSAVE enabled bit */ 414 if (native_read_cr4() & X86_CR4_OSXSAVE) 415 leaf->ecx |= BIT(27); 416 break; 417 case 0x7: 418 /* OSPKE enabled bit */ 419 leaf->ecx &= ~BIT(4); 420 if (native_read_cr4() & X86_CR4_PKE) 421 leaf->ecx |= BIT(4); 422 break; 423 case 0xB: 424 leaf_hv.subfn = 0; 425 snp_cpuid_hv(&leaf_hv); 426 427 /* extended APIC ID */ 428 leaf->edx = leaf_hv.edx; 429 break; 430 case 0xD: { 431 bool compacted = false; 432 u64 xcr0 = 1, xss = 0; 433 u32 xsave_size; 434 435 if (leaf->subfn != 0 && leaf->subfn != 1) 436 return 0; 437 438 if (native_read_cr4() & X86_CR4_OSXSAVE) 439 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 440 if (leaf->subfn == 1) { 441 /* Get XSS value if XSAVES is enabled. */ 442 if (leaf->eax & BIT(3)) { 443 unsigned long lo, hi; 444 445 asm volatile("rdmsr" : "=a" (lo), "=d" (hi) 446 : "c" (MSR_IA32_XSS)); 447 xss = (hi << 32) | lo; 448 } 449 450 /* 451 * The PPR and APM aren't clear on what size should be 452 * encoded in 0xD:0x1:EBX when compaction is not enabled 453 * by either XSAVEC (feature bit 1) or XSAVES (feature 454 * bit 3) since SNP-capable hardware has these feature 455 * bits fixed as 1. KVM sets it to 0 in this case, but 456 * to avoid this becoming an issue it's safer to simply 457 * treat this as unsupported for SNP guests. 458 */ 459 if (!(leaf->eax & (BIT(1) | BIT(3)))) 460 return -EINVAL; 461 462 compacted = true; 463 } 464 465 xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted); 466 if (!xsave_size) 467 return -EINVAL; 468 469 leaf->ebx = xsave_size; 470 } 471 break; 472 case 0x8000001E: 473 snp_cpuid_hv(&leaf_hv); 474 475 /* extended APIC ID */ 476 leaf->eax = leaf_hv.eax; 477 /* compute ID */ 478 leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0)); 479 /* node ID */ 480 leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0)); 481 break; 482 default: 483 /* No fix-ups needed, use values as-is. */ 484 break; 485 } 486 487 return 0; 488 } 489 490 /* 491 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value 492 * should be treated as fatal by caller. 493 */ 494 static int snp_cpuid(struct cpuid_leaf *leaf) 495 { 496 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 497 498 if (!cpuid_table->count) 499 return -EOPNOTSUPP; 500 501 if (!snp_cpuid_get_validated_func(leaf)) { 502 /* 503 * Some hypervisors will avoid keeping track of CPUID entries 504 * where all values are zero, since they can be handled the 505 * same as out-of-range values (all-zero). This is useful here 506 * as well as it allows virtually all guest configurations to 507 * work using a single SNP CPUID table. 508 * 509 * To allow for this, there is a need to distinguish between 510 * out-of-range entries and in-range zero entries, since the 511 * CPUID table entries are only a template that may need to be 512 * augmented with additional values for things like 513 * CPU-specific information during post-processing. So if it's 514 * not in the table, set the values to zero. Then, if they are 515 * within a valid CPUID range, proceed with post-processing 516 * using zeros as the initial values. Otherwise, skip 517 * post-processing and just return zeros immediately. 518 */ 519 leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0; 520 521 /* Skip post-processing for out-of-range zero leafs. */ 522 if (!(leaf->fn <= cpuid_std_range_max || 523 (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) || 524 (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max))) 525 return 0; 526 } 527 528 return snp_cpuid_postprocess(leaf); 529 } 530 531 /* 532 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB 533 * page yet, so it only supports the MSR based communication with the 534 * hypervisor and only the CPUID exit-code. 535 */ 536 void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) 537 { 538 unsigned int subfn = lower_bits(regs->cx, 32); 539 unsigned int fn = lower_bits(regs->ax, 32); 540 struct cpuid_leaf leaf; 541 int ret; 542 543 /* Only CPUID is supported via MSR protocol */ 544 if (exit_code != SVM_EXIT_CPUID) 545 goto fail; 546 547 leaf.fn = fn; 548 leaf.subfn = subfn; 549 550 ret = snp_cpuid(&leaf); 551 if (!ret) 552 goto cpuid_done; 553 554 if (ret != -EOPNOTSUPP) 555 goto fail; 556 557 if (sev_cpuid_hv(&leaf)) 558 goto fail; 559 560 cpuid_done: 561 regs->ax = leaf.eax; 562 regs->bx = leaf.ebx; 563 regs->cx = leaf.ecx; 564 regs->dx = leaf.edx; 565 566 /* 567 * This is a VC handler and the #VC is only raised when SEV-ES is 568 * active, which means SEV must be active too. Do sanity checks on the 569 * CPUID results to make sure the hypervisor does not trick the kernel 570 * into the no-sev path. This could map sensitive data unencrypted and 571 * make it accessible to the hypervisor. 572 * 573 * In particular, check for: 574 * - Availability of CPUID leaf 0x8000001f 575 * - SEV CPUID bit. 576 * 577 * The hypervisor might still report the wrong C-bit position, but this 578 * can't be checked here. 579 */ 580 581 if (fn == 0x80000000 && (regs->ax < 0x8000001f)) 582 /* SEV leaf check */ 583 goto fail; 584 else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) 585 /* SEV bit */ 586 goto fail; 587 588 /* Skip over the CPUID two-byte opcode */ 589 regs->ip += 2; 590 591 return; 592 593 fail: 594 /* Terminate the guest */ 595 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 596 } 597 598 static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, 599 void *src, char *buf, 600 unsigned int data_size, 601 unsigned int count, 602 bool backwards) 603 { 604 int i, b = backwards ? -1 : 1; 605 enum es_result ret = ES_OK; 606 607 for (i = 0; i < count; i++) { 608 void *s = src + (i * data_size * b); 609 char *d = buf + (i * data_size); 610 611 ret = vc_read_mem(ctxt, s, d, data_size); 612 if (ret != ES_OK) 613 break; 614 } 615 616 return ret; 617 } 618 619 static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, 620 void *dst, char *buf, 621 unsigned int data_size, 622 unsigned int count, 623 bool backwards) 624 { 625 int i, s = backwards ? -1 : 1; 626 enum es_result ret = ES_OK; 627 628 for (i = 0; i < count; i++) { 629 void *d = dst + (i * data_size * s); 630 char *b = buf + (i * data_size); 631 632 ret = vc_write_mem(ctxt, d, b, data_size); 633 if (ret != ES_OK) 634 break; 635 } 636 637 return ret; 638 } 639 640 #define IOIO_TYPE_STR BIT(2) 641 #define IOIO_TYPE_IN 1 642 #define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR) 643 #define IOIO_TYPE_OUT 0 644 #define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR) 645 646 #define IOIO_REP BIT(3) 647 648 #define IOIO_ADDR_64 BIT(9) 649 #define IOIO_ADDR_32 BIT(8) 650 #define IOIO_ADDR_16 BIT(7) 651 652 #define IOIO_DATA_32 BIT(6) 653 #define IOIO_DATA_16 BIT(5) 654 #define IOIO_DATA_8 BIT(4) 655 656 #define IOIO_SEG_ES (0 << 10) 657 #define IOIO_SEG_DS (3 << 10) 658 659 static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) 660 { 661 struct insn *insn = &ctxt->insn; 662 *exitinfo = 0; 663 664 switch (insn->opcode.bytes[0]) { 665 /* INS opcodes */ 666 case 0x6c: 667 case 0x6d: 668 *exitinfo |= IOIO_TYPE_INS; 669 *exitinfo |= IOIO_SEG_ES; 670 *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; 671 break; 672 673 /* OUTS opcodes */ 674 case 0x6e: 675 case 0x6f: 676 *exitinfo |= IOIO_TYPE_OUTS; 677 *exitinfo |= IOIO_SEG_DS; 678 *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; 679 break; 680 681 /* IN immediate opcodes */ 682 case 0xe4: 683 case 0xe5: 684 *exitinfo |= IOIO_TYPE_IN; 685 *exitinfo |= (u8)insn->immediate.value << 16; 686 break; 687 688 /* OUT immediate opcodes */ 689 case 0xe6: 690 case 0xe7: 691 *exitinfo |= IOIO_TYPE_OUT; 692 *exitinfo |= (u8)insn->immediate.value << 16; 693 break; 694 695 /* IN register opcodes */ 696 case 0xec: 697 case 0xed: 698 *exitinfo |= IOIO_TYPE_IN; 699 *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; 700 break; 701 702 /* OUT register opcodes */ 703 case 0xee: 704 case 0xef: 705 *exitinfo |= IOIO_TYPE_OUT; 706 *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; 707 break; 708 709 default: 710 return ES_DECODE_FAILED; 711 } 712 713 switch (insn->opcode.bytes[0]) { 714 case 0x6c: 715 case 0x6e: 716 case 0xe4: 717 case 0xe6: 718 case 0xec: 719 case 0xee: 720 /* Single byte opcodes */ 721 *exitinfo |= IOIO_DATA_8; 722 break; 723 default: 724 /* Length determined by instruction parsing */ 725 *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 726 : IOIO_DATA_32; 727 } 728 switch (insn->addr_bytes) { 729 case 2: 730 *exitinfo |= IOIO_ADDR_16; 731 break; 732 case 4: 733 *exitinfo |= IOIO_ADDR_32; 734 break; 735 case 8: 736 *exitinfo |= IOIO_ADDR_64; 737 break; 738 } 739 740 if (insn_has_rep_prefix(insn)) 741 *exitinfo |= IOIO_REP; 742 743 return ES_OK; 744 } 745 746 static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 747 { 748 struct pt_regs *regs = ctxt->regs; 749 u64 exit_info_1, exit_info_2; 750 enum es_result ret; 751 752 ret = vc_ioio_exitinfo(ctxt, &exit_info_1); 753 if (ret != ES_OK) 754 return ret; 755 756 if (exit_info_1 & IOIO_TYPE_STR) { 757 758 /* (REP) INS/OUTS */ 759 760 bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF); 761 unsigned int io_bytes, exit_bytes; 762 unsigned int ghcb_count, op_count; 763 unsigned long es_base; 764 u64 sw_scratch; 765 766 /* 767 * For the string variants with rep prefix the amount of in/out 768 * operations per #VC exception is limited so that the kernel 769 * has a chance to take interrupts and re-schedule while the 770 * instruction is emulated. 771 */ 772 io_bytes = (exit_info_1 >> 4) & 0x7; 773 ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes; 774 775 op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1; 776 exit_info_2 = min(op_count, ghcb_count); 777 exit_bytes = exit_info_2 * io_bytes; 778 779 es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); 780 781 /* Read bytes of OUTS into the shared buffer */ 782 if (!(exit_info_1 & IOIO_TYPE_IN)) { 783 ret = vc_insn_string_read(ctxt, 784 (void *)(es_base + regs->si), 785 ghcb->shared_buffer, io_bytes, 786 exit_info_2, df); 787 if (ret) 788 return ret; 789 } 790 791 /* 792 * Issue an VMGEXIT to the HV to consume the bytes from the 793 * shared buffer or to have it write them into the shared buffer 794 * depending on the instruction: OUTS or INS. 795 */ 796 sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer); 797 ghcb_set_sw_scratch(ghcb, sw_scratch); 798 ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO, 799 exit_info_1, exit_info_2); 800 if (ret != ES_OK) 801 return ret; 802 803 /* Read bytes from shared buffer into the guest's destination. */ 804 if (exit_info_1 & IOIO_TYPE_IN) { 805 ret = vc_insn_string_write(ctxt, 806 (void *)(es_base + regs->di), 807 ghcb->shared_buffer, io_bytes, 808 exit_info_2, df); 809 if (ret) 810 return ret; 811 812 if (df) 813 regs->di -= exit_bytes; 814 else 815 regs->di += exit_bytes; 816 } else { 817 if (df) 818 regs->si -= exit_bytes; 819 else 820 regs->si += exit_bytes; 821 } 822 823 if (exit_info_1 & IOIO_REP) 824 regs->cx -= exit_info_2; 825 826 ret = regs->cx ? ES_RETRY : ES_OK; 827 828 } else { 829 830 /* IN/OUT into/from rAX */ 831 832 int bits = (exit_info_1 & 0x70) >> 1; 833 u64 rax = 0; 834 835 if (!(exit_info_1 & IOIO_TYPE_IN)) 836 rax = lower_bits(regs->ax, bits); 837 838 ghcb_set_rax(ghcb, rax); 839 840 ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, 841 SVM_EXIT_IOIO, exit_info_1, 0); 842 if (ret != ES_OK) 843 return ret; 844 845 if (exit_info_1 & IOIO_TYPE_IN) { 846 if (!ghcb_rax_is_valid(ghcb)) 847 return ES_VMM_ERROR; 848 regs->ax = lower_bits(ghcb->save.rax, bits); 849 } 850 } 851 852 return ret; 853 } 854 855 static int vc_handle_cpuid_snp(struct pt_regs *regs) 856 { 857 struct cpuid_leaf leaf; 858 int ret; 859 860 leaf.fn = regs->ax; 861 leaf.subfn = regs->cx; 862 ret = snp_cpuid(&leaf); 863 if (!ret) { 864 regs->ax = leaf.eax; 865 regs->bx = leaf.ebx; 866 regs->cx = leaf.ecx; 867 regs->dx = leaf.edx; 868 } 869 870 return ret; 871 } 872 873 static enum es_result vc_handle_cpuid(struct ghcb *ghcb, 874 struct es_em_ctxt *ctxt) 875 { 876 struct pt_regs *regs = ctxt->regs; 877 u32 cr4 = native_read_cr4(); 878 enum es_result ret; 879 int snp_cpuid_ret; 880 881 snp_cpuid_ret = vc_handle_cpuid_snp(regs); 882 if (!snp_cpuid_ret) 883 return ES_OK; 884 if (snp_cpuid_ret != -EOPNOTSUPP) 885 return ES_VMM_ERROR; 886 887 ghcb_set_rax(ghcb, regs->ax); 888 ghcb_set_rcx(ghcb, regs->cx); 889 890 if (cr4 & X86_CR4_OSXSAVE) 891 /* Safe to read xcr0 */ 892 ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); 893 else 894 /* xgetbv will cause #GP - use reset value for xcr0 */ 895 ghcb_set_xcr0(ghcb, 1); 896 897 ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0); 898 if (ret != ES_OK) 899 return ret; 900 901 if (!(ghcb_rax_is_valid(ghcb) && 902 ghcb_rbx_is_valid(ghcb) && 903 ghcb_rcx_is_valid(ghcb) && 904 ghcb_rdx_is_valid(ghcb))) 905 return ES_VMM_ERROR; 906 907 regs->ax = ghcb->save.rax; 908 regs->bx = ghcb->save.rbx; 909 regs->cx = ghcb->save.rcx; 910 regs->dx = ghcb->save.rdx; 911 912 return ES_OK; 913 } 914 915 static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, 916 struct es_em_ctxt *ctxt, 917 unsigned long exit_code) 918 { 919 bool rdtscp = (exit_code == SVM_EXIT_RDTSCP); 920 enum es_result ret; 921 922 ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0); 923 if (ret != ES_OK) 924 return ret; 925 926 if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) && 927 (!rdtscp || ghcb_rcx_is_valid(ghcb)))) 928 return ES_VMM_ERROR; 929 930 ctxt->regs->ax = ghcb->save.rax; 931 ctxt->regs->dx = ghcb->save.rdx; 932 if (rdtscp) 933 ctxt->regs->cx = ghcb->save.rcx; 934 935 return ES_OK; 936 } 937 938 struct cc_setup_data { 939 struct setup_data header; 940 u32 cc_blob_address; 941 }; 942 943 /* 944 * Search for a Confidential Computing blob passed in as a setup_data entry 945 * via the Linux Boot Protocol. 946 */ 947 static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) 948 { 949 struct cc_setup_data *sd = NULL; 950 struct setup_data *hdr; 951 952 hdr = (struct setup_data *)bp->hdr.setup_data; 953 954 while (hdr) { 955 if (hdr->type == SETUP_CC_BLOB) { 956 sd = (struct cc_setup_data *)hdr; 957 return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address; 958 } 959 hdr = (struct setup_data *)hdr->next; 960 } 961 962 return NULL; 963 } 964 965 /* 966 * Initialize the kernel's copy of the SNP CPUID table, and set up the 967 * pointer that will be used to access it. 968 * 969 * Maintaining a direct mapping of the SNP CPUID table used by firmware would 970 * be possible as an alternative, but the approach is brittle since the 971 * mapping needs to be updated in sync with all the changes to virtual memory 972 * layout and related mapping facilities throughout the boot process. 973 */ 974 static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) 975 { 976 const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; 977 int i; 978 979 if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE) 980 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); 981 982 cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys; 983 if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX) 984 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); 985 986 cpuid_table = snp_cpuid_get_table(); 987 memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table)); 988 989 /* Initialize CPUID ranges for range-checking. */ 990 for (i = 0; i < cpuid_table->count; i++) { 991 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; 992 993 if (fn->eax_in == 0x0) 994 cpuid_std_range_max = fn->eax; 995 else if (fn->eax_in == 0x40000000) 996 cpuid_hyp_range_max = fn->eax; 997 else if (fn->eax_in == 0x80000000) 998 cpuid_ext_range_max = fn->eax; 999 } 1000 } 1001