// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2021-2022 Intel Corporation */

#undef pr_fmt
#define pr_fmt(fmt)	"tdx: " fmt

#include <linux/cpufeature.h>
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>

/* TDX module Call Leaf IDs */
#define TDX_GET_INFO			1
#define TDX_GET_VEINFO			3
#define TDX_ACCEPT_PAGE			6

/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA		0x10001

/* MMIO direction */
#define EPT_READ	0
#define EPT_WRITE	1

/* Port I/O direction */
#define PORT_READ	0
#define PORT_WRITE	1

/* See Exit Qualification for I/O Instructions in VMX documentation */
#define VE_IS_IO_IN(e)		((e) & BIT(3))
#define VE_GET_IO_SIZE(e)	(((e) & GENMASK(2, 0)) + 1)
#define VE_GET_PORT_NUM(e)	((e) >> 16)
#define VE_IS_IO_STRING(e)	((e) & BIT(4))

#define ATTR_SEPT_VE_DISABLE	BIT(28)

/*
 * Wrapper for standard use of __tdx_hypercall with no output aside from
 * return code.
 */
static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = fn,
		.r12 = r12,
		.r13 = r13,
		.r14 = r14,
		.r15 = r15,
	};

	return __tdx_hypercall(&args, 0);
}

/* Called from __tdx_hypercall() for unrecoverable failure */
void __tdx_hypercall_failed(void)
{
	panic("TDVMCALL failed. TDX module bug?");
}

/*
 * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
 * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
 * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
 * guest sides of these calls.
 */
static u64 hcall_func(u64 exit_reason)
{
	return exit_reason;
}

#ifdef CONFIG_KVM_GUEST
long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
		       unsigned long p3, unsigned long p4)
{
	struct tdx_hypercall_args args = {
		.r10 = nr,
		.r11 = p1,
		.r12 = p2,
		.r13 = p3,
		.r14 = p4,
	};

	return __tdx_hypercall(&args, 0);
}
EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
#endif

/*
 * Used for TDX guests to make calls directly to the TD module. This
 * should only be used for calls that have no legitimate reason to fail
 * or where the kernel can not survive the call failing.
 */
static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
				   struct tdx_module_output *out)
{
	if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
		panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}

static void tdx_parse_tdinfo(u64 *cc_mask)
{
	struct tdx_module_output out;
	unsigned int gpa_width;
	u64 td_attr;

	/*
	 * TDINFO TDX module call is used to get the TD execution environment
	 * information like GPA width, number of available vcpus, debug mode
	 * information, etc. More details about the ABI can be found in TDX
	 * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
	 * [TDG.VP.INFO].
	 */
	tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);

	/*
	 * The highest bit of a guest physical address is the "sharing" bit.
	 * Set it for shared pages and clear it for private pages.
	 *
	 * The GPA width that comes out of this call is critical. TDX guests
	 * can not meaningfully run without it.
	 */
	gpa_width = out.rcx & GENMASK(5, 0);
	*cc_mask = BIT_ULL(gpa_width - 1);

	/*
	 * The kernel can not handle #VE's when accessing normal kernel
	 * memory. Ensure that no #VE will be delivered for accesses to
	 * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
	 */
	td_attr = out.rdx;
	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
		panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.\n");
}

/*
 * The TDX module spec states that #VE may be injected for a limited set of
 * reasons:
 *
 *  - Emulation of the architectural #VE injection on EPT violation;
 *
 *  - As a result of guest TD execution of a disallowed instruction,
 *    a disallowed MSR access, or CPUID virtualization;
 *
 *  - A notification to the guest TD about anomalous behavior;
 *
 * The last one is opt-in and is not used by the kernel.
 *
 * The Intel Software Developer's Manual describes cases when instruction
 * length field can be used in section "Information for VM Exits Due to
 * Instruction Execution".
 *
 * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
 * information if #VE occurred due to instruction execution, but not for EPT
 * violations.
 */
static int ve_instr_len(struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_HLT:
	case EXIT_REASON_MSR_READ:
	case EXIT_REASON_MSR_WRITE:
	case EXIT_REASON_CPUID:
	case EXIT_REASON_IO_INSTRUCTION:
		/* It is safe to use ve->instr_len for #VE due to instructions */
		return ve->instr_len;
	case EXIT_REASON_EPT_VIOLATION:
		/*
		 * For EPT violations, ve->instr_len is not defined. For those,
		 * the kernel must decode instructions manually and should not
		 * be using this function.
		 */
		WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
		return 0;
	default:
		WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
		return ve->instr_len;
	}
}

static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_HLT),
		.r12 = irq_disabled,
	};

	/*
	 * Emulate HLT operation via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
	 *
	 * The VMM uses the "IRQ disabled" param to understand IRQ
	 * enabled status (RFLAGS.IF) of the TD guest and to determine
	 * whether or not it should schedule the halted vCPU if an
	 * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
	 * can keep the vCPU in virtual HLT, even if an IRQ is
	 * pending, without hanging/breaking the guest.
	 */
	return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
}

static int handle_halt(struct ve_info *ve)
{
	/*
	 * Since non-safe halt is mainly used in CPU offlining
	 * and the guest will always stay in the halt state, don't
	 * call the STI instruction (set do_sti as false).
	 */
	const bool irq_disabled = irqs_disabled();
	const bool do_sti = false;

	if (__halt(irq_disabled, do_sti))
		return -EIO;

	return ve_instr_len(ve);
}

void __cpuidle tdx_safe_halt(void)
{
	/*
	 * In the do_sti=true case, __tdx_hypercall() enables interrupts
	 * using the STI instruction before the TDCALL, so set
	 * irq_disabled to false.
	 */
	const bool irq_disabled = false;
	const bool do_sti = true;

	/*
	 * Use WARN_ONCE() to report the failure.
	 */
	if (__halt(irq_disabled, do_sti))
		WARN_ONCE(1, "HLT instruction emulation failed\n");
}

static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_MSR_READ),
		.r12 = regs->cx,
	};

	/*
	 * Emulate the MSR read via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
	 */
	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return -EIO;

	regs->ax = lower_32_bits(args.r11);
	regs->dx = upper_32_bits(args.r11);
	return ve_instr_len(ve);
}

static int write_msr(struct pt_regs *regs, struct ve_info *ve)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_MSR_WRITE),
		.r12 = regs->cx,
		.r13 = (u64)regs->dx << 32 | regs->ax,
	};

	/*
	 * Emulate the MSR write via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
	 */
	if (__tdx_hypercall(&args, 0))
		return -EIO;

	return ve_instr_len(ve);
}

static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_CPUID),
		.r12 = regs->ax,
		.r13 = regs->cx,
	};

	/*
	 * Only allow VMM to control range reserved for hypervisor
	 * communication.
	 *
	 * Return all-zeros for any CPUID outside the range. This matches
	 * CPU behaviour for a non-supported leaf.
	 */
	if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
		regs->ax = regs->bx = regs->cx = regs->dx = 0;
		return ve_instr_len(ve);
	}

	/*
	 * Emulate the CPUID instruction via a hypercall. More info about
	 * ABI can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
	 */
	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return -EIO;

	/*
	 * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
	 * EAX, EBX, ECX, EDX registers after the CPUID instruction execution.
	 * So copy the register contents back to pt_regs.
	 */
	regs->ax = args.r12;
	regs->bx = args.r13;
	regs->cx = args.r14;
	regs->dx = args.r15;

	return ve_instr_len(ve);
}

static bool mmio_read(int size, unsigned long addr, unsigned long *val)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_EPT_VIOLATION),
		.r12 = size,
		.r13 = EPT_READ,
		.r14 = addr,
		.r15 = *val,
	};

	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return false;
	*val = args.r11;
	return true;
}

static bool mmio_write(int size, unsigned long addr, unsigned long val)
{
	return !_tdx_hypercall(hcall_func(EXIT_REASON_EPT_VIOLATION), size,
			       EPT_WRITE, addr, val);
}

static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
	unsigned long *reg, val, vaddr;
	char buffer[MAX_INSN_SIZE];
	struct insn insn = {};
	enum mmio_type mmio;
	int size, extend_size;
	u8 extend_val = 0;

	/* Only in-kernel MMIO is supported */
	if (WARN_ON_ONCE(user_mode(regs)))
		return -EFAULT;

	if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
		return -EFAULT;

	if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
		return -EINVAL;

	mmio = insn_decode_mmio(&insn, &size);
	if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
		return -EINVAL;

	if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
		reg = insn_get_modrm_reg_ptr(&insn, regs);
		if (!reg)
			return -EINVAL;
	}

	/*
	 * Reject EPT violation #VEs that split pages.
	 *
	 * MMIO accesses are supposed to be naturally aligned and therefore
	 * never cross page boundaries. Seeing split page accesses indicates
	 * a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
	 *
	 * load_unaligned_zeropad() will recover using exception fixups.
	 */
	vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
	if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
		return -EFAULT;

	/* Handle writes first */
	switch (mmio) {
	case MMIO_WRITE:
		memcpy(&val, reg, size);
		if (!mmio_write(size, ve->gpa, val))
			return -EIO;
		return insn.length;
	case MMIO_WRITE_IMM:
		val = insn.immediate.value;
		if (!mmio_write(size, ve->gpa, val))
			return -EIO;
		return insn.length;
	case MMIO_READ:
	case MMIO_READ_ZERO_EXTEND:
	case MMIO_READ_SIGN_EXTEND:
		/* Reads are handled below */
		break;
	case MMIO_MOVS:
	case MMIO_DECODE_FAILED:
		/*
		 * MMIO was accessed with an instruction that could not be
		 * decoded or handled properly. It was likely not using io.h
		 * helpers or accessed MMIO accidentally.
		 */
		return -EINVAL;
	default:
		WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
		return -EINVAL;
	}

	/* Handle reads */
	if (!mmio_read(size, ve->gpa, &val))
		return -EIO;

	switch (mmio) {
	case MMIO_READ:
		/* Zero-extend for 32-bit operation */
		extend_size = size == 4 ? sizeof(*reg) : 0;
		break;
	case MMIO_READ_ZERO_EXTEND:
		/* Zero extend based on operand size */
		extend_size = insn.opnd_bytes;
		break;
	case MMIO_READ_SIGN_EXTEND:
		/* Sign extend based on operand size */
		extend_size = insn.opnd_bytes;
		if (size == 1 && val & BIT(7))
			extend_val = 0xFF;
		else if (size > 1 && val & BIT(15))
			extend_val = 0xFF;
		break;
	default:
		/* All other cases have to be covered with the first switch() */
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	if (extend_size)
		memset(reg, extend_val, extend_size);
	memcpy(reg, &val, size);
	return insn.length;
}

static bool handle_in(struct pt_regs *regs, int size, int port)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION),
		.r12 = size,
		.r13 = PORT_READ,
		.r14 = port,
	};
	u64 mask = GENMASK(BITS_PER_BYTE * size, 0);
	bool success;

	/*
	 * Emulate the I/O read via hypercall. More info about ABI can be found
	 * in TDX Guest-Host-Communication Interface (GHCI) section titled
	 * "TDG.VP.VMCALL<Instruction.IO>".
	 */
	success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);

	/* Update part of the register affected by the emulated instruction */
	regs->ax &= ~mask;
	if (success)
		regs->ax |= args.r11 & mask;

	return success;
}

static bool handle_out(struct pt_regs *regs, int size, int port)
{
	u64 mask = GENMASK(BITS_PER_BYTE * size, 0);

	/*
	 * Emulate the I/O write via hypercall. More info about ABI can be found
	 * in TDX Guest-Host-Communication Interface (GHCI) section titled
	 * "TDG.VP.VMCALL<Instruction.IO>".
	 */
	return !_tdx_hypercall(hcall_func(EXIT_REASON_IO_INSTRUCTION), size,
			       PORT_WRITE, port, regs->ax & mask);
}

/*
 * Emulate I/O using hypercall.
 *
 * Assumes the IO instruction was using ax, which is enforced
 * by the standard io.h macros.
 *
 * Returns the number of bytes to advance RIP on success, or -errno on
 * failure.
 */
static int handle_io(struct pt_regs *regs, struct ve_info *ve)
{
	u32 exit_qual = ve->exit_qual;
	int size, port;
	bool in, ret;

	if (VE_IS_IO_STRING(exit_qual))
		return -EIO;

	in   = VE_IS_IO_IN(exit_qual);
	size = VE_GET_IO_SIZE(exit_qual);
	port = VE_GET_PORT_NUM(exit_qual);

	if (in)
		ret = handle_in(regs, size, port);
	else
		ret = handle_out(regs, size, port);
	if (!ret)
		return -EIO;

	return ve_instr_len(ve);
}

/*
 * Early #VE exception handler. Only handles a subset of port I/O.
 * Intended only for earlyprintk. Returns false on failure.
 */
__init bool tdx_early_handle_ve(struct pt_regs *regs)
{
	struct ve_info ve;
	int insn_len;

	tdx_get_ve_info(&ve);

	if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
		return false;

	insn_len = handle_io(regs, &ve);
	if (insn_len < 0)
		return false;

	regs->ip += insn_len;
	return true;
}

void tdx_get_ve_info(struct ve_info *ve)
{
	struct tdx_module_output out;

	/*
	 * Called during #VE handling to retrieve the #VE info from the
	 * TDX module.
	 *
	 * This has to be called early in #VE handling. A "nested" #VE which
	 * occurs before this will raise a #DF and is not recoverable.
	 *
	 * The call retrieves the #VE info from the TDX module, which also
	 * clears the "#VE valid" flag. This must be done before anything else
	 * because any #VE that occurs while the valid flag is set will lead to
	 * #DF.
	 *
	 * Note, the TDX module treats virtual NMIs as inhibited if the #VE
	 * valid flag is set. This means that NMI=>#VE will not result in a #DF.
	 */
	tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);

	/* Transfer the output parameters */
	ve->exit_reason = out.rcx;
	ve->exit_qual   = out.rdx;
	ve->gla         = out.r8;
	ve->gpa         = out.r9;
	ve->instr_len   = lower_32_bits(out.r10);
	ve->instr_info  = upper_32_bits(out.r10);
}

/*
 * Handle the user-initiated #VE.
 *
 * On success, returns the number of bytes RIP should be incremented (>=0)
 * or -errno on error.
 */
static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_CPUID:
		return handle_cpuid(regs, ve);
	default:
		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
		return -EIO;
	}
}

/*
 * Handle the kernel #VE.
 *
 * On success, returns the number of bytes RIP should be incremented (>=0)
 * or -errno on error.
 */
static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_HLT:
		return handle_halt(ve);
	case EXIT_REASON_MSR_READ:
		return read_msr(regs, ve);
	case EXIT_REASON_MSR_WRITE:
		return write_msr(regs, ve);
	case EXIT_REASON_CPUID:
		return handle_cpuid(regs, ve);
	case EXIT_REASON_EPT_VIOLATION:
		return handle_mmio(regs, ve);
	case EXIT_REASON_IO_INSTRUCTION:
		return handle_io(regs, ve);
	default:
		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
		return -EIO;
	}
}

bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
{
	int insn_len;

	if (user_mode(regs))
		insn_len = virt_exception_user(regs, ve);
	else
		insn_len = virt_exception_kernel(regs, ve);
	if (insn_len < 0)
		return false;

	/* After successful #VE handling, move the IP */
	regs->ip += insn_len;

	return true;
}

static bool tdx_tlb_flush_required(bool private)
{
	/*
	 * TDX guest is responsible for flushing TLB on private->shared
	 * transition. VMM is responsible for flushing on shared->private.
	 *
	 * The VMM _can't_ flush private addresses as it can't generate PAs
	 * with the guest's HKID. Shared memory isn't subject to integrity
	 * checking, i.e. the VMM doesn't need to flush for its own protection.
	 *
	 * There's no need to flush when converting from shared to private,
	 * as flushing is the VMM's responsibility in this case, e.g. it must
	 * flush to avoid integrity failures in the face of a buggy or
	 * malicious guest.
	 */
	return !private;
}

static bool tdx_cache_flush_required(void)
{
	/*
	 * AMD SME/SEV can avoid cache flushing if HW enforces cache coherence.
	 * TDX doesn't have such capability.
	 *
	 * Flush cache unconditionally.
	 */
	return true;
}

static bool try_accept_one(phys_addr_t *start, unsigned long len,
			   enum pg_level pg_level)
{
	unsigned long accept_size = page_level_size(pg_level);
	u64 tdcall_rcx;
	u8 page_size;

	if (!IS_ALIGNED(*start, accept_size))
		return false;

	if (len < accept_size)
		return false;

	/*
	 * Pass the page physical address to the TDX module to accept the
	 * pending, private page.
	 *
	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
	 */
	switch (pg_level) {
	case PG_LEVEL_4K:
		page_size = 0;
		break;
	case PG_LEVEL_2M:
		page_size = 1;
		break;
	case PG_LEVEL_1G:
		page_size = 2;
		break;
	default:
		return false;
	}

	tdcall_rcx = *start | page_size;
	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
		return false;

	*start += accept_size;
	return true;
}

/*
 * Inform the VMM of the guest's intent for this physical page: shared with
 * the VMM or private to the guest. The VMM is expected to change its mapping
 * of the page in response.
 */
static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
{
	phys_addr_t start = __pa(vaddr);
	phys_addr_t end   = __pa(vaddr + numpages * PAGE_SIZE);

	if (!enc) {
		/* Set the shared (decrypted) bits: */
		start |= cc_mkdec(0);
		end   |= cc_mkdec(0);
	}

	/*
	 * Notify the VMM about page mapping conversion. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface (GHCI),
	 * section "TDG.VP.VMCALL<MapGPA>"
	 */
	if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
		return false;

	/* private->shared conversion requires only MapGPA call */
	if (!enc)
		return true;

	/*
	 * For shared->private conversion, accept the page using
	 * TDX_ACCEPT_PAGE TDX module call.
	 */
	while (start < end) {
		unsigned long len = end - start;

		/*
		 * Try larger accepts first. It gives the VMM a chance to keep
		 * 1G/2M SEPT entries where possible and speeds up the process
		 * by cutting the number of hypercalls (if successful).
		 */

		if (try_accept_one(&start, len, PG_LEVEL_1G))
			continue;

		if (try_accept_one(&start, len, PG_LEVEL_2M))
			continue;

		if (!try_accept_one(&start, len, PG_LEVEL_4K))
			return false;
	}

	return true;
}

void __init tdx_early_init(void)
{
	u64 cc_mask;
	u32 eax, sig[3];

	cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);

	if (memcmp(TDX_IDENT, sig, sizeof(sig)))
		return;

	setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);

	cc_set_vendor(CC_VENDOR_INTEL);
	tdx_parse_tdinfo(&cc_mask);
	cc_set_mask(cc_mask);

	/*
	 * All bits above GPA width are reserved and kernel treats shared bit
	 * as flag, not as part of physical address.
	 *
	 * Adjust physical mask to only cover valid GPA bits.
	 */
	physical_mask &= cc_mask - 1;

	x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
	x86_platform.guest.enc_tlb_flush_required   = tdx_tlb_flush_required;
	x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;

	pr_info("Guest detected\n");
}