/*
 * QEMU TDX support
 *
 * Copyright (c) 2025 Intel Corporation
 *
 * Author:
 *      Xiaoyao Li <xiaoyao.li@intel.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/base64.h"
#include "qemu/mmap-alloc.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-sockets.h"
#include "qom/object_interfaces.h"
#include "crypto/hash.h"
#include "system/kvm_int.h"
#include "system/runstate.h"
#include "system/system.h"
#include "system/ramblock.h"
#include "system/address-spaces.h"

#include <linux/kvm_para.h>

#include "cpu.h"
#include "cpu-internal.h"
#include "host-cpu.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
#include "hw/i386/tdvf-hob.h"
#include "kvm_i386.h"
#include "tdx.h"
#include "tdx-quote-generator.h"

#include "standard-headers/asm-x86/kvm_para.h"

#define TDX_MIN_TSC_FREQUENCY_KHZ   (100 * 1000)
#define TDX_MAX_TSC_FREQUENCY_KHZ   (10 * 1000 * 1000)

#define TDX_TD_ATTRIBUTES_DEBUG             BIT_ULL(0)
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
#define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
#define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)

#define TDX_SUPPORTED_TD_ATTRS  (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE | \
                                 TDX_TD_ATTRIBUTES_PKS | \
                                 TDX_TD_ATTRIBUTES_PERFMON)

#define TDX_SUPPORTED_KVM_FEATURES  ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
                                     (1U << KVM_FEATURE_PV_UNHALT) | \
                                     (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
                                     (1U << KVM_FEATURE_PV_SEND_IPI) | \
                                     (1U << KVM_FEATURE_POLL_CONTROL) | \
                                     (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
                                     (1U << KVM_FEATURE_MSI_EXT_DEST_ID))

static TdxGuest *tdx_guest;

static struct kvm_tdx_capabilities *tdx_caps;
static struct kvm_cpuid2 *tdx_supported_cpuid;

/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
bool is_tdx_vm(void)
{
    return !!tdx_guest;
}

enum tdx_ioctl_level {
    TDX_VM_IOCTL,
    TDX_VCPU_IOCTL,
};

static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
                              int cmd_id, __u32 flags, void *data,
                              Error **errp)
{
    struct kvm_tdx_cmd tdx_cmd = {};
    int r;

    const char *tdx_ioctl_name[] = {
        [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
        [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
        [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
        [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
        [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
        [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
    };

    tdx_cmd.id = cmd_id;
    tdx_cmd.flags = flags;
    tdx_cmd.data = (__u64)(unsigned long)data;

    switch (level) {
    case TDX_VM_IOCTL:
        r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
        break;
    case TDX_VCPU_IOCTL:
        r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
        break;
    default:
        error_setg(errp, "Invalid tdx_ioctl_level %d", level);
        return -EINVAL;
    }

    if (r < 0) {
        error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
                         tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
    }
    return r;
}

static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
                               Error **errp)
{
    return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
}

static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
                                 void *data, Error **errp)
{
    return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
}
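
/*
 * Illustrative note (a sketch, not additional functionality): both wrappers
 * above funnel into the same KVM_MEMORY_ENCRYPT_OP ioctl; only the target fd
 * (VM vs. vCPU) differs. A typical call site looks like:
 *
 *     Error *err = NULL;
 *     if (tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &err) < 0) {
 *         error_report_err(err);
 *     }
 */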
static int get_tdx_capabilities(Error **errp)
{
    struct kvm_tdx_capabilities *caps;
    /* 1st generation of TDX reports 6 cpuid configs */
    int nr_cpuid_configs = 6;
    size_t size;
    int r;

    do {
        Error *local_err = NULL;
        size = sizeof(struct kvm_tdx_capabilities) +
               nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
        caps = g_malloc0(size);
        caps->cpuid.nent = nr_cpuid_configs;

        r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
        if (r == -E2BIG) {
            g_free(caps);
            nr_cpuid_configs *= 2;
            if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
                error_report("KVM TDX seems broken: the number of CPUID entries"
                             " in kvm_tdx_capabilities exceeds the limit: %d",
                             KVM_MAX_CPUID_ENTRIES);
                error_propagate(errp, local_err);
                return r;
            }
            error_free(local_err);
        } else if (r < 0) {
            g_free(caps);
            error_propagate(errp, local_err);
            return r;
        }
    } while (r == -E2BIG);

    tdx_caps = caps;

    return 0;
}

void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
{
    assert(!tdx_guest->tdvf_mr);
    tdx_guest->tdvf_mr = tdvf_mr;
}

static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
{
    TdxFirmwareEntry *entry;

    for_each_tdx_fw_entry(&tdx->tdvf, entry) {
        if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
            return entry;
        }
    }
    error_report("TDVF metadata doesn't specify TD_HOB location.");
    exit(1);
}

static void tdx_add_ram_entry(uint64_t address, uint64_t length,
                              enum TdxRamType type)
{
    uint32_t nr_entries = tdx_guest->nr_ram_entries;
    tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
                                     nr_entries + 1);

    tdx_guest->ram_entries[nr_entries].address = address;
    tdx_guest->ram_entries[nr_entries].length = length;
    tdx_guest->ram_entries[nr_entries].type = type;
    tdx_guest->nr_ram_entries++;
}
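
/*
 * Worked example for tdx_accept_ram_range() below (illustrative numbers, not
 * from the TDVF spec): accepting [0x810000, 0x820000) out of an unaccepted
 * entry [0x800000, 0x900000) rewrites that entry in place as TDX_RAM_ADDED
 * and appends two TDX_RAM_UNACCEPTED entries, a head [0x800000, 0x810000)
 * and a tail [0x820000, 0x900000). The entry list is re-sorted by address
 * later in tdx_finalize_vm().
 */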
static int tdx_accept_ram_range(uint64_t address, uint64_t length)
{
    uint64_t head_start, tail_start, head_length, tail_length;
    uint64_t tmp_address, tmp_length;
    TdxRamEntry *e;
    int i = 0;

    do {
        if (i == tdx_guest->nr_ram_entries) {
            return -1;
        }

        e = &tdx_guest->ram_entries[i++];
    } while (address + length <= e->address || address >= e->address + e->length);

    /*
     * The to-be-accepted ram range must be fully contained by one
     * RAM entry.
     */
    if (e->address > address ||
        e->address + e->length < address + length) {
        return -1;
    }

    if (e->type == TDX_RAM_ADDED) {
        return 0;
    }

    tmp_address = e->address;
    tmp_length = e->length;

    e->address = address;
    e->length = length;
    e->type = TDX_RAM_ADDED;

    head_length = address - tmp_address;
    if (head_length > 0) {
        head_start = tmp_address;
        tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
    }

    tail_start = address + length;
    if (tail_start < tmp_address + tmp_length) {
        tail_length = tmp_address + tmp_length - tail_start;
        tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
    }

    return 0;
}

static int tdx_ram_entry_compare(const void *lhs_, const void *rhs_)
{
    const TdxRamEntry *lhs = lhs_;
    const TdxRamEntry *rhs = rhs_;

    if (lhs->address == rhs->address) {
        return 0;
    }
    if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
        return 1;
    }
    return -1;
}

static void tdx_init_ram_entries(void)
{
    unsigned i, j, nr_e820_entries;

    nr_e820_entries = e820_get_table(NULL);
    tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);

    for (i = 0, j = 0; i < nr_e820_entries; i++) {
        uint64_t addr, len;

        if (e820_get_entry(i, E820_RAM, &addr, &len)) {
            tdx_guest->ram_entries[j].address = addr;
            tdx_guest->ram_entries[j].length = len;
            tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
            j++;
        }
    }
    tdx_guest->nr_ram_entries = j;
}

static void tdx_post_init_vcpus(void)
{
    TdxFirmwareEntry *hob;
    CPUState *cpu;

    hob = tdx_get_hob_entry(tdx_guest);
    CPU_FOREACH(cpu) {
        tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address,
                       &error_fatal);
    }
}
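
/*
 * Machine-init-done hook. A descriptive summary of the sequence implemented
 * below (for orientation only):
 *  1. build the RAM entry list from the e820 table;
 *  2. locate/mmap each TDVF section and mark TD_HOB/TEMP_MEM ranges accepted;
 *  3. create the TD HOB list and run KVM_TDX_INIT_VCPU on every vCPU;
 *  4. copy each section into private memory via KVM_TDX_INIT_MEM_REGION,
 *     optionally extending the measurement (MR_EXTEND);
 *  5. discard the now-unneeded shared TDVF copy and KVM_TDX_FINALIZE_VM.
 */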
static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
    TdxFirmware *tdvf = &tdx_guest->tdvf;
    TdxFirmwareEntry *entry;
    RAMBlock *ram_block;
    Error *local_err = NULL;
    int r;

    tdx_init_ram_entries();

    for_each_tdx_fw_entry(tdvf, entry) {
        switch (entry->type) {
        case TDVF_SECTION_TYPE_BFV:
        case TDVF_SECTION_TYPE_CFV:
            entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
            break;
        case TDVF_SECTION_TYPE_TD_HOB:
        case TDVF_SECTION_TYPE_TEMP_MEM:
            entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
                                           qemu_real_host_page_size(), 0, 0);
            if (entry->mem_ptr == MAP_FAILED) {
                error_report("Failed to mmap memory for TDVF section %d",
                             entry->type);
                exit(1);
            }
            if (tdx_accept_ram_range(entry->address, entry->size)) {
                error_report("Failed to accept memory for TDVF section %d",
                             entry->type);
                qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
                exit(1);
            }
            break;
        default:
            error_report("Unsupported TDVF section %d", entry->type);
            exit(1);
        }
    }

    qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
          sizeof(TdxRamEntry), &tdx_ram_entry_compare);

    tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));

    tdx_post_init_vcpus();

    for_each_tdx_fw_entry(tdvf, entry) {
        struct kvm_tdx_init_mem_region region;
        uint32_t flags;

        region = (struct kvm_tdx_init_mem_region) {
            .source_addr = (uintptr_t)entry->mem_ptr,
            .gpa = entry->address,
            .nr_pages = entry->size >> 12,      /* 4KiB pages */
        };

        flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
                KVM_TDX_MEASURE_MEMORY_REGION : 0;

        do {
            error_free(local_err);
            local_err = NULL;
            r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
                               &region, &local_err);
        } while (r == -EAGAIN || r == -EINTR);
        if (r < 0) {
            error_report_err(local_err);
            exit(1);
        }

        if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
            entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
            qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
            entry->mem_ptr = NULL;
        }
    }

    /*
     * The TDVF image has been copied into the private region above via
     * KVM_TDX_INIT_MEM_REGION; the shared copy is no longer needed and
     * can be discarded.
     */
    ram_block = tdx_guest->tdvf_mr->ram_block;
    ram_block_discard_range(ram_block, 0, ram_block->max_length);

    tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
    CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
}

static Notifier tdx_machine_done_notify = {
    .notify = tdx_finalize_vm,
};

/*
 * Some CPUID bits change from fixed1 to configurable bits when the TDX module
 * supports TDX_FEATURES0.VE_REDUCTION, e.g., MCA/MCE/MTRR/CORE_CAPABILITY.
 *
 * To make QEMU work with all versions of the TDX module, keep a bit in
 * tdx_fixed1_bits if it is fixed1 in any module version, even when it is not
 * fixed1 in the latest one. Otherwise, with an older TDX module, QEMU may
 * treat a fixed1 bit as unsupported.
 *
 * For newer TDX modules it does no harm to keep such bits in tdx_fixed1_bits
 * even though they have become configurable, because tdx_fixed1_bits is only
 * used to set up the supported bits.
 */
KvmCpuidInfo tdx_fixed1_bits = {
    .cpuid.nent = 8,
    .entries[0] = {
        .function = 0x1,
        .index = 0,
        .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
               CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
               CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
               CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
               CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
               CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
        .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
               CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
               CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
               CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
               CPUID_SSE | CPUID_SSE2,
    },
    .entries[1] = {
        .function = 0x6,
        .index = 0,
        .eax = CPUID_6_EAX_ARAT,
    },
    .entries[2] = {
        .function = 0x7,
        .index = 0,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
               CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
               CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
               CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
               CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
        .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
               CPUID_7_0_ECX_MOVDIR64B,
        .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
               CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
               CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
               CPUID_7_0_EDX_SPEC_CTRL_SSBD,
    },
    .entries[3] = {
        .function = 0x7,
        .index = 2,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
               CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
    },
    .entries[4] = {
        .function = 0xD,
        .index = 0,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
    },
    .entries[5] = {
        .function = 0xD,
        .index = 1,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
               CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
    },
    .entries[6] = {
        .function = 0x80000001,
        .index = 0,
        .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
        /*
         * Strictly speaking, SYSCALL is not a fixed1 bit since it depends on
         * the CPU being in 64-bit mode. But here fixed1 serves the purpose of
         * enumerating the supported bits for TDX, and in that sense SYSCALL
         * is always supported.
         */
        .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
               CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
    },
    .entries[7] = {
        .function = 0x80000007,
        .index = 0,
        .edx = CPUID_APM_INVTSC,
    },
};

typedef struct TdxAttrsMap {
    uint32_t attr_index;
    uint32_t cpuid_leaf;
    uint32_t cpuid_subleaf;
    int cpuid_reg;
    uint32_t feat_mask;
} TdxAttrsMap;

static TdxAttrsMap tdx_attrs_maps[] = {
    {.attr_index = 27,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 1,
     .cpuid_reg = R_EAX,
     .feat_mask = CPUID_7_1_EAX_LASS,},

    {.attr_index = 30,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 0,
     .cpuid_reg = R_ECX,
     .feat_mask = CPUID_7_0_ECX_PKS,},

    {.attr_index = 31,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 0,
     .cpuid_reg = R_ECX,
     .feat_mask = CPUID_7_0_ECX_KeyLocker,},
};

typedef struct TdxXFAMDep {
    int xfam_bit;
    FeatureMask feat_mask;
} TdxXFAMDep;

/*
 * Note: only the CPUID bits whose virtualization type is "XFAM & Native" are
 * defined here.
 *
 * Bits whose virtualization type is "XFAM & Configured & Native" are reported
 * as configurable bits instead, and they are not supported unless they appear
 * in the configurable bits list from KVM, even if the corresponding XFAM bit
 * is supported.
 */
TdxXFAMDep tdx_xfam_deps[] = {
    { XSTATE_YMM_BIT,       { FEAT_1_ECX, CPUID_EXT_FMA }},
    { XSTATE_YMM_BIT,       { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
    { XSTATE_OPMASK_BIT,    { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI }},
    { XSTATE_OPMASK_BIT,    { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16 }},
    { XSTATE_PT_BIT,        { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT }},
    { XSTATE_PKRU_BIT,      { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
};

static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
                                                        uint32_t index)
{
    struct kvm_cpuid_entry2 *e;

    e = cpuid_find_entry(tdx_supported_cpuid, function, index);
    if (!e) {
        if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
            error_report("tdx_supported_cpuid requires more space than %d entries",
                         KVM_MAX_CPUID_ENTRIES);
            exit(1);
        }
        e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
        e->function = function;
        e->index = index;
    }

    return e;
}

static void tdx_add_supported_cpuid_by_fixed1_bits(void)
{
    struct kvm_cpuid_entry2 *e, *e1;
    int i;

    for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
        e = &tdx_fixed1_bits.entries[i];

        e1 = find_in_supported_entry(e->function, e->index);
        e1->eax |= e->eax;
        e1->ebx |= e->ebx;
        e1->ecx |= e->ecx;
        e1->edx |= e->edx;
    }
}

static void tdx_add_supported_cpuid_by_attrs(void)
{
    struct kvm_cpuid_entry2 *e;
    TdxAttrsMap *map;
    int i;

    for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
        map = &tdx_attrs_maps[i];
        if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
            continue;
        }

        e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);

        switch (map->cpuid_reg) {
        case R_EAX:
            e->eax |= map->feat_mask;
            break;
        case R_EBX:
            e->ebx |= map->feat_mask;
            break;
        case R_ECX:
            e->ecx |= map->feat_mask;
            break;
        case R_EDX:
            e->edx |= map->feat_mask;
            break;
        }
    }
}
static void tdx_add_supported_cpuid_by_xfam(void)
{
    struct kvm_cpuid_entry2 *e;
    const TdxXFAMDep *xfam_dep;
    const FeatureWordInfo *f;
    int i;

    for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
        xfam_dep = &tdx_xfam_deps[i];
        if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
            continue;
        }

        f = &feature_word_info[xfam_dep->feat_mask.index];
        if (f->type != CPUID_FEATURE_WORD) {
            continue;
        }

        e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
        switch (f->cpuid.reg) {
        case R_EAX:
            e->eax |= xfam_dep->feat_mask.mask;
            break;
        case R_EBX:
            e->ebx |= xfam_dep->feat_mask.mask;
            break;
        case R_ECX:
            e->ecx |= xfam_dep->feat_mask.mask;
            break;
        case R_EDX:
            e->edx |= xfam_dep->feat_mask.mask;
            break;
        }
    }

    e = find_in_supported_entry(0xd, 0);
    e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
    e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;

    e = find_in_supported_entry(0xd, 1);
    /*
     * Mark XFD as always supported for TDX; it is eventually cleared in
     * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware,
     * because in that case the original data has it as 0.
     */
    e->eax |= CPUID_XSAVE_XFD;
    e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
    e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
}

static void tdx_add_supported_kvm_features(void)
{
    struct kvm_cpuid_entry2 *e;

    e = find_in_supported_entry(0x40000001, 0);
    e->eax = TDX_SUPPORTED_KVM_FEATURES;
}

static void tdx_setup_supported_cpuid(void)
{
    if (tdx_supported_cpuid) {
        return;
    }

    tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
                    KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));

    memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
           tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
    tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;

    tdx_add_supported_cpuid_by_fixed1_bits();
    tdx_add_supported_cpuid_by_attrs();
    tdx_add_supported_cpuid_by_xfam();

    tdx_add_supported_kvm_features();
}
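
/*
 * tdx_kvm_init() below enforces the machine configuration constraints: SMM
 * and PIC must be off and the kernel irqchip must be split. An illustrative
 * command line satisfying them (shown as a sketch, not normative) might be:
 *
 *     qemu-system-x86_64 -accel kvm \
 *         -machine q35,kernel-irqchip=split,confidential-guest-support=tdx0 \
 *         -object tdx-guest,id=tdx0
 */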
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    X86MachineState *x86ms = X86_MACHINE(ms);
    TdxGuest *tdx = TDX_GUEST(cgs);
    int r = 0;

    kvm_mark_guest_state_protected();

    if (x86ms->smm == ON_OFF_AUTO_AUTO) {
        x86ms->smm = ON_OFF_AUTO_OFF;
    } else if (x86ms->smm == ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM doesn't support SMM");
        return -EINVAL;
    }

    if (x86ms->pic == ON_OFF_AUTO_AUTO) {
        x86ms->pic = ON_OFF_AUTO_OFF;
    } else if (x86ms->pic == ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM doesn't support PIC");
        return -EINVAL;
    }

    if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
        kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
    } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM requires kernel_irqchip to be split");
        return -EINVAL;
    }

    if (!tdx_caps) {
        r = get_tdx_capabilities(errp);
        if (r) {
            return r;
        }
    }

    tdx_setup_supported_cpuid();

    /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
    if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
        return -EOPNOTSUPP;
    }

    /*
     * Set kvm_readonly_mem_allowed to false, because TDX only supports
     * read-only memory for shared memory but not for private memory. Besides,
     * whether a memslot is private or shared is not determined by QEMU.
     *
     * Thus, just mark read-only memory as unsupported for simplicity.
     */
    kvm_readonly_mem_allowed = false;

    qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);

    tdx_guest = tdx;
    return 0;
}

static int tdx_kvm_type(X86ConfidentialGuest *cg)
{
    /* Do the object check */
    TDX_GUEST(cg);

    return KVM_X86_TDX_VM;
}

static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
{
    X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
    X86CPU *x86cpu = X86_CPU(cpu);

    if (xcc->model) {
        error_report("Named CPU models are not supported for TDX yet!");
        exit(1);
    }

    object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);

    /* invtsc is fixed1 for TD guest */
    object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);

    x86cpu->force_cpuid_0x1f = true;
}

static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
                                          uint32_t feature, uint32_t index,
                                          int reg, uint32_t value)
{
    struct kvm_cpuid_entry2 *e;

    e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
    if (e) {
        value |= cpuid_entry_get_reg(e, reg);
    }

    if (is_feature_word_cpuid(feature, index, reg)) {
        e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
        if (e) {
            value &= cpuid_entry_get_reg(e, reg);
        }
    }

    return value;
}

static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
{
    struct kvm_cpuid2 *fetch_cpuid;
    int size = KVM_MAX_CPUID_ENTRIES;
    Error *local_err = NULL;
    int r;

    do {
        error_free(local_err);
        local_err = NULL;

        fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
                                sizeof(struct kvm_cpuid_entry2) * size);
        fetch_cpuid->nent = size;
        r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
        if (r == -E2BIG) {
            /* KVM updated nent to the required size; read it before freeing */
            size = fetch_cpuid->nent;
            g_free(fetch_cpuid);
        }
    } while (r == -E2BIG);

    if (r < 0) {
        error_report_err(local_err);
        *ret = r;
        return NULL;
    }

    return fetch_cpuid;
}
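
/*
 * Cross-check the feature words QEMU computed against what KVM_TDX_GET_CPUID
 * reports for the TD, in both directions: bits requested but unavailable, and
 * bits TDX forcibly sets. With "-cpu ...,check" mismatches are reported; with
 * "enforce" they additionally fail the check with -EINVAL.
 */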
static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
{
    uint64_t actual, requested, unavailable, forced_on;
    g_autofree struct kvm_cpuid2 *fetch_cpuid;
    const char *forced_on_prefix = NULL;
    const char *unav_prefix = NULL;
    struct kvm_cpuid_entry2 *entry;
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    FeatureWordInfo *wi;
    FeatureWord w;
    bool mismatch = false;
    int r;

    fetch_cpuid = tdx_fetch_cpuid(cs, &r);
    if (!fetch_cpuid) {
        return r;
    }

    if (cpu->check_cpuid || cpu->enforce_cpuid) {
        unav_prefix = "TDX doesn't support requested feature";
        forced_on_prefix = "TDX forcibly sets the feature";
    }

    for (w = 0; w < FEATURE_WORDS; w++) {
        wi = &feature_word_info[w];
        actual = 0;

        switch (wi->type) {
        case CPUID_FEATURE_WORD:
            entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
            if (!entry) {
                /*
                 * If KVM doesn't report it, it means it's fully configurable
                 * by QEMU.
                 */
                continue;
            }

            actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
            break;
        case MSR_FEATURE_WORD:
            /*
             * TODO:
             * validate MSR features when KVM has an interface to report them.
             */
            continue;
        }

        /* Fixup for special cases */
        switch (w) {
        case FEAT_8000_0001_EDX:
            /*
             * Intel enumerates the SYSCALL bit as 1 only when the processor
             * is in 64-bit mode, and the vCPU is not in 64-bit mode before it
             * starts running.
             */
            actual |= CPUID_EXT2_SYSCALL;
            break;
        default:
            break;
        }

        requested = env->features[w];
        unavailable = requested & ~actual;
        mark_unavailable_features(cpu, w, unavailable, unav_prefix);
        if (unavailable) {
            mismatch = true;
        }

        forced_on = actual & ~requested;
        mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
        if (forced_on) {
            mismatch = true;
        }
    }

    if (cpu->enforce_cpuid && mismatch) {
        return -EINVAL;
    }

    if (cpu->phys_bits != host_cpu_phys_bits()) {
        error_report("TDX requires guest CPU physical bits (%u) "
                     "to match host CPU physical bits (%u)",
                     cpu->phys_bits, host_cpu_phys_bits());
        return -EINVAL;
    }

    return 0;
}

static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
{
    if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
        error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
                   "(KVM supported: 0x%"PRIx64")", tdx->attributes,
                   (uint64_t)tdx_caps->supported_attrs);
        return -1;
    }

    if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
        error_setg(errp, "Requested TD attribute bits not supported by QEMU: "
                   "0x%"PRIx64" (QEMU supported: 0x%"PRIx64")",
                   tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
        return -1;
    }

    return 0;
}

static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
{
    CPUX86State *env = &x86cpu->env;

    tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
                             TDX_TD_ATTRIBUTES_PKS : 0;
    tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;

    return tdx_validate_attributes(tdx_guest, errp);
}

static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
{
    CPUX86State *env = &x86cpu->env;
    uint64_t xfam;

    xfam = env->features[FEAT_XSAVE_XCR0_LO] |
           env->features[FEAT_XSAVE_XCR0_HI] |
           env->features[FEAT_XSAVE_XSS_LO] |
           env->features[FEAT_XSAVE_XSS_HI];

    if (xfam & ~tdx_caps->supported_xfam) {
        error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64")",
                   xfam, (uint64_t)tdx_caps->supported_xfam);
        return -1;
    }

    tdx_guest->xfam = xfam;
    return 0;
}

static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
{
    int i, dest_cnt = 0;
    struct kvm_cpuid_entry2 *src, *dest, *conf;

    for (i = 0; i < cpuids->nent; i++) {
        src = cpuids->entries + i;
        conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
        if (!conf) {
            continue;
        }
        dest = cpuids->entries + dest_cnt;

        dest->function = src->function;
        dest->index = src->index;
        dest->flags = src->flags;
        dest->eax = src->eax & conf->eax;
        dest->ebx = src->ebx & conf->ebx;
        dest->ecx = src->ecx & conf->ecx;
        dest->edx = src->edx & conf->edx;

        dest_cnt++;
    }
    cpuids->nent = dest_cnt;
}
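
/*
 * The mrconfigid/mrowner/mrownerconfig properties consumed below are
 * base64-encoded SHA-384 digests (48 bytes each). A hypothetical way to
 * produce one, shown for illustration only:
 *
 *     mrconfigid=$(echo -n "my config" | openssl dgst -sha384 -binary | base64)
 */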
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
    Error *local_err = NULL;
    size_t data_len;
    int retry = 10000;
    int r = 0;

    QEMU_LOCK_GUARD(&tdx_guest->lock);
    if (tdx_guest->initialized) {
        return r;
    }

    init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
                        sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);

    if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
        error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
        return -EOPNOTSUPP;
    }

    r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
                          0, TDX_APIC_BUS_CYCLES_NS);
    if (r < 0) {
        error_setg_errno(errp, -r,
                         "Unable to set core crystal clock frequency to 25MHz");
        return r;
    }

    if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
                         env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
        error_setg(errp, "Invalid TSC %"PRId64" kHz, must specify cpu_frequency "
                   "between [%d, %d] kHz", env->tsc_khz,
                   TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
        return -EINVAL;
    }

    if (env->tsc_khz % (25 * 1000)) {
        error_setg(errp, "Invalid TSC %"PRId64" kHz, it must be a multiple of 25MHz",
                   env->tsc_khz);
        return -EINVAL;
    }

    /* It's safe even if env->tsc_khz is 0; KVM uses the host's tsc_khz then */
    r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
    if (r < 0) {
        error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
                         env->tsc_khz);
        return r;
    }

    if (tdx_guest->mrconfigid) {
        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
                              strlen(tdx_guest->mrconfigid), &data_len, errp);
        if (!data) {
            return -1;
        }
        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
            error_setg(errp, "TDX 'mrconfigid' sha384 digest was %zu bytes, "
                       "expected %d bytes", data_len,
                       QCRYPTO_HASH_DIGEST_LEN_SHA384);
            return -1;
        }
        memcpy(init_vm->mrconfigid, data, data_len);
    }

    if (tdx_guest->mrowner) {
        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
                              strlen(tdx_guest->mrowner), &data_len, errp);
        if (!data) {
            return -1;
        }
        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
            error_setg(errp, "TDX 'mrowner' sha384 digest was %zu bytes, "
                       "expected %d bytes", data_len,
                       QCRYPTO_HASH_DIGEST_LEN_SHA384);
            return -1;
        }
        memcpy(init_vm->mrowner, data, data_len);
    }

    if (tdx_guest->mrownerconfig) {
        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
                              strlen(tdx_guest->mrownerconfig), &data_len, errp);
        if (!data) {
            return -1;
        }
        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
            error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %zu bytes, "
                       "expected %d bytes", data_len,
                       QCRYPTO_HASH_DIGEST_LEN_SHA384);
            return -1;
        }
        memcpy(init_vm->mrownerconfig, data, data_len);
    }

    r = setup_td_guest_attributes(x86cpu, errp);
    if (r) {
        return r;
    }

    r = setup_td_xfam(x86cpu, errp);
    if (r) {
        return r;
    }

    init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
    tdx_filter_cpuid(&init_vm->cpuid);

    init_vm->attributes = tdx_guest->attributes;
    init_vm->xfam = tdx_guest->xfam;
    /*
     * KVM_TDX_INIT_VM gets -EAGAIN when the KVM-side SEAMCALL(TDH_MNG_CREATE)
     * returns TDX_RND_NO_ENTROPY because random number generation (e.g.,
     * RDRAND or RDSEED) is busy.
     *
     * Retry in that case.
     */
    do {
        error_free(local_err);
        local_err = NULL;
        r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
    } while (r == -EAGAIN && --retry);

    if (r < 0) {
        if (!retry) {
            error_append_hint(&local_err, "The hardware RNG (Random Number "
                "Generator) is kept busy (possibly maliciously, via "
                "RDRAND/RDSEED), which makes KVM_TDX_INIT_VM keep failing "
                "due to lack of entropy.\n");
        }
        error_propagate(errp, local_err);
        return r;
    }

    tdx_guest->initialized = true;

    return 0;
}

int tdx_parse_tdvf(void *flash_ptr, int size)
{
    return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
}

static void tdx_get_quote_completion(TdxGenerateQuoteTask *task)
{
    TdxGuest *tdx = task->opaque;
    int ret;

    /* Maintain the number of in-flight requests. */
    qemu_mutex_lock(&tdx->lock);
    tdx->num--;
    qemu_mutex_unlock(&tdx->lock);

    if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) {
        ret = address_space_write(&address_space_memory, task->payload_gpa,
                                  MEMTXATTRS_UNSPECIFIED, task->receive_buf,
                                  task->receive_buf_received);
        if (ret != MEMTX_OK) {
            error_report("TDX: get-quote: failed to write quote data.");
        } else {
            task->hdr.out_len = cpu_to_le64(task->receive_buf_received);
        }
    }
    task->hdr.error_code = cpu_to_le64(task->status_code);

    /* Publish the response contents before marking this request completed. */
    smp_wmb();
    ret = address_space_write(&address_space_memory, task->buf_gpa,
                              MEMTXATTRS_UNSPECIFIED, &task->hdr,
                              TDX_GET_QUOTE_HDR_SIZE);
    if (ret != MEMTX_OK) {
        error_report("TDX: get-quote: failed to update GetQuote header.");
    }

    g_free(task->send_data);
    g_free(task->receive_buf);
    g_free(task);
    object_unref(tdx);
}
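
/*
 * GetQuote TDVMCALL handling (descriptive summary of the code below): the
 * guest passes a shared, page-aligned GPA buffer laid out as a
 * tdx_get_quote_header followed by the payload, which carries in_len bytes
 * of TD report data on entry and out_len bytes of quote on completion. The
 * header's error_code tracks the request state: IN_FLIGHT while the Quote
 * Generation Service is working, then the final status (e.g., SUCCESS or
 * QGS_UNAVAILABLE).
 */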
void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
{
    TdxGenerateQuoteTask *task;
    struct tdx_get_quote_header hdr;
    hwaddr buf_gpa = run->tdx.get_quote.gpa;
    uint64_t buf_len = run->tdx.get_quote.size;

    QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE);

    run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND;

    if (buf_len == 0) {
        return;
    }

    if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
        run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR;
        return;
    }

    if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED,
                           &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
        error_report("TDX: get-quote: failed to read GetQuote header.");
        return;
    }

    if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
        return;
    }

    /* Safe-guard check only, to reject overly large buffer sizes. */
    if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
        le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) {
        return;
    }

    if (!tdx_guest->qg_sock_addr) {
        hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
        if (address_space_write(&address_space_memory, buf_gpa,
                                MEMTXATTRS_UNSPECIFIED,
                                &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
            error_report("TDX: failed to update GetQuote header.");
            return;
        }
        run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
        return;
    }

    qemu_mutex_lock(&tdx_guest->lock);
    if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) {
        qemu_mutex_unlock(&tdx_guest->lock);
        run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY;
        return;
    }
    tdx_guest->num++;
    qemu_mutex_unlock(&tdx_guest->lock);

    task = g_new(TdxGenerateQuoteTask, 1);
    task->buf_gpa = buf_gpa;
    task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE;
    task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE;
    task->hdr = hdr;
    task->completion = tdx_get_quote_completion;

    task->send_data_size = le32_to_cpu(hdr.in_len);
    task->send_data = g_malloc(task->send_data_size);
    task->send_data_sent = 0;

    if (address_space_read(&address_space_memory, task->payload_gpa,
                           MEMTXATTRS_UNSPECIFIED, task->send_data,
                           task->send_data_size) != MEMTX_OK) {
        goto out_free;
    }

    /* Mark the buffer in-flight. */
    hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
    if (address_space_write(&address_space_memory, buf_gpa,
                            MEMTXATTRS_UNSPECIFIED,
                            &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
        goto out_free;
    }

    task->receive_buf = g_malloc0(task->payload_len);
    task->receive_buf_received = 0;
    task->opaque = tdx_guest;

    object_ref(tdx_guest);
    tdx_generate_quote(task, tdx_guest->qg_sock_addr);
    run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
    return;

out_free:
    g_free(task->send_data);
    g_free(task);
}

void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
{
    if (run->tdx.get_tdvmcall_info.leaf != 1) {
        return;
    }

    run->tdx.get_tdvmcall_info.r11 = TDG_VP_VMCALL_SUBFUNC_GET_QUOTE;
    run->tdx.get_tdvmcall_info.r12 = 0;
    run->tdx.get_tdvmcall_info.r13 = 0;
    run->tdx.get_tdvmcall_info.r14 = 0;
}

static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
                                        char *message, uint64_t gpa)
{
    GuestPanicInformation *panic_info;

    panic_info = g_new0(GuestPanicInformation, 1);
    panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
    panic_info->u.tdx.error_code = (uint32_t)error_code;
    panic_info->u.tdx.message = message;
    panic_info->u.tdx.gpa = gpa;

    qemu_system_guest_panicked(panic_info);
}

/*
 * Only 8 registers can contain a valid ASCII byte stream to form the fatal
 * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
 */
#define TDX_FATAL_MESSAGE_MAX  64

#define TDX_REPORT_FATAL_ERROR_GPA_VALID    BIT_ULL(63)
int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
{
    uint64_t error_code = run->system_event.data[R_R12];
    uint64_t reg_mask = run->system_event.data[R_ECX];
    char *message = NULL;
    uint64_t *tmp;
    uint64_t gpa = -1ull;

    if (error_code & 0xffff) {
        error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
                     error_code);
        return -1;
    }

    if (reg_mask) {
        message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
        tmp = (uint64_t *)message;

#define COPY_REG(REG)                                   \
    do {                                                \
        if (reg_mask & BIT_ULL(REG)) {                  \
            *(tmp++) = run->system_event.data[REG];     \
        }                                               \
    } while (0)

        COPY_REG(R_R14);
        COPY_REG(R_R15);
        COPY_REG(R_EBX);
        COPY_REG(R_EDI);
        COPY_REG(R_ESI);
        COPY_REG(R_R8);
        COPY_REG(R_R9);
        COPY_REG(R_EDX);
        *((char *)tmp) = '\0';
    }
#undef COPY_REG

    if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
        gpa = run->system_event.data[R_R13];
    }

    tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);

    return -1;
}

static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
}

static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    if (value) {
        tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
    } else {
        tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
    }
}

static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return g_strdup(tdx->mrconfigid);
}

static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    g_free(tdx->mrconfigid);
    tdx->mrconfigid = g_strdup(value);
}

static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return g_strdup(tdx->mrowner);
}

static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    g_free(tdx->mrowner);
    tdx->mrowner = g_strdup(value);
}

static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return g_strdup(tdx->mrownerconfig);
}

static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    g_free(tdx->mrownerconfig);
    tdx->mrownerconfig = g_strdup(value);
}

static void tdx_guest_get_qgs(Object *obj, Visitor *v,
                              const char *name, void *opaque,
                              Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    if (!tdx->qg_sock_addr) {
        error_setg(errp, "quote-generation-socket is not set");
        return;
    }
    visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp);
}

static void tdx_guest_set_qgs(Object *obj, Visitor *v,
                              const char *name, void *opaque,
                              Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);
    SocketAddress *sock = NULL;

    if (!visit_type_SocketAddress(v, name, &sock, errp)) {
        return;
    }

    if (tdx->qg_sock_addr) {
        qapi_free_SocketAddress(tdx->qg_sock_addr);
    }

    tdx->qg_sock_addr = sock;
}

/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
                                   tdx_guest,
                                   TDX_GUEST,
                                   X86_CONFIDENTIAL_GUEST,
                                   { TYPE_USER_CREATABLE },
                                   { NULL })
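
/*
 * The properties registered below are set on the tdx-guest object on the
 * command line. A hypothetical invocation (values are placeholders, shown
 * for illustration only):
 *
 *     -object tdx-guest,id=tdx0,sept-ve-disable=on,mrconfigid=<base64>,\
 *         quote-generation-socket.type=vsock,\
 *         quote-generation-socket.cid=2,quote-generation-socket.port=4050
 */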
static void tdx_guest_init(Object *obj)
{
    ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
    TdxGuest *tdx = TDX_GUEST(obj);

    qemu_mutex_init(&tdx->lock);

    cgs->require_guest_memfd = true;
    tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;

    object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
                                   OBJ_PROP_FLAG_READWRITE);
    object_property_add_bool(obj, "sept-ve-disable",
                             tdx_guest_get_sept_ve_disable,
                             tdx_guest_set_sept_ve_disable);
    object_property_add_str(obj, "mrconfigid",
                            tdx_guest_get_mrconfigid,
                            tdx_guest_set_mrconfigid);
    object_property_add_str(obj, "mrowner",
                            tdx_guest_get_mrowner, tdx_guest_set_mrowner);
    object_property_add_str(obj, "mrownerconfig",
                            tdx_guest_get_mrownerconfig,
                            tdx_guest_set_mrownerconfig);

    object_property_add(obj, "quote-generation-socket", "SocketAddress",
                        tdx_guest_get_qgs,
                        tdx_guest_set_qgs,
                        NULL, NULL);
}

static void tdx_guest_finalize(Object *obj)
{
}

static void tdx_guest_class_init(ObjectClass *oc, const void *data)
{
    ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
    X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);

    klass->kvm_init = tdx_kvm_init;
    x86_klass->kvm_type = tdx_kvm_type;
    x86_klass->cpu_instance_init = tdx_cpu_instance_init;
    x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
    x86_klass->check_features = tdx_check_features;
}