1756e12e7SXiaoyao Li /* 2756e12e7SXiaoyao Li * QEMU TDX support 3756e12e7SXiaoyao Li * 4756e12e7SXiaoyao Li * Copyright (c) 2025 Intel Corporation 5756e12e7SXiaoyao Li * 6756e12e7SXiaoyao Li * Author: 7756e12e7SXiaoyao Li * Xiaoyao Li <xiaoyao.li@intel.com> 8756e12e7SXiaoyao Li * 9756e12e7SXiaoyao Li * SPDX-License-Identifier: GPL-2.0-or-later 10756e12e7SXiaoyao Li */ 11756e12e7SXiaoyao Li 12756e12e7SXiaoyao Li #include "qemu/osdep.h" 138eddedc3SXiaoyao Li #include "qemu/error-report.h" 14d05a0858SIsaku Yamahata #include "qemu/base64.h" 154420ba0eSXiaoyao Li #include "qemu/mmap-alloc.h" 168eddedc3SXiaoyao Li #include "qapi/error.h" 17756e12e7SXiaoyao Li #include "qom/object_interfaces.h" 18d05a0858SIsaku Yamahata #include "crypto/hash.h" 194420ba0eSXiaoyao Li #include "system/system.h" 20756e12e7SXiaoyao Li 21*f18672e4SXiaoyao Li #include "hw/i386/e820_memory_layout.h" 224420ba0eSXiaoyao Li #include "hw/i386/tdvf.h" 23631a2ac5SXiaoyao Li #include "hw/i386/x86.h" 24b455880eSXiaoyao Li #include "kvm_i386.h" 25756e12e7SXiaoyao Li #include "tdx.h" 26756e12e7SXiaoyao Li 270e73b843SXiaoyao Li #define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) 280e73b843SXiaoyao Li #define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) 290e73b843SXiaoyao Li 3053b6f406SXiaoyao Li #define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) 316016e297SXiaoyao Li #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) 32bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) 33bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) 346016e297SXiaoyao Li 3553b6f406SXiaoyao Li #define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ 3653b6f406SXiaoyao Li TDX_TD_ATTRIBUTES_PKS | \ 3753b6f406SXiaoyao Li TDX_TD_ATTRIBUTES_PERFMON) 3853b6f406SXiaoyao Li 391619d0e4SXiaoyao Li static TdxGuest *tdx_guest; 401619d0e4SXiaoyao Li 418eddedc3SXiaoyao Li static struct kvm_tdx_capabilities *tdx_caps; 428eddedc3SXiaoyao Li 431619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ 441619d0e4SXiaoyao Li bool is_tdx_vm(void) 451619d0e4SXiaoyao Li { 461619d0e4SXiaoyao Li return !!tdx_guest; 471619d0e4SXiaoyao Li } 481619d0e4SXiaoyao Li 498eddedc3SXiaoyao Li enum tdx_ioctl_level { 508eddedc3SXiaoyao Li TDX_VM_IOCTL, 518eddedc3SXiaoyao Li TDX_VCPU_IOCTL, 528eddedc3SXiaoyao Li }; 538eddedc3SXiaoyao Li 548eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, 558eddedc3SXiaoyao Li int cmd_id, __u32 flags, void *data, 568eddedc3SXiaoyao Li Error **errp) 57631a2ac5SXiaoyao Li { 588eddedc3SXiaoyao Li struct kvm_tdx_cmd tdx_cmd = {}; 598eddedc3SXiaoyao Li int r; 608eddedc3SXiaoyao Li 618eddedc3SXiaoyao Li const char *tdx_ioctl_name[] = { 628eddedc3SXiaoyao Li [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", 638eddedc3SXiaoyao Li [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", 648eddedc3SXiaoyao Li [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", 658eddedc3SXiaoyao Li [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", 668eddedc3SXiaoyao Li [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", 678eddedc3SXiaoyao Li [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", 688eddedc3SXiaoyao Li }; 698eddedc3SXiaoyao Li 708eddedc3SXiaoyao Li tdx_cmd.id = cmd_id; 718eddedc3SXiaoyao Li tdx_cmd.flags = flags; 728eddedc3SXiaoyao Li tdx_cmd.data = (__u64)(unsigned long)data; 738eddedc3SXiaoyao Li 748eddedc3SXiaoyao Li switch (level) { 758eddedc3SXiaoyao Li case TDX_VM_IOCTL: 768eddedc3SXiaoyao Li r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 778eddedc3SXiaoyao Li break; 788eddedc3SXiaoyao Li case TDX_VCPU_IOCTL: 798eddedc3SXiaoyao Li r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 808eddedc3SXiaoyao Li break; 818eddedc3SXiaoyao Li default: 828eddedc3SXiaoyao Li error_setg(errp, "Invalid tdx_ioctl_level %d", level); 838eddedc3SXiaoyao Li return -EINVAL; 848eddedc3SXiaoyao Li } 858eddedc3SXiaoyao Li 868eddedc3SXiaoyao Li if (r < 0) { 878eddedc3SXiaoyao Li error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", 888eddedc3SXiaoyao Li tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); 898eddedc3SXiaoyao Li } 908eddedc3SXiaoyao Li return r; 918eddedc3SXiaoyao Li } 928eddedc3SXiaoyao Li 938eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, 948eddedc3SXiaoyao Li Error **errp) 958eddedc3SXiaoyao Li { 968eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); 978eddedc3SXiaoyao Li } 988eddedc3SXiaoyao Li 998eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, 1008eddedc3SXiaoyao Li void *data, Error **errp) 1018eddedc3SXiaoyao Li { 1028eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); 1038eddedc3SXiaoyao Li } 1048eddedc3SXiaoyao Li 1058eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp) 1068eddedc3SXiaoyao Li { 1078eddedc3SXiaoyao Li struct kvm_tdx_capabilities *caps; 1088eddedc3SXiaoyao Li /* 1st generation of TDX reports 6 cpuid configs */ 1098eddedc3SXiaoyao Li int nr_cpuid_configs = 6; 1108eddedc3SXiaoyao Li size_t size; 1118eddedc3SXiaoyao Li int r; 1128eddedc3SXiaoyao Li 1138eddedc3SXiaoyao Li do { 1148eddedc3SXiaoyao Li Error *local_err = NULL; 1158eddedc3SXiaoyao Li size = sizeof(struct kvm_tdx_capabilities) + 1168eddedc3SXiaoyao Li nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); 1178eddedc3SXiaoyao Li caps = g_malloc0(size); 1188eddedc3SXiaoyao Li caps->cpuid.nent = nr_cpuid_configs; 1198eddedc3SXiaoyao Li 1208eddedc3SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); 1218eddedc3SXiaoyao Li if (r == -E2BIG) { 1228eddedc3SXiaoyao Li g_free(caps); 1238eddedc3SXiaoyao Li nr_cpuid_configs *= 2; 1248eddedc3SXiaoyao Li if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { 1258eddedc3SXiaoyao Li error_report("KVM TDX seems broken that number of CPUID entries" 1268eddedc3SXiaoyao Li " in kvm_tdx_capabilities exceeds limit: %d", 1278eddedc3SXiaoyao Li KVM_MAX_CPUID_ENTRIES); 1288eddedc3SXiaoyao Li error_propagate(errp, local_err); 1298eddedc3SXiaoyao Li return r; 1308eddedc3SXiaoyao Li } 1318eddedc3SXiaoyao Li error_free(local_err); 1328eddedc3SXiaoyao Li } else if (r < 0) { 1338eddedc3SXiaoyao Li g_free(caps); 1348eddedc3SXiaoyao Li error_propagate(errp, local_err); 1358eddedc3SXiaoyao Li return r; 1368eddedc3SXiaoyao Li } 1378eddedc3SXiaoyao Li } while (r == -E2BIG); 1388eddedc3SXiaoyao Li 1398eddedc3SXiaoyao Li tdx_caps = caps; 140631a2ac5SXiaoyao Li 141631a2ac5SXiaoyao Li return 0; 142631a2ac5SXiaoyao Li } 143631a2ac5SXiaoyao Li 1440dd5fe5eSChao Peng void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) 1450dd5fe5eSChao Peng { 1460dd5fe5eSChao Peng assert(!tdx_guest->tdvf_mr); 1470dd5fe5eSChao Peng tdx_guest->tdvf_mr = tdvf_mr; 1480dd5fe5eSChao Peng } 1490dd5fe5eSChao Peng 150*f18672e4SXiaoyao Li static void tdx_add_ram_entry(uint64_t address, uint64_t length, 151*f18672e4SXiaoyao Li enum TdxRamType type) 152*f18672e4SXiaoyao Li { 153*f18672e4SXiaoyao Li uint32_t nr_entries = tdx_guest->nr_ram_entries; 154*f18672e4SXiaoyao Li tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, 155*f18672e4SXiaoyao Li nr_entries + 1); 156*f18672e4SXiaoyao Li 157*f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].address = address; 158*f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].length = length; 159*f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].type = type; 160*f18672e4SXiaoyao Li tdx_guest->nr_ram_entries++; 161*f18672e4SXiaoyao Li } 162*f18672e4SXiaoyao Li 163*f18672e4SXiaoyao Li static int tdx_accept_ram_range(uint64_t address, uint64_t length) 164*f18672e4SXiaoyao Li { 165*f18672e4SXiaoyao Li uint64_t head_start, tail_start, head_length, tail_length; 166*f18672e4SXiaoyao Li uint64_t tmp_address, tmp_length; 167*f18672e4SXiaoyao Li TdxRamEntry *e; 168*f18672e4SXiaoyao Li int i = 0; 169*f18672e4SXiaoyao Li 170*f18672e4SXiaoyao Li do { 171*f18672e4SXiaoyao Li if (i == tdx_guest->nr_ram_entries) { 172*f18672e4SXiaoyao Li return -1; 173*f18672e4SXiaoyao Li } 174*f18672e4SXiaoyao Li 175*f18672e4SXiaoyao Li e = &tdx_guest->ram_entries[i++]; 176*f18672e4SXiaoyao Li } while (address + length <= e->address || address >= e->address + e->length); 177*f18672e4SXiaoyao Li 178*f18672e4SXiaoyao Li /* 179*f18672e4SXiaoyao Li * The to-be-accepted ram range must be fully contained by one 180*f18672e4SXiaoyao Li * RAM entry. 181*f18672e4SXiaoyao Li */ 182*f18672e4SXiaoyao Li if (e->address > address || 183*f18672e4SXiaoyao Li e->address + e->length < address + length) { 184*f18672e4SXiaoyao Li return -1; 185*f18672e4SXiaoyao Li } 186*f18672e4SXiaoyao Li 187*f18672e4SXiaoyao Li if (e->type == TDX_RAM_ADDED) { 188*f18672e4SXiaoyao Li return 0; 189*f18672e4SXiaoyao Li } 190*f18672e4SXiaoyao Li 191*f18672e4SXiaoyao Li tmp_address = e->address; 192*f18672e4SXiaoyao Li tmp_length = e->length; 193*f18672e4SXiaoyao Li 194*f18672e4SXiaoyao Li e->address = address; 195*f18672e4SXiaoyao Li e->length = length; 196*f18672e4SXiaoyao Li e->type = TDX_RAM_ADDED; 197*f18672e4SXiaoyao Li 198*f18672e4SXiaoyao Li head_length = address - tmp_address; 199*f18672e4SXiaoyao Li if (head_length > 0) { 200*f18672e4SXiaoyao Li head_start = tmp_address; 201*f18672e4SXiaoyao Li tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); 202*f18672e4SXiaoyao Li } 203*f18672e4SXiaoyao Li 204*f18672e4SXiaoyao Li tail_start = address + length; 205*f18672e4SXiaoyao Li if (tail_start < tmp_address + tmp_length) { 206*f18672e4SXiaoyao Li tail_length = tmp_address + tmp_length - tail_start; 207*f18672e4SXiaoyao Li tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); 208*f18672e4SXiaoyao Li } 209*f18672e4SXiaoyao Li 210*f18672e4SXiaoyao Li return 0; 211*f18672e4SXiaoyao Li } 212*f18672e4SXiaoyao Li 213*f18672e4SXiaoyao Li static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) 214*f18672e4SXiaoyao Li { 215*f18672e4SXiaoyao Li const TdxRamEntry *lhs = lhs_; 216*f18672e4SXiaoyao Li const TdxRamEntry *rhs = rhs_; 217*f18672e4SXiaoyao Li 218*f18672e4SXiaoyao Li if (lhs->address == rhs->address) { 219*f18672e4SXiaoyao Li return 0; 220*f18672e4SXiaoyao Li } 221*f18672e4SXiaoyao Li if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { 222*f18672e4SXiaoyao Li return 1; 223*f18672e4SXiaoyao Li } 224*f18672e4SXiaoyao Li return -1; 225*f18672e4SXiaoyao Li } 226*f18672e4SXiaoyao Li 227*f18672e4SXiaoyao Li static void tdx_init_ram_entries(void) 228*f18672e4SXiaoyao Li { 229*f18672e4SXiaoyao Li unsigned i, j, nr_e820_entries; 230*f18672e4SXiaoyao Li 231*f18672e4SXiaoyao Li nr_e820_entries = e820_get_table(NULL); 232*f18672e4SXiaoyao Li tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); 233*f18672e4SXiaoyao Li 234*f18672e4SXiaoyao Li for (i = 0, j = 0; i < nr_e820_entries; i++) { 235*f18672e4SXiaoyao Li uint64_t addr, len; 236*f18672e4SXiaoyao Li 237*f18672e4SXiaoyao Li if (e820_get_entry(i, E820_RAM, &addr, &len)) { 238*f18672e4SXiaoyao Li tdx_guest->ram_entries[j].address = addr; 239*f18672e4SXiaoyao Li tdx_guest->ram_entries[j].length = len; 240*f18672e4SXiaoyao Li tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; 241*f18672e4SXiaoyao Li j++; 242*f18672e4SXiaoyao Li } 243*f18672e4SXiaoyao Li } 244*f18672e4SXiaoyao Li tdx_guest->nr_ram_entries = j; 245*f18672e4SXiaoyao Li } 246*f18672e4SXiaoyao Li 2474420ba0eSXiaoyao Li static void tdx_finalize_vm(Notifier *notifier, void *unused) 2484420ba0eSXiaoyao Li { 2494420ba0eSXiaoyao Li TdxFirmware *tdvf = &tdx_guest->tdvf; 2504420ba0eSXiaoyao Li TdxFirmwareEntry *entry; 2514420ba0eSXiaoyao Li 252*f18672e4SXiaoyao Li tdx_init_ram_entries(); 253*f18672e4SXiaoyao Li 2544420ba0eSXiaoyao Li for_each_tdx_fw_entry(tdvf, entry) { 2554420ba0eSXiaoyao Li switch (entry->type) { 2564420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_BFV: 2574420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_CFV: 2584420ba0eSXiaoyao Li entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; 2594420ba0eSXiaoyao Li break; 2604420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_TD_HOB: 2614420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_TEMP_MEM: 2624420ba0eSXiaoyao Li entry->mem_ptr = qemu_ram_mmap(-1, entry->size, 2634420ba0eSXiaoyao Li qemu_real_host_page_size(), 0, 0); 2644420ba0eSXiaoyao Li if (entry->mem_ptr == MAP_FAILED) { 2654420ba0eSXiaoyao Li error_report("Failed to mmap memory for TDVF section %d", 2664420ba0eSXiaoyao Li entry->type); 2674420ba0eSXiaoyao Li exit(1); 2684420ba0eSXiaoyao Li } 269*f18672e4SXiaoyao Li if (tdx_accept_ram_range(entry->address, entry->size)) { 270*f18672e4SXiaoyao Li error_report("Failed to accept memory for TDVF section %d", 271*f18672e4SXiaoyao Li entry->type); 272*f18672e4SXiaoyao Li qemu_ram_munmap(-1, entry->mem_ptr, entry->size); 273*f18672e4SXiaoyao Li exit(1); 274*f18672e4SXiaoyao Li } 2754420ba0eSXiaoyao Li break; 2764420ba0eSXiaoyao Li default: 2774420ba0eSXiaoyao Li error_report("Unsupported TDVF section %d", entry->type); 2784420ba0eSXiaoyao Li exit(1); 2794420ba0eSXiaoyao Li } 2804420ba0eSXiaoyao Li } 281*f18672e4SXiaoyao Li 282*f18672e4SXiaoyao Li qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, 283*f18672e4SXiaoyao Li sizeof(TdxRamEntry), &tdx_ram_entry_compare); 2844420ba0eSXiaoyao Li } 2854420ba0eSXiaoyao Li 2864420ba0eSXiaoyao Li static Notifier tdx_machine_done_notify = { 2874420ba0eSXiaoyao Li .notify = tdx_finalize_vm, 2884420ba0eSXiaoyao Li }; 2894420ba0eSXiaoyao Li 2908eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) 2918eddedc3SXiaoyao Li { 2921619d0e4SXiaoyao Li TdxGuest *tdx = TDX_GUEST(cgs); 2938eddedc3SXiaoyao Li int r = 0; 2948eddedc3SXiaoyao Li 2958eddedc3SXiaoyao Li kvm_mark_guest_state_protected(); 2968eddedc3SXiaoyao Li 2978eddedc3SXiaoyao Li if (!tdx_caps) { 2988eddedc3SXiaoyao Li r = get_tdx_capabilities(errp); 2991619d0e4SXiaoyao Li if (r) { 3001619d0e4SXiaoyao Li return r; 3011619d0e4SXiaoyao Li } 3028eddedc3SXiaoyao Li } 3038eddedc3SXiaoyao Li 3044420ba0eSXiaoyao Li qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); 3054420ba0eSXiaoyao Li 3061619d0e4SXiaoyao Li tdx_guest = tdx; 3071619d0e4SXiaoyao Li return 0; 3088eddedc3SXiaoyao Li } 3098eddedc3SXiaoyao Li 310b455880eSXiaoyao Li static int tdx_kvm_type(X86ConfidentialGuest *cg) 311b455880eSXiaoyao Li { 312b455880eSXiaoyao Li /* Do the object check */ 313b455880eSXiaoyao Li TDX_GUEST(cg); 314b455880eSXiaoyao Li 315b455880eSXiaoyao Li return KVM_X86_TDX_VM; 316b455880eSXiaoyao Li } 317b455880eSXiaoyao Li 31853b6f406SXiaoyao Li static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) 31953b6f406SXiaoyao Li { 32053b6f406SXiaoyao Li if ((tdx->attributes & ~tdx_caps->supported_attrs)) { 32153b6f406SXiaoyao Li error_setg(errp, "Invalid attributes 0x%lx for TDX VM " 32253b6f406SXiaoyao Li "(KVM supported: 0x%llx)", tdx->attributes, 32353b6f406SXiaoyao Li tdx_caps->supported_attrs); 32453b6f406SXiaoyao Li return -1; 32553b6f406SXiaoyao Li } 32653b6f406SXiaoyao Li 32753b6f406SXiaoyao Li if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { 32853b6f406SXiaoyao Li error_setg(errp, "Some QEMU unsupported TD attribute bits being " 32953b6f406SXiaoyao Li "requested: 0x%lx (QEMU supported: 0x%llx)", 33053b6f406SXiaoyao Li tdx->attributes, TDX_SUPPORTED_TD_ATTRS); 33153b6f406SXiaoyao Li return -1; 33253b6f406SXiaoyao Li } 33353b6f406SXiaoyao Li 33453b6f406SXiaoyao Li return 0; 33553b6f406SXiaoyao Li } 33653b6f406SXiaoyao Li 33753b6f406SXiaoyao Li static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) 338bb3be394SXiaoyao Li { 339bb3be394SXiaoyao Li CPUX86State *env = &x86cpu->env; 340bb3be394SXiaoyao Li 341bb3be394SXiaoyao Li tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? 342bb3be394SXiaoyao Li TDX_TD_ATTRIBUTES_PKS : 0; 343bb3be394SXiaoyao Li tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; 34453b6f406SXiaoyao Li 34553b6f406SXiaoyao Li return tdx_validate_attributes(tdx_guest, errp); 346bb3be394SXiaoyao Li } 347bb3be394SXiaoyao Li 348f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp) 349f15898b0SXiaoyao Li { 350f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env; 351f15898b0SXiaoyao Li uint64_t xfam; 352f15898b0SXiaoyao Li 353f15898b0SXiaoyao Li xfam = env->features[FEAT_XSAVE_XCR0_LO] | 354f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XCR0_HI] | 355f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_LO] | 356f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_HI]; 357f15898b0SXiaoyao Li 358f15898b0SXiaoyao Li if (xfam & ~tdx_caps->supported_xfam) { 359f15898b0SXiaoyao Li error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))", 360f15898b0SXiaoyao Li xfam, tdx_caps->supported_xfam); 361f15898b0SXiaoyao Li return -1; 362f15898b0SXiaoyao Li } 363f15898b0SXiaoyao Li 364f15898b0SXiaoyao Li tdx_guest->xfam = xfam; 365f15898b0SXiaoyao Li return 0; 366f15898b0SXiaoyao Li } 367f15898b0SXiaoyao Li 368f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) 369f15898b0SXiaoyao Li { 370f15898b0SXiaoyao Li int i, dest_cnt = 0; 371f15898b0SXiaoyao Li struct kvm_cpuid_entry2 *src, *dest, *conf; 372f15898b0SXiaoyao Li 373f15898b0SXiaoyao Li for (i = 0; i < cpuids->nent; i++) { 374f15898b0SXiaoyao Li src = cpuids->entries + i; 375f15898b0SXiaoyao Li conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); 376f15898b0SXiaoyao Li if (!conf) { 377f15898b0SXiaoyao Li continue; 378f15898b0SXiaoyao Li } 379f15898b0SXiaoyao Li dest = cpuids->entries + dest_cnt; 380f15898b0SXiaoyao Li 381f15898b0SXiaoyao Li dest->function = src->function; 382f15898b0SXiaoyao Li dest->index = src->index; 383f15898b0SXiaoyao Li dest->flags = src->flags; 384f15898b0SXiaoyao Li dest->eax = src->eax & conf->eax; 385f15898b0SXiaoyao Li dest->ebx = src->ebx & conf->ebx; 386f15898b0SXiaoyao Li dest->ecx = src->ecx & conf->ecx; 387f15898b0SXiaoyao Li dest->edx = src->edx & conf->edx; 388f15898b0SXiaoyao Li 389f15898b0SXiaoyao Li dest_cnt++; 390f15898b0SXiaoyao Li } 391f15898b0SXiaoyao Li cpuids->nent = dest_cnt++; 392f15898b0SXiaoyao Li } 393f15898b0SXiaoyao Li 394f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) 395f15898b0SXiaoyao Li { 396f15898b0SXiaoyao Li X86CPU *x86cpu = X86_CPU(cpu); 397f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env; 398f15898b0SXiaoyao Li g_autofree struct kvm_tdx_init_vm *init_vm = NULL; 399f15898b0SXiaoyao Li Error *local_err = NULL; 400d05a0858SIsaku Yamahata size_t data_len; 401f15898b0SXiaoyao Li int retry = 10000; 402f15898b0SXiaoyao Li int r = 0; 403f15898b0SXiaoyao Li 404f15898b0SXiaoyao Li QEMU_LOCK_GUARD(&tdx_guest->lock); 405f15898b0SXiaoyao Li if (tdx_guest->initialized) { 406f15898b0SXiaoyao Li return r; 407f15898b0SXiaoyao Li } 408f15898b0SXiaoyao Li 409f15898b0SXiaoyao Li init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + 410f15898b0SXiaoyao Li sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); 411f15898b0SXiaoyao Li 412d529a2acSXiaoyao Li if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { 413d529a2acSXiaoyao Li error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); 414d529a2acSXiaoyao Li return -EOPNOTSUPP; 415d529a2acSXiaoyao Li } 416d529a2acSXiaoyao Li 417d529a2acSXiaoyao Li r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, 418d529a2acSXiaoyao Li 0, TDX_APIC_BUS_CYCLES_NS); 419d529a2acSXiaoyao Li if (r < 0) { 420d529a2acSXiaoyao Li error_setg_errno(errp, -r, 421d529a2acSXiaoyao Li "Unable to set core crystal clock frequency to 25MHz"); 422d529a2acSXiaoyao Li return r; 423d529a2acSXiaoyao Li } 424d529a2acSXiaoyao Li 4250e73b843SXiaoyao Li if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || 4260e73b843SXiaoyao Li env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { 4270e73b843SXiaoyao Li error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency " 4280e73b843SXiaoyao Li "between [%d, %d] kHz", env->tsc_khz, 4290e73b843SXiaoyao Li TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); 4300e73b843SXiaoyao Li return -EINVAL; 4310e73b843SXiaoyao Li } 4320e73b843SXiaoyao Li 4330e73b843SXiaoyao Li if (env->tsc_khz % (25 * 1000)) { 4340e73b843SXiaoyao Li error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz", 4350e73b843SXiaoyao Li env->tsc_khz); 4360e73b843SXiaoyao Li return -EINVAL; 4370e73b843SXiaoyao Li } 4380e73b843SXiaoyao Li 4390e73b843SXiaoyao Li /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ 4400e73b843SXiaoyao Li r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); 4410e73b843SXiaoyao Li if (r < 0) { 4420e73b843SXiaoyao Li error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz", 4430e73b843SXiaoyao Li env->tsc_khz); 4440e73b843SXiaoyao Li return r; 4450e73b843SXiaoyao Li } 4460e73b843SXiaoyao Li 447d05a0858SIsaku Yamahata if (tdx_guest->mrconfigid) { 448d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, 449d05a0858SIsaku Yamahata strlen(tdx_guest->mrconfigid), &data_len, errp); 450d05a0858SIsaku Yamahata if (!data) { 451d05a0858SIsaku Yamahata return -1; 452d05a0858SIsaku Yamahata } 453d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 454d05a0858SIsaku Yamahata error_setg(errp, "TDX: failed to decode mrconfigid"); 455d05a0858SIsaku Yamahata return -1; 456d05a0858SIsaku Yamahata } 457d05a0858SIsaku Yamahata memcpy(init_vm->mrconfigid, data, data_len); 458d05a0858SIsaku Yamahata } 459d05a0858SIsaku Yamahata 460d05a0858SIsaku Yamahata if (tdx_guest->mrowner) { 461d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, 462d05a0858SIsaku Yamahata strlen(tdx_guest->mrowner), &data_len, errp); 463d05a0858SIsaku Yamahata if (!data) { 464d05a0858SIsaku Yamahata return -1; 465d05a0858SIsaku Yamahata } 466d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 467d05a0858SIsaku Yamahata error_setg(errp, "TDX: failed to decode mrowner"); 468d05a0858SIsaku Yamahata return -1; 469d05a0858SIsaku Yamahata } 470d05a0858SIsaku Yamahata memcpy(init_vm->mrowner, data, data_len); 471d05a0858SIsaku Yamahata } 472d05a0858SIsaku Yamahata 473d05a0858SIsaku Yamahata if (tdx_guest->mrownerconfig) { 474d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, 475d05a0858SIsaku Yamahata strlen(tdx_guest->mrownerconfig), &data_len, errp); 476d05a0858SIsaku Yamahata if (!data) { 477d05a0858SIsaku Yamahata return -1; 478d05a0858SIsaku Yamahata } 479d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 480d05a0858SIsaku Yamahata error_setg(errp, "TDX: failed to decode mrownerconfig"); 481d05a0858SIsaku Yamahata return -1; 482d05a0858SIsaku Yamahata } 483d05a0858SIsaku Yamahata memcpy(init_vm->mrownerconfig, data, data_len); 484d05a0858SIsaku Yamahata } 485d05a0858SIsaku Yamahata 48653b6f406SXiaoyao Li r = setup_td_guest_attributes(x86cpu, errp); 48753b6f406SXiaoyao Li if (r) { 48853b6f406SXiaoyao Li return r; 48953b6f406SXiaoyao Li } 490bb3be394SXiaoyao Li 491f15898b0SXiaoyao Li r = setup_td_xfam(x86cpu, errp); 492f15898b0SXiaoyao Li if (r) { 493f15898b0SXiaoyao Li return r; 494f15898b0SXiaoyao Li } 495f15898b0SXiaoyao Li 496f15898b0SXiaoyao Li init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); 497f15898b0SXiaoyao Li tdx_filter_cpuid(&init_vm->cpuid); 498f15898b0SXiaoyao Li 499f15898b0SXiaoyao Li init_vm->attributes = tdx_guest->attributes; 500f15898b0SXiaoyao Li init_vm->xfam = tdx_guest->xfam; 501f15898b0SXiaoyao Li 502f15898b0SXiaoyao Li /* 503f15898b0SXiaoyao Li * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) 504f15898b0SXiaoyao Li * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or 505f15898b0SXiaoyao Li * RDSEED) is busy. 506f15898b0SXiaoyao Li * 507f15898b0SXiaoyao Li * Retry for the case. 508f15898b0SXiaoyao Li */ 509f15898b0SXiaoyao Li do { 510f15898b0SXiaoyao Li error_free(local_err); 511f15898b0SXiaoyao Li local_err = NULL; 512f15898b0SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); 513f15898b0SXiaoyao Li } while (r == -EAGAIN && --retry); 514f15898b0SXiaoyao Li 515f15898b0SXiaoyao Li if (r < 0) { 516f15898b0SXiaoyao Li if (!retry) { 517f15898b0SXiaoyao Li error_append_hint(&local_err, "Hardware RNG (Random Number " 518f15898b0SXiaoyao Li "Generator) is busy occupied by someone (via RDRAND/RDSEED) " 519f15898b0SXiaoyao Li "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " 520f15898b0SXiaoyao Li "due to lack of entropy.\n"); 521f15898b0SXiaoyao Li } 522f15898b0SXiaoyao Li error_propagate(errp, local_err); 523f15898b0SXiaoyao Li return r; 524f15898b0SXiaoyao Li } 525f15898b0SXiaoyao Li 526f15898b0SXiaoyao Li tdx_guest->initialized = true; 527f15898b0SXiaoyao Li 528f15898b0SXiaoyao Li return 0; 529f15898b0SXiaoyao Li } 530f15898b0SXiaoyao Li 531cb5d65a8SXiaoyao Li int tdx_parse_tdvf(void *flash_ptr, int size) 532cb5d65a8SXiaoyao Li { 533cb5d65a8SXiaoyao Li return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); 534cb5d65a8SXiaoyao Li } 535cb5d65a8SXiaoyao Li 5366016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) 5376016e297SXiaoyao Li { 5386016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 5396016e297SXiaoyao Li 5406016e297SXiaoyao Li return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); 5416016e297SXiaoyao Li } 5426016e297SXiaoyao Li 5436016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) 5446016e297SXiaoyao Li { 5456016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 5466016e297SXiaoyao Li 5476016e297SXiaoyao Li if (value) { 5486016e297SXiaoyao Li tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 5496016e297SXiaoyao Li } else { 5506016e297SXiaoyao Li tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 5516016e297SXiaoyao Li } 5526016e297SXiaoyao Li } 5536016e297SXiaoyao Li 554d05a0858SIsaku Yamahata static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) 555d05a0858SIsaku Yamahata { 556d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 557d05a0858SIsaku Yamahata 558d05a0858SIsaku Yamahata return g_strdup(tdx->mrconfigid); 559d05a0858SIsaku Yamahata } 560d05a0858SIsaku Yamahata 561d05a0858SIsaku Yamahata static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) 562d05a0858SIsaku Yamahata { 563d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 564d05a0858SIsaku Yamahata 565d05a0858SIsaku Yamahata g_free(tdx->mrconfigid); 566d05a0858SIsaku Yamahata tdx->mrconfigid = g_strdup(value); 567d05a0858SIsaku Yamahata } 568d05a0858SIsaku Yamahata 569d05a0858SIsaku Yamahata static char *tdx_guest_get_mrowner(Object *obj, Error **errp) 570d05a0858SIsaku Yamahata { 571d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 572d05a0858SIsaku Yamahata 573d05a0858SIsaku Yamahata return g_strdup(tdx->mrowner); 574d05a0858SIsaku Yamahata } 575d05a0858SIsaku Yamahata 576d05a0858SIsaku Yamahata static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) 577d05a0858SIsaku Yamahata { 578d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 579d05a0858SIsaku Yamahata 580d05a0858SIsaku Yamahata g_free(tdx->mrowner); 581d05a0858SIsaku Yamahata tdx->mrowner = g_strdup(value); 582d05a0858SIsaku Yamahata } 583d05a0858SIsaku Yamahata 584d05a0858SIsaku Yamahata static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) 585d05a0858SIsaku Yamahata { 586d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 587d05a0858SIsaku Yamahata 588d05a0858SIsaku Yamahata return g_strdup(tdx->mrownerconfig); 589d05a0858SIsaku Yamahata } 590d05a0858SIsaku Yamahata 591d05a0858SIsaku Yamahata static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) 592d05a0858SIsaku Yamahata { 593d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 594d05a0858SIsaku Yamahata 595d05a0858SIsaku Yamahata g_free(tdx->mrownerconfig); 596d05a0858SIsaku Yamahata tdx->mrownerconfig = g_strdup(value); 597d05a0858SIsaku Yamahata } 598d05a0858SIsaku Yamahata 599756e12e7SXiaoyao Li /* tdx guest */ 600756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, 601756e12e7SXiaoyao Li tdx_guest, 602756e12e7SXiaoyao Li TDX_GUEST, 603756e12e7SXiaoyao Li X86_CONFIDENTIAL_GUEST, 604756e12e7SXiaoyao Li { TYPE_USER_CREATABLE }, 605756e12e7SXiaoyao Li { NULL }) 606756e12e7SXiaoyao Li 607756e12e7SXiaoyao Li static void tdx_guest_init(Object *obj) 608756e12e7SXiaoyao Li { 609756e12e7SXiaoyao Li ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); 610756e12e7SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 611756e12e7SXiaoyao Li 612f15898b0SXiaoyao Li qemu_mutex_init(&tdx->lock); 613f15898b0SXiaoyao Li 614756e12e7SXiaoyao Li cgs->require_guest_memfd = true; 615714af522SIsaku Yamahata tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 616756e12e7SXiaoyao Li 617756e12e7SXiaoyao Li object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, 618756e12e7SXiaoyao Li OBJ_PROP_FLAG_READWRITE); 6196016e297SXiaoyao Li object_property_add_bool(obj, "sept-ve-disable", 6206016e297SXiaoyao Li tdx_guest_get_sept_ve_disable, 6216016e297SXiaoyao Li tdx_guest_set_sept_ve_disable); 622d05a0858SIsaku Yamahata object_property_add_str(obj, "mrconfigid", 623d05a0858SIsaku Yamahata tdx_guest_get_mrconfigid, 624d05a0858SIsaku Yamahata tdx_guest_set_mrconfigid); 625d05a0858SIsaku Yamahata object_property_add_str(obj, "mrowner", 626d05a0858SIsaku Yamahata tdx_guest_get_mrowner, tdx_guest_set_mrowner); 627d05a0858SIsaku Yamahata object_property_add_str(obj, "mrownerconfig", 628d05a0858SIsaku Yamahata tdx_guest_get_mrownerconfig, 629d05a0858SIsaku Yamahata tdx_guest_set_mrownerconfig); 630756e12e7SXiaoyao Li } 631756e12e7SXiaoyao Li 632756e12e7SXiaoyao Li static void tdx_guest_finalize(Object *obj) 633756e12e7SXiaoyao Li { 634756e12e7SXiaoyao Li } 635756e12e7SXiaoyao Li 636756e12e7SXiaoyao Li static void tdx_guest_class_init(ObjectClass *oc, const void *data) 637756e12e7SXiaoyao Li { 638631a2ac5SXiaoyao Li ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); 639b455880eSXiaoyao Li X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); 640b455880eSXiaoyao Li 641631a2ac5SXiaoyao Li klass->kvm_init = tdx_kvm_init; 642b455880eSXiaoyao Li x86_klass->kvm_type = tdx_kvm_type; 643756e12e7SXiaoyao Li } 644