1756e12e7SXiaoyao Li /* 2756e12e7SXiaoyao Li * QEMU TDX support 3756e12e7SXiaoyao Li * 4756e12e7SXiaoyao Li * Copyright (c) 2025 Intel Corporation 5756e12e7SXiaoyao Li * 6756e12e7SXiaoyao Li * Author: 7756e12e7SXiaoyao Li * Xiaoyao Li <xiaoyao.li@intel.com> 8756e12e7SXiaoyao Li * 9756e12e7SXiaoyao Li * SPDX-License-Identifier: GPL-2.0-or-later 10756e12e7SXiaoyao Li */ 11756e12e7SXiaoyao Li 12756e12e7SXiaoyao Li #include "qemu/osdep.h" 138eddedc3SXiaoyao Li #include "qemu/error-report.h" 14d05a0858SIsaku Yamahata #include "qemu/base64.h" 154420ba0eSXiaoyao Li #include "qemu/mmap-alloc.h" 168eddedc3SXiaoyao Li #include "qapi/error.h" 1740da501dSIsaku Yamahata #include "qapi/qapi-visit-sockets.h" 18756e12e7SXiaoyao Li #include "qom/object_interfaces.h" 19d05a0858SIsaku Yamahata #include "crypto/hash.h" 20bb45580dSXiaoyao Li #include "system/kvm_int.h" 216e250463SXiaoyao Li #include "system/runstate.h" 224420ba0eSXiaoyao Li #include "system/system.h" 23ebc2d2b4SIsaku Yamahata #include "system/ramblock.h" 2440da501dSIsaku Yamahata #include "system/address-spaces.h" 25756e12e7SXiaoyao Li 261ff5048dSXiaoyao Li #include <linux/kvm_para.h> 271ff5048dSXiaoyao Li 288c94c84cSXiaoyao Li #include "cpu.h" 298c94c84cSXiaoyao Li #include "cpu-internal.h" 30907ee7b6SXiaoyao Li #include "host-cpu.h" 31*efa742b2SXiaoyao Li #include "hw/i386/apic_internal.h" 32*efa742b2SXiaoyao Li #include "hw/i386/apic-msidef.h" 33f18672e4SXiaoyao Li #include "hw/i386/e820_memory_layout.h" 344420ba0eSXiaoyao Li #include "hw/i386/tdvf.h" 35631a2ac5SXiaoyao Li #include "hw/i386/x86.h" 36a7314259SXiaoyao Li #include "hw/i386/tdvf-hob.h" 37*efa742b2SXiaoyao Li #include "hw/pci/msi.h" 38b455880eSXiaoyao Li #include "kvm_i386.h" 39756e12e7SXiaoyao Li #include "tdx.h" 4040da501dSIsaku Yamahata #include "tdx-quote-generator.h" 41756e12e7SXiaoyao Li 424d6e288aSXiaoyao Li #include "standard-headers/asm-x86/kvm_para.h" 434d6e288aSXiaoyao Li 440e73b843SXiaoyao Li #define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) 450e73b843SXiaoyao Li #define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) 460e73b843SXiaoyao Li 4753b6f406SXiaoyao Li #define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) 486016e297SXiaoyao Li #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) 49bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) 50bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) 516016e297SXiaoyao Li 5253b6f406SXiaoyao Li #define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ 5353b6f406SXiaoyao Li TDX_TD_ATTRIBUTES_PKS | \ 5453b6f406SXiaoyao Li TDX_TD_ATTRIBUTES_PERFMON) 5553b6f406SXiaoyao Li 564d6e288aSXiaoyao Li #define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \ 574d6e288aSXiaoyao Li (1U << KVM_FEATURE_PV_UNHALT) | \ 584d6e288aSXiaoyao Li (1U << KVM_FEATURE_PV_TLB_FLUSH) | \ 594d6e288aSXiaoyao Li (1U << KVM_FEATURE_PV_SEND_IPI) | \ 604d6e288aSXiaoyao Li (1U << KVM_FEATURE_POLL_CONTROL) | \ 614d6e288aSXiaoyao Li (1U << KVM_FEATURE_PV_SCHED_YIELD) | \ 624d6e288aSXiaoyao Li (1U << KVM_FEATURE_MSI_EXT_DEST_ID)) 634d6e288aSXiaoyao Li 641619d0e4SXiaoyao Li static TdxGuest *tdx_guest; 651619d0e4SXiaoyao Li 668eddedc3SXiaoyao Li static struct kvm_tdx_capabilities *tdx_caps; 6775ec6189SXiaoyao Li static struct kvm_cpuid2 *tdx_supported_cpuid; 688eddedc3SXiaoyao Li 691619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ 701619d0e4SXiaoyao Li bool is_tdx_vm(void) 711619d0e4SXiaoyao Li { 721619d0e4SXiaoyao Li return !!tdx_guest; 731619d0e4SXiaoyao Li } 741619d0e4SXiaoyao Li 758eddedc3SXiaoyao Li enum tdx_ioctl_level { 768eddedc3SXiaoyao Li TDX_VM_IOCTL, 778eddedc3SXiaoyao Li TDX_VCPU_IOCTL, 788eddedc3SXiaoyao Li }; 798eddedc3SXiaoyao Li 808eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, 818eddedc3SXiaoyao Li int cmd_id, __u32 flags, void *data, 828eddedc3SXiaoyao Li Error **errp) 83631a2ac5SXiaoyao Li { 848eddedc3SXiaoyao Li struct kvm_tdx_cmd tdx_cmd = {}; 858eddedc3SXiaoyao Li int r; 868eddedc3SXiaoyao Li 878eddedc3SXiaoyao Li const char *tdx_ioctl_name[] = { 888eddedc3SXiaoyao Li [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", 898eddedc3SXiaoyao Li [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", 908eddedc3SXiaoyao Li [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", 918eddedc3SXiaoyao Li [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", 928eddedc3SXiaoyao Li [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", 938eddedc3SXiaoyao Li [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", 948eddedc3SXiaoyao Li }; 958eddedc3SXiaoyao Li 968eddedc3SXiaoyao Li tdx_cmd.id = cmd_id; 978eddedc3SXiaoyao Li tdx_cmd.flags = flags; 988eddedc3SXiaoyao Li tdx_cmd.data = (__u64)(unsigned long)data; 998eddedc3SXiaoyao Li 1008eddedc3SXiaoyao Li switch (level) { 1018eddedc3SXiaoyao Li case TDX_VM_IOCTL: 1028eddedc3SXiaoyao Li r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 1038eddedc3SXiaoyao Li break; 1048eddedc3SXiaoyao Li case TDX_VCPU_IOCTL: 1058eddedc3SXiaoyao Li r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 1068eddedc3SXiaoyao Li break; 1078eddedc3SXiaoyao Li default: 1088eddedc3SXiaoyao Li error_setg(errp, "Invalid tdx_ioctl_level %d", level); 1098eddedc3SXiaoyao Li return -EINVAL; 1108eddedc3SXiaoyao Li } 1118eddedc3SXiaoyao Li 1128eddedc3SXiaoyao Li if (r < 0) { 1138eddedc3SXiaoyao Li error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", 1148eddedc3SXiaoyao Li tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); 1158eddedc3SXiaoyao Li } 1168eddedc3SXiaoyao Li return r; 1178eddedc3SXiaoyao Li } 1188eddedc3SXiaoyao Li 1198eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, 1208eddedc3SXiaoyao Li Error **errp) 1218eddedc3SXiaoyao Li { 1228eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); 1238eddedc3SXiaoyao Li } 1248eddedc3SXiaoyao Li 1258eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, 1268eddedc3SXiaoyao Li void *data, Error **errp) 1278eddedc3SXiaoyao Li { 1288eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); 1298eddedc3SXiaoyao Li } 1308eddedc3SXiaoyao Li 1318eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp) 1328eddedc3SXiaoyao Li { 1338eddedc3SXiaoyao Li struct kvm_tdx_capabilities *caps; 1348eddedc3SXiaoyao Li /* 1st generation of TDX reports 6 cpuid configs */ 1358eddedc3SXiaoyao Li int nr_cpuid_configs = 6; 1368eddedc3SXiaoyao Li size_t size; 1378eddedc3SXiaoyao Li int r; 1388eddedc3SXiaoyao Li 1398eddedc3SXiaoyao Li do { 1408eddedc3SXiaoyao Li Error *local_err = NULL; 1418eddedc3SXiaoyao Li size = sizeof(struct kvm_tdx_capabilities) + 1428eddedc3SXiaoyao Li nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); 1438eddedc3SXiaoyao Li caps = g_malloc0(size); 1448eddedc3SXiaoyao Li caps->cpuid.nent = nr_cpuid_configs; 1458eddedc3SXiaoyao Li 1468eddedc3SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); 1478eddedc3SXiaoyao Li if (r == -E2BIG) { 1488eddedc3SXiaoyao Li g_free(caps); 1498eddedc3SXiaoyao Li nr_cpuid_configs *= 2; 1508eddedc3SXiaoyao Li if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { 1518eddedc3SXiaoyao Li error_report("KVM TDX seems broken that number of CPUID entries" 1528eddedc3SXiaoyao Li " in kvm_tdx_capabilities exceeds limit: %d", 1538eddedc3SXiaoyao Li KVM_MAX_CPUID_ENTRIES); 1548eddedc3SXiaoyao Li error_propagate(errp, local_err); 1558eddedc3SXiaoyao Li return r; 1568eddedc3SXiaoyao Li } 1578eddedc3SXiaoyao Li error_free(local_err); 1588eddedc3SXiaoyao Li } else if (r < 0) { 1598eddedc3SXiaoyao Li g_free(caps); 1608eddedc3SXiaoyao Li error_propagate(errp, local_err); 1618eddedc3SXiaoyao Li return r; 1628eddedc3SXiaoyao Li } 1638eddedc3SXiaoyao Li } while (r == -E2BIG); 1648eddedc3SXiaoyao Li 1658eddedc3SXiaoyao Li tdx_caps = caps; 166631a2ac5SXiaoyao Li 167631a2ac5SXiaoyao Li return 0; 168631a2ac5SXiaoyao Li } 169631a2ac5SXiaoyao Li 1700dd5fe5eSChao Peng void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) 1710dd5fe5eSChao Peng { 1720dd5fe5eSChao Peng assert(!tdx_guest->tdvf_mr); 1730dd5fe5eSChao Peng tdx_guest->tdvf_mr = tdvf_mr; 1740dd5fe5eSChao Peng } 1750dd5fe5eSChao Peng 176a7314259SXiaoyao Li static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) 177a7314259SXiaoyao Li { 178a7314259SXiaoyao Li TdxFirmwareEntry *entry; 179a7314259SXiaoyao Li 180a7314259SXiaoyao Li for_each_tdx_fw_entry(&tdx->tdvf, entry) { 181a7314259SXiaoyao Li if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { 182a7314259SXiaoyao Li return entry; 183a7314259SXiaoyao Li } 184a7314259SXiaoyao Li } 185a7314259SXiaoyao Li error_report("TDVF metadata doesn't specify TD_HOB location."); 186a7314259SXiaoyao Li exit(1); 187a7314259SXiaoyao Li } 188a7314259SXiaoyao Li 189f18672e4SXiaoyao Li static void tdx_add_ram_entry(uint64_t address, uint64_t length, 190f18672e4SXiaoyao Li enum TdxRamType type) 191f18672e4SXiaoyao Li { 192f18672e4SXiaoyao Li uint32_t nr_entries = tdx_guest->nr_ram_entries; 193f18672e4SXiaoyao Li tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, 194f18672e4SXiaoyao Li nr_entries + 1); 195f18672e4SXiaoyao Li 196f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].address = address; 197f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].length = length; 198f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].type = type; 199f18672e4SXiaoyao Li tdx_guest->nr_ram_entries++; 200f18672e4SXiaoyao Li } 201f18672e4SXiaoyao Li 202f18672e4SXiaoyao Li static int tdx_accept_ram_range(uint64_t address, uint64_t length) 203f18672e4SXiaoyao Li { 204f18672e4SXiaoyao Li uint64_t head_start, tail_start, head_length, tail_length; 205f18672e4SXiaoyao Li uint64_t tmp_address, tmp_length; 206f18672e4SXiaoyao Li TdxRamEntry *e; 207f18672e4SXiaoyao Li int i = 0; 208f18672e4SXiaoyao Li 209f18672e4SXiaoyao Li do { 210f18672e4SXiaoyao Li if (i == tdx_guest->nr_ram_entries) { 211f18672e4SXiaoyao Li return -1; 212f18672e4SXiaoyao Li } 213f18672e4SXiaoyao Li 214f18672e4SXiaoyao Li e = &tdx_guest->ram_entries[i++]; 215f18672e4SXiaoyao Li } while (address + length <= e->address || address >= e->address + e->length); 216f18672e4SXiaoyao Li 217f18672e4SXiaoyao Li /* 218f18672e4SXiaoyao Li * The to-be-accepted ram range must be fully contained by one 219f18672e4SXiaoyao Li * RAM entry. 220f18672e4SXiaoyao Li */ 221f18672e4SXiaoyao Li if (e->address > address || 222f18672e4SXiaoyao Li e->address + e->length < address + length) { 223f18672e4SXiaoyao Li return -1; 224f18672e4SXiaoyao Li } 225f18672e4SXiaoyao Li 226f18672e4SXiaoyao Li if (e->type == TDX_RAM_ADDED) { 227f18672e4SXiaoyao Li return 0; 228f18672e4SXiaoyao Li } 229f18672e4SXiaoyao Li 230f18672e4SXiaoyao Li tmp_address = e->address; 231f18672e4SXiaoyao Li tmp_length = e->length; 232f18672e4SXiaoyao Li 233f18672e4SXiaoyao Li e->address = address; 234f18672e4SXiaoyao Li e->length = length; 235f18672e4SXiaoyao Li e->type = TDX_RAM_ADDED; 236f18672e4SXiaoyao Li 237f18672e4SXiaoyao Li head_length = address - tmp_address; 238f18672e4SXiaoyao Li if (head_length > 0) { 239f18672e4SXiaoyao Li head_start = tmp_address; 240f18672e4SXiaoyao Li tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); 241f18672e4SXiaoyao Li } 242f18672e4SXiaoyao Li 243f18672e4SXiaoyao Li tail_start = address + length; 244f18672e4SXiaoyao Li if (tail_start < tmp_address + tmp_length) { 245f18672e4SXiaoyao Li tail_length = tmp_address + tmp_length - tail_start; 246f18672e4SXiaoyao Li tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); 247f18672e4SXiaoyao Li } 248f18672e4SXiaoyao Li 249f18672e4SXiaoyao Li return 0; 250f18672e4SXiaoyao Li } 251f18672e4SXiaoyao Li 252f18672e4SXiaoyao Li static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) 253f18672e4SXiaoyao Li { 254f18672e4SXiaoyao Li const TdxRamEntry *lhs = lhs_; 255f18672e4SXiaoyao Li const TdxRamEntry *rhs = rhs_; 256f18672e4SXiaoyao Li 257f18672e4SXiaoyao Li if (lhs->address == rhs->address) { 258f18672e4SXiaoyao Li return 0; 259f18672e4SXiaoyao Li } 260f18672e4SXiaoyao Li if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { 261f18672e4SXiaoyao Li return 1; 262f18672e4SXiaoyao Li } 263f18672e4SXiaoyao Li return -1; 264f18672e4SXiaoyao Li } 265f18672e4SXiaoyao Li 266f18672e4SXiaoyao Li static void tdx_init_ram_entries(void) 267f18672e4SXiaoyao Li { 268f18672e4SXiaoyao Li unsigned i, j, nr_e820_entries; 269f18672e4SXiaoyao Li 270f18672e4SXiaoyao Li nr_e820_entries = e820_get_table(NULL); 271f18672e4SXiaoyao Li tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); 272f18672e4SXiaoyao Li 273f18672e4SXiaoyao Li for (i = 0, j = 0; i < nr_e820_entries; i++) { 274f18672e4SXiaoyao Li uint64_t addr, len; 275f18672e4SXiaoyao Li 276f18672e4SXiaoyao Li if (e820_get_entry(i, E820_RAM, &addr, &len)) { 277f18672e4SXiaoyao Li tdx_guest->ram_entries[j].address = addr; 278f18672e4SXiaoyao Li tdx_guest->ram_entries[j].length = len; 279f18672e4SXiaoyao Li tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; 280f18672e4SXiaoyao Li j++; 281f18672e4SXiaoyao Li } 282f18672e4SXiaoyao Li } 283f18672e4SXiaoyao Li tdx_guest->nr_ram_entries = j; 284f18672e4SXiaoyao Li } 285f18672e4SXiaoyao Li 28641f7fd22SXiaoyao Li static void tdx_post_init_vcpus(void) 28741f7fd22SXiaoyao Li { 28841f7fd22SXiaoyao Li TdxFirmwareEntry *hob; 28941f7fd22SXiaoyao Li CPUState *cpu; 29041f7fd22SXiaoyao Li 29141f7fd22SXiaoyao Li hob = tdx_get_hob_entry(tdx_guest); 29241f7fd22SXiaoyao Li CPU_FOREACH(cpu) { 293e7f926ebSCédric Le Goater tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address, 29441f7fd22SXiaoyao Li &error_fatal); 29541f7fd22SXiaoyao Li } 29641f7fd22SXiaoyao Li } 29741f7fd22SXiaoyao Li 2984420ba0eSXiaoyao Li static void tdx_finalize_vm(Notifier *notifier, void *unused) 2994420ba0eSXiaoyao Li { 3004420ba0eSXiaoyao Li TdxFirmware *tdvf = &tdx_guest->tdvf; 3014420ba0eSXiaoyao Li TdxFirmwareEntry *entry; 302ebc2d2b4SIsaku Yamahata RAMBlock *ram_block; 303ebc2d2b4SIsaku Yamahata Error *local_err = NULL; 304ebc2d2b4SIsaku Yamahata int r; 3054420ba0eSXiaoyao Li 306f18672e4SXiaoyao Li tdx_init_ram_entries(); 307f18672e4SXiaoyao Li 3084420ba0eSXiaoyao Li for_each_tdx_fw_entry(tdvf, entry) { 3094420ba0eSXiaoyao Li switch (entry->type) { 3104420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_BFV: 3114420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_CFV: 3124420ba0eSXiaoyao Li entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; 3134420ba0eSXiaoyao Li break; 3144420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_TD_HOB: 3154420ba0eSXiaoyao Li case TDVF_SECTION_TYPE_TEMP_MEM: 3164420ba0eSXiaoyao Li entry->mem_ptr = qemu_ram_mmap(-1, entry->size, 3174420ba0eSXiaoyao Li qemu_real_host_page_size(), 0, 0); 3184420ba0eSXiaoyao Li if (entry->mem_ptr == MAP_FAILED) { 3194420ba0eSXiaoyao Li error_report("Failed to mmap memory for TDVF section %d", 3204420ba0eSXiaoyao Li entry->type); 3214420ba0eSXiaoyao Li exit(1); 3224420ba0eSXiaoyao Li } 323f18672e4SXiaoyao Li if (tdx_accept_ram_range(entry->address, entry->size)) { 324f18672e4SXiaoyao Li error_report("Failed to accept memory for TDVF section %d", 325f18672e4SXiaoyao Li entry->type); 326f18672e4SXiaoyao Li qemu_ram_munmap(-1, entry->mem_ptr, entry->size); 327f18672e4SXiaoyao Li exit(1); 328f18672e4SXiaoyao Li } 3294420ba0eSXiaoyao Li break; 3304420ba0eSXiaoyao Li default: 3314420ba0eSXiaoyao Li error_report("Unsupported TDVF section %d", entry->type); 3324420ba0eSXiaoyao Li exit(1); 3334420ba0eSXiaoyao Li } 3344420ba0eSXiaoyao Li } 335f18672e4SXiaoyao Li 336f18672e4SXiaoyao Li qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, 337f18672e4SXiaoyao Li sizeof(TdxRamEntry), &tdx_ram_entry_compare); 338a7314259SXiaoyao Li 339a7314259SXiaoyao Li tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); 340ebc2d2b4SIsaku Yamahata 34141f7fd22SXiaoyao Li tdx_post_init_vcpus(); 34241f7fd22SXiaoyao Li 343ebc2d2b4SIsaku Yamahata for_each_tdx_fw_entry(tdvf, entry) { 344ebc2d2b4SIsaku Yamahata struct kvm_tdx_init_mem_region region; 345ebc2d2b4SIsaku Yamahata uint32_t flags; 346ebc2d2b4SIsaku Yamahata 347ebc2d2b4SIsaku Yamahata region = (struct kvm_tdx_init_mem_region) { 348e7f926ebSCédric Le Goater .source_addr = (uintptr_t)entry->mem_ptr, 349ebc2d2b4SIsaku Yamahata .gpa = entry->address, 350ebc2d2b4SIsaku Yamahata .nr_pages = entry->size >> 12, 351ebc2d2b4SIsaku Yamahata }; 352ebc2d2b4SIsaku Yamahata 353ebc2d2b4SIsaku Yamahata flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? 354ebc2d2b4SIsaku Yamahata KVM_TDX_MEASURE_MEMORY_REGION : 0; 355ebc2d2b4SIsaku Yamahata 356ebc2d2b4SIsaku Yamahata do { 357ebc2d2b4SIsaku Yamahata error_free(local_err); 358ebc2d2b4SIsaku Yamahata local_err = NULL; 359ebc2d2b4SIsaku Yamahata r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags, 360ebc2d2b4SIsaku Yamahata ®ion, &local_err); 361ebc2d2b4SIsaku Yamahata } while (r == -EAGAIN || r == -EINTR); 362ebc2d2b4SIsaku Yamahata if (r < 0) { 363ebc2d2b4SIsaku Yamahata error_report_err(local_err); 364ebc2d2b4SIsaku Yamahata exit(1); 365ebc2d2b4SIsaku Yamahata } 366ebc2d2b4SIsaku Yamahata 367ebc2d2b4SIsaku Yamahata if (entry->type == TDVF_SECTION_TYPE_TD_HOB || 368ebc2d2b4SIsaku Yamahata entry->type == TDVF_SECTION_TYPE_TEMP_MEM) { 369ebc2d2b4SIsaku Yamahata qemu_ram_munmap(-1, entry->mem_ptr, entry->size); 370ebc2d2b4SIsaku Yamahata entry->mem_ptr = NULL; 371ebc2d2b4SIsaku Yamahata } 372ebc2d2b4SIsaku Yamahata } 373ebc2d2b4SIsaku Yamahata 374ebc2d2b4SIsaku Yamahata /* 375ebc2d2b4SIsaku Yamahata * TDVF image has been copied into private region above via 376ebc2d2b4SIsaku Yamahata * KVM_MEMORY_MAPPING. It becomes useless. 377ebc2d2b4SIsaku Yamahata */ 378ebc2d2b4SIsaku Yamahata ram_block = tdx_guest->tdvf_mr->ram_block; 379ebc2d2b4SIsaku Yamahata ram_block_discard_range(ram_block, 0, ram_block->max_length); 380ae60ff4eSXiaoyao Li 381ae60ff4eSXiaoyao Li tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal); 382ae60ff4eSXiaoyao Li CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true; 3834420ba0eSXiaoyao Li } 3844420ba0eSXiaoyao Li 3854420ba0eSXiaoyao Li static Notifier tdx_machine_done_notify = { 3864420ba0eSXiaoyao Li .notify = tdx_finalize_vm, 3874420ba0eSXiaoyao Li }; 3884420ba0eSXiaoyao Li 3890ba06e46SXiaoyao Li /* 3900ba06e46SXiaoyao Li * Some CPUID bits change from fixed1 to configurable bits when TDX module 3910ba06e46SXiaoyao Li * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY. 3920ba06e46SXiaoyao Li * 3930ba06e46SXiaoyao Li * To make QEMU work with all the versions of TDX module, keep the fixed1 bits 3940ba06e46SXiaoyao Li * here if they are ever fixed1 bits in any of the version though not fixed1 in 3950ba06e46SXiaoyao Li * the latest version. Otherwise, with the older version of TDX module, QEMU may 3960ba06e46SXiaoyao Li * treat the fixed1 bit as unsupported. 3970ba06e46SXiaoyao Li * 3980ba06e46SXiaoyao Li * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even 3990ba06e46SXiaoyao Li * though they changed to configurable bits. Because tdx_fixed1_bits is used to 4000ba06e46SXiaoyao Li * setup the supported bits. 4010ba06e46SXiaoyao Li */ 4020ba06e46SXiaoyao Li KvmCpuidInfo tdx_fixed1_bits = { 4030ba06e46SXiaoyao Li .cpuid.nent = 8, 4040ba06e46SXiaoyao Li .entries[0] = { 4050ba06e46SXiaoyao Li .function = 0x1, 4060ba06e46SXiaoyao Li .index = 0, 4070ba06e46SXiaoyao Li .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 | 4080ba06e46SXiaoyao Li CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | 4090ba06e46SXiaoyao Li CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 | 4100ba06e46SXiaoyao Li CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | 4110ba06e46SXiaoyao Li CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE | 4120ba06e46SXiaoyao Li CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR, 4130ba06e46SXiaoyao Li .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | 4140ba06e46SXiaoyao Li CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | 4150ba06e46SXiaoyao Li CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | 4160ba06e46SXiaoyao Li CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR | 4170ba06e46SXiaoyao Li CPUID_SSE | CPUID_SSE2, 4180ba06e46SXiaoyao Li }, 4190ba06e46SXiaoyao Li .entries[1] = { 4200ba06e46SXiaoyao Li .function = 0x6, 4210ba06e46SXiaoyao Li .index = 0, 4220ba06e46SXiaoyao Li .eax = CPUID_6_EAX_ARAT, 4230ba06e46SXiaoyao Li }, 4240ba06e46SXiaoyao Li .entries[2] = { 4250ba06e46SXiaoyao Li .function = 0x7, 4260ba06e46SXiaoyao Li .index = 0, 4270ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, 4280ba06e46SXiaoyao Li .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY | 4290ba06e46SXiaoyao Li CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID | 4300ba06e46SXiaoyao Li CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED | 4310ba06e46SXiaoyao Li CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | 4320ba06e46SXiaoyao Li CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI, 4330ba06e46SXiaoyao Li .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI | 4340ba06e46SXiaoyao Li CPUID_7_0_ECX_MOVDIR64B, 4350ba06e46SXiaoyao Li .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL | 4360ba06e46SXiaoyao Li CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D | 4370ba06e46SXiaoyao Li CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY | 4380ba06e46SXiaoyao Li CPUID_7_0_EDX_SPEC_CTRL_SSBD, 4390ba06e46SXiaoyao Li }, 4400ba06e46SXiaoyao Li .entries[3] = { 4410ba06e46SXiaoyao Li .function = 0x7, 4420ba06e46SXiaoyao Li .index = 2, 4430ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, 4440ba06e46SXiaoyao Li .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | 4450ba06e46SXiaoyao Li CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL, 4460ba06e46SXiaoyao Li }, 4470ba06e46SXiaoyao Li .entries[4] = { 4480ba06e46SXiaoyao Li .function = 0xD, 4490ba06e46SXiaoyao Li .index = 0, 4500ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, 4510ba06e46SXiaoyao Li .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK, 4520ba06e46SXiaoyao Li }, 4530ba06e46SXiaoyao Li .entries[5] = { 4540ba06e46SXiaoyao Li .function = 0xD, 4550ba06e46SXiaoyao Li .index = 1, 4560ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, 4570ba06e46SXiaoyao Li .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC| 4580ba06e46SXiaoyao Li CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, 4590ba06e46SXiaoyao Li }, 4600ba06e46SXiaoyao Li .entries[6] = { 4610ba06e46SXiaoyao Li .function = 0x80000001, 4620ba06e46SXiaoyao Li .index = 0, 4630ba06e46SXiaoyao Li .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, 4640ba06e46SXiaoyao Li /* 4650ba06e46SXiaoyao Li * Strictly speaking, SYSCALL is not fixed1 bit since it depends on 4660ba06e46SXiaoyao Li * the CPU to be in 64-bit mode. But here fixed1 is used to serve the 4670ba06e46SXiaoyao Li * purpose of supported bits for TDX. In this sense, SYACALL is always 4680ba06e46SXiaoyao Li * supported. 4690ba06e46SXiaoyao Li */ 4700ba06e46SXiaoyao Li .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | 4710ba06e46SXiaoyao Li CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, 4720ba06e46SXiaoyao Li }, 4730ba06e46SXiaoyao Li .entries[7] = { 4740ba06e46SXiaoyao Li .function = 0x80000007, 4750ba06e46SXiaoyao Li .index = 0, 4760ba06e46SXiaoyao Li .edx = CPUID_APM_INVTSC, 4770ba06e46SXiaoyao Li }, 4780ba06e46SXiaoyao Li }; 4790ba06e46SXiaoyao Li 48031df29c5SXiaoyao Li typedef struct TdxAttrsMap { 48131df29c5SXiaoyao Li uint32_t attr_index; 48231df29c5SXiaoyao Li uint32_t cpuid_leaf; 48331df29c5SXiaoyao Li uint32_t cpuid_subleaf; 48431df29c5SXiaoyao Li int cpuid_reg; 48531df29c5SXiaoyao Li uint32_t feat_mask; 48631df29c5SXiaoyao Li } TdxAttrsMap; 48731df29c5SXiaoyao Li 48831df29c5SXiaoyao Li static TdxAttrsMap tdx_attrs_maps[] = { 48931df29c5SXiaoyao Li {.attr_index = 27, 49031df29c5SXiaoyao Li .cpuid_leaf = 7, 49131df29c5SXiaoyao Li .cpuid_subleaf = 1, 49231df29c5SXiaoyao Li .cpuid_reg = R_EAX, 49331df29c5SXiaoyao Li .feat_mask = CPUID_7_1_EAX_LASS,}, 49431df29c5SXiaoyao Li 49531df29c5SXiaoyao Li {.attr_index = 30, 49631df29c5SXiaoyao Li .cpuid_leaf = 7, 49731df29c5SXiaoyao Li .cpuid_subleaf = 0, 49831df29c5SXiaoyao Li .cpuid_reg = R_ECX, 49931df29c5SXiaoyao Li .feat_mask = CPUID_7_0_ECX_PKS,}, 50031df29c5SXiaoyao Li 50131df29c5SXiaoyao Li {.attr_index = 31, 50231df29c5SXiaoyao Li .cpuid_leaf = 7, 50331df29c5SXiaoyao Li .cpuid_subleaf = 0, 50431df29c5SXiaoyao Li .cpuid_reg = R_ECX, 50531df29c5SXiaoyao Li .feat_mask = CPUID_7_0_ECX_KeyLocker,}, 50631df29c5SXiaoyao Li }; 50731df29c5SXiaoyao Li 5088c94c84cSXiaoyao Li typedef struct TdxXFAMDep { 5098c94c84cSXiaoyao Li int xfam_bit; 5108c94c84cSXiaoyao Li FeatureMask feat_mask; 5118c94c84cSXiaoyao Li } TdxXFAMDep; 5128c94c84cSXiaoyao Li 5138c94c84cSXiaoyao Li /* 5148c94c84cSXiaoyao Li * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are 5158c94c84cSXiaoyao Li * defiend here. 5168c94c84cSXiaoyao Li * 5178c94c84cSXiaoyao Li * For those whose virtualization type are "XFAM & Configured & Native", they 5188c94c84cSXiaoyao Li * are reported as configurable bits. And they are not supported if not in the 5198c94c84cSXiaoyao Li * configureable bits list from KVM even if the corresponding XFAM bit is 5208c94c84cSXiaoyao Li * supported. 5218c94c84cSXiaoyao Li */ 5228c94c84cSXiaoyao Li TdxXFAMDep tdx_xfam_deps[] = { 5238c94c84cSXiaoyao Li { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }}, 5248c94c84cSXiaoyao Li { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }}, 5258c94c84cSXiaoyao Li { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}}, 5268c94c84cSXiaoyao Li { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}}, 5278c94c84cSXiaoyao Li { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}}, 5288c94c84cSXiaoyao Li { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}}, 5298c94c84cSXiaoyao Li { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }}, 5308c94c84cSXiaoyao Li { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }}, 5318c94c84cSXiaoyao Li { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }}, 5328c94c84cSXiaoyao Li }; 5338c94c84cSXiaoyao Li 5340ba06e46SXiaoyao Li static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, 5350ba06e46SXiaoyao Li uint32_t index) 5360ba06e46SXiaoyao Li { 5370ba06e46SXiaoyao Li struct kvm_cpuid_entry2 *e; 5380ba06e46SXiaoyao Li 5390ba06e46SXiaoyao Li e = cpuid_find_entry(tdx_supported_cpuid, function, index); 5400ba06e46SXiaoyao Li if (!e) { 5410ba06e46SXiaoyao Li if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) { 5420ba06e46SXiaoyao Li error_report("tdx_supported_cpuid requries more space than %d entries", 5430ba06e46SXiaoyao Li KVM_MAX_CPUID_ENTRIES); 5440ba06e46SXiaoyao Li exit(1); 5450ba06e46SXiaoyao Li } 5460ba06e46SXiaoyao Li e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++]; 5470ba06e46SXiaoyao Li e->function = function; 5480ba06e46SXiaoyao Li e->index = index; 5490ba06e46SXiaoyao Li } 5500ba06e46SXiaoyao Li 5510ba06e46SXiaoyao Li return e; 5520ba06e46SXiaoyao Li } 5530ba06e46SXiaoyao Li 5540ba06e46SXiaoyao Li static void tdx_add_supported_cpuid_by_fixed1_bits(void) 5550ba06e46SXiaoyao Li { 5560ba06e46SXiaoyao Li struct kvm_cpuid_entry2 *e, *e1; 5570ba06e46SXiaoyao Li int i; 5580ba06e46SXiaoyao Li 5590ba06e46SXiaoyao Li for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) { 5600ba06e46SXiaoyao Li e = &tdx_fixed1_bits.entries[i]; 5610ba06e46SXiaoyao Li 5620ba06e46SXiaoyao Li e1 = find_in_supported_entry(e->function, e->index); 5630ba06e46SXiaoyao Li e1->eax |= e->eax; 5640ba06e46SXiaoyao Li e1->ebx |= e->ebx; 5650ba06e46SXiaoyao Li e1->ecx |= e->ecx; 5660ba06e46SXiaoyao Li e1->edx |= e->edx; 5670ba06e46SXiaoyao Li } 5680ba06e46SXiaoyao Li } 5690ba06e46SXiaoyao Li 57031df29c5SXiaoyao Li static void tdx_add_supported_cpuid_by_attrs(void) 57131df29c5SXiaoyao Li { 57231df29c5SXiaoyao Li struct kvm_cpuid_entry2 *e; 57331df29c5SXiaoyao Li TdxAttrsMap *map; 57431df29c5SXiaoyao Li int i; 57531df29c5SXiaoyao Li 57631df29c5SXiaoyao Li for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) { 57731df29c5SXiaoyao Li map = &tdx_attrs_maps[i]; 57831df29c5SXiaoyao Li if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) { 57931df29c5SXiaoyao Li continue; 58031df29c5SXiaoyao Li } 58131df29c5SXiaoyao Li 58231df29c5SXiaoyao Li e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf); 58331df29c5SXiaoyao Li 58431df29c5SXiaoyao Li switch(map->cpuid_reg) { 58531df29c5SXiaoyao Li case R_EAX: 58631df29c5SXiaoyao Li e->eax |= map->feat_mask; 58731df29c5SXiaoyao Li break; 58831df29c5SXiaoyao Li case R_EBX: 58931df29c5SXiaoyao Li e->ebx |= map->feat_mask; 59031df29c5SXiaoyao Li break; 59131df29c5SXiaoyao Li case R_ECX: 59231df29c5SXiaoyao Li e->ecx |= map->feat_mask; 59331df29c5SXiaoyao Li break; 59431df29c5SXiaoyao Li case R_EDX: 59531df29c5SXiaoyao Li e->edx |= map->feat_mask; 59631df29c5SXiaoyao Li break; 59731df29c5SXiaoyao Li } 59831df29c5SXiaoyao Li } 59931df29c5SXiaoyao Li } 60031df29c5SXiaoyao Li 6018c94c84cSXiaoyao Li static void tdx_add_supported_cpuid_by_xfam(void) 6028c94c84cSXiaoyao Li { 6038c94c84cSXiaoyao Li struct kvm_cpuid_entry2 *e; 6048c94c84cSXiaoyao Li int i; 6058c94c84cSXiaoyao Li 6068c94c84cSXiaoyao Li const TdxXFAMDep *xfam_dep; 6078c94c84cSXiaoyao Li const FeatureWordInfo *f; 6088c94c84cSXiaoyao Li for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) { 6098c94c84cSXiaoyao Li xfam_dep = &tdx_xfam_deps[i]; 6108c94c84cSXiaoyao Li if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) { 6118c94c84cSXiaoyao Li continue; 6128c94c84cSXiaoyao Li } 6138c94c84cSXiaoyao Li 6148c94c84cSXiaoyao Li f = &feature_word_info[xfam_dep->feat_mask.index]; 6158c94c84cSXiaoyao Li if (f->type != CPUID_FEATURE_WORD) { 6168c94c84cSXiaoyao Li continue; 6178c94c84cSXiaoyao Li } 6188c94c84cSXiaoyao Li 6198c94c84cSXiaoyao Li e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx); 6208c94c84cSXiaoyao Li switch(f->cpuid.reg) { 6218c94c84cSXiaoyao Li case R_EAX: 6228c94c84cSXiaoyao Li e->eax |= xfam_dep->feat_mask.mask; 6238c94c84cSXiaoyao Li break; 6248c94c84cSXiaoyao Li case R_EBX: 6258c94c84cSXiaoyao Li e->ebx |= xfam_dep->feat_mask.mask; 6268c94c84cSXiaoyao Li break; 6278c94c84cSXiaoyao Li case R_ECX: 6288c94c84cSXiaoyao Li e->ecx |= xfam_dep->feat_mask.mask; 6298c94c84cSXiaoyao Li break; 6308c94c84cSXiaoyao Li case R_EDX: 6318c94c84cSXiaoyao Li e->edx |= xfam_dep->feat_mask.mask; 6328c94c84cSXiaoyao Li break; 6338c94c84cSXiaoyao Li } 6348c94c84cSXiaoyao Li } 6358c94c84cSXiaoyao Li 6368c94c84cSXiaoyao Li e = find_in_supported_entry(0xd, 0); 6378c94c84cSXiaoyao Li e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK); 6388c94c84cSXiaoyao Li e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32; 6398c94c84cSXiaoyao Li 6408c94c84cSXiaoyao Li e = find_in_supported_entry(0xd, 1); 6419f5771c5SXiaoyao Li /* 6429f5771c5SXiaoyao Li * Mark XFD always support for TDX, it will be cleared finally in 6439f5771c5SXiaoyao Li * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware 6449f5771c5SXiaoyao Li * because in this case the original data has it as 0. 6459f5771c5SXiaoyao Li */ 6469f5771c5SXiaoyao Li e->eax |= CPUID_XSAVE_XFD; 6478c94c84cSXiaoyao Li e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK); 6488c94c84cSXiaoyao Li e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; 6498c94c84cSXiaoyao Li } 6508c94c84cSXiaoyao Li 6514d6e288aSXiaoyao Li static void tdx_add_supported_kvm_features(void) 6524d6e288aSXiaoyao Li { 6534d6e288aSXiaoyao Li struct kvm_cpuid_entry2 *e; 6544d6e288aSXiaoyao Li 6554d6e288aSXiaoyao Li e = find_in_supported_entry(0x40000001, 0); 6564d6e288aSXiaoyao Li e->eax = TDX_SUPPORTED_KVM_FEATURES; 6574d6e288aSXiaoyao Li } 6584d6e288aSXiaoyao Li 65975ec6189SXiaoyao Li static void tdx_setup_supported_cpuid(void) 66075ec6189SXiaoyao Li { 66175ec6189SXiaoyao Li if (tdx_supported_cpuid) { 66275ec6189SXiaoyao Li return; 66375ec6189SXiaoyao Li } 66475ec6189SXiaoyao Li 66575ec6189SXiaoyao Li tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) + 66675ec6189SXiaoyao Li KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2)); 66775ec6189SXiaoyao Li 66875ec6189SXiaoyao Li memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries, 66975ec6189SXiaoyao Li tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2)); 67075ec6189SXiaoyao Li tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; 6710ba06e46SXiaoyao Li 6720ba06e46SXiaoyao Li tdx_add_supported_cpuid_by_fixed1_bits(); 67331df29c5SXiaoyao Li tdx_add_supported_cpuid_by_attrs(); 6748c94c84cSXiaoyao Li tdx_add_supported_cpuid_by_xfam(); 6754d6e288aSXiaoyao Li 6764d6e288aSXiaoyao Li tdx_add_supported_kvm_features(); 67775ec6189SXiaoyao Li } 67875ec6189SXiaoyao Li 6798eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) 6808eddedc3SXiaoyao Li { 681810d4e83SXiaoyao Li MachineState *ms = MACHINE(qdev_get_machine()); 682810d4e83SXiaoyao Li X86MachineState *x86ms = X86_MACHINE(ms); 6831619d0e4SXiaoyao Li TdxGuest *tdx = TDX_GUEST(cgs); 6848eddedc3SXiaoyao Li int r = 0; 6858eddedc3SXiaoyao Li 6868eddedc3SXiaoyao Li kvm_mark_guest_state_protected(); 6878eddedc3SXiaoyao Li 688810d4e83SXiaoyao Li if (x86ms->smm == ON_OFF_AUTO_AUTO) { 689810d4e83SXiaoyao Li x86ms->smm = ON_OFF_AUTO_OFF; 690810d4e83SXiaoyao Li } else if (x86ms->smm == ON_OFF_AUTO_ON) { 691810d4e83SXiaoyao Li error_setg(errp, "TDX VM doesn't support SMM"); 692810d4e83SXiaoyao Li return -EINVAL; 693810d4e83SXiaoyao Li } 694810d4e83SXiaoyao Li 695e7ef6089SXiaoyao Li if (x86ms->pic == ON_OFF_AUTO_AUTO) { 696e7ef6089SXiaoyao Li x86ms->pic = ON_OFF_AUTO_OFF; 697e7ef6089SXiaoyao Li } else if (x86ms->pic == ON_OFF_AUTO_ON) { 698e7ef6089SXiaoyao Li error_setg(errp, "TDX VM doesn't support PIC"); 699e7ef6089SXiaoyao Li return -EINVAL; 700e7ef6089SXiaoyao Li } 701e7ef6089SXiaoyao Li 702bb45580dSXiaoyao Li if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { 703bb45580dSXiaoyao Li kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON; 704bb45580dSXiaoyao Li } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) { 705bb45580dSXiaoyao Li error_setg(errp, "TDX VM requires kernel_irqchip to be split"); 706bb45580dSXiaoyao Li return -EINVAL; 707bb45580dSXiaoyao Li } 708bb45580dSXiaoyao Li 7098eddedc3SXiaoyao Li if (!tdx_caps) { 7108eddedc3SXiaoyao Li r = get_tdx_capabilities(errp); 7111619d0e4SXiaoyao Li if (r) { 7121619d0e4SXiaoyao Li return r; 7131619d0e4SXiaoyao Li } 7148eddedc3SXiaoyao Li } 7158eddedc3SXiaoyao Li 71675ec6189SXiaoyao Li tdx_setup_supported_cpuid(); 71775ec6189SXiaoyao Li 7181ff5048dSXiaoyao Li /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */ 7191ff5048dSXiaoyao Li if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { 7201ff5048dSXiaoyao Li return -EOPNOTSUPP; 7211ff5048dSXiaoyao Li } 7221ff5048dSXiaoyao Li 723da672865SXiaoyao Li /* 724da672865SXiaoyao Li * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly 725da672865SXiaoyao Li * memory for shared memory but not for private memory. Besides, whether a 726da672865SXiaoyao Li * memslot is private or shared is not determined by QEMU. 727da672865SXiaoyao Li * 728da672865SXiaoyao Li * Thus, just mark readonly memory not supported for simplicity. 729da672865SXiaoyao Li */ 730da672865SXiaoyao Li kvm_readonly_mem_allowed = false; 731da672865SXiaoyao Li 7324420ba0eSXiaoyao Li qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); 7334420ba0eSXiaoyao Li 7341619d0e4SXiaoyao Li tdx_guest = tdx; 7351619d0e4SXiaoyao Li return 0; 7368eddedc3SXiaoyao Li } 7378eddedc3SXiaoyao Li 738b455880eSXiaoyao Li static int tdx_kvm_type(X86ConfidentialGuest *cg) 739b455880eSXiaoyao Li { 740b455880eSXiaoyao Li /* Do the object check */ 741b455880eSXiaoyao Li TDX_GUEST(cg); 742b455880eSXiaoyao Li 743b455880eSXiaoyao Li return KVM_X86_TDX_VM; 744b455880eSXiaoyao Li } 745b455880eSXiaoyao Li 7467c615242SXiaoyao Li static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) 7477c615242SXiaoyao Li { 748750560f8SXiaoyao Li X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); 7499002494fSXiaoyao Li X86CPU *x86cpu = X86_CPU(cpu); 7509002494fSXiaoyao Li 751750560f8SXiaoyao Li if (xcc->model) { 752750560f8SXiaoyao Li error_report("Named cpu model is not supported for TDX yet!"); 753750560f8SXiaoyao Li exit(1); 754750560f8SXiaoyao Li } 755750560f8SXiaoyao Li 7567c615242SXiaoyao Li object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); 7579002494fSXiaoyao Li 758ea4867b9SXiaoyao Li /* invtsc is fixed1 for TD guest */ 759ea4867b9SXiaoyao Li object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort); 760ea4867b9SXiaoyao Li 76190d2bbd1SXiaoyao Li x86cpu->force_cpuid_0x1f = true; 7627c615242SXiaoyao Li } 7637c615242SXiaoyao Li 76475ec6189SXiaoyao Li static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, 76575ec6189SXiaoyao Li uint32_t feature, uint32_t index, 76675ec6189SXiaoyao Li int reg, uint32_t value) 76775ec6189SXiaoyao Li { 76875ec6189SXiaoyao Li struct kvm_cpuid_entry2 *e; 76975ec6189SXiaoyao Li 7700ba06e46SXiaoyao Li e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index); 7710ba06e46SXiaoyao Li if (e) { 7720ba06e46SXiaoyao Li value |= cpuid_entry_get_reg(e, reg); 7730ba06e46SXiaoyao Li } 7740ba06e46SXiaoyao Li 77575ec6189SXiaoyao Li if (is_feature_word_cpuid(feature, index, reg)) { 77675ec6189SXiaoyao Li e = cpuid_find_entry(tdx_supported_cpuid, feature, index); 77775ec6189SXiaoyao Li if (e) { 77875ec6189SXiaoyao Li value &= cpuid_entry_get_reg(e, reg); 77975ec6189SXiaoyao Li } 78075ec6189SXiaoyao Li } 78175ec6189SXiaoyao Li 78275ec6189SXiaoyao Li return value; 78375ec6189SXiaoyao Li } 78475ec6189SXiaoyao Li 785e3d1a4a6SXiaoyao Li static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret) 786e3d1a4a6SXiaoyao Li { 787e3d1a4a6SXiaoyao Li struct kvm_cpuid2 *fetch_cpuid; 788e3d1a4a6SXiaoyao Li int size = KVM_MAX_CPUID_ENTRIES; 789e3d1a4a6SXiaoyao Li Error *local_err = NULL; 790e3d1a4a6SXiaoyao Li int r; 791e3d1a4a6SXiaoyao Li 792e3d1a4a6SXiaoyao Li do { 793e3d1a4a6SXiaoyao Li error_free(local_err); 794e3d1a4a6SXiaoyao Li local_err = NULL; 795e3d1a4a6SXiaoyao Li 796e3d1a4a6SXiaoyao Li fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) + 797e3d1a4a6SXiaoyao Li sizeof(struct kvm_cpuid_entry2) * size); 798e3d1a4a6SXiaoyao Li fetch_cpuid->nent = size; 799e3d1a4a6SXiaoyao Li r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err); 800e3d1a4a6SXiaoyao Li if (r == -E2BIG) { 801e3d1a4a6SXiaoyao Li g_free(fetch_cpuid); 802e3d1a4a6SXiaoyao Li size = fetch_cpuid->nent; 803e3d1a4a6SXiaoyao Li } 804e3d1a4a6SXiaoyao Li } while (r == -E2BIG); 805e3d1a4a6SXiaoyao Li 806e3d1a4a6SXiaoyao Li if (r < 0) { 807e3d1a4a6SXiaoyao Li error_report_err(local_err); 808e3d1a4a6SXiaoyao Li *ret = r; 809e3d1a4a6SXiaoyao Li return NULL; 810e3d1a4a6SXiaoyao Li } 811e3d1a4a6SXiaoyao Li 812e3d1a4a6SXiaoyao Li return fetch_cpuid; 813e3d1a4a6SXiaoyao Li } 814e3d1a4a6SXiaoyao Li 815e3d1a4a6SXiaoyao Li static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) 816e3d1a4a6SXiaoyao Li { 817e3d1a4a6SXiaoyao Li uint64_t actual, requested, unavailable, forced_on; 818e3d1a4a6SXiaoyao Li g_autofree struct kvm_cpuid2 *fetch_cpuid; 819e3d1a4a6SXiaoyao Li const char *forced_on_prefix = NULL; 820e3d1a4a6SXiaoyao Li const char *unav_prefix = NULL; 821e3d1a4a6SXiaoyao Li struct kvm_cpuid_entry2 *entry; 822e3d1a4a6SXiaoyao Li X86CPU *cpu = X86_CPU(cs); 823e3d1a4a6SXiaoyao Li CPUX86State *env = &cpu->env; 824e3d1a4a6SXiaoyao Li FeatureWordInfo *wi; 825e3d1a4a6SXiaoyao Li FeatureWord w; 826e3d1a4a6SXiaoyao Li bool mismatch = false; 827e3d1a4a6SXiaoyao Li int r; 828e3d1a4a6SXiaoyao Li 829e3d1a4a6SXiaoyao Li fetch_cpuid = tdx_fetch_cpuid(cs, &r); 830e3d1a4a6SXiaoyao Li if (!fetch_cpuid) { 831e3d1a4a6SXiaoyao Li return r; 832e3d1a4a6SXiaoyao Li } 833e3d1a4a6SXiaoyao Li 834e3d1a4a6SXiaoyao Li if (cpu->check_cpuid || cpu->enforce_cpuid) { 835e3d1a4a6SXiaoyao Li unav_prefix = "TDX doesn't support requested feature"; 836e3d1a4a6SXiaoyao Li forced_on_prefix = "TDX forcibly sets the feature"; 837e3d1a4a6SXiaoyao Li } 838e3d1a4a6SXiaoyao Li 839e3d1a4a6SXiaoyao Li for (w = 0; w < FEATURE_WORDS; w++) { 840e3d1a4a6SXiaoyao Li wi = &feature_word_info[w]; 841e3d1a4a6SXiaoyao Li actual = 0; 842e3d1a4a6SXiaoyao Li 843e3d1a4a6SXiaoyao Li switch (wi->type) { 844e3d1a4a6SXiaoyao Li case CPUID_FEATURE_WORD: 845e3d1a4a6SXiaoyao Li entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx); 846e3d1a4a6SXiaoyao Li if (!entry) { 847e3d1a4a6SXiaoyao Li /* 848e3d1a4a6SXiaoyao Li * If KVM doesn't report it means it's totally configurable 849e3d1a4a6SXiaoyao Li * by QEMU 850e3d1a4a6SXiaoyao Li */ 851e3d1a4a6SXiaoyao Li continue; 852e3d1a4a6SXiaoyao Li } 853e3d1a4a6SXiaoyao Li 854e3d1a4a6SXiaoyao Li actual = cpuid_entry_get_reg(entry, wi->cpuid.reg); 855e3d1a4a6SXiaoyao Li break; 856e3d1a4a6SXiaoyao Li case MSR_FEATURE_WORD: 857e3d1a4a6SXiaoyao Li /* 858e3d1a4a6SXiaoyao Li * TODO: 859e3d1a4a6SXiaoyao Li * validate MSR features when KVM has interface report them. 860e3d1a4a6SXiaoyao Li */ 861e3d1a4a6SXiaoyao Li continue; 862e3d1a4a6SXiaoyao Li } 863e3d1a4a6SXiaoyao Li 864deb9db6fSXiaoyao Li /* Fixup for special cases */ 865deb9db6fSXiaoyao Li switch (w) { 866deb9db6fSXiaoyao Li case FEAT_8000_0001_EDX: 867deb9db6fSXiaoyao Li /* 868deb9db6fSXiaoyao Li * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit 869deb9db6fSXiaoyao Li * mode and before vcpu running it's not in 64-bit mode. 870deb9db6fSXiaoyao Li */ 871deb9db6fSXiaoyao Li actual |= CPUID_EXT2_SYSCALL; 872deb9db6fSXiaoyao Li break; 873deb9db6fSXiaoyao Li default: 874deb9db6fSXiaoyao Li break; 875deb9db6fSXiaoyao Li } 876deb9db6fSXiaoyao Li 877e3d1a4a6SXiaoyao Li requested = env->features[w]; 878e3d1a4a6SXiaoyao Li unavailable = requested & ~actual; 879e3d1a4a6SXiaoyao Li mark_unavailable_features(cpu, w, unavailable, unav_prefix); 880e3d1a4a6SXiaoyao Li if (unavailable) { 881e3d1a4a6SXiaoyao Li mismatch = true; 882e3d1a4a6SXiaoyao Li } 883e3d1a4a6SXiaoyao Li 884e3d1a4a6SXiaoyao Li forced_on = actual & ~requested; 885e3d1a4a6SXiaoyao Li mark_forced_on_features(cpu, w, forced_on, forced_on_prefix); 886e3d1a4a6SXiaoyao Li if (forced_on) { 887e3d1a4a6SXiaoyao Li mismatch = true; 888e3d1a4a6SXiaoyao Li } 889e3d1a4a6SXiaoyao Li } 890e3d1a4a6SXiaoyao Li 891e3d1a4a6SXiaoyao Li if (cpu->enforce_cpuid && mismatch) { 892e3d1a4a6SXiaoyao Li return -EINVAL; 893e3d1a4a6SXiaoyao Li } 894e3d1a4a6SXiaoyao Li 895907ee7b6SXiaoyao Li if (cpu->phys_bits != host_cpu_phys_bits()) { 896907ee7b6SXiaoyao Li error_report("TDX requires guest CPU physical bits (%u) " 897907ee7b6SXiaoyao Li "to match host CPU physical bits (%u)", 898907ee7b6SXiaoyao Li cpu->phys_bits, host_cpu_phys_bits()); 899907ee7b6SXiaoyao Li return -EINVAL; 900907ee7b6SXiaoyao Li } 901907ee7b6SXiaoyao Li 902e3d1a4a6SXiaoyao Li return 0; 903e3d1a4a6SXiaoyao Li } 904e3d1a4a6SXiaoyao Li 90553b6f406SXiaoyao Li static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) 90653b6f406SXiaoyao Li { 90753b6f406SXiaoyao Li if ((tdx->attributes & ~tdx_caps->supported_attrs)) { 908e7f926ebSCédric Le Goater error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM " 909e7f926ebSCédric Le Goater "(KVM supported: 0x%"PRIx64")", tdx->attributes, 910e7f926ebSCédric Le Goater (uint64_t)tdx_caps->supported_attrs); 91153b6f406SXiaoyao Li return -1; 91253b6f406SXiaoyao Li } 91353b6f406SXiaoyao Li 91453b6f406SXiaoyao Li if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { 91553b6f406SXiaoyao Li error_setg(errp, "Some QEMU unsupported TD attribute bits being " 916e7f926ebSCédric Le Goater "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")", 917e7f926ebSCédric Le Goater tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS); 91853b6f406SXiaoyao Li return -1; 91953b6f406SXiaoyao Li } 92053b6f406SXiaoyao Li 92153b6f406SXiaoyao Li return 0; 92253b6f406SXiaoyao Li } 92353b6f406SXiaoyao Li 92453b6f406SXiaoyao Li static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) 925bb3be394SXiaoyao Li { 926bb3be394SXiaoyao Li CPUX86State *env = &x86cpu->env; 927bb3be394SXiaoyao Li 928bb3be394SXiaoyao Li tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? 929bb3be394SXiaoyao Li TDX_TD_ATTRIBUTES_PKS : 0; 930bb3be394SXiaoyao Li tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; 93153b6f406SXiaoyao Li 93253b6f406SXiaoyao Li return tdx_validate_attributes(tdx_guest, errp); 933bb3be394SXiaoyao Li } 934bb3be394SXiaoyao Li 935f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp) 936f15898b0SXiaoyao Li { 937f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env; 938f15898b0SXiaoyao Li uint64_t xfam; 939f15898b0SXiaoyao Li 940f15898b0SXiaoyao Li xfam = env->features[FEAT_XSAVE_XCR0_LO] | 941f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XCR0_HI] | 942f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_LO] | 943f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_HI]; 944f15898b0SXiaoyao Li 945f15898b0SXiaoyao Li if (xfam & ~tdx_caps->supported_xfam) { 946e7f926ebSCédric Le Goater error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))", 947e7f926ebSCédric Le Goater xfam, (uint64_t)tdx_caps->supported_xfam); 948f15898b0SXiaoyao Li return -1; 949f15898b0SXiaoyao Li } 950f15898b0SXiaoyao Li 951f15898b0SXiaoyao Li tdx_guest->xfam = xfam; 952f15898b0SXiaoyao Li return 0; 953f15898b0SXiaoyao Li } 954f15898b0SXiaoyao Li 955f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) 956f15898b0SXiaoyao Li { 957f15898b0SXiaoyao Li int i, dest_cnt = 0; 958f15898b0SXiaoyao Li struct kvm_cpuid_entry2 *src, *dest, *conf; 959f15898b0SXiaoyao Li 960f15898b0SXiaoyao Li for (i = 0; i < cpuids->nent; i++) { 961f15898b0SXiaoyao Li src = cpuids->entries + i; 962f15898b0SXiaoyao Li conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); 963f15898b0SXiaoyao Li if (!conf) { 964f15898b0SXiaoyao Li continue; 965f15898b0SXiaoyao Li } 966f15898b0SXiaoyao Li dest = cpuids->entries + dest_cnt; 967f15898b0SXiaoyao Li 968f15898b0SXiaoyao Li dest->function = src->function; 969f15898b0SXiaoyao Li dest->index = src->index; 970f15898b0SXiaoyao Li dest->flags = src->flags; 971f15898b0SXiaoyao Li dest->eax = src->eax & conf->eax; 972f15898b0SXiaoyao Li dest->ebx = src->ebx & conf->ebx; 973f15898b0SXiaoyao Li dest->ecx = src->ecx & conf->ecx; 974f15898b0SXiaoyao Li dest->edx = src->edx & conf->edx; 975f15898b0SXiaoyao Li 976f15898b0SXiaoyao Li dest_cnt++; 977f15898b0SXiaoyao Li } 978f15898b0SXiaoyao Li cpuids->nent = dest_cnt++; 979f15898b0SXiaoyao Li } 980f15898b0SXiaoyao Li 981f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) 982f15898b0SXiaoyao Li { 983f15898b0SXiaoyao Li X86CPU *x86cpu = X86_CPU(cpu); 984f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env; 985f15898b0SXiaoyao Li g_autofree struct kvm_tdx_init_vm *init_vm = NULL; 986f15898b0SXiaoyao Li Error *local_err = NULL; 987d05a0858SIsaku Yamahata size_t data_len; 988f15898b0SXiaoyao Li int retry = 10000; 989f15898b0SXiaoyao Li int r = 0; 990f15898b0SXiaoyao Li 991f15898b0SXiaoyao Li QEMU_LOCK_GUARD(&tdx_guest->lock); 992f15898b0SXiaoyao Li if (tdx_guest->initialized) { 993f15898b0SXiaoyao Li return r; 994f15898b0SXiaoyao Li } 995f15898b0SXiaoyao Li 996f15898b0SXiaoyao Li init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + 997f15898b0SXiaoyao Li sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); 998f15898b0SXiaoyao Li 999d529a2acSXiaoyao Li if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { 1000d529a2acSXiaoyao Li error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); 1001d529a2acSXiaoyao Li return -EOPNOTSUPP; 1002d529a2acSXiaoyao Li } 1003d529a2acSXiaoyao Li 1004d529a2acSXiaoyao Li r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, 1005d529a2acSXiaoyao Li 0, TDX_APIC_BUS_CYCLES_NS); 1006d529a2acSXiaoyao Li if (r < 0) { 1007d529a2acSXiaoyao Li error_setg_errno(errp, -r, 1008d529a2acSXiaoyao Li "Unable to set core crystal clock frequency to 25MHz"); 1009d529a2acSXiaoyao Li return r; 1010d529a2acSXiaoyao Li } 1011d529a2acSXiaoyao Li 10120e73b843SXiaoyao Li if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || 10130e73b843SXiaoyao Li env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { 1014e7f926ebSCédric Le Goater error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency " 10150e73b843SXiaoyao Li "between [%d, %d] kHz", env->tsc_khz, 10160e73b843SXiaoyao Li TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); 10170e73b843SXiaoyao Li return -EINVAL; 10180e73b843SXiaoyao Li } 10190e73b843SXiaoyao Li 10200e73b843SXiaoyao Li if (env->tsc_khz % (25 * 1000)) { 1021e7f926ebSCédric Le Goater error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz", 10220e73b843SXiaoyao Li env->tsc_khz); 10230e73b843SXiaoyao Li return -EINVAL; 10240e73b843SXiaoyao Li } 10250e73b843SXiaoyao Li 10260e73b843SXiaoyao Li /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ 10270e73b843SXiaoyao Li r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); 10280e73b843SXiaoyao Li if (r < 0) { 1029e7f926ebSCédric Le Goater error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz", 10300e73b843SXiaoyao Li env->tsc_khz); 10310e73b843SXiaoyao Li return r; 10320e73b843SXiaoyao Li } 10330e73b843SXiaoyao Li 1034d05a0858SIsaku Yamahata if (tdx_guest->mrconfigid) { 1035d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, 1036d05a0858SIsaku Yamahata strlen(tdx_guest->mrconfigid), &data_len, errp); 1037d05a0858SIsaku Yamahata if (!data) { 1038d05a0858SIsaku Yamahata return -1; 1039d05a0858SIsaku Yamahata } 1040d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 104141cd354dSXiaoyao Li error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, " 104241cd354dSXiaoyao Li "expected %d bytes", data_len, 104341cd354dSXiaoyao Li QCRYPTO_HASH_DIGEST_LEN_SHA384); 1044d05a0858SIsaku Yamahata return -1; 1045d05a0858SIsaku Yamahata } 1046d05a0858SIsaku Yamahata memcpy(init_vm->mrconfigid, data, data_len); 1047d05a0858SIsaku Yamahata } 1048d05a0858SIsaku Yamahata 1049d05a0858SIsaku Yamahata if (tdx_guest->mrowner) { 1050d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, 1051d05a0858SIsaku Yamahata strlen(tdx_guest->mrowner), &data_len, errp); 1052d05a0858SIsaku Yamahata if (!data) { 1053d05a0858SIsaku Yamahata return -1; 1054d05a0858SIsaku Yamahata } 1055d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 105641cd354dSXiaoyao Li error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, " 105741cd354dSXiaoyao Li "expected %d bytes", data_len, 105841cd354dSXiaoyao Li QCRYPTO_HASH_DIGEST_LEN_SHA384); 1059d05a0858SIsaku Yamahata return -1; 1060d05a0858SIsaku Yamahata } 1061d05a0858SIsaku Yamahata memcpy(init_vm->mrowner, data, data_len); 1062d05a0858SIsaku Yamahata } 1063d05a0858SIsaku Yamahata 1064d05a0858SIsaku Yamahata if (tdx_guest->mrownerconfig) { 1065d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, 1066d05a0858SIsaku Yamahata strlen(tdx_guest->mrownerconfig), &data_len, errp); 1067d05a0858SIsaku Yamahata if (!data) { 1068d05a0858SIsaku Yamahata return -1; 1069d05a0858SIsaku Yamahata } 1070d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 107141cd354dSXiaoyao Li error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, " 107241cd354dSXiaoyao Li "expected %d bytes", data_len, 107341cd354dSXiaoyao Li QCRYPTO_HASH_DIGEST_LEN_SHA384); 1074d05a0858SIsaku Yamahata return -1; 1075d05a0858SIsaku Yamahata } 1076d05a0858SIsaku Yamahata memcpy(init_vm->mrownerconfig, data, data_len); 1077d05a0858SIsaku Yamahata } 1078d05a0858SIsaku Yamahata 107953b6f406SXiaoyao Li r = setup_td_guest_attributes(x86cpu, errp); 108053b6f406SXiaoyao Li if (r) { 108153b6f406SXiaoyao Li return r; 108253b6f406SXiaoyao Li } 1083bb3be394SXiaoyao Li 1084f15898b0SXiaoyao Li r = setup_td_xfam(x86cpu, errp); 1085f15898b0SXiaoyao Li if (r) { 1086f15898b0SXiaoyao Li return r; 1087f15898b0SXiaoyao Li } 1088f15898b0SXiaoyao Li 1089f15898b0SXiaoyao Li init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); 1090f15898b0SXiaoyao Li tdx_filter_cpuid(&init_vm->cpuid); 1091f15898b0SXiaoyao Li 1092f15898b0SXiaoyao Li init_vm->attributes = tdx_guest->attributes; 1093f15898b0SXiaoyao Li init_vm->xfam = tdx_guest->xfam; 1094f15898b0SXiaoyao Li 1095f15898b0SXiaoyao Li /* 1096f15898b0SXiaoyao Li * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) 1097f15898b0SXiaoyao Li * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or 1098f15898b0SXiaoyao Li * RDSEED) is busy. 1099f15898b0SXiaoyao Li * 1100f15898b0SXiaoyao Li * Retry for the case. 1101f15898b0SXiaoyao Li */ 1102f15898b0SXiaoyao Li do { 1103f15898b0SXiaoyao Li error_free(local_err); 1104f15898b0SXiaoyao Li local_err = NULL; 1105f15898b0SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); 1106f15898b0SXiaoyao Li } while (r == -EAGAIN && --retry); 1107f15898b0SXiaoyao Li 1108f15898b0SXiaoyao Li if (r < 0) { 1109f15898b0SXiaoyao Li if (!retry) { 1110f15898b0SXiaoyao Li error_append_hint(&local_err, "Hardware RNG (Random Number " 1111f15898b0SXiaoyao Li "Generator) is busy occupied by someone (via RDRAND/RDSEED) " 1112f15898b0SXiaoyao Li "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " 1113f15898b0SXiaoyao Li "due to lack of entropy.\n"); 1114f15898b0SXiaoyao Li } 1115f15898b0SXiaoyao Li error_propagate(errp, local_err); 1116f15898b0SXiaoyao Li return r; 1117f15898b0SXiaoyao Li } 1118f15898b0SXiaoyao Li 1119f15898b0SXiaoyao Li tdx_guest->initialized = true; 1120f15898b0SXiaoyao Li 1121f15898b0SXiaoyao Li return 0; 1122f15898b0SXiaoyao Li } 1123f15898b0SXiaoyao Li 1124cb5d65a8SXiaoyao Li int tdx_parse_tdvf(void *flash_ptr, int size) 1125cb5d65a8SXiaoyao Li { 1126cb5d65a8SXiaoyao Li return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); 1127cb5d65a8SXiaoyao Li } 1128cb5d65a8SXiaoyao Li 1129*efa742b2SXiaoyao Li static void tdx_inject_interrupt(uint32_t apicid, uint32_t vector) 1130*efa742b2SXiaoyao Li { 1131*efa742b2SXiaoyao Li int ret; 1132*efa742b2SXiaoyao Li 1133*efa742b2SXiaoyao Li if (vector < 32 || vector > 255) { 1134*efa742b2SXiaoyao Li return; 1135*efa742b2SXiaoyao Li } 1136*efa742b2SXiaoyao Li 1137*efa742b2SXiaoyao Li MSIMessage msg = { 1138*efa742b2SXiaoyao Li .address = ((apicid & 0xff) << MSI_ADDR_DEST_ID_SHIFT) | 1139*efa742b2SXiaoyao Li (((uint64_t)apicid & 0xffffff00) << 32), 1140*efa742b2SXiaoyao Li .data = vector | (APIC_DM_FIXED << MSI_DATA_DELIVERY_MODE_SHIFT), 1141*efa742b2SXiaoyao Li }; 1142*efa742b2SXiaoyao Li 1143*efa742b2SXiaoyao Li ret = kvm_irqchip_send_msi(kvm_state, msg); 1144*efa742b2SXiaoyao Li if (ret < 0) { 1145*efa742b2SXiaoyao Li /* In this case, no better way to tell it to guest. Log it. */ 1146*efa742b2SXiaoyao Li error_report("TDX: injection interrupt %d failed, interrupt lost (%s).", 1147*efa742b2SXiaoyao Li vector, strerror(-ret)); 1148*efa742b2SXiaoyao Li } 1149*efa742b2SXiaoyao Li } 1150*efa742b2SXiaoyao Li 115140da501dSIsaku Yamahata static void tdx_get_quote_completion(TdxGenerateQuoteTask *task) 115240da501dSIsaku Yamahata { 115340da501dSIsaku Yamahata TdxGuest *tdx = task->opaque; 115440da501dSIsaku Yamahata int ret; 115540da501dSIsaku Yamahata 115640da501dSIsaku Yamahata /* Maintain the number of in-flight requests. */ 115740da501dSIsaku Yamahata qemu_mutex_lock(&tdx->lock); 115840da501dSIsaku Yamahata tdx->num--; 115940da501dSIsaku Yamahata qemu_mutex_unlock(&tdx->lock); 116040da501dSIsaku Yamahata 116140da501dSIsaku Yamahata if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) { 116240da501dSIsaku Yamahata ret = address_space_write(&address_space_memory, task->payload_gpa, 116340da501dSIsaku Yamahata MEMTXATTRS_UNSPECIFIED, task->receive_buf, 116440da501dSIsaku Yamahata task->receive_buf_received); 116540da501dSIsaku Yamahata if (ret != MEMTX_OK) { 116640da501dSIsaku Yamahata error_report("TDX: get-quote: failed to write quote data."); 116740da501dSIsaku Yamahata } else { 116840da501dSIsaku Yamahata task->hdr.out_len = cpu_to_le64(task->receive_buf_received); 116940da501dSIsaku Yamahata } 117040da501dSIsaku Yamahata } 117140da501dSIsaku Yamahata task->hdr.error_code = cpu_to_le64(task->status_code); 117240da501dSIsaku Yamahata 117340da501dSIsaku Yamahata /* Publish the response contents before marking this request completed. */ 117440da501dSIsaku Yamahata smp_wmb(); 117540da501dSIsaku Yamahata ret = address_space_write(&address_space_memory, task->buf_gpa, 117640da501dSIsaku Yamahata MEMTXATTRS_UNSPECIFIED, &task->hdr, 117740da501dSIsaku Yamahata TDX_GET_QUOTE_HDR_SIZE); 117840da501dSIsaku Yamahata if (ret != MEMTX_OK) { 117940da501dSIsaku Yamahata error_report("TDX: get-quote: failed to update GetQuote header."); 118040da501dSIsaku Yamahata } 118140da501dSIsaku Yamahata 1182*efa742b2SXiaoyao Li tdx_inject_interrupt(tdx_guest->event_notify_apicid, 1183*efa742b2SXiaoyao Li tdx_guest->event_notify_vector); 1184*efa742b2SXiaoyao Li 118540da501dSIsaku Yamahata g_free(task->send_data); 118640da501dSIsaku Yamahata g_free(task->receive_buf); 118740da501dSIsaku Yamahata g_free(task); 118840da501dSIsaku Yamahata object_unref(tdx); 118940da501dSIsaku Yamahata } 119040da501dSIsaku Yamahata 119140da501dSIsaku Yamahata void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) 119240da501dSIsaku Yamahata { 119340da501dSIsaku Yamahata TdxGenerateQuoteTask *task; 119440da501dSIsaku Yamahata struct tdx_get_quote_header hdr; 119540da501dSIsaku Yamahata hwaddr buf_gpa = run->tdx.get_quote.gpa; 119640da501dSIsaku Yamahata uint64_t buf_len = run->tdx.get_quote.size; 119740da501dSIsaku Yamahata 119840da501dSIsaku Yamahata QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE); 119940da501dSIsaku Yamahata 120040da501dSIsaku Yamahata run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND; 120140da501dSIsaku Yamahata 120240da501dSIsaku Yamahata if (buf_len == 0) { 120340da501dSIsaku Yamahata return; 120440da501dSIsaku Yamahata } 120540da501dSIsaku Yamahata 120640da501dSIsaku Yamahata if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) { 120740da501dSIsaku Yamahata run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR; 120840da501dSIsaku Yamahata return; 120940da501dSIsaku Yamahata } 121040da501dSIsaku Yamahata 121140da501dSIsaku Yamahata if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED, 121240da501dSIsaku Yamahata &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { 121340da501dSIsaku Yamahata error_report("TDX: get-quote: failed to read GetQuote header."); 121440da501dSIsaku Yamahata return; 121540da501dSIsaku Yamahata } 121640da501dSIsaku Yamahata 121740da501dSIsaku Yamahata if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) { 121840da501dSIsaku Yamahata return; 121940da501dSIsaku Yamahata } 122040da501dSIsaku Yamahata 122140da501dSIsaku Yamahata /* Only safe-guard check to avoid too large buffer size. */ 122240da501dSIsaku Yamahata if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN || 122340da501dSIsaku Yamahata le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) { 122440da501dSIsaku Yamahata return; 122540da501dSIsaku Yamahata } 122640da501dSIsaku Yamahata 122740da501dSIsaku Yamahata if (!tdx_guest->qg_sock_addr) { 122840da501dSIsaku Yamahata hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE); 122940da501dSIsaku Yamahata if (address_space_write(&address_space_memory, buf_gpa, 123040da501dSIsaku Yamahata MEMTXATTRS_UNSPECIFIED, 123140da501dSIsaku Yamahata &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { 123240da501dSIsaku Yamahata error_report("TDX: failed to update GetQuote header."); 123340da501dSIsaku Yamahata return; 123440da501dSIsaku Yamahata } 123540da501dSIsaku Yamahata run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; 123640da501dSIsaku Yamahata return; 123740da501dSIsaku Yamahata } 123840da501dSIsaku Yamahata 123940da501dSIsaku Yamahata qemu_mutex_lock(&tdx_guest->lock); 124040da501dSIsaku Yamahata if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) { 124140da501dSIsaku Yamahata qemu_mutex_unlock(&tdx_guest->lock); 124240da501dSIsaku Yamahata run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY; 124340da501dSIsaku Yamahata return; 124440da501dSIsaku Yamahata } 124540da501dSIsaku Yamahata tdx_guest->num++; 124640da501dSIsaku Yamahata qemu_mutex_unlock(&tdx_guest->lock); 124740da501dSIsaku Yamahata 124840da501dSIsaku Yamahata task = g_new(TdxGenerateQuoteTask, 1); 124940da501dSIsaku Yamahata task->buf_gpa = buf_gpa; 125040da501dSIsaku Yamahata task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE; 125140da501dSIsaku Yamahata task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE; 125240da501dSIsaku Yamahata task->hdr = hdr; 125340da501dSIsaku Yamahata task->completion = tdx_get_quote_completion; 125440da501dSIsaku Yamahata 125540da501dSIsaku Yamahata task->send_data_size = le32_to_cpu(hdr.in_len); 125640da501dSIsaku Yamahata task->send_data = g_malloc(task->send_data_size); 125740da501dSIsaku Yamahata task->send_data_sent = 0; 125840da501dSIsaku Yamahata 125940da501dSIsaku Yamahata if (address_space_read(&address_space_memory, task->payload_gpa, 126040da501dSIsaku Yamahata MEMTXATTRS_UNSPECIFIED, task->send_data, 126140da501dSIsaku Yamahata task->send_data_size) != MEMTX_OK) { 126240da501dSIsaku Yamahata goto out_free; 126340da501dSIsaku Yamahata } 126440da501dSIsaku Yamahata 126540da501dSIsaku Yamahata /* Mark the buffer in-flight. */ 126640da501dSIsaku Yamahata hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT); 126740da501dSIsaku Yamahata if (address_space_write(&address_space_memory, buf_gpa, 126840da501dSIsaku Yamahata MEMTXATTRS_UNSPECIFIED, 126940da501dSIsaku Yamahata &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { 127040da501dSIsaku Yamahata goto out_free; 127140da501dSIsaku Yamahata } 127240da501dSIsaku Yamahata 127340da501dSIsaku Yamahata task->receive_buf = g_malloc0(task->payload_len); 127440da501dSIsaku Yamahata task->receive_buf_received = 0; 127540da501dSIsaku Yamahata task->opaque = tdx_guest; 127640da501dSIsaku Yamahata 127740da501dSIsaku Yamahata object_ref(tdx_guest); 127840da501dSIsaku Yamahata tdx_generate_quote(task, tdx_guest->qg_sock_addr); 127940da501dSIsaku Yamahata run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; 128040da501dSIsaku Yamahata return; 128140da501dSIsaku Yamahata 128240da501dSIsaku Yamahata out_free: 128340da501dSIsaku Yamahata g_free(task->send_data); 128440da501dSIsaku Yamahata g_free(task); 128540da501dSIsaku Yamahata } 128640da501dSIsaku Yamahata 1287*efa742b2SXiaoyao Li #define SUPPORTED_TDVMCALLINFO_1_R11 (TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT) 128855be385bSXiaoyao Li #define SUPPORTED_TDVMCALLINFO_1_R12 (0) 128955be385bSXiaoyao Li 1290427b8cf4SBinbin Wu void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) 1291427b8cf4SBinbin Wu { 1292427b8cf4SBinbin Wu if (run->tdx.get_tdvmcall_info.leaf != 1) { 1293427b8cf4SBinbin Wu return; 1294427b8cf4SBinbin Wu } 1295427b8cf4SBinbin Wu 129655be385bSXiaoyao Li run->tdx.get_tdvmcall_info.r11 = (tdx_caps->user_tdvmcallinfo_1_r11 & 129755be385bSXiaoyao Li SUPPORTED_TDVMCALLINFO_1_R11) | 129855be385bSXiaoyao Li tdx_caps->kernel_tdvmcallinfo_1_r11; 129955be385bSXiaoyao Li run->tdx.get_tdvmcall_info.r12 = (tdx_caps->user_tdvmcallinfo_1_r12 & 130055be385bSXiaoyao Li SUPPORTED_TDVMCALLINFO_1_R12) | 130155be385bSXiaoyao Li tdx_caps->kernel_tdvmcallinfo_1_r12; 1302427b8cf4SBinbin Wu run->tdx.get_tdvmcall_info.r13 = 0; 1303427b8cf4SBinbin Wu run->tdx.get_tdvmcall_info.r14 = 0; 1304b57999bbSXiaoyao Li 1305b57999bbSXiaoyao Li run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS; 1306427b8cf4SBinbin Wu } 1307427b8cf4SBinbin Wu 1308*efa742b2SXiaoyao Li void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) 1309*efa742b2SXiaoyao Li { 1310*efa742b2SXiaoyao Li uint64_t vector = run->tdx.setup_event_notify.vector; 1311*efa742b2SXiaoyao Li 1312*efa742b2SXiaoyao Li if (vector >= 32 && vector < 256) { 1313*efa742b2SXiaoyao Li qemu_mutex_lock(&tdx_guest->lock); 1314*efa742b2SXiaoyao Li tdx_guest->event_notify_vector = vector; 1315*efa742b2SXiaoyao Li tdx_guest->event_notify_apicid = cpu->apic_id; 1316*efa742b2SXiaoyao Li qemu_mutex_unlock(&tdx_guest->lock); 1317*efa742b2SXiaoyao Li run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_SUCCESS; 1318*efa742b2SXiaoyao Li } else { 1319*efa742b2SXiaoyao Li run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_INVALID_OPERAND; 1320*efa742b2SXiaoyao Li } 1321*efa742b2SXiaoyao Li } 1322*efa742b2SXiaoyao Li 13236e250463SXiaoyao Li static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, 13246e250463SXiaoyao Li char *message, uint64_t gpa) 13256e250463SXiaoyao Li { 13266e250463SXiaoyao Li GuestPanicInformation *panic_info; 13276e250463SXiaoyao Li 13286e250463SXiaoyao Li panic_info = g_new0(GuestPanicInformation, 1); 13296e250463SXiaoyao Li panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; 13306e250463SXiaoyao Li panic_info->u.tdx.error_code = (uint32_t) error_code; 13316e250463SXiaoyao Li panic_info->u.tdx.message = message; 13326e250463SXiaoyao Li panic_info->u.tdx.gpa = gpa; 13336e250463SXiaoyao Li 13346e250463SXiaoyao Li qemu_system_guest_panicked(panic_info); 13356e250463SXiaoyao Li } 13366e250463SXiaoyao Li 133798dbfd68SXiaoyao Li /* 133898dbfd68SXiaoyao Li * Only 8 registers can contain valid ASCII byte stream to form the fatal 133998dbfd68SXiaoyao Li * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX 134098dbfd68SXiaoyao Li */ 134198dbfd68SXiaoyao Li #define TDX_FATAL_MESSAGE_MAX 64 134298dbfd68SXiaoyao Li 13436e250463SXiaoyao Li #define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) 13446e250463SXiaoyao Li 134598dbfd68SXiaoyao Li int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) 134698dbfd68SXiaoyao Li { 134798dbfd68SXiaoyao Li uint64_t error_code = run->system_event.data[R_R12]; 134898dbfd68SXiaoyao Li uint64_t reg_mask = run->system_event.data[R_ECX]; 134998dbfd68SXiaoyao Li char *message = NULL; 135098dbfd68SXiaoyao Li uint64_t *tmp; 13516e250463SXiaoyao Li uint64_t gpa = -1ull; 135298dbfd68SXiaoyao Li 135398dbfd68SXiaoyao Li if (error_code & 0xffff) { 1354e7f926ebSCédric Le Goater error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64, 135598dbfd68SXiaoyao Li error_code); 135698dbfd68SXiaoyao Li return -1; 135798dbfd68SXiaoyao Li } 135898dbfd68SXiaoyao Li 135998dbfd68SXiaoyao Li if (reg_mask) { 136098dbfd68SXiaoyao Li message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); 136198dbfd68SXiaoyao Li tmp = (uint64_t *)message; 136298dbfd68SXiaoyao Li 136398dbfd68SXiaoyao Li #define COPY_REG(REG) \ 136498dbfd68SXiaoyao Li do { \ 136598dbfd68SXiaoyao Li if (reg_mask & BIT_ULL(REG)) { \ 136698dbfd68SXiaoyao Li *(tmp++) = run->system_event.data[REG]; \ 136798dbfd68SXiaoyao Li } \ 136898dbfd68SXiaoyao Li } while (0) 136998dbfd68SXiaoyao Li 137098dbfd68SXiaoyao Li COPY_REG(R_R14); 137198dbfd68SXiaoyao Li COPY_REG(R_R15); 137298dbfd68SXiaoyao Li COPY_REG(R_EBX); 137398dbfd68SXiaoyao Li COPY_REG(R_EDI); 137498dbfd68SXiaoyao Li COPY_REG(R_ESI); 137598dbfd68SXiaoyao Li COPY_REG(R_R8); 137698dbfd68SXiaoyao Li COPY_REG(R_R9); 137798dbfd68SXiaoyao Li COPY_REG(R_EDX); 137898dbfd68SXiaoyao Li *((char *)tmp) = '\0'; 137998dbfd68SXiaoyao Li } 138098dbfd68SXiaoyao Li #undef COPY_REG 138198dbfd68SXiaoyao Li 13826e250463SXiaoyao Li if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { 13836e250463SXiaoyao Li gpa = run->system_event.data[R_R13]; 13846e250463SXiaoyao Li } 13856e250463SXiaoyao Li 13866e250463SXiaoyao Li tdx_panicked_on_fatal_error(cpu, error_code, message, gpa); 13876e250463SXiaoyao Li 138898dbfd68SXiaoyao Li return -1; 138998dbfd68SXiaoyao Li } 139098dbfd68SXiaoyao Li 13916016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) 13926016e297SXiaoyao Li { 13936016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 13946016e297SXiaoyao Li 13956016e297SXiaoyao Li return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); 13966016e297SXiaoyao Li } 13976016e297SXiaoyao Li 13986016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) 13996016e297SXiaoyao Li { 14006016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 14016016e297SXiaoyao Li 14026016e297SXiaoyao Li if (value) { 14036016e297SXiaoyao Li tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 14046016e297SXiaoyao Li } else { 14056016e297SXiaoyao Li tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 14066016e297SXiaoyao Li } 14076016e297SXiaoyao Li } 14086016e297SXiaoyao Li 1409d05a0858SIsaku Yamahata static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) 1410d05a0858SIsaku Yamahata { 1411d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 1412d05a0858SIsaku Yamahata 1413d05a0858SIsaku Yamahata return g_strdup(tdx->mrconfigid); 1414d05a0858SIsaku Yamahata } 1415d05a0858SIsaku Yamahata 1416d05a0858SIsaku Yamahata static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) 1417d05a0858SIsaku Yamahata { 1418d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 1419d05a0858SIsaku Yamahata 1420d05a0858SIsaku Yamahata g_free(tdx->mrconfigid); 1421d05a0858SIsaku Yamahata tdx->mrconfigid = g_strdup(value); 1422d05a0858SIsaku Yamahata } 1423d05a0858SIsaku Yamahata 1424d05a0858SIsaku Yamahata static char *tdx_guest_get_mrowner(Object *obj, Error **errp) 1425d05a0858SIsaku Yamahata { 1426d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 1427d05a0858SIsaku Yamahata 1428d05a0858SIsaku Yamahata return g_strdup(tdx->mrowner); 1429d05a0858SIsaku Yamahata } 1430d05a0858SIsaku Yamahata 1431d05a0858SIsaku Yamahata static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) 1432d05a0858SIsaku Yamahata { 1433d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 1434d05a0858SIsaku Yamahata 1435d05a0858SIsaku Yamahata g_free(tdx->mrowner); 1436d05a0858SIsaku Yamahata tdx->mrowner = g_strdup(value); 1437d05a0858SIsaku Yamahata } 1438d05a0858SIsaku Yamahata 1439d05a0858SIsaku Yamahata static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) 1440d05a0858SIsaku Yamahata { 1441d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 1442d05a0858SIsaku Yamahata 1443d05a0858SIsaku Yamahata return g_strdup(tdx->mrownerconfig); 1444d05a0858SIsaku Yamahata } 1445d05a0858SIsaku Yamahata 1446d05a0858SIsaku Yamahata static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) 1447d05a0858SIsaku Yamahata { 1448d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 1449d05a0858SIsaku Yamahata 1450d05a0858SIsaku Yamahata g_free(tdx->mrownerconfig); 1451d05a0858SIsaku Yamahata tdx->mrownerconfig = g_strdup(value); 1452d05a0858SIsaku Yamahata } 1453d05a0858SIsaku Yamahata 145440da501dSIsaku Yamahata static void tdx_guest_get_qgs(Object *obj, Visitor *v, 145540da501dSIsaku Yamahata const char *name, void *opaque, 145640da501dSIsaku Yamahata Error **errp) 145740da501dSIsaku Yamahata { 145840da501dSIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 145940da501dSIsaku Yamahata 146040da501dSIsaku Yamahata if (!tdx->qg_sock_addr) { 146140da501dSIsaku Yamahata error_setg(errp, "quote-generation-socket is not set"); 146240da501dSIsaku Yamahata return; 146340da501dSIsaku Yamahata } 146440da501dSIsaku Yamahata visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp); 146540da501dSIsaku Yamahata } 146640da501dSIsaku Yamahata 146740da501dSIsaku Yamahata static void tdx_guest_set_qgs(Object *obj, Visitor *v, 146840da501dSIsaku Yamahata const char *name, void *opaque, 146940da501dSIsaku Yamahata Error **errp) 147040da501dSIsaku Yamahata { 147140da501dSIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj); 147240da501dSIsaku Yamahata SocketAddress *sock = NULL; 147340da501dSIsaku Yamahata 147440da501dSIsaku Yamahata if (!visit_type_SocketAddress(v, name, &sock, errp)) { 147540da501dSIsaku Yamahata return; 147640da501dSIsaku Yamahata } 147740da501dSIsaku Yamahata 147840da501dSIsaku Yamahata if (tdx->qg_sock_addr) { 147940da501dSIsaku Yamahata qapi_free_SocketAddress(tdx->qg_sock_addr); 148040da501dSIsaku Yamahata } 148140da501dSIsaku Yamahata 148240da501dSIsaku Yamahata tdx->qg_sock_addr = sock; 148340da501dSIsaku Yamahata } 148440da501dSIsaku Yamahata 1485756e12e7SXiaoyao Li /* tdx guest */ 1486756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, 1487756e12e7SXiaoyao Li tdx_guest, 1488756e12e7SXiaoyao Li TDX_GUEST, 1489756e12e7SXiaoyao Li X86_CONFIDENTIAL_GUEST, 1490756e12e7SXiaoyao Li { TYPE_USER_CREATABLE }, 1491756e12e7SXiaoyao Li { NULL }) 1492756e12e7SXiaoyao Li 1493756e12e7SXiaoyao Li static void tdx_guest_init(Object *obj) 1494756e12e7SXiaoyao Li { 1495756e12e7SXiaoyao Li ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); 1496756e12e7SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 1497756e12e7SXiaoyao Li 1498f15898b0SXiaoyao Li qemu_mutex_init(&tdx->lock); 1499f15898b0SXiaoyao Li 1500756e12e7SXiaoyao Li cgs->require_guest_memfd = true; 1501714af522SIsaku Yamahata tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 1502756e12e7SXiaoyao Li 1503756e12e7SXiaoyao Li object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, 1504756e12e7SXiaoyao Li OBJ_PROP_FLAG_READWRITE); 15056016e297SXiaoyao Li object_property_add_bool(obj, "sept-ve-disable", 15066016e297SXiaoyao Li tdx_guest_get_sept_ve_disable, 15076016e297SXiaoyao Li tdx_guest_set_sept_ve_disable); 1508d05a0858SIsaku Yamahata object_property_add_str(obj, "mrconfigid", 1509d05a0858SIsaku Yamahata tdx_guest_get_mrconfigid, 1510d05a0858SIsaku Yamahata tdx_guest_set_mrconfigid); 1511d05a0858SIsaku Yamahata object_property_add_str(obj, "mrowner", 1512d05a0858SIsaku Yamahata tdx_guest_get_mrowner, tdx_guest_set_mrowner); 1513d05a0858SIsaku Yamahata object_property_add_str(obj, "mrownerconfig", 1514d05a0858SIsaku Yamahata tdx_guest_get_mrownerconfig, 1515d05a0858SIsaku Yamahata tdx_guest_set_mrownerconfig); 151640da501dSIsaku Yamahata 151740da501dSIsaku Yamahata object_property_add(obj, "quote-generation-socket", "SocketAddress", 151840da501dSIsaku Yamahata tdx_guest_get_qgs, 151940da501dSIsaku Yamahata tdx_guest_set_qgs, 152040da501dSIsaku Yamahata NULL, NULL); 152140da501dSIsaku Yamahata 152240da501dSIsaku Yamahata qemu_mutex_init(&tdx->lock); 1523*efa742b2SXiaoyao Li 1524*efa742b2SXiaoyao Li tdx->event_notify_vector = -1; 1525*efa742b2SXiaoyao Li tdx->event_notify_apicid = -1; 1526756e12e7SXiaoyao Li } 1527756e12e7SXiaoyao Li 1528756e12e7SXiaoyao Li static void tdx_guest_finalize(Object *obj) 1529756e12e7SXiaoyao Li { 1530756e12e7SXiaoyao Li } 1531756e12e7SXiaoyao Li 1532756e12e7SXiaoyao Li static void tdx_guest_class_init(ObjectClass *oc, const void *data) 1533756e12e7SXiaoyao Li { 1534631a2ac5SXiaoyao Li ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); 1535b455880eSXiaoyao Li X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); 1536b455880eSXiaoyao Li 1537631a2ac5SXiaoyao Li klass->kvm_init = tdx_kvm_init; 1538b455880eSXiaoyao Li x86_klass->kvm_type = tdx_kvm_type; 15397c615242SXiaoyao Li x86_klass->cpu_instance_init = tdx_cpu_instance_init; 154075ec6189SXiaoyao Li x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; 1541e3d1a4a6SXiaoyao Li x86_klass->check_features = tdx_check_features; 1542756e12e7SXiaoyao Li } 1543