xref: /openbmc/qemu/target/i386/kvm/tdx.c (revision 40da501d8989913935660dc24953ece02c9e98b8)
1756e12e7SXiaoyao Li /*
2756e12e7SXiaoyao Li  * QEMU TDX support
3756e12e7SXiaoyao Li  *
4756e12e7SXiaoyao Li  * Copyright (c) 2025 Intel Corporation
5756e12e7SXiaoyao Li  *
6756e12e7SXiaoyao Li  * Author:
7756e12e7SXiaoyao Li  *      Xiaoyao Li <xiaoyao.li@intel.com>
8756e12e7SXiaoyao Li  *
9756e12e7SXiaoyao Li  * SPDX-License-Identifier: GPL-2.0-or-later
10756e12e7SXiaoyao Li  */
11756e12e7SXiaoyao Li 
12756e12e7SXiaoyao Li #include "qemu/osdep.h"
138eddedc3SXiaoyao Li #include "qemu/error-report.h"
14d05a0858SIsaku Yamahata #include "qemu/base64.h"
154420ba0eSXiaoyao Li #include "qemu/mmap-alloc.h"
168eddedc3SXiaoyao Li #include "qapi/error.h"
17*40da501dSIsaku Yamahata #include "qapi/qapi-visit-sockets.h"
18756e12e7SXiaoyao Li #include "qom/object_interfaces.h"
19d05a0858SIsaku Yamahata #include "crypto/hash.h"
20bb45580dSXiaoyao Li #include "system/kvm_int.h"
216e250463SXiaoyao Li #include "system/runstate.h"
224420ba0eSXiaoyao Li #include "system/system.h"
23ebc2d2b4SIsaku Yamahata #include "system/ramblock.h"
24*40da501dSIsaku Yamahata #include "system/address-spaces.h"
25756e12e7SXiaoyao Li 
261ff5048dSXiaoyao Li #include <linux/kvm_para.h>
271ff5048dSXiaoyao Li 
288c94c84cSXiaoyao Li #include "cpu.h"
298c94c84cSXiaoyao Li #include "cpu-internal.h"
30907ee7b6SXiaoyao Li #include "host-cpu.h"
31f18672e4SXiaoyao Li #include "hw/i386/e820_memory_layout.h"
324420ba0eSXiaoyao Li #include "hw/i386/tdvf.h"
33631a2ac5SXiaoyao Li #include "hw/i386/x86.h"
34a7314259SXiaoyao Li #include "hw/i386/tdvf-hob.h"
35b455880eSXiaoyao Li #include "kvm_i386.h"
36756e12e7SXiaoyao Li #include "tdx.h"
37*40da501dSIsaku Yamahata #include "tdx-quote-generator.h"
38756e12e7SXiaoyao Li 
394d6e288aSXiaoyao Li #include "standard-headers/asm-x86/kvm_para.h"
404d6e288aSXiaoyao Li 
/*
 * Lower/upper TSC frequency limits, in kHz: 100 * 1000 kHz = 100 MHz and
 * 10 * 1000 * 1000 kHz = 10 GHz.
 * NOTE(review): presumably the range accepted for a TD's virtual TSC; the
 * code that checks against these limits is not in this part of the file.
 */
#define TDX_MIN_TSC_FREQUENCY_KHZ   (100 * 1000)
#define TDX_MAX_TSC_FREQUENCY_KHZ   (10 * 1000 * 1000)

/* Bit positions of individual TD attributes inside the 64-bit attribute word */
#define TDX_TD_ATTRIBUTES_DEBUG             BIT_ULL(0)
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
#define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
#define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)

/*
 * Mask of the TD attributes treated as supported here. Note that
 * TDX_TD_ATTRIBUTES_DEBUG is deliberately not part of this mask.
 */
#define TDX_SUPPORTED_TD_ATTRS  (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\
                                 TDX_TD_ATTRIBUTES_PKS | \
                                 TDX_TD_ATTRIBUTES_PERFMON)

/* Mask of KVM paravirtual feature bits (KVM_FEATURE_*) allowed for a TD */
#define TDX_SUPPORTED_KVM_FEATURES  ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
                                     (1U << KVM_FEATURE_PV_UNHALT) | \
                                     (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
                                     (1U << KVM_FEATURE_PV_SEND_IPI) | \
                                     (1U << KVM_FEATURE_POLL_CONTROL) | \
                                     (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
                                     (1U << KVM_FEATURE_MSI_EXT_DEST_ID))
604d6e288aSXiaoyao Li 
/* The TDX guest object of this VM; NULL means the VM is not a TD. */
static TdxGuest *tdx_guest;

/* Capabilities buffer obtained via KVM_TDX_CAPABILITIES (see get_tdx_capabilities()) */
static struct kvm_tdx_capabilities *tdx_caps;
/* CPUID entries considered supported for a TD; extended by find_in_supported_entry() */
static struct kvm_cpuid2 *tdx_supported_cpuid;
658eddedc3SXiaoyao Li 
661619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
671619d0e4SXiaoyao Li bool is_tdx_vm(void)
681619d0e4SXiaoyao Li {
691619d0e4SXiaoyao Li     return !!tdx_guest;
701619d0e4SXiaoyao Li }
711619d0e4SXiaoyao Li 
/* Selects which KVM file descriptor a TDX command is issued on. */
enum tdx_ioctl_level {
    TDX_VM_IOCTL,   /* issued through kvm_vm_ioctl() */
    TDX_VCPU_IOCTL, /* issued through kvm_vcpu_ioctl() on a given vCPU */
};
768eddedc3SXiaoyao Li 
778eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
788eddedc3SXiaoyao Li                               int cmd_id, __u32 flags, void *data,
798eddedc3SXiaoyao Li                               Error **errp)
80631a2ac5SXiaoyao Li {
818eddedc3SXiaoyao Li     struct kvm_tdx_cmd tdx_cmd = {};
828eddedc3SXiaoyao Li     int r;
838eddedc3SXiaoyao Li 
848eddedc3SXiaoyao Li     const char *tdx_ioctl_name[] = {
858eddedc3SXiaoyao Li         [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
868eddedc3SXiaoyao Li         [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
878eddedc3SXiaoyao Li         [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
888eddedc3SXiaoyao Li         [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
898eddedc3SXiaoyao Li         [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
908eddedc3SXiaoyao Li         [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
918eddedc3SXiaoyao Li     };
928eddedc3SXiaoyao Li 
938eddedc3SXiaoyao Li     tdx_cmd.id = cmd_id;
948eddedc3SXiaoyao Li     tdx_cmd.flags = flags;
958eddedc3SXiaoyao Li     tdx_cmd.data = (__u64)(unsigned long)data;
968eddedc3SXiaoyao Li 
978eddedc3SXiaoyao Li     switch (level) {
988eddedc3SXiaoyao Li     case TDX_VM_IOCTL:
998eddedc3SXiaoyao Li         r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
1008eddedc3SXiaoyao Li         break;
1018eddedc3SXiaoyao Li     case TDX_VCPU_IOCTL:
1028eddedc3SXiaoyao Li         r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
1038eddedc3SXiaoyao Li         break;
1048eddedc3SXiaoyao Li     default:
1058eddedc3SXiaoyao Li         error_setg(errp, "Invalid tdx_ioctl_level %d", level);
1068eddedc3SXiaoyao Li         return -EINVAL;
1078eddedc3SXiaoyao Li     }
1088eddedc3SXiaoyao Li 
1098eddedc3SXiaoyao Li     if (r < 0) {
1108eddedc3SXiaoyao Li         error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
1118eddedc3SXiaoyao Li                          tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
1128eddedc3SXiaoyao Li     }
1138eddedc3SXiaoyao Li     return r;
1148eddedc3SXiaoyao Li }
1158eddedc3SXiaoyao Li 
1168eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
1178eddedc3SXiaoyao Li                                Error **errp)
1188eddedc3SXiaoyao Li {
1198eddedc3SXiaoyao Li     return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
1208eddedc3SXiaoyao Li }
1218eddedc3SXiaoyao Li 
1228eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
1238eddedc3SXiaoyao Li                                  void *data, Error **errp)
1248eddedc3SXiaoyao Li {
1258eddedc3SXiaoyao Li     return  tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
1268eddedc3SXiaoyao Li }
1278eddedc3SXiaoyao Li 
1288eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp)
1298eddedc3SXiaoyao Li {
1308eddedc3SXiaoyao Li     struct kvm_tdx_capabilities *caps;
1318eddedc3SXiaoyao Li     /* 1st generation of TDX reports 6 cpuid configs */
1328eddedc3SXiaoyao Li     int nr_cpuid_configs = 6;
1338eddedc3SXiaoyao Li     size_t size;
1348eddedc3SXiaoyao Li     int r;
1358eddedc3SXiaoyao Li 
1368eddedc3SXiaoyao Li     do {
1378eddedc3SXiaoyao Li         Error *local_err = NULL;
1388eddedc3SXiaoyao Li         size = sizeof(struct kvm_tdx_capabilities) +
1398eddedc3SXiaoyao Li                       nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
1408eddedc3SXiaoyao Li         caps = g_malloc0(size);
1418eddedc3SXiaoyao Li         caps->cpuid.nent = nr_cpuid_configs;
1428eddedc3SXiaoyao Li 
1438eddedc3SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
1448eddedc3SXiaoyao Li         if (r == -E2BIG) {
1458eddedc3SXiaoyao Li             g_free(caps);
1468eddedc3SXiaoyao Li             nr_cpuid_configs *= 2;
1478eddedc3SXiaoyao Li             if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
1488eddedc3SXiaoyao Li                 error_report("KVM TDX seems broken that number of CPUID entries"
1498eddedc3SXiaoyao Li                              " in kvm_tdx_capabilities exceeds limit: %d",
1508eddedc3SXiaoyao Li                              KVM_MAX_CPUID_ENTRIES);
1518eddedc3SXiaoyao Li                 error_propagate(errp, local_err);
1528eddedc3SXiaoyao Li                 return r;
1538eddedc3SXiaoyao Li             }
1548eddedc3SXiaoyao Li             error_free(local_err);
1558eddedc3SXiaoyao Li         } else if (r < 0) {
1568eddedc3SXiaoyao Li             g_free(caps);
1578eddedc3SXiaoyao Li             error_propagate(errp, local_err);
1588eddedc3SXiaoyao Li             return r;
1598eddedc3SXiaoyao Li         }
1608eddedc3SXiaoyao Li     } while (r == -E2BIG);
1618eddedc3SXiaoyao Li 
1628eddedc3SXiaoyao Li     tdx_caps = caps;
163631a2ac5SXiaoyao Li 
164631a2ac5SXiaoyao Li     return 0;
165631a2ac5SXiaoyao Li }
166631a2ac5SXiaoyao Li 
1670dd5fe5eSChao Peng void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
1680dd5fe5eSChao Peng {
1690dd5fe5eSChao Peng     assert(!tdx_guest->tdvf_mr);
1700dd5fe5eSChao Peng     tdx_guest->tdvf_mr = tdvf_mr;
1710dd5fe5eSChao Peng }
1720dd5fe5eSChao Peng 
173a7314259SXiaoyao Li static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
174a7314259SXiaoyao Li {
175a7314259SXiaoyao Li     TdxFirmwareEntry *entry;
176a7314259SXiaoyao Li 
177a7314259SXiaoyao Li     for_each_tdx_fw_entry(&tdx->tdvf, entry) {
178a7314259SXiaoyao Li         if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
179a7314259SXiaoyao Li             return entry;
180a7314259SXiaoyao Li         }
181a7314259SXiaoyao Li     }
182a7314259SXiaoyao Li     error_report("TDVF metadata doesn't specify TD_HOB location.");
183a7314259SXiaoyao Li     exit(1);
184a7314259SXiaoyao Li }
185a7314259SXiaoyao Li 
186f18672e4SXiaoyao Li static void tdx_add_ram_entry(uint64_t address, uint64_t length,
187f18672e4SXiaoyao Li                               enum TdxRamType type)
188f18672e4SXiaoyao Li {
189f18672e4SXiaoyao Li     uint32_t nr_entries = tdx_guest->nr_ram_entries;
190f18672e4SXiaoyao Li     tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
191f18672e4SXiaoyao Li                                      nr_entries + 1);
192f18672e4SXiaoyao Li 
193f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].address = address;
194f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].length = length;
195f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].type = type;
196f18672e4SXiaoyao Li     tdx_guest->nr_ram_entries++;
197f18672e4SXiaoyao Li }
198f18672e4SXiaoyao Li 
199f18672e4SXiaoyao Li static int tdx_accept_ram_range(uint64_t address, uint64_t length)
200f18672e4SXiaoyao Li {
201f18672e4SXiaoyao Li     uint64_t head_start, tail_start, head_length, tail_length;
202f18672e4SXiaoyao Li     uint64_t tmp_address, tmp_length;
203f18672e4SXiaoyao Li     TdxRamEntry *e;
204f18672e4SXiaoyao Li     int i = 0;
205f18672e4SXiaoyao Li 
206f18672e4SXiaoyao Li     do {
207f18672e4SXiaoyao Li         if (i == tdx_guest->nr_ram_entries) {
208f18672e4SXiaoyao Li             return -1;
209f18672e4SXiaoyao Li         }
210f18672e4SXiaoyao Li 
211f18672e4SXiaoyao Li         e = &tdx_guest->ram_entries[i++];
212f18672e4SXiaoyao Li     } while (address + length <= e->address || address >= e->address + e->length);
213f18672e4SXiaoyao Li 
214f18672e4SXiaoyao Li     /*
215f18672e4SXiaoyao Li      * The to-be-accepted ram range must be fully contained by one
216f18672e4SXiaoyao Li      * RAM entry.
217f18672e4SXiaoyao Li      */
218f18672e4SXiaoyao Li     if (e->address > address ||
219f18672e4SXiaoyao Li         e->address + e->length < address + length) {
220f18672e4SXiaoyao Li         return -1;
221f18672e4SXiaoyao Li     }
222f18672e4SXiaoyao Li 
223f18672e4SXiaoyao Li     if (e->type == TDX_RAM_ADDED) {
224f18672e4SXiaoyao Li         return 0;
225f18672e4SXiaoyao Li     }
226f18672e4SXiaoyao Li 
227f18672e4SXiaoyao Li     tmp_address = e->address;
228f18672e4SXiaoyao Li     tmp_length = e->length;
229f18672e4SXiaoyao Li 
230f18672e4SXiaoyao Li     e->address = address;
231f18672e4SXiaoyao Li     e->length = length;
232f18672e4SXiaoyao Li     e->type = TDX_RAM_ADDED;
233f18672e4SXiaoyao Li 
234f18672e4SXiaoyao Li     head_length = address - tmp_address;
235f18672e4SXiaoyao Li     if (head_length > 0) {
236f18672e4SXiaoyao Li         head_start = tmp_address;
237f18672e4SXiaoyao Li         tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
238f18672e4SXiaoyao Li     }
239f18672e4SXiaoyao Li 
240f18672e4SXiaoyao Li     tail_start = address + length;
241f18672e4SXiaoyao Li     if (tail_start < tmp_address + tmp_length) {
242f18672e4SXiaoyao Li         tail_length = tmp_address + tmp_length - tail_start;
243f18672e4SXiaoyao Li         tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
244f18672e4SXiaoyao Li     }
245f18672e4SXiaoyao Li 
246f18672e4SXiaoyao Li     return 0;
247f18672e4SXiaoyao Li }
248f18672e4SXiaoyao Li 
249f18672e4SXiaoyao Li static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
250f18672e4SXiaoyao Li {
251f18672e4SXiaoyao Li     const TdxRamEntry *lhs = lhs_;
252f18672e4SXiaoyao Li     const TdxRamEntry *rhs = rhs_;
253f18672e4SXiaoyao Li 
254f18672e4SXiaoyao Li     if (lhs->address == rhs->address) {
255f18672e4SXiaoyao Li         return 0;
256f18672e4SXiaoyao Li     }
257f18672e4SXiaoyao Li     if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
258f18672e4SXiaoyao Li         return 1;
259f18672e4SXiaoyao Li     }
260f18672e4SXiaoyao Li     return -1;
261f18672e4SXiaoyao Li }
262f18672e4SXiaoyao Li 
263f18672e4SXiaoyao Li static void tdx_init_ram_entries(void)
264f18672e4SXiaoyao Li {
265f18672e4SXiaoyao Li     unsigned i, j, nr_e820_entries;
266f18672e4SXiaoyao Li 
267f18672e4SXiaoyao Li     nr_e820_entries = e820_get_table(NULL);
268f18672e4SXiaoyao Li     tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
269f18672e4SXiaoyao Li 
270f18672e4SXiaoyao Li     for (i = 0, j = 0; i < nr_e820_entries; i++) {
271f18672e4SXiaoyao Li         uint64_t addr, len;
272f18672e4SXiaoyao Li 
273f18672e4SXiaoyao Li         if (e820_get_entry(i, E820_RAM, &addr, &len)) {
274f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].address = addr;
275f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].length = len;
276f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
277f18672e4SXiaoyao Li             j++;
278f18672e4SXiaoyao Li         }
279f18672e4SXiaoyao Li     }
280f18672e4SXiaoyao Li     tdx_guest->nr_ram_entries = j;
281f18672e4SXiaoyao Li }
282f18672e4SXiaoyao Li 
28341f7fd22SXiaoyao Li static void tdx_post_init_vcpus(void)
28441f7fd22SXiaoyao Li {
28541f7fd22SXiaoyao Li     TdxFirmwareEntry *hob;
28641f7fd22SXiaoyao Li     CPUState *cpu;
28741f7fd22SXiaoyao Li 
28841f7fd22SXiaoyao Li     hob = tdx_get_hob_entry(tdx_guest);
28941f7fd22SXiaoyao Li     CPU_FOREACH(cpu) {
290e7f926ebSCédric Le Goater         tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address,
29141f7fd22SXiaoyao Li                        &error_fatal);
29241f7fd22SXiaoyao Li     }
29341f7fd22SXiaoyao Li }
29441f7fd22SXiaoyao Li 
/*
 * Notifier callback (installed as tdx_machine_done_notify.notify) that
 * performs the final TD build steps, in this order:
 *
 *  1. build the RAM entry list from the e820 table;
 *  2. give every TDVF section a host buffer (BFV/CFV point into the loaded
 *     TDVF image, TD_HOB/TEMP_MEM get a fresh anonymous mapping and their
 *     guest range is marked as accepted RAM);
 *  3. sort the RAM entries and create the TD HOB;
 *  4. initialize all vCPUs;
 *  5. add every section to the TD with KVM_TDX_INIT_MEM_REGION;
 *  6. discard the now unused TDVF image and issue KVM_TDX_FINALIZE_VM.
 *
 * Any failure reports an error and terminates the process. Both parameters
 * are unused.
 */
static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
    TdxFirmware *tdvf = &tdx_guest->tdvf;
    TdxFirmwareEntry *entry;
    RAMBlock *ram_block;
    Error *local_err = NULL;
    int r;

    tdx_init_ram_entries();

    for_each_tdx_fw_entry(tdvf, entry) {
        switch (entry->type) {
        case TDVF_SECTION_TYPE_BFV:
        case TDVF_SECTION_TYPE_CFV:
            /* Content comes straight from the loaded TDVF image */
            entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
            break;
        case TDVF_SECTION_TYPE_TD_HOB:
        case TDVF_SECTION_TYPE_TEMP_MEM:
            /* No image content: back the section with a temporary mapping */
            entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
                                           qemu_real_host_page_size(), 0, 0);
            if (entry->mem_ptr == MAP_FAILED) {
                error_report("Failed to mmap memory for TDVF section %d",
                             entry->type);
                exit(1);
            }
            if (tdx_accept_ram_range(entry->address, entry->size)) {
                error_report("Failed to accept memory for TDVF section %d",
                             entry->type);
                qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
                exit(1);
            }
            break;
        default:
            error_report("Unsupported TDVF section %d", entry->type);
            exit(1);
        }
    }

    /* tdx_accept_ram_range() appends split-off pieces; restore address order */
    qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
          sizeof(TdxRamEntry), &tdx_ram_entry_compare);

    tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));

    tdx_post_init_vcpus();

    for_each_tdx_fw_entry(tdvf, entry) {
        struct kvm_tdx_init_mem_region region;
        uint32_t flags;

        region = (struct kvm_tdx_init_mem_region) {
            .source_addr = (uintptr_t)entry->mem_ptr,
            .gpa = entry->address,
            /* section size expressed in 4 KiB units */
            .nr_pages = entry->size >> 12,
        };

        /* Only sections flagged MR_EXTEND are measured */
        flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
                KVM_TDX_MEASURE_MEMORY_REGION : 0;

        /* Retry while the ioctl reports -EAGAIN or -EINTR */
        do {
            /* Drop the error left behind by a previous, retried attempt */
            error_free(local_err);
            local_err = NULL;
            r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
                               &region, &local_err);
        } while (r == -EAGAIN || r == -EINTR);
        if (r < 0) {
            error_report_err(local_err);
            exit(1);
        }

        /* The temporary mapping created above is no longer needed */
        if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
            entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
            qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
            entry->mem_ptr = NULL;
        }
    }

    /*
     * TDVF image has been copied into private region above via
     * KVM_TDX_INIT_MEM_REGION. It becomes useless.
     */
    ram_block = tdx_guest->tdvf_mr->ram_block;
    ram_block_discard_range(ram_block, 0, ram_block->max_length);

    tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
    CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
}
3814420ba0eSXiaoyao Li 
/*
 * Notifier that runs tdx_finalize_vm().
 * NOTE(review): presumably registered as a machine-init-done notifier; the
 * registration is not in this part of the file.
 */
static Notifier tdx_machine_done_notify = {
    .notify = tdx_finalize_vm,
};
3854420ba0eSXiaoyao Li 
/*
 * Some CPUID bits change from fixed1 to configurable bits when TDX module
 * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY.
 *
 * To make QEMU work with all the versions of TDX module, keep the fixed1 bits
 * here if they are ever fixed1 bits in any of the version though not fixed1 in
 * the latest version. Otherwise, with the older version of TDX module, QEMU may
 * treat the fixed1 bit as unsupported.
 *
 * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even
 * though they changed to configurable bits. Because tdx_fixed1_bits is used to
 * setup the supported bits.
 *
 * Note: .cpuid.nent must be kept in sync with the number of .entries[]
 * initialized below.
 */
KvmCpuidInfo tdx_fixed1_bits = {
    .cpuid.nent = 8,
    /* CPUID.01H */
    .entries[0] = {
        .function = 0x1,
        .index = 0,
        .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
               CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
               CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
               CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
               CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
               CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
        .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
               CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
               CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
               CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
               CPUID_SSE | CPUID_SSE2,
    },
    /* CPUID.06H */
    .entries[1] = {
        .function = 0x6,
        .index = 0,
        .eax = CPUID_6_EAX_ARAT,
    },
    /* CPUID.(EAX=07H,ECX=0) */
    .entries[2] = {
        .function = 0x7,
        .index = 0,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
               CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
               CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
               CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
               CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
        .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
               CPUID_7_0_ECX_MOVDIR64B,
        .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
               CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
               CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
               CPUID_7_0_EDX_SPEC_CTRL_SSBD,
    },
    /* CPUID.(EAX=07H,ECX=2) */
    .entries[3] = {
        .function = 0x7,
        .index = 2,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
               CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
    },
    /* CPUID.(EAX=0DH,ECX=0) */
    .entries[4] = {
        .function = 0xD,
        .index = 0,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
    },
    /* CPUID.(EAX=0DH,ECX=1) */
    .entries[5] = {
        .function = 0xD,
        .index = 1,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC|
               CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
    },
    /* CPUID.80000001H */
    .entries[6] = {
        .function = 0x80000001,
        .index = 0,
        .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
        /*
         * Strictly speaking, SYSCALL is not a fixed1 bit since it depends on
         * the CPU being in 64-bit mode. But here fixed1 is used to serve the
         * purpose of supported bits for TDX. In this sense, SYSCALL is always
         * supported.
         */
        .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
               CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
    },
    /* CPUID.80000007H */
    .entries[7] = {
        .function = 0x80000007,
        .index = 0,
        .edx = CPUID_APM_INVTSC,
    },
};
4760ba06e46SXiaoyao Li 
/* Maps one TD attribute bit to the CPUID feature bit(s) it enables. */
typedef struct TdxAttrsMap {
    uint32_t attr_index;    /* bit position tested in tdx_caps->supported_attrs */
    uint32_t cpuid_leaf;    /* CPUID function of the affected entry */
    uint32_t cpuid_subleaf; /* CPUID index of the affected entry */
    int cpuid_reg;          /* R_EAX/R_EBX/R_ECX/R_EDX: register holding the bits */
    uint32_t feat_mask;     /* bits OR-ed into that register when supported */
} TdxAttrsMap;
48431df29c5SXiaoyao Li 
/*
 * TD attribute bits that have a matching CPUID feature bit; consumed by
 * tdx_add_supported_cpuid_by_attrs().
 */
static TdxAttrsMap tdx_attrs_maps[] = {
    /* attribute bit 27 -> CPUID.(7,1).EAX LASS */
    {.attr_index = 27,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 1,
     .cpuid_reg = R_EAX,
     .feat_mask = CPUID_7_1_EAX_LASS,},

    /* attribute bit 30 (TDX_TD_ATTRIBUTES_PKS) -> CPUID.(7,0).ECX PKS */
    {.attr_index = 30,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 0,
     .cpuid_reg = R_ECX,
     .feat_mask = CPUID_7_0_ECX_PKS,},

    /* attribute bit 31 -> CPUID.(7,0).ECX KeyLocker */
    {.attr_index = 31,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 0,
     .cpuid_reg = R_ECX,
     .feat_mask = CPUID_7_0_ECX_KeyLocker,},
};
50431df29c5SXiaoyao Li 
/* Ties a CPUID feature to the XFAM bit it depends on. */
typedef struct TdxXFAMDep {
    int xfam_bit;           /* bit position tested in tdx_caps->supported_xfam */
    FeatureMask feat_mask;  /* feature word index and bit mask of the feature */
} TdxXFAMDep;
5098c94c84cSXiaoyao Li 
/*
 * Note, only the CPUID bits whose virtualization type is "XFAM & Native" are
 * defined here.
 *
 * Those whose virtualization type is "XFAM & Configured & Native" are
 * reported as configurable bits. They are not supported if they are missing
 * from the configurable bits list from KVM, even if the corresponding XFAM
 * bit is supported.
 */
TdxXFAMDep tdx_xfam_deps[] = {
    { XSTATE_YMM_BIT,       { FEAT_1_ECX, CPUID_EXT_FMA }},
    { XSTATE_YMM_BIT,       { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
    { XSTATE_OPMASK_BIT,    { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}},
    { XSTATE_OPMASK_BIT,    { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}},
    { XSTATE_PT_BIT,        { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}},
    { XSTATE_PKRU_BIT,      { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
};
5308c94c84cSXiaoyao Li 
5310ba06e46SXiaoyao Li static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
5320ba06e46SXiaoyao Li                                                         uint32_t index)
5330ba06e46SXiaoyao Li {
5340ba06e46SXiaoyao Li     struct kvm_cpuid_entry2 *e;
5350ba06e46SXiaoyao Li 
5360ba06e46SXiaoyao Li     e = cpuid_find_entry(tdx_supported_cpuid, function, index);
5370ba06e46SXiaoyao Li     if (!e) {
5380ba06e46SXiaoyao Li         if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
5390ba06e46SXiaoyao Li             error_report("tdx_supported_cpuid requries more space than %d entries",
5400ba06e46SXiaoyao Li                           KVM_MAX_CPUID_ENTRIES);
5410ba06e46SXiaoyao Li             exit(1);
5420ba06e46SXiaoyao Li         }
5430ba06e46SXiaoyao Li         e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
5440ba06e46SXiaoyao Li         e->function = function;
5450ba06e46SXiaoyao Li         e->index = index;
5460ba06e46SXiaoyao Li     }
5470ba06e46SXiaoyao Li 
5480ba06e46SXiaoyao Li     return e;
5490ba06e46SXiaoyao Li }
5500ba06e46SXiaoyao Li 
5510ba06e46SXiaoyao Li static void tdx_add_supported_cpuid_by_fixed1_bits(void)
5520ba06e46SXiaoyao Li {
5530ba06e46SXiaoyao Li     struct kvm_cpuid_entry2 *e, *e1;
5540ba06e46SXiaoyao Li     int i;
5550ba06e46SXiaoyao Li 
5560ba06e46SXiaoyao Li     for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
5570ba06e46SXiaoyao Li         e = &tdx_fixed1_bits.entries[i];
5580ba06e46SXiaoyao Li 
5590ba06e46SXiaoyao Li         e1 = find_in_supported_entry(e->function, e->index);
5600ba06e46SXiaoyao Li         e1->eax |= e->eax;
5610ba06e46SXiaoyao Li         e1->ebx |= e->ebx;
5620ba06e46SXiaoyao Li         e1->ecx |= e->ecx;
5630ba06e46SXiaoyao Li         e1->edx |= e->edx;
5640ba06e46SXiaoyao Li     }
5650ba06e46SXiaoyao Li }
5660ba06e46SXiaoyao Li 
56731df29c5SXiaoyao Li static void tdx_add_supported_cpuid_by_attrs(void)
56831df29c5SXiaoyao Li {
56931df29c5SXiaoyao Li     struct kvm_cpuid_entry2 *e;
57031df29c5SXiaoyao Li     TdxAttrsMap *map;
57131df29c5SXiaoyao Li     int i;
57231df29c5SXiaoyao Li 
57331df29c5SXiaoyao Li     for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
57431df29c5SXiaoyao Li         map = &tdx_attrs_maps[i];
57531df29c5SXiaoyao Li         if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
57631df29c5SXiaoyao Li             continue;
57731df29c5SXiaoyao Li         }
57831df29c5SXiaoyao Li 
57931df29c5SXiaoyao Li         e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);
58031df29c5SXiaoyao Li 
58131df29c5SXiaoyao Li         switch(map->cpuid_reg) {
58231df29c5SXiaoyao Li         case R_EAX:
58331df29c5SXiaoyao Li             e->eax |= map->feat_mask;
58431df29c5SXiaoyao Li             break;
58531df29c5SXiaoyao Li         case R_EBX:
58631df29c5SXiaoyao Li             e->ebx |= map->feat_mask;
58731df29c5SXiaoyao Li             break;
58831df29c5SXiaoyao Li         case R_ECX:
58931df29c5SXiaoyao Li             e->ecx |= map->feat_mask;
59031df29c5SXiaoyao Li             break;
59131df29c5SXiaoyao Li         case R_EDX:
59231df29c5SXiaoyao Li             e->edx |= map->feat_mask;
59331df29c5SXiaoyao Li             break;
59431df29c5SXiaoyao Li         }
59531df29c5SXiaoyao Li     }
59631df29c5SXiaoyao Li }
59731df29c5SXiaoyao Li 
5988c94c84cSXiaoyao Li static void tdx_add_supported_cpuid_by_xfam(void)
5998c94c84cSXiaoyao Li {
6008c94c84cSXiaoyao Li     struct kvm_cpuid_entry2 *e;
6018c94c84cSXiaoyao Li     int i;
6028c94c84cSXiaoyao Li 
6038c94c84cSXiaoyao Li     const TdxXFAMDep *xfam_dep;
6048c94c84cSXiaoyao Li     const FeatureWordInfo *f;
6058c94c84cSXiaoyao Li     for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
6068c94c84cSXiaoyao Li         xfam_dep = &tdx_xfam_deps[i];
6078c94c84cSXiaoyao Li         if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
6088c94c84cSXiaoyao Li             continue;
6098c94c84cSXiaoyao Li         }
6108c94c84cSXiaoyao Li 
6118c94c84cSXiaoyao Li         f = &feature_word_info[xfam_dep->feat_mask.index];
6128c94c84cSXiaoyao Li         if (f->type != CPUID_FEATURE_WORD) {
6138c94c84cSXiaoyao Li             continue;
6148c94c84cSXiaoyao Li         }
6158c94c84cSXiaoyao Li 
6168c94c84cSXiaoyao Li         e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
6178c94c84cSXiaoyao Li         switch(f->cpuid.reg) {
6188c94c84cSXiaoyao Li         case R_EAX:
6198c94c84cSXiaoyao Li             e->eax |= xfam_dep->feat_mask.mask;
6208c94c84cSXiaoyao Li             break;
6218c94c84cSXiaoyao Li         case R_EBX:
6228c94c84cSXiaoyao Li             e->ebx |= xfam_dep->feat_mask.mask;
6238c94c84cSXiaoyao Li             break;
6248c94c84cSXiaoyao Li         case R_ECX:
6258c94c84cSXiaoyao Li             e->ecx |= xfam_dep->feat_mask.mask;
6268c94c84cSXiaoyao Li             break;
6278c94c84cSXiaoyao Li         case R_EDX:
6288c94c84cSXiaoyao Li             e->edx |= xfam_dep->feat_mask.mask;
6298c94c84cSXiaoyao Li             break;
6308c94c84cSXiaoyao Li         }
6318c94c84cSXiaoyao Li     }
6328c94c84cSXiaoyao Li 
6338c94c84cSXiaoyao Li     e = find_in_supported_entry(0xd, 0);
6348c94c84cSXiaoyao Li     e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
6358c94c84cSXiaoyao Li     e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;
6368c94c84cSXiaoyao Li 
6378c94c84cSXiaoyao Li     e = find_in_supported_entry(0xd, 1);
6389f5771c5SXiaoyao Li     /*
6399f5771c5SXiaoyao Li      * Mark XFD always support for TDX, it will be cleared finally in
6409f5771c5SXiaoyao Li      * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware
6419f5771c5SXiaoyao Li      * because in this case the original data has it as 0.
6429f5771c5SXiaoyao Li      */
6439f5771c5SXiaoyao Li     e->eax |= CPUID_XSAVE_XFD;
6448c94c84cSXiaoyao Li     e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
6458c94c84cSXiaoyao Li     e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
6468c94c84cSXiaoyao Li }
6478c94c84cSXiaoyao Li 
6484d6e288aSXiaoyao Li static void tdx_add_supported_kvm_features(void)
6494d6e288aSXiaoyao Li {
6504d6e288aSXiaoyao Li     struct kvm_cpuid_entry2 *e;
6514d6e288aSXiaoyao Li 
6524d6e288aSXiaoyao Li     e = find_in_supported_entry(0x40000001, 0);
6534d6e288aSXiaoyao Li     e->eax = TDX_SUPPORTED_KVM_FEATURES;
6544d6e288aSXiaoyao Li }
6554d6e288aSXiaoyao Li 
65675ec6189SXiaoyao Li static void tdx_setup_supported_cpuid(void)
65775ec6189SXiaoyao Li {
65875ec6189SXiaoyao Li     if (tdx_supported_cpuid) {
65975ec6189SXiaoyao Li         return;
66075ec6189SXiaoyao Li     }
66175ec6189SXiaoyao Li 
66275ec6189SXiaoyao Li     tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
66375ec6189SXiaoyao Li                     KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
66475ec6189SXiaoyao Li 
66575ec6189SXiaoyao Li     memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
66675ec6189SXiaoyao Li            tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
66775ec6189SXiaoyao Li     tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
6680ba06e46SXiaoyao Li 
6690ba06e46SXiaoyao Li     tdx_add_supported_cpuid_by_fixed1_bits();
67031df29c5SXiaoyao Li     tdx_add_supported_cpuid_by_attrs();
6718c94c84cSXiaoyao Li     tdx_add_supported_cpuid_by_xfam();
6724d6e288aSXiaoyao Li 
6734d6e288aSXiaoyao Li     tdx_add_supported_kvm_features();
67475ec6189SXiaoyao Li }
67575ec6189SXiaoyao Li 
6768eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
6778eddedc3SXiaoyao Li {
678810d4e83SXiaoyao Li     MachineState *ms = MACHINE(qdev_get_machine());
679810d4e83SXiaoyao Li     X86MachineState *x86ms = X86_MACHINE(ms);
6801619d0e4SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(cgs);
6818eddedc3SXiaoyao Li     int r = 0;
6828eddedc3SXiaoyao Li 
6838eddedc3SXiaoyao Li     kvm_mark_guest_state_protected();
6848eddedc3SXiaoyao Li 
685810d4e83SXiaoyao Li     if (x86ms->smm == ON_OFF_AUTO_AUTO) {
686810d4e83SXiaoyao Li         x86ms->smm = ON_OFF_AUTO_OFF;
687810d4e83SXiaoyao Li     } else if (x86ms->smm == ON_OFF_AUTO_ON) {
688810d4e83SXiaoyao Li         error_setg(errp, "TDX VM doesn't support SMM");
689810d4e83SXiaoyao Li         return -EINVAL;
690810d4e83SXiaoyao Li     }
691810d4e83SXiaoyao Li 
692e7ef6089SXiaoyao Li     if (x86ms->pic == ON_OFF_AUTO_AUTO) {
693e7ef6089SXiaoyao Li         x86ms->pic = ON_OFF_AUTO_OFF;
694e7ef6089SXiaoyao Li     } else if (x86ms->pic == ON_OFF_AUTO_ON) {
695e7ef6089SXiaoyao Li         error_setg(errp, "TDX VM doesn't support PIC");
696e7ef6089SXiaoyao Li         return -EINVAL;
697e7ef6089SXiaoyao Li     }
698e7ef6089SXiaoyao Li 
699bb45580dSXiaoyao Li     if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
700bb45580dSXiaoyao Li         kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
701bb45580dSXiaoyao Li     } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
702bb45580dSXiaoyao Li         error_setg(errp, "TDX VM requires kernel_irqchip to be split");
703bb45580dSXiaoyao Li         return -EINVAL;
704bb45580dSXiaoyao Li     }
705bb45580dSXiaoyao Li 
7068eddedc3SXiaoyao Li     if (!tdx_caps) {
7078eddedc3SXiaoyao Li         r = get_tdx_capabilities(errp);
7081619d0e4SXiaoyao Li         if (r) {
7091619d0e4SXiaoyao Li             return r;
7101619d0e4SXiaoyao Li         }
7118eddedc3SXiaoyao Li     }
7128eddedc3SXiaoyao Li 
71375ec6189SXiaoyao Li     tdx_setup_supported_cpuid();
71475ec6189SXiaoyao Li 
7151ff5048dSXiaoyao Li     /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
7161ff5048dSXiaoyao Li     if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
7171ff5048dSXiaoyao Li         return -EOPNOTSUPP;
7181ff5048dSXiaoyao Li     }
7191ff5048dSXiaoyao Li 
720da672865SXiaoyao Li     /*
721da672865SXiaoyao Li      * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly
722da672865SXiaoyao Li      * memory for shared memory but not for private memory. Besides, whether a
723da672865SXiaoyao Li      * memslot is private or shared is not determined by QEMU.
724da672865SXiaoyao Li      *
725da672865SXiaoyao Li      * Thus, just mark readonly memory not supported for simplicity.
726da672865SXiaoyao Li      */
727da672865SXiaoyao Li     kvm_readonly_mem_allowed = false;
728da672865SXiaoyao Li 
7294420ba0eSXiaoyao Li     qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
7304420ba0eSXiaoyao Li 
7311619d0e4SXiaoyao Li     tdx_guest = tdx;
7321619d0e4SXiaoyao Li     return 0;
7338eddedc3SXiaoyao Li }
7348eddedc3SXiaoyao Li 
735b455880eSXiaoyao Li static int tdx_kvm_type(X86ConfidentialGuest *cg)
736b455880eSXiaoyao Li {
737b455880eSXiaoyao Li     /* Do the object check */
738b455880eSXiaoyao Li     TDX_GUEST(cg);
739b455880eSXiaoyao Li 
740b455880eSXiaoyao Li     return KVM_X86_TDX_VM;
741b455880eSXiaoyao Li }
742b455880eSXiaoyao Li 
7437c615242SXiaoyao Li static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
7447c615242SXiaoyao Li {
745750560f8SXiaoyao Li     X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
7469002494fSXiaoyao Li     X86CPU *x86cpu = X86_CPU(cpu);
7479002494fSXiaoyao Li 
748750560f8SXiaoyao Li     if (xcc->model) {
749750560f8SXiaoyao Li         error_report("Named cpu model is not supported for TDX yet!");
750750560f8SXiaoyao Li         exit(1);
751750560f8SXiaoyao Li     }
752750560f8SXiaoyao Li 
7537c615242SXiaoyao Li     object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
7549002494fSXiaoyao Li 
755ea4867b9SXiaoyao Li     /* invtsc is fixed1 for TD guest */
756ea4867b9SXiaoyao Li     object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
757ea4867b9SXiaoyao Li 
75890d2bbd1SXiaoyao Li     x86cpu->force_cpuid_0x1f = true;
7597c615242SXiaoyao Li }
7607c615242SXiaoyao Li 
76175ec6189SXiaoyao Li static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
76275ec6189SXiaoyao Li                                           uint32_t feature, uint32_t index,
76375ec6189SXiaoyao Li                                           int reg, uint32_t value)
76475ec6189SXiaoyao Li {
76575ec6189SXiaoyao Li     struct kvm_cpuid_entry2 *e;
76675ec6189SXiaoyao Li 
7670ba06e46SXiaoyao Li     e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
7680ba06e46SXiaoyao Li     if (e) {
7690ba06e46SXiaoyao Li         value |= cpuid_entry_get_reg(e, reg);
7700ba06e46SXiaoyao Li     }
7710ba06e46SXiaoyao Li 
77275ec6189SXiaoyao Li     if (is_feature_word_cpuid(feature, index, reg)) {
77375ec6189SXiaoyao Li         e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
77475ec6189SXiaoyao Li         if (e) {
77575ec6189SXiaoyao Li             value &= cpuid_entry_get_reg(e, reg);
77675ec6189SXiaoyao Li         }
77775ec6189SXiaoyao Li     }
77875ec6189SXiaoyao Li 
77975ec6189SXiaoyao Li     return value;
78075ec6189SXiaoyao Li }
78175ec6189SXiaoyao Li 
782e3d1a4a6SXiaoyao Li static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
783e3d1a4a6SXiaoyao Li {
784e3d1a4a6SXiaoyao Li     struct kvm_cpuid2 *fetch_cpuid;
785e3d1a4a6SXiaoyao Li     int size = KVM_MAX_CPUID_ENTRIES;
786e3d1a4a6SXiaoyao Li     Error *local_err = NULL;
787e3d1a4a6SXiaoyao Li     int r;
788e3d1a4a6SXiaoyao Li 
789e3d1a4a6SXiaoyao Li     do {
790e3d1a4a6SXiaoyao Li         error_free(local_err);
791e3d1a4a6SXiaoyao Li         local_err = NULL;
792e3d1a4a6SXiaoyao Li 
793e3d1a4a6SXiaoyao Li         fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
794e3d1a4a6SXiaoyao Li                                 sizeof(struct kvm_cpuid_entry2) * size);
795e3d1a4a6SXiaoyao Li         fetch_cpuid->nent = size;
796e3d1a4a6SXiaoyao Li         r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
797e3d1a4a6SXiaoyao Li         if (r == -E2BIG) {
798e3d1a4a6SXiaoyao Li             g_free(fetch_cpuid);
799e3d1a4a6SXiaoyao Li             size = fetch_cpuid->nent;
800e3d1a4a6SXiaoyao Li         }
801e3d1a4a6SXiaoyao Li     } while (r == -E2BIG);
802e3d1a4a6SXiaoyao Li 
803e3d1a4a6SXiaoyao Li     if (r < 0) {
804e3d1a4a6SXiaoyao Li         error_report_err(local_err);
805e3d1a4a6SXiaoyao Li         *ret = r;
806e3d1a4a6SXiaoyao Li         return NULL;
807e3d1a4a6SXiaoyao Li     }
808e3d1a4a6SXiaoyao Li 
809e3d1a4a6SXiaoyao Li     return fetch_cpuid;
810e3d1a4a6SXiaoyao Li }
811e3d1a4a6SXiaoyao Li 
812e3d1a4a6SXiaoyao Li static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
813e3d1a4a6SXiaoyao Li {
814e3d1a4a6SXiaoyao Li     uint64_t actual, requested, unavailable, forced_on;
815e3d1a4a6SXiaoyao Li     g_autofree struct kvm_cpuid2 *fetch_cpuid;
816e3d1a4a6SXiaoyao Li     const char *forced_on_prefix = NULL;
817e3d1a4a6SXiaoyao Li     const char *unav_prefix = NULL;
818e3d1a4a6SXiaoyao Li     struct kvm_cpuid_entry2 *entry;
819e3d1a4a6SXiaoyao Li     X86CPU *cpu = X86_CPU(cs);
820e3d1a4a6SXiaoyao Li     CPUX86State *env = &cpu->env;
821e3d1a4a6SXiaoyao Li     FeatureWordInfo *wi;
822e3d1a4a6SXiaoyao Li     FeatureWord w;
823e3d1a4a6SXiaoyao Li     bool mismatch = false;
824e3d1a4a6SXiaoyao Li     int r;
825e3d1a4a6SXiaoyao Li 
826e3d1a4a6SXiaoyao Li     fetch_cpuid = tdx_fetch_cpuid(cs, &r);
827e3d1a4a6SXiaoyao Li     if (!fetch_cpuid) {
828e3d1a4a6SXiaoyao Li         return r;
829e3d1a4a6SXiaoyao Li     }
830e3d1a4a6SXiaoyao Li 
831e3d1a4a6SXiaoyao Li     if (cpu->check_cpuid || cpu->enforce_cpuid) {
832e3d1a4a6SXiaoyao Li         unav_prefix = "TDX doesn't support requested feature";
833e3d1a4a6SXiaoyao Li         forced_on_prefix = "TDX forcibly sets the feature";
834e3d1a4a6SXiaoyao Li     }
835e3d1a4a6SXiaoyao Li 
836e3d1a4a6SXiaoyao Li     for (w = 0; w < FEATURE_WORDS; w++) {
837e3d1a4a6SXiaoyao Li         wi = &feature_word_info[w];
838e3d1a4a6SXiaoyao Li         actual = 0;
839e3d1a4a6SXiaoyao Li 
840e3d1a4a6SXiaoyao Li         switch (wi->type) {
841e3d1a4a6SXiaoyao Li         case CPUID_FEATURE_WORD:
842e3d1a4a6SXiaoyao Li             entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
843e3d1a4a6SXiaoyao Li             if (!entry) {
844e3d1a4a6SXiaoyao Li                 /*
845e3d1a4a6SXiaoyao Li                  * If KVM doesn't report it means it's totally configurable
846e3d1a4a6SXiaoyao Li                  * by QEMU
847e3d1a4a6SXiaoyao Li                  */
848e3d1a4a6SXiaoyao Li                 continue;
849e3d1a4a6SXiaoyao Li             }
850e3d1a4a6SXiaoyao Li 
851e3d1a4a6SXiaoyao Li             actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
852e3d1a4a6SXiaoyao Li             break;
853e3d1a4a6SXiaoyao Li         case MSR_FEATURE_WORD:
854e3d1a4a6SXiaoyao Li             /*
855e3d1a4a6SXiaoyao Li              * TODO:
856e3d1a4a6SXiaoyao Li              * validate MSR features when KVM has interface report them.
857e3d1a4a6SXiaoyao Li              */
858e3d1a4a6SXiaoyao Li             continue;
859e3d1a4a6SXiaoyao Li         }
860e3d1a4a6SXiaoyao Li 
861deb9db6fSXiaoyao Li         /* Fixup for special cases */
862deb9db6fSXiaoyao Li         switch (w) {
863deb9db6fSXiaoyao Li         case FEAT_8000_0001_EDX:
864deb9db6fSXiaoyao Li             /*
865deb9db6fSXiaoyao Li              * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit
866deb9db6fSXiaoyao Li              * mode and before vcpu running it's not in 64-bit mode.
867deb9db6fSXiaoyao Li              */
868deb9db6fSXiaoyao Li             actual |= CPUID_EXT2_SYSCALL;
869deb9db6fSXiaoyao Li             break;
870deb9db6fSXiaoyao Li         default:
871deb9db6fSXiaoyao Li             break;
872deb9db6fSXiaoyao Li         }
873deb9db6fSXiaoyao Li 
874e3d1a4a6SXiaoyao Li         requested = env->features[w];
875e3d1a4a6SXiaoyao Li         unavailable = requested & ~actual;
876e3d1a4a6SXiaoyao Li         mark_unavailable_features(cpu, w, unavailable, unav_prefix);
877e3d1a4a6SXiaoyao Li         if (unavailable) {
878e3d1a4a6SXiaoyao Li             mismatch = true;
879e3d1a4a6SXiaoyao Li         }
880e3d1a4a6SXiaoyao Li 
881e3d1a4a6SXiaoyao Li         forced_on = actual & ~requested;
882e3d1a4a6SXiaoyao Li         mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
883e3d1a4a6SXiaoyao Li         if (forced_on) {
884e3d1a4a6SXiaoyao Li             mismatch = true;
885e3d1a4a6SXiaoyao Li         }
886e3d1a4a6SXiaoyao Li     }
887e3d1a4a6SXiaoyao Li 
888e3d1a4a6SXiaoyao Li     if (cpu->enforce_cpuid && mismatch) {
889e3d1a4a6SXiaoyao Li         return -EINVAL;
890e3d1a4a6SXiaoyao Li     }
891e3d1a4a6SXiaoyao Li 
892907ee7b6SXiaoyao Li     if (cpu->phys_bits != host_cpu_phys_bits()) {
893907ee7b6SXiaoyao Li         error_report("TDX requires guest CPU physical bits (%u) "
894907ee7b6SXiaoyao Li                      "to match host CPU physical bits (%u)",
895907ee7b6SXiaoyao Li                      cpu->phys_bits, host_cpu_phys_bits());
896907ee7b6SXiaoyao Li         return -EINVAL;
897907ee7b6SXiaoyao Li     }
898907ee7b6SXiaoyao Li 
899e3d1a4a6SXiaoyao Li     return 0;
900e3d1a4a6SXiaoyao Li }
901e3d1a4a6SXiaoyao Li 
90253b6f406SXiaoyao Li static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
90353b6f406SXiaoyao Li {
90453b6f406SXiaoyao Li     if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
905e7f926ebSCédric Le Goater         error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
906e7f926ebSCédric Le Goater                    "(KVM supported: 0x%"PRIx64")", tdx->attributes,
907e7f926ebSCédric Le Goater                    (uint64_t)tdx_caps->supported_attrs);
90853b6f406SXiaoyao Li         return -1;
90953b6f406SXiaoyao Li     }
91053b6f406SXiaoyao Li 
91153b6f406SXiaoyao Li     if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
91253b6f406SXiaoyao Li         error_setg(errp, "Some QEMU unsupported TD attribute bits being "
913e7f926ebSCédric Le Goater                     "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")",
914e7f926ebSCédric Le Goater                     tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
91553b6f406SXiaoyao Li         return -1;
91653b6f406SXiaoyao Li     }
91753b6f406SXiaoyao Li 
91853b6f406SXiaoyao Li     return 0;
91953b6f406SXiaoyao Li }
92053b6f406SXiaoyao Li 
92153b6f406SXiaoyao Li static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
922bb3be394SXiaoyao Li {
923bb3be394SXiaoyao Li     CPUX86State *env = &x86cpu->env;
924bb3be394SXiaoyao Li 
925bb3be394SXiaoyao Li     tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
926bb3be394SXiaoyao Li                              TDX_TD_ATTRIBUTES_PKS : 0;
927bb3be394SXiaoyao Li     tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
92853b6f406SXiaoyao Li 
92953b6f406SXiaoyao Li     return tdx_validate_attributes(tdx_guest, errp);
930bb3be394SXiaoyao Li }
931bb3be394SXiaoyao Li 
932f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
933f15898b0SXiaoyao Li {
934f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
935f15898b0SXiaoyao Li     uint64_t xfam;
936f15898b0SXiaoyao Li 
937f15898b0SXiaoyao Li     xfam = env->features[FEAT_XSAVE_XCR0_LO] |
938f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XCR0_HI] |
939f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_LO] |
940f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_HI];
941f15898b0SXiaoyao Li 
942f15898b0SXiaoyao Li     if (xfam & ~tdx_caps->supported_xfam) {
943e7f926ebSCédric Le Goater         error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))",
944e7f926ebSCédric Le Goater                    xfam, (uint64_t)tdx_caps->supported_xfam);
945f15898b0SXiaoyao Li         return -1;
946f15898b0SXiaoyao Li     }
947f15898b0SXiaoyao Li 
948f15898b0SXiaoyao Li     tdx_guest->xfam = xfam;
949f15898b0SXiaoyao Li     return 0;
950f15898b0SXiaoyao Li }
951f15898b0SXiaoyao Li 
952f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
953f15898b0SXiaoyao Li {
954f15898b0SXiaoyao Li     int i, dest_cnt = 0;
955f15898b0SXiaoyao Li     struct kvm_cpuid_entry2 *src, *dest, *conf;
956f15898b0SXiaoyao Li 
957f15898b0SXiaoyao Li     for (i = 0; i < cpuids->nent; i++) {
958f15898b0SXiaoyao Li         src = cpuids->entries + i;
959f15898b0SXiaoyao Li         conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
960f15898b0SXiaoyao Li         if (!conf) {
961f15898b0SXiaoyao Li             continue;
962f15898b0SXiaoyao Li         }
963f15898b0SXiaoyao Li         dest = cpuids->entries + dest_cnt;
964f15898b0SXiaoyao Li 
965f15898b0SXiaoyao Li         dest->function = src->function;
966f15898b0SXiaoyao Li         dest->index = src->index;
967f15898b0SXiaoyao Li         dest->flags = src->flags;
968f15898b0SXiaoyao Li         dest->eax = src->eax & conf->eax;
969f15898b0SXiaoyao Li         dest->ebx = src->ebx & conf->ebx;
970f15898b0SXiaoyao Li         dest->ecx = src->ecx & conf->ecx;
971f15898b0SXiaoyao Li         dest->edx = src->edx & conf->edx;
972f15898b0SXiaoyao Li 
973f15898b0SXiaoyao Li         dest_cnt++;
974f15898b0SXiaoyao Li     }
975f15898b0SXiaoyao Li     cpuids->nent = dest_cnt++;
976f15898b0SXiaoyao Li }
977f15898b0SXiaoyao Li 
978f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
979f15898b0SXiaoyao Li {
980f15898b0SXiaoyao Li     X86CPU *x86cpu = X86_CPU(cpu);
981f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
982f15898b0SXiaoyao Li     g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
983f15898b0SXiaoyao Li     Error *local_err = NULL;
984d05a0858SIsaku Yamahata     size_t data_len;
985f15898b0SXiaoyao Li     int retry = 10000;
986f15898b0SXiaoyao Li     int r = 0;
987f15898b0SXiaoyao Li 
988f15898b0SXiaoyao Li     QEMU_LOCK_GUARD(&tdx_guest->lock);
989f15898b0SXiaoyao Li     if (tdx_guest->initialized) {
990f15898b0SXiaoyao Li         return r;
991f15898b0SXiaoyao Li     }
992f15898b0SXiaoyao Li 
993f15898b0SXiaoyao Li     init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
994f15898b0SXiaoyao Li                         sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
995f15898b0SXiaoyao Li 
996d529a2acSXiaoyao Li     if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
997d529a2acSXiaoyao Li         error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
998d529a2acSXiaoyao Li         return -EOPNOTSUPP;
999d529a2acSXiaoyao Li     }
1000d529a2acSXiaoyao Li 
1001d529a2acSXiaoyao Li     r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
1002d529a2acSXiaoyao Li                           0, TDX_APIC_BUS_CYCLES_NS);
1003d529a2acSXiaoyao Li     if (r < 0) {
1004d529a2acSXiaoyao Li         error_setg_errno(errp, -r,
1005d529a2acSXiaoyao Li                          "Unable to set core crystal clock frequency to 25MHz");
1006d529a2acSXiaoyao Li         return r;
1007d529a2acSXiaoyao Li     }
1008d529a2acSXiaoyao Li 
10090e73b843SXiaoyao Li     if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
10100e73b843SXiaoyao Li                          env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
1011e7f926ebSCédric Le Goater         error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency "
10120e73b843SXiaoyao Li                          "between [%d, %d] kHz", env->tsc_khz,
10130e73b843SXiaoyao Li                          TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
10140e73b843SXiaoyao Li        return -EINVAL;
10150e73b843SXiaoyao Li     }
10160e73b843SXiaoyao Li 
10170e73b843SXiaoyao Li     if (env->tsc_khz % (25 * 1000)) {
1018e7f926ebSCédric Le Goater         error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz",
10190e73b843SXiaoyao Li                    env->tsc_khz);
10200e73b843SXiaoyao Li         return -EINVAL;
10210e73b843SXiaoyao Li     }
10220e73b843SXiaoyao Li 
10230e73b843SXiaoyao Li     /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */
10240e73b843SXiaoyao Li     r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
10250e73b843SXiaoyao Li     if (r < 0) {
1026e7f926ebSCédric Le Goater         error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
10270e73b843SXiaoyao Li                          env->tsc_khz);
10280e73b843SXiaoyao Li         return r;
10290e73b843SXiaoyao Li     }
10300e73b843SXiaoyao Li 
1031d05a0858SIsaku Yamahata     if (tdx_guest->mrconfigid) {
1032d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
1033d05a0858SIsaku Yamahata                               strlen(tdx_guest->mrconfigid), &data_len, errp);
1034d05a0858SIsaku Yamahata         if (!data) {
1035d05a0858SIsaku Yamahata             return -1;
1036d05a0858SIsaku Yamahata         }
1037d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
103841cd354dSXiaoyao Li             error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, "
103941cd354dSXiaoyao Li                              "expected %d bytes", data_len,
104041cd354dSXiaoyao Li                              QCRYPTO_HASH_DIGEST_LEN_SHA384);
1041d05a0858SIsaku Yamahata             return -1;
1042d05a0858SIsaku Yamahata         }
1043d05a0858SIsaku Yamahata         memcpy(init_vm->mrconfigid, data, data_len);
1044d05a0858SIsaku Yamahata     }
1045d05a0858SIsaku Yamahata 
1046d05a0858SIsaku Yamahata     if (tdx_guest->mrowner) {
1047d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
1048d05a0858SIsaku Yamahata                               strlen(tdx_guest->mrowner), &data_len, errp);
1049d05a0858SIsaku Yamahata         if (!data) {
1050d05a0858SIsaku Yamahata             return -1;
1051d05a0858SIsaku Yamahata         }
1052d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
105341cd354dSXiaoyao Li             error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, "
105441cd354dSXiaoyao Li                              "expected %d bytes", data_len,
105541cd354dSXiaoyao Li                              QCRYPTO_HASH_DIGEST_LEN_SHA384);
1056d05a0858SIsaku Yamahata             return -1;
1057d05a0858SIsaku Yamahata         }
1058d05a0858SIsaku Yamahata         memcpy(init_vm->mrowner, data, data_len);
1059d05a0858SIsaku Yamahata     }
1060d05a0858SIsaku Yamahata 
1061d05a0858SIsaku Yamahata     if (tdx_guest->mrownerconfig) {
1062d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
1063d05a0858SIsaku Yamahata                             strlen(tdx_guest->mrownerconfig), &data_len, errp);
1064d05a0858SIsaku Yamahata         if (!data) {
1065d05a0858SIsaku Yamahata             return -1;
1066d05a0858SIsaku Yamahata         }
1067d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
106841cd354dSXiaoyao Li             error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, "
106941cd354dSXiaoyao Li                              "expected %d bytes", data_len,
107041cd354dSXiaoyao Li                              QCRYPTO_HASH_DIGEST_LEN_SHA384);
1071d05a0858SIsaku Yamahata             return -1;
1072d05a0858SIsaku Yamahata         }
1073d05a0858SIsaku Yamahata         memcpy(init_vm->mrownerconfig, data, data_len);
1074d05a0858SIsaku Yamahata     }
1075d05a0858SIsaku Yamahata 
107653b6f406SXiaoyao Li     r = setup_td_guest_attributes(x86cpu, errp);
107753b6f406SXiaoyao Li     if (r) {
107853b6f406SXiaoyao Li         return r;
107953b6f406SXiaoyao Li     }
1080bb3be394SXiaoyao Li 
1081f15898b0SXiaoyao Li     r = setup_td_xfam(x86cpu, errp);
1082f15898b0SXiaoyao Li     if (r) {
1083f15898b0SXiaoyao Li         return r;
1084f15898b0SXiaoyao Li     }
1085f15898b0SXiaoyao Li 
1086f15898b0SXiaoyao Li     init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
1087f15898b0SXiaoyao Li     tdx_filter_cpuid(&init_vm->cpuid);
1088f15898b0SXiaoyao Li 
1089f15898b0SXiaoyao Li     init_vm->attributes = tdx_guest->attributes;
1090f15898b0SXiaoyao Li     init_vm->xfam = tdx_guest->xfam;
1091f15898b0SXiaoyao Li 
1092f15898b0SXiaoyao Li     /*
1093f15898b0SXiaoyao Li      * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE)
1094f15898b0SXiaoyao Li      * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or
1095f15898b0SXiaoyao Li      * RDSEED) is busy.
1096f15898b0SXiaoyao Li      *
1097f15898b0SXiaoyao Li      * Retry for the case.
1098f15898b0SXiaoyao Li      */
1099f15898b0SXiaoyao Li     do {
1100f15898b0SXiaoyao Li         error_free(local_err);
1101f15898b0SXiaoyao Li         local_err = NULL;
1102f15898b0SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
1103f15898b0SXiaoyao Li     } while (r == -EAGAIN && --retry);
1104f15898b0SXiaoyao Li 
1105f15898b0SXiaoyao Li     if (r < 0) {
1106f15898b0SXiaoyao Li         if (!retry) {
1107f15898b0SXiaoyao Li             error_append_hint(&local_err, "Hardware RNG (Random Number "
1108f15898b0SXiaoyao Li             "Generator) is busy occupied by someone (via RDRAND/RDSEED) "
1109f15898b0SXiaoyao Li             "maliciously, which leads to KVM_TDX_INIT_VM keeping failure "
1110f15898b0SXiaoyao Li             "due to lack of entropy.\n");
1111f15898b0SXiaoyao Li         }
1112f15898b0SXiaoyao Li         error_propagate(errp, local_err);
1113f15898b0SXiaoyao Li         return r;
1114f15898b0SXiaoyao Li     }
1115f15898b0SXiaoyao Li 
1116f15898b0SXiaoyao Li     tdx_guest->initialized = true;
1117f15898b0SXiaoyao Li 
1118f15898b0SXiaoyao Li     return 0;
1119f15898b0SXiaoyao Li }
1120f15898b0SXiaoyao Li 
1121cb5d65a8SXiaoyao Li int tdx_parse_tdvf(void *flash_ptr, int size)
1122cb5d65a8SXiaoyao Li {
1123cb5d65a8SXiaoyao Li     return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
1124cb5d65a8SXiaoyao Li }
1125cb5d65a8SXiaoyao Li 
1126*40da501dSIsaku Yamahata static void tdx_get_quote_completion(TdxGenerateQuoteTask *task)
1127*40da501dSIsaku Yamahata {
1128*40da501dSIsaku Yamahata     TdxGuest *tdx = task->opaque;
1129*40da501dSIsaku Yamahata     int ret;
1130*40da501dSIsaku Yamahata 
1131*40da501dSIsaku Yamahata     /* Maintain the number of in-flight requests. */
1132*40da501dSIsaku Yamahata     qemu_mutex_lock(&tdx->lock);
1133*40da501dSIsaku Yamahata     tdx->num--;
1134*40da501dSIsaku Yamahata     qemu_mutex_unlock(&tdx->lock);
1135*40da501dSIsaku Yamahata 
1136*40da501dSIsaku Yamahata     if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) {
1137*40da501dSIsaku Yamahata         ret = address_space_write(&address_space_memory, task->payload_gpa,
1138*40da501dSIsaku Yamahata                                   MEMTXATTRS_UNSPECIFIED, task->receive_buf,
1139*40da501dSIsaku Yamahata                                   task->receive_buf_received);
1140*40da501dSIsaku Yamahata         if (ret != MEMTX_OK) {
1141*40da501dSIsaku Yamahata             error_report("TDX: get-quote: failed to write quote data.");
1142*40da501dSIsaku Yamahata         } else {
1143*40da501dSIsaku Yamahata             task->hdr.out_len = cpu_to_le64(task->receive_buf_received);
1144*40da501dSIsaku Yamahata         }
1145*40da501dSIsaku Yamahata     }
1146*40da501dSIsaku Yamahata     task->hdr.error_code = cpu_to_le64(task->status_code);
1147*40da501dSIsaku Yamahata 
1148*40da501dSIsaku Yamahata     /* Publish the response contents before marking this request completed. */
1149*40da501dSIsaku Yamahata     smp_wmb();
1150*40da501dSIsaku Yamahata     ret = address_space_write(&address_space_memory, task->buf_gpa,
1151*40da501dSIsaku Yamahata                               MEMTXATTRS_UNSPECIFIED, &task->hdr,
1152*40da501dSIsaku Yamahata                               TDX_GET_QUOTE_HDR_SIZE);
1153*40da501dSIsaku Yamahata     if (ret != MEMTX_OK) {
1154*40da501dSIsaku Yamahata         error_report("TDX: get-quote: failed to update GetQuote header.");
1155*40da501dSIsaku Yamahata     }
1156*40da501dSIsaku Yamahata 
1157*40da501dSIsaku Yamahata     g_free(task->send_data);
1158*40da501dSIsaku Yamahata     g_free(task->receive_buf);
1159*40da501dSIsaku Yamahata     g_free(task);
1160*40da501dSIsaku Yamahata     object_unref(tdx);
1161*40da501dSIsaku Yamahata }
1162*40da501dSIsaku Yamahata 
1163*40da501dSIsaku Yamahata void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
1164*40da501dSIsaku Yamahata {
1165*40da501dSIsaku Yamahata     TdxGenerateQuoteTask *task;
1166*40da501dSIsaku Yamahata     struct tdx_get_quote_header hdr;
1167*40da501dSIsaku Yamahata     hwaddr buf_gpa = run->tdx.get_quote.gpa;
1168*40da501dSIsaku Yamahata     uint64_t buf_len = run->tdx.get_quote.size;
1169*40da501dSIsaku Yamahata 
1170*40da501dSIsaku Yamahata     QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE);
1171*40da501dSIsaku Yamahata 
1172*40da501dSIsaku Yamahata     run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND;
1173*40da501dSIsaku Yamahata 
1174*40da501dSIsaku Yamahata     if (buf_len == 0) {
1175*40da501dSIsaku Yamahata         return;
1176*40da501dSIsaku Yamahata     }
1177*40da501dSIsaku Yamahata 
1178*40da501dSIsaku Yamahata     if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
1179*40da501dSIsaku Yamahata         run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR;
1180*40da501dSIsaku Yamahata         return;
1181*40da501dSIsaku Yamahata     }
1182*40da501dSIsaku Yamahata 
1183*40da501dSIsaku Yamahata     if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED,
1184*40da501dSIsaku Yamahata                            &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
1185*40da501dSIsaku Yamahata         error_report("TDX: get-quote: failed to read GetQuote header.");
1186*40da501dSIsaku Yamahata         return;
1187*40da501dSIsaku Yamahata     }
1188*40da501dSIsaku Yamahata 
1189*40da501dSIsaku Yamahata     if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
1190*40da501dSIsaku Yamahata         return;
1191*40da501dSIsaku Yamahata     }
1192*40da501dSIsaku Yamahata 
1193*40da501dSIsaku Yamahata     /* Only safe-guard check to avoid too large buffer size. */
1194*40da501dSIsaku Yamahata     if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
1195*40da501dSIsaku Yamahata         le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) {
1196*40da501dSIsaku Yamahata         return;
1197*40da501dSIsaku Yamahata     }
1198*40da501dSIsaku Yamahata 
1199*40da501dSIsaku Yamahata     if (!tdx_guest->qg_sock_addr) {
1200*40da501dSIsaku Yamahata         hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
1201*40da501dSIsaku Yamahata         if (address_space_write(&address_space_memory, buf_gpa,
1202*40da501dSIsaku Yamahata                                 MEMTXATTRS_UNSPECIFIED,
1203*40da501dSIsaku Yamahata                                 &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
1204*40da501dSIsaku Yamahata             error_report("TDX: failed to update GetQuote header.");
1205*40da501dSIsaku Yamahata             return;
1206*40da501dSIsaku Yamahata         }
1207*40da501dSIsaku Yamahata         run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
1208*40da501dSIsaku Yamahata         return;
1209*40da501dSIsaku Yamahata     }
1210*40da501dSIsaku Yamahata 
1211*40da501dSIsaku Yamahata     qemu_mutex_lock(&tdx_guest->lock);
1212*40da501dSIsaku Yamahata     if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) {
1213*40da501dSIsaku Yamahata         qemu_mutex_unlock(&tdx_guest->lock);
1214*40da501dSIsaku Yamahata         run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY;
1215*40da501dSIsaku Yamahata         return;
1216*40da501dSIsaku Yamahata     }
1217*40da501dSIsaku Yamahata     tdx_guest->num++;
1218*40da501dSIsaku Yamahata     qemu_mutex_unlock(&tdx_guest->lock);
1219*40da501dSIsaku Yamahata 
1220*40da501dSIsaku Yamahata     task = g_new(TdxGenerateQuoteTask, 1);
1221*40da501dSIsaku Yamahata     task->buf_gpa = buf_gpa;
1222*40da501dSIsaku Yamahata     task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE;
1223*40da501dSIsaku Yamahata     task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE;
1224*40da501dSIsaku Yamahata     task->hdr = hdr;
1225*40da501dSIsaku Yamahata     task->completion = tdx_get_quote_completion;
1226*40da501dSIsaku Yamahata 
1227*40da501dSIsaku Yamahata     task->send_data_size = le32_to_cpu(hdr.in_len);
1228*40da501dSIsaku Yamahata     task->send_data = g_malloc(task->send_data_size);
1229*40da501dSIsaku Yamahata     task->send_data_sent = 0;
1230*40da501dSIsaku Yamahata 
1231*40da501dSIsaku Yamahata     if (address_space_read(&address_space_memory, task->payload_gpa,
1232*40da501dSIsaku Yamahata                            MEMTXATTRS_UNSPECIFIED, task->send_data,
1233*40da501dSIsaku Yamahata                            task->send_data_size) != MEMTX_OK) {
1234*40da501dSIsaku Yamahata         goto out_free;
1235*40da501dSIsaku Yamahata     }
1236*40da501dSIsaku Yamahata 
1237*40da501dSIsaku Yamahata     /* Mark the buffer in-flight. */
1238*40da501dSIsaku Yamahata     hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
1239*40da501dSIsaku Yamahata     if (address_space_write(&address_space_memory, buf_gpa,
1240*40da501dSIsaku Yamahata                             MEMTXATTRS_UNSPECIFIED,
1241*40da501dSIsaku Yamahata                             &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
1242*40da501dSIsaku Yamahata         goto out_free;
1243*40da501dSIsaku Yamahata     }
1244*40da501dSIsaku Yamahata 
1245*40da501dSIsaku Yamahata     task->receive_buf = g_malloc0(task->payload_len);
1246*40da501dSIsaku Yamahata     task->receive_buf_received = 0;
1247*40da501dSIsaku Yamahata     task->opaque = tdx_guest;
1248*40da501dSIsaku Yamahata 
1249*40da501dSIsaku Yamahata     object_ref(tdx_guest);
1250*40da501dSIsaku Yamahata     tdx_generate_quote(task, tdx_guest->qg_sock_addr);
1251*40da501dSIsaku Yamahata     run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
1252*40da501dSIsaku Yamahata     return;
1253*40da501dSIsaku Yamahata 
1254*40da501dSIsaku Yamahata out_free:
1255*40da501dSIsaku Yamahata     g_free(task->send_data);
1256*40da501dSIsaku Yamahata     g_free(task);
1257*40da501dSIsaku Yamahata }
1258*40da501dSIsaku Yamahata 
1259427b8cf4SBinbin Wu void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
1260427b8cf4SBinbin Wu {
1261427b8cf4SBinbin Wu     if (run->tdx.get_tdvmcall_info.leaf != 1) {
1262427b8cf4SBinbin Wu 	return;
1263427b8cf4SBinbin Wu     }
1264427b8cf4SBinbin Wu 
1265*40da501dSIsaku Yamahata     run->tdx.get_tdvmcall_info.r11 = TDG_VP_VMCALL_SUBFUNC_GET_QUOTE;
1266427b8cf4SBinbin Wu     run->tdx.get_tdvmcall_info.r12 = 0;
1267427b8cf4SBinbin Wu     run->tdx.get_tdvmcall_info.r13 = 0;
1268427b8cf4SBinbin Wu     run->tdx.get_tdvmcall_info.r14 = 0;
1269427b8cf4SBinbin Wu }
1270427b8cf4SBinbin Wu 
12716e250463SXiaoyao Li static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
12726e250463SXiaoyao Li                                         char *message, uint64_t gpa)
12736e250463SXiaoyao Li {
12746e250463SXiaoyao Li     GuestPanicInformation *panic_info;
12756e250463SXiaoyao Li 
12766e250463SXiaoyao Li     panic_info = g_new0(GuestPanicInformation, 1);
12776e250463SXiaoyao Li     panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
12786e250463SXiaoyao Li     panic_info->u.tdx.error_code = (uint32_t) error_code;
12796e250463SXiaoyao Li     panic_info->u.tdx.message = message;
12806e250463SXiaoyao Li     panic_info->u.tdx.gpa = gpa;
12816e250463SXiaoyao Li 
12826e250463SXiaoyao Li     qemu_system_guest_panicked(panic_info);
12836e250463SXiaoyao Li }
12846e250463SXiaoyao Li 
128598dbfd68SXiaoyao Li /*
128698dbfd68SXiaoyao Li  * Only 8 registers can contain valid ASCII byte stream to form the fatal
128798dbfd68SXiaoyao Li  * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
128898dbfd68SXiaoyao Li  */
128998dbfd68SXiaoyao Li #define TDX_FATAL_MESSAGE_MAX        64
129098dbfd68SXiaoyao Li 
12916e250463SXiaoyao Li #define TDX_REPORT_FATAL_ERROR_GPA_VALID    BIT_ULL(63)
12926e250463SXiaoyao Li 
129398dbfd68SXiaoyao Li int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
129498dbfd68SXiaoyao Li {
129598dbfd68SXiaoyao Li     uint64_t error_code = run->system_event.data[R_R12];
129698dbfd68SXiaoyao Li     uint64_t reg_mask = run->system_event.data[R_ECX];
129798dbfd68SXiaoyao Li     char *message = NULL;
129898dbfd68SXiaoyao Li     uint64_t *tmp;
12996e250463SXiaoyao Li     uint64_t gpa = -1ull;
130098dbfd68SXiaoyao Li 
130198dbfd68SXiaoyao Li     if (error_code & 0xffff) {
1302e7f926ebSCédric Le Goater         error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
130398dbfd68SXiaoyao Li                      error_code);
130498dbfd68SXiaoyao Li         return -1;
130598dbfd68SXiaoyao Li     }
130698dbfd68SXiaoyao Li 
130798dbfd68SXiaoyao Li     if (reg_mask) {
130898dbfd68SXiaoyao Li         message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
130998dbfd68SXiaoyao Li         tmp = (uint64_t *)message;
131098dbfd68SXiaoyao Li 
131198dbfd68SXiaoyao Li #define COPY_REG(REG)                               \
131298dbfd68SXiaoyao Li     do {                                            \
131398dbfd68SXiaoyao Li         if (reg_mask & BIT_ULL(REG)) {              \
131498dbfd68SXiaoyao Li             *(tmp++) = run->system_event.data[REG]; \
131598dbfd68SXiaoyao Li         }                                           \
131698dbfd68SXiaoyao Li     } while (0)
131798dbfd68SXiaoyao Li 
131898dbfd68SXiaoyao Li         COPY_REG(R_R14);
131998dbfd68SXiaoyao Li         COPY_REG(R_R15);
132098dbfd68SXiaoyao Li         COPY_REG(R_EBX);
132198dbfd68SXiaoyao Li         COPY_REG(R_EDI);
132298dbfd68SXiaoyao Li         COPY_REG(R_ESI);
132398dbfd68SXiaoyao Li         COPY_REG(R_R8);
132498dbfd68SXiaoyao Li         COPY_REG(R_R9);
132598dbfd68SXiaoyao Li         COPY_REG(R_EDX);
132698dbfd68SXiaoyao Li         *((char *)tmp) = '\0';
132798dbfd68SXiaoyao Li     }
132898dbfd68SXiaoyao Li #undef COPY_REG
132998dbfd68SXiaoyao Li 
13306e250463SXiaoyao Li     if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
13316e250463SXiaoyao Li         gpa = run->system_event.data[R_R13];
13326e250463SXiaoyao Li     }
13336e250463SXiaoyao Li 
13346e250463SXiaoyao Li     tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
13356e250463SXiaoyao Li 
133698dbfd68SXiaoyao Li     return -1;
133798dbfd68SXiaoyao Li }
133898dbfd68SXiaoyao Li 
13396016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
13406016e297SXiaoyao Li {
13416016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
13426016e297SXiaoyao Li 
13436016e297SXiaoyao Li     return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
13446016e297SXiaoyao Li }
13456016e297SXiaoyao Li 
13466016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
13476016e297SXiaoyao Li {
13486016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
13496016e297SXiaoyao Li 
13506016e297SXiaoyao Li     if (value) {
13516016e297SXiaoyao Li         tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
13526016e297SXiaoyao Li     } else {
13536016e297SXiaoyao Li         tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
13546016e297SXiaoyao Li     }
13556016e297SXiaoyao Li }
13566016e297SXiaoyao Li 
1357d05a0858SIsaku Yamahata static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
1358d05a0858SIsaku Yamahata {
1359d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1360d05a0858SIsaku Yamahata 
1361d05a0858SIsaku Yamahata     return g_strdup(tdx->mrconfigid);
1362d05a0858SIsaku Yamahata }
1363d05a0858SIsaku Yamahata 
1364d05a0858SIsaku Yamahata static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
1365d05a0858SIsaku Yamahata {
1366d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1367d05a0858SIsaku Yamahata 
1368d05a0858SIsaku Yamahata     g_free(tdx->mrconfigid);
1369d05a0858SIsaku Yamahata     tdx->mrconfigid = g_strdup(value);
1370d05a0858SIsaku Yamahata }
1371d05a0858SIsaku Yamahata 
1372d05a0858SIsaku Yamahata static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
1373d05a0858SIsaku Yamahata {
1374d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1375d05a0858SIsaku Yamahata 
1376d05a0858SIsaku Yamahata     return g_strdup(tdx->mrowner);
1377d05a0858SIsaku Yamahata }
1378d05a0858SIsaku Yamahata 
1379d05a0858SIsaku Yamahata static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
1380d05a0858SIsaku Yamahata {
1381d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1382d05a0858SIsaku Yamahata 
1383d05a0858SIsaku Yamahata     g_free(tdx->mrowner);
1384d05a0858SIsaku Yamahata     tdx->mrowner = g_strdup(value);
1385d05a0858SIsaku Yamahata }
1386d05a0858SIsaku Yamahata 
1387d05a0858SIsaku Yamahata static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
1388d05a0858SIsaku Yamahata {
1389d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1390d05a0858SIsaku Yamahata 
1391d05a0858SIsaku Yamahata     return g_strdup(tdx->mrownerconfig);
1392d05a0858SIsaku Yamahata }
1393d05a0858SIsaku Yamahata 
1394d05a0858SIsaku Yamahata static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
1395d05a0858SIsaku Yamahata {
1396d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1397d05a0858SIsaku Yamahata 
1398d05a0858SIsaku Yamahata     g_free(tdx->mrownerconfig);
1399d05a0858SIsaku Yamahata     tdx->mrownerconfig = g_strdup(value);
1400d05a0858SIsaku Yamahata }
1401d05a0858SIsaku Yamahata 
1402*40da501dSIsaku Yamahata static void tdx_guest_get_qgs(Object *obj, Visitor *v,
1403*40da501dSIsaku Yamahata                               const char *name, void *opaque,
1404*40da501dSIsaku Yamahata                               Error **errp)
1405*40da501dSIsaku Yamahata {
1406*40da501dSIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1407*40da501dSIsaku Yamahata 
1408*40da501dSIsaku Yamahata     if (!tdx->qg_sock_addr) {
1409*40da501dSIsaku Yamahata         error_setg(errp, "quote-generation-socket is not set");
1410*40da501dSIsaku Yamahata         return;
1411*40da501dSIsaku Yamahata     }
1412*40da501dSIsaku Yamahata     visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp);
1413*40da501dSIsaku Yamahata }
1414*40da501dSIsaku Yamahata 
1415*40da501dSIsaku Yamahata static void tdx_guest_set_qgs(Object *obj, Visitor *v,
1416*40da501dSIsaku Yamahata                               const char *name, void *opaque,
1417*40da501dSIsaku Yamahata                               Error **errp)
1418*40da501dSIsaku Yamahata {
1419*40da501dSIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
1420*40da501dSIsaku Yamahata     SocketAddress *sock = NULL;
1421*40da501dSIsaku Yamahata 
1422*40da501dSIsaku Yamahata     if (!visit_type_SocketAddress(v, name, &sock, errp)) {
1423*40da501dSIsaku Yamahata         return;
1424*40da501dSIsaku Yamahata     }
1425*40da501dSIsaku Yamahata 
1426*40da501dSIsaku Yamahata     if (tdx->qg_sock_addr) {
1427*40da501dSIsaku Yamahata         qapi_free_SocketAddress(tdx->qg_sock_addr);
1428*40da501dSIsaku Yamahata     }
1429*40da501dSIsaku Yamahata 
1430*40da501dSIsaku Yamahata     tdx->qg_sock_addr = sock;
1431*40da501dSIsaku Yamahata }
1432*40da501dSIsaku Yamahata 
1433756e12e7SXiaoyao Li /* tdx guest */
1434756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
1435756e12e7SXiaoyao Li                                    tdx_guest,
1436756e12e7SXiaoyao Li                                    TDX_GUEST,
1437756e12e7SXiaoyao Li                                    X86_CONFIDENTIAL_GUEST,
1438756e12e7SXiaoyao Li                                    { TYPE_USER_CREATABLE },
1439756e12e7SXiaoyao Li                                    { NULL })
1440756e12e7SXiaoyao Li 
1441756e12e7SXiaoyao Li static void tdx_guest_init(Object *obj)
1442756e12e7SXiaoyao Li {
1443756e12e7SXiaoyao Li     ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
1444756e12e7SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
1445756e12e7SXiaoyao Li 
1446f15898b0SXiaoyao Li     qemu_mutex_init(&tdx->lock);
1447f15898b0SXiaoyao Li 
1448756e12e7SXiaoyao Li     cgs->require_guest_memfd = true;
1449714af522SIsaku Yamahata     tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
1450756e12e7SXiaoyao Li 
1451756e12e7SXiaoyao Li     object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
1452756e12e7SXiaoyao Li                                    OBJ_PROP_FLAG_READWRITE);
14536016e297SXiaoyao Li     object_property_add_bool(obj, "sept-ve-disable",
14546016e297SXiaoyao Li                              tdx_guest_get_sept_ve_disable,
14556016e297SXiaoyao Li                              tdx_guest_set_sept_ve_disable);
1456d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrconfigid",
1457d05a0858SIsaku Yamahata                             tdx_guest_get_mrconfigid,
1458d05a0858SIsaku Yamahata                             tdx_guest_set_mrconfigid);
1459d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrowner",
1460d05a0858SIsaku Yamahata                             tdx_guest_get_mrowner, tdx_guest_set_mrowner);
1461d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrownerconfig",
1462d05a0858SIsaku Yamahata                             tdx_guest_get_mrownerconfig,
1463d05a0858SIsaku Yamahata                             tdx_guest_set_mrownerconfig);
1464*40da501dSIsaku Yamahata 
1465*40da501dSIsaku Yamahata     object_property_add(obj, "quote-generation-socket", "SocketAddress",
1466*40da501dSIsaku Yamahata                             tdx_guest_get_qgs,
1467*40da501dSIsaku Yamahata                             tdx_guest_set_qgs,
1468*40da501dSIsaku Yamahata                             NULL, NULL);
1469*40da501dSIsaku Yamahata 
1470*40da501dSIsaku Yamahata     qemu_mutex_init(&tdx->lock);
1471756e12e7SXiaoyao Li }
1472756e12e7SXiaoyao Li 
1473756e12e7SXiaoyao Li static void tdx_guest_finalize(Object *obj)
1474756e12e7SXiaoyao Li {
1475756e12e7SXiaoyao Li }
1476756e12e7SXiaoyao Li 
1477756e12e7SXiaoyao Li static void tdx_guest_class_init(ObjectClass *oc, const void *data)
1478756e12e7SXiaoyao Li {
1479631a2ac5SXiaoyao Li     ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
1480b455880eSXiaoyao Li     X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
1481b455880eSXiaoyao Li 
1482631a2ac5SXiaoyao Li     klass->kvm_init = tdx_kvm_init;
1483b455880eSXiaoyao Li     x86_klass->kvm_type = tdx_kvm_type;
14847c615242SXiaoyao Li     x86_klass->cpu_instance_init = tdx_cpu_instance_init;
148575ec6189SXiaoyao Li     x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
1486e3d1a4a6SXiaoyao Li     x86_klass->check_features = tdx_check_features;
1487756e12e7SXiaoyao Li }
1488