xref: /openbmc/qemu/target/i386/kvm/kvm.c (revision 2dc652961d6a9508d5db140765a0b22238165d88)
1a9dc68d9SClaudio Fontana /*
2a9dc68d9SClaudio Fontana  * QEMU KVM support
3a9dc68d9SClaudio Fontana  *
4a9dc68d9SClaudio Fontana  * Copyright (C) 2006-2008 Qumranet Technologies
5a9dc68d9SClaudio Fontana  * Copyright IBM, Corp. 2008
6a9dc68d9SClaudio Fontana  *
7a9dc68d9SClaudio Fontana  * Authors:
8a9dc68d9SClaudio Fontana  *  Anthony Liguori   <aliguori@us.ibm.com>
9a9dc68d9SClaudio Fontana  *
10a9dc68d9SClaudio Fontana  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11a9dc68d9SClaudio Fontana  * See the COPYING file in the top-level directory.
12a9dc68d9SClaudio Fontana  *
13a9dc68d9SClaudio Fontana  */
14a9dc68d9SClaudio Fontana 
15a9dc68d9SClaudio Fontana #include "qemu/osdep.h"
16a9dc68d9SClaudio Fontana #include "qapi/qapi-events-run-state.h"
17a9dc68d9SClaudio Fontana #include "qapi/error.h"
18e2e69f6bSChenyi Qiang #include "qapi/visitor.h"
190418f908SAnthony Harivel #include <math.h>
20a9dc68d9SClaudio Fontana #include <sys/ioctl.h>
21a9dc68d9SClaudio Fontana #include <sys/utsname.h>
2219db68caSYang Zhong #include <sys/syscall.h>
230418f908SAnthony Harivel #include <sys/resource.h>
240418f908SAnthony Harivel #include <sys/time.h>
25a9dc68d9SClaudio Fontana 
26a9dc68d9SClaudio Fontana #include <linux/kvm.h>
2747e76d03SMichael Roth #include <linux/kvm_para.h>
28a9dc68d9SClaudio Fontana #include "standard-headers/asm-x86/kvm_para.h"
29f66b8a83SJoao Martins #include "hw/xen/interface/arch-x86/cpuid.h"
30a9dc68d9SClaudio Fontana 
31a9dc68d9SClaudio Fontana #include "cpu.h"
32f5cc5a5cSClaudio Fontana #include "host-cpu.h"
330418f908SAnthony Harivel #include "vmsr_energy.h"
34a9dc68d9SClaudio Fontana #include "sysemu/sysemu.h"
35a9dc68d9SClaudio Fontana #include "sysemu/hw_accel.h"
36a9dc68d9SClaudio Fontana #include "sysemu/kvm_int.h"
37a9dc68d9SClaudio Fontana #include "sysemu/runstate.h"
38a9dc68d9SClaudio Fontana #include "kvm_i386.h"
39ee88612dSPaolo Bonzini #include "../confidential-guest.h"
4093777de3SPhilippe Mathieu-Daudé #include "sev.h"
4161491cf4SDavid Woodhouse #include "xen-emu.h"
42a9dc68d9SClaudio Fontana #include "hyperv.h"
43a9dc68d9SClaudio Fontana #include "hyperv-proto.h"
44a9dc68d9SClaudio Fontana 
455b7d54d4SAlex Bennée #include "gdbstub/enums.h"
46a9dc68d9SClaudio Fontana #include "qemu/host-utils.h"
47a9dc68d9SClaudio Fontana #include "qemu/main-loop.h"
48e2c1c34fSMarkus Armbruster #include "qemu/ratelimit.h"
49a9dc68d9SClaudio Fontana #include "qemu/config-file.h"
50a9dc68d9SClaudio Fontana #include "qemu/error-report.h"
515df022cfSPeter Maydell #include "qemu/memalign.h"
52a9dc68d9SClaudio Fontana #include "hw/i386/x86.h"
536096cf78SDavid Woodhouse #include "hw/i386/kvm/xen_evtchn.h"
5461491cf4SDavid Woodhouse #include "hw/i386/pc.h"
55a9dc68d9SClaudio Fontana #include "hw/i386/apic.h"
56a9dc68d9SClaudio Fontana #include "hw/i386/apic_internal.h"
57a9dc68d9SClaudio Fontana #include "hw/i386/apic-msidef.h"
58a9dc68d9SClaudio Fontana #include "hw/i386/intel_iommu.h"
596ddeb0ecSZhao Liu #include "hw/i386/topology.h"
60a9dc68d9SClaudio Fontana #include "hw/i386/x86-iommu.h"
61a9dc68d9SClaudio Fontana #include "hw/i386/e820_memory_layout.h"
62a9dc68d9SClaudio Fontana 
6361491cf4SDavid Woodhouse #include "hw/xen/xen.h"
6461491cf4SDavid Woodhouse 
65a9dc68d9SClaudio Fontana #include "hw/pci/pci.h"
66a9dc68d9SClaudio Fontana #include "hw/pci/msi.h"
67a9dc68d9SClaudio Fontana #include "hw/pci/msix.h"
68a9dc68d9SClaudio Fontana #include "migration/blocker.h"
69a9dc68d9SClaudio Fontana #include "exec/memattrs.h"
70a9dc68d9SClaudio Fontana #include "trace.h"
71a9dc68d9SClaudio Fontana 
72d8701185SJon Doron #include CONFIG_DEVICES
73d8701185SJon Doron 
/* Uncomment the next line to route DPRINTF() output to stderr. */
/* #define DEBUG_KVM */

#if defined(DEBUG_KVM)
/* Debug build: forward the format string and its arguments to stderr. */
#define DPRINTF(fmt, ...)                      \
    do {                                       \
        fprintf(stderr, fmt, ## __VA_ARGS__);  \
    } while (0)
#else
/* Normal build: DPRINTF() expands to an empty statement. */
#define DPRINTF(fmt, ...)                      \
    do {                                       \
    } while (0)
#endif
83a9dc68d9SClaudio Fontana 
/*
 * vm86 support on older Intel CPUs
 *
 * KVM runs 16-bit code in vm86 mode on those CPUs, which requires an EPT
 * identity map and a TSS.  Both have to live in guest physical memory, so
 * they are allocated by telling the kernel their start addresses and by
 * creating a matching e820 entry.  Four pages are needed below the BIOS;
 * this base leaves room for BIOSes of up to 16M.
 */
#define KVM_IDENTITY_BASE           0xfeffc000

/* Values taken from arch/x86/kvm/lapic.h */
#define KVM_APIC_BUS_CYCLE_NS       1
#define KVM_APIC_BUS_FREQUENCY      (1000000000ULL / KVM_APIC_BUS_CYCLE_NS)

#define MSR_KVM_WALL_CLOCK          0x11
#define MSR_KVM_SYSTEM_TIME         0x12

/*
 * Size of the MSR transfer buffer: 4096 bytes fit the 8-byte kvm_msrs
 * header followed by 255 kvm_msr_entry structs.
 */
#define MSR_BUF_SIZE                4096
104a9dc68d9SClaudio Fontana 
105ed2880f4SAni Sinha typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
106ed2880f4SAni Sinha typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
107ed2880f4SAni Sinha typedef struct {
108ed2880f4SAni Sinha     uint32_t msr;
109ed2880f4SAni Sinha     QEMURDMSRHandler *rdmsr;
110ed2880f4SAni Sinha     QEMUWRMSRHandler *wrmsr;
111ed2880f4SAni Sinha } KVMMSRHandlers;
112ed2880f4SAni Sinha 
113a9dc68d9SClaudio Fontana static void kvm_init_msrs(X86CPU *cpu);
114ed2880f4SAni Sinha static bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
115ed2880f4SAni Sinha                            QEMUWRMSRHandler *wrmsr);
116a9dc68d9SClaudio Fontana 
117a9dc68d9SClaudio Fontana const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
118a9dc68d9SClaudio Fontana     KVM_CAP_INFO(SET_TSS_ADDR),
119a9dc68d9SClaudio Fontana     KVM_CAP_INFO(EXT_CPUID),
120a9dc68d9SClaudio Fontana     KVM_CAP_INFO(MP_STATE),
121cc5e719eSPaolo Bonzini     KVM_CAP_INFO(SIGNAL_MSI),
1224b299166SPaolo Bonzini     KVM_CAP_INFO(IRQ_ROUTING),
123f57a4dd3SPaolo Bonzini     KVM_CAP_INFO(DEBUGREGS),
1248bba0a3bSPaolo Bonzini     KVM_CAP_INFO(XSAVE),
1251a44a79dSPaolo Bonzini     KVM_CAP_INFO(VCPU_EVENTS),
1261a44a79dSPaolo Bonzini     KVM_CAP_INFO(X86_ROBUST_SINGLESTEP),
12786f2438fSPaolo Bonzini     KVM_CAP_INFO(MCE),
128700766baSPaolo Bonzini     KVM_CAP_INFO(ADJUST_CLOCK),
12952b04ea4SPaolo Bonzini     KVM_CAP_INFO(SET_IDENTITY_MAP_ADDR),
130a9dc68d9SClaudio Fontana     KVM_CAP_LAST_INFO
131a9dc68d9SClaudio Fontana };
132a9dc68d9SClaudio Fontana 
133a9dc68d9SClaudio Fontana static bool has_msr_star;
134a9dc68d9SClaudio Fontana static bool has_msr_hsave_pa;
135a9dc68d9SClaudio Fontana static bool has_msr_tsc_aux;
136a9dc68d9SClaudio Fontana static bool has_msr_tsc_adjust;
137a9dc68d9SClaudio Fontana static bool has_msr_tsc_deadline;
138a9dc68d9SClaudio Fontana static bool has_msr_feature_control;
139a9dc68d9SClaudio Fontana static bool has_msr_misc_enable;
140a9dc68d9SClaudio Fontana static bool has_msr_smbase;
141a9dc68d9SClaudio Fontana static bool has_msr_bndcfgs;
142a9dc68d9SClaudio Fontana static int lm_capable_kernel;
143a9dc68d9SClaudio Fontana static bool has_msr_hv_hypercall;
144a9dc68d9SClaudio Fontana static bool has_msr_hv_crash;
145a9dc68d9SClaudio Fontana static bool has_msr_hv_reset;
146a9dc68d9SClaudio Fontana static bool has_msr_hv_vpindex;
147a9dc68d9SClaudio Fontana static bool hv_vpindex_settable;
148a9dc68d9SClaudio Fontana static bool has_msr_hv_runtime;
149a9dc68d9SClaudio Fontana static bool has_msr_hv_synic;
150a9dc68d9SClaudio Fontana static bool has_msr_hv_stimer;
151a9dc68d9SClaudio Fontana static bool has_msr_hv_frequencies;
152a9dc68d9SClaudio Fontana static bool has_msr_hv_reenlightenment;
15373d24074SJon Doron static bool has_msr_hv_syndbg_options;
154a9dc68d9SClaudio Fontana static bool has_msr_xss;
155a9dc68d9SClaudio Fontana static bool has_msr_umwait;
156a9dc68d9SClaudio Fontana static bool has_msr_spec_ctrl;
157cabf9862SMaxim Levitsky static bool has_tsc_scale_msr;
158a9dc68d9SClaudio Fontana static bool has_msr_tsx_ctrl;
159a9dc68d9SClaudio Fontana static bool has_msr_virt_ssbd;
160a9dc68d9SClaudio Fontana static bool has_msr_smi_count;
161a9dc68d9SClaudio Fontana static bool has_msr_arch_capabs;
162a9dc68d9SClaudio Fontana static bool has_msr_core_capabs;
163a9dc68d9SClaudio Fontana static bool has_msr_vmx_vmfunc;
164a9dc68d9SClaudio Fontana static bool has_msr_ucode_rev;
165a9dc68d9SClaudio Fontana static bool has_msr_vmx_procbased_ctls2;
166a9dc68d9SClaudio Fontana static bool has_msr_perf_capabs;
1676aa4228bSChenyi Qiang static bool has_msr_pkrs;
168b5151aceSGao Shiyuan static bool has_msr_hwcr;
169a9dc68d9SClaudio Fontana 
170a9dc68d9SClaudio Fontana static uint32_t has_architectural_pmu_version;
171a9dc68d9SClaudio Fontana static uint32_t num_architectural_pmu_gp_counters;
172a9dc68d9SClaudio Fontana static uint32_t num_architectural_pmu_fixed_counters;
173a9dc68d9SClaudio Fontana 
174e56dd3c7SJing Liu static int has_xsave2;
175a9dc68d9SClaudio Fontana static int has_xcrs;
1768f515d38SMaxim Levitsky static int has_sregs2;
177a9dc68d9SClaudio Fontana static int has_exception_payload;
17812f89a39SChenyi Qiang static int has_triple_fault_event;
179a9dc68d9SClaudio Fontana 
180a9dc68d9SClaudio Fontana static bool has_msr_mcg_ext_ctl;
181a9dc68d9SClaudio Fontana 
182a9dc68d9SClaudio Fontana static struct kvm_cpuid2 *cpuid_cache;
183a8439be6SVitaly Kuznetsov static struct kvm_cpuid2 *hv_cpuid_cache;
184a9dc68d9SClaudio Fontana static struct kvm_msr_list *kvm_feature_msrs;
185a9dc68d9SClaudio Fontana 
186860054d8SAlexander Graf static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES];
187860054d8SAlexander Graf 
188035d1ef2SChenyi Qiang #define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */
189035d1ef2SChenyi Qiang static RateLimit bus_lock_ratelimit_ctrl;
1905a778a5fSYang Weijiang static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value);
191035d1ef2SChenyi Qiang 
192ee88612dSPaolo Bonzini static const char *vm_type_name[] = {
193ee88612dSPaolo Bonzini     [KVM_X86_DEFAULT_VM] = "default",
194663e2f44SPaolo Bonzini     [KVM_X86_SEV_VM] = "SEV",
195663e2f44SPaolo Bonzini     [KVM_X86_SEV_ES_VM] = "SEV-ES",
196a808132fSPaolo Bonzini     [KVM_X86_SNP_VM] = "SEV-SNP",
197ee88612dSPaolo Bonzini };
198ee88612dSPaolo Bonzini 
kvm_is_vm_type_supported(int type)199ee88612dSPaolo Bonzini bool kvm_is_vm_type_supported(int type)
200ee88612dSPaolo Bonzini {
201ee88612dSPaolo Bonzini     uint32_t machine_types;
202ee88612dSPaolo Bonzini 
203ee88612dSPaolo Bonzini     /*
204ee88612dSPaolo Bonzini      * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM
205ee88612dSPaolo Bonzini      * is always supported
206ee88612dSPaolo Bonzini      */
207ee88612dSPaolo Bonzini     if (type == KVM_X86_DEFAULT_VM) {
208ee88612dSPaolo Bonzini         return true;
209ee88612dSPaolo Bonzini     }
210ee88612dSPaolo Bonzini 
211ee88612dSPaolo Bonzini     machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator),
212ee88612dSPaolo Bonzini                                         KVM_CAP_VM_TYPES);
213ee88612dSPaolo Bonzini     return !!(machine_types & BIT(type));
214ee88612dSPaolo Bonzini }
215ee88612dSPaolo Bonzini 
/*
 * Work out which KVM VM type to create for machine @ms.
 *
 * Without a confidential-guest object the type is KVM_X86_DEFAULT_VM.
 * Otherwise the object must be an x86 confidential guest, and it chooses
 * the type.  Reports an error and exits the process if the object has the
 * wrong QOM type or if KVM does not support the chosen VM type.
 *
 * Returns the KVM_X86_*_VM value to use.
 */
int kvm_get_vm_type(MachineState *ms)
{
    int kvm_type = KVM_X86_DEFAULT_VM;

    if (ms->cgs) {
        if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) {
            error_report("configuration type %s not supported for x86 guests",
                         object_get_typename(OBJECT(ms->cgs)));
            exit(1);
        }
        kvm_type = x86_confidential_guest_kvm_type(
            X86_CONFIDENTIAL_GUEST(ms->cgs));
    }

    if (!kvm_is_vm_type_supported(kvm_type)) {
        const char *name = NULL;

        /*
         * vm_type_name[] is sparse and only covers the types known to this
         * file; never index past it or hand a NULL pointer to "%s".
         */
        if (kvm_type >= 0 && (size_t)kvm_type < ARRAY_SIZE(vm_type_name)) {
            name = vm_type_name[kvm_type];
        }

        if (name) {
            error_report("vm-type %s not supported by KVM", name);
        } else {
            error_report("vm-type %d not supported by KVM", kvm_type);
        }
        exit(1);
    }

    return kvm_type;
}
237ee88612dSPaolo Bonzini 
kvm_enable_hypercall(uint64_t enable_mask)23847e76d03SMichael Roth bool kvm_enable_hypercall(uint64_t enable_mask)
23947e76d03SMichael Roth {
24047e76d03SMichael Roth     KVMState *s = KVM_STATE(current_accel());
24147e76d03SMichael Roth 
24247e76d03SMichael Roth     return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask);
24347e76d03SMichael Roth }
24447e76d03SMichael Roth 
kvm_has_smm(void)245a9dc68d9SClaudio Fontana bool kvm_has_smm(void)
246a9dc68d9SClaudio Fontana {
24723edf8b5STom Lendacky     return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
248a9dc68d9SClaudio Fontana }
249a9dc68d9SClaudio Fontana 
kvm_has_adjust_clock_stable(void)250a9dc68d9SClaudio Fontana bool kvm_has_adjust_clock_stable(void)
251a9dc68d9SClaudio Fontana {
252a9dc68d9SClaudio Fontana     int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
253a9dc68d9SClaudio Fontana 
254c4ef867fSRay Zhang     return (ret & KVM_CLOCK_TSC_STABLE);
255a9dc68d9SClaudio Fontana }
256a9dc68d9SClaudio Fontana 
kvm_has_exception_payload(void)257a9dc68d9SClaudio Fontana bool kvm_has_exception_payload(void)
258a9dc68d9SClaudio Fontana {
259a9dc68d9SClaudio Fontana     return has_exception_payload;
260a9dc68d9SClaudio Fontana }
261a9dc68d9SClaudio Fontana 
kvm_x2apic_api_set_flags(uint64_t flags)262a9dc68d9SClaudio Fontana static bool kvm_x2apic_api_set_flags(uint64_t flags)
263a9dc68d9SClaudio Fontana {
264a9dc68d9SClaudio Fontana     KVMState *s = KVM_STATE(current_accel());
265a9dc68d9SClaudio Fontana 
266a9dc68d9SClaudio Fontana     return !kvm_vm_enable_cap(s, KVM_CAP_X2APIC_API, 0, flags);
267a9dc68d9SClaudio Fontana }
268a9dc68d9SClaudio Fontana 
/*
 * MEMORIZE(fn, _result): evaluate @fn at most once per expansion site and
 * keep its value in @_result.
 *
 * First use: evaluates @fn, stores it in @_result and yields that value.
 * Later uses: execute "return _result;" in the ENCLOSING function, i.e. the
 * macro contains a hidden return and is only usable in functions whose
 * return type matches @_result.
 */
#define MEMORIZE(fn, _result)       \
    ({                              \
        static bool _memorized;     \
                                    \
        if (_memorized) {           \
            return _result;         \
        }                           \
        _memorized = true;          \
        _result = fn;               \
    })
279a9dc68d9SClaudio Fontana 
/* Result of the x2APIC API enablement, written by kvm_enable_x2apic(). */
static bool has_x2apic_api;

/*
 * Report the stored x2APIC API state.  This does not try to enable
 * anything; it stays false until kvm_enable_x2apic() has succeeded.
 */
bool kvm_has_x2apic_api(void)
{
    return has_x2apic_api;
}
286a9dc68d9SClaudio Fontana 
kvm_enable_x2apic(void)287a9dc68d9SClaudio Fontana bool kvm_enable_x2apic(void)
288a9dc68d9SClaudio Fontana {
289a9dc68d9SClaudio Fontana     return MEMORIZE(
290a9dc68d9SClaudio Fontana              kvm_x2apic_api_set_flags(KVM_X2APIC_API_USE_32BIT_IDS |
291a9dc68d9SClaudio Fontana                                       KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK),
292a9dc68d9SClaudio Fontana              has_x2apic_api);
293a9dc68d9SClaudio Fontana }
294a9dc68d9SClaudio Fontana 
kvm_hv_vpindex_settable(void)295a9dc68d9SClaudio Fontana bool kvm_hv_vpindex_settable(void)
296a9dc68d9SClaudio Fontana {
297a9dc68d9SClaudio Fontana     return hv_vpindex_settable;
298a9dc68d9SClaudio Fontana }
299a9dc68d9SClaudio Fontana 
/*
 * Refresh env->tsc of @cs from MSR_IA32_TSC unless the stored value is
 * already marked valid.
 *
 * tsc_valid is set to "VM is not running" before the read is attempted,
 * so the value counts as valid only while the VM is stopped.
 *
 * Returns 0 on success (including when nothing had to be read), or the
 * negative value returned by kvm_get_one_msr() on failure.
 */
static int kvm_get_tsc(CPUState *cs)
{
    X86CPU *x86 = X86_CPU(cs);
    CPUX86State *state = &x86->env;
    uint64_t tsc_now;
    int rc;

    if (!state->tsc_valid) {
        state->tsc_valid = !runstate_is_running();

        rc = kvm_get_one_msr(x86, MSR_IA32_TSC, &tsc_now);
        if (rc < 0) {
            return rc;
        }
        state->tsc = tsc_now;
    }

    return 0;
}
321a9dc68d9SClaudio Fontana 
/*
 * run_on_cpu() callback: refresh the TSC of @cpu.  @arg is unused and the
 * result of kvm_get_tsc() is deliberately dropped.
 */
static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg)
{
    (void)kvm_get_tsc(cpu);
}
326a9dc68d9SClaudio Fontana 
kvm_synchronize_all_tsc(void)327a9dc68d9SClaudio Fontana void kvm_synchronize_all_tsc(void)
328a9dc68d9SClaudio Fontana {
329a9dc68d9SClaudio Fontana     CPUState *cpu;
330a9dc68d9SClaudio Fontana 
331a9dc68d9SClaudio Fontana     if (kvm_enabled()) {
332a9dc68d9SClaudio Fontana         CPU_FOREACH(cpu) {
333a9dc68d9SClaudio Fontana             run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
334a9dc68d9SClaudio Fontana         }
335a9dc68d9SClaudio Fontana     }
336a9dc68d9SClaudio Fontana }
337a9dc68d9SClaudio Fontana 
/*
 * Issue KVM_GET_SUPPORTED_CPUID with room for @max entries.
 *
 * Returns a g_malloc0()-allocated table, or NULL when the buffer was too
 * small.  "Too small" covers both an -E2BIG result from the ioctl and a
 * successful call that filled at least @max entries.  On NULL the buffer
 * has already been freed.  Any other ioctl failure prints a message to
 * stderr and exits the process.
 */
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *table;
    int nbytes, rc;

    nbytes = sizeof(*table) + max * sizeof(*table->entries);
    table = g_malloc0(nbytes);
    table->nent = max;

    rc = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, table);
    if (rc == 0 && table->nent >= max) {
        /* Every slot was used, so the list may have been truncated. */
        rc = -E2BIG;
    }

    if (rc >= 0) {
        return table;
    }

    if (rc != -E2BIG) {
        fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                strerror(-rc));
        exit(1);
    }

    g_free(table);
    return NULL;
}
362a9dc68d9SClaudio Fontana 
363a9dc68d9SClaudio Fontana /* Run KVM_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough
364a9dc68d9SClaudio Fontana  * for all entries.
365a9dc68d9SClaudio Fontana  */
get_supported_cpuid(KVMState * s)366a9dc68d9SClaudio Fontana static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s)
367a9dc68d9SClaudio Fontana {
368a9dc68d9SClaudio Fontana     struct kvm_cpuid2 *cpuid;
369a9dc68d9SClaudio Fontana     int max = 1;
370a9dc68d9SClaudio Fontana 
371a9dc68d9SClaudio Fontana     if (cpuid_cache != NULL) {
372a9dc68d9SClaudio Fontana         return cpuid_cache;
373a9dc68d9SClaudio Fontana     }
374a9dc68d9SClaudio Fontana     while ((cpuid = try_get_cpuid(s, max)) == NULL) {
375a9dc68d9SClaudio Fontana         max *= 2;
376a9dc68d9SClaudio Fontana     }
377a9dc68d9SClaudio Fontana     cpuid_cache = cpuid;
378a9dc68d9SClaudio Fontana     return cpuid;
379a9dc68d9SClaudio Fontana }
380a9dc68d9SClaudio Fontana 
host_tsx_broken(void)381a9dc68d9SClaudio Fontana static bool host_tsx_broken(void)
382a9dc68d9SClaudio Fontana {
383a9dc68d9SClaudio Fontana     int family, model, stepping;\
384a9dc68d9SClaudio Fontana     char vendor[CPUID_VENDOR_SZ + 1];
385a9dc68d9SClaudio Fontana 
386f5cc5a5cSClaudio Fontana     host_cpu_vendor_fms(vendor, &family, &model, &stepping);
387a9dc68d9SClaudio Fontana 
388a9dc68d9SClaudio Fontana     /* Check if we are running on a Haswell host known to have broken TSX */
389a9dc68d9SClaudio Fontana     return !strcmp(vendor, CPUID_VENDOR_INTEL) &&
390a9dc68d9SClaudio Fontana            (family == 6) &&
391a9dc68d9SClaudio Fontana            ((model == 63 && stepping < 4) ||
392a9dc68d9SClaudio Fontana             model == 60 || model == 69 || model == 70);
393a9dc68d9SClaudio Fontana }
394a9dc68d9SClaudio Fontana 
395a9dc68d9SClaudio Fontana /* Returns the value for a specific register on the cpuid entry
396a9dc68d9SClaudio Fontana  */
cpuid_entry_get_reg(struct kvm_cpuid_entry2 * entry,int reg)397a9dc68d9SClaudio Fontana static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
398a9dc68d9SClaudio Fontana {
399a9dc68d9SClaudio Fontana     uint32_t ret = 0;
400a9dc68d9SClaudio Fontana     switch (reg) {
401a9dc68d9SClaudio Fontana     case R_EAX:
402a9dc68d9SClaudio Fontana         ret = entry->eax;
403a9dc68d9SClaudio Fontana         break;
404a9dc68d9SClaudio Fontana     case R_EBX:
405a9dc68d9SClaudio Fontana         ret = entry->ebx;
406a9dc68d9SClaudio Fontana         break;
407a9dc68d9SClaudio Fontana     case R_ECX:
408a9dc68d9SClaudio Fontana         ret = entry->ecx;
409a9dc68d9SClaudio Fontana         break;
410a9dc68d9SClaudio Fontana     case R_EDX:
411a9dc68d9SClaudio Fontana         ret = entry->edx;
412a9dc68d9SClaudio Fontana         break;
413a9dc68d9SClaudio Fontana     }
414a9dc68d9SClaudio Fontana     return ret;
415a9dc68d9SClaudio Fontana }
416a9dc68d9SClaudio Fontana 
417a9dc68d9SClaudio Fontana /* Find matching entry for function/index on kvm_cpuid2 struct
418a9dc68d9SClaudio Fontana  */
cpuid_find_entry(struct kvm_cpuid2 * cpuid,uint32_t function,uint32_t index)419a9dc68d9SClaudio Fontana static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
420a9dc68d9SClaudio Fontana                                                  uint32_t function,
421a9dc68d9SClaudio Fontana                                                  uint32_t index)
422a9dc68d9SClaudio Fontana {
423a9dc68d9SClaudio Fontana     int i;
424a9dc68d9SClaudio Fontana     for (i = 0; i < cpuid->nent; ++i) {
425a9dc68d9SClaudio Fontana         if (cpuid->entries[i].function == function &&
426a9dc68d9SClaudio Fontana             cpuid->entries[i].index == index) {
427a9dc68d9SClaudio Fontana             return &cpuid->entries[i];
428a9dc68d9SClaudio Fontana         }
429a9dc68d9SClaudio Fontana     }
430a9dc68d9SClaudio Fontana     /* not found: */
431a9dc68d9SClaudio Fontana     return NULL;
432a9dc68d9SClaudio Fontana }
433a9dc68d9SClaudio Fontana 
/*
 * Return the bits of CPUID leaf @function / sub-leaf @index, register @reg,
 * that can be exposed to the guest.
 *
 * The starting value is what KVM_GET_SUPPORTED_CPUID reports (0 when the
 * leaf is absent).  It is then corrected per leaf for features KVM
 * misreports, never reports, or reports although QEMU's configuration
 * cannot provide them.  Finally, when the machine has a confidential-guest
 * object, the value is passed through
 * x86_confidential_guest_mask_cpuid_features().
 *
 * Note: the two early returns in the 0xd leaf skip that final masking.
 */
uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
                                      uint32_t index, int reg)
{
    struct kvm_cpuid2 *table = get_supported_cpuid(s);
    struct kvm_cpuid_entry2 *match = cpuid_find_entry(table, function, index);
    uint32_t ret = match ? cpuid_entry_get_reg(match, reg) : 0;
    uint32_t host_reg, unused;
    uint64_t bitmask;

    /* Fixups for the data returned by KVM, below */

    switch (function) {
    case 1:
        if (reg == R_EDX) {
            /* KVM before 2.6.30 misreports the following features */
            ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA;
            /* KVM never reports CPUID_HT but QEMU can support when vcpus > 1 */
            ret |= CPUID_HT;
        } else if (reg == R_ECX) {
            /*
             * We can set the hypervisor flag, even if KVM does not return
             * it on GET_SUPPORTED_CPUID
             */
            ret |= CPUID_EXT_HYPERVISOR;

            /*
             * tsc-deadline flag is not returned by GET_SUPPORTED_CPUID, but
             * it can be enabled if the kernel has
             * KVM_CAP_TSC_DEADLINE_TIMER, and the irqchip is in the kernel.
             */
            if (kvm_irqchip_in_kernel() &&
                kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
                ret |= CPUID_EXT_TSC_DEADLINE_TIMER;
            }

            /*
             * x2apic is reported by GET_SUPPORTED_CPUID, but it can't be
             * enabled without the in-kernel irqchip
             */
            if (!kvm_irqchip_in_kernel()) {
                ret &= ~CPUID_EXT_X2APIC;
            }

            if (enable_cpu_pm) {
                int disable_exits =
                    kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS);

                if (disable_exits & KVM_X86_DISABLE_EXITS_MWAIT) {
                    ret |= CPUID_EXT_MONITOR;
                }
            }
        }
        break;

    case 6:
        if (reg == R_EAX) {
            /* safe to allow because of emulated APIC */
            ret |= CPUID_6_EAX_ARAT;
        }
        break;

    case 7:
        if (index == 0 && reg == R_EBX) {
            /* Not new instructions, just an optimization.  */
            host_cpuid(7, 0, &unused, &host_reg, &unused, &unused);
            ret |= host_reg & CPUID_7_0_EBX_ERMS;

            if (host_tsx_broken()) {
                ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE);
            }
        } else if (index == 0 && reg == R_EDX) {
            /* Not new instructions, just an optimization.  */
            host_cpuid(7, 0, &unused, &unused, &unused, &host_reg);
            ret |= host_reg & CPUID_7_0_EDX_FSRM;

            /*
             * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM
             * hosts.  We can detect the bug by checking if
             * MSR_IA32_ARCH_CAPABILITIES is returned by
             * KVM_GET_MSR_INDEX_LIST.
             */
            if (!has_msr_arch_capabs) {
                ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
            }
        } else if (index == 1 && reg == R_EAX) {
            /* Not new instructions, just an optimization.  */
            host_cpuid(7, 1, &host_reg, &unused, &unused, &unused);
            ret |= host_reg & (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS |
                               CPUID_7_1_EAX_FSRC);
        } else if (index == 2 && reg == R_EDX) {
            host_cpuid(7, 2, &unused, &unused, &unused, &host_reg);
            ret |= host_reg & CPUID_7_2_EDX_MCDT_NO;
        }
        break;

    case 0xd:
        if (index == 0 && (reg == R_EAX || reg == R_EDX)) {
            /*
             * The value returned by KVM_GET_SUPPORTED_CPUID does not include
             * features that still have to be enabled with the arch_prctl
             * system call.  QEMU needs the full value, which is retrieved
             * with KVM_GET_DEVICE_ATTR.
             */
            struct kvm_device_attr attr = {
                .group = 0,
                .attr = KVM_X86_XCOMP_GUEST_SUPP,
                .addr = (unsigned long) &bitmask
            };
            int rc;

            if (!kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES)) {
                return ret;
            }

            rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
            if (rc < 0) {
                if (rc != -ENXIO) {
                    warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
                                "error: %d", rc);
                }
                return ret;
            }

            /* Low half of the mask goes in EAX, high half in EDX. */
            ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
        }
        break;

    case 0x80000001:
        if (reg == R_ECX) {
            /*
             * It's safe to enable TOPOEXT even if it's not returned by
             * GET_SUPPORTED_CPUID.  Unconditionally enabling TOPOEXT here
             * allows us to keep CPU models including TOPOEXT runnable on
             * older kernels.
             */
            ret |= CPUID_EXT3_TOPOEXT;
        } else if (reg == R_EDX) {
            /*
             * On Intel, kvm returns cpuid according to the Intel spec,
             * so add missing bits according to the AMD spec:
             */
            uint32_t leaf1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);

            ret |= leaf1_edx & CPUID_EXT2_AMD_ALIASES;
        }
        break;

    case 0x80000007:
        if (reg == R_EBX) {
            ret |= CPUID_8000_0007_EBX_OVERFLOW_RECOV |
                   CPUID_8000_0007_EBX_SUCCOR;
        }
        break;

    case KVM_CPUID_FEATURES:
        if (reg == R_EAX) {
            /*
             * kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it
             * can't be enabled without the in-kernel irqchip
             */
            if (!kvm_irqchip_in_kernel()) {
                ret &= ~(1U << KVM_FEATURE_PV_UNHALT);
            }
            if (kvm_irqchip_is_split()) {
                ret |= 1U << KVM_FEATURE_MSI_EXT_DEST_ID;
            }
        } else if (reg == R_EDX) {
            ret |= 1U << KVM_HINTS_REALTIME;
        }
        break;

    default:
        break;
    }

    if (current_machine->cgs) {
        ret = x86_confidential_guest_mask_cpuid_features(
            X86_CONFIDENTIAL_GUEST(current_machine->cgs),
            function, index, reg, ret);
    }
    return ret;
}
583a9dc68d9SClaudio Fontana 
/*
 * Read the feature MSR @index through the KVM_GET_MSRS ioctl issued on @s.
 *
 * Returns 0 when kvm_feature_msrs is NULL or does not list @index.
 * Terminates QEMU when the ioctl does not report exactly one MSR read.
 *
 * For the VMX control MSRs handled in the switch below, the result is the
 * set of bits that can be one but do not have to be one (high 32 bits of
 * the MSR masked by the complement of the low 32 bits).  For every other
 * MSR the raw 64-bit value is returned.
 */
uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[1];
    } msr_data = {};
    uint64_t value;
    uint32_t can_be_one, must_be_one;
    /*
     * Signed on purpose: the value is compared against 1 and negated for
     * strerror() on failure, so it must not be held in an unsigned type.
     */
    int ret;
    int i;

    if (kvm_feature_msrs == NULL) { /* Host doesn't support feature MSRs */
        return 0;
    }

    /* Check if requested MSR is supported feature MSR */
    for (i = 0; i < kvm_feature_msrs->nmsrs; i++) {
        if (kvm_feature_msrs->indices[i] == index) {
            break;
        }
    }
    if (i == kvm_feature_msrs->nmsrs) {
        return 0; /* if the feature MSR is not supported, simply return 0 */
    }

    msr_data.info.nmsrs = 1;
    msr_data.entries[0].index = index;

    ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data);
    if (ret != 1) {
        error_report("KVM get MSR (index=0x%x) feature failed, %s",
            index, strerror(-ret));
        exit(1);
    }

    value = msr_data.entries[0].data;
    switch (index) {
    case MSR_IA32_VMX_PROCBASED_CTLS2:
        if (!has_msr_vmx_procbased_ctls2) {
            /* KVM forgot to add these bits for some time, do this ourselves. */
            if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) &
                CPUID_XSAVE_XSAVES) {
                value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
            }
            if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) &
                CPUID_EXT_RDRAND) {
                value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
            }
            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
                CPUID_7_0_EBX_INVPCID) {
                value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
            }
            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
                CPUID_7_0_EBX_RDSEED) {
                value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
            }
            if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) &
                CPUID_EXT2_RDTSCP) {
                value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
            }
        }
        /* fall through */
    case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
    case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
    case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
    case MSR_IA32_VMX_TRUE_EXIT_CTLS:
        /*
         * Return true for bits that can be one, but do not have to be one.
         * The SDM tells us which bits could have a "must be one" setting,
         * so we can do the opposite transformation in make_vmx_msr_value.
         */
        must_be_one = (uint32_t)value;
        can_be_one = (uint32_t)(value >> 32);
        return can_be_one & ~must_be_one;

    default:
        return value;
    }
}
661a9dc68d9SClaudio Fontana 
/*
 * Query the machine-check capabilities KVM offers.
 *
 * @max_banks receives the value of the KVM_CAP_MCE extension check;
 * @mce_cap is filled in by the KVM_X86_GET_MCE_CAP_SUPPORTED ioctl, whose
 * return value is passed straight back to the caller.
 */
static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
                                     int *max_banks)
{
    int banks = kvm_check_extension(s, KVM_CAP_MCE);

    *max_banks = banks;

    return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
}
668a9dc68d9SClaudio Fontana 
/*
 * Build the MCi_STATUS / MCG_STATUS values for a memory error at guest
 * physical address @paddr and hand them to cpu_x86_inject_mce().
 *
 * @code is compared against BUS_MCEERR_AR; any other value takes the
 * non-AR branch.  The status bits differ between AMD and non-AMD vCPUs.
 */
static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    const bool is_ar = (code == BUS_MCEERR_AR);
    uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_MISCV |
                      MCI_STATUS_ADDRV;
    uint64_t mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
    int flags;

    if (IS_AMD_CPU(env)) {
        if (is_ar) {
            status |= MCI_STATUS_UC | MCI_STATUS_POISON;
        } else {
            /*
             * Setting the POISON bit for deferred errors indicates to the
             * guest kernel that the address provided by the MCE is valid
             * and usable which will ensure that the guest kernel will send
             * a SIGBUS_AO signal to the guest process. This allows for
             * more desirable behavior in the case that the guest process
             * with poisoned memory has set the MCE_KILL_EARLY prctl flag
             * which indicates that the process would prefer to handle or
             * shutdown due to the poisoned memory condition before the
             * memory has been accessed.
             *
             * While the POISON bit would not be set in a deferred error
             * sent from hardware, the bit is not meaningful for deferred
             * errors and can be reused in this scenario.
             */
            status |= MCI_STATUS_DEFERRED | MCI_STATUS_POISON;
        }
    } else {
        status |= MCI_STATUS_S | MCI_STATUS_UC;
        if (is_ar) {
            status |= MCI_STATUS_AR | 0x134;
        } else {
            status |= 0xc0;
        }
    }

    /* Both vendor paths flag EIPV for the BUS_MCEERR_AR case. */
    if (is_ar) {
        mcg_status |= MCG_STATUS_EIPV;
    }

    flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;

    /*
     * We need to read back the value of MSR_EXT_MCG_CTL that was set by the
     * guest kernel back into env->mcg_ext_ctl.
     */
    cpu_synchronize_state(cs);
    if (env->mcg_ext_ctl & MCG_EXT_CTL_LMCE_EN) {
        /* LMCE enabled: mark the error as local and drop the broadcast. */
        mcg_status |= MCG_STATUS_LMCE;
        flags = 0;
    }

    cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
                       (MCM_ADDR_PHYS << 6) | 0xc, flags);
}
722a9dc68d9SClaudio Fontana 
/*
 * Send a memory-failure QAPI event whose recipient is the hypervisor.
 *
 * @action is forwarded unchanged; @ar becomes the event's action_required
 * flag.  The recursive flag is always false.
 */
static void emit_hypervisor_memory_failure(MemoryFailureAction action, bool ar)
{
    MemoryFailureFlags mff = {
        .action_required = ar,
        .recursive = false,
    };

    qapi_event_send_memory_failure(MEMORY_FAILURE_RECIPIENT_HYPERVISOR,
                                   action, &mff);
}
730a9dc68d9SClaudio Fontana 
hardware_memory_error(void * host_addr)731a9dc68d9SClaudio Fontana static void hardware_memory_error(void *host_addr)
732a9dc68d9SClaudio Fontana {
733a9dc68d9SClaudio Fontana     emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_FATAL, true);
734a9dc68d9SClaudio Fontana     error_report("QEMU got Hardware memory error at addr %p", host_addr);
735a9dc68d9SClaudio Fontana     exit(1);
736a9dc68d9SClaudio Fontana }
737a9dc68d9SClaudio Fontana 
/*
 * React to a SIGBUS memory error reported for host address @addr.
 *
 * @code must be BUS_MCEERR_AR or BUS_MCEERR_AO (asserted).  When the vCPU
 * advertises MCG_SER_P and @addr maps to guest RAM, the page is recorded
 * as poisoned and a machine check is injected into the guest.  Otherwise
 * an AR error terminates QEMU via hardware_memory_error(), and an AO error
 * only produces a warning (when MCG_SER_P is set and @addr is non-NULL)
 * plus an "ignore" memory-failure event.
 */
void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
    X86CPU *cpu = X86_CPU(c);
    CPUX86State *env = &cpu->env;
    const bool is_ar = (code == BUS_MCEERR_AR);
    const bool is_ao = (code == BUS_MCEERR_AO);
    ram_addr_t ram_addr;
    hwaddr paddr;

    /*
     * If we get an action required MCE, it has been injected by KVM
     * while the VM was running.  An action optional MCE instead should
     * be coming from the main thread, which qemu_init_sigbus identifies
     * as the "early kill" thread.
     */
    assert(is_ar || is_ao);

    if ((env->mcg_cap & MCG_SER_P) && addr) {
        ram_addr = qemu_ram_addr_from_host(addr);
        if (ram_addr != RAM_ADDR_INVALID &&
            kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
            kvm_hwpoison_page_add(ram_addr);
            kvm_mce_inject(cpu, paddr, code);

            /*
             * Use different logging severity based on error type.
             * If there is additional MCE reporting on the hypervisor, QEMU VA
             * could be another source to identify the PA and MCE details.
             */
            if (is_ar) {
                error_report("Guest MCE Memory Error at QEMU addr %p and "
                             "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
                             addr, paddr, "BUS_MCEERR_AR");
            } else {
                warn_report("Guest MCE Memory Error at QEMU addr %p and "
                            "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
                            addr, paddr, "BUS_MCEERR_AO");
            }

            return;
        }

        /* The address did not resolve to guest memory. */
        if (is_ao) {
            warn_report("Hardware memory error at addr %p of type %s "
                        "for memory used by QEMU itself instead of guest system!",
                        addr, "BUS_MCEERR_AO");
        }
    }

    if (is_ar) {
        hardware_memory_error(addr);
    }

    /* Hope we are lucky for AO MCE, just notify a event */
    emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_IGNORE, false);
}
791a9dc68d9SClaudio Fontana 
/*
 * Record exception @exception_nr in @env.
 *
 * No exception may already be recorded (asserted).  When the file-level
 * flag has_exception_payload is set, the exception is marked pending and
 * the payload is stored alongside it.  Otherwise it is marked injected and
 * the payload is written directly to DR6 (#DB) or CR2 (#PF); any other
 * exception number must not carry a payload.
 */
static void kvm_queue_exception(CPUX86State *env,
                                int32_t exception_nr,
                                uint8_t exception_has_payload,
                                uint64_t exception_payload)
{
    assert(env->exception_nr == -1);
    assert(!env->exception_pending);
    assert(!env->exception_injected);
    assert(!env->exception_has_payload);

    env->exception_nr = exception_nr;

    if (has_exception_payload) {
        /* Keep the payload separate; it is not applied to registers here. */
        env->exception_pending = 1;
        env->exception_has_payload = exception_has_payload;
        env->exception_payload = exception_payload;
        return;
    }

    env->exception_injected = 1;

    if (exception_nr == EXCP01_DB) {
        assert(exception_has_payload);
        env->dr[6] = exception_payload;
        return;
    }

    if (exception_nr == EXCP0E_PAGE) {
        assert(exception_has_payload);
        env->cr[2] = exception_payload;
        return;
    }

    assert(!exception_has_payload);
}
823a9dc68d9SClaudio Fontana 
/*
 * Run-state callback: when @running is true, clear the tsc_valid flag of
 * the CPUX86State passed as @opaque.  @state is unused.
 */
static void cpu_update_state(void *opaque, bool running, RunState state)
{
    CPUX86State *env = opaque;

    if (!running) {
        return;
    }

    env->tsc_valid = false;
}
832a9dc68d9SClaudio Fontana 
/* Return the vCPU id for @cs, which on x86 is the CPU's APIC ID. */
unsigned long kvm_arch_vcpu_id(CPUState *cs)
{
    return X86_CPU(cs)->apic_id;
}
838a9dc68d9SClaudio Fontana 
/* Fallback for headers that do not provide KVM_CPUID_SIGNATURE_NEXT. */
#ifndef KVM_CPUID_SIGNATURE_NEXT
#define KVM_CPUID_SIGNATURE_NEXT 0x40000100
#endif
842a9dc68d9SClaudio Fontana 
/*
 * Tell whether Hyper-V emulation is in use for @cpu: the host must report
 * KVM_CAP_HYPERV, and the CPU must have a spinlock-attempts value other
 * than HYPERV_SPINLOCK_NEVER_NOTIFY, at least one Hyper-V feature bit, or
 * Hyper-V passthrough set.
 */
static bool hyperv_enabled(X86CPU *cpu)
{
    if (kvm_check_extension(kvm_state, KVM_CAP_HYPERV) <= 0) {
        return false;
    }

    if (cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_NOTIFY) {
        return true;
    }

    return cpu->hyperv_features || cpu->hyperv_passthrough;
}
849a9dc68d9SClaudio Fontana 
/*
 * Check whether target_freq is within conservative
 * ntp correctable bounds (250ppm) of freq
 *
 * Returns true when target_freq lies in [freq - delta, freq + delta],
 * where delta is freq * 250 / 1000000 (truncated).
 */
static inline bool freq_within_bounds(int freq, int target_freq)
{
    /*
     * Do the arithmetic in 64 bits: freq * 250 overflows int once freq
     * exceeds INT_MAX / 250, and freq +/- delta can overflow near the
     * limits of int.  Signed overflow is undefined behaviour.
     */
    int64_t delta = (int64_t)freq * 250 / 1000000;
    int64_t max_freq = (int64_t)freq + delta;
    int64_t min_freq = (int64_t)freq - delta;

    return target_freq >= min_freq && target_freq <= max_freq;
}
865a9dc68d9SClaudio Fontana 
/*
 * Try to make the vCPU's TSC run at env->tsc_khz.
 *
 * Returns 0 when no frequency is requested, when KVM_SET_TSC_KHZ
 * succeeds, or when it fails but KVM_GET_TSC_KHZ already reports the
 * requested frequency.  Otherwise a warning is printed and the negative
 * result of the set attempt (-ENOTSUP when it was not attempted) is
 * returned.
 */
static int kvm_arch_set_tsc_khz(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    bool have_scaling, near_host;
    int host_khz, rc;

    if (!env->tsc_khz) {
        return 0;
    }

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ)) {
        host_khz = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
    } else {
        host_khz = -ENOTSUP;
    }

    /* If TSC scaling is supported, attempt to set TSC frequency. */
    have_scaling = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL) != 0;

    /*
     * If desired TSC frequency is within bounds of NTP correction,
     * attempt to set TSC frequency.
     */
    near_host = host_khz != -ENOTSUP &&
                freq_within_bounds(host_khz, env->tsc_khz);

    if (have_scaling || near_host) {
        rc = kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz);
    } else {
        rc = -ENOTSUP;
    }

    if (rc >= 0) {
        return 0;
    }

    /*
     * When KVM_SET_TSC_KHZ fails, it's an error only if the current
     * TSC frequency doesn't match the one we want.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ)) {
        host_khz = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
    } else {
        host_khz = -ENOTSUP;
    }

    if (host_khz > 0 && host_khz == env->tsc_khz) {
        return 0;
    }

    warn_report("TSC frequency mismatch between "
                "VM (%" PRId64 " kHz) and host (%d kHz), "
                "and TSC scaling unavailable",
                env->tsc_khz, host_khz);
    return rc;
}
917a9dc68d9SClaudio Fontana 
/*
 * True when env->tsc_khz is non-zero and either the CPUID_APM_INVTSC
 * feature bit is set or env->user_tsc_khz is non-zero.
 */
static bool tsc_is_stable_and_known(CPUX86State *env)
{
    bool has_invtsc;

    if (!env->tsc_khz) {
        return false;
    }

    has_invtsc = (env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) != 0;
    if (has_invtsc) {
        return true;
    }

    return env->user_tsc_khz != 0;
}
926a9dc68d9SClaudio Fontana 
/*
 * Value 0x0101: 1 in bits 15:8 combined with 1 in bits 7:0.
 * NOTE(review): presumably two eVMCS version numbers packed one per byte;
 * confirm against the users of this macro.
 */
#define DEFAULT_EVMCS_VERSION ((1 << 8) | (1 << 0))
9287110fe56SVitaly Kuznetsov 
929a9dc68d9SClaudio Fontana static struct {
930a9dc68d9SClaudio Fontana     const char *desc;
931a9dc68d9SClaudio Fontana     struct {
932061817a7SVitaly Kuznetsov         uint32_t func;
933061817a7SVitaly Kuznetsov         int reg;
934a9dc68d9SClaudio Fontana         uint32_t bits;
935a9dc68d9SClaudio Fontana     } flags[2];
936a9dc68d9SClaudio Fontana     uint64_t dependencies;
9377d7b9c76SVitaly Kuznetsov     bool skip_passthrough;
938a9dc68d9SClaudio Fontana } kvm_hyperv_properties[] = {
939a9dc68d9SClaudio Fontana     [HYPERV_FEAT_RELAXED] = {
940a9dc68d9SClaudio Fontana         .desc = "relaxed timing (hv-relaxed)",
941a9dc68d9SClaudio Fontana         .flags = {
942061817a7SVitaly Kuznetsov             {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
943a9dc68d9SClaudio Fontana              .bits = HV_RELAXED_TIMING_RECOMMENDED}
944a9dc68d9SClaudio Fontana         }
945a9dc68d9SClaudio Fontana     },
946a9dc68d9SClaudio Fontana     [HYPERV_FEAT_VAPIC] = {
947a9dc68d9SClaudio Fontana         .desc = "virtual APIC (hv-vapic)",
948a9dc68d9SClaudio Fontana         .flags = {
949061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
95005071629SVitaly Kuznetsov              .bits = HV_APIC_ACCESS_AVAILABLE}
951a9dc68d9SClaudio Fontana         }
952a9dc68d9SClaudio Fontana     },
953a9dc68d9SClaudio Fontana     [HYPERV_FEAT_TIME] = {
954a9dc68d9SClaudio Fontana         .desc = "clocksources (hv-time)",
955a9dc68d9SClaudio Fontana         .flags = {
956061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
957b26f68c3SVitaly Kuznetsov              .bits = HV_TIME_REF_COUNT_AVAILABLE | HV_REFERENCE_TSC_AVAILABLE}
958a9dc68d9SClaudio Fontana         }
959a9dc68d9SClaudio Fontana     },
960a9dc68d9SClaudio Fontana     [HYPERV_FEAT_CRASH] = {
961a9dc68d9SClaudio Fontana         .desc = "crash MSRs (hv-crash)",
962a9dc68d9SClaudio Fontana         .flags = {
963061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EDX,
964a9dc68d9SClaudio Fontana              .bits = HV_GUEST_CRASH_MSR_AVAILABLE}
965a9dc68d9SClaudio Fontana         }
966a9dc68d9SClaudio Fontana     },
967a9dc68d9SClaudio Fontana     [HYPERV_FEAT_RESET] = {
968a9dc68d9SClaudio Fontana         .desc = "reset MSR (hv-reset)",
969a9dc68d9SClaudio Fontana         .flags = {
970061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
971a9dc68d9SClaudio Fontana              .bits = HV_RESET_AVAILABLE}
972a9dc68d9SClaudio Fontana         }
973a9dc68d9SClaudio Fontana     },
974a9dc68d9SClaudio Fontana     [HYPERV_FEAT_VPINDEX] = {
975a9dc68d9SClaudio Fontana         .desc = "VP_INDEX MSR (hv-vpindex)",
976a9dc68d9SClaudio Fontana         .flags = {
977061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
978a9dc68d9SClaudio Fontana              .bits = HV_VP_INDEX_AVAILABLE}
979a9dc68d9SClaudio Fontana         }
980a9dc68d9SClaudio Fontana     },
981a9dc68d9SClaudio Fontana     [HYPERV_FEAT_RUNTIME] = {
982a9dc68d9SClaudio Fontana         .desc = "VP_RUNTIME MSR (hv-runtime)",
983a9dc68d9SClaudio Fontana         .flags = {
984061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
985a9dc68d9SClaudio Fontana              .bits = HV_VP_RUNTIME_AVAILABLE}
986a9dc68d9SClaudio Fontana         }
987a9dc68d9SClaudio Fontana     },
988a9dc68d9SClaudio Fontana     [HYPERV_FEAT_SYNIC] = {
989a9dc68d9SClaudio Fontana         .desc = "synthetic interrupt controller (hv-synic)",
990a9dc68d9SClaudio Fontana         .flags = {
991061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
992a9dc68d9SClaudio Fontana              .bits = HV_SYNIC_AVAILABLE}
993a9dc68d9SClaudio Fontana         }
994a9dc68d9SClaudio Fontana     },
995a9dc68d9SClaudio Fontana     [HYPERV_FEAT_STIMER] = {
996a9dc68d9SClaudio Fontana         .desc = "synthetic timers (hv-stimer)",
997a9dc68d9SClaudio Fontana         .flags = {
998061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
999a9dc68d9SClaudio Fontana              .bits = HV_SYNTIMERS_AVAILABLE}
1000a9dc68d9SClaudio Fontana         },
1001a9dc68d9SClaudio Fontana         .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_TIME)
1002a9dc68d9SClaudio Fontana     },
1003a9dc68d9SClaudio Fontana     [HYPERV_FEAT_FREQUENCIES] = {
1004a9dc68d9SClaudio Fontana         .desc = "frequency MSRs (hv-frequencies)",
1005a9dc68d9SClaudio Fontana         .flags = {
1006061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
1007a9dc68d9SClaudio Fontana              .bits = HV_ACCESS_FREQUENCY_MSRS},
1008061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EDX,
1009a9dc68d9SClaudio Fontana              .bits = HV_FREQUENCY_MSRS_AVAILABLE}
1010a9dc68d9SClaudio Fontana         }
1011a9dc68d9SClaudio Fontana     },
1012a9dc68d9SClaudio Fontana     [HYPERV_FEAT_REENLIGHTENMENT] = {
1013a9dc68d9SClaudio Fontana         .desc = "reenlightenment MSRs (hv-reenlightenment)",
1014a9dc68d9SClaudio Fontana         .flags = {
1015061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EAX,
1016a9dc68d9SClaudio Fontana              .bits = HV_ACCESS_REENLIGHTENMENTS_CONTROL}
1017a9dc68d9SClaudio Fontana         }
1018a9dc68d9SClaudio Fontana     },
1019a9dc68d9SClaudio Fontana     [HYPERV_FEAT_TLBFLUSH] = {
1020a9dc68d9SClaudio Fontana         .desc = "paravirtualized TLB flush (hv-tlbflush)",
1021a9dc68d9SClaudio Fontana         .flags = {
1022061817a7SVitaly Kuznetsov             {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
1023a9dc68d9SClaudio Fontana              .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED |
1024a9dc68d9SClaudio Fontana              HV_EX_PROCESSOR_MASKS_RECOMMENDED}
1025a9dc68d9SClaudio Fontana         },
1026a9dc68d9SClaudio Fontana         .dependencies = BIT(HYPERV_FEAT_VPINDEX)
1027a9dc68d9SClaudio Fontana     },
1028a9dc68d9SClaudio Fontana     [HYPERV_FEAT_EVMCS] = {
1029a9dc68d9SClaudio Fontana         .desc = "enlightened VMCS (hv-evmcs)",
1030a9dc68d9SClaudio Fontana         .flags = {
1031061817a7SVitaly Kuznetsov             {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
1032a9dc68d9SClaudio Fontana              .bits = HV_ENLIGHTENED_VMCS_RECOMMENDED}
1033a9dc68d9SClaudio Fontana         },
1034a9dc68d9SClaudio Fontana         .dependencies = BIT(HYPERV_FEAT_VAPIC)
1035a9dc68d9SClaudio Fontana     },
1036a9dc68d9SClaudio Fontana     [HYPERV_FEAT_IPI] = {
1037a9dc68d9SClaudio Fontana         .desc = "paravirtualized IPI (hv-ipi)",
1038a9dc68d9SClaudio Fontana         .flags = {
1039061817a7SVitaly Kuznetsov             {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
1040a9dc68d9SClaudio Fontana              .bits = HV_CLUSTER_IPI_RECOMMENDED |
1041a9dc68d9SClaudio Fontana              HV_EX_PROCESSOR_MASKS_RECOMMENDED}
1042a9dc68d9SClaudio Fontana         },
1043a9dc68d9SClaudio Fontana         .dependencies = BIT(HYPERV_FEAT_VPINDEX)
1044a9dc68d9SClaudio Fontana     },
1045a9dc68d9SClaudio Fontana     [HYPERV_FEAT_STIMER_DIRECT] = {
1046a9dc68d9SClaudio Fontana         .desc = "direct mode synthetic timers (hv-stimer-direct)",
1047a9dc68d9SClaudio Fontana         .flags = {
1048061817a7SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EDX,
1049a9dc68d9SClaudio Fontana              .bits = HV_STIMER_DIRECT_MODE_AVAILABLE}
1050a9dc68d9SClaudio Fontana         },
1051a9dc68d9SClaudio Fontana         .dependencies = BIT(HYPERV_FEAT_STIMER)
1052a9dc68d9SClaudio Fontana     },
1053e1f9a8e8SVitaly Kuznetsov     [HYPERV_FEAT_AVIC] = {
1054e1f9a8e8SVitaly Kuznetsov         .desc = "AVIC/APICv support (hv-avic/hv-apicv)",
1055e1f9a8e8SVitaly Kuznetsov         .flags = {
1056e1f9a8e8SVitaly Kuznetsov             {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
1057e1f9a8e8SVitaly Kuznetsov              .bits = HV_DEPRECATING_AEOI_RECOMMENDED}
1058e1f9a8e8SVitaly Kuznetsov         }
1059e1f9a8e8SVitaly Kuznetsov     },
106073d24074SJon Doron     [HYPERV_FEAT_SYNDBG] = {
106173d24074SJon Doron         .desc = "Enable synthetic kernel debugger channel (hv-syndbg)",
106273d24074SJon Doron         .flags = {
106373d24074SJon Doron             {.func = HV_CPUID_FEATURES, .reg = R_EDX,
106473d24074SJon Doron              .bits = HV_FEATURE_DEBUG_MSRS_AVAILABLE}
106573d24074SJon Doron         },
10667d7b9c76SVitaly Kuznetsov         .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_RELAXED),
10677d7b9c76SVitaly Kuznetsov         .skip_passthrough = true,
106873d24074SJon Doron     },
1069869840d2SVitaly Kuznetsov     [HYPERV_FEAT_MSR_BITMAP] = {
1070869840d2SVitaly Kuznetsov         .desc = "enlightened MSR-Bitmap (hv-emsr-bitmap)",
1071869840d2SVitaly Kuznetsov         .flags = {
1072869840d2SVitaly Kuznetsov             {.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX,
1073869840d2SVitaly Kuznetsov              .bits = HV_NESTED_MSR_BITMAP}
1074869840d2SVitaly Kuznetsov         }
1075869840d2SVitaly Kuznetsov     },
10769411e8b6SVitaly Kuznetsov     [HYPERV_FEAT_XMM_INPUT] = {
10779411e8b6SVitaly Kuznetsov         .desc = "XMM fast hypercall input (hv-xmm-input)",
10789411e8b6SVitaly Kuznetsov         .flags = {
10799411e8b6SVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EDX,
10809411e8b6SVitaly Kuznetsov              .bits = HV_HYPERCALL_XMM_INPUT_AVAILABLE}
10819411e8b6SVitaly Kuznetsov         }
10829411e8b6SVitaly Kuznetsov     },
1083aa6bb5faSVitaly Kuznetsov     [HYPERV_FEAT_TLBFLUSH_EXT] = {
1084aa6bb5faSVitaly Kuznetsov         .desc = "Extended gva ranges for TLB flush hypercalls (hv-tlbflush-ext)",
1085aa6bb5faSVitaly Kuznetsov         .flags = {
1086aa6bb5faSVitaly Kuznetsov             {.func = HV_CPUID_FEATURES, .reg = R_EDX,
1087aa6bb5faSVitaly Kuznetsov              .bits = HV_EXT_GVA_RANGES_FLUSH_AVAILABLE}
1088aa6bb5faSVitaly Kuznetsov         },
1089aa6bb5faSVitaly Kuznetsov         .dependencies = BIT(HYPERV_FEAT_TLBFLUSH)
1090aa6bb5faSVitaly Kuznetsov     },
10913aae0854SVitaly Kuznetsov     [HYPERV_FEAT_TLBFLUSH_DIRECT] = {
10923aae0854SVitaly Kuznetsov         .desc = "direct TLB flush (hv-tlbflush-direct)",
10933aae0854SVitaly Kuznetsov         .flags = {
10943aae0854SVitaly Kuznetsov             {.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX,
10953aae0854SVitaly Kuznetsov              .bits = HV_NESTED_DIRECT_FLUSH}
10963aae0854SVitaly Kuznetsov         },
10973aae0854SVitaly Kuznetsov         .dependencies = BIT(HYPERV_FEAT_VAPIC)
10983aae0854SVitaly Kuznetsov     },
1099a9dc68d9SClaudio Fontana };
1100a9dc68d9SClaudio Fontana 
/*
 * Issue KVM_GET_SUPPORTED_HV_CPUID once, with room for @max entries.
 *
 * The request is made with kvm_ioctl() on the global kvm_state when
 * @do_sys_ioctl is true and with kvm_vcpu_ioctl() on @cs otherwise.
 *
 * Returns the g_malloc0()-allocated buffer on success.  Returns NULL when
 * the ioctl reports -E2BIG, or when it succeeds but reports @max or more
 * entries (handled exactly like -E2BIG); the buffer is freed in that case.
 * Any other negative result prints a message to stderr and exits.
 */
static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max,
                                           bool do_sys_ioctl)
{
    struct kvm_cpuid2 *buf;
    int bytes, ret;

    bytes = sizeof(*buf) + max * sizeof(*buf->entries);
    buf = g_malloc0(bytes);
    buf->nent = max;

    ret = do_sys_ioctl
        ? kvm_ioctl(kvm_state, KVM_GET_SUPPORTED_HV_CPUID, buf)
        : kvm_vcpu_ioctl(cs, KVM_GET_SUPPORTED_HV_CPUID, buf);

    /* An entry count that reaches @max is folded into the -E2BIG path. */
    if (ret == 0 && buf->nent >= max) {
        ret = -E2BIG;
    }

    if (ret == -E2BIG) {
        g_free(buf);
        return NULL;
    }

    if (ret < 0) {
        fprintf(stderr, "KVM_GET_SUPPORTED_HV_CPUID failed: %s\n",
                strerror(-ret));
        exit(1);
    }

    return buf;
}
1131a9dc68d9SClaudio Fontana 
1132a9dc68d9SClaudio Fontana /*
1133a9dc68d9SClaudio Fontana  * Run KVM_GET_SUPPORTED_HV_CPUID ioctl(), allocating a buffer large enough
1134a9dc68d9SClaudio Fontana  * for all entries.
1135a9dc68d9SClaudio Fontana  */
get_supported_hv_cpuid(CPUState * cs)1136a9dc68d9SClaudio Fontana static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
1137a9dc68d9SClaudio Fontana {
1138a9dc68d9SClaudio Fontana     struct kvm_cpuid2 *cpuid;
113973d24074SJon Doron     /* 0x40000000..0x40000005, 0x4000000A, 0x40000080..0x40000082 leaves */
114073d24074SJon Doron     int max = 11;
1141decb4f20SVitaly Kuznetsov     int i;
11422e905438SVitaly Kuznetsov     bool do_sys_ioctl;
11432e905438SVitaly Kuznetsov 
11442e905438SVitaly Kuznetsov     do_sys_ioctl =
11452e905438SVitaly Kuznetsov         kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID) > 0;
1146a9dc68d9SClaudio Fontana 
1147a9dc68d9SClaudio Fontana     /*
1148e4adb09fSVitaly Kuznetsov      * Non-empty KVM context is needed when KVM_CAP_SYS_HYPERV_CPUID is
1149e4adb09fSVitaly Kuznetsov      * unsupported, kvm_hyperv_expand_features() checks for that.
1150e4adb09fSVitaly Kuznetsov      */
1151e4adb09fSVitaly Kuznetsov     assert(do_sys_ioctl || cs->kvm_state);
1152e4adb09fSVitaly Kuznetsov 
1153e4adb09fSVitaly Kuznetsov     /*
1154a9dc68d9SClaudio Fontana      * When the buffer is too small, KVM_GET_SUPPORTED_HV_CPUID fails with
1155a9dc68d9SClaudio Fontana      * -E2BIG, however, it doesn't report back the right size. Keep increasing
1156a9dc68d9SClaudio Fontana      * it and re-trying until we succeed.
1157a9dc68d9SClaudio Fontana      */
11582e905438SVitaly Kuznetsov     while ((cpuid = try_get_hv_cpuid(cs, max, do_sys_ioctl)) == NULL) {
1159a9dc68d9SClaudio Fontana         max++;
1160a9dc68d9SClaudio Fontana     }
1161decb4f20SVitaly Kuznetsov 
1162decb4f20SVitaly Kuznetsov     /*
1163decb4f20SVitaly Kuznetsov      * KVM_GET_SUPPORTED_HV_CPUID does not set EVMCS CPUID bit before
1164decb4f20SVitaly Kuznetsov      * KVM_CAP_HYPERV_ENLIGHTENED_VMCS is enabled but we want to get the
1165decb4f20SVitaly Kuznetsov      * information early, just check for the capability and set the bit
1166decb4f20SVitaly Kuznetsov      * manually.
1167decb4f20SVitaly Kuznetsov      */
11682e905438SVitaly Kuznetsov     if (!do_sys_ioctl && kvm_check_extension(cs->kvm_state,
1169decb4f20SVitaly Kuznetsov                             KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) {
1170decb4f20SVitaly Kuznetsov         for (i = 0; i < cpuid->nent; i++) {
1171decb4f20SVitaly Kuznetsov             if (cpuid->entries[i].function == HV_CPUID_ENLIGHTMENT_INFO) {
1172decb4f20SVitaly Kuznetsov                 cpuid->entries[i].eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
1173decb4f20SVitaly Kuznetsov             }
1174decb4f20SVitaly Kuznetsov         }
1175decb4f20SVitaly Kuznetsov     }
1176decb4f20SVitaly Kuznetsov 
1177a9dc68d9SClaudio Fontana     return cpuid;
1178a9dc68d9SClaudio Fontana }
1179a9dc68d9SClaudio Fontana 
1180a9dc68d9SClaudio Fontana /*
1181a9dc68d9SClaudio Fontana  * When KVM_GET_SUPPORTED_HV_CPUID is not supported we fill CPUID feature
1182a9dc68d9SClaudio Fontana  * leaves from KVM_CAP_HYPERV* and present MSRs data.
1183a9dc68d9SClaudio Fontana  */
get_supported_hv_cpuid_legacy(CPUState * cs)1184a9dc68d9SClaudio Fontana static struct kvm_cpuid2 *get_supported_hv_cpuid_legacy(CPUState *cs)
1185a9dc68d9SClaudio Fontana {
1186a9dc68d9SClaudio Fontana     X86CPU *cpu = X86_CPU(cs);
1187a9dc68d9SClaudio Fontana     struct kvm_cpuid2 *cpuid;
1188a9dc68d9SClaudio Fontana     struct kvm_cpuid_entry2 *entry_feat, *entry_recomm;
1189a9dc68d9SClaudio Fontana 
1190a9dc68d9SClaudio Fontana     /* HV_CPUID_FEATURES, HV_CPUID_ENLIGHTMENT_INFO */
1191a9dc68d9SClaudio Fontana     cpuid = g_malloc0(sizeof(*cpuid) + 2 * sizeof(*cpuid->entries));
1192a9dc68d9SClaudio Fontana     cpuid->nent = 2;
1193a9dc68d9SClaudio Fontana 
1194a9dc68d9SClaudio Fontana     /* HV_CPUID_VENDOR_AND_MAX_FUNCTIONS */
1195a9dc68d9SClaudio Fontana     entry_feat = &cpuid->entries[0];
1196a9dc68d9SClaudio Fontana     entry_feat->function = HV_CPUID_FEATURES;
1197a9dc68d9SClaudio Fontana 
1198a9dc68d9SClaudio Fontana     entry_recomm = &cpuid->entries[1];
1199a9dc68d9SClaudio Fontana     entry_recomm->function = HV_CPUID_ENLIGHTMENT_INFO;
1200a9dc68d9SClaudio Fontana     entry_recomm->ebx = cpu->hyperv_spinlock_attempts;
1201a9dc68d9SClaudio Fontana 
1202a9dc68d9SClaudio Fontana     if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0) {
1203a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_HYPERCALL_AVAILABLE;
1204a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_APIC_ACCESS_AVAILABLE;
1205a9dc68d9SClaudio Fontana         entry_feat->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
1206a9dc68d9SClaudio Fontana         entry_recomm->eax |= HV_RELAXED_TIMING_RECOMMENDED;
1207a9dc68d9SClaudio Fontana         entry_recomm->eax |= HV_APIC_ACCESS_RECOMMENDED;
1208a9dc68d9SClaudio Fontana     }
1209a9dc68d9SClaudio Fontana 
1210a9dc68d9SClaudio Fontana     if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) {
1211a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_TIME_REF_COUNT_AVAILABLE;
1212a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_REFERENCE_TSC_AVAILABLE;
1213a9dc68d9SClaudio Fontana     }
1214a9dc68d9SClaudio Fontana 
1215a9dc68d9SClaudio Fontana     if (has_msr_hv_frequencies) {
1216a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_ACCESS_FREQUENCY_MSRS;
1217a9dc68d9SClaudio Fontana         entry_feat->edx |= HV_FREQUENCY_MSRS_AVAILABLE;
1218a9dc68d9SClaudio Fontana     }
1219a9dc68d9SClaudio Fontana 
1220a9dc68d9SClaudio Fontana     if (has_msr_hv_crash) {
1221a9dc68d9SClaudio Fontana         entry_feat->edx |= HV_GUEST_CRASH_MSR_AVAILABLE;
1222a9dc68d9SClaudio Fontana     }
1223a9dc68d9SClaudio Fontana 
1224a9dc68d9SClaudio Fontana     if (has_msr_hv_reenlightenment) {
1225a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_ACCESS_REENLIGHTENMENTS_CONTROL;
1226a9dc68d9SClaudio Fontana     }
1227a9dc68d9SClaudio Fontana 
1228a9dc68d9SClaudio Fontana     if (has_msr_hv_reset) {
1229a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_RESET_AVAILABLE;
1230a9dc68d9SClaudio Fontana     }
1231a9dc68d9SClaudio Fontana 
1232a9dc68d9SClaudio Fontana     if (has_msr_hv_vpindex) {
1233a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_VP_INDEX_AVAILABLE;
1234a9dc68d9SClaudio Fontana     }
1235a9dc68d9SClaudio Fontana 
1236a9dc68d9SClaudio Fontana     if (has_msr_hv_runtime) {
1237a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_VP_RUNTIME_AVAILABLE;
1238a9dc68d9SClaudio Fontana     }
1239a9dc68d9SClaudio Fontana 
1240a9dc68d9SClaudio Fontana     if (has_msr_hv_synic) {
1241a9dc68d9SClaudio Fontana         unsigned int cap = cpu->hyperv_synic_kvm_only ?
1242a9dc68d9SClaudio Fontana             KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2;
1243a9dc68d9SClaudio Fontana 
1244a9dc68d9SClaudio Fontana         if (kvm_check_extension(cs->kvm_state, cap) > 0) {
1245a9dc68d9SClaudio Fontana             entry_feat->eax |= HV_SYNIC_AVAILABLE;
1246a9dc68d9SClaudio Fontana         }
1247a9dc68d9SClaudio Fontana     }
1248a9dc68d9SClaudio Fontana 
1249a9dc68d9SClaudio Fontana     if (has_msr_hv_stimer) {
1250a9dc68d9SClaudio Fontana         entry_feat->eax |= HV_SYNTIMERS_AVAILABLE;
1251a9dc68d9SClaudio Fontana     }
1252a9dc68d9SClaudio Fontana 
125373d24074SJon Doron     if (has_msr_hv_syndbg_options) {
125473d24074SJon Doron         entry_feat->edx |= HV_GUEST_DEBUGGING_AVAILABLE;
125573d24074SJon Doron         entry_feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
125673d24074SJon Doron         entry_feat->ebx |= HV_PARTITION_DEBUGGING_ALLOWED;
125773d24074SJon Doron     }
125873d24074SJon Doron 
1259a9dc68d9SClaudio Fontana     if (kvm_check_extension(cs->kvm_state,
1260a9dc68d9SClaudio Fontana                             KVM_CAP_HYPERV_TLBFLUSH) > 0) {
1261a9dc68d9SClaudio Fontana         entry_recomm->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED;
1262a9dc68d9SClaudio Fontana         entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
1263a9dc68d9SClaudio Fontana     }
1264a9dc68d9SClaudio Fontana 
1265a9dc68d9SClaudio Fontana     if (kvm_check_extension(cs->kvm_state,
1266a9dc68d9SClaudio Fontana                             KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) {
1267a9dc68d9SClaudio Fontana         entry_recomm->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
1268a9dc68d9SClaudio Fontana     }
1269a9dc68d9SClaudio Fontana 
1270a9dc68d9SClaudio Fontana     if (kvm_check_extension(cs->kvm_state,
1271a9dc68d9SClaudio Fontana                             KVM_CAP_HYPERV_SEND_IPI) > 0) {
1272a9dc68d9SClaudio Fontana         entry_recomm->eax |= HV_CLUSTER_IPI_RECOMMENDED;
1273a9dc68d9SClaudio Fontana         entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
1274a9dc68d9SClaudio Fontana     }
1275a9dc68d9SClaudio Fontana 
1276a9dc68d9SClaudio Fontana     return cpuid;
1277a9dc68d9SClaudio Fontana }
1278a9dc68d9SClaudio Fontana 
/*
 * Return register @reg of the host-supported Hyper-V CPUID leaf @func.
 *
 * The supported-leaves table is fetched on first use and kept in
 * hv_cpuid_cache.  Returns 0 when no table is available or when the table
 * has no entry for @func.
 */
static uint32_t hv_cpuid_get_host(CPUState *cs, uint32_t func, int reg)
{
    struct kvm_cpuid2 *cpuid = hv_cpuid_cache;
    struct kvm_cpuid_entry2 *entry;

    if (!cpuid) {
        if (kvm_check_extension(kvm_state, KVM_CAP_HYPERV_CPUID) > 0) {
            cpuid = get_supported_hv_cpuid(cs);
        } else {
            /*
             * 'cs->kvm_state' may be NULL when Hyper-V features are expanded
             * before the KVM context is created, but that only happens when
             * KVM_CAP_SYS_HYPERV_CPUID is supported, which implies
             * KVM_CAP_HYPERV_CPUID.
             */
            assert(cs->kvm_state);

            cpuid = get_supported_hv_cpuid_legacy(cs);
        }
        hv_cpuid_cache = cpuid;
    }

    if (!cpuid) {
        return 0;
    }

    entry = cpuid_find_entry(cpuid, func, 0);

    return entry ? cpuid_entry_get_reg(entry, reg) : 0;
}
1314e1a66a1eSVitaly Kuznetsov 
/*
 * Tell whether the host exposes Hyper-V feature @feature, i.e. whether every
 * CPUID flag listed for it in kvm_hyperv_properties[] is fully set in the
 * host-supported Hyper-V CPUID data.
 */
static bool hyperv_feature_supported(CPUState *cs, int feature)
{
    uint32_t leaf, mask;
    int slot, reg;

    /*
     * Each kvm_hyperv_properties[] entry has to name at least one CPUID flag
     * to detect the feature with; without one it is hard to say whether the
     * feature is supported or not.
     */
    assert(kvm_hyperv_properties[feature].flags[0].func);

    for (slot = 0;
         slot < ARRAY_SIZE(kvm_hyperv_properties[feature].flags);
         slot++) {
        leaf = kvm_hyperv_properties[feature].flags[slot].func;
        reg = kvm_hyperv_properties[feature].flags[slot].reg;
        mask = kvm_hyperv_properties[feature].flags[slot].bits;

        /* Slots with a zero func are unused and do not constrain anything. */
        if (leaf && (hv_cpuid_get_host(cs, leaf, reg) & mask) != mask) {
            return false;
        }
    }

    return true;
}
13447682f857SVitaly Kuznetsov 
13455ce48fa3SVitaly Kuznetsov /* Checks that all feature dependencies are enabled */
hv_feature_check_deps(X86CPU * cpu,int feature,Error ** errp)13465ce48fa3SVitaly Kuznetsov static bool hv_feature_check_deps(X86CPU *cpu, int feature, Error **errp)
1347a9dc68d9SClaudio Fontana {
1348a9dc68d9SClaudio Fontana     uint64_t deps;
13497682f857SVitaly Kuznetsov     int dep_feat;
1350a9dc68d9SClaudio Fontana 
1351a9dc68d9SClaudio Fontana     deps = kvm_hyperv_properties[feature].dependencies;
1352a9dc68d9SClaudio Fontana     while (deps) {
1353a9dc68d9SClaudio Fontana         dep_feat = ctz64(deps);
1354a9dc68d9SClaudio Fontana         if (!(hyperv_feat_enabled(cpu, dep_feat))) {
1355f4a62495SVitaly Kuznetsov             error_setg(errp, "Hyper-V %s requires Hyper-V %s",
1356a9dc68d9SClaudio Fontana                        kvm_hyperv_properties[feature].desc,
1357a9dc68d9SClaudio Fontana                        kvm_hyperv_properties[dep_feat].desc);
13585ce48fa3SVitaly Kuznetsov             return false;
1359a9dc68d9SClaudio Fontana         }
1360a9dc68d9SClaudio Fontana         deps &= ~(1ull << dep_feat);
1361a9dc68d9SClaudio Fontana     }
1362a9dc68d9SClaudio Fontana 
13635ce48fa3SVitaly Kuznetsov     return true;
1364a9dc68d9SClaudio Fontana }
1365a9dc68d9SClaudio Fontana 
/*
 * Compute the guest value of register @reg in Hyper-V CPUID leaf @func by
 * OR-ing together the bits that every enabled feature contributes to that
 * leaf/register pair in kvm_hyperv_properties[].
 */
static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg)
{
    X86CPU *cpu = X86_CPU(cs);
    uint32_t leaf = 0;
    int feat, slot;

    for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) {
        if (!hyperv_feat_enabled(cpu, feat)) {
            continue;
        }

        for (slot = 0;
             slot < ARRAY_SIZE(kvm_hyperv_properties[feat].flags);
             slot++) {
            if (kvm_hyperv_properties[feat].flags[slot].func == func &&
                kvm_hyperv_properties[feat].flags[slot].reg == reg) {
                leaf |= kvm_hyperv_properties[feat].flags[slot].bits;
            }
        }
    }

    /* HV_CPUID_NESTED_FEATURES.EAX also encodes the supported eVMCS range */
    if (func == HV_CPUID_NESTED_FEATURES && reg == R_EAX &&
        hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
        leaf |= DEFAULT_EVMCS_VERSION;
    }

    return leaf;
}
1398c830015eSVitaly Kuznetsov 
1399a9dc68d9SClaudio Fontana /*
1400f6e01ab5SVitaly Kuznetsov  * Expand Hyper-V CPU features. In partucular, check that all the requested
1401f6e01ab5SVitaly Kuznetsov  * features are supported by the host and the sanity of the configuration
1402f6e01ab5SVitaly Kuznetsov  * (that all the required dependencies are included). Also, this takes care
1403f6e01ab5SVitaly Kuznetsov  * of 'hv_passthrough' mode and fills the environment with all supported
1404f6e01ab5SVitaly Kuznetsov  * Hyper-V features.
1405a9dc68d9SClaudio Fontana  */
kvm_hyperv_expand_features(X86CPU * cpu,Error ** errp)1406071ce4b0SVitaly Kuznetsov bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
1407a9dc68d9SClaudio Fontana {
1408071ce4b0SVitaly Kuznetsov     CPUState *cs = CPU(cpu);
14095ce48fa3SVitaly Kuznetsov     Error *local_err = NULL;
14105ce48fa3SVitaly Kuznetsov     int feat;
1411a9dc68d9SClaudio Fontana 
1412a9dc68d9SClaudio Fontana     if (!hyperv_enabled(cpu))
1413d7652b77SVitaly Kuznetsov         return true;
1414a9dc68d9SClaudio Fontana 
1415071ce4b0SVitaly Kuznetsov     /*
1416071ce4b0SVitaly Kuznetsov      * When kvm_hyperv_expand_features is called at CPU feature expansion
1417071ce4b0SVitaly Kuznetsov      * time per-CPU kvm_state is not available yet so we can only proceed
1418071ce4b0SVitaly Kuznetsov      * when KVM_CAP_SYS_HYPERV_CPUID is supported.
1419071ce4b0SVitaly Kuznetsov      */
1420071ce4b0SVitaly Kuznetsov     if (!cs->kvm_state &&
1421071ce4b0SVitaly Kuznetsov         !kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID))
1422071ce4b0SVitaly Kuznetsov         return true;
1423071ce4b0SVitaly Kuznetsov 
1424a9dc68d9SClaudio Fontana     if (cpu->hyperv_passthrough) {
1425e1a66a1eSVitaly Kuznetsov         cpu->hyperv_vendor_id[0] =
1426a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EBX);
1427e1a66a1eSVitaly Kuznetsov         cpu->hyperv_vendor_id[1] =
1428a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_ECX);
1429e1a66a1eSVitaly Kuznetsov         cpu->hyperv_vendor_id[2] =
1430a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EDX);
14314519259aSVitaly Kuznetsov         cpu->hyperv_vendor = g_realloc(cpu->hyperv_vendor,
14324519259aSVitaly Kuznetsov                                        sizeof(cpu->hyperv_vendor_id) + 1);
14334519259aSVitaly Kuznetsov         memcpy(cpu->hyperv_vendor, cpu->hyperv_vendor_id,
14344519259aSVitaly Kuznetsov                sizeof(cpu->hyperv_vendor_id));
14354519259aSVitaly Kuznetsov         cpu->hyperv_vendor[sizeof(cpu->hyperv_vendor_id)] = 0;
143608856771SVitaly Kuznetsov 
1437e1a66a1eSVitaly Kuznetsov         cpu->hyperv_interface_id[0] =
1438a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EAX);
1439e1a66a1eSVitaly Kuznetsov         cpu->hyperv_interface_id[1] =
1440a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EBX);
1441e1a66a1eSVitaly Kuznetsov         cpu->hyperv_interface_id[2] =
1442a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_ECX);
1443e1a66a1eSVitaly Kuznetsov         cpu->hyperv_interface_id[3] =
1444a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EDX);
1445735db465SVitaly Kuznetsov 
1446af7228b8SVitaly Kuznetsov         cpu->hyperv_ver_id_build =
1447a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EAX);
1448af7228b8SVitaly Kuznetsov         cpu->hyperv_ver_id_major =
1449af7228b8SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EBX) >> 16;
1450af7228b8SVitaly Kuznetsov         cpu->hyperv_ver_id_minor =
1451af7228b8SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EBX) & 0xffff;
1452af7228b8SVitaly Kuznetsov         cpu->hyperv_ver_id_sp =
1453a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_ECX);
1454af7228b8SVitaly Kuznetsov         cpu->hyperv_ver_id_sb =
1455af7228b8SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) >> 24;
1456af7228b8SVitaly Kuznetsov         cpu->hyperv_ver_id_sn =
1457af7228b8SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) & 0xffffff;
1458fb7e31aaSVitaly Kuznetsov 
1459a8439be6SVitaly Kuznetsov         cpu->hv_max_vps = hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS,
1460e1a66a1eSVitaly Kuznetsov                                             R_EAX);
1461e1a66a1eSVitaly Kuznetsov         cpu->hyperv_limits[0] =
1462a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EBX);
1463e1a66a1eSVitaly Kuznetsov         cpu->hyperv_limits[1] =
1464a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_ECX);
1465e1a66a1eSVitaly Kuznetsov         cpu->hyperv_limits[2] =
1466a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EDX);
146723eb5d03SVitaly Kuznetsov 
1468e1a66a1eSVitaly Kuznetsov         cpu->hyperv_spinlock_attempts =
1469a8439be6SVitaly Kuznetsov             hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EBX);
14705ce48fa3SVitaly Kuznetsov 
14715ce48fa3SVitaly Kuznetsov         /*
14725ce48fa3SVitaly Kuznetsov          * Mark feature as enabled in 'cpu->hyperv_features' as
14735ce48fa3SVitaly Kuznetsov          * hv_build_cpuid_leaf() uses this info to build guest CPUIDs.
14745ce48fa3SVitaly Kuznetsov          */
14755ce48fa3SVitaly Kuznetsov         for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) {
14767d7b9c76SVitaly Kuznetsov             if (hyperv_feature_supported(cs, feat) &&
14777d7b9c76SVitaly Kuznetsov                 !kvm_hyperv_properties[feat].skip_passthrough) {
14785ce48fa3SVitaly Kuznetsov                 cpu->hyperv_features |= BIT(feat);
14795ce48fa3SVitaly Kuznetsov             }
14805ce48fa3SVitaly Kuznetsov         }
14815ce48fa3SVitaly Kuznetsov     } else {
14825ce48fa3SVitaly Kuznetsov         /* Check features availability and dependencies */
14835ce48fa3SVitaly Kuznetsov         for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) {
14845ce48fa3SVitaly Kuznetsov             /* If the feature was not requested skip it. */
14855ce48fa3SVitaly Kuznetsov             if (!hyperv_feat_enabled(cpu, feat)) {
14865ce48fa3SVitaly Kuznetsov                 continue;
1487a9dc68d9SClaudio Fontana             }
1488a9dc68d9SClaudio Fontana 
14895ce48fa3SVitaly Kuznetsov             /* Check if the feature is supported by KVM */
14905ce48fa3SVitaly Kuznetsov             if (!hyperv_feature_supported(cs, feat)) {
14915ce48fa3SVitaly Kuznetsov                 error_setg(errp, "Hyper-V %s is not supported by kernel",
14925ce48fa3SVitaly Kuznetsov                            kvm_hyperv_properties[feat].desc);
1493d7652b77SVitaly Kuznetsov                 return false;
1494f4a62495SVitaly Kuznetsov             }
14955ce48fa3SVitaly Kuznetsov 
14965ce48fa3SVitaly Kuznetsov             /* Check dependencies */
14975ce48fa3SVitaly Kuznetsov             if (!hv_feature_check_deps(cpu, feat, &local_err)) {
14985ce48fa3SVitaly Kuznetsov                 error_propagate(errp, local_err);
1499d7652b77SVitaly Kuznetsov                 return false;
1500f4a62495SVitaly Kuznetsov             }
1501f4a62495SVitaly Kuznetsov         }
1502f4a62495SVitaly Kuznetsov     }
1503a9dc68d9SClaudio Fontana 
1504a9dc68d9SClaudio Fontana     /* Additional dependencies not covered by kvm_hyperv_properties[] */
1505a9dc68d9SClaudio Fontana     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) &&
1506a9dc68d9SClaudio Fontana         !cpu->hyperv_synic_kvm_only &&
1507a9dc68d9SClaudio Fontana         !hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX)) {
1508f4a62495SVitaly Kuznetsov         error_setg(errp, "Hyper-V %s requires Hyper-V %s",
1509a9dc68d9SClaudio Fontana                    kvm_hyperv_properties[HYPERV_FEAT_SYNIC].desc,
1510a9dc68d9SClaudio Fontana                    kvm_hyperv_properties[HYPERV_FEAT_VPINDEX].desc);
1511d7652b77SVitaly Kuznetsov         return false;
1512a9dc68d9SClaudio Fontana     }
1513d7652b77SVitaly Kuznetsov 
1514d7652b77SVitaly Kuznetsov     return true;
1515f6e01ab5SVitaly Kuznetsov }
1516f6e01ab5SVitaly Kuznetsov 
1517f6e01ab5SVitaly Kuznetsov /*
1518f6e01ab5SVitaly Kuznetsov  * Fill in Hyper-V CPUIDs. Returns the number of entries filled in cpuid_ent.
1519f6e01ab5SVitaly Kuznetsov  */
hyperv_fill_cpuids(CPUState * cs,struct kvm_cpuid_entry2 * cpuid_ent)1520f6e01ab5SVitaly Kuznetsov static int hyperv_fill_cpuids(CPUState *cs,
1521f6e01ab5SVitaly Kuznetsov                               struct kvm_cpuid_entry2 *cpuid_ent)
1522f6e01ab5SVitaly Kuznetsov {
1523f6e01ab5SVitaly Kuznetsov     X86CPU *cpu = X86_CPU(cs);
1524f6e01ab5SVitaly Kuznetsov     struct kvm_cpuid_entry2 *c;
152573d24074SJon Doron     uint32_t signature[3];
152673d24074SJon Doron     uint32_t cpuid_i = 0, max_cpuid_leaf = 0;
15277110fe56SVitaly Kuznetsov     uint32_t nested_eax =
15287110fe56SVitaly Kuznetsov         hv_build_cpuid_leaf(cs, HV_CPUID_NESTED_FEATURES, R_EAX);
152973d24074SJon Doron 
15307110fe56SVitaly Kuznetsov     max_cpuid_leaf = nested_eax ? HV_CPUID_NESTED_FEATURES :
15317110fe56SVitaly Kuznetsov         HV_CPUID_IMPLEMENT_LIMITS;
153273d24074SJon Doron 
153373d24074SJon Doron     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
153473d24074SJon Doron         max_cpuid_leaf =
153573d24074SJon Doron             MAX(max_cpuid_leaf, HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
153673d24074SJon Doron     }
1537f6e01ab5SVitaly Kuznetsov 
1538a9dc68d9SClaudio Fontana     c = &cpuid_ent[cpuid_i++];
1539a9dc68d9SClaudio Fontana     c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
154073d24074SJon Doron     c->eax = max_cpuid_leaf;
154108856771SVitaly Kuznetsov     c->ebx = cpu->hyperv_vendor_id[0];
154208856771SVitaly Kuznetsov     c->ecx = cpu->hyperv_vendor_id[1];
154308856771SVitaly Kuznetsov     c->edx = cpu->hyperv_vendor_id[2];
1544a9dc68d9SClaudio Fontana 
1545a9dc68d9SClaudio Fontana     c = &cpuid_ent[cpuid_i++];
1546a9dc68d9SClaudio Fontana     c->function = HV_CPUID_INTERFACE;
1547735db465SVitaly Kuznetsov     c->eax = cpu->hyperv_interface_id[0];
1548735db465SVitaly Kuznetsov     c->ebx = cpu->hyperv_interface_id[1];
1549735db465SVitaly Kuznetsov     c->ecx = cpu->hyperv_interface_id[2];
1550735db465SVitaly Kuznetsov     c->edx = cpu->hyperv_interface_id[3];
1551a9dc68d9SClaudio Fontana 
1552a9dc68d9SClaudio Fontana     c = &cpuid_ent[cpuid_i++];
1553a9dc68d9SClaudio Fontana     c->function = HV_CPUID_VERSION;
1554af7228b8SVitaly Kuznetsov     c->eax = cpu->hyperv_ver_id_build;
1555af7228b8SVitaly Kuznetsov     c->ebx = (uint32_t)cpu->hyperv_ver_id_major << 16 |
1556af7228b8SVitaly Kuznetsov         cpu->hyperv_ver_id_minor;
1557af7228b8SVitaly Kuznetsov     c->ecx = cpu->hyperv_ver_id_sp;
1558af7228b8SVitaly Kuznetsov     c->edx = (uint32_t)cpu->hyperv_ver_id_sb << 24 |
1559af7228b8SVitaly Kuznetsov         (cpu->hyperv_ver_id_sn & 0xffffff);
1560a9dc68d9SClaudio Fontana 
1561a9dc68d9SClaudio Fontana     c = &cpuid_ent[cpuid_i++];
1562a9dc68d9SClaudio Fontana     c->function = HV_CPUID_FEATURES;
1563061817a7SVitaly Kuznetsov     c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EAX);
1564061817a7SVitaly Kuznetsov     c->ebx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EBX);
1565061817a7SVitaly Kuznetsov     c->edx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EDX);
1566c830015eSVitaly Kuznetsov 
1567b26f68c3SVitaly Kuznetsov     /* Unconditionally required with any Hyper-V enlightenment */
1568b26f68c3SVitaly Kuznetsov     c->eax |= HV_HYPERCALL_AVAILABLE;
1569b26f68c3SVitaly Kuznetsov 
1570cce087f6SVitaly Kuznetsov     /* SynIC and Vmbus devices require messages/signals hypercalls */
1571cce087f6SVitaly Kuznetsov     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) &&
1572cce087f6SVitaly Kuznetsov         !cpu->hyperv_synic_kvm_only) {
1573cce087f6SVitaly Kuznetsov         c->ebx |= HV_POST_MESSAGES | HV_SIGNAL_EVENTS;
1574cce087f6SVitaly Kuznetsov     }
1575cce087f6SVitaly Kuznetsov 
157605071629SVitaly Kuznetsov 
1577c830015eSVitaly Kuznetsov     /* Not exposed by KVM but needed to make CPU hotplug in Windows work */
1578c830015eSVitaly Kuznetsov     c->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
1579a9dc68d9SClaudio Fontana 
1580a9dc68d9SClaudio Fontana     c = &cpuid_ent[cpuid_i++];
1581a9dc68d9SClaudio Fontana     c->function = HV_CPUID_ENLIGHTMENT_INFO;
1582061817a7SVitaly Kuznetsov     c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX);
1583a9dc68d9SClaudio Fontana     c->ebx = cpu->hyperv_spinlock_attempts;
1584a9dc68d9SClaudio Fontana 
1585e1f9a8e8SVitaly Kuznetsov     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC) &&
1586e1f9a8e8SVitaly Kuznetsov         !hyperv_feat_enabled(cpu, HYPERV_FEAT_AVIC)) {
158705071629SVitaly Kuznetsov         c->eax |= HV_APIC_ACCESS_RECOMMENDED;
158805071629SVitaly Kuznetsov     }
158905071629SVitaly Kuznetsov 
1590c830015eSVitaly Kuznetsov     if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_ON) {
1591c830015eSVitaly Kuznetsov         c->eax |= HV_NO_NONARCH_CORESHARING;
1592c830015eSVitaly Kuznetsov     } else if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO) {
1593a8439be6SVitaly Kuznetsov         c->eax |= hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX) &
1594e1a66a1eSVitaly Kuznetsov             HV_NO_NONARCH_CORESHARING;
1595c830015eSVitaly Kuznetsov     }
1596c830015eSVitaly Kuznetsov 
1597a9dc68d9SClaudio Fontana     c = &cpuid_ent[cpuid_i++];
1598a9dc68d9SClaudio Fontana     c->function = HV_CPUID_IMPLEMENT_LIMITS;
1599a9dc68d9SClaudio Fontana     c->eax = cpu->hv_max_vps;
160023eb5d03SVitaly Kuznetsov     c->ebx = cpu->hyperv_limits[0];
160123eb5d03SVitaly Kuznetsov     c->ecx = cpu->hyperv_limits[1];
160223eb5d03SVitaly Kuznetsov     c->edx = cpu->hyperv_limits[2];
1603a9dc68d9SClaudio Fontana 
16047110fe56SVitaly Kuznetsov     if (nested_eax) {
1605dc7d6cafSPhilippe Mathieu-Daudé         uint32_t function;
1606a9dc68d9SClaudio Fontana 
1607a9dc68d9SClaudio Fontana         /* Create zeroed 0x40000006..0x40000009 leaves */
1608a9dc68d9SClaudio Fontana         for (function = HV_CPUID_IMPLEMENT_LIMITS + 1;
1609a9dc68d9SClaudio Fontana              function < HV_CPUID_NESTED_FEATURES; function++) {
1610a9dc68d9SClaudio Fontana             c = &cpuid_ent[cpuid_i++];
1611a9dc68d9SClaudio Fontana             c->function = function;
1612a9dc68d9SClaudio Fontana         }
1613a9dc68d9SClaudio Fontana 
1614a9dc68d9SClaudio Fontana         c = &cpuid_ent[cpuid_i++];
1615a9dc68d9SClaudio Fontana         c->function = HV_CPUID_NESTED_FEATURES;
16167110fe56SVitaly Kuznetsov         c->eax = nested_eax;
1617a9dc68d9SClaudio Fontana     }
1618a9dc68d9SClaudio Fontana 
161973d24074SJon Doron     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
162073d24074SJon Doron         c = &cpuid_ent[cpuid_i++];
162173d24074SJon Doron         c->function = HV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS;
162273d24074SJon Doron         c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ?
162373d24074SJon Doron             HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS;
162473d24074SJon Doron         memcpy(signature, "Microsoft VS", 12);
162573d24074SJon Doron         c->eax = 0;
162673d24074SJon Doron         c->ebx = signature[0];
162773d24074SJon Doron         c->ecx = signature[1];
162873d24074SJon Doron         c->edx = signature[2];
162973d24074SJon Doron 
163073d24074SJon Doron         c = &cpuid_ent[cpuid_i++];
163173d24074SJon Doron         c->function = HV_CPUID_SYNDBG_INTERFACE;
163273d24074SJon Doron         memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
163373d24074SJon Doron         c->eax = signature[0];
163473d24074SJon Doron         c->ebx = 0;
163573d24074SJon Doron         c->ecx = 0;
163673d24074SJon Doron         c->edx = 0;
163773d24074SJon Doron 
163873d24074SJon Doron         c = &cpuid_ent[cpuid_i++];
163973d24074SJon Doron         c->function = HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
164073d24074SJon Doron         c->eax = HV_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
164173d24074SJon Doron         c->ebx = 0;
164273d24074SJon Doron         c->ecx = 0;
164373d24074SJon Doron         c->edx = 0;
164473d24074SJon Doron     }
164573d24074SJon Doron 
1646a8439be6SVitaly Kuznetsov     return cpuid_i;
1647a9dc68d9SClaudio Fontana }
1648a9dc68d9SClaudio Fontana 
1649a9dc68d9SClaudio Fontana static Error *hv_passthrough_mig_blocker;
1650a9dc68d9SClaudio Fontana static Error *hv_no_nonarch_cs_mig_blocker;
1651a9dc68d9SClaudio Fontana 
/*
 * Checks that the exposed eVMCS version range is supported by KVM.
 *
 * Both arguments pack a version range into 16 bits: the low byte holds the
 * minimum version and the high byte holds the maximum version.
 *
 * @evmcs_version: range QEMU wants to expose
 * @supported_evmcs_version: range to check against
 *
 * Returns true when the [min..max] range of @evmcs_version lies entirely
 * inside the [min..max] range of @supported_evmcs_version.
 */
static bool evmcs_version_supported(uint16_t evmcs_version,
                                    uint16_t supported_evmcs_version)
{
    uint8_t min_version = evmcs_version & 0xff;
    uint8_t max_version = evmcs_version >> 8;
    uint8_t min_supported_version = supported_evmcs_version & 0xff;
    uint8_t max_supported_version = supported_evmcs_version >> 8;

    return (min_version >= min_supported_version) &&
        (max_version <= max_supported_version);
}
166407454e2eSVitaly Kuznetsov 
/*
 * Per-vCPU Hyper-V initialization against KVM.
 *
 * In order, this function:
 *  - registers a migration blocker for 'hv-passthrough' and for
 *    'hv-no-nonarch-coresharing=auto' (each only while the corresponding
 *    global blocker pointer is still NULL);
 *  - when VP_INDEX is enabled but not settable, reads HV_X64_MSR_VP_INDEX
 *    and checks it against hyperv_vp_index();
 *  - when SYNIC is enabled, enables the SynIC capability in KVM and, unless
 *    hyperv_synic_kvm_only is set, calls hyperv_x86_synic_add();
 *  - when EVMCS is enabled, enables the enlightened VMCS capability and
 *    checks the version range KVM reports;
 *  - when hyperv_enforce_cpuid is set, enables KVM_CAP_HYPERV_ENFORCE_CPUID;
 *  - when STIMER, VAPIC and RUNTIME are all enabled, calls
 *    hyperv_x86_set_vmbus_recommended_features_enabled().
 *
 * Returns 0 on success, or a negative value from the first step that failed.
 */
static int hyperv_init_vcpu(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    Error *local_err = NULL;
    int ret;

    if (cpu->hyperv_passthrough && hv_passthrough_mig_blocker == NULL) {
        error_setg(&hv_passthrough_mig_blocker,
                   "'hv-passthrough' CPU flag prevents migration, use explicit"
                   " set of hv-* flags instead");
        ret = migrate_add_blocker(&hv_passthrough_mig_blocker, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            return ret;
        }
    }

    if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO &&
        hv_no_nonarch_cs_mig_blocker == NULL) {
        error_setg(&hv_no_nonarch_cs_mig_blocker,
                   "'hv-no-nonarch-coresharing=auto' CPU flag prevents migration"
                   " use explicit 'hv-no-nonarch-coresharing=on' instead (but"
                   " make sure SMT is disabled and/or that vCPUs are properly"
                   " pinned)");
        ret = migrate_add_blocker(&hv_no_nonarch_cs_mig_blocker, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            return ret;
        }
    }

    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && !hv_vpindex_settable) {
        /*
         * the kernel doesn't support setting vp_index; assert that its value
         * is in sync
         */
        uint64_t value;

        ret = kvm_get_one_msr(cpu, HV_X64_MSR_VP_INDEX, &value);
        if (ret < 0) {
            return ret;
        }

        if (value != hyperv_vp_index(CPU(cpu))) {
            error_report("kernel's vp_index != QEMU's vp_index");
            return -ENXIO;
        }
    }

    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
        /* The capability requested depends on the hyperv_synic_kvm_only flag */
        uint32_t synic_cap = cpu->hyperv_synic_kvm_only ?
            KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2;
        ret = kvm_vcpu_enable_cap(cs, synic_cap, 0);
        if (ret < 0) {
            error_report("failed to turn on HyperV SynIC in KVM: %s",
                         strerror(-ret));
            return ret;
        }

        if (!cpu->hyperv_synic_kvm_only) {
            ret = hyperv_x86_synic_add(cpu);
            if (ret < 0) {
                error_report("failed to create HyperV SynIC: %s",
                             strerror(-ret));
                return ret;
            }
        }
    }

    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
        uint16_t evmcs_version = DEFAULT_EVMCS_VERSION;
        uint16_t supported_evmcs_version;

        /* KVM writes the version range it supports through the pointer arg */
        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
                                  (uintptr_t)&supported_evmcs_version);

        /*
         * KVM is required to support EVMCS ver.1. as that's what 'hv-evmcs'
         * option sets. Note: we hardcode the maximum supported eVMCS version
         * to '1' as well so 'hv-evmcs' feature is migratable even when (and if)
         * ver.2 is implemented. A new option (e.g. 'hv-evmcs=2') will then have
         * to be added.
         */
        if (ret < 0) {
            error_report("Hyper-V %s is not supported by kernel",
                         kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
            return ret;
        }

        if (!evmcs_version_supported(evmcs_version, supported_evmcs_version)) {
            error_report("eVMCS version range [%d..%d] is not supported by "
                         "kernel (supported: [%d..%d])", evmcs_version & 0xff,
                         evmcs_version >> 8, supported_evmcs_version & 0xff,
                         supported_evmcs_version >> 8);
            return -ENOTSUP;
        }
    }

    if (cpu->hyperv_enforce_cpuid) {
        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENFORCE_CPUID, 0, 1);
        if (ret < 0) {
            error_report("failed to enable KVM_CAP_HYPERV_ENFORCE_CPUID: %s",
                         strerror(-ret));
            return ret;
        }
    }

    /* Skip SynIC and VP_INDEX since they are hard deps already */
    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_STIMER) &&
        hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC) &&
        hyperv_feat_enabled(cpu, HYPERV_FEAT_RUNTIME)) {
        hyperv_x86_set_vmbus_recommended_features_enabled();
    }

    return 0;
}
1781a9dc68d9SClaudio Fontana 
1782a9dc68d9SClaudio Fontana static Error *invtsc_mig_blocker;
1783a9dc68d9SClaudio Fontana 
1784a9dc68d9SClaudio Fontana #define KVM_MAX_CPUID_ENTRIES  100
1785a9dc68d9SClaudio Fontana 
/*
 * Allocate and zero the buffer that holds the vCPU's XSAVE state.
 *
 * The buffer length is has_xsave2 rounded up to a multiple of 4096 when
 * has_xsave2 is non-zero, and sizeof(struct kvm_xsave) otherwise.  The
 * buffer itself is allocated with 4096-byte alignment.
 */
static void kvm_init_xsave(CPUX86State *env)
{
    if (has_xsave2) {
        env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
    } else {
        env->xsave_buf_len = sizeof(struct kvm_xsave);
    }

    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
    memset(env->xsave_buf, 0, env->xsave_buf_len);
    /*
     * The allocated storage must be large enough for all of the
     * possible XSAVE state components.
     */
    assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
           env->xsave_buf_len);
}
1803e56dd3c7SJing Liu 
/*
 * Reset env->nested_state to a clean initial state.
 *
 * Does nothing when env->nested_state is NULL.  Otherwise the whole buffer
 * is zeroed (its recorded size is preserved) and the format is set according
 * to the CPU: VMX, with both vmxon_pa and vmcs12_pa set to -1ull, or SVM.
 * When the CPU has neither VMX nor SVM, the format is left zeroed.
 */
static void kvm_init_nested_state(CPUX86State *env)
{
    struct kvm_vmx_nested_state_hdr *vmx_hdr;
    uint32_t size;

    if (!env->nested_state) {
        return;
    }

    /* Remember the size: the memset below wipes the field along with the rest */
    size = env->nested_state->size;

    memset(env->nested_state, 0, size);
    env->nested_state->size = size;

    if (cpu_has_vmx(env)) {
        env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
        vmx_hdr = &env->nested_state->hdr.vmx;
        vmx_hdr->vmxon_pa = -1ull;
        vmx_hdr->vmcs12_pa = -1ull;
    } else if (cpu_has_svm(env)) {
        env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
    }
}
18273cafdb67SVitaly Kuznetsov 
/*
 * Fill @entries, starting at slot @cpuid_i, with the CPUID leaves reported
 * by cpu_x86_cpuid() for @env.
 *
 * Three ranges are walked: the basic leaves (0 .. limit from leaf 0), the
 * extended leaves (0x80000000 .. limit from leaf 0x80000000) and, when
 * env->cpuid_xlevel2 is non-zero, the Centaur leaves (0xC0000000 .. limit
 * from leaf 0xC0000000).  Leaves with sub-leaves get one entry per sub-leaf.
 *
 * As a side effect, when the basic limit covers leaf 0xa, the file-level
 * variables has_architectural_pmu_version,
 * num_architectural_pmu_gp_counters and
 * num_architectural_pmu_fixed_counters are updated from that leaf.
 *
 * @entries is expected to have room for KVM_MAX_CPUID_ENTRIES slots; if
 * another slot is needed once cpuid_i has reached that value, a message is
 * printed to stderr and the process aborts.
 *
 * Returns the slot index following the last entry kept.
 */
static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
                                    struct kvm_cpuid_entry2 *entries,
                                    uint32_t cpuid_i)
{
    uint32_t limit, i, j;
    uint32_t unused;
    struct kvm_cpuid_entry2 *c;

    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        j = 0;
        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
            goto full;
        }
        /* Tentatively claim a slot; some cases below give it back */
        c = &entries[cpuid_i++];
        switch (i) {
        case 2: {
            /* Keep reading function 2 till all the input is received */
            int times;

            c->function = i;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            times = c->eax & 0xff;
            if (times > 1) {
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                           KVM_CPUID_FLAG_STATE_READ_NEXT;
            }

            for (j = 1; j < times; ++j) {
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    goto full;
                }
                c = &entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 0x1f:
            if (!x86_has_extended_topo(env->avail_cpu_topo)) {
                /* Leaf not exposed: release the slot claimed above */
                cpuid_i--;
                break;
            }
            /* fallthrough */
        case 4:
        case 0xb:
        case 0xd:
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0) {
                    break;
                }
                if (i == 0xb && !(c->ecx & 0xff00)) {
                    break;
                }
                if (i == 0x1f && !(c->ecx & 0xff00)) {
                    break;
                }
                if (i == 0xd && c->eax == 0) {
                    if (j < 63) {
                        /*
                         * Empty sub-leaf: skip it.  The current slot is
                         * reused for the next sub-leaf index.
                         */
                        continue;
                    } else {
                        /* Index 63 reached on an empty sub-leaf: drop the slot */
                        cpuid_i--;
                        break;
                    }
                }
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    goto full;
                }
                c = &entries[cpuid_i++];
            }
            break;
        case 0x12:
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (j > 1 && (c->eax & 0xf) != 1) {
                    break;
                }

                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    goto full;
                }
                c = &entries[cpuid_i++];
            }
            break;
        case 0x7:
        case 0x14:
        case 0x1d:
        case 0x1e:
        case 0x24: {
            uint32_t times;

            c->function = i;
            c->index = 0;
            c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            /* EAX of sub-leaf 0 is used as the highest sub-leaf index */
            times = c->eax;

            for (j = 1; j <= times; ++j) {
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    goto full;
                }
                c = &entries[cpuid_i++];
                c->function = i;
                c->index = j;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
                /*
                 * KVM already returns all zeroes if a CPUID entry is missing,
                 * so we can omit it and avoid hitting KVM's 80-entry limit.
                 */
                cpuid_i--;
            }
            break;
        }
    }

    if (limit >= 0x0a) {
        uint32_t eax, edx;

        cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx);

        has_architectural_pmu_version = eax & 0xff;
        if (has_architectural_pmu_version > 0) {
            num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8;

            /* Shouldn't be more than 32, since that's the number of bits
             * available in EBX to tell us _which_ counters are available.
             * Play it safe.
             */
            if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) {
                num_architectural_pmu_gp_counters = MAX_GP_COUNTERS;
            }

            if (has_architectural_pmu_version > 1) {
                num_architectural_pmu_fixed_counters = edx & 0x1f;

                if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) {
                    num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS;
                }
            }
        }
    }

    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        j = 0;
        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
            goto full;
        }
        c = &entries[cpuid_i++];

        switch (i) {
        case 0x8000001d:
            /* Query for all AMD cache information leaves */
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (c->eax == 0) {
                    break;
                }
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    goto full;
                }
                c = &entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
                /*
                 * KVM already returns all zeroes if a CPUID entry is missing,
                 * so we can omit it and avoid hitting KVM's 80-entry limit.
                 */
                cpuid_i--;
            }
            break;
        }
    }

    /* Call Centaur's CPUID instructions if they are supported. */
    if (env->cpuid_xlevel2 > 0) {
        cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);

        for (i = 0xC0000000; i <= limit; i++) {
            j = 0;
            if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                goto full;
            }
            c = &entries[cpuid_i++];

            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
        }
    }

    return cpuid_i;

full:
    /* i and j identify the leaf/sub-leaf being processed when space ran out */
    fprintf(stderr, "cpuid_data is full, no space for "
            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
    abort();
}
2056a5acf4f2SSean Christopherson 
/*
 * Architecture-specific initialisation of a KVM vCPU.
 *
 * In order, this:
 *   - programs the TSC frequency and records the effective value in
 *     env->tsc_khz,
 *   - expands/initialises Hyper-V enlightenments when enabled,
 *   - assembles the guest CPUID table (Hyper-V, Xen or KVM hypervisor
 *     leaves followed by the leaves from kvm_x86_build_cpuid(), plus the
 *     optional VMware frequency leaf) and installs it with KVM_SET_CPUID2,
 *   - validates and configures machine-check (MCE) capabilities,
 *   - registers the VM change-state handler and, when needed, the invtsc
 *     migration blocker,
 *   - allocates the nested-state and MSR buffers and initialises MSRs.
 *
 * Returns 0 on success; otherwise the error value of the step that failed.
 *
 * NOTE(review): the change-state handler stored in cpu->vmsentry is not
 * removed on the failure paths that follow its registration; confirm that
 * the caller treats such failures as fatal or cleans up itself.
 */
int kvm_arch_init_vcpu(CPUState *cs)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
    } cpuid_data;
    /*
     * The kernel defines these structs with padding fields so there
     * should be no extra padding in our cpuid_data struct.
     */
    QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
                      sizeof(struct kvm_cpuid2) +
                      sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);

    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint32_t cpuid_i;
    struct kvm_cpuid_entry2 *c;
    uint32_t signature[3];
    int kvm_base = KVM_CPUID_SIGNATURE;
    int max_nested_state_len;
    int r;
    Error *local_err = NULL;

    memset(&cpuid_data, 0, sizeof(cpuid_data));

    cpuid_i = 0;

    has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);

    r = kvm_arch_set_tsc_khz(cs);
    if (r < 0) {
        return r;
    }

    /* vcpu's TSC frequency is either specified by user, or following
     * the value used by KVM if the former is not present. In the
     * latter case, we query it from KVM and record in env->tsc_khz,
     * so that vcpu's TSC frequency can be migrated later via this field.
     */
    if (!env->tsc_khz) {
        r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
            kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
            -ENOTSUP;
        if (r > 0) {
            env->tsc_khz = r;
        }
    }

    env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;

    /*
     * kvm_hyperv_expand_features() is called here for the second time in case
     * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
     * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
     * check which Hyper-V enlightenments are supported and which are not, we
     * can still proceed and check/expand Hyper-V enlightenments here so legacy
     * behavior is preserved.
     */
    if (!kvm_hyperv_expand_features(cpu, &local_err)) {
        error_report_err(local_err);
        return -ENOSYS;
    }

    if (hyperv_enabled(cpu)) {
        r = hyperv_init_vcpu(cpu);
        if (r) {
            return r;
        }

        /* Hyper-V leaves go first; other hypervisor leaves move up. */
        cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
        kvm_base = KVM_CPUID_SIGNATURE_NEXT;
        has_msr_hv_hypercall = true;
    }

    if (cs->kvm_state->xen_version) {
#ifdef CONFIG_XEN_EMU
        struct kvm_cpuid_entry2 *xen_max_leaf;

        memcpy(signature, "XenVMMXenVMM", 12);

        /* Signature leaf; its eax (highest leaf) is patched further down. */
        xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
        c->function = kvm_base + XEN_CPUID_SIGNATURE;
        c->eax = kvm_base + XEN_CPUID_TIME;
        c->ebx = signature[0];
        c->ecx = signature[1];
        c->edx = signature[2];

        c = &cpuid_data.entries[cpuid_i++];
        c->function = kvm_base + XEN_CPUID_VENDOR;
        c->eax = cs->kvm_state->xen_version;
        c->ebx = 0;
        c->ecx = 0;
        c->edx = 0;

        c = &cpuid_data.entries[cpuid_i++];
        c->function = kvm_base + XEN_CPUID_HVM_MSR;
        /* Number of hypercall-transfer pages */
        c->eax = 1;
        /* Hypercall MSR base address */
        if (hyperv_enabled(cpu)) {
            c->ebx = XEN_HYPERCALL_MSR_HYPERV;
            kvm_xen_init(cs->kvm_state, c->ebx);
        } else {
            c->ebx = XEN_HYPERCALL_MSR;
        }
        c->ecx = 0;
        c->edx = 0;

        c = &cpuid_data.entries[cpuid_i++];
        c->function = kvm_base + XEN_CPUID_TIME;
        c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
            (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
        /* default=0 (emulate if necessary) */
        c->ebx = 0;
        /* guest tsc frequency */
        c->ecx = env->user_tsc_khz;
        /* guest tsc incarnation (migration count) */
        c->edx = 0;

        c = &cpuid_data.entries[cpuid_i++];
        c->function = kvm_base + XEN_CPUID_HVM;
        xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
        if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
            if (cpu->xen_vapic) {
                c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
                c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
            }

            c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;

            if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
                c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
                c->ebx = cs->cpu_index;
            }

            if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
                c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
            }
        }

        r = kvm_xen_init_vcpu(cs);
        if (r) {
            return r;
        }

        kvm_base += 0x100;
#else /* CONFIG_XEN_EMU */
        /* This should never happen as kvm_arch_init() would have died first. */
        fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
        abort();
#endif
    } else if (cpu->expose_kvm) {
        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
        c = &cpuid_data.entries[cpuid_i++];
        c->function = KVM_CPUID_SIGNATURE | kvm_base;
        c->eax = KVM_CPUID_FEATURES | kvm_base;
        c->ebx = signature[0];
        c->ecx = signature[1];
        c->edx = signature[2];

        c = &cpuid_data.entries[cpuid_i++];
        c->function = KVM_CPUID_FEATURES | kvm_base;
        c->eax = env->features[FEAT_KVM];
        c->edx = env->features[FEAT_KVM_HINTS];
    }

    if (cpu->kvm_pv_enforce_cpuid) {
        r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
        if (r < 0) {
            fprintf(stderr,
                    "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s\n",
                    strerror(-r));
            abort();
        }
    }

    cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i);
    /* nent must be valid here: cpuid_find_entry() is used on the table below. */
    cpuid_data.cpuid.nent = cpuid_i;

    if (((env->cpuid_version >> 8)&0xF) >= 6
        && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) ==
           (CPUID_MCE | CPUID_MCA)) {
        uint64_t mcg_cap, unsupported_caps;
        int banks;
        int ret;

        ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);
        if (ret < 0) {
            fprintf(stderr, "kvm_get_mce_cap_supported: %s\n", strerror(-ret));
            return ret;
        }

        if (banks < (env->mcg_cap & MCG_CAP_BANKS_MASK)) {
            error_report("kvm: Unsupported MCE bank count (QEMU = %d, KVM = %d)",
                         (int)(env->mcg_cap & MCG_CAP_BANKS_MASK), banks);
            return -ENOTSUP;
        }

        unsupported_caps = env->mcg_cap & ~(mcg_cap | MCG_CAP_BANKS_MASK);
        if (unsupported_caps) {
            if (unsupported_caps & MCG_LMCE_P) {
                error_report("kvm: LMCE not supported");
                return -ENOTSUP;
            }
            warn_report("Unsupported MCG_CAP bits: 0x%" PRIx64,
                        unsupported_caps);
        }

        /* Drop the capability bits KVM cannot provide before programming. */
        env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK;
        ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap);
        if (ret < 0) {
            fprintf(stderr, "KVM_X86_SETUP_MCE: %s\n", strerror(-ret));
            return ret;
        }
    }

    cpu->vmsentry = qemu_add_vm_change_state_handler(cpu_update_state, env);

    c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
    if (c) {
        has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
                                  !!(c->ecx & CPUID_EXT_SMX);
    }

    c = cpuid_find_entry(&cpuid_data.cpuid, 7, 0);
    if (c && (c->ebx & CPUID_7_0_EBX_SGX)) {
        has_msr_feature_control = true;
    }

    if (env->mcg_cap & MCG_LMCE_P) {
        has_msr_mcg_ext_ctl = has_msr_feature_control = true;
    }

    if (!env->user_tsc_khz) {
        if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
            invtsc_mig_blocker == NULL) {
            error_setg(&invtsc_mig_blocker,
                       "State blocked by non-migratable CPU device"
                       " (invtsc flag)");
            r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
            if (r < 0) {
                error_report_err(local_err);
                return r;
            }
        }
    }

    if (cpu->vmware_cpuid_freq
        /* Guests depend on 0x40000000 to detect this feature, so only expose
         * it if KVM exposes leaf 0x40000000. (Conflicts with Hyper-V) */
        && cpu->expose_kvm
        && kvm_base == KVM_CPUID_SIGNATURE
        /* TSC clock must be stable and known for this feature. */
        && tsc_is_stable_and_known(env)) {

        /*
         * The table may already hold KVM_MAX_CPUID_ENTRIES entries at this
         * point, so check before appending one more instead of writing
         * past the end of cpuid_data.entries.
         */
        if (cpuid_i >= KVM_MAX_CPUID_ENTRIES) {
            fprintf(stderr, "cpuid_data is full, no space for "
                    "cpuid(eax:0x%x,ecx:0x%x)\n",
                    (unsigned)(KVM_CPUID_SIGNATURE | 0x10), 0u);
            abort();
        }

        c = &cpuid_data.entries[cpuid_i++];
        c->function = KVM_CPUID_SIGNATURE | 0x10;
        c->eax = env->tsc_khz;
        c->ebx = env->apic_bus_freq / 1000; /* Hz to KHz */
        c->ecx = c->edx = 0;

        /* Raise the highest hypervisor leaf advertised by the signature leaf. */
        c = cpuid_find_entry(&cpuid_data.cpuid, kvm_base, 0);
        c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10);
    }

    cpuid_data.cpuid.nent = cpuid_i;

    cpuid_data.cpuid.padding = 0;
    r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
    if (r) {
        goto fail;
    }
    kvm_init_xsave(env);

    max_nested_state_len = kvm_max_nested_state_length();
    if (max_nested_state_len > 0) {
        assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));

        if (cpu_has_vmx(env) || cpu_has_svm(env)) {
            env->nested_state = g_malloc0(max_nested_state_len);
            env->nested_state->size = max_nested_state_len;

            kvm_init_nested_state(env);
        }
    }

    cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE);

    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) {
        has_msr_tsc_aux = false;
    }

    kvm_init_msrs(cpu);

    return 0;

 fail:
    migrate_del_blocker(&invtsc_mig_blocker);

    return r;
}
2361a9dc68d9SClaudio Fontana 
/*
 * Release the per-vCPU resources held for KVM: the XSAVE buffer, the MSR
 * buffer and the nested-state buffer, then unregister the VM change-state
 * handler stored in cpu->vmsentry.
 *
 * Each freed pointer is reset to NULL so no stale pointer is left behind
 * in the CPU state.
 *
 * Always returns 0.
 */
int kvm_arch_destroy_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    g_free(env->xsave_buf);
    env->xsave_buf = NULL;

    g_free(cpu->kvm_msr_buf);
    cpu->kvm_msr_buf = NULL;

    g_free(env->nested_state);
    env->nested_state = NULL;

    qemu_del_vm_change_state_handler(cpu->vmsentry);

    return 0;
}
2379a9dc68d9SClaudio Fontana 
/*
 * Reset the KVM-related pieces of @cpu's state: XCR0, the multiprocessing
 * state, the poll-control MSR and the nested state, then call
 * sev_es_set_reset_vector() for the CPU.
 */
void kvm_arch_reset_vcpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;

    env->xcr0 = 1;

    /*
     * Only a non-BSP vCPU with an in-kernel irqchip starts out
     * uninitialized; every other combination is runnable right away.
     */
    if (kvm_irqchip_in_kernel() && !cpu_is_bsp(cpu)) {
        env->mp_state = KVM_MP_STATE_UNINITIALIZED;
    } else {
        env->mp_state = KVM_MP_STATE_RUNNABLE;
    }

    /* enabled by default */
    env->poll_control_msr = 1;

    kvm_init_nested_state(env);

    sev_es_set_reset_vector(CPU(cpu));
}
2399a9dc68d9SClaudio Fontana 
/*
 * Late reset hook: when the Hyper-V SynIC feature is enabled for @cpu,
 * mask every SINT MSR value and reset the SynIC.  Does nothing otherwise.
 */
void kvm_arch_after_reset_vcpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    int sint;

    if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
        return;
    }

    /*
     * Reset SynIC after all other devices have been reset to let them remove
     * their SINT routes first.
     */
    for (sint = 0; sint < ARRAY_SIZE(env->msr_hv_synic_sint); sint++) {
        env->msr_hv_synic_sint[sint] = HV_SINT_MASKED;
    }

    hyperv_x86_synic_reset(cpu);
}
2417ec19444aSMaciej S. Szmigiero 
kvm_arch_reset_parked_vcpu(unsigned long vcpu_id,int kvm_fd)2418*2dc65296SMaciej S. Szmigiero void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd)
2419*2dc65296SMaciej S. Szmigiero {
2420*2dc65296SMaciej S. Szmigiero     g_autofree struct kvm_msrs *msrs = NULL;
2421*2dc65296SMaciej S. Szmigiero 
2422*2dc65296SMaciej S. Szmigiero     msrs = g_malloc0(sizeof(*msrs) + sizeof(msrs->entries[0]));
2423*2dc65296SMaciej S. Szmigiero     msrs->entries[0].index = MSR_IA32_TSC;
2424*2dc65296SMaciej S. Szmigiero     msrs->entries[0].data = 1; /* match the value in x86_cpu_reset() */
2425*2dc65296SMaciej S. Szmigiero     msrs->nmsrs++;
2426*2dc65296SMaciej S. Szmigiero 
2427*2dc65296SMaciej S. Szmigiero     if (ioctl(kvm_fd, KVM_SET_MSRS, msrs) != 1) {
2428*2dc65296SMaciej S. Szmigiero         warn_report("parked vCPU %lu TSC reset failed: %d",
2429*2dc65296SMaciej S. Szmigiero                     vcpu_id, errno);
2430*2dc65296SMaciej S. Szmigiero     }
2431*2dc65296SMaciej S. Szmigiero }
2432*2dc65296SMaciej S. Szmigiero 
/*
 * INIT handling for @cpu: a vCPU whose mp_state is still
 * KVM_MP_STATE_UNINITIALIZED is moved to KVM_MP_STATE_INIT_RECEIVED;
 * any other state is left untouched.
 */
void kvm_arch_do_init_vcpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;

    if (env->mp_state != KVM_MP_STATE_UNINITIALIZED) {
        return;
    }

    /* APs get directly into wait-for-SIPI state.  */
    env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
}
2442a9dc68d9SClaudio Fontana 
/*
 * Fetch the list of feature MSR indices from KVM into the file-level
 * kvm_feature_msrs, once.
 *
 * The list is queried twice: first with nmsrs == 0 to learn how many
 * indices there are (-E2BIG is tolerated for that probe), then again with
 * a buffer large enough to hold them all.
 *
 * Returns 0 when the list is already present, when
 * KVM_CAP_GET_MSR_FEATURES is not available, or on success; otherwise the
 * negative value returned by the failing ioctl.
 */
static int kvm_get_supported_feature_msrs(KVMState *s)
{
    struct kvm_msr_list probe;
    size_t list_bytes;
    int rc;

    if (kvm_feature_msrs != NULL ||
        !kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES)) {
        return 0;
    }

    /* Sizing probe: only the entry count is of interest here. */
    probe.nmsrs = 0;
    rc = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &probe);
    if (rc < 0 && rc != -E2BIG) {
        error_report("Fetch KVM feature MSR list failed: %s",
                     strerror(-rc));
        return rc;
    }

    assert(probe.nmsrs > 0);
    list_bytes = sizeof(probe) + probe.nmsrs * sizeof(probe.indices[0]);
    kvm_feature_msrs = g_malloc0(list_bytes);
    kvm_feature_msrs->nmsrs = probe.nmsrs;

    rc = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs);
    if (rc >= 0) {
        return 0;
    }

    /* Second query failed: report it and leave no half-filled list around. */
    error_report("Fetch KVM feature MSR list failed: %s",
                 strerror(-rc));
    g_free(kvm_feature_msrs);
    kvm_feature_msrs = NULL;
    return rc;
}
2482a9dc68d9SClaudio Fontana 
/*
 * Query KVM for the MSR index list and set the matching file-level
 * has_msr_* / has_tsc_scale_msr flag for every index that is recognised.
 * Indices without a flag are ignored.
 *
 * The list is requested twice: a first call with nmsrs == 0 obtains the
 * entry count (-E2BIG is tolerated for it), the second call fills a
 * heap buffer that is freed before returning.
 *
 * Returns the result of the last KVM_GET_MSR_INDEX_LIST ioctl issued
 * (negative on failure).
 */
static int kvm_get_supported_msrs(KVMState *s)
{
    int ret = 0;
    struct kvm_msr_list msr_list, *kvm_msr_list;

    /*
     *  Obtain MSR list from KVM.  These are the MSRs that we must
     *  save/restore.
     */
    msr_list.nmsrs = 0;
    ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
    if (ret < 0 && ret != -E2BIG) {
        return ret;
    }
    /*
     * Old kernel modules had a bug and could write beyond the provided
     * memory. Allocate at least a safe amount of 1K.
     */
    kvm_msr_list = g_malloc0(MAX(1024, sizeof(msr_list) +
                                          msr_list.nmsrs *
                                          sizeof(msr_list.indices[0])));

    kvm_msr_list->nmsrs = msr_list.nmsrs;
    ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
    if (ret >= 0) {
        int i;

        for (i = 0; i < kvm_msr_list->nmsrs; i++) {
            switch (kvm_msr_list->indices[i]) {
            case MSR_STAR:
                has_msr_star = true;
                break;
            case MSR_VM_HSAVE_PA:
                has_msr_hsave_pa = true;
                break;
            case MSR_TSC_AUX:
                has_msr_tsc_aux = true;
                break;
            case MSR_TSC_ADJUST:
                has_msr_tsc_adjust = true;
                break;
            case MSR_IA32_TSCDEADLINE:
                has_msr_tsc_deadline = true;
                break;
            case MSR_IA32_SMBASE:
                has_msr_smbase = true;
                break;
            case MSR_SMI_COUNT:
                has_msr_smi_count = true;
                break;
            case MSR_IA32_MISC_ENABLE:
                has_msr_misc_enable = true;
                break;
            case MSR_IA32_BNDCFGS:
                has_msr_bndcfgs = true;
                break;
            case MSR_IA32_XSS:
                has_msr_xss = true;
                break;
            case MSR_IA32_UMWAIT_CONTROL:
                has_msr_umwait = true;
                break;
            case HV_X64_MSR_CRASH_CTL:
                has_msr_hv_crash = true;
                break;
            case HV_X64_MSR_RESET:
                has_msr_hv_reset = true;
                break;
            case HV_X64_MSR_VP_INDEX:
                has_msr_hv_vpindex = true;
                break;
            case HV_X64_MSR_VP_RUNTIME:
                has_msr_hv_runtime = true;
                break;
            case HV_X64_MSR_SCONTROL:
                has_msr_hv_synic = true;
                break;
            case HV_X64_MSR_STIMER0_CONFIG:
                has_msr_hv_stimer = true;
                break;
            case HV_X64_MSR_TSC_FREQUENCY:
                has_msr_hv_frequencies = true;
                break;
            case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
                has_msr_hv_reenlightenment = true;
                break;
            case HV_X64_MSR_SYNDBG_OPTIONS:
                has_msr_hv_syndbg_options = true;
                break;
            case MSR_IA32_SPEC_CTRL:
                has_msr_spec_ctrl = true;
                break;
            case MSR_AMD64_TSC_RATIO:
                has_tsc_scale_msr = true;
                break;
            case MSR_IA32_TSX_CTRL:
                has_msr_tsx_ctrl = true;
                break;
            case MSR_VIRT_SSBD:
                has_msr_virt_ssbd = true;
                break;
            case MSR_IA32_ARCH_CAPABILITIES:
                has_msr_arch_capabs = true;
                break;
            case MSR_IA32_CORE_CAPABILITY:
                has_msr_core_capabs = true;
                break;
            case MSR_IA32_PERF_CAPABILITIES:
                has_msr_perf_capabs = true;
                break;
            case MSR_IA32_VMX_VMFUNC:
                has_msr_vmx_vmfunc = true;
                break;
            case MSR_IA32_UCODE_REV:
                has_msr_ucode_rev = true;
                break;
            case MSR_IA32_VMX_PROCBASED_CTLS2:
                has_msr_vmx_procbased_ctls2 = true;
                break;
            case MSR_IA32_PKRS:
                has_msr_pkrs = true;
                break;
            case MSR_K7_HWCR:
                has_msr_hwcr = true;
                break;
            default:
                /* MSR without a dedicated flag: nothing to record. */
                break;
            }
        }
    }

    g_free(kvm_msr_list);

    return ret;
}
2615a9dc68d9SClaudio Fontana 
/*
 * MSR read handler: compose a value from the CPU topology and store it
 * in *val.  @msr is not used.  Always returns true.
 */
static bool kvm_rdmsr_core_thread_count(X86CPU *cpu,
                                        uint32_t msr,
                                        uint64_t *val)
{
    CPUState *cs = CPU(cpu);
    /* thread count, bits 15..0 */
    uint64_t thread_bits = cs->nr_threads * cs->nr_cores;
    /* core count, bits 31..16 */
    uint64_t core_bits = (uint32_t)cs->nr_cores << 16;

    *val = thread_bits | core_bits;

    return true;
}
262737656470SAlexander Graf 
/*
 * MSR read handler returning the msr_unit value kept in the KVM state's
 * msr_energy data (filled from MSR_RAPL_POWER_UNIT by
 * kvm_msr_energy_thread_init()).
 *
 * @cpu: vCPU the read is performed for
 * @msr: MSR index (not used by this handler)
 * @val: receives the stored value
 *
 * Returns: always true.
 */
static bool kvm_rdmsr_rapl_power_unit(X86CPU *cpu, uint32_t msr,
                                      uint64_t *val)
{
    KVMState *s = CPU(cpu)->kvm_state;

    *val = s->msr_energy.msr_unit;
    return true;
}
26390418f908SAnthony Harivel 
/*
 * MSR read handler returning the msr_limit value kept in the KVM state's
 * msr_energy data (filled from MSR_PKG_POWER_LIMIT by
 * kvm_msr_energy_thread_init()).
 *
 * @cpu: vCPU the read is performed for
 * @msr: MSR index (not used by this handler)
 * @val: receives the stored value
 *
 * Returns: always true.
 */
static bool kvm_rdmsr_pkg_power_limit(X86CPU *cpu, uint32_t msr,
                                      uint64_t *val)
{
    KVMState *s = CPU(cpu)->kvm_state;

    *val = s->msr_energy.msr_limit;
    return true;
}
26510418f908SAnthony Harivel 
/*
 * MSR read handler returning the msr_info value kept in the KVM state's
 * msr_energy data (filled from MSR_PKG_POWER_INFO by
 * kvm_msr_energy_thread_init()).
 *
 * @cpu: vCPU the read is performed for
 * @msr: MSR index (not used by this handler)
 * @val: receives the stored value
 *
 * Returns: always true.
 */
static bool kvm_rdmsr_pkg_power_info(X86CPU *cpu, uint32_t msr,
                                     uint64_t *val)
{
    KVMState *s = CPU(cpu)->kvm_state;

    *val = s->msr_energy.msr_info;
    return true;
}
26630418f908SAnthony Harivel 
/*
 * MSR read handler returning the per-vCPU energy value that
 * kvm_msr_energy_thread() stores in msr_energy.msr_value[].
 *
 * @cpu: vCPU the read is performed for; its cpu_index selects the slot
 * @msr: MSR index (not used by this handler)
 * @val: receives the stored value
 *
 * Returns: always true.
 */
static bool kvm_rdmsr_pkg_energy_status(X86CPU *cpu, uint32_t msr,
                                        uint64_t *val)
{
    CPUState *cs = CPU(cpu);
    const struct KVMMsrEnergy *energy = &cs->kvm_state->msr_energy;

    *val = energy->msr_value[cs->cpu_index];
    return true;
}
26740418f908SAnthony Harivel 
2675a9dc68d9SClaudio Fontana static Notifier smram_machine_done;
2676a9dc68d9SClaudio Fontana static KVMMemoryListener smram_listener;
2677a9dc68d9SClaudio Fontana static AddressSpace smram_address_space;
2678a9dc68d9SClaudio Fontana static MemoryRegion smram_as_root;
2679a9dc68d9SClaudio Fontana static MemoryRegion smram_as_mem;
2680a9dc68d9SClaudio Fontana 
register_smram_listener(Notifier * n,void * unused)2681a9dc68d9SClaudio Fontana static void register_smram_listener(Notifier *n, void *unused)
2682a9dc68d9SClaudio Fontana {
2683a9dc68d9SClaudio Fontana     MemoryRegion *smram =
2684a9dc68d9SClaudio Fontana         (MemoryRegion *) object_resolve_path("/machine/smram", NULL);
2685a9dc68d9SClaudio Fontana 
2686a9dc68d9SClaudio Fontana     /* Outer container... */
2687a9dc68d9SClaudio Fontana     memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull);
2688a9dc68d9SClaudio Fontana     memory_region_set_enabled(&smram_as_root, true);
2689a9dc68d9SClaudio Fontana 
2690a9dc68d9SClaudio Fontana     /* ... with two regions inside: normal system memory with low
2691a9dc68d9SClaudio Fontana      * priority, and...
2692a9dc68d9SClaudio Fontana      */
2693a9dc68d9SClaudio Fontana     memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram",
2694a9dc68d9SClaudio Fontana                              get_system_memory(), 0, ~0ull);
2695a9dc68d9SClaudio Fontana     memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0);
2696a9dc68d9SClaudio Fontana     memory_region_set_enabled(&smram_as_mem, true);
2697a9dc68d9SClaudio Fontana 
2698a9dc68d9SClaudio Fontana     if (smram) {
2699a9dc68d9SClaudio Fontana         /* ... SMRAM with higher priority */
2700a9dc68d9SClaudio Fontana         memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10);
2701a9dc68d9SClaudio Fontana         memory_region_set_enabled(smram, true);
2702a9dc68d9SClaudio Fontana     }
2703a9dc68d9SClaudio Fontana 
2704a9dc68d9SClaudio Fontana     address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM");
2705a9dc68d9SClaudio Fontana     kvm_memory_listener_register(kvm_state, &smram_listener,
2706142518bdSPeter Xu                                  &smram_address_space, 1, "kvm-smram");
2707a9dc68d9SClaudio Fontana }
2708a9dc68d9SClaudio Fontana 
kvm_msr_energy_thread(void * data)27090418f908SAnthony Harivel static void *kvm_msr_energy_thread(void *data)
27100418f908SAnthony Harivel {
27110418f908SAnthony Harivel     KVMState *s = data;
27120418f908SAnthony Harivel     struct KVMMsrEnergy *vmsr = &s->msr_energy;
27130418f908SAnthony Harivel 
27140418f908SAnthony Harivel     g_autofree vmsr_package_energy_stat *pkg_stat = NULL;
27150418f908SAnthony Harivel     g_autofree vmsr_thread_stat *thd_stat = NULL;
27160418f908SAnthony Harivel     g_autofree CPUState *cpu = NULL;
27170418f908SAnthony Harivel     g_autofree unsigned int *vpkgs_energy_stat = NULL;
27180418f908SAnthony Harivel     unsigned int num_threads = 0;
27190418f908SAnthony Harivel 
27200418f908SAnthony Harivel     X86CPUTopoIDs topo_ids;
27210418f908SAnthony Harivel 
27220418f908SAnthony Harivel     rcu_register_thread();
27230418f908SAnthony Harivel 
27240418f908SAnthony Harivel     /* Allocate memory for each package energy status */
27250418f908SAnthony Harivel     pkg_stat = g_new0(vmsr_package_energy_stat, vmsr->host_topo.maxpkgs);
27260418f908SAnthony Harivel 
27270418f908SAnthony Harivel     /* Allocate memory for thread stats */
27280418f908SAnthony Harivel     thd_stat = g_new0(vmsr_thread_stat, 1);
27290418f908SAnthony Harivel 
27300418f908SAnthony Harivel     /* Allocate memory for holding virtual package energy counter */
27310418f908SAnthony Harivel     vpkgs_energy_stat = g_new0(unsigned int, vmsr->guest_vsockets);
27320418f908SAnthony Harivel 
27330418f908SAnthony Harivel     /* Populate the max tick of each packages */
27340418f908SAnthony Harivel     for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
27350418f908SAnthony Harivel         /*
27360418f908SAnthony Harivel          * Max numbers of ticks per package
27370418f908SAnthony Harivel          * Time in second * Number of ticks/second * Number of cores/package
27380418f908SAnthony Harivel          * ex: 100 ticks/second/CPU, 12 CPUs per Package gives 1200 ticks max
27390418f908SAnthony Harivel          */
27400418f908SAnthony Harivel         vmsr->host_topo.maxticks[i] = (MSR_ENERGY_THREAD_SLEEP_US / 1000000)
27410418f908SAnthony Harivel                         * sysconf(_SC_CLK_TCK)
27420418f908SAnthony Harivel                         * vmsr->host_topo.pkg_cpu_count[i];
27430418f908SAnthony Harivel     }
27440418f908SAnthony Harivel 
27450418f908SAnthony Harivel     while (true) {
27460418f908SAnthony Harivel         /* Get all qemu threads id */
27475997fbdfSAnthony Harivel         g_autofree pid_t *thread_ids
27485997fbdfSAnthony Harivel             = vmsr_get_thread_ids(vmsr->pid, &num_threads);
27490418f908SAnthony Harivel 
27500418f908SAnthony Harivel         if (thread_ids == NULL) {
27510418f908SAnthony Harivel             goto clean;
27520418f908SAnthony Harivel         }
27530418f908SAnthony Harivel 
27540418f908SAnthony Harivel         thd_stat = g_renew(vmsr_thread_stat, thd_stat, num_threads);
27550418f908SAnthony Harivel         /* Unlike g_new0, g_renew0 function doesn't exist yet... */
27560418f908SAnthony Harivel         memset(thd_stat, 0, num_threads * sizeof(vmsr_thread_stat));
27570418f908SAnthony Harivel 
27580418f908SAnthony Harivel         /* Populate all the thread stats */
27590418f908SAnthony Harivel         for (int i = 0; i < num_threads; i++) {
27600418f908SAnthony Harivel             thd_stat[i].utime = g_new0(unsigned long long, 2);
27610418f908SAnthony Harivel             thd_stat[i].stime = g_new0(unsigned long long, 2);
27620418f908SAnthony Harivel             thd_stat[i].thread_id = thread_ids[i];
27630418f908SAnthony Harivel             vmsr_read_thread_stat(vmsr->pid,
27640418f908SAnthony Harivel                                   thd_stat[i].thread_id,
2765a6e65975SAnthony Harivel                                   &thd_stat[i].utime[0],
2766a6e65975SAnthony Harivel                                   &thd_stat[i].stime[0],
27670418f908SAnthony Harivel                                   &thd_stat[i].cpu_id);
27680418f908SAnthony Harivel             thd_stat[i].pkg_id =
27690418f908SAnthony Harivel                 vmsr_get_physical_package_id(thd_stat[i].cpu_id);
27700418f908SAnthony Harivel         }
27710418f908SAnthony Harivel 
27720418f908SAnthony Harivel         /* Retrieve all packages power plane energy counter */
27730418f908SAnthony Harivel         for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
27740418f908SAnthony Harivel             for (int j = 0; j < num_threads; j++) {
27750418f908SAnthony Harivel                 /*
27760418f908SAnthony Harivel                  * Use the first thread we found that ran on the CPU
27770418f908SAnthony Harivel                  * of the package to read the packages energy counter
27780418f908SAnthony Harivel                  */
27790418f908SAnthony Harivel                 if (thd_stat[j].pkg_id == i) {
27800418f908SAnthony Harivel                     pkg_stat[i].e_start =
27810418f908SAnthony Harivel                     vmsr_read_msr(MSR_PKG_ENERGY_STATUS,
27820418f908SAnthony Harivel                                   thd_stat[j].cpu_id,
27830418f908SAnthony Harivel                                   thd_stat[j].thread_id,
27840418f908SAnthony Harivel                                   s->msr_energy.sioc);
27850418f908SAnthony Harivel                     break;
27860418f908SAnthony Harivel                 }
27870418f908SAnthony Harivel             }
27880418f908SAnthony Harivel         }
27890418f908SAnthony Harivel 
27900418f908SAnthony Harivel         /* Sleep a short period while the other threads are working */
27910418f908SAnthony Harivel         usleep(MSR_ENERGY_THREAD_SLEEP_US);
27920418f908SAnthony Harivel 
27930418f908SAnthony Harivel         /*
27940418f908SAnthony Harivel          * Retrieve all packages power plane energy counter
27950418f908SAnthony Harivel          * Calculate the delta of all packages
27960418f908SAnthony Harivel          */
27970418f908SAnthony Harivel         for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
27980418f908SAnthony Harivel             for (int j = 0; j < num_threads; j++) {
27990418f908SAnthony Harivel                 /*
28000418f908SAnthony Harivel                  * Use the first thread we found that ran on the CPU
28010418f908SAnthony Harivel                  * of the package to read the packages energy counter
28020418f908SAnthony Harivel                  */
28030418f908SAnthony Harivel                 if (thd_stat[j].pkg_id == i) {
28040418f908SAnthony Harivel                     pkg_stat[i].e_end =
28050418f908SAnthony Harivel                     vmsr_read_msr(MSR_PKG_ENERGY_STATUS,
28060418f908SAnthony Harivel                                   thd_stat[j].cpu_id,
28070418f908SAnthony Harivel                                   thd_stat[j].thread_id,
28080418f908SAnthony Harivel                                   s->msr_energy.sioc);
28090418f908SAnthony Harivel                     /*
28100418f908SAnthony Harivel                      * Prevent the case we have migrate the VM
28110418f908SAnthony Harivel                      * during the sleep period or any other cases
28120418f908SAnthony Harivel                      * were energy counter might be lower after
28130418f908SAnthony Harivel                      * the sleep period.
28140418f908SAnthony Harivel                      */
28150418f908SAnthony Harivel                     if (pkg_stat[i].e_end > pkg_stat[i].e_start) {
28160418f908SAnthony Harivel                         pkg_stat[i].e_delta =
28170418f908SAnthony Harivel                             pkg_stat[i].e_end - pkg_stat[i].e_start;
28180418f908SAnthony Harivel                     } else {
28190418f908SAnthony Harivel                         pkg_stat[i].e_delta = 0;
28200418f908SAnthony Harivel                     }
28210418f908SAnthony Harivel                     break;
28220418f908SAnthony Harivel                 }
28230418f908SAnthony Harivel             }
28240418f908SAnthony Harivel         }
28250418f908SAnthony Harivel 
28260418f908SAnthony Harivel         /* Delta of ticks spend by each thread between the sample */
28270418f908SAnthony Harivel         for (int i = 0; i < num_threads; i++) {
28280418f908SAnthony Harivel             vmsr_read_thread_stat(vmsr->pid,
28290418f908SAnthony Harivel                                   thd_stat[i].thread_id,
2830a6e65975SAnthony Harivel                                   &thd_stat[i].utime[1],
2831a6e65975SAnthony Harivel                                   &thd_stat[i].stime[1],
28320418f908SAnthony Harivel                                   &thd_stat[i].cpu_id);
28330418f908SAnthony Harivel 
28340418f908SAnthony Harivel             if (vmsr->pid < 0) {
28350418f908SAnthony Harivel                 /*
28360418f908SAnthony Harivel                  * We don't count the dead thread
28370418f908SAnthony Harivel                  * i.e threads that existed before the sleep
28380418f908SAnthony Harivel                  * and not anymore
28390418f908SAnthony Harivel                  */
28400418f908SAnthony Harivel                 thd_stat[i].delta_ticks = 0;
28410418f908SAnthony Harivel             } else {
28420418f908SAnthony Harivel                 vmsr_delta_ticks(thd_stat, i);
28430418f908SAnthony Harivel             }
28440418f908SAnthony Harivel         }
28450418f908SAnthony Harivel 
28460418f908SAnthony Harivel         /*
28470418f908SAnthony Harivel          * Identify the vcpu threads
28480418f908SAnthony Harivel          * Calculate the number of vcpu per package
28490418f908SAnthony Harivel          */
28500418f908SAnthony Harivel         CPU_FOREACH(cpu) {
28510418f908SAnthony Harivel             for (int i = 0; i < num_threads; i++) {
28520418f908SAnthony Harivel                 if (cpu->thread_id == thd_stat[i].thread_id) {
28530418f908SAnthony Harivel                     thd_stat[i].is_vcpu = true;
28540418f908SAnthony Harivel                     thd_stat[i].vcpu_id = cpu->cpu_index;
28550418f908SAnthony Harivel                     pkg_stat[thd_stat[i].pkg_id].nb_vcpu++;
28560418f908SAnthony Harivel                     thd_stat[i].acpi_id = kvm_arch_vcpu_id(cpu);
28570418f908SAnthony Harivel                     break;
28580418f908SAnthony Harivel                 }
28590418f908SAnthony Harivel             }
28600418f908SAnthony Harivel         }
28610418f908SAnthony Harivel 
28620418f908SAnthony Harivel         /* Retrieve the virtual package number of each vCPU */
28630418f908SAnthony Harivel         for (int i = 0; i < vmsr->guest_cpu_list->len; i++) {
28640418f908SAnthony Harivel             for (int j = 0; j < num_threads; j++) {
28650418f908SAnthony Harivel                 if ((thd_stat[j].acpi_id ==
28660418f908SAnthony Harivel                         vmsr->guest_cpu_list->cpus[i].arch_id)
28670418f908SAnthony Harivel                     && (thd_stat[j].is_vcpu == true)) {
28680418f908SAnthony Harivel                     x86_topo_ids_from_apicid(thd_stat[j].acpi_id,
28690418f908SAnthony Harivel                         &vmsr->guest_topo_info, &topo_ids);
28700418f908SAnthony Harivel                     thd_stat[j].vpkg_id = topo_ids.pkg_id;
28710418f908SAnthony Harivel                 }
28720418f908SAnthony Harivel             }
28730418f908SAnthony Harivel         }
28740418f908SAnthony Harivel 
28750418f908SAnthony Harivel         /* Calculate the total energy of all non-vCPU thread */
28760418f908SAnthony Harivel         for (int i = 0; i < num_threads; i++) {
28770418f908SAnthony Harivel             if ((thd_stat[i].is_vcpu != true) &&
28780418f908SAnthony Harivel                 (thd_stat[i].delta_ticks > 0)) {
28790418f908SAnthony Harivel                 double temp;
28800418f908SAnthony Harivel                 temp = vmsr_get_ratio(pkg_stat[thd_stat[i].pkg_id].e_delta,
28810418f908SAnthony Harivel                     thd_stat[i].delta_ticks,
28820418f908SAnthony Harivel                     vmsr->host_topo.maxticks[thd_stat[i].pkg_id]);
28830418f908SAnthony Harivel                 pkg_stat[thd_stat[i].pkg_id].e_ratio
28840418f908SAnthony Harivel                     += (uint64_t)lround(temp);
28850418f908SAnthony Harivel             }
28860418f908SAnthony Harivel         }
28870418f908SAnthony Harivel 
28880418f908SAnthony Harivel         /* Calculate the ratio per non-vCPU thread of each package */
28890418f908SAnthony Harivel         for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
28900418f908SAnthony Harivel             if (pkg_stat[i].nb_vcpu > 0) {
28910418f908SAnthony Harivel                 pkg_stat[i].e_ratio = pkg_stat[i].e_ratio / pkg_stat[i].nb_vcpu;
28920418f908SAnthony Harivel             }
28930418f908SAnthony Harivel         }
28940418f908SAnthony Harivel 
28950418f908SAnthony Harivel         /*
28960418f908SAnthony Harivel          * Calculate the energy for each Package:
28970418f908SAnthony Harivel          * Energy Package = sum of each vCPU energy that belongs to the package
28980418f908SAnthony Harivel          */
28990418f908SAnthony Harivel         for (int i = 0; i < num_threads; i++) {
29000418f908SAnthony Harivel             if ((thd_stat[i].is_vcpu == true) && \
29010418f908SAnthony Harivel                     (thd_stat[i].delta_ticks > 0)) {
29020418f908SAnthony Harivel                 double temp;
29030418f908SAnthony Harivel                 temp = vmsr_get_ratio(pkg_stat[thd_stat[i].pkg_id].e_delta,
29040418f908SAnthony Harivel                     thd_stat[i].delta_ticks,
29050418f908SAnthony Harivel                     vmsr->host_topo.maxticks[thd_stat[i].pkg_id]);
29060418f908SAnthony Harivel                 vpkgs_energy_stat[thd_stat[i].vpkg_id] +=
29070418f908SAnthony Harivel                     (uint64_t)lround(temp);
29080418f908SAnthony Harivel                 vpkgs_energy_stat[thd_stat[i].vpkg_id] +=
29090418f908SAnthony Harivel                     pkg_stat[thd_stat[i].pkg_id].e_ratio;
29100418f908SAnthony Harivel             }
29110418f908SAnthony Harivel         }
29120418f908SAnthony Harivel 
29130418f908SAnthony Harivel         /*
29140418f908SAnthony Harivel          * Finally populate the vmsr register of each vCPU with the total
29150418f908SAnthony Harivel          * package value to emulate the real hardware where each CPU return the
29160418f908SAnthony Harivel          * value of the package it belongs.
29170418f908SAnthony Harivel          */
29180418f908SAnthony Harivel         for (int i = 0; i < num_threads; i++) {
29190418f908SAnthony Harivel             if ((thd_stat[i].is_vcpu == true) && \
29200418f908SAnthony Harivel                     (thd_stat[i].delta_ticks > 0)) {
29210418f908SAnthony Harivel                 vmsr->msr_value[thd_stat[i].vcpu_id] = \
29220418f908SAnthony Harivel                                         vpkgs_energy_stat[thd_stat[i].vpkg_id];
29230418f908SAnthony Harivel           }
29240418f908SAnthony Harivel         }
29250418f908SAnthony Harivel 
29260418f908SAnthony Harivel         /* Freeing memory before zeroing the pointer */
29270418f908SAnthony Harivel         for (int i = 0; i < num_threads; i++) {
29280418f908SAnthony Harivel             g_free(thd_stat[i].utime);
29290418f908SAnthony Harivel             g_free(thd_stat[i].stime);
29300418f908SAnthony Harivel         }
29310418f908SAnthony Harivel    }
29320418f908SAnthony Harivel 
29330418f908SAnthony Harivel clean:
29340418f908SAnthony Harivel     rcu_unregister_thread();
29350418f908SAnthony Harivel     return NULL;
29360418f908SAnthony Harivel }
29370418f908SAnthony Harivel 
/*
 * Validate the host, collect the guest and host topology, connect to the
 * vmsr helper socket and start the "kvm-msr" energy thread.
 *
 * @s: KVM state whose msr_energy data is initialised
 * @ms: machine providing the guest SMP configuration
 *
 * Returns: 0 on success, 1 when a requirement is not met (non-Intel host,
 * RAPL not enabled, host topology could not be determined, helper socket
 * could not be opened, or a RAPL MSR read back as 0).
 */
static int kvm_msr_energy_thread_init(KVMState *s, MachineState *ms)
{
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    struct KVMMsrEnergy *r = &s->msr_energy;
    int ret = 0;

    /*
     * Sanity check
     * 1. Host cpu must be Intel cpu
     * 2. RAPL must be enabled on the Host
     */
    if (!is_host_cpu_intel()) {
        error_report("The RAPL feature can only be enabled on hosts "
                     "with Intel CPU models");
        ret = 1;
        goto out;
    }

    if (!is_rapl_enabled()) {
        ret = 1;
        goto out;
    }

    /* Retrieve the virtual topology */
    vmsr_init_topo_info(&r->guest_topo_info, ms);

    /* Retrieve the number of vcpu */
    r->guest_vcpus = ms->smp.cpus;

    /* Retrieve the number of virtual sockets */
    r->guest_vsockets = ms->smp.sockets;

    /*
     * Allocate register memory (MSR_PKG_STATUS) for every possible vcpu.
     * The array is indexed by cpu_index, which for hot-plugged vCPUs can
     * exceed smp.cpus, so size it by smp.max_cpus to keep those accesses
     * in bounds.
     */
    r->msr_value = g_new0(uint64_t, ms->smp.max_cpus);

    /* Retrieve the CPUArchIDlist */
    r->guest_cpu_list = mc->possible_cpu_arch_ids(ms);

    /* Max number of cpus on the Host */
    r->host_topo.maxcpus = vmsr_get_maxcpus();
    if (r->host_topo.maxcpus == 0) {
        error_report("host max cpus = 0");
        ret = 1;
        goto out;
    }

    /* Max number of packages on the host */
    r->host_topo.maxpkgs = vmsr_get_max_physical_package(r->host_topo.maxcpus);
    if (r->host_topo.maxpkgs == 0) {
        error_report("host max pkgs = 0");
        ret = 1;
        goto out;
    }

    /* Allocate memory for each package on the host */
    r->host_topo.pkg_cpu_count = g_new0(unsigned int, r->host_topo.maxpkgs);
    r->host_topo.maxticks = g_new0(unsigned int, r->host_topo.maxpkgs);

    vmsr_count_cpus_per_package(r->host_topo.pkg_cpu_count,
                                r->host_topo.maxpkgs);
    for (int i = 0; i < r->host_topo.maxpkgs; i++) {
        if (r->host_topo.pkg_cpu_count[i] == 0) {
            error_report("cpu per packages = 0 on package_%d", i);
            ret = 1;
            goto out;
        }
    }

    /* Get QEMU PID */
    r->pid = getpid();

    /* Compute the socket path if necessary */
    if (r->socket_path == NULL) {
        r->socket_path = vmsr_compute_default_paths();
    }

    /* Open socket with vmsr helper */
    r->sioc = vmsr_open_socket(r->socket_path);

    if (r->sioc == NULL) {
        error_report("vmsr socket opening failed");
        ret = 1;
        goto out;
    }

    /* Those MSR values should not change */
    r->msr_unit  = vmsr_read_msr(MSR_RAPL_POWER_UNIT, 0, r->pid, r->sioc);
    r->msr_limit = vmsr_read_msr(MSR_PKG_POWER_LIMIT, 0, r->pid, r->sioc);
    r->msr_info  = vmsr_read_msr(MSR_PKG_POWER_INFO, 0, r->pid, r->sioc);
    if (r->msr_unit == 0 || r->msr_limit == 0 || r->msr_info == 0) {
        error_report("can't read any virtual msr");
        ret = 1;
        goto out;
    }

    qemu_thread_create(&r->msr_thr, "kvm-msr",
                       kvm_msr_energy_thread,
                       s, QEMU_THREAD_JOINABLE);
out:
    return ret;
}
30420418f908SAnthony Harivel 
/*
 * Report the default type for this architecture.
 *
 * @ms: machine being queried (not consulted)
 *
 * Returns: always 0.
 */
int kvm_arch_get_default_type(MachineState *ms)
{
    const int default_type = 0;

    return default_type;
}
30475e0d6590SAkihiko Odaki 
/*
 * Probe KVM_CAP_EXCEPTION_PAYLOAD, record the result in
 * has_exception_payload and enable the capability when it is available.
 *
 * @s: KVM state of the VM
 *
 * Returns: 0 when the capability is absent, otherwise the result of
 * kvm_vm_enable_cap() (an error is reported when it is negative).
 */
static int kvm_vm_enable_exception_payload(KVMState *s)
{
    int rc;

    has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD);
    if (!has_exception_payload) {
        return 0;
    }

    rc = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true);
    if (rc < 0) {
        error_report("kvm: Failed to enable exception payload cap: %s",
                     strerror(-rc));
    }
    return rc;
}
30620cc42e63SAni Sinha 
/*
 * Probe KVM_CAP_X86_TRIPLE_FAULT_EVENT, record the result in
 * has_triple_fault_event and enable the capability when it is available.
 *
 * @s: KVM state of the VM
 *
 * Returns: 0 when the capability is absent, otherwise the result of
 * kvm_vm_enable_cap() (an error is reported when it is negative).
 */
static int kvm_vm_enable_triple_fault_event(KVMState *s)
{
    int rc;

    has_triple_fault_event =
        kvm_check_extension(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT);
    if (!has_triple_fault_event) {
        return 0;
    }

    rc = kvm_vm_enable_cap(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 0, true);
    if (rc < 0) {
        error_report("kvm: Failed to enable triple fault event cap: %s",
                     strerror(-rc));
    }
    return rc;
}
30780cc42e63SAni Sinha 
/*
 * Issue the KVM_SET_IDENTITY_MAP_ADDR VM ioctl.
 *
 * @s: KVM state of the VM
 * @identity_base: address handed to the ioctl (passed by pointer)
 *
 * Returns: the result of kvm_vm_ioctl().
 */
static int kvm_vm_set_identity_map_addr(KVMState *s, uint64_t identity_base)
{
    int rc = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base);

    return rc;
}
30830cc42e63SAni Sinha 
/*
 * Apply the "kvm-shadow-mem" property of @s through KVM_SET_NR_MMU_PAGES.
 *
 * The property value is divided by 4096 before being passed to the ioctl.
 * A value of -1 leaves the kernel setting untouched.
 *
 * @s: KVM state of the VM
 *
 * Returns: 0 when the property is -1, otherwise the result of
 * kvm_vm_ioctl().
 */
static int kvm_vm_set_nr_mmu_pages(KVMState *s)
{
    uint64_t shadow_mem = object_property_get_int(OBJECT(s),
                                                  "kvm-shadow-mem",
                                                  &error_abort);

    if (shadow_mem == -1) {
        return 0;
    }

    return kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem / 4096);
}
30970cc42e63SAni Sinha 
/*
 * Issue the KVM_SET_TSS_ADDR VM ioctl.
 *
 * @s: KVM state of the VM
 * @tss_base: address handed to the ioctl (passed by value)
 *
 * Returns: the result of kvm_vm_ioctl().
 */
static int kvm_vm_set_tss_addr(KVMState *s, uint64_t tss_base)
{
    int rc = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, tss_base);

    return rc;
}
31020cc42e63SAni Sinha 
/* Work around for kernel header with a typo. TODO: fix header and drop. */
#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT)
#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL
#endif

/*
 * Enable KVM_CAP_X86_DISABLE_EXITS with the subset of the MWAIT, HLT,
 * PAUSE and CSTATE exit-disable bits that kvm_check_extension() reports.
 *
 * @s: KVM state of the VM
 *
 * Returns: the result of kvm_vm_enable_cap().
 */
static int kvm_vm_enable_disable_exits(KVMState *s)
{
    int supported = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS);
    int wanted = KVM_X86_DISABLE_EXITS_MWAIT |
                 KVM_X86_DISABLE_EXITS_HLT |
                 KVM_X86_DISABLE_EXITS_PAUSE |
                 KVM_X86_DISABLE_EXITS_CSTATE;

    /* A zero probe result stays zero after masking */
    return kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0,
                             supported & wanted);
}
31200cc42e63SAni Sinha 
/*
 * Enable KVM_CAP_X86_BUS_LOCK_EXIT in KVM_BUS_LOCK_DETECTION_EXIT mode.
 *
 * @s: KVM state of the VM
 *
 * Returns: -ENOTSUP when kvm_check_extension() does not report
 * KVM_BUS_LOCK_DETECTION_EXIT, otherwise the result of kvm_vm_enable_cap()
 * (an error is reported when it is negative).
 */
static int kvm_vm_enable_bus_lock_exit(KVMState *s)
{
    int modes = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
    int rc;

    if ((modes & KVM_BUS_LOCK_DETECTION_EXIT) == 0) {
        error_report("kvm: bus lock detection unsupported");
        return -ENOTSUP;
    }

    rc = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
                           KVM_BUS_LOCK_DETECTION_EXIT);
    if (rc < 0) {
        error_report("kvm: Failed to enable bus lock detection cap: %s",
                     strerror(-rc));
    }

    return rc;
}
31380cc42e63SAni Sinha 
/*
 * Enable KVM_CAP_X86_NOTIFY_VMEXIT unless the notify-vmexit option is
 * NOTIFY_VMEXIT_OPTION_DISABLE.
 *
 * The capability argument carries s->notify_window in its upper 32 bits,
 * OR-ed with the ENABLED and USER flags.
 *
 * Returns 0 when the option is disabled, otherwise the result of
 * kvm_vm_enable_cap() (an error is reported if it is negative).
 */
static int kvm_vm_enable_notify_vmexit(KVMState *s)
{
    uint64_t cap_arg;
    int rc;

    if (s->notify_vmexit == NOTIFY_VMEXIT_OPTION_DISABLE) {
        return 0;
    }

    cap_arg = (uint64_t)s->notify_window << 32;
    cap_arg |= KVM_X86_NOTIFY_VMEXIT_ENABLED;
    cap_arg |= KVM_X86_NOTIFY_VMEXIT_USER;

    rc = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0, cap_arg);
    if (rc < 0) {
        error_report("kvm: Failed to enable notify vmexit cap: %s",
                     strerror(-rc));
    }
    return rc;
}
31560cc42e63SAni Sinha 
/*
 * Enable KVM_CAP_X86_USER_SPACE_MSR with KVM_MSR_EXIT_REASON_FILTER and
 * register the read handler for MSR_CORE_THREAD_COUNT.
 *
 * Either step failing is fatal: an error is reported and the process
 * exits with status 1.  Returns 0 otherwise.
 */
static int kvm_vm_enable_userspace_msr(KVMState *s)
{
    bool installed;
    int rc;

    rc = kvm_vm_enable_cap(s, KVM_CAP_X86_USER_SPACE_MSR, 0,
                           KVM_MSR_EXIT_REASON_FILTER);
    if (rc < 0) {
        error_report("Could not enable user space MSRs: %s",
                     strerror(-rc));
        exit(1);
    }

    installed = kvm_filter_msr(s, MSR_CORE_THREAD_COUNT,
                               kvm_rdmsr_core_thread_count, NULL);
    if (!installed) {
        error_report("Could not install MSR_CORE_THREAD_COUNT handler!");
        exit(1);
    }

    return 0;
}
31750cc42e63SAni Sinha 
/*
 * Register the read handlers for the RAPL energy MSRs
 * (MSR_RAPL_POWER_UNIT, MSR_PKG_POWER_LIMIT, MSR_PKG_POWER_INFO and
 * MSR_PKG_ENERGY_STATUS) when s->msr_energy.enable is set.
 *
 * Does nothing when the feature is not enabled.  A handler that cannot be
 * installed is fatal: an error is reported and the process exits with
 * status 1.
 *
 * The error messages are built from adjacent string literals; splitting a
 * literal with a backslash-newline would splice the continuation line's
 * indentation into the message text.
 */
static void kvm_vm_enable_energy_msrs(KVMState *s)
{
    bool r;

    if (s->msr_energy.enable != true) {
        return;
    }

    r = kvm_filter_msr(s, MSR_RAPL_POWER_UNIT,
                       kvm_rdmsr_rapl_power_unit, NULL);
    if (!r) {
        error_report("Could not install MSR_RAPL_POWER_UNIT "
                     "handler");
        exit(1);
    }

    r = kvm_filter_msr(s, MSR_PKG_POWER_LIMIT,
                       kvm_rdmsr_pkg_power_limit, NULL);
    if (!r) {
        error_report("Could not install MSR_PKG_POWER_LIMIT "
                     "handler");
        exit(1);
    }

    r = kvm_filter_msr(s, MSR_PKG_POWER_INFO,
                       kvm_rdmsr_pkg_power_info, NULL);
    if (!r) {
        error_report("Could not install MSR_PKG_POWER_INFO "
                     "handler");
        exit(1);
    }

    r = kvm_filter_msr(s, MSR_PKG_ENERGY_STATUS,
                       kvm_rdmsr_pkg_energy_status, NULL);
    if (!r) {
        error_report("Could not install MSR_PKG_ENERGY_STATUS "
                     "handler");
        exit(1);
    }
}
32130cc42e63SAni Sinha 
/*
 * x86 architecture hook for KVM initialization.
 *
 * Runs the VM-wide setup steps below in order.  Most steps abort the
 * sequence by returning their negative error code; 0 is returned once
 * everything has completed.  Exceptions: a failure to enable the CPU
 * power-management exits is only reported, and failures while setting up
 * userspace MSR handling or the RAPL energy thread terminate the process
 * with exit(1).
 */
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    Error *err = NULL;
    struct utsname uts;
    int rc;

    /*
     * Confidential guest support (e.g. SEV) is only initialized when the
     * machine has one configured; with ms->cgs == NULL there is nothing
     * to do here.
     */
    if (ms->cgs) {
        rc = confidential_guest_kvm_init(ms->cgs, &err);
        if (rc < 0) {
            error_report_err(err);
            return rc;
        }
    }

    /* Record which optional capabilities KVM reports. */
    has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
    has_sregs2 = kvm_check_extension(s, KVM_CAP_SREGS2) > 0;

    hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX);

    rc = kvm_vm_enable_exception_payload(s);
    if (rc < 0) {
        return rc;
    }

    rc = kvm_vm_enable_triple_fault_event(s);
    if (rc < 0) {
        return rc;
    }

    /* Xen emulation was requested: needs CONFIG_XEN_EMU and a PC machine. */
    if (s->xen_version) {
#ifdef CONFIG_XEN_EMU
        if (!object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)) {
            error_report("kvm: Xen support only available in PC machine");
            return -ENOTSUP;
        }
        /* hyperv_enabled() doesn't work yet. */
        rc = kvm_xen_init(s, XEN_HYPERCALL_MSR);
        if (rc < 0) {
            return rc;
        }
#else
        error_report("kvm: Xen support not enabled in qemu");
        return -ENOTSUP;
#endif
    }

    rc = kvm_get_supported_msrs(s);
    if (rc < 0) {
        return rc;
    }

    kvm_get_supported_feature_msrs(s);

    /* The host kernel counts as long-mode capable iff it is "x86_64". */
    uname(&uts);
    lm_capable_kernel = !strcmp(uts.machine, "x86_64");

    rc = kvm_vm_set_identity_map_addr(s, KVM_IDENTITY_BASE);
    if (rc < 0) {
        return rc;
    }

    /* The TSS base goes one page after the EPT identity map. */
    rc = kvm_vm_set_tss_addr(s, KVM_IDENTITY_BASE + 0x1000);
    if (rc < 0) {
        return rc;
    }

    /* Have fw_cfg tell the BIOS to reserve the whole range. */
    e820_add_entry(KVM_IDENTITY_BASE, 0x4000, E820_RESERVED);

    rc = kvm_vm_set_nr_mmu_pages(s);
    if (rc < 0) {
        return rc;
    }

    /*
     * Register the SMRAM listener at machine-init-done time, but only for
     * an x86 machine with SMM enabled and KVM_CAP_X86_SMM available.
     */
    if (kvm_check_extension(s, KVM_CAP_X86_SMM)) {
        if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) &&
            x86_machine_is_smm_enabled(X86_MACHINE(ms))) {
            smram_machine_done.notify = register_smram_listener;
            qemu_add_machine_init_done_notifier(&smram_machine_done);
        }
    }

    if (enable_cpu_pm) {
        rc = kvm_vm_enable_disable_exits(s);
        if (rc < 0) {
            /* Not fatal: report it and carry on. */
            error_report("kvm: guest stopping CPU not supported: %s",
                         strerror(-rc));
        }
    }

    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
        X86MachineState *x86ms = X86_MACHINE(ms);

        /* A positive rate limit needs bus lock exits to be enabled. */
        if (x86ms->bus_lock_ratelimit > 0) {
            rc = kvm_vm_enable_bus_lock_exit(s);
            if (rc < 0) {
                return rc;
            }
            ratelimit_init(&bus_lock_ratelimit_ctrl);
            ratelimit_set_speed(&bus_lock_ratelimit_ctrl,
                                x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME);
        }
    }

    if (kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) {
        rc = kvm_vm_enable_notify_vmexit(s);
        if (rc < 0) {
            return rc;
        }
    }

    /* Everything below depends on userspace MSR handling. */
    if (!kvm_vm_check_extension(s, KVM_CAP_X86_USER_SPACE_MSR)) {
        return 0;
    }

    rc = kvm_vm_enable_userspace_msr(s);
    if (rc < 0) {
        return rc;
    }

    if (s->msr_energy.enable == true) {
        kvm_vm_enable_energy_msrs(s);
        if (kvm_msr_energy_thread_init(s, ms)) {
            error_report("kvm : error RAPL feature requirement not met");
            exit(1);
        }
    }

    return 0;
}
3355a9dc68d9SClaudio Fontana 
set_v8086_seg(struct kvm_segment * lhs,const SegmentCache * rhs)3356a9dc68d9SClaudio Fontana static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
3357a9dc68d9SClaudio Fontana {
3358a9dc68d9SClaudio Fontana     lhs->selector = rhs->selector;
3359a9dc68d9SClaudio Fontana     lhs->base = rhs->base;
3360a9dc68d9SClaudio Fontana     lhs->limit = rhs->limit;
3361a9dc68d9SClaudio Fontana     lhs->type = 3;
3362a9dc68d9SClaudio Fontana     lhs->present = 1;
3363a9dc68d9SClaudio Fontana     lhs->dpl = 3;
3364a9dc68d9SClaudio Fontana     lhs->db = 0;
3365a9dc68d9SClaudio Fontana     lhs->s = 1;
3366a9dc68d9SClaudio Fontana     lhs->l = 0;
3367a9dc68d9SClaudio Fontana     lhs->g = 0;
3368a9dc68d9SClaudio Fontana     lhs->avl = 0;
3369a9dc68d9SClaudio Fontana     lhs->unusable = 0;
3370a9dc68d9SClaudio Fontana }
3371a9dc68d9SClaudio Fontana 
set_seg(struct kvm_segment * lhs,const SegmentCache * rhs)3372a9dc68d9SClaudio Fontana static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
3373a9dc68d9SClaudio Fontana {
3374a9dc68d9SClaudio Fontana     unsigned flags = rhs->flags;
3375a9dc68d9SClaudio Fontana     lhs->selector = rhs->selector;
3376a9dc68d9SClaudio Fontana     lhs->base = rhs->base;
3377a9dc68d9SClaudio Fontana     lhs->limit = rhs->limit;
3378a9dc68d9SClaudio Fontana     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
3379a9dc68d9SClaudio Fontana     lhs->present = (flags & DESC_P_MASK) != 0;
3380a9dc68d9SClaudio Fontana     lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;
3381a9dc68d9SClaudio Fontana     lhs->db = (flags >> DESC_B_SHIFT) & 1;
3382a9dc68d9SClaudio Fontana     lhs->s = (flags & DESC_S_MASK) != 0;
3383a9dc68d9SClaudio Fontana     lhs->l = (flags >> DESC_L_SHIFT) & 1;
3384a9dc68d9SClaudio Fontana     lhs->g = (flags & DESC_G_MASK) != 0;
3385a9dc68d9SClaudio Fontana     lhs->avl = (flags & DESC_AVL_MASK) != 0;
3386a9dc68d9SClaudio Fontana     lhs->unusable = !lhs->present;
3387a9dc68d9SClaudio Fontana     lhs->padding = 0;
3388a9dc68d9SClaudio Fontana }
3389a9dc68d9SClaudio Fontana 
/*
 * Convert the KVM segment @rhs back into the QEMU segment cache @lhs.
 *
 * Selector, base and limit are copied; lhs->flags is rebuilt from the
 * individual attribute fields.  The present bit is set only if the
 * segment is both present and not marked unusable.
 */
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    unsigned attrs = 0;

    attrs |= rhs->type << DESC_TYPE_SHIFT;
    attrs |= (rhs->present && !rhs->unusable) * DESC_P_MASK;
    attrs |= rhs->dpl << DESC_DPL_SHIFT;
    attrs |= rhs->db << DESC_B_SHIFT;
    attrs |= rhs->s * DESC_S_MASK;
    attrs |= rhs->l << DESC_L_SHIFT;
    attrs |= rhs->g * DESC_G_MASK;
    attrs |= rhs->avl * DESC_AVL_MASK;

    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = attrs;
}
3404a9dc68d9SClaudio Fontana 
/*
 * Copy one register value between the KVM and QEMU representations.
 *
 * With @set non-zero the QEMU value is stored into *@kvm_reg; with @set
 * zero the KVM value is stored into *@qemu_reg.
 */
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (!set) {
        *qemu_reg = *kvm_reg;
        return;
    }

    *kvm_reg = *qemu_reg;
}
3413a9dc68d9SClaudio Fontana 
/*
 * Transfer the general-purpose registers, RFLAGS and RIP between the QEMU
 * CPU state and KVM.
 *
 * With @set zero, the registers are read with KVM_GET_REGS and copied into
 * cpu->env.  With @set non-zero, they are copied out of cpu->env and
 * written with KVM_SET_REGS.
 *
 * Returns the result of the ioctl that was issued; on a failed
 * KVM_GET_REGS the QEMU state is left untouched.
 */
static int kvm_getput_regs(X86CPU *cpu, int set)
{
    CPUX86State *env = &cpu->env;
    /*
     * Zero-initialized so that fields not filled in below (r8-r15 when
     * TARGET_X86_64 is not defined) are never passed to KVM_SET_REGS with
     * indeterminate contents.
     */
    struct kvm_regs regs = {};
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, &regs);
        if (ret < 0) {
            return ret;
        }
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, &regs);
    }

    return ret;
}
3455a9dc68d9SClaudio Fontana 
/*
 * Serialize the CPU state into the per-CPU xsave buffer with
 * x86_cpu_xsave_all_areas() and hand that buffer to KVM_SET_XSAVE.
 *
 * Returns the result of the ioctl.
 */
static int kvm_put_xsave(X86CPU *cpu)
{
    void *buf = cpu->env.xsave_buf;

    x86_cpu_xsave_all_areas(cpu, buf, cpu->env.xsave_buf_len);

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, buf);
}
3465a9dc68d9SClaudio Fontana 
/*
 * Write env->xcr0 to KVM as XCR number 0 via KVM_SET_XCRS.
 *
 * Returns 0 without doing anything when has_xcrs is false, otherwise the
 * result of the ioctl.
 */
static int kvm_put_xcrs(X86CPU *cpu)
{
    if (!has_xcrs) {
        return 0;
    }

    /* A single entry; every member not named here is zero. */
    struct kvm_xcrs xcrs = {
        .nr_xcrs = 1,
        .flags = 0,
        .xcrs[0].xcr = 0,
        .xcrs[0].value = cpu->env.xcr0,
    };

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs);
}
3481a9dc68d9SClaudio Fontana 
/*
 * Build a struct kvm_sregs from the QEMU CPU state and write it with
 * KVM_SET_SREGS.
 *
 * The six data/code/stack segments use the fixed set_v8086_seg() layout
 * while EFLAGS.VM is set and set_seg() otherwise; TR and LDT always use
 * set_seg().  Returns the result of the ioctl.
 */
static int kvm_put_sregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs sregs;
    void (*put_seg)(struct kvm_segment *, const SegmentCache *);

    /*
     * KVM_SET_SREGS is always followed by KVM_SET_VCPU_EVENTS, so the
     * interrupt_bitmap is ignored and simply cleared here.
     */
    memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    put_seg = (env->eflags & VM_MASK) ? set_v8086_seg : set_seg;
    put_seg(&sregs.cs, &env->segs[R_CS]);
    put_seg(&sregs.ds, &env->segs[R_DS]);
    put_seg(&sregs.es, &env->segs[R_ES]);
    put_seg(&sregs.fs, &env->segs[R_FS]);
    put_seg(&sregs.gs, &env->segs[R_GS]);
    put_seg(&sregs.ss, &env->segs[R_SS]);

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    /* Descriptor tables. */
    sregs.idt.base = env->idt.base;
    sregs.idt.limit = env->idt.limit;
    memset(sregs.idt.padding, 0, sizeof(sregs.idt.padding));
    sregs.gdt.base = env->gdt.base;
    sregs.gdt.limit = env->gdt.limit;
    memset(sregs.gdt.padding, 0, sizeof(sregs.gdt.padding));

    /* Control registers; CR8 and the APIC base come from the APIC. */
    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
    sregs.apic_base = cpu_get_apic_base(cpu->apic_state);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}
3531a9dc68d9SClaudio Fontana 
/*
 * Build a struct kvm_sregs2 from the QEMU CPU state and write it with
 * KVM_SET_SREGS2.
 *
 * Segment handling matches kvm_put_sregs().  When env->pdptrs_valid is
 * set, the four PDPTR values are included and flagged with
 * KVM_SREGS2_FLAGS_PDPTRS_VALID; otherwise flags is 0.  Returns the result
 * of the ioctl.
 */
static int kvm_put_sregs2(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs2 sregs;
    void (*put_seg)(struct kvm_segment *, const SegmentCache *);
    int idx;

    /* EFLAGS.VM selects the fixed segment layout. */
    put_seg = (env->eflags & VM_MASK) ? set_v8086_seg : set_seg;
    put_seg(&sregs.cs, &env->segs[R_CS]);
    put_seg(&sregs.ds, &env->segs[R_DS]);
    put_seg(&sregs.es, &env->segs[R_ES]);
    put_seg(&sregs.fs, &env->segs[R_FS]);
    put_seg(&sregs.gs, &env->segs[R_GS]);
    put_seg(&sregs.ss, &env->segs[R_SS]);

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    /* Descriptor tables. */
    sregs.idt.base = env->idt.base;
    sregs.idt.limit = env->idt.limit;
    memset(sregs.idt.padding, 0, sizeof(sregs.idt.padding));
    sregs.gdt.base = env->gdt.base;
    sregs.gdt.limit = env->gdt.limit;
    memset(sregs.gdt.padding, 0, sizeof(sregs.gdt.padding));

    /* Control registers; CR8 and the APIC base come from the APIC. */
    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
    sregs.apic_base = cpu_get_apic_base(cpu->apic_state);

    sregs.efer = env->efer;

    if (env->pdptrs_valid) {
        for (idx = 0; idx < 4; idx++) {
            sregs.pdptrs[idx] = env->pdptrs[idx];
        }
        sregs.flags = KVM_SREGS2_FLAGS_PDPTRS_VALID;
    } else {
        sregs.flags = 0;
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS2, &sregs);
}
35858f515d38SMaxim Levitsky 
35868f515d38SMaxim Levitsky 
/* Zero the first MSR_BUF_SIZE bytes of the CPU's MSR buffer. */
static void kvm_msr_buf_reset(X86CPU *cpu)
{
    void *buf = cpu->kvm_msr_buf;

    memset(buf, 0, MSR_BUF_SIZE);
}
3591a9dc68d9SClaudio Fontana 
/*
 * Append one (index, value) entry to the CPU's MSR buffer and bump its
 * entry count.
 *
 * Asserts that the new entry still lies within MSR_BUF_SIZE bytes of the
 * start of the buffer.
 */
static void kvm_msr_entry_add(X86CPU *cpu, uint32_t index, uint64_t value)
{
    struct kvm_msrs *buf = cpu->kvm_msr_buf;
    struct kvm_msr_entry *slot = &buf->entries[buf->nmsrs];
    const char *buf_end = (const char *)buf + MSR_BUF_SIZE;

    assert((const char *)(slot + 1) <= buf_end);

    *slot = (struct kvm_msr_entry) {
        .index = index,
        .reserved = 0,
        .data = value,
    };
    buf->nmsrs++;
}
3605a9dc68d9SClaudio Fontana 
/*
 * Write a single MSR: reset the CPU's MSR buffer, add the one entry and
 * submit the buffer with KVM_SET_MSRS.
 *
 * Returns the result of the ioctl.
 */
static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value)
{
    int rc;

    kvm_msr_buf_reset(cpu);
    kvm_msr_entry_add(cpu, index, value);
    rc = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);

    return rc;
}
3613a9dc68d9SClaudio Fontana 
/*
 * Read a single MSR with KVM_GET_MSRS using a one-entry request built on
 * the stack.
 *
 * A negative ioctl result is returned as-is and *@value is not written.
 * Otherwise the result is asserted to be 1, the MSR data is stored in
 * *@value and that result is returned.
 */
static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[1];
    } request = {
        .info.nmsrs = 1,
        .entries[0].index = index,
    };
    int nread = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &request);

    if (nread >= 0) {
        assert(nread == 1);
        *value = request.entries[0].data;
    }
    return nread;
}
/*
 * Write @value to MSR_IA32_APICBASE.  The write is asserted to succeed,
 * i.e. kvm_put_one_msr() must return 1.
 */
void kvm_put_apicbase(X86CPU *cpu, uint64_t value)
{
    int written = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value);

    assert(written == 1);
}
3640a9dc68d9SClaudio Fontana 
/*
 * Write env->tsc_deadline to MSR_IA32_TSCDEADLINE.
 *
 * Returns 0 when has_msr_tsc_deadline is false (nothing is written) or
 * when the write succeeded, and the negative error from kvm_put_one_msr()
 * otherwise.  A non-negative result other than 1 trips an assertion.
 */
static int kvm_put_tscdeadline_msr(X86CPU *cpu)
{
    if (has_msr_tsc_deadline) {
        int rc = kvm_put_one_msr(cpu, MSR_IA32_TSCDEADLINE,
                                 cpu->env.tsc_deadline);

        if (rc < 0) {
            return rc;
        }
        assert(rc == 1);
    }

    return 0;
}
3658a9dc68d9SClaudio Fontana 
3659a9dc68d9SClaudio Fontana /*
3660a9dc68d9SClaudio Fontana  * Provide a separate write service for the feature control MSR in order to
3661a9dc68d9SClaudio Fontana  * kick the VCPU out of VMXON or even guest mode on reset. This has to be done
3662a9dc68d9SClaudio Fontana  * before writing any other state because forcibly leaving nested mode
3663a9dc68d9SClaudio Fontana  * invalidates the VCPU state.
3664a9dc68d9SClaudio Fontana  */
/*
 * Write env.msr_ia32_feature_control to MSR_IA32_FEATURE_CONTROL.
 *
 * Returns 0 when has_msr_feature_control is false (nothing is written) or
 * when the write succeeded, and the negative error from kvm_put_one_msr()
 * otherwise.  A non-negative result other than 1 trips an assertion.
 */
static int kvm_put_msr_feature_control(X86CPU *cpu)
{
    if (has_msr_feature_control) {
        int rc = kvm_put_one_msr(cpu, MSR_IA32_FEATURE_CONTROL,
                                 cpu->env.msr_ia32_feature_control);

        if (rc < 0) {
            return rc;
        }
        assert(rc == 1);
    }

    return 0;
}
3682a9dc68d9SClaudio Fontana 
make_vmx_msr_value(uint32_t index,uint32_t features)3683a9dc68d9SClaudio Fontana static uint64_t make_vmx_msr_value(uint32_t index, uint32_t features)
3684a9dc68d9SClaudio Fontana {
3685a9dc68d9SClaudio Fontana     uint32_t default1, can_be_one, can_be_zero;
3686a9dc68d9SClaudio Fontana     uint32_t must_be_one;
3687a9dc68d9SClaudio Fontana 
3688a9dc68d9SClaudio Fontana     switch (index) {
3689a9dc68d9SClaudio Fontana     case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3690a9dc68d9SClaudio Fontana         default1 = 0x00000016;
3691a9dc68d9SClaudio Fontana         break;
3692a9dc68d9SClaudio Fontana     case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3693a9dc68d9SClaudio Fontana         default1 = 0x0401e172;
3694a9dc68d9SClaudio Fontana         break;
3695a9dc68d9SClaudio Fontana     case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3696a9dc68d9SClaudio Fontana         default1 = 0x000011ff;
3697a9dc68d9SClaudio Fontana         break;
3698a9dc68d9SClaudio Fontana     case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3699a9dc68d9SClaudio Fontana         default1 = 0x00036dff;
3700a9dc68d9SClaudio Fontana         break;
3701a9dc68d9SClaudio Fontana     case MSR_IA32_VMX_PROCBASED_CTLS2:
3702a9dc68d9SClaudio Fontana         default1 = 0;
3703a9dc68d9SClaudio Fontana         break;
3704a9dc68d9SClaudio Fontana     default:
3705a9dc68d9SClaudio Fontana         abort();
3706a9dc68d9SClaudio Fontana     }
3707a9dc68d9SClaudio Fontana 
3708a9dc68d9SClaudio Fontana     /* If a feature bit is set, the control can be either set or clear.
3709a9dc68d9SClaudio Fontana      * Otherwise the value is limited to either 0 or 1 by default1.
3710a9dc68d9SClaudio Fontana      */
3711a9dc68d9SClaudio Fontana     can_be_one = features | default1;
3712a9dc68d9SClaudio Fontana     can_be_zero = features | ~default1;
3713a9dc68d9SClaudio Fontana     must_be_one = ~can_be_zero;
3714a9dc68d9SClaudio Fontana 
3715a9dc68d9SClaudio Fontana     /*
3716a9dc68d9SClaudio Fontana      * Bit 0:31 -> 0 if the control bit can be zero (i.e. 1 if it must be one).
3717a9dc68d9SClaudio Fontana      * Bit 32:63 -> 1 if the control bit can be one.
3718a9dc68d9SClaudio Fontana      */
3719a9dc68d9SClaudio Fontana     return must_be_one | (((uint64_t)can_be_one) << 32);
3720a9dc68d9SClaudio Fontana }
3721a9dc68d9SClaudio Fontana 
/*
 * Queue the VMX capability MSRs in the vCPU's MSR buffer.
 *
 * The values are built from the feature words in @f, combined with a few
 * fields copied from what KVM reports for the corresponding MSRs.  Nothing
 * is queued when KVM reports MSR_IA32_VMX_BASIC as zero.
 */
static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f)
{
    uint64_t host_basic, host_misc, host_ept_vpid;
    uint64_t basic_fixed, misc_fixed, ept_vpid_fixed;
    uint64_t exit_fixed = 0;
    uint64_t ept_type_mask = 0;
    uint64_t vmcs_enum;

    host_basic = kvm_arch_get_supported_msr_feature(kvm_state,
                                                    MSR_IA32_VMX_BASIC);
    if (host_basic == 0) {
        /*
         * The kernel doesn't support the VMX feature (kvm_intel.nested=0);
         * KVM_SET_MSR would fail, so leave the buffer untouched.
         */
        return;
    }

    host_misc = kvm_arch_get_supported_msr_feature(kvm_state,
                                                   MSR_IA32_VMX_MISC);
    host_ept_vpid =
        kvm_arch_get_supported_msr_feature(kvm_state,
                                           MSR_IA32_VMX_EPT_VPID_CAP);

    /*
     * A 64-bit guest is allowed to use a value of 1 for the host address
     * space size vmexit control.
     */
    if (f[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
        exit_fixed = (uint64_t)VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE << 32;
    }

    /*
     * Bits 0-30, 32-44 and 50-53 come from the host.  KVM should
     * not change them for backwards compatibility.
     */
    basic_fixed = host_basic & (MSR_VMX_BASIC_VMCS_REVISION_MASK |
                                MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK |
                                MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK);

    /*
     * Likewise for bits 0-4 and 25-27.  Bits 16-24 (CR3 target count) can
     * change in the future but are always zero for now; clear them to be
     * future proof.  Bits 32-63 could in theory change, though KVM does
     * not support dual-monitor treatment and probably never will; mask
     * them out as well.
     */
    misc_fixed = host_misc & (MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK |
                              MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK);

    /*
     * EPT memory types should not change either, so no features are
     * defined for them; they are copied from the host when EPT is enabled.
     */
    if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_ENABLE_EPT) {
        ept_type_mask = MSR_VMX_EPT_UC | MSR_VMX_EPT_WB;
    }
    ept_vpid_fixed = host_ept_vpid & ept_type_mask;

    /* Control capability MSRs. */
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
                      make_vmx_msr_value(MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
                                         f[FEAT_VMX_PROCBASED_CTLS]));
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS,
                      make_vmx_msr_value(MSR_IA32_VMX_TRUE_PINBASED_CTLS,
                                         f[FEAT_VMX_PINBASED_CTLS]));
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_EXIT_CTLS,
                      exit_fixed |
                      make_vmx_msr_value(MSR_IA32_VMX_TRUE_EXIT_CTLS,
                                         f[FEAT_VMX_EXIT_CTLS]));
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS,
                      make_vmx_msr_value(MSR_IA32_VMX_TRUE_ENTRY_CTLS,
                                         f[FEAT_VMX_ENTRY_CTLS]));
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_PROCBASED_CTLS2,
                      make_vmx_msr_value(MSR_IA32_VMX_PROCBASED_CTLS2,
                                         f[FEAT_VMX_SECONDARY_CTLS]));

    /* Feature words merged with the fields taken from the host. */
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP,
                      ept_vpid_fixed | f[FEAT_VMX_EPT_VPID_CAPS]);
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC,
                      basic_fixed | f[FEAT_VMX_BASIC]);
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC,
                      misc_fixed | f[FEAT_VMX_MISC]);
    if (has_msr_vmx_vmfunc) {
        kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMFUNC, f[FEAT_VMX_VMFUNC]);
    }

    /*
     * Just to be safe, write these with constant values.  The CRn_FIXED1
     * MSRs are generated by KVM based on the vCPU's CPUID.
     */
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR0_FIXED0,
                      CR0_PE_MASK | CR0_PG_MASK | CR0_NE_MASK);
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0,
                      CR4_VMXE_MASK);

    /* Pick the VMCS_ENUM value from the first matching feature. */
    if (f[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
        /* FRED injected-event data (0x2052).  */
        vmcs_enum = 0x52;
    } else if (f[FEAT_VMX_EXIT_CTLS] &
               VMX_VM_EXIT_ACTIVATE_SECONDARY_CONTROLS) {
        /* Secondary VM-exit controls (0x2044).  */
        vmcs_enum = 0x44;
    } else if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) {
        /* TSC multiplier (0x2032).  */
        vmcs_enum = 0x32;
    } else {
        /* Preemption timer (0x482E).  */
        vmcs_enum = 0x2E;
    }
    kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, vmcs_enum);
}
3827a9dc68d9SClaudio Fontana 
/*
 * Queue MSR_IA32_PERF_CAPABILITIES, limited to the bits that are both
 * requested in @f and reported by KVM.  Nothing is queued when KVM reports
 * zero for the MSR.
 */
static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f)
{
    uint64_t host_perf_cap;

    host_perf_cap = kvm_arch_get_supported_msr_feature(
                        kvm_state, MSR_IA32_PERF_CAPABILITIES);
    if (!host_perf_cap) {
        return;
    }

    kvm_msr_entry_add(cpu, MSR_IA32_PERF_CAPABILITIES,
                      host_perf_cap & f[FEAT_PERF_CAPABILITIES]);
}
3839a9dc68d9SClaudio Fontana 
/*
 * Submit the vCPU's queued MSR buffer with KVM_SET_MSRS.
 *
 * Returns the negative ioctl result on failure and 0 once every queued
 * entry has been accepted.  A short count is reported and then trips the
 * assertion below.
 */
static int kvm_buf_set_msrs(X86CPU *cpu)
{
    int nr_done = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);

    if (nr_done < 0) {
        return nr_done;
    }

    if (nr_done < cpu->kvm_msr_buf->nmsrs) {
        /* The entry at index nr_done is the one reported as failing. */
        struct kvm_msr_entry *failed = &cpu->kvm_msr_buf->entries[nr_done];

        error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64,
                     (uint32_t)failed->index, (uint64_t)failed->data);
    }

    assert(nr_done == cpu->kvm_msr_buf->nmsrs);
    return 0;
}
3856a9dc68d9SClaudio Fontana 
/*
 * Queue the capability-style MSRs (arch capabilities, core capability,
 * perf capabilities, microcode revision and the VMX capability MSRs) that
 * are available, then write them in one kvm_buf_set_msrs() call.
 *
 * A failure to write the buffer is treated as fatal via assert().
 */
static void kvm_init_msrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    int ret;

    kvm_msr_buf_reset(cpu);
    if (has_msr_arch_capabs) {
        kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
                          env->features[FEAT_ARCH_CAPABILITIES]);
    }

    if (has_msr_core_capabs) {
        kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
                          env->features[FEAT_CORE_CAPABILITY]);
    }

    /* Perf capabilities are only exposed when the PMU is enabled. */
    if (has_msr_perf_capabs && cpu->enable_pmu) {
        kvm_msr_entry_add_perf(cpu, env->features);
    }

    if (has_msr_ucode_rev) {
        kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
    }

    /*
     * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
     * all kernels with MSR features should have them.
     */
    if (kvm_feature_msrs && cpu_has_vmx(env)) {
        kvm_msr_entry_add_vmx(cpu, env->features);
    }

    /*
     * Keep the write outside of assert(): the call has side effects and
     * must still happen if assert() ever expands to nothing.
     */
    ret = kvm_buf_set_msrs(cpu);
    assert(ret == 0);
}
3890a9dc68d9SClaudio Fontana 
kvm_put_msrs(X86CPU * cpu,int level)3891a9dc68d9SClaudio Fontana static int kvm_put_msrs(X86CPU *cpu, int level)
3892a9dc68d9SClaudio Fontana {
3893a9dc68d9SClaudio Fontana     CPUX86State *env = &cpu->env;
3894a9dc68d9SClaudio Fontana     int i;
3895a9dc68d9SClaudio Fontana 
3896a9dc68d9SClaudio Fontana     kvm_msr_buf_reset(cpu);
3897a9dc68d9SClaudio Fontana 
3898a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
3899a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
3900a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
3901a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_PAT, env->pat);
3902a9dc68d9SClaudio Fontana     if (has_msr_star) {
3903a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_STAR, env->star);
3904a9dc68d9SClaudio Fontana     }
3905a9dc68d9SClaudio Fontana     if (has_msr_hsave_pa) {
3906a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, env->vm_hsave);
3907a9dc68d9SClaudio Fontana     }
3908a9dc68d9SClaudio Fontana     if (has_msr_tsc_aux) {
3909a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux);
3910a9dc68d9SClaudio Fontana     }
3911a9dc68d9SClaudio Fontana     if (has_msr_tsc_adjust) {
3912a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust);
3913a9dc68d9SClaudio Fontana     }
3914a9dc68d9SClaudio Fontana     if (has_msr_misc_enable) {
3915a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE,
3916a9dc68d9SClaudio Fontana                           env->msr_ia32_misc_enable);
3917a9dc68d9SClaudio Fontana     }
3918a9dc68d9SClaudio Fontana     if (has_msr_smbase) {
3919a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, env->smbase);
3920a9dc68d9SClaudio Fontana     }
3921a9dc68d9SClaudio Fontana     if (has_msr_smi_count) {
3922a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_SMI_COUNT, env->msr_smi_count);
3923a9dc68d9SClaudio Fontana     }
39246aa4228bSChenyi Qiang     if (has_msr_pkrs) {
39256aa4228bSChenyi Qiang         kvm_msr_entry_add(cpu, MSR_IA32_PKRS, env->pkrs);
39266aa4228bSChenyi Qiang     }
3927a9dc68d9SClaudio Fontana     if (has_msr_bndcfgs) {
3928a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, env->msr_bndcfgs);
3929a9dc68d9SClaudio Fontana     }
3930a9dc68d9SClaudio Fontana     if (has_msr_xss) {
3931a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_XSS, env->xss);
3932a9dc68d9SClaudio Fontana     }
3933a9dc68d9SClaudio Fontana     if (has_msr_umwait) {
3934a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, env->umwait);
3935a9dc68d9SClaudio Fontana     }
3936a9dc68d9SClaudio Fontana     if (has_msr_spec_ctrl) {
3937a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, env->spec_ctrl);
3938a9dc68d9SClaudio Fontana     }
3939cabf9862SMaxim Levitsky     if (has_tsc_scale_msr) {
3940cabf9862SMaxim Levitsky         kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, env->amd_tsc_scale_msr);
3941cabf9862SMaxim Levitsky     }
3942cabf9862SMaxim Levitsky 
3943a9dc68d9SClaudio Fontana     if (has_msr_tsx_ctrl) {
3944a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, env->tsx_ctrl);
3945a9dc68d9SClaudio Fontana     }
3946a9dc68d9SClaudio Fontana     if (has_msr_virt_ssbd) {
3947a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd);
3948a9dc68d9SClaudio Fontana     }
3949b5151aceSGao Shiyuan     if (has_msr_hwcr) {
3950b5151aceSGao Shiyuan         kvm_msr_entry_add(cpu, MSR_K7_HWCR, env->msr_hwcr);
3951b5151aceSGao Shiyuan     }
3952a9dc68d9SClaudio Fontana 
3953a9dc68d9SClaudio Fontana #ifdef TARGET_X86_64
3954a9dc68d9SClaudio Fontana     if (lm_capable_kernel) {
3955a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_CSTAR, env->cstar);
3956a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
3957a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask);
3958a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
39594ebd98ebSXin Li         if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
39604ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0);
39614ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1);
39624ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2);
39634ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3);
39644ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls);
39654ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1);
39664ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2);
39674ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3);
39684ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config);
39694ebd98ebSXin Li         }
3970a9dc68d9SClaudio Fontana     }
3971a9dc68d9SClaudio Fontana #endif
3972a9dc68d9SClaudio Fontana 
3973a9dc68d9SClaudio Fontana     /*
3974a9dc68d9SClaudio Fontana      * The following MSRs have side effects on the guest or are too heavy
3975a9dc68d9SClaudio Fontana      * for normal writeback. Limit them to reset or full state updates.
3976a9dc68d9SClaudio Fontana      */
3977a9dc68d9SClaudio Fontana     if (level >= KVM_PUT_RESET_STATE) {
3978a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc);
3979a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr);
3980a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
3981a9dc68d9SClaudio Fontana         if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) {
3982a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, env->async_pf_int_msr);
3983a9dc68d9SClaudio Fontana         }
3984a9dc68d9SClaudio Fontana         if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) {
3985a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr);
3986a9dc68d9SClaudio Fontana         }
3987a9dc68d9SClaudio Fontana         if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) {
3988a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, env->pv_eoi_en_msr);
3989a9dc68d9SClaudio Fontana         }
3990a9dc68d9SClaudio Fontana         if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) {
3991a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, env->steal_time_msr);
3992a9dc68d9SClaudio Fontana         }
3993a9dc68d9SClaudio Fontana 
3994a9dc68d9SClaudio Fontana         if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) {
3995a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, env->poll_control_msr);
3996a9dc68d9SClaudio Fontana         }
3997a9dc68d9SClaudio Fontana 
3998a9dc68d9SClaudio Fontana         if (has_architectural_pmu_version > 0) {
3999a9dc68d9SClaudio Fontana             if (has_architectural_pmu_version > 1) {
4000a9dc68d9SClaudio Fontana                 /* Stop the counter.  */
4001a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
4002a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
4003a9dc68d9SClaudio Fontana             }
4004a9dc68d9SClaudio Fontana 
4005a9dc68d9SClaudio Fontana             /* Set the counter values.  */
4006a9dc68d9SClaudio Fontana             for (i = 0; i < num_architectural_pmu_fixed_counters; i++) {
4007a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i,
4008a9dc68d9SClaudio Fontana                                   env->msr_fixed_counters[i]);
4009a9dc68d9SClaudio Fontana             }
4010a9dc68d9SClaudio Fontana             for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
4011a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i,
4012a9dc68d9SClaudio Fontana                                   env->msr_gp_counters[i]);
4013a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i,
4014a9dc68d9SClaudio Fontana                                   env->msr_gp_evtsel[i]);
4015a9dc68d9SClaudio Fontana             }
4016a9dc68d9SClaudio Fontana             if (has_architectural_pmu_version > 1) {
4017a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS,
4018a9dc68d9SClaudio Fontana                                   env->msr_global_status);
4019a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
4020a9dc68d9SClaudio Fontana                                   env->msr_global_ovf_ctrl);
4021a9dc68d9SClaudio Fontana 
4022a9dc68d9SClaudio Fontana                 /* Now start the PMU.  */
4023a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL,
4024a9dc68d9SClaudio Fontana                                   env->msr_fixed_ctr_ctrl);
4025a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL,
4026a9dc68d9SClaudio Fontana                                   env->msr_global_ctrl);
4027a9dc68d9SClaudio Fontana             }
4028a9dc68d9SClaudio Fontana         }
4029a9dc68d9SClaudio Fontana         /*
4030a9dc68d9SClaudio Fontana          * Hyper-V partition-wide MSRs: to avoid clearing them on cpu hot-add,
4031a9dc68d9SClaudio Fontana          * only sync them to KVM on the first cpu
4032a9dc68d9SClaudio Fontana          */
4033a9dc68d9SClaudio Fontana         if (current_cpu == first_cpu) {
4034a9dc68d9SClaudio Fontana             if (has_msr_hv_hypercall) {
4035a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID,
4036a9dc68d9SClaudio Fontana                                   env->msr_hv_guest_os_id);
4037a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL,
4038a9dc68d9SClaudio Fontana                                   env->msr_hv_hypercall);
4039a9dc68d9SClaudio Fontana             }
4040a9dc68d9SClaudio Fontana             if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) {
4041a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC,
4042a9dc68d9SClaudio Fontana                                   env->msr_hv_tsc);
4043a9dc68d9SClaudio Fontana             }
4044a9dc68d9SClaudio Fontana             if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) {
4045a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL,
4046a9dc68d9SClaudio Fontana                                   env->msr_hv_reenlightenment_control);
4047a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL,
4048a9dc68d9SClaudio Fontana                                   env->msr_hv_tsc_emulation_control);
4049a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS,
4050a9dc68d9SClaudio Fontana                                   env->msr_hv_tsc_emulation_status);
4051a9dc68d9SClaudio Fontana             }
405273d24074SJon Doron             if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG) &&
405373d24074SJon Doron                 has_msr_hv_syndbg_options) {
405473d24074SJon Doron                 kvm_msr_entry_add(cpu, HV_X64_MSR_SYNDBG_OPTIONS,
405573d24074SJon Doron                                   hyperv_syndbg_query_options());
405673d24074SJon Doron             }
4057a9dc68d9SClaudio Fontana         }
4058a9dc68d9SClaudio Fontana         if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) {
4059a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE,
4060a9dc68d9SClaudio Fontana                               env->msr_hv_vapic);
4061a9dc68d9SClaudio Fontana         }
4062a9dc68d9SClaudio Fontana         if (has_msr_hv_crash) {
4063a9dc68d9SClaudio Fontana             int j;
4064a9dc68d9SClaudio Fontana 
4065a9dc68d9SClaudio Fontana             for (j = 0; j < HV_CRASH_PARAMS; j++)
4066a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j,
4067a9dc68d9SClaudio Fontana                                   env->msr_hv_crash_params[j]);
4068a9dc68d9SClaudio Fontana 
4069a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_NOTIFY);
4070a9dc68d9SClaudio Fontana         }
4071a9dc68d9SClaudio Fontana         if (has_msr_hv_runtime) {
4072a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime);
4073a9dc68d9SClaudio Fontana         }
4074a9dc68d9SClaudio Fontana         if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX)
4075a9dc68d9SClaudio Fontana             && hv_vpindex_settable) {
4076a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX,
4077a9dc68d9SClaudio Fontana                               hyperv_vp_index(CPU(cpu)));
4078a9dc68d9SClaudio Fontana         }
4079a9dc68d9SClaudio Fontana         if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
4080a9dc68d9SClaudio Fontana             int j;
4081a9dc68d9SClaudio Fontana 
4082a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, HV_SYNIC_VERSION);
4083a9dc68d9SClaudio Fontana 
4084a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL,
4085a9dc68d9SClaudio Fontana                               env->msr_hv_synic_control);
4086a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP,
4087a9dc68d9SClaudio Fontana                               env->msr_hv_synic_evt_page);
4088a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP,
4089a9dc68d9SClaudio Fontana                               env->msr_hv_synic_msg_page);
4090a9dc68d9SClaudio Fontana 
4091a9dc68d9SClaudio Fontana             for (j = 0; j < ARRAY_SIZE(env->msr_hv_synic_sint); j++) {
4092a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_SINT0 + j,
4093a9dc68d9SClaudio Fontana                                   env->msr_hv_synic_sint[j]);
4094a9dc68d9SClaudio Fontana             }
4095a9dc68d9SClaudio Fontana         }
4096a9dc68d9SClaudio Fontana         if (has_msr_hv_stimer) {
4097a9dc68d9SClaudio Fontana             int j;
4098a9dc68d9SClaudio Fontana 
4099a9dc68d9SClaudio Fontana             for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_config); j++) {
4100a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_CONFIG + j * 2,
4101a9dc68d9SClaudio Fontana                                 env->msr_hv_stimer_config[j]);
4102a9dc68d9SClaudio Fontana             }
4103a9dc68d9SClaudio Fontana 
4104a9dc68d9SClaudio Fontana             for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_count); j++) {
4105a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_COUNT + j * 2,
4106a9dc68d9SClaudio Fontana                                 env->msr_hv_stimer_count[j]);
4107a9dc68d9SClaudio Fontana             }
4108a9dc68d9SClaudio Fontana         }
4109a9dc68d9SClaudio Fontana         if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
4110a9dc68d9SClaudio Fontana             uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits);
4111a9dc68d9SClaudio Fontana 
4112a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype);
4113a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, env->mtrr_fixed[0]);
4114a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, env->mtrr_fixed[1]);
4115a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]);
4116a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]);
4117a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]);
4118a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]);
4119a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]);
4120a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]);
4121a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]);
4122a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]);
4123a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]);
4124a9dc68d9SClaudio Fontana             for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
4125a9dc68d9SClaudio Fontana                 /* The CPU GPs if we write to a bit above the physical limit of
4126a9dc68d9SClaudio Fontana                  * the host CPU (and KVM emulates that)
4127a9dc68d9SClaudio Fontana                  */
4128a9dc68d9SClaudio Fontana                 uint64_t mask = env->mtrr_var[i].mask;
4129a9dc68d9SClaudio Fontana                 mask &= phys_mask;
4130a9dc68d9SClaudio Fontana 
4131a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i),
4132a9dc68d9SClaudio Fontana                                   env->mtrr_var[i].base);
4133a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask);
4134a9dc68d9SClaudio Fontana             }
4135a9dc68d9SClaudio Fontana         }
4136a9dc68d9SClaudio Fontana         if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) {
4137a9dc68d9SClaudio Fontana             int addr_num = kvm_arch_get_supported_cpuid(kvm_state,
4138a9dc68d9SClaudio Fontana                                                     0x14, 1, R_EAX) & 0x7;
4139a9dc68d9SClaudio Fontana 
4140a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL,
4141a9dc68d9SClaudio Fontana                             env->msr_rtit_ctrl);
4142a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS,
4143a9dc68d9SClaudio Fontana                             env->msr_rtit_status);
4144a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE,
4145a9dc68d9SClaudio Fontana                             env->msr_rtit_output_base);
4146a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK,
4147a9dc68d9SClaudio Fontana                             env->msr_rtit_output_mask);
4148a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH,
4149a9dc68d9SClaudio Fontana                             env->msr_rtit_cr3_match);
4150a9dc68d9SClaudio Fontana             for (i = 0; i < addr_num; i++) {
4151a9dc68d9SClaudio Fontana                 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i,
4152a9dc68d9SClaudio Fontana                             env->msr_rtit_addrs[i]);
4153a9dc68d9SClaudio Fontana             }
4154a9dc68d9SClaudio Fontana         }
4155a9dc68d9SClaudio Fontana 
4156db888065SSean Christopherson         if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) {
4157db888065SSean Christopherson             kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0,
4158db888065SSean Christopherson                               env->msr_ia32_sgxlepubkeyhash[0]);
4159db888065SSean Christopherson             kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1,
4160db888065SSean Christopherson                               env->msr_ia32_sgxlepubkeyhash[1]);
4161db888065SSean Christopherson             kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2,
4162db888065SSean Christopherson                               env->msr_ia32_sgxlepubkeyhash[2]);
4163db888065SSean Christopherson             kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3,
4164db888065SSean Christopherson                               env->msr_ia32_sgxlepubkeyhash[3]);
4165db888065SSean Christopherson         }
4166db888065SSean Christopherson 
4167cdec2b75SZeng Guang         if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
4168cdec2b75SZeng Guang             kvm_msr_entry_add(cpu, MSR_IA32_XFD,
4169cdec2b75SZeng Guang                               env->msr_xfd);
4170cdec2b75SZeng Guang             kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
4171cdec2b75SZeng Guang                               env->msr_xfd_err);
4172cdec2b75SZeng Guang         }
4173cdec2b75SZeng Guang 
417412703d4eSYang Weijiang         if (kvm_enabled() && cpu->enable_pmu &&
417512703d4eSYang Weijiang             (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
417612703d4eSYang Weijiang             uint64_t depth;
4177e0c3ef71SPaolo Bonzini             int ret;
417812703d4eSYang Weijiang 
417912703d4eSYang Weijiang             /*
41803a7a27cfSYang Weijiang              * Only migrate Arch LBR states when the host Arch LBR depth
41813a7a27cfSYang Weijiang              * equals that of source guest's, this is to avoid mismatch
41823a7a27cfSYang Weijiang              * of guest/host config for the msr hence avoid unexpected
41833a7a27cfSYang Weijiang              * misbehavior.
418412703d4eSYang Weijiang              */
418512703d4eSYang Weijiang             ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
418612703d4eSYang Weijiang 
41873a7a27cfSYang Weijiang             if (ret == 1 && !!depth && depth == env->msr_lbr_depth) {
418812703d4eSYang Weijiang                 kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, env->msr_lbr_ctl);
418912703d4eSYang Weijiang                 kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, env->msr_lbr_depth);
419012703d4eSYang Weijiang 
419112703d4eSYang Weijiang                 for (i = 0; i < ARCH_LBR_NR_ENTRIES; i++) {
419212703d4eSYang Weijiang                     if (!env->lbr_records[i].from) {
419312703d4eSYang Weijiang                         continue;
419412703d4eSYang Weijiang                     }
419512703d4eSYang Weijiang                     kvm_msr_entry_add(cpu, MSR_ARCH_LBR_FROM_0 + i,
419612703d4eSYang Weijiang                                       env->lbr_records[i].from);
419712703d4eSYang Weijiang                     kvm_msr_entry_add(cpu, MSR_ARCH_LBR_TO_0 + i,
419812703d4eSYang Weijiang                                       env->lbr_records[i].to);
419912703d4eSYang Weijiang                     kvm_msr_entry_add(cpu, MSR_ARCH_LBR_INFO_0 + i,
420012703d4eSYang Weijiang                                       env->lbr_records[i].info);
420112703d4eSYang Weijiang                 }
420212703d4eSYang Weijiang             }
420312703d4eSYang Weijiang         }
420412703d4eSYang Weijiang 
4205a9dc68d9SClaudio Fontana         /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
4206a9dc68d9SClaudio Fontana          *       kvm_put_msr_feature_control. */
4207a9dc68d9SClaudio Fontana     }
4208a9dc68d9SClaudio Fontana 
4209a9dc68d9SClaudio Fontana     if (env->mcg_cap) {
4210a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MCG_STATUS, env->mcg_status);
4211a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MCG_CTL, env->mcg_ctl);
4212a9dc68d9SClaudio Fontana         if (has_msr_mcg_ext_ctl) {
4213a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, env->mcg_ext_ctl);
4214a9dc68d9SClaudio Fontana         }
4215a9dc68d9SClaudio Fontana         for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
4216a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, env->mce_banks[i]);
4217a9dc68d9SClaudio Fontana         }
4218a9dc68d9SClaudio Fontana     }
4219a9dc68d9SClaudio Fontana 
4220a9dc68d9SClaudio Fontana     return kvm_buf_set_msrs(cpu);
4221a9dc68d9SClaudio Fontana }
4222a9dc68d9SClaudio Fontana 
4223a9dc68d9SClaudio Fontana 
/*
 * Fetch the vCPU's XSAVE image from KVM into env->xsave_buf.
 *
 * KVM_GET_XSAVE2 is issued when has_xsave2 is set, KVM_GET_XSAVE otherwise.
 * On success the buffer and env->xsave_buf_len are handed to
 * x86_cpu_xrstor_all_areas().
 *
 * Returns 0 on success, or the negative value returned by the ioctl.
 */
static int kvm_get_xsave(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    void *buf = env->xsave_buf;
    unsigned long request = KVM_GET_XSAVE;
    int rc;

    if (has_xsave2) {
        request = KVM_GET_XSAVE2;
    }

    rc = kvm_vcpu_ioctl(CPU(cpu), request, buf);
    if (rc >= 0) {
        x86_cpu_xrstor_all_areas(cpu, buf, env->xsave_buf_len);
        /* A non-negative ioctl result is always reported as plain success. */
        rc = 0;
    }

    return rc;
}
4240a9dc68d9SClaudio Fontana 
/*
 * Read the extended control registers from KVM and store XCR0 in env->xcr0.
 *
 * Nothing is done (and 0 is returned) when has_xcrs is not set.  If the
 * list returned by KVM_GET_XCRS has no entry for XCR0, env->xcr0 is left
 * untouched.
 *
 * Returns 0 on success, or the negative value returned by the ioctl.
 */
static int kvm_get_xcrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_xcrs list;
    int idx, rc;

    if (!has_xcrs) {
        return 0;
    }

    rc = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &list);
    if (rc < 0) {
        return rc;
    }

    /* Only xcr0 is supported for now: take the first entry describing it. */
    for (idx = 0; idx < list.nr_xcrs; idx++) {
        if (list.xcrs[idx].xcr != 0) {
            continue;
        }
        env->xcr0 = list.xcrs[idx].value;
        break;
    }

    return 0;
}
4265a9dc68d9SClaudio Fontana 
/*
 * Read the special registers with KVM_GET_SREGS and copy them into env:
 * CR0/CR2/CR3/CR4, EFER, the IDT/GDT base and limit, the six segment
 * registers, TR and LDT.  The hflags are recomputed afterwards.
 *
 * Returns 0 on success, or the negative value returned by the ioctl.
 */
static int kvm_get_sregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs regs;
    int rc = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &regs);

    if (rc < 0) {
        return rc;
    }

    /*
     * The interrupt_bitmap is ignored because KVM_GET_SREGS is
     * always preceded by KVM_GET_VCPU_EVENTS.
     */

    /* Control registers. */
    env->cr[0] = regs.cr0;
    env->cr[2] = regs.cr2;
    env->cr[3] = regs.cr3;
    env->cr[4] = regs.cr4;

    /*
     * EFER: with SEV-ES enabled, LMA is set here whenever both EFER.LME
     * and CR0.PG are set in the values just read.
     */
    env->efer = regs.efer;
    if (sev_es_enabled()) {
        if ((env->efer & MSR_EFER_LME) && (env->cr[0] & CR0_PG_MASK)) {
            env->efer |= MSR_EFER_LMA;
        }
    }

    /* Descriptor tables. */
    env->gdt.base = regs.gdt.base;
    env->gdt.limit = regs.gdt.limit;
    env->idt.base = regs.idt.base;
    env->idt.limit = regs.idt.limit;

    /* Segment registers, then the task register and LDT. */
    get_seg(&env->segs[R_CS], &regs.cs);
    get_seg(&env->segs[R_DS], &regs.ds);
    get_seg(&env->segs[R_ES], &regs.es);
    get_seg(&env->segs[R_FS], &regs.fs);
    get_seg(&env->segs[R_GS], &regs.gs);
    get_seg(&env->segs[R_SS], &regs.ss);
    get_seg(&env->tr, &regs.tr);
    get_seg(&env->ldt, &regs.ldt);

    /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */
    x86_update_hflags(env);

    return 0;
}
4313a9dc68d9SClaudio Fontana 
/*
 * Read the special registers with KVM_GET_SREGS2 and copy them into env:
 * CR0/CR2/CR3/CR4, EFER, the IDT/GDT base and limit, the six segment
 * registers, TR and LDT.  The four PDPTRs are copied as well, but only
 * when KVM flags them as valid.  The hflags are recomputed afterwards.
 *
 * Returns 0 on success, or the negative value returned by the ioctl.
 */
static int kvm_get_sregs2(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs2 regs;
    int n;
    int rc = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS2, &regs);

    if (rc < 0) {
        return rc;
    }

    /* Control registers. */
    env->cr[0] = regs.cr0;
    env->cr[2] = regs.cr2;
    env->cr[3] = regs.cr3;
    env->cr[4] = regs.cr4;

    /*
     * EFER: with SEV-ES enabled, LMA is set here whenever both EFER.LME
     * and CR0.PG are set in the values just read.
     */
    env->efer = regs.efer;
    if (sev_es_enabled()) {
        if ((env->efer & MSR_EFER_LME) && (env->cr[0] & CR0_PG_MASK)) {
            env->efer |= MSR_EFER_LMA;
        }
    }

    /* Descriptor tables. */
    env->gdt.base = regs.gdt.base;
    env->gdt.limit = regs.gdt.limit;
    env->idt.base = regs.idt.base;
    env->idt.limit = regs.idt.limit;

    /* Segment registers, then the task register and LDT. */
    get_seg(&env->segs[R_CS], &regs.cs);
    get_seg(&env->segs[R_DS], &regs.ds);
    get_seg(&env->segs[R_ES], &regs.es);
    get_seg(&env->segs[R_FS], &regs.fs);
    get_seg(&env->segs[R_GS], &regs.gs);
    get_seg(&env->segs[R_SS], &regs.ss);
    get_seg(&env->tr, &regs.tr);
    get_seg(&env->ldt, &regs.ldt);

    /* PDPTRs are taken over only if KVM marked them valid. */
    env->pdptrs_valid = regs.flags & KVM_SREGS2_FLAGS_PDPTRS_VALID;
    if (env->pdptrs_valid) {
        for (n = 0; n < 4; n++) {
            env->pdptrs[n] = regs.pdptrs[n];
        }
    }

    /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */
    x86_update_hflags(env);

    return 0;
}
43648f515d38SMaxim Levitsky 
kvm_get_msrs(X86CPU * cpu)4365a9dc68d9SClaudio Fontana static int kvm_get_msrs(X86CPU *cpu)
4366a9dc68d9SClaudio Fontana {
4367a9dc68d9SClaudio Fontana     CPUX86State *env = &cpu->env;
4368a9dc68d9SClaudio Fontana     struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
4369a9dc68d9SClaudio Fontana     int ret, i;
4370a9dc68d9SClaudio Fontana     uint64_t mtrr_top_bits;
4371a9dc68d9SClaudio Fontana 
4372a9dc68d9SClaudio Fontana     kvm_msr_buf_reset(cpu);
4373a9dc68d9SClaudio Fontana 
4374a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, 0);
4375a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, 0);
4376a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, 0);
4377a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_PAT, 0);
4378a9dc68d9SClaudio Fontana     if (has_msr_star) {
4379a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_STAR, 0);
4380a9dc68d9SClaudio Fontana     }
4381a9dc68d9SClaudio Fontana     if (has_msr_hsave_pa) {
4382a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, 0);
4383a9dc68d9SClaudio Fontana     }
4384a9dc68d9SClaudio Fontana     if (has_msr_tsc_aux) {
4385a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0);
4386a9dc68d9SClaudio Fontana     }
4387a9dc68d9SClaudio Fontana     if (has_msr_tsc_adjust) {
4388a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0);
4389a9dc68d9SClaudio Fontana     }
4390a9dc68d9SClaudio Fontana     if (has_msr_tsc_deadline) {
4391a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0);
4392a9dc68d9SClaudio Fontana     }
4393a9dc68d9SClaudio Fontana     if (has_msr_misc_enable) {
4394a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, 0);
4395a9dc68d9SClaudio Fontana     }
4396a9dc68d9SClaudio Fontana     if (has_msr_smbase) {
4397a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, 0);
4398a9dc68d9SClaudio Fontana     }
4399a9dc68d9SClaudio Fontana     if (has_msr_smi_count) {
4400a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_SMI_COUNT, 0);
4401a9dc68d9SClaudio Fontana     }
4402a9dc68d9SClaudio Fontana     if (has_msr_feature_control) {
4403a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL, 0);
4404a9dc68d9SClaudio Fontana     }
44056aa4228bSChenyi Qiang     if (has_msr_pkrs) {
44066aa4228bSChenyi Qiang         kvm_msr_entry_add(cpu, MSR_IA32_PKRS, 0);
44076aa4228bSChenyi Qiang     }
4408a9dc68d9SClaudio Fontana     if (has_msr_bndcfgs) {
4409a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, 0);
4410a9dc68d9SClaudio Fontana     }
4411a9dc68d9SClaudio Fontana     if (has_msr_xss) {
4412a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_XSS, 0);
4413a9dc68d9SClaudio Fontana     }
4414a9dc68d9SClaudio Fontana     if (has_msr_umwait) {
4415a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, 0);
4416a9dc68d9SClaudio Fontana     }
4417a9dc68d9SClaudio Fontana     if (has_msr_spec_ctrl) {
4418a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, 0);
4419a9dc68d9SClaudio Fontana     }
4420cabf9862SMaxim Levitsky     if (has_tsc_scale_msr) {
4421cabf9862SMaxim Levitsky         kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, 0);
4422cabf9862SMaxim Levitsky     }
4423cabf9862SMaxim Levitsky 
4424a9dc68d9SClaudio Fontana     if (has_msr_tsx_ctrl) {
4425a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, 0);
4426a9dc68d9SClaudio Fontana     }
4427a9dc68d9SClaudio Fontana     if (has_msr_virt_ssbd) {
4428a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0);
4429a9dc68d9SClaudio Fontana     }
4430a9dc68d9SClaudio Fontana     if (!env->tsc_valid) {
4431a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0);
4432a9dc68d9SClaudio Fontana         env->tsc_valid = !runstate_is_running();
4433a9dc68d9SClaudio Fontana     }
4434b5151aceSGao Shiyuan     if (has_msr_hwcr) {
4435b5151aceSGao Shiyuan         kvm_msr_entry_add(cpu, MSR_K7_HWCR, 0);
4436b5151aceSGao Shiyuan     }
4437a9dc68d9SClaudio Fontana 
4438a9dc68d9SClaudio Fontana #ifdef TARGET_X86_64
4439a9dc68d9SClaudio Fontana     if (lm_capable_kernel) {
4440a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_CSTAR, 0);
4441a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
4442a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_FMASK, 0);
4443a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_LSTAR, 0);
44444ebd98ebSXin Li         if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
44454ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0);
44464ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0);
44474ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0);
44484ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0);
44494ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0);
44504ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0);
44514ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0);
44524ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, 0);
44534ebd98ebSXin Li             kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0);
44544ebd98ebSXin Li         }
4455a9dc68d9SClaudio Fontana     }
4456a9dc68d9SClaudio Fontana #endif
4457a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0);
4458a9dc68d9SClaudio Fontana     kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, 0);
4459a9dc68d9SClaudio Fontana     if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) {
4460a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, 0);
4461a9dc68d9SClaudio Fontana     }
4462a9dc68d9SClaudio Fontana     if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) {
4463a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, 0);
4464a9dc68d9SClaudio Fontana     }
4465a9dc68d9SClaudio Fontana     if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) {
4466a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, 0);
4467a9dc68d9SClaudio Fontana     }
4468a9dc68d9SClaudio Fontana     if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) {
4469a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, 0);
4470a9dc68d9SClaudio Fontana     }
4471a9dc68d9SClaudio Fontana     if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) {
4472a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, 1);
4473a9dc68d9SClaudio Fontana     }
4474a9dc68d9SClaudio Fontana     if (has_architectural_pmu_version > 0) {
4475a9dc68d9SClaudio Fontana         if (has_architectural_pmu_version > 1) {
4476a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
4477a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
4478a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0);
4479a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0);
4480a9dc68d9SClaudio Fontana         }
4481a9dc68d9SClaudio Fontana         for (i = 0; i < num_architectural_pmu_fixed_counters; i++) {
4482a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
4483a9dc68d9SClaudio Fontana         }
4484a9dc68d9SClaudio Fontana         for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
4485a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0);
4486a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0);
4487a9dc68d9SClaudio Fontana         }
4488a9dc68d9SClaudio Fontana     }
4489a9dc68d9SClaudio Fontana 
4490a9dc68d9SClaudio Fontana     if (env->mcg_cap) {
4491a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MCG_STATUS, 0);
4492a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MCG_CTL, 0);
4493a9dc68d9SClaudio Fontana         if (has_msr_mcg_ext_ctl) {
4494a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, 0);
4495a9dc68d9SClaudio Fontana         }
4496a9dc68d9SClaudio Fontana         for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
4497a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, 0);
4498a9dc68d9SClaudio Fontana         }
4499a9dc68d9SClaudio Fontana     }
4500a9dc68d9SClaudio Fontana 
4501a9dc68d9SClaudio Fontana     if (has_msr_hv_hypercall) {
4502a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, 0);
4503a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, 0);
4504a9dc68d9SClaudio Fontana     }
4505a9dc68d9SClaudio Fontana     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) {
4506a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, 0);
4507a9dc68d9SClaudio Fontana     }
4508a9dc68d9SClaudio Fontana     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) {
4509a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0);
4510a9dc68d9SClaudio Fontana     }
4511a9dc68d9SClaudio Fontana     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) {
4512a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
4513a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
4514a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0);
4515a9dc68d9SClaudio Fontana     }
451673d24074SJon Doron     if (has_msr_hv_syndbg_options) {
451773d24074SJon Doron         kvm_msr_entry_add(cpu, HV_X64_MSR_SYNDBG_OPTIONS, 0);
451873d24074SJon Doron     }
4519a9dc68d9SClaudio Fontana     if (has_msr_hv_crash) {
4520a9dc68d9SClaudio Fontana         int j;
4521a9dc68d9SClaudio Fontana 
4522a9dc68d9SClaudio Fontana         for (j = 0; j < HV_CRASH_PARAMS; j++) {
4523a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, 0);
4524a9dc68d9SClaudio Fontana         }
4525a9dc68d9SClaudio Fontana     }
4526a9dc68d9SClaudio Fontana     if (has_msr_hv_runtime) {
4527a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, 0);
4528a9dc68d9SClaudio Fontana     }
4529a9dc68d9SClaudio Fontana     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
4530a9dc68d9SClaudio Fontana         uint32_t msr;
4531a9dc68d9SClaudio Fontana 
4532a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, 0);
4533a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, 0);
4534a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, 0);
4535a9dc68d9SClaudio Fontana         for (msr = HV_X64_MSR_SINT0; msr <= HV_X64_MSR_SINT15; msr++) {
4536a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, msr, 0);
4537a9dc68d9SClaudio Fontana         }
4538a9dc68d9SClaudio Fontana     }
4539a9dc68d9SClaudio Fontana     if (has_msr_hv_stimer) {
4540a9dc68d9SClaudio Fontana         uint32_t msr;
4541a9dc68d9SClaudio Fontana 
4542a9dc68d9SClaudio Fontana         for (msr = HV_X64_MSR_STIMER0_CONFIG; msr <= HV_X64_MSR_STIMER3_COUNT;
4543a9dc68d9SClaudio Fontana              msr++) {
4544a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, msr, 0);
4545a9dc68d9SClaudio Fontana         }
4546a9dc68d9SClaudio Fontana     }
4547a9dc68d9SClaudio Fontana     if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
4548a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRdefType, 0);
4549a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, 0);
4550a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, 0);
4551a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, 0);
4552a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, 0);
4553a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, 0);
4554a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, 0);
4555a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, 0);
4556a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, 0);
4557a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, 0);
4558a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, 0);
4559a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, 0);
4560a9dc68d9SClaudio Fontana         for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
4561a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), 0);
4562a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), 0);
4563a9dc68d9SClaudio Fontana         }
4564a9dc68d9SClaudio Fontana     }
4565a9dc68d9SClaudio Fontana 
4566a9dc68d9SClaudio Fontana     if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) {
4567a9dc68d9SClaudio Fontana         int addr_num =
4568a9dc68d9SClaudio Fontana             kvm_arch_get_supported_cpuid(kvm_state, 0x14, 1, R_EAX) & 0x7;
4569a9dc68d9SClaudio Fontana 
4570a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 0);
4571a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 0);
4572a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 0);
4573a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 0);
4574a9dc68d9SClaudio Fontana         kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 0);
4575a9dc68d9SClaudio Fontana         for (i = 0; i < addr_num; i++) {
4576a9dc68d9SClaudio Fontana             kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 0);
4577a9dc68d9SClaudio Fontana         }
4578a9dc68d9SClaudio Fontana     }
4579a9dc68d9SClaudio Fontana 
4580db888065SSean Christopherson     if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) {
4581db888065SSean Christopherson         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0, 0);
4582db888065SSean Christopherson         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1, 0);
4583db888065SSean Christopherson         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2, 0);
4584db888065SSean Christopherson         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
4585db888065SSean Christopherson     }
4586db888065SSean Christopherson 
4587cdec2b75SZeng Guang     if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
4588cdec2b75SZeng Guang         kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
4589cdec2b75SZeng Guang         kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
4590cdec2b75SZeng Guang     }
4591cdec2b75SZeng Guang 
459212703d4eSYang Weijiang     if (kvm_enabled() && cpu->enable_pmu &&
459312703d4eSYang Weijiang         (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
45943a7a27cfSYang Weijiang         uint64_t depth;
459512703d4eSYang Weijiang 
45963a7a27cfSYang Weijiang         ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
45973a7a27cfSYang Weijiang         if (ret == 1 && depth == ARCH_LBR_NR_ENTRIES) {
459812703d4eSYang Weijiang             kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, 0);
459912703d4eSYang Weijiang             kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, 0);
460012703d4eSYang Weijiang 
460112703d4eSYang Weijiang             for (i = 0; i < ARCH_LBR_NR_ENTRIES; i++) {
460212703d4eSYang Weijiang                 kvm_msr_entry_add(cpu, MSR_ARCH_LBR_FROM_0 + i, 0);
460312703d4eSYang Weijiang                 kvm_msr_entry_add(cpu, MSR_ARCH_LBR_TO_0 + i, 0);
460412703d4eSYang Weijiang                 kvm_msr_entry_add(cpu, MSR_ARCH_LBR_INFO_0 + i, 0);
460512703d4eSYang Weijiang             }
460612703d4eSYang Weijiang         }
460712703d4eSYang Weijiang     }
460812703d4eSYang Weijiang 
4609a9dc68d9SClaudio Fontana     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
4610a9dc68d9SClaudio Fontana     if (ret < 0) {
4611a9dc68d9SClaudio Fontana         return ret;
4612a9dc68d9SClaudio Fontana     }
4613a9dc68d9SClaudio Fontana 
4614a9dc68d9SClaudio Fontana     if (ret < cpu->kvm_msr_buf->nmsrs) {
4615a9dc68d9SClaudio Fontana         struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
4616a9dc68d9SClaudio Fontana         error_report("error: failed to get MSR 0x%" PRIx32,
4617a9dc68d9SClaudio Fontana                      (uint32_t)e->index);
4618a9dc68d9SClaudio Fontana     }
4619a9dc68d9SClaudio Fontana 
4620a9dc68d9SClaudio Fontana     assert(ret == cpu->kvm_msr_buf->nmsrs);
4621a9dc68d9SClaudio Fontana     /*
4622a9dc68d9SClaudio Fontana      * MTRR masks: Each mask consists of 5 parts
4623a9dc68d9SClaudio Fontana      * a  10..0: must be zero
4624a9dc68d9SClaudio Fontana      * b  11   : valid bit
4625a9dc68d9SClaudio Fontana      * c n-1.12: actual mask bits
4626a9dc68d9SClaudio Fontana      * d  51..n: reserved must be zero
4627a9dc68d9SClaudio Fontana      * e  63.52: reserved must be zero
4628a9dc68d9SClaudio Fontana      *
4629a9dc68d9SClaudio Fontana      * 'n' is the number of physical bits supported by the CPU and is
4630a9dc68d9SClaudio Fontana      * apparently always <= 52.   We know our 'n' but don't know what
4631a9dc68d9SClaudio Fontana      * the destinations 'n' is; it might be smaller, in which case
4632a9dc68d9SClaudio Fontana      * it masks (c) on loading. It might be larger, in which case
4633a9dc68d9SClaudio Fontana      * we fill 'd' so that d..c is consistent irrespective of the 'n'
4634a9dc68d9SClaudio Fontana      * we're migrating to.
4635a9dc68d9SClaudio Fontana      */
4636a9dc68d9SClaudio Fontana 
4637a9dc68d9SClaudio Fontana     if (cpu->fill_mtrr_mask) {
4638a9dc68d9SClaudio Fontana         QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 52);
4639a9dc68d9SClaudio Fontana         assert(cpu->phys_bits <= TARGET_PHYS_ADDR_SPACE_BITS);
4640a9dc68d9SClaudio Fontana         mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits);
4641a9dc68d9SClaudio Fontana     } else {
4642a9dc68d9SClaudio Fontana         mtrr_top_bits = 0;
4643a9dc68d9SClaudio Fontana     }
4644a9dc68d9SClaudio Fontana 
4645a9dc68d9SClaudio Fontana     for (i = 0; i < ret; i++) {
4646a9dc68d9SClaudio Fontana         uint32_t index = msrs[i].index;
4647a9dc68d9SClaudio Fontana         switch (index) {
4648a9dc68d9SClaudio Fontana         case MSR_IA32_SYSENTER_CS:
4649a9dc68d9SClaudio Fontana             env->sysenter_cs = msrs[i].data;
4650a9dc68d9SClaudio Fontana             break;
4651a9dc68d9SClaudio Fontana         case MSR_IA32_SYSENTER_ESP:
4652a9dc68d9SClaudio Fontana             env->sysenter_esp = msrs[i].data;
4653a9dc68d9SClaudio Fontana             break;
4654a9dc68d9SClaudio Fontana         case MSR_IA32_SYSENTER_EIP:
4655a9dc68d9SClaudio Fontana             env->sysenter_eip = msrs[i].data;
4656a9dc68d9SClaudio Fontana             break;
4657a9dc68d9SClaudio Fontana         case MSR_PAT:
4658a9dc68d9SClaudio Fontana             env->pat = msrs[i].data;
4659a9dc68d9SClaudio Fontana             break;
4660a9dc68d9SClaudio Fontana         case MSR_STAR:
4661a9dc68d9SClaudio Fontana             env->star = msrs[i].data;
4662a9dc68d9SClaudio Fontana             break;
4663a9dc68d9SClaudio Fontana #ifdef TARGET_X86_64
4664a9dc68d9SClaudio Fontana         case MSR_CSTAR:
4665a9dc68d9SClaudio Fontana             env->cstar = msrs[i].data;
4666a9dc68d9SClaudio Fontana             break;
4667a9dc68d9SClaudio Fontana         case MSR_KERNELGSBASE:
4668a9dc68d9SClaudio Fontana             env->kernelgsbase = msrs[i].data;
4669a9dc68d9SClaudio Fontana             break;
4670a9dc68d9SClaudio Fontana         case MSR_FMASK:
4671a9dc68d9SClaudio Fontana             env->fmask = msrs[i].data;
4672a9dc68d9SClaudio Fontana             break;
4673a9dc68d9SClaudio Fontana         case MSR_LSTAR:
4674a9dc68d9SClaudio Fontana             env->lstar = msrs[i].data;
4675a9dc68d9SClaudio Fontana             break;
46764ebd98ebSXin Li         case MSR_IA32_FRED_RSP0:
46774ebd98ebSXin Li             env->fred_rsp0 = msrs[i].data;
46784ebd98ebSXin Li             break;
46794ebd98ebSXin Li         case MSR_IA32_FRED_RSP1:
46804ebd98ebSXin Li             env->fred_rsp1 = msrs[i].data;
46814ebd98ebSXin Li             break;
46824ebd98ebSXin Li         case MSR_IA32_FRED_RSP2:
46834ebd98ebSXin Li             env->fred_rsp2 = msrs[i].data;
46844ebd98ebSXin Li             break;
46854ebd98ebSXin Li         case MSR_IA32_FRED_RSP3:
46864ebd98ebSXin Li             env->fred_rsp3 = msrs[i].data;
46874ebd98ebSXin Li             break;
46884ebd98ebSXin Li         case MSR_IA32_FRED_STKLVLS:
46894ebd98ebSXin Li             env->fred_stklvls = msrs[i].data;
46904ebd98ebSXin Li             break;
46914ebd98ebSXin Li         case MSR_IA32_FRED_SSP1:
46924ebd98ebSXin Li             env->fred_ssp1 = msrs[i].data;
46934ebd98ebSXin Li             break;
46944ebd98ebSXin Li         case MSR_IA32_FRED_SSP2:
46954ebd98ebSXin Li             env->fred_ssp2 = msrs[i].data;
46964ebd98ebSXin Li             break;
46974ebd98ebSXin Li         case MSR_IA32_FRED_SSP3:
46984ebd98ebSXin Li             env->fred_ssp3 = msrs[i].data;
46994ebd98ebSXin Li             break;
47004ebd98ebSXin Li         case MSR_IA32_FRED_CONFIG:
47014ebd98ebSXin Li             env->fred_config = msrs[i].data;
47024ebd98ebSXin Li             break;
4703a9dc68d9SClaudio Fontana #endif
4704a9dc68d9SClaudio Fontana         case MSR_IA32_TSC:
4705a9dc68d9SClaudio Fontana             env->tsc = msrs[i].data;
4706a9dc68d9SClaudio Fontana             break;
4707a9dc68d9SClaudio Fontana         case MSR_TSC_AUX:
4708a9dc68d9SClaudio Fontana             env->tsc_aux = msrs[i].data;
4709a9dc68d9SClaudio Fontana             break;
4710a9dc68d9SClaudio Fontana         case MSR_TSC_ADJUST:
4711a9dc68d9SClaudio Fontana             env->tsc_adjust = msrs[i].data;
4712a9dc68d9SClaudio Fontana             break;
4713a9dc68d9SClaudio Fontana         case MSR_IA32_TSCDEADLINE:
4714a9dc68d9SClaudio Fontana             env->tsc_deadline = msrs[i].data;
4715a9dc68d9SClaudio Fontana             break;
4716a9dc68d9SClaudio Fontana         case MSR_VM_HSAVE_PA:
4717a9dc68d9SClaudio Fontana             env->vm_hsave = msrs[i].data;
4718a9dc68d9SClaudio Fontana             break;
4719a9dc68d9SClaudio Fontana         case MSR_KVM_SYSTEM_TIME:
4720a9dc68d9SClaudio Fontana             env->system_time_msr = msrs[i].data;
4721a9dc68d9SClaudio Fontana             break;
4722a9dc68d9SClaudio Fontana         case MSR_KVM_WALL_CLOCK:
4723a9dc68d9SClaudio Fontana             env->wall_clock_msr = msrs[i].data;
4724a9dc68d9SClaudio Fontana             break;
4725a9dc68d9SClaudio Fontana         case MSR_MCG_STATUS:
4726a9dc68d9SClaudio Fontana             env->mcg_status = msrs[i].data;
4727a9dc68d9SClaudio Fontana             break;
4728a9dc68d9SClaudio Fontana         case MSR_MCG_CTL:
4729a9dc68d9SClaudio Fontana             env->mcg_ctl = msrs[i].data;
4730a9dc68d9SClaudio Fontana             break;
4731a9dc68d9SClaudio Fontana         case MSR_MCG_EXT_CTL:
4732a9dc68d9SClaudio Fontana             env->mcg_ext_ctl = msrs[i].data;
4733a9dc68d9SClaudio Fontana             break;
4734a9dc68d9SClaudio Fontana         case MSR_IA32_MISC_ENABLE:
4735a9dc68d9SClaudio Fontana             env->msr_ia32_misc_enable = msrs[i].data;
4736a9dc68d9SClaudio Fontana             break;
4737a9dc68d9SClaudio Fontana         case MSR_IA32_SMBASE:
4738a9dc68d9SClaudio Fontana             env->smbase = msrs[i].data;
4739a9dc68d9SClaudio Fontana             break;
4740a9dc68d9SClaudio Fontana         case MSR_SMI_COUNT:
4741a9dc68d9SClaudio Fontana             env->msr_smi_count = msrs[i].data;
4742a9dc68d9SClaudio Fontana             break;
4743a9dc68d9SClaudio Fontana         case MSR_IA32_FEATURE_CONTROL:
4744a9dc68d9SClaudio Fontana             env->msr_ia32_feature_control = msrs[i].data;
4745a9dc68d9SClaudio Fontana             break;
4746a9dc68d9SClaudio Fontana         case MSR_IA32_BNDCFGS:
4747a9dc68d9SClaudio Fontana             env->msr_bndcfgs = msrs[i].data;
4748a9dc68d9SClaudio Fontana             break;
4749a9dc68d9SClaudio Fontana         case MSR_IA32_XSS:
4750a9dc68d9SClaudio Fontana             env->xss = msrs[i].data;
4751a9dc68d9SClaudio Fontana             break;
4752a9dc68d9SClaudio Fontana         case MSR_IA32_UMWAIT_CONTROL:
4753a9dc68d9SClaudio Fontana             env->umwait = msrs[i].data;
4754a9dc68d9SClaudio Fontana             break;
47556aa4228bSChenyi Qiang         case MSR_IA32_PKRS:
47566aa4228bSChenyi Qiang             env->pkrs = msrs[i].data;
47576aa4228bSChenyi Qiang             break;
4758a9dc68d9SClaudio Fontana         default:
4759a9dc68d9SClaudio Fontana             if (msrs[i].index >= MSR_MC0_CTL &&
4760a9dc68d9SClaudio Fontana                 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
4761a9dc68d9SClaudio Fontana                 env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
4762a9dc68d9SClaudio Fontana             }
4763a9dc68d9SClaudio Fontana             break;
4764a9dc68d9SClaudio Fontana         case MSR_KVM_ASYNC_PF_EN:
4765a9dc68d9SClaudio Fontana             env->async_pf_en_msr = msrs[i].data;
4766a9dc68d9SClaudio Fontana             break;
4767a9dc68d9SClaudio Fontana         case MSR_KVM_ASYNC_PF_INT:
4768a9dc68d9SClaudio Fontana             env->async_pf_int_msr = msrs[i].data;
4769a9dc68d9SClaudio Fontana             break;
4770a9dc68d9SClaudio Fontana         case MSR_KVM_PV_EOI_EN:
4771a9dc68d9SClaudio Fontana             env->pv_eoi_en_msr = msrs[i].data;
4772a9dc68d9SClaudio Fontana             break;
4773a9dc68d9SClaudio Fontana         case MSR_KVM_STEAL_TIME:
4774a9dc68d9SClaudio Fontana             env->steal_time_msr = msrs[i].data;
4775a9dc68d9SClaudio Fontana             break;
4776a9dc68d9SClaudio Fontana         case MSR_KVM_POLL_CONTROL: {
4777a9dc68d9SClaudio Fontana             env->poll_control_msr = msrs[i].data;
4778a9dc68d9SClaudio Fontana             break;
4779a9dc68d9SClaudio Fontana         }
4780a9dc68d9SClaudio Fontana         case MSR_CORE_PERF_FIXED_CTR_CTRL:
4781a9dc68d9SClaudio Fontana             env->msr_fixed_ctr_ctrl = msrs[i].data;
4782a9dc68d9SClaudio Fontana             break;
4783a9dc68d9SClaudio Fontana         case MSR_CORE_PERF_GLOBAL_CTRL:
4784a9dc68d9SClaudio Fontana             env->msr_global_ctrl = msrs[i].data;
4785a9dc68d9SClaudio Fontana             break;
4786a9dc68d9SClaudio Fontana         case MSR_CORE_PERF_GLOBAL_STATUS:
4787a9dc68d9SClaudio Fontana             env->msr_global_status = msrs[i].data;
4788a9dc68d9SClaudio Fontana             break;
4789a9dc68d9SClaudio Fontana         case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
4790a9dc68d9SClaudio Fontana             env->msr_global_ovf_ctrl = msrs[i].data;
4791a9dc68d9SClaudio Fontana             break;
4792a9dc68d9SClaudio Fontana         case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
4793a9dc68d9SClaudio Fontana             env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data;
4794a9dc68d9SClaudio Fontana             break;
4795a9dc68d9SClaudio Fontana         case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
4796a9dc68d9SClaudio Fontana             env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
4797a9dc68d9SClaudio Fontana             break;
4798a9dc68d9SClaudio Fontana         case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
4799a9dc68d9SClaudio Fontana             env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
4800a9dc68d9SClaudio Fontana             break;
4801a9dc68d9SClaudio Fontana         case HV_X64_MSR_HYPERCALL:
4802a9dc68d9SClaudio Fontana             env->msr_hv_hypercall = msrs[i].data;
4803a9dc68d9SClaudio Fontana             break;
4804a9dc68d9SClaudio Fontana         case HV_X64_MSR_GUEST_OS_ID:
4805a9dc68d9SClaudio Fontana             env->msr_hv_guest_os_id = msrs[i].data;
4806a9dc68d9SClaudio Fontana             break;
4807a9dc68d9SClaudio Fontana         case HV_X64_MSR_APIC_ASSIST_PAGE:
4808a9dc68d9SClaudio Fontana             env->msr_hv_vapic = msrs[i].data;
4809a9dc68d9SClaudio Fontana             break;
4810a9dc68d9SClaudio Fontana         case HV_X64_MSR_REFERENCE_TSC:
4811a9dc68d9SClaudio Fontana             env->msr_hv_tsc = msrs[i].data;
4812a9dc68d9SClaudio Fontana             break;
4813a9dc68d9SClaudio Fontana         case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
4814a9dc68d9SClaudio Fontana             env->msr_hv_crash_params[index - HV_X64_MSR_CRASH_P0] = msrs[i].data;
4815a9dc68d9SClaudio Fontana             break;
4816a9dc68d9SClaudio Fontana         case HV_X64_MSR_VP_RUNTIME:
4817a9dc68d9SClaudio Fontana             env->msr_hv_runtime = msrs[i].data;
4818a9dc68d9SClaudio Fontana             break;
4819a9dc68d9SClaudio Fontana         case HV_X64_MSR_SCONTROL:
4820a9dc68d9SClaudio Fontana             env->msr_hv_synic_control = msrs[i].data;
4821a9dc68d9SClaudio Fontana             break;
4822a9dc68d9SClaudio Fontana         case HV_X64_MSR_SIEFP:
4823a9dc68d9SClaudio Fontana             env->msr_hv_synic_evt_page = msrs[i].data;
4824a9dc68d9SClaudio Fontana             break;
4825a9dc68d9SClaudio Fontana         case HV_X64_MSR_SIMP:
4826a9dc68d9SClaudio Fontana             env->msr_hv_synic_msg_page = msrs[i].data;
4827a9dc68d9SClaudio Fontana             break;
4828a9dc68d9SClaudio Fontana         case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
4829a9dc68d9SClaudio Fontana             env->msr_hv_synic_sint[index - HV_X64_MSR_SINT0] = msrs[i].data;
4830a9dc68d9SClaudio Fontana             break;
4831a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER0_CONFIG:
4832a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER1_CONFIG:
4833a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER2_CONFIG:
4834a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER3_CONFIG:
4835a9dc68d9SClaudio Fontana             env->msr_hv_stimer_config[(index - HV_X64_MSR_STIMER0_CONFIG)/2] =
4836a9dc68d9SClaudio Fontana                                 msrs[i].data;
4837a9dc68d9SClaudio Fontana             break;
4838a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER0_COUNT:
4839a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER1_COUNT:
4840a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER2_COUNT:
4841a9dc68d9SClaudio Fontana         case HV_X64_MSR_STIMER3_COUNT:
4842a9dc68d9SClaudio Fontana             env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] =
4843a9dc68d9SClaudio Fontana                                 msrs[i].data;
4844a9dc68d9SClaudio Fontana             break;
4845a9dc68d9SClaudio Fontana         case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
4846a9dc68d9SClaudio Fontana             env->msr_hv_reenlightenment_control = msrs[i].data;
4847a9dc68d9SClaudio Fontana             break;
4848a9dc68d9SClaudio Fontana         case HV_X64_MSR_TSC_EMULATION_CONTROL:
4849a9dc68d9SClaudio Fontana             env->msr_hv_tsc_emulation_control = msrs[i].data;
4850a9dc68d9SClaudio Fontana             break;
4851a9dc68d9SClaudio Fontana         case HV_X64_MSR_TSC_EMULATION_STATUS:
4852a9dc68d9SClaudio Fontana             env->msr_hv_tsc_emulation_status = msrs[i].data;
4853a9dc68d9SClaudio Fontana             break;
485473d24074SJon Doron         case HV_X64_MSR_SYNDBG_OPTIONS:
485573d24074SJon Doron             env->msr_hv_syndbg_options = msrs[i].data;
485673d24074SJon Doron             break;
4857a9dc68d9SClaudio Fontana         case MSR_MTRRdefType:
4858a9dc68d9SClaudio Fontana             env->mtrr_deftype = msrs[i].data;
4859a9dc68d9SClaudio Fontana             break;
4860a9dc68d9SClaudio Fontana         case MSR_MTRRfix64K_00000:
4861a9dc68d9SClaudio Fontana             env->mtrr_fixed[0] = msrs[i].data;
4862a9dc68d9SClaudio Fontana             break;
4863a9dc68d9SClaudio Fontana         case MSR_MTRRfix16K_80000:
4864a9dc68d9SClaudio Fontana             env->mtrr_fixed[1] = msrs[i].data;
4865a9dc68d9SClaudio Fontana             break;
4866a9dc68d9SClaudio Fontana         case MSR_MTRRfix16K_A0000:
4867a9dc68d9SClaudio Fontana             env->mtrr_fixed[2] = msrs[i].data;
4868a9dc68d9SClaudio Fontana             break;
4869a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_C0000:
4870a9dc68d9SClaudio Fontana             env->mtrr_fixed[3] = msrs[i].data;
4871a9dc68d9SClaudio Fontana             break;
4872a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_C8000:
4873a9dc68d9SClaudio Fontana             env->mtrr_fixed[4] = msrs[i].data;
4874a9dc68d9SClaudio Fontana             break;
4875a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_D0000:
4876a9dc68d9SClaudio Fontana             env->mtrr_fixed[5] = msrs[i].data;
4877a9dc68d9SClaudio Fontana             break;
4878a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_D8000:
4879a9dc68d9SClaudio Fontana             env->mtrr_fixed[6] = msrs[i].data;
4880a9dc68d9SClaudio Fontana             break;
4881a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_E0000:
4882a9dc68d9SClaudio Fontana             env->mtrr_fixed[7] = msrs[i].data;
4883a9dc68d9SClaudio Fontana             break;
4884a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_E8000:
4885a9dc68d9SClaudio Fontana             env->mtrr_fixed[8] = msrs[i].data;
4886a9dc68d9SClaudio Fontana             break;
4887a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_F0000:
4888a9dc68d9SClaudio Fontana             env->mtrr_fixed[9] = msrs[i].data;
4889a9dc68d9SClaudio Fontana             break;
4890a9dc68d9SClaudio Fontana         case MSR_MTRRfix4K_F8000:
4891a9dc68d9SClaudio Fontana             env->mtrr_fixed[10] = msrs[i].data;
4892a9dc68d9SClaudio Fontana             break;
4893a9dc68d9SClaudio Fontana         case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
4894a9dc68d9SClaudio Fontana             if (index & 1) {
4895a9dc68d9SClaudio Fontana                 env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data |
4896a9dc68d9SClaudio Fontana                                                                mtrr_top_bits;
4897a9dc68d9SClaudio Fontana             } else {
4898a9dc68d9SClaudio Fontana                 env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
4899a9dc68d9SClaudio Fontana             }
4900a9dc68d9SClaudio Fontana             break;
4901a9dc68d9SClaudio Fontana         case MSR_IA32_SPEC_CTRL:
4902a9dc68d9SClaudio Fontana             env->spec_ctrl = msrs[i].data;
4903a9dc68d9SClaudio Fontana             break;
4904cabf9862SMaxim Levitsky         case MSR_AMD64_TSC_RATIO:
4905cabf9862SMaxim Levitsky             env->amd_tsc_scale_msr = msrs[i].data;
4906cabf9862SMaxim Levitsky             break;
4907a9dc68d9SClaudio Fontana         case MSR_IA32_TSX_CTRL:
4908a9dc68d9SClaudio Fontana             env->tsx_ctrl = msrs[i].data;
4909a9dc68d9SClaudio Fontana             break;
4910a9dc68d9SClaudio Fontana         case MSR_VIRT_SSBD:
4911a9dc68d9SClaudio Fontana             env->virt_ssbd = msrs[i].data;
4912a9dc68d9SClaudio Fontana             break;
4913a9dc68d9SClaudio Fontana         case MSR_IA32_RTIT_CTL:
4914a9dc68d9SClaudio Fontana             env->msr_rtit_ctrl = msrs[i].data;
4915a9dc68d9SClaudio Fontana             break;
4916a9dc68d9SClaudio Fontana         case MSR_IA32_RTIT_STATUS:
4917a9dc68d9SClaudio Fontana             env->msr_rtit_status = msrs[i].data;
4918a9dc68d9SClaudio Fontana             break;
4919a9dc68d9SClaudio Fontana         case MSR_IA32_RTIT_OUTPUT_BASE:
4920a9dc68d9SClaudio Fontana             env->msr_rtit_output_base = msrs[i].data;
4921a9dc68d9SClaudio Fontana             break;
4922a9dc68d9SClaudio Fontana         case MSR_IA32_RTIT_OUTPUT_MASK:
4923a9dc68d9SClaudio Fontana             env->msr_rtit_output_mask = msrs[i].data;
4924a9dc68d9SClaudio Fontana             break;
4925a9dc68d9SClaudio Fontana         case MSR_IA32_RTIT_CR3_MATCH:
4926a9dc68d9SClaudio Fontana             env->msr_rtit_cr3_match = msrs[i].data;
4927a9dc68d9SClaudio Fontana             break;
4928a9dc68d9SClaudio Fontana         case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
4929a9dc68d9SClaudio Fontana             env->msr_rtit_addrs[index - MSR_IA32_RTIT_ADDR0_A] = msrs[i].data;
4930a9dc68d9SClaudio Fontana             break;
4931db888065SSean Christopherson         case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
4932db888065SSean Christopherson             env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
4933db888065SSean Christopherson                            msrs[i].data;
4934db888065SSean Christopherson             break;
4935cdec2b75SZeng Guang         case MSR_IA32_XFD:
4936cdec2b75SZeng Guang             env->msr_xfd = msrs[i].data;
4937cdec2b75SZeng Guang             break;
4938cdec2b75SZeng Guang         case MSR_IA32_XFD_ERR:
4939cdec2b75SZeng Guang             env->msr_xfd_err = msrs[i].data;
4940cdec2b75SZeng Guang             break;
494112703d4eSYang Weijiang         case MSR_ARCH_LBR_CTL:
494212703d4eSYang Weijiang             env->msr_lbr_ctl = msrs[i].data;
494312703d4eSYang Weijiang             break;
494412703d4eSYang Weijiang         case MSR_ARCH_LBR_DEPTH:
494512703d4eSYang Weijiang             env->msr_lbr_depth = msrs[i].data;
494612703d4eSYang Weijiang             break;
494712703d4eSYang Weijiang         case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
494812703d4eSYang Weijiang             env->lbr_records[index - MSR_ARCH_LBR_FROM_0].from = msrs[i].data;
494912703d4eSYang Weijiang             break;
495012703d4eSYang Weijiang         case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
495112703d4eSYang Weijiang             env->lbr_records[index - MSR_ARCH_LBR_TO_0].to = msrs[i].data;
495212703d4eSYang Weijiang             break;
495312703d4eSYang Weijiang         case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
495412703d4eSYang Weijiang             env->lbr_records[index - MSR_ARCH_LBR_INFO_0].info = msrs[i].data;
495512703d4eSYang Weijiang             break;
4956b5151aceSGao Shiyuan         case MSR_K7_HWCR:
4957b5151aceSGao Shiyuan             env->msr_hwcr = msrs[i].data;
4958b5151aceSGao Shiyuan             break;
4959a9dc68d9SClaudio Fontana         }
4960a9dc68d9SClaudio Fontana     }
4961a9dc68d9SClaudio Fontana 
4962a9dc68d9SClaudio Fontana     return 0;
4963a9dc68d9SClaudio Fontana }
4964a9dc68d9SClaudio Fontana 
/*
 * Write the multiprocessing state cached in cpu->env.mp_state to the vCPU
 * with KVM_SET_MP_STATE.
 *
 * Returns whatever kvm_vcpu_ioctl() returns.
 */
static int kvm_put_mp_state(X86CPU *cpu)
{
    struct kvm_mp_state state = {};

    state.mp_state = cpu->env.mp_state;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &state);
}
4971a9dc68d9SClaudio Fontana 
/*
 * Read the vCPU's multiprocessing state with KVM_GET_MP_STATE and cache it
 * in cpu->env.mp_state.
 *
 * When the irqchip lives in the kernel, cs->halted is also refreshed so that
 * it is set exactly when KVM reports KVM_MP_STATE_HALTED.
 *
 * Returns 0 on success, or the negative value from kvm_vcpu_ioctl().
 */
static int kvm_get_mp_state(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_mp_state state;
    int rc = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &state);

    if (rc < 0) {
        return rc;
    }

    cpu->env.mp_state = state.mp_state;

    if (!kvm_irqchip_in_kernel()) {
        return 0;
    }

    cs->halted = (state.mp_state == KVM_MP_STATE_HALTED);
    return 0;
}
4989a9dc68d9SClaudio Fontana 
/*
 * Fetch the local APIC state from KVM (KVM_GET_LAPIC) and hand it to
 * kvm_get_apic_state() for the CPU's APIC device.
 *
 * Nothing is done when the CPU has no APIC device or when the irqchip is
 * not in the kernel.
 *
 * Returns 0 on success or when skipped, or the negative value from
 * kvm_vcpu_ioctl().
 */
static int kvm_get_apic(X86CPU *cpu)
{
    DeviceState *apic = cpu->apic_state;
    struct kvm_lapic_state lapic;
    int rc;

    if (!apic || !kvm_irqchip_in_kernel()) {
        return 0;
    }

    rc = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &lapic);
    if (rc < 0) {
        return rc;
    }

    kvm_get_apic_state(apic, &lapic);
    return 0;
}
5006a9dc68d9SClaudio Fontana 
/*
 * Build a struct kvm_vcpu_events from the event state cached in cpu->env
 * (exceptions, interrupts, NMIs, SIPI vector, SMM and triple-fault state)
 * and push it to the vCPU with KVM_SET_VCPU_EVENTS.
 *
 * @cpu:   the vCPU to update
 * @level: KVM_PUT_* level; at KVM_PUT_RESET_STATE or above the NMI-pending
 *         field (and, if mp_state is SIPI_RECEIVED, the SIPI vector) is
 *         flagged as valid
 *
 * Side effect: when SMM state is supported and the irqchip is in the kernel,
 * CPU_INTERRUPT_SMI and CPU_INTERRUPT_INIT are cleared from
 * cs->interrupt_request after being copied into the events structure.
 *
 * Returns whatever kvm_vcpu_ioctl() returns.
 */
static int kvm_put_vcpu_events(X86CPU *cpu, int level)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    /* Zero-initialised, so events.flags starts out empty. */
    struct kvm_vcpu_events events = {};

    if (has_exception_payload) {
        events.flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
        events.exception.pending = env->exception_pending;
        events.exception_has_payload = env->exception_has_payload;
        events.exception_payload = env->exception_payload;
    }
    events.exception.nr = env->exception_nr;
    events.exception.injected = env->exception_injected;
    events.exception.has_error_code = env->has_error_code;
    events.exception.error_code = env->error_code;

    /* A negative interrupt_injected means "no interrupt injected". */
    events.interrupt.injected = (env->interrupt_injected >= 0);
    events.interrupt.nr = env->interrupt_injected;
    events.interrupt.soft = env->soft_interrupt;

    events.nmi.injected = env->nmi_injected;
    events.nmi.pending = env->nmi_pending;
    events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);

    events.sipi_vector = env->sipi_vector;

    if (has_msr_smbase) {
        events.flags |= KVM_VCPUEVENT_VALID_SMM;
        events.smi.smm = !!(env->hflags & HF_SMM_MASK);
        events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK);
        if (kvm_irqchip_in_kernel()) {
            /*
             * As soon as these are moved to the kernel, remove them
             * from cs->interrupt_request.
             *
             * The smi.* members are single-byte flags in the KVM UAPI, so
             * collapse the masked value to 0/1 (as done for smi.smm above).
             * Storing the raw mask would truncate any bit above bit 7 to 0
             * and the event would be lost once it is cleared below.
             */
            events.smi.pending =
                !!(cs->interrupt_request & CPU_INTERRUPT_SMI);
            events.smi.latched_init =
                !!(cs->interrupt_request & CPU_INTERRUPT_INIT);
            cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI);
        } else {
            /* Keep these in cs->interrupt_request.  */
            events.smi.pending = 0;
            events.smi.latched_init = 0;
        }
    }

    if (level >= KVM_PUT_RESET_STATE) {
        events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
        if (env->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
            events.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR;
        }
    }

    if (has_triple_fault_event) {
        events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
        events.triple_fault.pending = env->triple_fault_pending;
    }

    return kvm_vcpu_ioctl(cs, KVM_SET_VCPU_EVENTS, &events);
}
5068a9dc68d9SClaudio Fontana 
/*
 * Read the vCPU's event state with KVM_GET_VCPU_EVENTS and mirror it into
 * cpu->env: exception, interrupt and NMI state, the SIPI vector and, when
 * KVM flags them as valid, the SMM and triple-fault state.
 *
 * SMI-pending and latched-INIT are reflected through cpu_interrupt() /
 * cpu_reset_interrupt() rather than being stored in env.
 *
 * Returns 0 on success, or the negative value from kvm_vcpu_ioctl().
 */
static int kvm_get_vcpu_events(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    struct kvm_vcpu_events ev;
    int rc;

    memset(&ev, 0, sizeof(ev));
    rc = kvm_vcpu_ioctl(cs, KVM_GET_VCPU_EVENTS, &ev);
    if (rc < 0) {
        return rc;
    }

    /* Exception state; the payload fields only count if KVM says so. */
    if (ev.flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
        env->exception_pending = ev.exception.pending;
        env->exception_has_payload = ev.exception_has_payload;
        env->exception_payload = ev.exception_payload;
    } else {
        env->exception_pending = 0;
        env->exception_has_payload = false;
    }
    env->exception_injected = ev.exception.injected;
    if (env->exception_pending || env->exception_injected) {
        env->exception_nr = ev.exception.nr;
    } else {
        /* Neither pending nor injected: record "no exception". */
        env->exception_nr = -1;
    }
    env->has_error_code = ev.exception.has_error_code;
    env->error_code = ev.exception.error_code;

    /* Interrupt state; -1 stands for "nothing injected". */
    if (ev.interrupt.injected) {
        env->interrupt_injected = ev.interrupt.nr;
    } else {
        env->interrupt_injected = -1;
    }
    env->soft_interrupt = ev.interrupt.soft;

    /* NMI state. */
    env->nmi_injected = ev.nmi.injected;
    env->nmi_pending = ev.nmi.pending;
    env->hflags2 = ev.nmi.masked ? (env->hflags2 | HF2_NMI_MASK)
                                 : (env->hflags2 & ~HF2_NMI_MASK);

    /* SMM state. */
    if (ev.flags & KVM_VCPUEVENT_VALID_SMM) {
        env->hflags = ev.smi.smm ? (env->hflags | HF_SMM_MASK)
                                 : (env->hflags & ~HF_SMM_MASK);

        if (ev.smi.pending) {
            cpu_interrupt(cs, CPU_INTERRUPT_SMI);
        } else {
            cpu_reset_interrupt(cs, CPU_INTERRUPT_SMI);
        }

        env->hflags2 = ev.smi.smm_inside_nmi
                       ? (env->hflags2 | HF2_SMM_INSIDE_NMI_MASK)
                       : (env->hflags2 & ~HF2_SMM_INSIDE_NMI_MASK);

        if (ev.smi.latched_init) {
            cpu_interrupt(cs, CPU_INTERRUPT_INIT);
        } else {
            cpu_reset_interrupt(cs, CPU_INTERRUPT_INIT);
        }
    }

    /* Triple-fault state is left untouched unless KVM reports it. */
    if (ev.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) {
        env->triple_fault_pending = ev.triple_fault.pending;
    }

    env->sipi_vector = ev.sipi_vector;

    return 0;
}
5139a9dc68d9SClaudio Fontana 
/*
 * Copy the debug registers cached in cpu->env.dr[] (DR0-DR3, DR6, DR7) into
 * a zeroed struct kvm_debugregs and push it to the vCPU with
 * KVM_SET_DEBUGREGS.
 *
 * Returns whatever kvm_vcpu_ioctl() returns.
 */
static int kvm_put_debugregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_debugregs regs;
    int n;

    memset(&regs, 0, sizeof(regs));

    /* Breakpoint address registers DR0..DR3. */
    for (n = 0; n < 4; n++) {
        regs.db[n] = env->dr[n];
    }

    regs.dr6 = env->dr[6];
    regs.dr7 = env->dr[7];
    regs.flags = 0;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &regs);
}
5156a9dc68d9SClaudio Fontana 
/*
 * Read the vCPU's debug registers with KVM_GET_DEBUGREGS and cache them in
 * cpu->env.dr[].
 *
 * DR0-DR3 are copied one-to-one.  KVM's dr6 is stored into both dr[6] and
 * dr[4], and dr7 into both dr[7] and dr[5].
 *
 * Returns 0 on success, or the negative value from kvm_vcpu_ioctl().
 */
static int kvm_get_debugregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_debugregs regs;
    int n;
    int rc = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &regs);

    if (rc < 0) {
        return rc;
    }

    for (n = 0; n < 4; n++) {
        env->dr[n] = regs.db[n];
    }

    env->dr[6] = regs.dr6;
    env->dr[4] = env->dr[6];

    env->dr[7] = regs.dr7;
    env->dr[5] = env->dr[7];

    return 0;
}
5175a9dc68d9SClaudio Fontana 
/*
 * Send env->nested_state to KVM, after refreshing the flag bits that are
 * derived from env->hflags and env->hflags2.
 *
 * Does nothing and returns 0 when no nested state buffer exists; otherwise
 * returns the result of the KVM_SET_NESTED_STATE vCPU ioctl.
 */
static int kvm_put_nested_state(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    int max_nested_state_len = kvm_max_nested_state_length();

    if (env->nested_state == NULL) {
        return 0;
    }

    /*
     * Copy flags that are affected by reset from env->hflags and env->hflags2.
     */
    env->nested_state->flags = (env->hflags & HF_GUEST_MASK)
        ? (env->nested_state->flags | KVM_STATE_NESTED_GUEST_MODE)
        : (env->nested_state->flags & ~KVM_STATE_NESTED_GUEST_MODE);

    /* Don't set KVM_STATE_NESTED_GIF_SET on VMX as it is illegal */
    env->nested_state->flags =
        (cpu_has_svm(env) && (env->hflags2 & HF2_GIF_MASK))
        ? (env->nested_state->flags | KVM_STATE_NESTED_GIF_SET)
        : (env->nested_state->flags & ~KVM_STATE_NESTED_GIF_SET);

    /* The buffer must never claim more than the kernel-reported maximum. */
    assert(env->nested_state->size <= max_nested_state_len);

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state);
}
5204a9dc68d9SClaudio Fontana 
/*
 * Read the nested virtualization state from KVM into env->nested_state and
 * mirror its guest-mode / GIF flags into env->hflags and env->hflags2.
 *
 * Does nothing and returns 0 when no nested state buffer exists. Otherwise
 * returns the (negative) ioctl error, or the ioctl's return value on success.
 */
static int kvm_get_nested_state(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    int max_nested_state_len = kvm_max_nested_state_length();
    int ret;

    if (env->nested_state == NULL) {
        return 0;
    }

    /*
     * Migration may have restored a nested_state->hdr.size that is smaller
     * than what our kernel supports. That size is kept for the call to
     * KVM_SET_NESTED_STATE, but KVM_GET_NESTED_STATE should always be given
     * the maximum size our kernel supports, so reset it here.
     */
    env->nested_state->size = max_nested_state_len;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state);
    if (ret < 0) {
        return ret;
    }

    /*
     * Copy flags that are affected by reset to env->hflags and env->hflags2.
     */
    env->hflags = (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE)
        ? (env->hflags | HF_GUEST_MASK)
        : (env->hflags & ~HF_GUEST_MASK);

    /* Keep HF2_GIF_MASK set on !SVM as x86_cpu_pending_interrupt() needs it */
    if (cpu_has_svm(env)) {
        env->hflags2 = (env->nested_state->flags & KVM_STATE_NESTED_GIF_SET)
            ? (env->hflags2 | HF2_GIF_MASK)
            : (env->hflags2 & ~HF2_GIF_MASK);
    }

    return ret;
}
5249a9dc68d9SClaudio Fontana 
kvm_arch_put_registers(CPUState * cpu,int level,Error ** errp)5250a1676bb3SJulia Suvorova int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp)
5251a9dc68d9SClaudio Fontana {
5252a9dc68d9SClaudio Fontana     X86CPU *x86_cpu = X86_CPU(cpu);
5253a9dc68d9SClaudio Fontana     int ret;
5254a9dc68d9SClaudio Fontana 
5255a9dc68d9SClaudio Fontana     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
5256a9dc68d9SClaudio Fontana 
525745ed68a1SVitaly Kuznetsov     /*
525845ed68a1SVitaly Kuznetsov      * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
525945ed68a1SVitaly Kuznetsov      * root operation upon vCPU reset. kvm_put_msr_feature_control() should also
5260bad5cfcdSMichael Tokarev      * precede kvm_put_nested_state() when 'real' nested state is set.
526145ed68a1SVitaly Kuznetsov      */
526245ed68a1SVitaly Kuznetsov     if (level >= KVM_PUT_RESET_STATE) {
526345ed68a1SVitaly Kuznetsov         ret = kvm_put_msr_feature_control(x86_cpu);
526445ed68a1SVitaly Kuznetsov         if (ret < 0) {
5265fc058618SJulia Suvorova             error_setg_errno(errp, -ret, "Failed to set feature control MSR");
526645ed68a1SVitaly Kuznetsov             return ret;
526745ed68a1SVitaly Kuznetsov         }
526845ed68a1SVitaly Kuznetsov     }
526945ed68a1SVitaly Kuznetsov 
5270a9dc68d9SClaudio Fontana     /* must be before kvm_put_nested_state so that EFER.SVME is set */
52718f515d38SMaxim Levitsky     ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu);
5272a9dc68d9SClaudio Fontana     if (ret < 0) {
5273fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set special registers");
5274a9dc68d9SClaudio Fontana         return ret;
5275a9dc68d9SClaudio Fontana     }
5276a9dc68d9SClaudio Fontana 
5277a9dc68d9SClaudio Fontana     if (level >= KVM_PUT_RESET_STATE) {
5278a9dc68d9SClaudio Fontana         ret = kvm_put_nested_state(x86_cpu);
5279a9dc68d9SClaudio Fontana         if (ret < 0) {
5280fc058618SJulia Suvorova             error_setg_errno(errp, -ret, "Failed to set nested state");
5281a9dc68d9SClaudio Fontana             return ret;
5282a9dc68d9SClaudio Fontana         }
5283a9dc68d9SClaudio Fontana     }
5284a9dc68d9SClaudio Fontana 
5285a9dc68d9SClaudio Fontana     if (level == KVM_PUT_FULL_STATE) {
5286a9dc68d9SClaudio Fontana         /* We don't check for kvm_arch_set_tsc_khz() errors here,
5287a9dc68d9SClaudio Fontana          * because TSC frequency mismatch shouldn't abort migration,
5288a9dc68d9SClaudio Fontana          * unless the user explicitly asked for a more strict TSC
5289a9dc68d9SClaudio Fontana          * setting (e.g. using an explicit "tsc-freq" option).
5290a9dc68d9SClaudio Fontana          */
5291a9dc68d9SClaudio Fontana         kvm_arch_set_tsc_khz(cpu);
5292a9dc68d9SClaudio Fontana     }
5293a9dc68d9SClaudio Fontana 
5294c345104cSJoao Martins #ifdef CONFIG_XEN_EMU
5295c345104cSJoao Martins     if (xen_mode == XEN_EMULATE && level == KVM_PUT_FULL_STATE) {
5296c345104cSJoao Martins         ret = kvm_put_xen_state(cpu);
5297c345104cSJoao Martins         if (ret < 0) {
5298fc058618SJulia Suvorova             error_setg_errno(errp, -ret, "Failed to set Xen state");
5299c345104cSJoao Martins             return ret;
5300c345104cSJoao Martins         }
5301c345104cSJoao Martins     }
5302c345104cSJoao Martins #endif
5303c345104cSJoao Martins 
5304a9dc68d9SClaudio Fontana     ret = kvm_getput_regs(x86_cpu, 1);
5305a9dc68d9SClaudio Fontana     if (ret < 0) {
5306fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set general purpose registers");
5307a9dc68d9SClaudio Fontana         return ret;
5308a9dc68d9SClaudio Fontana     }
5309a9dc68d9SClaudio Fontana     ret = kvm_put_xsave(x86_cpu);
5310a9dc68d9SClaudio Fontana     if (ret < 0) {
5311fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set XSAVE");
5312a9dc68d9SClaudio Fontana         return ret;
5313a9dc68d9SClaudio Fontana     }
5314a9dc68d9SClaudio Fontana     ret = kvm_put_xcrs(x86_cpu);
5315a9dc68d9SClaudio Fontana     if (ret < 0) {
5316fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set XCRs");
5317a9dc68d9SClaudio Fontana         return ret;
5318a9dc68d9SClaudio Fontana     }
5319a9dc68d9SClaudio Fontana     ret = kvm_put_msrs(x86_cpu, level);
5320a9dc68d9SClaudio Fontana     if (ret < 0) {
5321fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set MSRs");
5322a9dc68d9SClaudio Fontana         return ret;
5323a9dc68d9SClaudio Fontana     }
5324a9dc68d9SClaudio Fontana     ret = kvm_put_vcpu_events(x86_cpu, level);
5325a9dc68d9SClaudio Fontana     if (ret < 0) {
5326fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set vCPU events");
5327a9dc68d9SClaudio Fontana         return ret;
5328a9dc68d9SClaudio Fontana     }
5329a9dc68d9SClaudio Fontana     if (level >= KVM_PUT_RESET_STATE) {
5330a9dc68d9SClaudio Fontana         ret = kvm_put_mp_state(x86_cpu);
5331a9dc68d9SClaudio Fontana         if (ret < 0) {
5332fc058618SJulia Suvorova             error_setg_errno(errp, -ret, "Failed to set MP state");
5333a9dc68d9SClaudio Fontana             return ret;
5334a9dc68d9SClaudio Fontana         }
5335a9dc68d9SClaudio Fontana     }
5336a9dc68d9SClaudio Fontana 
5337a9dc68d9SClaudio Fontana     ret = kvm_put_tscdeadline_msr(x86_cpu);
5338a9dc68d9SClaudio Fontana     if (ret < 0) {
5339fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set TSC deadline MSR");
5340a9dc68d9SClaudio Fontana         return ret;
5341a9dc68d9SClaudio Fontana     }
5342a9dc68d9SClaudio Fontana     ret = kvm_put_debugregs(x86_cpu);
5343a9dc68d9SClaudio Fontana     if (ret < 0) {
5344fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to set debug registers");
5345a9dc68d9SClaudio Fontana         return ret;
5346a9dc68d9SClaudio Fontana     }
5347a9dc68d9SClaudio Fontana     return 0;
5348a9dc68d9SClaudio Fontana }
5349a9dc68d9SClaudio Fontana 
kvm_arch_get_registers(CPUState * cs,Error ** errp)5350a1676bb3SJulia Suvorova int kvm_arch_get_registers(CPUState *cs, Error **errp)
5351a9dc68d9SClaudio Fontana {
5352a9dc68d9SClaudio Fontana     X86CPU *cpu = X86_CPU(cs);
5353a9dc68d9SClaudio Fontana     int ret;
5354a9dc68d9SClaudio Fontana 
5355a9dc68d9SClaudio Fontana     assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs));
5356a9dc68d9SClaudio Fontana 
5357a9dc68d9SClaudio Fontana     ret = kvm_get_vcpu_events(cpu);
5358a9dc68d9SClaudio Fontana     if (ret < 0) {
5359fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get vCPU events");
5360a9dc68d9SClaudio Fontana         goto out;
5361a9dc68d9SClaudio Fontana     }
5362a9dc68d9SClaudio Fontana     /*
5363a9dc68d9SClaudio Fontana      * KVM_GET_MPSTATE can modify CS and RIP, call it before
5364a9dc68d9SClaudio Fontana      * KVM_GET_REGS and KVM_GET_SREGS.
5365a9dc68d9SClaudio Fontana      */
5366a9dc68d9SClaudio Fontana     ret = kvm_get_mp_state(cpu);
5367a9dc68d9SClaudio Fontana     if (ret < 0) {
5368fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get MP state");
5369a9dc68d9SClaudio Fontana         goto out;
5370a9dc68d9SClaudio Fontana     }
5371a9dc68d9SClaudio Fontana     ret = kvm_getput_regs(cpu, 0);
5372a9dc68d9SClaudio Fontana     if (ret < 0) {
5373fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get general purpose registers");
5374a9dc68d9SClaudio Fontana         goto out;
5375a9dc68d9SClaudio Fontana     }
5376a9dc68d9SClaudio Fontana     ret = kvm_get_xsave(cpu);
5377a9dc68d9SClaudio Fontana     if (ret < 0) {
5378fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get XSAVE");
5379a9dc68d9SClaudio Fontana         goto out;
5380a9dc68d9SClaudio Fontana     }
5381a9dc68d9SClaudio Fontana     ret = kvm_get_xcrs(cpu);
5382a9dc68d9SClaudio Fontana     if (ret < 0) {
5383fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get XCRs");
5384a9dc68d9SClaudio Fontana         goto out;
5385a9dc68d9SClaudio Fontana     }
53868f515d38SMaxim Levitsky     ret = has_sregs2 ? kvm_get_sregs2(cpu) : kvm_get_sregs(cpu);
5387a9dc68d9SClaudio Fontana     if (ret < 0) {
5388fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get special registers");
5389a9dc68d9SClaudio Fontana         goto out;
5390a9dc68d9SClaudio Fontana     }
5391a9dc68d9SClaudio Fontana     ret = kvm_get_msrs(cpu);
5392a9dc68d9SClaudio Fontana     if (ret < 0) {
5393fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get MSRs");
5394a9dc68d9SClaudio Fontana         goto out;
5395a9dc68d9SClaudio Fontana     }
5396a9dc68d9SClaudio Fontana     ret = kvm_get_apic(cpu);
5397a9dc68d9SClaudio Fontana     if (ret < 0) {
5398fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get APIC");
5399a9dc68d9SClaudio Fontana         goto out;
5400a9dc68d9SClaudio Fontana     }
5401a9dc68d9SClaudio Fontana     ret = kvm_get_debugregs(cpu);
5402a9dc68d9SClaudio Fontana     if (ret < 0) {
5403fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get debug registers");
5404a9dc68d9SClaudio Fontana         goto out;
5405a9dc68d9SClaudio Fontana     }
5406a9dc68d9SClaudio Fontana     ret = kvm_get_nested_state(cpu);
5407a9dc68d9SClaudio Fontana     if (ret < 0) {
5408fc058618SJulia Suvorova         error_setg_errno(errp, -ret, "Failed to get nested state");
5409a9dc68d9SClaudio Fontana         goto out;
5410a9dc68d9SClaudio Fontana     }
5411c345104cSJoao Martins #ifdef CONFIG_XEN_EMU
5412c345104cSJoao Martins     if (xen_mode == XEN_EMULATE) {
5413c345104cSJoao Martins         ret = kvm_get_xen_state(cs);
5414c345104cSJoao Martins         if (ret < 0) {
5415fc058618SJulia Suvorova             error_setg_errno(errp, -ret, "Failed to get Xen state");
5416c345104cSJoao Martins             goto out;
5417c345104cSJoao Martins         }
5418c345104cSJoao Martins     }
5419c345104cSJoao Martins #endif
5420a9dc68d9SClaudio Fontana     ret = 0;
5421a9dc68d9SClaudio Fontana  out:
5422a9dc68d9SClaudio Fontana     cpu_sync_bndcs_hflags(&cpu->env);
5423a9dc68d9SClaudio Fontana     return ret;
5424a9dc68d9SClaudio Fontana }
5425a9dc68d9SClaudio Fontana 
/*
 * Hook run before entering the guest.
 *
 * Delivers pending NMI/SMI requests through the KVM_NMI / KVM_SMI ioctls,
 * raises exit_request for pending INIT (outside SMM) or TPR reports, and,
 * when the PIC is not in the kernel, injects a pending hard interrupt,
 * programs the interrupt-window request and writes the APIC TPR to run->cr8.
 *
 * Injection failures are only reported on stderr; the event is lost.
 */
void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    bool user_pic;
    int ret;

    /* Inject NMI */
    if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            /* The request bit is cleared under the BQL. */
            bql_lock();
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            bql_unlock();
            DPRINTF("injected NMI\n");
            ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
            if (ret < 0) {
                fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
                        strerror(-ret));
            }
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            bql_lock();
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
            bql_unlock();
            DPRINTF("injected SMI\n");
            ret = kvm_vcpu_ioctl(cpu, KVM_SMI);
            if (ret < 0) {
                fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n",
                        strerror(-ret));
            }
        }
    }

    /*
     * Without an in-kernel PIC, the BQL is held from here until the end of
     * the function.
     */
    user_pic = !kvm_pic_in_kernel();
    if (user_pic) {
        bql_lock();
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests
     * or (for userspace APIC, but it is cheap to combine the checks here)
     * pending TPR access reports.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if (((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
             !(env->hflags & HF_SMM_MASK)) ||
            (cpu->interrupt_request & CPU_INTERRUPT_TPR)) {
            cpu->exit_request = 1;
        }
    }

    if (user_pic) {
        /* Try to inject an interrupt if the guest can accept it */
        if (run->ready_for_interrupt_injection &&
            (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
            (env->eflags & IF_MASK)) {
            int irq;

            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            irq = cpu_get_pic_interrupt(env);
            if (irq >= 0) {
                struct kvm_interrupt intr = { .irq = irq };

                DPRINTF("injected interrupt %d\n", irq);
                ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
                if (ret < 0) {
                    fprintf(stderr,
                            "KVM: injection failed, interrupt lost (%s)\n",
                            strerror(-ret));
                }
            }
        }

        /*
         * If we have an interrupt but the guest is not ready to receive an
         * interrupt, request an interrupt window exit.  This will
         * cause a return to userspace as soon as the guest is ready to
         * receive interrupts.
         */
        run->request_interrupt_window =
            (cpu->interrupt_request & CPU_INTERRUPT_HARD) ? 1 : 0;

        DPRINTF("setting tpr\n");
        run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);

        bql_unlock();
    }
}
5515a9dc68d9SClaudio Fontana 
kvm_rate_limit_on_bus_lock(void)5516035d1ef2SChenyi Qiang static void kvm_rate_limit_on_bus_lock(void)
5517035d1ef2SChenyi Qiang {
5518035d1ef2SChenyi Qiang     uint64_t delay_ns = ratelimit_calculate_delay(&bus_lock_ratelimit_ctrl, 1);
5519035d1ef2SChenyi Qiang 
5520035d1ef2SChenyi Qiang     if (delay_ns) {
5521035d1ef2SChenyi Qiang         g_usleep(delay_ns / SCALE_US);
5522035d1ef2SChenyi Qiang     }
5523035d1ef2SChenyi Qiang }
5524035d1ef2SChenyi Qiang 
kvm_arch_post_run(CPUState * cpu,struct kvm_run * run)5525a9dc68d9SClaudio Fontana MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
5526a9dc68d9SClaudio Fontana {
5527a9dc68d9SClaudio Fontana     X86CPU *x86_cpu = X86_CPU(cpu);
5528a9dc68d9SClaudio Fontana     CPUX86State *env = &x86_cpu->env;
5529a9dc68d9SClaudio Fontana 
5530a9dc68d9SClaudio Fontana     if (run->flags & KVM_RUN_X86_SMM) {
5531a9dc68d9SClaudio Fontana         env->hflags |= HF_SMM_MASK;
5532a9dc68d9SClaudio Fontana     } else {
5533a9dc68d9SClaudio Fontana         env->hflags &= ~HF_SMM_MASK;
5534a9dc68d9SClaudio Fontana     }
5535a9dc68d9SClaudio Fontana     if (run->if_flag) {
5536a9dc68d9SClaudio Fontana         env->eflags |= IF_MASK;
5537a9dc68d9SClaudio Fontana     } else {
5538a9dc68d9SClaudio Fontana         env->eflags &= ~IF_MASK;
5539a9dc68d9SClaudio Fontana     }
5540035d1ef2SChenyi Qiang     if (run->flags & KVM_RUN_X86_BUS_LOCK) {
5541035d1ef2SChenyi Qiang         kvm_rate_limit_on_bus_lock();
5542035d1ef2SChenyi Qiang     }
5543a9dc68d9SClaudio Fontana 
5544ddc7cb30SMiroslav Rezanina #ifdef CONFIG_XEN_EMU
5545ddf0fd9aSDavid Woodhouse     /*
5546ddf0fd9aSDavid Woodhouse      * If the callback is asserted as a GSI (or PCI INTx) then check if
5547ddf0fd9aSDavid Woodhouse      * vcpu_info->evtchn_upcall_pending has been cleared, and deassert
5548ddf0fd9aSDavid Woodhouse      * the callback IRQ if so. Ideally we could hook into the PIC/IOAPIC
5549ddf0fd9aSDavid Woodhouse      * EOI and only resample then, exactly how the VFIO eventfd pairs
5550ddf0fd9aSDavid Woodhouse      * are designed to work for level triggered interrupts.
5551ddf0fd9aSDavid Woodhouse      */
5552ddf0fd9aSDavid Woodhouse     if (x86_cpu->env.xen_callback_asserted) {
5553ddf0fd9aSDavid Woodhouse         kvm_xen_maybe_deassert_callback(cpu);
5554ddf0fd9aSDavid Woodhouse     }
5555ddc7cb30SMiroslav Rezanina #endif
5556ddf0fd9aSDavid Woodhouse 
5557a9dc68d9SClaudio Fontana     /* We need to protect the apic state against concurrent accesses from
5558a9dc68d9SClaudio Fontana      * different threads in case the userspace irqchip is used. */
5559a9dc68d9SClaudio Fontana     if (!kvm_irqchip_in_kernel()) {
5560195801d7SStefan Hajnoczi         bql_lock();
5561a9dc68d9SClaudio Fontana     }
5562a9dc68d9SClaudio Fontana     cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
5563a9dc68d9SClaudio Fontana     cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
5564a9dc68d9SClaudio Fontana     if (!kvm_irqchip_in_kernel()) {
5565195801d7SStefan Hajnoczi         bql_unlock();
5566a9dc68d9SClaudio Fontana     }
5567a9dc68d9SClaudio Fontana     return cpu_get_mem_attrs(env);
5568a9dc68d9SClaudio Fontana }
5569a9dc68d9SClaudio Fontana 
/*
 * Handle asynchronous events recorded in cs->interrupt_request: MCE, INIT,
 * and (only when the irqchip is not in the kernel) APIC polling, wake-up
 * from halt, SIPI and TPR access reports.
 *
 * Returns 0 when a triple fault was turned into a reset request or when the
 * irqchip lives in the kernel; otherwise returns cs->halted.
 */
int kvm_arch_process_async_events(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    if (cs->interrupt_request & CPU_INTERRUPT_MCE) {
        /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */
        assert(env->mcg_cap);

        cs->interrupt_request &= ~CPU_INTERRUPT_MCE;

        kvm_cpu_synchronize_state(cs);

        if (env->exception_nr == EXCP08_DBLE) {
            /* this means triple fault */
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            cs->exit_request = 1;
            return 0;
        }

        kvm_queue_exception(env, EXCP12_MCHK, 0, 0);
        env->has_error_code = 0;

        /* The vCPU is taken out of halt once the exception is queued. */
        cs->halted = 0;
        if (env->mp_state == KVM_MP_STATE_HALTED && kvm_irqchip_in_kernel()) {
            env->mp_state = KVM_MP_STATE_RUNNABLE;
        }
    }

    /* INIT is not processed while the vCPU is in SMM. */
    if (!(env->hflags & HF_SMM_MASK) &&
        (cs->interrupt_request & CPU_INTERRUPT_INIT)) {
        kvm_cpu_synchronize_state(cs);
        do_cpu_init(cpu);
    }

    /* Everything below is only done for a userspace irqchip. */
    if (kvm_irqchip_in_kernel()) {
        return 0;
    }

    if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
        cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(cpu->apic_state);
    }

    /* An NMI, or a hard interrupt with IF set, ends the halted state. */
    if ((cs->interrupt_request & CPU_INTERRUPT_NMI) ||
        ((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK))) {
        cs->halted = 0;
    }

    if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
        kvm_cpu_synchronize_state(cs);
        do_cpu_sipi(cpu);
    }

    if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
        cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
        kvm_cpu_synchronize_state(cs);
        apic_handle_tpr_access_report(cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return cs->halted;
}
5630a9dc68d9SClaudio Fontana 
/*
 * Handle a guest halt.
 *
 * If an NMI is pending, or a hard interrupt is pending while IF is set, the
 * vCPU is left alone and 0 is returned. Otherwise the vCPU is marked halted
 * and EXCP_HLT is returned.
 */
static int kvm_handle_halt(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;

    if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
        return 0;
    }

    cs->halted = 1;
    return EXCP_HLT;
}
5645a9dc68d9SClaudio Fontana 
/*
 * Forward the TPR access described in the vCPU's kvm_run area (instruction
 * pointer and read/write direction) to the APIC model.
 *
 * Always returns 1.
 */
static int kvm_handle_tpr_access(X86CPU *cpu)
{
    struct kvm_run *run = CPU(cpu)->kvm_run;
    uint64_t rip = run->tpr_access.rip;

    if (run->tpr_access.is_write) {
        apic_handle_tpr_access_report(cpu->apic_state, rip, TPR_ACCESS_WRITE);
    } else {
        apic_handle_tpr_access_report(cpu->apic_state, rip, TPR_ACCESS_READ);
    }

    return 1;
}
5656a9dc68d9SClaudio Fontana 
/*
 * Plant a software breakpoint at bp->pc: save the byte currently there in
 * bp->saved_insn, then overwrite it with the one-byte 0xcc opcode.
 *
 * Returns 0 on success, -EINVAL if either guest memory access fails.
 */
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    /* Remember the original byte first... */
    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0)) {
        return -EINVAL;
    }

    /* ...then replace it (the buffer is only read from when writing). */
    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) {
        return -EINVAL;
    }

    return 0;
}
5667a9dc68d9SClaudio Fontana 
/*
 * Remove a software breakpoint at bp->pc by restoring bp->saved_insn.
 *
 * If the byte at bp->pc is no longer 0xcc, nothing is written and the
 * function still reports success.
 *
 * Returns 0 on success, -EINVAL if a guest memory access fails.
 */
int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint8_t current;

    if (cpu_memory_rw_debug(cs, bp->pc, &current, 1, 0)) {
        return -EINVAL;
    }

    /* Only restore the saved byte while the breakpoint opcode is in place. */
    if (current == 0xcc &&
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
        return -EINVAL;
    }

    return 0;
}
5683a9dc68d9SClaudio Fontana 
5684a9dc68d9SClaudio Fontana static struct {
5685a9dc68d9SClaudio Fontana     target_ulong addr;
5686a9dc68d9SClaudio Fontana     int len;
5687a9dc68d9SClaudio Fontana     int type;
5688a9dc68d9SClaudio Fontana } hw_breakpoint[4];
5689a9dc68d9SClaudio Fontana 
5690a9dc68d9SClaudio Fontana static int nb_hw_breakpoint;
5691a9dc68d9SClaudio Fontana 
/*
 * Look up an entry of hw_breakpoint[] by address, length and type.
 *
 * @len may be -1 to match an entry of any length.
 *
 * Returns the index of the first matching entry, or -1 if there is none.
 */
static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int idx;

    for (idx = 0; idx < nb_hw_breakpoint; idx++) {
        if (hw_breakpoint[idx].addr != addr) {
            continue;
        }
        if (hw_breakpoint[idx].type != type) {
            continue;
        }
        if (len == -1 || hw_breakpoint[idx].len == len) {
            return idx;
        }
    }

    return -1;
}
5704a9dc68d9SClaudio Fontana 
/*
 * Record a new hardware breakpoint or watchpoint in hw_breakpoint[].
 *
 * @addr: address to break/watch on.
 * @len:  length in bytes; ignored (forced to 1) for GDB_BREAKPOINT_HW.
 *        Watchpoints accept 1, 2, 4 or 8, and @addr must be aligned to @len.
 * @type: GDB_BREAKPOINT_HW, GDB_WATCHPOINT_WRITE or GDB_WATCHPOINT_ACCESS.
 *
 * Returns 0 on success, -ENOSYS for any other @type, -EINVAL for a bad
 * length or misaligned address, -ENOBUFS when all 4 slots are in use, and
 * -EEXIST when an identical entry is already recorded.
 */
int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
{
    if (type == GDB_BREAKPOINT_HW) {
        len = 1;
    } else if (type == GDB_WATCHPOINT_WRITE || type == GDB_WATCHPOINT_ACCESS) {
        if (len != 1 && len != 2 && len != 4 && len != 8) {
            return -EINVAL;
        }
        /* For len == 1 the mask is 0, so this can only reject len 2/4/8. */
        if (addr & (len - 1)) {
            return -EINVAL;
        }
    } else {
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4) {
        return -ENOBUFS;
    }
    if (find_hw_breakpoint(addr, len, type) >= 0) {
        return -EEXIST;
    }

    /* Append to the first free slot. */
    hw_breakpoint[nb_hw_breakpoint].type = type;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    nb_hw_breakpoint++;

    return 0;
}
5744a9dc68d9SClaudio Fontana 
/*
 * Remove a previously registered hardware breakpoint or watchpoint.
 *
 * Execution breakpoints are looked up with length 1 regardless of @len,
 * mirroring how they are stored on insertion.
 *
 * Returns 0 on success or -ENOENT if no matching entry exists.
 */
int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
{
    int lookup_len = (type == GDB_BREAKPOINT_HW) ? 1 : len;
    int idx = find_hw_breakpoint(addr, lookup_len, type);

    if (idx < 0) {
        return -ENOENT;
    }

    /* Keep the table dense: move the last active entry into the hole. */
    hw_breakpoint[idx] = hw_breakpoint[--nb_hw_breakpoint];

    return 0;
}
5758a9dc68d9SClaudio Fontana 
kvm_arch_remove_all_hw_breakpoints(void)5759a9dc68d9SClaudio Fontana void kvm_arch_remove_all_hw_breakpoints(void)
5760a9dc68d9SClaudio Fontana {
5761a9dc68d9SClaudio Fontana     nb_hw_breakpoint = 0;
5762a9dc68d9SClaudio Fontana }
5763a9dc68d9SClaudio Fontana 
5764a9dc68d9SClaudio Fontana static CPUWatchpoint hw_watchpoint;
5765a9dc68d9SClaudio Fontana 
kvm_handle_debug(X86CPU * cpu,struct kvm_debug_exit_arch * arch_info)5766a9dc68d9SClaudio Fontana static int kvm_handle_debug(X86CPU *cpu,
5767a9dc68d9SClaudio Fontana                             struct kvm_debug_exit_arch *arch_info)
5768a9dc68d9SClaudio Fontana {
5769a9dc68d9SClaudio Fontana     CPUState *cs = CPU(cpu);
5770a9dc68d9SClaudio Fontana     CPUX86State *env = &cpu->env;
5771a9dc68d9SClaudio Fontana     int ret = 0;
5772a9dc68d9SClaudio Fontana     int n;
5773a9dc68d9SClaudio Fontana 
5774a9dc68d9SClaudio Fontana     if (arch_info->exception == EXCP01_DB) {
5775a9dc68d9SClaudio Fontana         if (arch_info->dr6 & DR6_BS) {
5776a9dc68d9SClaudio Fontana             if (cs->singlestep_enabled) {
5777a9dc68d9SClaudio Fontana                 ret = EXCP_DEBUG;
5778a9dc68d9SClaudio Fontana             }
5779a9dc68d9SClaudio Fontana         } else {
5780a9dc68d9SClaudio Fontana             for (n = 0; n < 4; n++) {
5781a9dc68d9SClaudio Fontana                 if (arch_info->dr6 & (1 << n)) {
5782a9dc68d9SClaudio Fontana                     switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
5783a9dc68d9SClaudio Fontana                     case 0x0:
5784a9dc68d9SClaudio Fontana                         ret = EXCP_DEBUG;
5785a9dc68d9SClaudio Fontana                         break;
5786a9dc68d9SClaudio Fontana                     case 0x1:
5787a9dc68d9SClaudio Fontana                         ret = EXCP_DEBUG;
5788a9dc68d9SClaudio Fontana                         cs->watchpoint_hit = &hw_watchpoint;
5789a9dc68d9SClaudio Fontana                         hw_watchpoint.vaddr = hw_breakpoint[n].addr;
5790a9dc68d9SClaudio Fontana                         hw_watchpoint.flags = BP_MEM_WRITE;
5791a9dc68d9SClaudio Fontana                         break;
5792a9dc68d9SClaudio Fontana                     case 0x3:
5793a9dc68d9SClaudio Fontana                         ret = EXCP_DEBUG;
5794a9dc68d9SClaudio Fontana                         cs->watchpoint_hit = &hw_watchpoint;
5795a9dc68d9SClaudio Fontana                         hw_watchpoint.vaddr = hw_breakpoint[n].addr;
5796a9dc68d9SClaudio Fontana                         hw_watchpoint.flags = BP_MEM_ACCESS;
5797a9dc68d9SClaudio Fontana                         break;
5798a9dc68d9SClaudio Fontana                     }
5799a9dc68d9SClaudio Fontana                 }
5800a9dc68d9SClaudio Fontana             }
5801a9dc68d9SClaudio Fontana         }
5802a9dc68d9SClaudio Fontana     } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) {
5803a9dc68d9SClaudio Fontana         ret = EXCP_DEBUG;
5804a9dc68d9SClaudio Fontana     }
5805a9dc68d9SClaudio Fontana     if (ret == 0) {
5806a9dc68d9SClaudio Fontana         cpu_synchronize_state(cs);
5807a9dc68d9SClaudio Fontana         assert(env->exception_nr == -1);
5808a9dc68d9SClaudio Fontana 
5809a9dc68d9SClaudio Fontana         /* pass to guest */
5810a9dc68d9SClaudio Fontana         kvm_queue_exception(env, arch_info->exception,
5811a9dc68d9SClaudio Fontana                             arch_info->exception == EXCP01_DB,
5812a9dc68d9SClaudio Fontana                             arch_info->dr6);
5813a9dc68d9SClaudio Fontana         env->has_error_code = 0;
5814a9dc68d9SClaudio Fontana     }
5815a9dc68d9SClaudio Fontana 
5816a9dc68d9SClaudio Fontana     return ret;
5817a9dc68d9SClaudio Fontana }
5818a9dc68d9SClaudio Fontana 
/*
 * Fill in the x86-specific part of @dbg from the current software and
 * hardware breakpoint state.
 *
 * When hardware breakpoints are registered, debugreg[0..n-1] receive the
 * addresses and debugreg[7] is built from an enable bit, a type code and
 * a length code per slot.
 */
void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
    /* GDB breakpoint type -> two-bit type code placed at bit 16 + 4 * slot */
    static const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    /* Length in bytes -> two-bit length code placed at bit 18 + 4 * slot */
    static const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int slot;

    if (kvm_sw_breakpoints_active(cpu)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    if (nb_hw_breakpoint <= 0) {
        return;
    }

    dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
    /* NOTE(review): 0x0600 presumably sets GE plus the always-one bit 10 */
    dbg->arch.debugreg[7] = 0x0600;

    for (slot = 0; slot < nb_hw_breakpoint; slot++) {
        uint32_t enable_bit = 2 << (slot * 2);
        uint32_t type_bits = type_code[hw_breakpoint[slot].type]
                             << (16 + slot * 4);
        uint32_t len_bits = (uint32_t)len_code[hw_breakpoint[slot].len]
                            << (18 + slot * 4);

        dbg->arch.debugreg[slot] = hw_breakpoint[slot].addr;
        dbg->arch.debugreg[7] |= enable_bit | type_bits | len_bits;
    }
}
5845a9dc68d9SClaudio Fontana 
/*
 * Build a KVM MSR filter from msr_handlers[] and install it with
 * KVM_X86_SET_MSR_FILTER.
 *
 * Each handler with a non-zero MSR index becomes a one-MSR range whose
 * READ/WRITE flags reflect which callbacks are set.  All ranges share a
 * single zeroed bitmap.
 * NOTE(review): per the KVM API a zero bitmap bit should make KVM deny
 * the access in-kernel so it exits to userspace - confirm against docs.
 *
 * Returns true if the ioctl succeeded, false otherwise.
 */
static bool kvm_install_msr_filters(KVMState *s)
{
    uint64_t zero = 0;
    struct kvm_msr_filter filter = {
        .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
    };
    int nranges = 0;
    int i;

    for (i = 0; i < KVM_MSR_FILTER_MAX_RANGES; i++) {
        KVMMSRHandlers *handler = &msr_handlers[i];
        uint32_t range_flags = 0;

        /* An MSR index of zero marks an unused slot. */
        if (!handler->msr) {
            continue;
        }

        if (handler->rdmsr) {
            range_flags |= KVM_MSR_FILTER_READ;
        }
        if (handler->wrmsr) {
            range_flags |= KVM_MSR_FILTER_WRITE;
        }

        filter.ranges[nranges++] = (struct kvm_msr_filter_range) {
            .flags = range_flags,
            .nmsrs = 1,
            .base = handler->msr,
            .bitmap = (__u8 *)&zero,
        };
    }

    return kvm_vm_ioctl(s, KVM_X86_SET_MSR_FILTER, &filter) == 0;
}
5883860054d8SAlexander Graf 
/*
 * Register read/write callbacks for @msr and reinstall the MSR filter.
 *
 * The handler is stored in the first msr_handlers[] slot whose MSR index
 * is zero.  If installing the updated filter fails, the slot is cleared
 * again.
 *
 * Returns true on success, false if no slot is free or the filter could
 * not be installed.
 */
static bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
                           QEMUWRMSRHandler *wrmsr)
{
    KVMMSRHandlers *free_slot = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) {
        if (!msr_handlers[i].msr) {
            free_slot = &msr_handlers[i];
            break;
        }
    }
    if (!free_slot) {
        return false;
    }

    *free_slot = (KVMMSRHandlers) {
        .msr = msr,
        .rdmsr = rdmsr,
        .wrmsr = wrmsr,
    };

    if (!kvm_install_msr_filters(s)) {
        /* Roll back so the slot can be reused. */
        *free_slot = (KVMMSRHandlers) { };
        return false;
    }

    return true;
}
5908860054d8SAlexander Graf 
/*
 * Dispatch a filtered RDMSR exit to the registered read handler.
 *
 * The first msr_handlers[] entry that matches run->msr.index and has a
 * read callback is invoked; run->msr.error is set to 0 if the callback
 * returned true and to 1 otherwise.  Always returns 0.
 *
 * Reaching the end without a matching handler is treated as impossible.
 */
static int kvm_handle_rdmsr(X86CPU *cpu, struct kvm_run *run)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) {
        KVMMSRHandlers *handler = &msr_handlers[i];
        bool ok;

        if (run->msr.index != handler->msr || !handler->rdmsr) {
            continue;
        }

        ok = handler->rdmsr(cpu, handler->msr, (uint64_t *)&run->msr.data);
        run->msr.error = !ok;
        return 0;
    }

    g_assert_not_reached();
}
5928860054d8SAlexander Graf 
/*
 * Dispatch a filtered WRMSR exit to the registered write handler.
 *
 * The first msr_handlers[] entry that matches run->msr.index and has a
 * write callback is invoked with run->msr.data; run->msr.error is set to
 * 0 if the callback returned true and to 1 otherwise.  Always returns 0.
 *
 * Reaching the end without a matching handler is treated as impossible.
 */
static int kvm_handle_wrmsr(X86CPU *cpu, struct kvm_run *run)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) {
        KVMMSRHandlers *handler = &msr_handlers[i];
        bool ok;

        if (run->msr.index != handler->msr || !handler->wrmsr) {
            continue;
        }

        ok = handler->wrmsr(cpu, handler->msr, run->msr.data);
        run->msr.error = !ok;
        return 0;
    }

    g_assert_not_reached();
}
5947860054d8SAlexander Graf 
5948c22f5467SSean Christopherson static bool has_sgx_provisioning;
5949c22f5467SSean Christopherson 
__kvm_enable_sgx_provisioning(KVMState * s)5950c22f5467SSean Christopherson static bool __kvm_enable_sgx_provisioning(KVMState *s)
5951c22f5467SSean Christopherson {
5952c22f5467SSean Christopherson     int fd, ret;
5953c22f5467SSean Christopherson 
5954c22f5467SSean Christopherson     if (!kvm_vm_check_extension(s, KVM_CAP_SGX_ATTRIBUTE)) {
5955c22f5467SSean Christopherson         return false;
5956c22f5467SSean Christopherson     }
5957c22f5467SSean Christopherson 
5958c22f5467SSean Christopherson     fd = qemu_open_old("/dev/sgx_provision", O_RDONLY);
5959c22f5467SSean Christopherson     if (fd < 0) {
5960c22f5467SSean Christopherson         return false;
5961c22f5467SSean Christopherson     }
5962c22f5467SSean Christopherson 
5963c22f5467SSean Christopherson     ret = kvm_vm_enable_cap(s, KVM_CAP_SGX_ATTRIBUTE, 0, fd);
5964c22f5467SSean Christopherson     if (ret) {
5965c22f5467SSean Christopherson         error_report("Could not enable SGX PROVISIONKEY: %s", strerror(-ret));
5966c22f5467SSean Christopherson         exit(1);
5967c22f5467SSean Christopherson     }
5968c22f5467SSean Christopherson     close(fd);
5969c22f5467SSean Christopherson     return true;
5970c22f5467SSean Christopherson }
5971c22f5467SSean Christopherson 
/*
 * Public entry point for enabling SGX provisioning on @s.
 * NOTE(review): MEMORIZE() presumably caches the helper's result in
 * has_sgx_provisioning so the probe runs only once - confirm against the
 * macro definition.
 */
bool kvm_enable_sgx_provisioning(KVMState *s)
{
    return MEMORIZE(__kvm_enable_sgx_provisioning(s), has_sgx_provisioning);
}
5976c22f5467SSean Christopherson 
host_supports_vmx(void)5977a9dc68d9SClaudio Fontana static bool host_supports_vmx(void)
5978a9dc68d9SClaudio Fontana {
5979a9dc68d9SClaudio Fontana     uint32_t ecx, unused;
5980a9dc68d9SClaudio Fontana 
5981a9dc68d9SClaudio Fontana     host_cpuid(1, 0, &unused, &unused, &ecx, &unused);
5982a9dc68d9SClaudio Fontana     return ecx & CPUID_EXT_VMX;
5983a9dc68d9SClaudio Fontana }
5984a9dc68d9SClaudio Fontana 
598547e76d03SMichael Roth /*
598647e76d03SMichael Roth  * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE
598747e76d03SMichael Roth  * to service guest-initiated memory attribute update requests so that
598847e76d03SMichael Roth  * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be
598947e76d03SMichael Roth  * backed by the private memory pool provided by guest_memfd, and as such
599047e76d03SMichael Roth  * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX).
599147e76d03SMichael Roth  *
599247e76d03SMichael Roth  * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live
599347e76d03SMichael Roth  * migration, are not implemented here currently.
599447e76d03SMichael Roth  *
599547e76d03SMichael Roth  * For the guest_memfd use-case, these exits will generally be synthesized
599647e76d03SMichael Roth  * by KVM based on platform-specific hypercalls, like GHCB requests in the
599747e76d03SMichael Roth  * case of SEV-SNP, and not issued directly within the guest though the
599847e76d03SMichael Roth  * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is
599947e76d03SMichael Roth  * not actually advertised to guests via the KVM CPUID feature bit, as
600047e76d03SMichael Roth  * opposed to SEV live migration where it would be. Since it is unlikely the
600147e76d03SMichael Roth  * SEV live migration use-case would be useful for guest-memfd backed guests,
600247e76d03SMichael Roth  * because private/shared page tracking is already provided through other
600347e76d03SMichael Roth  * means, these 2 use-cases should be treated as being mutually-exclusive.
600447e76d03SMichael Roth  */
kvm_handle_hc_map_gpa_range(struct kvm_run * run)600547e76d03SMichael Roth static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
600647e76d03SMichael Roth {
600747e76d03SMichael Roth     uint64_t gpa, size, attributes;
600847e76d03SMichael Roth 
600947e76d03SMichael Roth     if (!machine_require_guest_memfd(current_machine))
601047e76d03SMichael Roth         return -EINVAL;
601147e76d03SMichael Roth 
601247e76d03SMichael Roth     gpa = run->hypercall.args[0];
601347e76d03SMichael Roth     size = run->hypercall.args[1] * TARGET_PAGE_SIZE;
601447e76d03SMichael Roth     attributes = run->hypercall.args[2];
601547e76d03SMichael Roth 
601647e76d03SMichael Roth     trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
601747e76d03SMichael Roth 
601847e76d03SMichael Roth     return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
601947e76d03SMichael Roth }
602047e76d03SMichael Roth 
kvm_handle_hypercall(struct kvm_run * run)602147e76d03SMichael Roth static int kvm_handle_hypercall(struct kvm_run *run)
602247e76d03SMichael Roth {
602347e76d03SMichael Roth     if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
602447e76d03SMichael Roth         return kvm_handle_hc_map_gpa_range(run);
602547e76d03SMichael Roth 
602647e76d03SMichael Roth     return -EINVAL;
602747e76d03SMichael Roth }
602847e76d03SMichael Roth 
6029a9dc68d9SClaudio Fontana #define VMX_INVALID_GUEST_STATE 0x80000021
6030a9dc68d9SClaudio Fontana 
kvm_arch_handle_exit(CPUState * cs,struct kvm_run * run)6031a9dc68d9SClaudio Fontana int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
6032a9dc68d9SClaudio Fontana {
6033a9dc68d9SClaudio Fontana     X86CPU *cpu = X86_CPU(cs);
6034a9dc68d9SClaudio Fontana     uint64_t code;
6035a9dc68d9SClaudio Fontana     int ret;
6036e2e69f6bSChenyi Qiang     bool ctx_invalid;
6037e2e69f6bSChenyi Qiang     KVMState *state;
6038a9dc68d9SClaudio Fontana 
6039a9dc68d9SClaudio Fontana     switch (run->exit_reason) {
6040a9dc68d9SClaudio Fontana     case KVM_EXIT_HLT:
6041a9dc68d9SClaudio Fontana         DPRINTF("handle_hlt\n");
6042195801d7SStefan Hajnoczi         bql_lock();
6043a9dc68d9SClaudio Fontana         ret = kvm_handle_halt(cpu);
6044195801d7SStefan Hajnoczi         bql_unlock();
6045a9dc68d9SClaudio Fontana         break;
6046a9dc68d9SClaudio Fontana     case KVM_EXIT_SET_TPR:
6047a9dc68d9SClaudio Fontana         ret = 0;
6048a9dc68d9SClaudio Fontana         break;
6049a9dc68d9SClaudio Fontana     case KVM_EXIT_TPR_ACCESS:
6050195801d7SStefan Hajnoczi         bql_lock();
6051a9dc68d9SClaudio Fontana         ret = kvm_handle_tpr_access(cpu);
6052195801d7SStefan Hajnoczi         bql_unlock();
6053a9dc68d9SClaudio Fontana         break;
6054a9dc68d9SClaudio Fontana     case KVM_EXIT_FAIL_ENTRY:
6055a9dc68d9SClaudio Fontana         code = run->fail_entry.hardware_entry_failure_reason;
6056a9dc68d9SClaudio Fontana         fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n",
6057a9dc68d9SClaudio Fontana                 code);
6058a9dc68d9SClaudio Fontana         if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) {
6059a9dc68d9SClaudio Fontana             fprintf(stderr,
6060a9dc68d9SClaudio Fontana                     "\nIf you're running a guest on an Intel machine without "
6061a9dc68d9SClaudio Fontana                         "unrestricted mode\n"
6062a9dc68d9SClaudio Fontana                     "support, the failure can be most likely due to the guest "
6063a9dc68d9SClaudio Fontana                         "entering an invalid\n"
6064a9dc68d9SClaudio Fontana                     "state for Intel VT. For example, the guest maybe running "
6065a9dc68d9SClaudio Fontana                         "in big real mode\n"
6066a9dc68d9SClaudio Fontana                     "which is not supported on less recent Intel processors."
6067a9dc68d9SClaudio Fontana                         "\n\n");
6068a9dc68d9SClaudio Fontana         }
6069a9dc68d9SClaudio Fontana         ret = -1;
6070a9dc68d9SClaudio Fontana         break;
6071a9dc68d9SClaudio Fontana     case KVM_EXIT_EXCEPTION:
6072a9dc68d9SClaudio Fontana         fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n",
6073a9dc68d9SClaudio Fontana                 run->ex.exception, run->ex.error_code);
6074a9dc68d9SClaudio Fontana         ret = -1;
6075a9dc68d9SClaudio Fontana         break;
6076a9dc68d9SClaudio Fontana     case KVM_EXIT_DEBUG:
6077a9dc68d9SClaudio Fontana         DPRINTF("kvm_exit_debug\n");
6078195801d7SStefan Hajnoczi         bql_lock();
6079a9dc68d9SClaudio Fontana         ret = kvm_handle_debug(cpu, &run->debug.arch);
6080195801d7SStefan Hajnoczi         bql_unlock();
6081a9dc68d9SClaudio Fontana         break;
6082a9dc68d9SClaudio Fontana     case KVM_EXIT_HYPERV:
6083a9dc68d9SClaudio Fontana         ret = kvm_hv_handle_exit(cpu, &run->hyperv);
6084a9dc68d9SClaudio Fontana         break;
6085a9dc68d9SClaudio Fontana     case KVM_EXIT_IOAPIC_EOI:
6086a9dc68d9SClaudio Fontana         ioapic_eoi_broadcast(run->eoi.vector);
6087a9dc68d9SClaudio Fontana         ret = 0;
6088a9dc68d9SClaudio Fontana         break;
6089035d1ef2SChenyi Qiang     case KVM_EXIT_X86_BUS_LOCK:
6090035d1ef2SChenyi Qiang         /* already handled in kvm_arch_post_run */
6091035d1ef2SChenyi Qiang         ret = 0;
6092035d1ef2SChenyi Qiang         break;
6093e2e69f6bSChenyi Qiang     case KVM_EXIT_NOTIFY:
6094e2e69f6bSChenyi Qiang         ctx_invalid = !!(run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID);
6095e2e69f6bSChenyi Qiang         state = KVM_STATE(current_accel());
6096e2e69f6bSChenyi Qiang         if (ctx_invalid ||
6097e2e69f6bSChenyi Qiang             state->notify_vmexit == NOTIFY_VMEXIT_OPTION_INTERNAL_ERROR) {
6098a93b4061SRichard Henderson             warn_report("KVM internal error: Encountered a notify exit "
6099a93b4061SRichard Henderson                         "with invalid context in guest.");
6100e2e69f6bSChenyi Qiang             ret = -1;
6101e2e69f6bSChenyi Qiang         } else {
6102a93b4061SRichard Henderson             warn_report_once("KVM: Encountered a notify exit with valid "
6103a93b4061SRichard Henderson                              "context in guest. "
6104a93b4061SRichard Henderson                              "The guest could be misbehaving.");
6105e2e69f6bSChenyi Qiang             ret = 0;
6106e2e69f6bSChenyi Qiang         }
6107e2e69f6bSChenyi Qiang         break;
6108860054d8SAlexander Graf     case KVM_EXIT_X86_RDMSR:
6109860054d8SAlexander Graf         /* We only enable MSR filtering, any other exit is bogus */
6110860054d8SAlexander Graf         assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER);
6111860054d8SAlexander Graf         ret = kvm_handle_rdmsr(cpu, run);
6112860054d8SAlexander Graf         break;
6113860054d8SAlexander Graf     case KVM_EXIT_X86_WRMSR:
6114860054d8SAlexander Graf         /* We only enable MSR filtering, any other exit is bogus */
6115860054d8SAlexander Graf         assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER);
6116860054d8SAlexander Graf         ret = kvm_handle_wrmsr(cpu, run);
6117860054d8SAlexander Graf         break;
611855a3f666SJoao Martins #ifdef CONFIG_XEN_EMU
611955a3f666SJoao Martins     case KVM_EXIT_XEN:
612055a3f666SJoao Martins         ret = kvm_xen_handle_exit(cpu, &run->xen);
612155a3f666SJoao Martins         break;
612255a3f666SJoao Martins #endif
612347e76d03SMichael Roth     case KVM_EXIT_HYPERCALL:
612447e76d03SMichael Roth         ret = kvm_handle_hypercall(run);
612547e76d03SMichael Roth         break;
6126a9dc68d9SClaudio Fontana     default:
6127a9dc68d9SClaudio Fontana         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
6128a9dc68d9SClaudio Fontana         ret = -1;
6129a9dc68d9SClaudio Fontana         break;
6130a9dc68d9SClaudio Fontana     }
6131a9dc68d9SClaudio Fontana 
6132a9dc68d9SClaudio Fontana     return ret;
6133a9dc68d9SClaudio Fontana }
6134a9dc68d9SClaudio Fontana 
/*
 * Decide whether an emulation error should stop the VM.
 *
 * After synchronizing the CPU state, returns true when CR0.PE is clear
 * or when the low two bits of the CS selector are not 3; returns false
 * only when PE is set and those bits equal 3.
 */
bool kvm_arch_stop_on_emulation_error(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    kvm_cpu_synchronize_state(cs);

    if (!(env->cr[0] & CR0_PE_MASK)) {
        return true;
    }

    return (env->segs[R_CS].selector & 3) != 3;
}
6144a9dc68d9SClaudio Fontana 
/*
 * Set up x86 IRQ routing defaults for @s.
 *
 * Fails hard (error report + exit) if a route cannot be reserved in
 * split irqchip mode.
 */
void kvm_arch_init_irq_routing(KVMState *s)
{
    KVMRouteChange changes;
    int pin;

    /*
     * We know at this point that we're using the in-kernel
     * irqchip, so we can use irqfds, and on x86 we know
     * we can use msi via irqfd and GSI routing.
     */
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_routing_allowed = true;

    if (!kvm_irqchip_is_split()) {
        return;
    }

    /*
     * If the ioapic is in QEMU and the lapics are in KVM, reserve
     * MSI routes for signaling interrupts to the local apics.
     */
    changes = kvm_irqchip_begin_route_changes(s);
    for (pin = 0; pin < IOAPIC_NUM_PINS; pin++) {
        if (kvm_irqchip_add_msi_route(&changes, 0, NULL) < 0) {
            error_report("Could not enable split IRQ mode.");
            exit(1);
        }
    }
    kvm_irqchip_commit_route_changes(&changes);
}
6169a9dc68d9SClaudio Fontana 
/*
 * Arch hook for irqchip creation.
 *
 * When a split kernel irqchip was requested, enable
 * KVM_CAP_SPLIT_IRQCHIP (with an argument of 24), record that in
 * kvm_split_irqchip and return 1.  Failure to enable the capability is
 * fatal.  Returns 0 when split mode was not requested.
 */
int kvm_arch_irqchip_create(KVMState *s)
{
    int err;

    if (!kvm_kernel_irqchip_split()) {
        return 0;
    }

    err = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24);
    if (err) {
        error_report("Could not enable split irqchip mode: %s",
                     strerror(-err));
        exit(1);
    }

    DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n");
    kvm_split_irqchip = true;
    return 1;
}
6188a9dc68d9SClaudio Fontana 
kvm_swizzle_msi_ext_dest_id(uint64_t address)6189a9dc68d9SClaudio Fontana uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address)
6190a9dc68d9SClaudio Fontana {
6191a9dc68d9SClaudio Fontana     CPUX86State *env;
6192a9dc68d9SClaudio Fontana     uint64_t ext_id;
6193a9dc68d9SClaudio Fontana 
6194a9dc68d9SClaudio Fontana     if (!first_cpu) {
6195a9dc68d9SClaudio Fontana         return address;
6196a9dc68d9SClaudio Fontana     }
6197a9dc68d9SClaudio Fontana     env = &X86_CPU(first_cpu)->env;
6198a9dc68d9SClaudio Fontana     if (!(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) {
6199a9dc68d9SClaudio Fontana         return address;
6200a9dc68d9SClaudio Fontana     }
6201a9dc68d9SClaudio Fontana 
6202a9dc68d9SClaudio Fontana     /*
6203a9dc68d9SClaudio Fontana      * If the remappable format bit is set, or the upper bits are
6204a9dc68d9SClaudio Fontana      * already set in address_hi, or the low extended bits aren't
6205a9dc68d9SClaudio Fontana      * there anyway, do nothing.
6206a9dc68d9SClaudio Fontana      */
6207a9dc68d9SClaudio Fontana     ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT);
6208a9dc68d9SClaudio Fontana     if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) || (address >> 32)) {
6209a9dc68d9SClaudio Fontana         return address;
6210a9dc68d9SClaudio Fontana     }
6211a9dc68d9SClaudio Fontana 
6212a9dc68d9SClaudio Fontana     address &= ~ext_id;
6213a9dc68d9SClaudio Fontana     address |= ext_id << 35;
6214a9dc68d9SClaudio Fontana     return address;
6215a9dc68d9SClaudio Fontana }
6216a9dc68d9SClaudio Fontana 
/*
 * Adjust an MSI routing entry before it is handed to KVM.
 *
 * If a default x86 IOMMU with an int_remap callback exists, the message
 * currently stored in @route is remapped through it and written back
 * (address and data); a remap failure is traced and reported as 1.
 * Otherwise, with Xen emulation active, PIRQ-style MSIs are offered to
 * xen_evtchn_translate_pirq_msi() first.  In the remaining case only the
 * address fields of @route are rewritten from @address.
 *
 * Both paths apply kvm_swizzle_msi_ext_dest_id() to the final address.
 *
 * Returns 0 on success, 1 on IOMMU remap failure, or the non-positive
 * result of the Xen translation when it handled or rejected the message.
 */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    X86IOMMUState *iommu = x86_iommu_get_default();
    X86IOMMUClass *class = NULL;

    if (iommu) {
        class = X86_IOMMU_DEVICE_GET_CLASS(iommu);
    }

    if (class && class->int_remap) {
        MSIMessage src, dst;
        int ret;

        /* Reassemble the message currently programmed in the route. */
        src.address = route->u.msi.address_hi;
        src.address <<= VTD_MSI_ADDR_HI_SHIFT;
        src.address |= route->u.msi.address_lo;
        src.data = route->u.msi.data;

        ret = class->int_remap(iommu, &src, &dst,
                               dev ? pci_requester_id(dev)
                                   : X86_IOMMU_SID_INVALID);
        if (ret) {
            trace_kvm_x86_fixup_msi_error(route->gsi);
            return 1;
        }

        /*
         * Handled untranslated compatibility format interrupt with
         * extended destination ID in the low bits 11-5.
         */
        dst.address = kvm_swizzle_msi_ext_dest_id(dst.address);

        route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT;
        route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK;
        route->u.msi.data = dst.data;
        return 0;
    }

#ifdef CONFIG_XEN_EMU
    if (xen_mode == XEN_EMULATE) {
        int handled = xen_evtchn_translate_pirq_msi(route, address, data);

        /*
         * If it was a PIRQ and successfully routed (handled == 0) or it was
         * an error (handled < 0), return. If it wasn't a PIRQ, keep going.
         */
        if (handled <= 0) {
            return handled;
        }
    }
#endif

    address = kvm_swizzle_msi_ext_dest_id(address);
    route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT;
    route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK;
    return 0;
}
6273a9dc68d9SClaudio Fontana 
6274a9dc68d9SClaudio Fontana typedef struct MSIRouteEntry MSIRouteEntry;
6275a9dc68d9SClaudio Fontana 
6276a9dc68d9SClaudio Fontana struct MSIRouteEntry {
6277a9dc68d9SClaudio Fontana     PCIDevice *dev;             /* Device pointer */
6278a9dc68d9SClaudio Fontana     int vector;                 /* MSI/MSIX vector index */
6279a9dc68d9SClaudio Fontana     int virq;                   /* Virtual IRQ index */
6280a9dc68d9SClaudio Fontana     QLIST_ENTRY(MSIRouteEntry) list;
6281a9dc68d9SClaudio Fontana };
6282a9dc68d9SClaudio Fontana 
6283a9dc68d9SClaudio Fontana /* List of used GSI routes */
6284a9dc68d9SClaudio Fontana static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
6285a9dc68d9SClaudio Fontana     QLIST_HEAD_INITIALIZER(msi_route_list);
6286a9dc68d9SClaudio Fontana 
kvm_update_msi_routes_all(void * private,bool global,uint32_t index,uint32_t mask)62876096cf78SDavid Woodhouse void kvm_update_msi_routes_all(void *private, bool global,
6288a9dc68d9SClaudio Fontana                                uint32_t index, uint32_t mask)
6289a9dc68d9SClaudio Fontana {
6290a9dc68d9SClaudio Fontana     int cnt = 0, vector;
6291a9dc68d9SClaudio Fontana     MSIRouteEntry *entry;
6292a9dc68d9SClaudio Fontana     MSIMessage msg;
6293a9dc68d9SClaudio Fontana     PCIDevice *dev;
6294a9dc68d9SClaudio Fontana 
6295a9dc68d9SClaudio Fontana     /* TODO: explicit route update */
6296a9dc68d9SClaudio Fontana     QLIST_FOREACH(entry, &msi_route_list, list) {
6297a9dc68d9SClaudio Fontana         cnt++;
6298a9dc68d9SClaudio Fontana         vector = entry->vector;
6299a9dc68d9SClaudio Fontana         dev = entry->dev;
6300a9dc68d9SClaudio Fontana         if (msix_enabled(dev) && !msix_is_masked(dev, vector)) {
6301a9dc68d9SClaudio Fontana             msg = msix_get_message(dev, vector);
6302a9dc68d9SClaudio Fontana         } else if (msi_enabled(dev) && !msi_is_masked(dev, vector)) {
6303a9dc68d9SClaudio Fontana             msg = msi_get_message(dev, vector);
6304a9dc68d9SClaudio Fontana         } else {
6305a9dc68d9SClaudio Fontana             /*
6306a9dc68d9SClaudio Fontana              * Either MSI/MSIX is disabled for the device, or the
6307a9dc68d9SClaudio Fontana              * specific message was masked out.  Skip this one.
6308a9dc68d9SClaudio Fontana              */
6309a9dc68d9SClaudio Fontana             continue;
6310a9dc68d9SClaudio Fontana         }
6311a9dc68d9SClaudio Fontana         kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev);
6312a9dc68d9SClaudio Fontana     }
6313a9dc68d9SClaudio Fontana     kvm_irqchip_commit_routes(kvm_state);
6314a9dc68d9SClaudio Fontana     trace_kvm_x86_update_msi_routes(cnt);
6315a9dc68d9SClaudio Fontana }
6316a9dc68d9SClaudio Fontana 
/*
 * Record a newly added MSI route so that kvm_update_msi_routes_all() can
 * refresh it later.
 *
 * Routes without a device are not tracked.  The first time a device
 * route is recorded, kvm_update_msi_routes_all() is registered as an IEC
 * notifier on the default x86 IOMMU, if there is one.
 *
 * Always returns 0.
 */
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    static bool iec_notifier_checked = false;
    MSIRouteEntry *tracked;

    if (!dev) {
        /*
         * These are (possibly) IOAPIC routes only used for split
         * kernel irqchip mode, while what we are housekeeping are
         * PCI devices only.
         */
        return 0;
    }

    tracked = g_new0(MSIRouteEntry, 1);
    tracked->dev = dev;
    tracked->vector = vector;
    tracked->virq = route->gsi;
    QLIST_INSERT_HEAD(&msi_route_list, tracked, list);

    trace_kvm_x86_add_msi_route(route->gsi);

    if (iec_notifier_checked) {
        return 0;
    }

    /*
     * For the first time we do add route, add ourselves into
     * IOMMU's IEC notify list if needed.
     */
    X86IOMMUState *iommu = x86_iommu_get_default();
    if (iommu) {
        x86_iommu_iec_register_notifier(iommu,
                                        kvm_update_msi_routes_all,
                                        NULL);
    }
    iec_notifier_checked = true;

    return 0;
}
6351a9dc68d9SClaudio Fontana 
kvm_arch_release_virq_post(int virq)6352a9dc68d9SClaudio Fontana int kvm_arch_release_virq_post(int virq)
6353a9dc68d9SClaudio Fontana {
6354a9dc68d9SClaudio Fontana     MSIRouteEntry *entry, *next;
6355a9dc68d9SClaudio Fontana     QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) {
6356a9dc68d9SClaudio Fontana         if (entry->virq == virq) {
6357a9dc68d9SClaudio Fontana             trace_kvm_x86_remove_msi_route(virq);
6358a9dc68d9SClaudio Fontana             QLIST_REMOVE(entry, list);
6359a9dc68d9SClaudio Fontana             g_free(entry);
6360a9dc68d9SClaudio Fontana             break;
6361a9dc68d9SClaudio Fontana         }
6362a9dc68d9SClaudio Fontana     }
6363a9dc68d9SClaudio Fontana     return 0;
6364a9dc68d9SClaudio Fontana }
6365a9dc68d9SClaudio Fontana 
/*
 * Not implemented here: calling this function terminates the process
 * via abort(), so it never returns a value.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    abort();
}
6370a9dc68d9SClaudio Fontana 
kvm_has_waitpkg(void)6371a9dc68d9SClaudio Fontana bool kvm_has_waitpkg(void)
6372a9dc68d9SClaudio Fontana {
6373a9dc68d9SClaudio Fontana     return has_msr_umwait;
6374a9dc68d9SClaudio Fontana }
637592a5199bSTom Lendacky 
637619db68caSYang Zhong #define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
637719db68caSYang Zhong 
kvm_request_xsave_components(X86CPU * cpu,uint64_t mask)637819db68caSYang Zhong void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
637919db68caSYang Zhong {
638019db68caSYang Zhong     KVMState *s = kvm_state;
638119db68caSYang Zhong     uint64_t supported;
638219db68caSYang Zhong 
638319db68caSYang Zhong     mask &= XSTATE_DYNAMIC_MASK;
638419db68caSYang Zhong     if (!mask) {
638519db68caSYang Zhong         return;
638619db68caSYang Zhong     }
638719db68caSYang Zhong     /*
638819db68caSYang Zhong      * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
638919db68caSYang Zhong      * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
639019db68caSYang Zhong      * about them already because they are not supported features.
639119db68caSYang Zhong      */
639219db68caSYang Zhong     supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
639319db68caSYang Zhong     supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
639419db68caSYang Zhong     mask &= supported;
639519db68caSYang Zhong 
639619db68caSYang Zhong     while (mask) {
639719db68caSYang Zhong         int bit = ctz64(mask);
639819db68caSYang Zhong         int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
639919db68caSYang Zhong         if (rc) {
640019db68caSYang Zhong             /*
640119db68caSYang Zhong              * Older kernel version (<5.17) do not support
640219db68caSYang Zhong              * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
640319db68caSYang Zhong              * any dynamic feature from kvm_arch_get_supported_cpuid.
640419db68caSYang Zhong              */
640519db68caSYang Zhong             warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
640619db68caSYang Zhong                         "for feature bit %d", bit);
640719db68caSYang Zhong         }
640819db68caSYang Zhong         mask &= ~BIT_ULL(bit);
640919db68caSYang Zhong     }
641019db68caSYang Zhong }
64113dba0a33SPaolo Bonzini 
kvm_arch_get_notify_vmexit(Object * obj,Error ** errp)6412e2e69f6bSChenyi Qiang static int kvm_arch_get_notify_vmexit(Object *obj, Error **errp)
6413e2e69f6bSChenyi Qiang {
6414e2e69f6bSChenyi Qiang     KVMState *s = KVM_STATE(obj);
6415e2e69f6bSChenyi Qiang     return s->notify_vmexit;
6416e2e69f6bSChenyi Qiang }
6417e2e69f6bSChenyi Qiang 
kvm_arch_set_notify_vmexit(Object * obj,int value,Error ** errp)6418e2e69f6bSChenyi Qiang static void kvm_arch_set_notify_vmexit(Object *obj, int value, Error **errp)
6419e2e69f6bSChenyi Qiang {
6420e2e69f6bSChenyi Qiang     KVMState *s = KVM_STATE(obj);
6421e2e69f6bSChenyi Qiang 
6422e2e69f6bSChenyi Qiang     if (s->fd != -1) {
6423e2e69f6bSChenyi Qiang         error_setg(errp, "Cannot set properties after the accelerator has been initialized");
6424e2e69f6bSChenyi Qiang         return;
6425e2e69f6bSChenyi Qiang     }
6426e2e69f6bSChenyi Qiang 
6427e2e69f6bSChenyi Qiang     s->notify_vmexit = value;
6428e2e69f6bSChenyi Qiang }
6429e2e69f6bSChenyi Qiang 
kvm_arch_get_notify_window(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)6430e2e69f6bSChenyi Qiang static void kvm_arch_get_notify_window(Object *obj, Visitor *v,
6431e2e69f6bSChenyi Qiang                                        const char *name, void *opaque,
6432e2e69f6bSChenyi Qiang                                        Error **errp)
6433e2e69f6bSChenyi Qiang {
6434e2e69f6bSChenyi Qiang     KVMState *s = KVM_STATE(obj);
6435e2e69f6bSChenyi Qiang     uint32_t value = s->notify_window;
6436e2e69f6bSChenyi Qiang 
6437e2e69f6bSChenyi Qiang     visit_type_uint32(v, name, &value, errp);
6438e2e69f6bSChenyi Qiang }
6439e2e69f6bSChenyi Qiang 
kvm_arch_set_notify_window(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)6440e2e69f6bSChenyi Qiang static void kvm_arch_set_notify_window(Object *obj, Visitor *v,
6441e2e69f6bSChenyi Qiang                                        const char *name, void *opaque,
6442e2e69f6bSChenyi Qiang                                        Error **errp)
6443e2e69f6bSChenyi Qiang {
6444e2e69f6bSChenyi Qiang     KVMState *s = KVM_STATE(obj);
6445e2e69f6bSChenyi Qiang     uint32_t value;
6446e2e69f6bSChenyi Qiang 
6447e2e69f6bSChenyi Qiang     if (s->fd != -1) {
6448e2e69f6bSChenyi Qiang         error_setg(errp, "Cannot set properties after the accelerator has been initialized");
6449e2e69f6bSChenyi Qiang         return;
6450e2e69f6bSChenyi Qiang     }
6451e2e69f6bSChenyi Qiang 
6452d1c81c34SMarkus Armbruster     if (!visit_type_uint32(v, name, &value, errp)) {
6453e2e69f6bSChenyi Qiang         return;
6454e2e69f6bSChenyi Qiang     }
6455e2e69f6bSChenyi Qiang 
6456e2e69f6bSChenyi Qiang     s->notify_window = value;
6457e2e69f6bSChenyi Qiang }
6458e2e69f6bSChenyi Qiang 
kvm_arch_get_xen_version(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)645961491cf4SDavid Woodhouse static void kvm_arch_get_xen_version(Object *obj, Visitor *v,
646061491cf4SDavid Woodhouse                                      const char *name, void *opaque,
646161491cf4SDavid Woodhouse                                      Error **errp)
646261491cf4SDavid Woodhouse {
646361491cf4SDavid Woodhouse     KVMState *s = KVM_STATE(obj);
646461491cf4SDavid Woodhouse     uint32_t value = s->xen_version;
646561491cf4SDavid Woodhouse 
646661491cf4SDavid Woodhouse     visit_type_uint32(v, name, &value, errp);
646761491cf4SDavid Woodhouse }
646861491cf4SDavid Woodhouse 
kvm_arch_set_xen_version(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)646961491cf4SDavid Woodhouse static void kvm_arch_set_xen_version(Object *obj, Visitor *v,
647061491cf4SDavid Woodhouse                                      const char *name, void *opaque,
647161491cf4SDavid Woodhouse                                      Error **errp)
647261491cf4SDavid Woodhouse {
647361491cf4SDavid Woodhouse     KVMState *s = KVM_STATE(obj);
647461491cf4SDavid Woodhouse     Error *error = NULL;
647561491cf4SDavid Woodhouse     uint32_t value;
647661491cf4SDavid Woodhouse 
647761491cf4SDavid Woodhouse     visit_type_uint32(v, name, &value, &error);
647861491cf4SDavid Woodhouse     if (error) {
647961491cf4SDavid Woodhouse         error_propagate(errp, error);
648061491cf4SDavid Woodhouse         return;
648161491cf4SDavid Woodhouse     }
648261491cf4SDavid Woodhouse 
648361491cf4SDavid Woodhouse     s->xen_version = value;
648461491cf4SDavid Woodhouse     if (value && xen_mode == XEN_DISABLED) {
648561491cf4SDavid Woodhouse         xen_mode = XEN_EMULATE;
648661491cf4SDavid Woodhouse     }
648761491cf4SDavid Woodhouse }
648861491cf4SDavid Woodhouse 
kvm_arch_get_xen_gnttab_max_frames(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)64896f43f2eeSDavid Woodhouse static void kvm_arch_get_xen_gnttab_max_frames(Object *obj, Visitor *v,
64906f43f2eeSDavid Woodhouse                                                const char *name, void *opaque,
64916f43f2eeSDavid Woodhouse                                                Error **errp)
64926f43f2eeSDavid Woodhouse {
64936f43f2eeSDavid Woodhouse     KVMState *s = KVM_STATE(obj);
64946f43f2eeSDavid Woodhouse     uint16_t value = s->xen_gnttab_max_frames;
64956f43f2eeSDavid Woodhouse 
64966f43f2eeSDavid Woodhouse     visit_type_uint16(v, name, &value, errp);
64976f43f2eeSDavid Woodhouse }
64986f43f2eeSDavid Woodhouse 
kvm_arch_set_xen_gnttab_max_frames(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)64996f43f2eeSDavid Woodhouse static void kvm_arch_set_xen_gnttab_max_frames(Object *obj, Visitor *v,
65006f43f2eeSDavid Woodhouse                                                const char *name, void *opaque,
65016f43f2eeSDavid Woodhouse                                                Error **errp)
65026f43f2eeSDavid Woodhouse {
65036f43f2eeSDavid Woodhouse     KVMState *s = KVM_STATE(obj);
65046f43f2eeSDavid Woodhouse     Error *error = NULL;
65056f43f2eeSDavid Woodhouse     uint16_t value;
65066f43f2eeSDavid Woodhouse 
65076f43f2eeSDavid Woodhouse     visit_type_uint16(v, name, &value, &error);
65086f43f2eeSDavid Woodhouse     if (error) {
65096f43f2eeSDavid Woodhouse         error_propagate(errp, error);
65106f43f2eeSDavid Woodhouse         return;
65116f43f2eeSDavid Woodhouse     }
65126f43f2eeSDavid Woodhouse 
65136f43f2eeSDavid Woodhouse     s->xen_gnttab_max_frames = value;
65146f43f2eeSDavid Woodhouse }
65156f43f2eeSDavid Woodhouse 
kvm_arch_get_xen_evtchn_max_pirq(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)6516e16aff4cSDavid Woodhouse static void kvm_arch_get_xen_evtchn_max_pirq(Object *obj, Visitor *v,
6517e16aff4cSDavid Woodhouse                                              const char *name, void *opaque,
6518e16aff4cSDavid Woodhouse                                              Error **errp)
6519e16aff4cSDavid Woodhouse {
6520e16aff4cSDavid Woodhouse     KVMState *s = KVM_STATE(obj);
6521e16aff4cSDavid Woodhouse     uint16_t value = s->xen_evtchn_max_pirq;
6522e16aff4cSDavid Woodhouse 
6523e16aff4cSDavid Woodhouse     visit_type_uint16(v, name, &value, errp);
6524e16aff4cSDavid Woodhouse }
6525e16aff4cSDavid Woodhouse 
kvm_arch_set_xen_evtchn_max_pirq(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)6526e16aff4cSDavid Woodhouse static void kvm_arch_set_xen_evtchn_max_pirq(Object *obj, Visitor *v,
6527e16aff4cSDavid Woodhouse                                              const char *name, void *opaque,
6528e16aff4cSDavid Woodhouse                                              Error **errp)
6529e16aff4cSDavid Woodhouse {
6530e16aff4cSDavid Woodhouse     KVMState *s = KVM_STATE(obj);
6531e16aff4cSDavid Woodhouse     Error *error = NULL;
6532e16aff4cSDavid Woodhouse     uint16_t value;
6533e16aff4cSDavid Woodhouse 
6534e16aff4cSDavid Woodhouse     visit_type_uint16(v, name, &value, &error);
6535e16aff4cSDavid Woodhouse     if (error) {
6536e16aff4cSDavid Woodhouse         error_propagate(errp, error);
6537e16aff4cSDavid Woodhouse         return;
6538e16aff4cSDavid Woodhouse     }
6539e16aff4cSDavid Woodhouse 
6540e16aff4cSDavid Woodhouse     s->xen_evtchn_max_pirq = value;
6541e16aff4cSDavid Woodhouse }
6542e16aff4cSDavid Woodhouse 
/*
 * Register the x86-specific accelerator properties on @oc:
 *
 *   notify-vmexit          enum, backed by NotifyVmexitOption_lookup
 *   notify-window          uint32
 *   xen-version            uint32
 *   xen-gnttab-max-frames  uint16
 *   xen-evtchn-max-pirq    uint16
 *
 * Each property gets its getter/setter pair and a description string.
 */
void kvm_arch_accel_class_init(ObjectClass *oc)
{
    /*
     * The advertised type name is spelled like the enum that owns
     * NotifyVmexitOption_lookup (lower-case "m").
     */
    object_class_property_add_enum(oc, "notify-vmexit", "NotifyVmexitOption",
                                   &NotifyVmexitOption_lookup,
                                   kvm_arch_get_notify_vmexit,
                                   kvm_arch_set_notify_vmexit);
    object_class_property_set_description(oc, "notify-vmexit",
                                          "Enable notify VM exit");

    object_class_property_add(oc, "notify-window", "uint32",
                              kvm_arch_get_notify_window,
                              kvm_arch_set_notify_window,
                              NULL, NULL);
    object_class_property_set_description(oc, "notify-window",
                                          "Clock cycles without an event window "
                                          "after which a notification VM exit occurs");

    object_class_property_add(oc, "xen-version", "uint32",
                              kvm_arch_get_xen_version,
                              kvm_arch_set_xen_version,
                              NULL, NULL);
    object_class_property_set_description(oc, "xen-version",
                                          "Xen version to be emulated "
                                          "(in XENVER_version form "
                                          "e.g. 0x4000a for 4.10)");

    object_class_property_add(oc, "xen-gnttab-max-frames", "uint16",
                              kvm_arch_get_xen_gnttab_max_frames,
                              kvm_arch_set_xen_gnttab_max_frames,
                              NULL, NULL);
    object_class_property_set_description(oc, "xen-gnttab-max-frames",
                                          "Maximum number of grant table frames");

    object_class_property_add(oc, "xen-evtchn-max-pirq", "uint16",
                              kvm_arch_get_xen_evtchn_max_pirq,
                              kvm_arch_set_xen_evtchn_max_pirq,
                              NULL, NULL);
    object_class_property_set_description(oc, "xen-evtchn-max-pirq",
                                          "Maximum number of Xen PIRQs");
}
658319e2a9fbSZeng Guang 
kvm_set_max_apic_id(uint32_t max_apic_id)658419e2a9fbSZeng Guang void kvm_set_max_apic_id(uint32_t max_apic_id)
658519e2a9fbSZeng Guang {
658619e2a9fbSZeng Guang     kvm_vm_enable_cap(kvm_state, KVM_CAP_MAX_VCPU_ID, 0, max_apic_id);
658719e2a9fbSZeng Guang }
6588