xref: /openbmc/qemu/target/i386/hvf/hvf.c (revision 63e7af2035242dda6e2460f4eadbbe6f58c67614)
1 /* Copyright 2008 IBM Corporation
2  *           2008 Red Hat, Inc.
3  * Copyright 2011 Intel Corporation
4  * Copyright 2016 Veertu, Inc.
5  * Copyright 2017 The Android Open Source Project
6  *
7  * QEMU Hypervisor.framework support
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of version 2 of the GNU General Public
11  * License as published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, see <http://www.gnu.org/licenses/>.
20  *
21  * This file contain code under public domain from the hvdos project:
22  * https://github.com/mist64/hvdos
23  *
24  * Parts Copyright (c) 2011 NetApp, Inc.
25  * All rights reserved.
26  *
27  * Redistribution and use in source and binary forms, with or without
28  * modification, are permitted provided that the following conditions
29  * are met:
30  * 1. Redistributions of source code must retain the above copyright
31  *    notice, this list of conditions and the following disclaimer.
32  * 2. Redistributions in binary form must reproduce the above copyright
33  *    notice, this list of conditions and the following disclaimer in the
34  *    documentation and/or other materials provided with the distribution.
35  *
36  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46  * SUCH DAMAGE.
47  */
48 
49 #include "qemu/osdep.h"
50 #include "qemu/error-report.h"
51 #include "qemu/memalign.h"
52 #include "qapi/error.h"
53 #include "migration/blocker.h"
54 
55 #include "system/hvf.h"
56 #include "system/hvf_int.h"
57 #include "system/runstate.h"
58 #include "system/cpus.h"
59 #include "hvf-i386.h"
60 #include "vmcs.h"
61 #include "vmx.h"
62 #include "emulate/x86.h"
63 #include "x86_descr.h"
64 #include "emulate/x86_flags.h"
65 #include "x86_mmu.h"
66 #include "emulate/x86_decode.h"
67 #include "emulate/x86_emu.h"
68 #include "x86_task.h"
69 #include "x86hvf.h"
70 
71 #include <Hypervisor/hv.h>
72 #include <Hypervisor/hv_vmx.h>
73 #include <sys/sysctl.h>
74 
75 #include "hw/i386/apic_internal.h"
76 #include "qemu/main-loop.h"
77 #include "qemu/accel.h"
78 #include "target/i386/cpu.h"
79 #include "exec/target_page.h"
80 
/*
 * Migration blocker registered by hvf_arch_init_vcpu() the first time a
 * vCPU advertises the invariant-TSC CPUID flag.  It stays NULL until then.
 */
static Error *invtsc_mig_blocker;
82 
83 void vmx_update_tpr(CPUState *cpu)
84 {
85     /* TODO: need integrate APIC handling */
86     X86CPU *x86_cpu = X86_CPU(cpu);
87     int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
88     int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
89 
90     wreg(cpu->accel->fd, HV_X86_TPR, tpr);
91     if (irr == -1) {
92         wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0);
93     } else {
94         wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
95               irr >> 4);
96     }
97 }
98 
99 static void update_apic_tpr(CPUState *cpu)
100 {
101     X86CPU *x86_cpu = X86_CPU(cpu);
102     int tpr = rreg(cpu->accel->fd, HV_X86_TPR) >> 4;
103     cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
104 }
105 
/*
 * Mask applied to the IDT-vectoring information word to extract the vector
 * number (used by the task-switch exit handler in hvf_vcpu_exec()).
 */
#define VECTORING_INFO_VECTOR_MASK     0xff
107 
108 void hvf_handle_io(CPUState *env, uint16_t port, void *buffer,
109                   int direction, int size, int count)
110 {
111     int i;
112     uint8_t *ptr = buffer;
113 
114     for (i = 0; i < count; i++) {
115         address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
116                          ptr, size,
117                          direction);
118         ptr += size;
119     }
120 }
121 
122 static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
123 {
124     int read, write;
125 
126     /* EPT fault on an instruction fetch doesn't make sense here */
127     if (ept_qual & EPT_VIOLATION_INST_FETCH) {
128         return false;
129     }
130 
131     /* EPT fault must be a read fault or a write fault */
132     read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
133     write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
134     if ((read | write) == 0) {
135         return false;
136     }
137 
138     if (write && slot) {
139         if (slot->flags & HVF_SLOT_LOG) {
140             uint64_t dirty_page_start = gpa & ~(TARGET_PAGE_SIZE - 1u);
141             memory_region_set_dirty(slot->region, gpa - slot->start, 1);
142             hv_vm_protect(dirty_page_start, TARGET_PAGE_SIZE,
143                           HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
144         }
145     }
146 
147     /*
148      * The EPT violation must have been caused by accessing a
149      * guest-physical address that is a translation of a guest-linear
150      * address.
151      */
152     if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
153         (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
154         return false;
155     }
156 
157     if (!slot) {
158         return true;
159     }
160     if (!memory_region_is_ram(slot->region) &&
161         !(read && memory_region_is_romd(slot->region))) {
162         return true;
163     }
164     return false;
165 }
166 
167 void hvf_arch_vcpu_destroy(CPUState *cpu)
168 {
169     X86CPU *x86_cpu = X86_CPU(cpu);
170     CPUX86State *env = &x86_cpu->env;
171 
172     g_free(env->emu_mmio_buf);
173 }
174 
175 static void init_tsc_freq(CPUX86State *env)
176 {
177     size_t length;
178     uint64_t tsc_freq;
179 
180     if (env->tsc_khz != 0) {
181         return;
182     }
183 
184     length = sizeof(uint64_t);
185     if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
186         return;
187     }
188     env->tsc_khz = tsc_freq / 1000;  /* Hz to KHz */
189 }
190 
191 static void init_apic_bus_freq(CPUX86State *env)
192 {
193     size_t length;
194     uint64_t bus_freq;
195 
196     if (env->apic_bus_freq != 0) {
197         return;
198     }
199 
200     length = sizeof(uint64_t);
201     if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
202         return;
203     }
204     env->apic_bus_freq = bus_freq;
205 }
206 
207 static inline bool tsc_is_known(CPUX86State *env)
208 {
209     return env->tsc_khz != 0;
210 }
211 
212 static inline bool apic_bus_freq_is_known(CPUX86State *env)
213 {
214     return env->apic_bus_freq != 0;
215 }
216 
217 void hvf_kick_vcpu_thread(CPUState *cpu)
218 {
219     cpus_kick_thread(cpu);
220     hv_vcpu_interrupt(&cpu->accel->fd, 1);
221 }
222 
/*
 * Architecture-specific accelerator initialisation hook.
 * Nothing needs to be done on x86, so this always reports success (0).
 */
int hvf_arch_init(void)
{
    return 0;
}
227 
228 hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range)
229 {
230     return hv_vm_create(HV_VM_DEFAULT);
231 }
232 
233 static void hvf_read_segment_descriptor(CPUState *s, struct x86_segment_descriptor *desc,
234                                         X86Seg seg)
235 {
236     struct vmx_segment vmx_segment;
237     vmx_read_segment_descriptor(s, &vmx_segment, seg);
238     vmx_segment_to_x86_descriptor(s, &vmx_segment, desc);
239 }
240 
241 static void hvf_read_mem(CPUState *cpu, void *data, target_ulong gva, int bytes)
242 {
243     vmx_read_mem(cpu, data, gva, bytes);
244 }
245 
246 static void hvf_write_mem(CPUState *cpu, void *data, target_ulong gva, int bytes)
247 {
248     vmx_write_mem(cpu, gva, data, bytes);
249 }
250 
251 static const struct x86_emul_ops hvf_x86_emul_ops = {
252     .read_mem = hvf_read_mem,
253     .write_mem = hvf_write_mem,
254     .read_segment_descriptor = hvf_read_segment_descriptor,
255     .handle_io = hvf_handle_io,
256     .simulate_rdmsr = hvf_simulate_rdmsr,
257     .simulate_wrmsr = hvf_simulate_wrmsr,
258 };
259 
260 int hvf_arch_init_vcpu(CPUState *cpu)
261 {
262     X86CPU *x86cpu = X86_CPU(cpu);
263     CPUX86State *env = &x86cpu->env;
264     Error *local_err = NULL;
265     int r;
266     uint64_t reqCap;
267 
268     init_emu(&hvf_x86_emul_ops);
269     init_decoder();
270 
271     if (hvf_state->hvf_caps == NULL) {
272         hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
273     }
274     env->emu_mmio_buf = g_new(char, 4096);
275 
276     if (x86cpu->vmware_cpuid_freq) {
277         init_tsc_freq(env);
278         init_apic_bus_freq(env);
279 
280         if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
281             error_report("vmware-cpuid-freq: feature couldn't be enabled");
282         }
283     }
284 
285     if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
286         invtsc_mig_blocker == NULL) {
287         error_setg(&invtsc_mig_blocker,
288                    "State blocked by non-migratable CPU device (invtsc flag)");
289         r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
290         if (r < 0) {
291             error_report_err(local_err);
292             return r;
293         }
294     }
295 
296 
297     if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
298         &hvf_state->hvf_caps->vmx_cap_pinbased)) {
299         abort();
300     }
301     if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
302         &hvf_state->hvf_caps->vmx_cap_procbased)) {
303         abort();
304     }
305     if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
306         &hvf_state->hvf_caps->vmx_cap_procbased2)) {
307         abort();
308     }
309     if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
310         &hvf_state->hvf_caps->vmx_cap_entry)) {
311         abort();
312     }
313 
314     /* set VMCS control fields */
315     wvmcs(cpu->accel->fd, VMCS_PIN_BASED_CTLS,
316           cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
317                    VMCS_PIN_BASED_CTLS_EXTINT |
318                    VMCS_PIN_BASED_CTLS_NMI |
319                    VMCS_PIN_BASED_CTLS_VNMI));
320     wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS,
321           cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
322                    VMCS_PRI_PROC_BASED_CTLS_HLT |
323                    VMCS_PRI_PROC_BASED_CTLS_MWAIT |
324                    VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
325                    VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
326           VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
327 
328     reqCap = VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES;
329 
330     /* Is RDTSCP support in CPUID?  If so, enable it in the VMCS. */
331     if (hvf_get_supported_cpuid(0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
332         reqCap |= VMCS_PRI_PROC_BASED2_CTLS_RDTSCP;
333     }
334 
335     wvmcs(cpu->accel->fd, VMCS_SEC_PROC_BASED_CTLS,
336           cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, reqCap));
337 
338     wvmcs(cpu->accel->fd, VMCS_ENTRY_CTLS,
339           cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
340     wvmcs(cpu->accel->fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
341 
342     wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0);
343 
344     x86cpu = X86_CPU(cpu);
345     x86cpu->env.xsave_buf_len = 4096;
346     x86cpu->env.xsave_buf = qemu_memalign(4096, x86cpu->env.xsave_buf_len);
347 
348     /*
349      * The allocated storage must be large enough for all of the
350      * possible XSAVE state components.
351      */
352     assert(hvf_get_supported_cpuid(0xd, 0, R_ECX) <= x86cpu->env.xsave_buf_len);
353 
354     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_STAR, 1);
355     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_LSTAR, 1);
356     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_CSTAR, 1);
357     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FMASK, 1);
358     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FSBASE, 1);
359     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_GSBASE, 1);
360     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_KERNELGSBASE, 1);
361     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_TSC_AUX, 1);
362     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_TSC, 1);
363     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_CS, 1);
364     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_EIP, 1);
365     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_ESP, 1);
366 
367     return 0;
368 }
369 
370 static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
371 {
372     X86CPU *x86_cpu = X86_CPU(cpu);
373     CPUX86State *env = &x86_cpu->env;
374 
375     env->exception_nr = -1;
376     env->exception_pending = 0;
377     env->exception_injected = 0;
378     env->interrupt_injected = -1;
379     env->nmi_injected = false;
380     env->ins_len = 0;
381     env->has_error_code = false;
382     if (idtvec_info & VMCS_IDT_VEC_VALID) {
383         switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
384         case VMCS_IDT_VEC_HWINTR:
385         case VMCS_IDT_VEC_SWINTR:
386             env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
387             break;
388         case VMCS_IDT_VEC_NMI:
389             env->nmi_injected = true;
390             break;
391         case VMCS_IDT_VEC_HWEXCEPTION:
392         case VMCS_IDT_VEC_SWEXCEPTION:
393             env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
394             env->exception_injected = 1;
395             break;
396         case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
397         default:
398             abort();
399         }
400         if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
401             (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
402             env->ins_len = ins_len;
403         }
404         if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
405             env->has_error_code = true;
406             env->error_code = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_ERROR);
407         }
408     }
409     if ((rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
410         VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
411         env->hflags2 |= HF2_NMI_MASK;
412     } else {
413         env->hflags2 &= ~HF2_NMI_MASK;
414     }
415     if (rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
416          (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
417          VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
418         env->hflags |= HF_INHIBIT_IRQ_MASK;
419     } else {
420         env->hflags &= ~HF_INHIBIT_IRQ_MASK;
421     }
422 }
423 
424 static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
425                               uint32_t *eax, uint32_t *ebx,
426                               uint32_t *ecx, uint32_t *edx)
427 {
428     /*
429      * A wrapper extends cpu_x86_cpuid with 0x40000000 and 0x40000010 leafs,
430      * leafs 0x40000001-0x4000000F are filled with zeros
431      * Provides vmware-cpuid-freq support to hvf
432      *
433      * Note: leaf 0x40000000 not exposes HVF,
434      * leaving hypervisor signature empty
435      */
436 
437     if (index < 0x40000000 || index > 0x40000010 ||
438         !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
439 
440         cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
441         return;
442     }
443 
444     switch (index) {
445     case 0x40000000:
446         *eax = 0x40000010;    /* Max available cpuid leaf */
447         *ebx = 0;             /* Leave signature empty */
448         *ecx = 0;
449         *edx = 0;
450         break;
451     case 0x40000010:
452         *eax = env->tsc_khz;
453         *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */
454         *ecx = 0;
455         *edx = 0;
456         break;
457     default:
458         *eax = 0;
459         *ebx = 0;
460         *ecx = 0;
461         *edx = 0;
462         break;
463     }
464 }
465 
466 void hvf_load_regs(CPUState *cs)
467 {
468     X86CPU *cpu = X86_CPU(cs);
469     CPUX86State *env = &cpu->env;
470 
471     int i = 0;
472     RRX(env, R_EAX) = rreg(cs->accel->fd, HV_X86_RAX);
473     RRX(env, R_EBX) = rreg(cs->accel->fd, HV_X86_RBX);
474     RRX(env, R_ECX) = rreg(cs->accel->fd, HV_X86_RCX);
475     RRX(env, R_EDX) = rreg(cs->accel->fd, HV_X86_RDX);
476     RRX(env, R_ESI) = rreg(cs->accel->fd, HV_X86_RSI);
477     RRX(env, R_EDI) = rreg(cs->accel->fd, HV_X86_RDI);
478     RRX(env, R_ESP) = rreg(cs->accel->fd, HV_X86_RSP);
479     RRX(env, R_EBP) = rreg(cs->accel->fd, HV_X86_RBP);
480     for (i = 8; i < 16; i++) {
481         RRX(env, i) = rreg(cs->accel->fd, HV_X86_RAX + i);
482     }
483 
484     env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
485     rflags_to_lflags(env);
486     env->eip = rreg(cs->accel->fd, HV_X86_RIP);
487 }
488 
489 void hvf_store_regs(CPUState *cs)
490 {
491     X86CPU *cpu = X86_CPU(cs);
492     CPUX86State *env = &cpu->env;
493 
494     int i = 0;
495     wreg(cs->accel->fd, HV_X86_RAX, RAX(env));
496     wreg(cs->accel->fd, HV_X86_RBX, RBX(env));
497     wreg(cs->accel->fd, HV_X86_RCX, RCX(env));
498     wreg(cs->accel->fd, HV_X86_RDX, RDX(env));
499     wreg(cs->accel->fd, HV_X86_RSI, RSI(env));
500     wreg(cs->accel->fd, HV_X86_RDI, RDI(env));
501     wreg(cs->accel->fd, HV_X86_RBP, RBP(env));
502     wreg(cs->accel->fd, HV_X86_RSP, RSP(env));
503     for (i = 8; i < 16; i++) {
504         wreg(cs->accel->fd, HV_X86_RAX + i, RRX(env, i));
505     }
506 
507     lflags_to_rflags(env);
508     wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
509     macvm_set_rip(cs, env->eip);
510 }
511 
512 void hvf_simulate_rdmsr(CPUState *cs)
513 {
514     X86CPU *cpu = X86_CPU(cs);
515     CPUX86State *env = &cpu->env;
516     uint32_t msr = ECX(env);
517     uint64_t val = 0;
518 
519     switch (msr) {
520     case MSR_IA32_TSC:
521         val = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
522         break;
523     case MSR_IA32_APICBASE:
524         val = cpu_get_apic_base(cpu->apic_state);
525         break;
526     case MSR_APIC_START ... MSR_APIC_END: {
527         int ret;
528         int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
529 
530         ret = apic_msr_read(index, &val);
531         if (ret < 0) {
532             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
533         }
534 
535         break;
536     }
537     case MSR_IA32_UCODE_REV:
538         val = cpu->ucode_rev;
539         break;
540     case MSR_EFER:
541         val = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
542         break;
543     case MSR_FSBASE:
544         val = rvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE);
545         break;
546     case MSR_GSBASE:
547         val = rvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE);
548         break;
549     case MSR_KERNELGSBASE:
550         val = rvmcs(cs->accel->fd, VMCS_HOST_FS_BASE);
551         break;
552     case MSR_STAR:
553         abort();
554         break;
555     case MSR_LSTAR:
556         abort();
557         break;
558     case MSR_CSTAR:
559         abort();
560         break;
561     case MSR_IA32_MISC_ENABLE:
562         val = env->msr_ia32_misc_enable;
563         break;
564     case MSR_MTRRphysBase(0):
565     case MSR_MTRRphysBase(1):
566     case MSR_MTRRphysBase(2):
567     case MSR_MTRRphysBase(3):
568     case MSR_MTRRphysBase(4):
569     case MSR_MTRRphysBase(5):
570     case MSR_MTRRphysBase(6):
571     case MSR_MTRRphysBase(7):
572         val = env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base;
573         break;
574     case MSR_MTRRphysMask(0):
575     case MSR_MTRRphysMask(1):
576     case MSR_MTRRphysMask(2):
577     case MSR_MTRRphysMask(3):
578     case MSR_MTRRphysMask(4):
579     case MSR_MTRRphysMask(5):
580     case MSR_MTRRphysMask(6):
581     case MSR_MTRRphysMask(7):
582         val = env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask;
583         break;
584     case MSR_MTRRfix64K_00000:
585         val = env->mtrr_fixed[0];
586         break;
587     case MSR_MTRRfix16K_80000:
588     case MSR_MTRRfix16K_A0000:
589         val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1];
590         break;
591     case MSR_MTRRfix4K_C0000:
592     case MSR_MTRRfix4K_C8000:
593     case MSR_MTRRfix4K_D0000:
594     case MSR_MTRRfix4K_D8000:
595     case MSR_MTRRfix4K_E0000:
596     case MSR_MTRRfix4K_E8000:
597     case MSR_MTRRfix4K_F0000:
598     case MSR_MTRRfix4K_F8000:
599         val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3];
600         break;
601     case MSR_MTRRdefType:
602         val = env->mtrr_deftype;
603         break;
604     case MSR_CORE_THREAD_COUNT:
605         val = cpu_x86_get_msr_core_thread_count(cpu);
606         break;
607     default:
608         /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */
609         val = 0;
610         break;
611     }
612 
613     RAX(env) = (uint32_t)val;
614     RDX(env) = (uint32_t)(val >> 32);
615 }
616 
617 void hvf_simulate_wrmsr(CPUState *cs)
618 {
619     X86CPU *cpu = X86_CPU(cs);
620     CPUX86State *env = &cpu->env;
621     uint32_t msr = ECX(env);
622     uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env);
623 
624     switch (msr) {
625     case MSR_IA32_TSC:
626         break;
627     case MSR_IA32_APICBASE: {
628         int r;
629 
630         r = cpu_set_apic_base(cpu->apic_state, data);
631         if (r < 0) {
632             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
633         }
634 
635         break;
636     }
637     case MSR_APIC_START ... MSR_APIC_END: {
638         int ret;
639         int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
640 
641         ret = apic_msr_write(index, data);
642         if (ret < 0) {
643             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
644         }
645 
646         break;
647     }
648     case MSR_FSBASE:
649         wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data);
650         break;
651     case MSR_GSBASE:
652         wvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE, data);
653         break;
654     case MSR_KERNELGSBASE:
655         wvmcs(cs->accel->fd, VMCS_HOST_FS_BASE, data);
656         break;
657     case MSR_STAR:
658         abort();
659         break;
660     case MSR_LSTAR:
661         abort();
662         break;
663     case MSR_CSTAR:
664         abort();
665         break;
666     case MSR_EFER:
667         /*printf("new efer %llx\n", EFER(cs));*/
668         wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, data);
669         if (data & MSR_EFER_NXE) {
670             hv_vcpu_invalidate_tlb(cs->accel->fd);
671         }
672         break;
673     case MSR_MTRRphysBase(0):
674     case MSR_MTRRphysBase(1):
675     case MSR_MTRRphysBase(2):
676     case MSR_MTRRphysBase(3):
677     case MSR_MTRRphysBase(4):
678     case MSR_MTRRphysBase(5):
679     case MSR_MTRRphysBase(6):
680     case MSR_MTRRphysBase(7):
681         env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base = data;
682         break;
683     case MSR_MTRRphysMask(0):
684     case MSR_MTRRphysMask(1):
685     case MSR_MTRRphysMask(2):
686     case MSR_MTRRphysMask(3):
687     case MSR_MTRRphysMask(4):
688     case MSR_MTRRphysMask(5):
689     case MSR_MTRRphysMask(6):
690     case MSR_MTRRphysMask(7):
691         env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask = data;
692         break;
693     case MSR_MTRRfix64K_00000:
694         env->mtrr_fixed[ECX(env) - MSR_MTRRfix64K_00000] = data;
695         break;
696     case MSR_MTRRfix16K_80000:
697     case MSR_MTRRfix16K_A0000:
698         env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1] = data;
699         break;
700     case MSR_MTRRfix4K_C0000:
701     case MSR_MTRRfix4K_C8000:
702     case MSR_MTRRfix4K_D0000:
703     case MSR_MTRRfix4K_D8000:
704     case MSR_MTRRfix4K_E0000:
705     case MSR_MTRRfix4K_E8000:
706     case MSR_MTRRfix4K_F0000:
707     case MSR_MTRRfix4K_F8000:
708         env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3] = data;
709         break;
710     case MSR_MTRRdefType:
711         env->mtrr_deftype = data;
712         break;
713     default:
714         break;
715     }
716 
717     /* Related to support known hypervisor interface */
718     /* if (g_hypervisor_iface)
719          g_hypervisor_iface->wrmsr_handler(cs, msr, data);
720 
721     printf("write msr %llx\n", RCX(cs));*/
722 }
723 
724 int hvf_vcpu_exec(CPUState *cpu)
725 {
726     X86CPU *x86_cpu = X86_CPU(cpu);
727     CPUX86State *env = &x86_cpu->env;
728     int ret = 0;
729     uint64_t rip = 0;
730 
731     if (hvf_process_events(cpu)) {
732         return EXCP_HLT;
733     }
734 
735     do {
736         if (cpu->vcpu_dirty) {
737             hvf_put_registers(cpu);
738             cpu->vcpu_dirty = false;
739         }
740 
741         if (hvf_inject_interrupts(cpu)) {
742             return EXCP_INTERRUPT;
743         }
744         vmx_update_tpr(cpu);
745 
746         bql_unlock();
747         if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
748             bql_lock();
749             return EXCP_HLT;
750         }
751 
752         hv_return_t r = hv_vcpu_run_until(cpu->accel->fd, HV_DEADLINE_FOREVER);
753         assert_hvf_ok(r);
754 
755         /* handle VMEXIT */
756         uint64_t exit_reason = rvmcs(cpu->accel->fd, VMCS_EXIT_REASON);
757         uint64_t exit_qual = rvmcs(cpu->accel->fd, VMCS_EXIT_QUALIFICATION);
758         uint32_t ins_len = (uint32_t)rvmcs(cpu->accel->fd,
759                                            VMCS_EXIT_INSTRUCTION_LENGTH);
760 
761         uint64_t idtvec_info = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
762 
763         hvf_store_events(cpu, ins_len, idtvec_info);
764         rip = rreg(cpu->accel->fd, HV_X86_RIP);
765         env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS);
766 
767         bql_lock();
768 
769         update_apic_tpr(cpu);
770         current_cpu = cpu;
771 
772         ret = 0;
773         switch (exit_reason) {
774         case EXIT_REASON_HLT: {
775             macvm_set_rip(cpu, rip + ins_len);
776             if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
777                 (env->eflags & IF_MASK))
778                 && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
779                 !(idtvec_info & VMCS_IDT_VEC_VALID)) {
780                 cpu->halted = 1;
781                 ret = EXCP_HLT;
782                 break;
783             }
784             ret = EXCP_INTERRUPT;
785             break;
786         }
787         case EXIT_REASON_MWAIT: {
788             ret = EXCP_INTERRUPT;
789             break;
790         }
791         /* Need to check if MMIO or unmapped fault */
792         case EXIT_REASON_EPT_FAULT:
793         {
794             hvf_slot *slot;
795             uint64_t gpa = rvmcs(cpu->accel->fd, VMCS_GUEST_PHYSICAL_ADDRESS);
796 
797             if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
798                 ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
799                 vmx_set_nmi_blocking(cpu);
800             }
801 
802             slot = hvf_find_overlap_slot(gpa, 1);
803             /* mmio */
804             if (ept_emulation_fault(slot, gpa, exit_qual)) {
805                 struct x86_decode decode;
806 
807                 hvf_load_regs(cpu);
808                 decode_instruction(env, &decode);
809                 exec_instruction(env, &decode);
810                 hvf_store_regs(cpu);
811                 break;
812             }
813             break;
814         }
815         case EXIT_REASON_INOUT:
816         {
817             uint32_t in = (exit_qual & 8) != 0;
818             uint32_t size =  (exit_qual & 7) + 1;
819             uint32_t string =  (exit_qual & 16) != 0;
820             uint32_t port =  exit_qual >> 16;
821             /*uint32_t rep = (exit_qual & 0x20) != 0;*/
822 
823             if (!string && in) {
824                 uint64_t val = 0;
825                 hvf_load_regs(cpu);
826                 hvf_handle_io(env_cpu(env), port, &val, 0, size, 1);
827                 if (size == 1) {
828                     AL(env) = val;
829                 } else if (size == 2) {
830                     AX(env) = val;
831                 } else if (size == 4) {
832                     RAX(env) = (uint32_t)val;
833                 } else {
834                     RAX(env) = (uint64_t)val;
835                 }
836                 env->eip += ins_len;
837                 hvf_store_regs(cpu);
838                 break;
839             } else if (!string && !in) {
840                 RAX(env) = rreg(cpu->accel->fd, HV_X86_RAX);
841                 hvf_handle_io(env_cpu(env), port, &RAX(env), 1, size, 1);
842                 macvm_set_rip(cpu, rip + ins_len);
843                 break;
844             }
845             struct x86_decode decode;
846 
847             hvf_load_regs(cpu);
848             decode_instruction(env, &decode);
849             assert(ins_len == decode.len);
850             exec_instruction(env, &decode);
851             hvf_store_regs(cpu);
852 
853             break;
854         }
855         case EXIT_REASON_CPUID: {
856             uint32_t rax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
857             uint32_t rbx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RBX);
858             uint32_t rcx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
859             uint32_t rdx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);
860 
861             if (rax == 1) {
862                 /* CPUID1.ecx.OSXSAVE needs to know CR4 */
863                 env->cr[4] = rvmcs(cpu->accel->fd, VMCS_GUEST_CR4);
864             }
865             hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
866 
867             wreg(cpu->accel->fd, HV_X86_RAX, rax);
868             wreg(cpu->accel->fd, HV_X86_RBX, rbx);
869             wreg(cpu->accel->fd, HV_X86_RCX, rcx);
870             wreg(cpu->accel->fd, HV_X86_RDX, rdx);
871 
872             macvm_set_rip(cpu, rip + ins_len);
873             break;
874         }
875         case EXIT_REASON_XSETBV: {
876             uint32_t eax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
877             uint32_t ecx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
878             uint32_t edx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);
879 
880             if (ecx) {
881                 macvm_set_rip(cpu, rip + ins_len);
882                 break;
883             }
884             env->xcr0 = ((uint64_t)edx << 32) | eax;
885             wreg(cpu->accel->fd, HV_X86_XCR0, env->xcr0 | 1);
886             macvm_set_rip(cpu, rip + ins_len);
887             break;
888         }
889         case EXIT_REASON_INTR_WINDOW:
890             vmx_clear_int_window_exiting(cpu);
891             ret = EXCP_INTERRUPT;
892             break;
893         case EXIT_REASON_NMI_WINDOW:
894             vmx_clear_nmi_window_exiting(cpu);
895             ret = EXCP_INTERRUPT;
896             break;
897         case EXIT_REASON_EXT_INTR:
898             /* force exit and allow io handling */
899             ret = EXCP_INTERRUPT;
900             break;
901         case EXIT_REASON_RDMSR:
902         case EXIT_REASON_WRMSR:
903         {
904             hvf_load_regs(cpu);
905             if (exit_reason == EXIT_REASON_RDMSR) {
906                 hvf_simulate_rdmsr(cpu);
907             } else {
908                 hvf_simulate_wrmsr(cpu);
909             }
910             env->eip += ins_len;
911             hvf_store_regs(cpu);
912             break;
913         }
914         case EXIT_REASON_CR_ACCESS: {
915             int cr;
916             int reg;
917 
918             hvf_load_regs(cpu);
919             cr = exit_qual & 15;
920             reg = (exit_qual >> 8) & 15;
921 
922             switch (cr) {
923             case 0x0: {
924                 macvm_set_cr0(cpu->accel->fd, RRX(env, reg));
925                 break;
926             }
927             case 4: {
928                 macvm_set_cr4(cpu->accel->fd, RRX(env, reg));
929                 break;
930             }
931             case 8: {
932                 if (exit_qual & 0x10) {
933                     RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
934                 } else {
935                     int tpr = RRX(env, reg);
936                     cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
937                     ret = EXCP_INTERRUPT;
938                 }
939                 break;
940             }
941             default:
942                 error_report("Unrecognized CR %d", cr);
943                 abort();
944             }
945             env->eip += ins_len;
946             hvf_store_regs(cpu);
947             break;
948         }
949         case EXIT_REASON_APIC_ACCESS: { /* TODO */
950             struct x86_decode decode;
951 
952             hvf_load_regs(cpu);
953             decode_instruction(env, &decode);
954             exec_instruction(env, &decode);
955             hvf_store_regs(cpu);
956             break;
957         }
958         case EXIT_REASON_TPR: {
959             ret = 1;
960             break;
961         }
962         case EXIT_REASON_TASK_SWITCH: {
963             uint64_t vinfo = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
964             x86_segment_selector sel = {.sel = exit_qual & 0xffff};
965             vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
966              vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
967              & VMCS_INTR_T_MASK);
968             break;
969         }
970         case EXIT_REASON_TRIPLE_FAULT: {
971             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
972             ret = EXCP_INTERRUPT;
973             break;
974         }
975         case EXIT_REASON_RDPMC:
976             wreg(cpu->accel->fd, HV_X86_RAX, 0);
977             wreg(cpu->accel->fd, HV_X86_RDX, 0);
978             macvm_set_rip(cpu, rip + ins_len);
979             break;
980         case VMX_REASON_VMCALL:
981             env->exception_nr = EXCP0D_GPF;
982             env->exception_injected = 1;
983             env->has_error_code = true;
984             env->error_code = 0;
985             break;
986         default:
987             error_report("%llx: unhandled exit %llx", rip, exit_reason);
988         }
989     } while (ret == 0);
990 
991     return ret;
992 }
993 
994 int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
995 {
996     return -ENOSYS;
997 }
998 
999 int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
1000 {
1001     return -ENOSYS;
1002 }
1003 
1004 int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
1005 {
1006     return -ENOSYS;
1007 }
1008 
1009 int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
1010 {
1011     return -ENOSYS;
1012 }
1013 
/*
 * No-op: x86 HVF never installs hardware breakpoints, so there is nothing
 * to remove.
 */
void hvf_arch_remove_all_hw_breakpoints(void)
{
}
1017 
1018 void hvf_arch_update_guest_debug(CPUState *cpu)
1019 {
1020 }
1021 
/*
 * Report whether this architecture back end supports guest debugging.
 * x86 HVF does not, matching the -ENOSYS breakpoint stubs in this file.
 */
bool hvf_arch_supports_guest_debug(void)
{
    return false;
}
1026