xref: /openbmc/qemu/target/i386/hvf/x86hvf.c (revision 2df1eb27)
1 /*
2  * Copyright (c) 2003-2008 Fabrice Bellard
3  * Copyright (C) 2016 Veertu Inc,
4  * Copyright (C) 2017 Google Inc,
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #include "x86hvf.h"
23 #include "vmx.h"
24 #include "vmcs.h"
25 #include "cpu.h"
26 #include "x86_descr.h"
27 #include "x86_decode.h"
28 #include "sysemu/hw_accel.h"
29 
30 #include "hw/i386/apic_internal.h"
31 
32 #include <Hypervisor/hv.h>
33 #include <Hypervisor/hv_vmx.h>
34 
35 void hvf_set_segment(CPUState *cs, struct vmx_segment *vmx_seg,
36                      SegmentCache *qseg, bool is_tr)
37 {
38     vmx_seg->sel = qseg->selector;
39     vmx_seg->base = qseg->base;
40     vmx_seg->limit = qseg->limit;
41 
42     if (!qseg->selector && !x86_is_real(cs) && !is_tr) {
43         /* the TR register is usable after processor reset despite
44          * having a null selector */
45         vmx_seg->ar = 1 << 16;
46         return;
47     }
48     vmx_seg->ar = (qseg->flags >> DESC_TYPE_SHIFT) & 0xf;
49     vmx_seg->ar |= ((qseg->flags >> DESC_G_SHIFT) & 1) << 15;
50     vmx_seg->ar |= ((qseg->flags >> DESC_B_SHIFT) & 1) << 14;
51     vmx_seg->ar |= ((qseg->flags >> DESC_L_SHIFT) & 1) << 13;
52     vmx_seg->ar |= ((qseg->flags >> DESC_AVL_SHIFT) & 1) << 12;
53     vmx_seg->ar |= ((qseg->flags >> DESC_P_SHIFT) & 1) << 7;
54     vmx_seg->ar |= ((qseg->flags >> DESC_DPL_SHIFT) & 3) << 5;
55     vmx_seg->ar |= ((qseg->flags >> DESC_S_SHIFT) & 1) << 4;
56 }
57 
58 void hvf_get_segment(SegmentCache *qseg, struct vmx_segment *vmx_seg)
59 {
60     qseg->limit = vmx_seg->limit;
61     qseg->base = vmx_seg->base;
62     qseg->selector = vmx_seg->sel;
63     qseg->flags = ((vmx_seg->ar & 0xf) << DESC_TYPE_SHIFT) |
64                   (((vmx_seg->ar >> 4) & 1) << DESC_S_SHIFT) |
65                   (((vmx_seg->ar >> 5) & 3) << DESC_DPL_SHIFT) |
66                   (((vmx_seg->ar >> 7) & 1) << DESC_P_SHIFT) |
67                   (((vmx_seg->ar >> 12) & 1) << DESC_AVL_SHIFT) |
68                   (((vmx_seg->ar >> 13) & 1) << DESC_L_SHIFT) |
69                   (((vmx_seg->ar >> 14) & 1) << DESC_B_SHIFT) |
70                   (((vmx_seg->ar >> 15) & 1) << DESC_G_SHIFT);
71 }
72 
73 void hvf_put_xsave(CPUState *cs)
74 {
75     void *xsave = X86_CPU(cs)->env.xsave_buf;
76     uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len;
77 
78     x86_cpu_xsave_all_areas(X86_CPU(cs), xsave, xsave_len);
79 
80     if (hv_vcpu_write_fpstate(cs->accel->fd, xsave, xsave_len)) {
81         abort();
82     }
83 }
84 
85 static void hvf_put_segments(CPUState *cs)
86 {
87     CPUX86State *env = &X86_CPU(cs)->env;
88     struct vmx_segment seg;
89 
90     wvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit);
91     wvmcs(cs->accel->fd, VMCS_GUEST_IDTR_BASE, env->idt.base);
92 
93     wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit);
94     wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE, env->gdt.base);
95 
96     /* wvmcs(cs->accel->fd, VMCS_GUEST_CR2, env->cr[2]); */
97     wvmcs(cs->accel->fd, VMCS_GUEST_CR3, env->cr[3]);
98     vmx_update_tpr(cs);
99     wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, env->efer);
100 
101     macvm_set_cr4(cs->accel->fd, env->cr[4]);
102     macvm_set_cr0(cs->accel->fd, env->cr[0]);
103 
104     hvf_set_segment(cs, &seg, &env->segs[R_CS], false);
105     vmx_write_segment_descriptor(cs, &seg, R_CS);
106 
107     hvf_set_segment(cs, &seg, &env->segs[R_DS], false);
108     vmx_write_segment_descriptor(cs, &seg, R_DS);
109 
110     hvf_set_segment(cs, &seg, &env->segs[R_ES], false);
111     vmx_write_segment_descriptor(cs, &seg, R_ES);
112 
113     hvf_set_segment(cs, &seg, &env->segs[R_SS], false);
114     vmx_write_segment_descriptor(cs, &seg, R_SS);
115 
116     hvf_set_segment(cs, &seg, &env->segs[R_FS], false);
117     vmx_write_segment_descriptor(cs, &seg, R_FS);
118 
119     hvf_set_segment(cs, &seg, &env->segs[R_GS], false);
120     vmx_write_segment_descriptor(cs, &seg, R_GS);
121 
122     hvf_set_segment(cs, &seg, &env->tr, true);
123     vmx_write_segment_descriptor(cs, &seg, R_TR);
124 
125     hvf_set_segment(cs, &seg, &env->ldt, false);
126     vmx_write_segment_descriptor(cs, &seg, R_LDTR);
127 }
128 
129 void hvf_put_msrs(CPUState *cs)
130 {
131     CPUX86State *env = &X86_CPU(cs)->env;
132 
133     hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS,
134                       env->sysenter_cs);
135     hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP,
136                       env->sysenter_esp);
137     hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP,
138                       env->sysenter_eip);
139 
140     hv_vcpu_write_msr(cs->accel->fd, MSR_STAR, env->star);
141 
142 #ifdef TARGET_X86_64
143     hv_vcpu_write_msr(cs->accel->fd, MSR_CSTAR, env->cstar);
144     hv_vcpu_write_msr(cs->accel->fd, MSR_KERNELGSBASE, env->kernelgsbase);
145     hv_vcpu_write_msr(cs->accel->fd, MSR_FMASK, env->fmask);
146     hv_vcpu_write_msr(cs->accel->fd, MSR_LSTAR, env->lstar);
147 #endif
148 
149     hv_vcpu_write_msr(cs->accel->fd, MSR_GSBASE, env->segs[R_GS].base);
150     hv_vcpu_write_msr(cs->accel->fd, MSR_FSBASE, env->segs[R_FS].base);
151 }
152 
153 
154 void hvf_get_xsave(CPUState *cs)
155 {
156     void *xsave = X86_CPU(cs)->env.xsave_buf;
157     uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len;
158 
159     if (hv_vcpu_read_fpstate(cs->accel->fd, xsave, xsave_len)) {
160         abort();
161     }
162 
163     x86_cpu_xrstor_all_areas(X86_CPU(cs), xsave, xsave_len);
164 }
165 
166 static void hvf_get_segments(CPUState *cs)
167 {
168     CPUX86State *env = &X86_CPU(cs)->env;
169 
170     struct vmx_segment seg;
171 
172     env->interrupt_injected = -1;
173 
174     vmx_read_segment_descriptor(cs, &seg, R_CS);
175     hvf_get_segment(&env->segs[R_CS], &seg);
176 
177     vmx_read_segment_descriptor(cs, &seg, R_DS);
178     hvf_get_segment(&env->segs[R_DS], &seg);
179 
180     vmx_read_segment_descriptor(cs, &seg, R_ES);
181     hvf_get_segment(&env->segs[R_ES], &seg);
182 
183     vmx_read_segment_descriptor(cs, &seg, R_FS);
184     hvf_get_segment(&env->segs[R_FS], &seg);
185 
186     vmx_read_segment_descriptor(cs, &seg, R_GS);
187     hvf_get_segment(&env->segs[R_GS], &seg);
188 
189     vmx_read_segment_descriptor(cs, &seg, R_SS);
190     hvf_get_segment(&env->segs[R_SS], &seg);
191 
192     vmx_read_segment_descriptor(cs, &seg, R_TR);
193     hvf_get_segment(&env->tr, &seg);
194 
195     vmx_read_segment_descriptor(cs, &seg, R_LDTR);
196     hvf_get_segment(&env->ldt, &seg);
197 
198     env->idt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT);
199     env->idt.base = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_BASE);
200     env->gdt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT);
201     env->gdt.base = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE);
202 
203     env->cr[0] = rvmcs(cs->accel->fd, VMCS_GUEST_CR0);
204     env->cr[2] = 0;
205     env->cr[3] = rvmcs(cs->accel->fd, VMCS_GUEST_CR3);
206     env->cr[4] = rvmcs(cs->accel->fd, VMCS_GUEST_CR4);
207 
208     env->efer = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
209 }
210 
211 void hvf_get_msrs(CPUState *cs)
212 {
213     CPUX86State *env = &X86_CPU(cs)->env;
214     uint64_t tmp;
215 
216     hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS, &tmp);
217     env->sysenter_cs = tmp;
218 
219     hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP, &tmp);
220     env->sysenter_esp = tmp;
221 
222     hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP, &tmp);
223     env->sysenter_eip = tmp;
224 
225     hv_vcpu_read_msr(cs->accel->fd, MSR_STAR, &env->star);
226 
227 #ifdef TARGET_X86_64
228     hv_vcpu_read_msr(cs->accel->fd, MSR_CSTAR, &env->cstar);
229     hv_vcpu_read_msr(cs->accel->fd, MSR_KERNELGSBASE, &env->kernelgsbase);
230     hv_vcpu_read_msr(cs->accel->fd, MSR_FMASK, &env->fmask);
231     hv_vcpu_read_msr(cs->accel->fd, MSR_LSTAR, &env->lstar);
232 #endif
233 
234     hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_APICBASE, &tmp);
235 
236     env->tsc = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
237 }
238 
239 int hvf_put_registers(CPUState *cs)
240 {
241     X86CPU *x86cpu = X86_CPU(cs);
242     CPUX86State *env = &x86cpu->env;
243 
244     wreg(cs->accel->fd, HV_X86_RAX, env->regs[R_EAX]);
245     wreg(cs->accel->fd, HV_X86_RBX, env->regs[R_EBX]);
246     wreg(cs->accel->fd, HV_X86_RCX, env->regs[R_ECX]);
247     wreg(cs->accel->fd, HV_X86_RDX, env->regs[R_EDX]);
248     wreg(cs->accel->fd, HV_X86_RBP, env->regs[R_EBP]);
249     wreg(cs->accel->fd, HV_X86_RSP, env->regs[R_ESP]);
250     wreg(cs->accel->fd, HV_X86_RSI, env->regs[R_ESI]);
251     wreg(cs->accel->fd, HV_X86_RDI, env->regs[R_EDI]);
252     wreg(cs->accel->fd, HV_X86_R8, env->regs[8]);
253     wreg(cs->accel->fd, HV_X86_R9, env->regs[9]);
254     wreg(cs->accel->fd, HV_X86_R10, env->regs[10]);
255     wreg(cs->accel->fd, HV_X86_R11, env->regs[11]);
256     wreg(cs->accel->fd, HV_X86_R12, env->regs[12]);
257     wreg(cs->accel->fd, HV_X86_R13, env->regs[13]);
258     wreg(cs->accel->fd, HV_X86_R14, env->regs[14]);
259     wreg(cs->accel->fd, HV_X86_R15, env->regs[15]);
260     wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
261     wreg(cs->accel->fd, HV_X86_RIP, env->eip);
262 
263     wreg(cs->accel->fd, HV_X86_XCR0, env->xcr0);
264 
265     hvf_put_xsave(cs);
266 
267     hvf_put_segments(cs);
268 
269     hvf_put_msrs(cs);
270 
271     wreg(cs->accel->fd, HV_X86_DR0, env->dr[0]);
272     wreg(cs->accel->fd, HV_X86_DR1, env->dr[1]);
273     wreg(cs->accel->fd, HV_X86_DR2, env->dr[2]);
274     wreg(cs->accel->fd, HV_X86_DR3, env->dr[3]);
275     wreg(cs->accel->fd, HV_X86_DR4, env->dr[4]);
276     wreg(cs->accel->fd, HV_X86_DR5, env->dr[5]);
277     wreg(cs->accel->fd, HV_X86_DR6, env->dr[6]);
278     wreg(cs->accel->fd, HV_X86_DR7, env->dr[7]);
279 
280     return 0;
281 }
282 
283 int hvf_get_registers(CPUState *cs)
284 {
285     X86CPU *x86cpu = X86_CPU(cs);
286     CPUX86State *env = &x86cpu->env;
287 
288     env->regs[R_EAX] = rreg(cs->accel->fd, HV_X86_RAX);
289     env->regs[R_EBX] = rreg(cs->accel->fd, HV_X86_RBX);
290     env->regs[R_ECX] = rreg(cs->accel->fd, HV_X86_RCX);
291     env->regs[R_EDX] = rreg(cs->accel->fd, HV_X86_RDX);
292     env->regs[R_EBP] = rreg(cs->accel->fd, HV_X86_RBP);
293     env->regs[R_ESP] = rreg(cs->accel->fd, HV_X86_RSP);
294     env->regs[R_ESI] = rreg(cs->accel->fd, HV_X86_RSI);
295     env->regs[R_EDI] = rreg(cs->accel->fd, HV_X86_RDI);
296     env->regs[8] = rreg(cs->accel->fd, HV_X86_R8);
297     env->regs[9] = rreg(cs->accel->fd, HV_X86_R9);
298     env->regs[10] = rreg(cs->accel->fd, HV_X86_R10);
299     env->regs[11] = rreg(cs->accel->fd, HV_X86_R11);
300     env->regs[12] = rreg(cs->accel->fd, HV_X86_R12);
301     env->regs[13] = rreg(cs->accel->fd, HV_X86_R13);
302     env->regs[14] = rreg(cs->accel->fd, HV_X86_R14);
303     env->regs[15] = rreg(cs->accel->fd, HV_X86_R15);
304 
305     env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
306     env->eip = rreg(cs->accel->fd, HV_X86_RIP);
307 
308     hvf_get_xsave(cs);
309     env->xcr0 = rreg(cs->accel->fd, HV_X86_XCR0);
310 
311     hvf_get_segments(cs);
312     hvf_get_msrs(cs);
313 
314     env->dr[0] = rreg(cs->accel->fd, HV_X86_DR0);
315     env->dr[1] = rreg(cs->accel->fd, HV_X86_DR1);
316     env->dr[2] = rreg(cs->accel->fd, HV_X86_DR2);
317     env->dr[3] = rreg(cs->accel->fd, HV_X86_DR3);
318     env->dr[4] = rreg(cs->accel->fd, HV_X86_DR4);
319     env->dr[5] = rreg(cs->accel->fd, HV_X86_DR5);
320     env->dr[6] = rreg(cs->accel->fd, HV_X86_DR6);
321     env->dr[7] = rreg(cs->accel->fd, HV_X86_DR7);
322 
323     x86_update_hflags(env);
324     return 0;
325 }
326 
327 static void vmx_set_int_window_exiting(CPUState *cs)
328 {
329      uint64_t val;
330      val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS);
331      wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val |
332              VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
333 }
334 
335 void vmx_clear_int_window_exiting(CPUState *cs)
336 {
337      uint64_t val;
338      val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS);
339      wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val &
340              ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
341 }
342 
343 bool hvf_inject_interrupts(CPUState *cs)
344 {
345     X86CPU *x86cpu = X86_CPU(cs);
346     CPUX86State *env = &x86cpu->env;
347 
348     uint8_t vector;
349     uint64_t intr_type;
350     bool have_event = true;
351     if (env->interrupt_injected != -1) {
352         vector = env->interrupt_injected;
353         if (env->ins_len) {
354             intr_type = VMCS_INTR_T_SWINTR;
355         } else {
356             intr_type = VMCS_INTR_T_HWINTR;
357         }
358     } else if (env->exception_nr != -1) {
359         vector = env->exception_nr;
360         if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
361             intr_type = VMCS_INTR_T_SWEXCEPTION;
362         } else {
363             intr_type = VMCS_INTR_T_HWEXCEPTION;
364         }
365     } else if (env->nmi_injected) {
366         vector = EXCP02_NMI;
367         intr_type = VMCS_INTR_T_NMI;
368     } else {
369         have_event = false;
370     }
371 
372     uint64_t info = 0;
373     if (have_event) {
374         info = vector | intr_type | VMCS_INTR_VALID;
375         uint64_t reason = rvmcs(cs->accel->fd, VMCS_EXIT_REASON);
376         if (env->nmi_injected && reason != EXIT_REASON_TASK_SWITCH) {
377             vmx_clear_nmi_blocking(cs);
378         }
379 
380         if (!(env->hflags2 & HF2_NMI_MASK) || intr_type != VMCS_INTR_T_NMI) {
381             info &= ~(1 << 12); /* clear undefined bit */
382             if (intr_type == VMCS_INTR_T_SWINTR ||
383                 intr_type == VMCS_INTR_T_SWEXCEPTION) {
384                 wvmcs(cs->accel->fd, VMCS_ENTRY_INST_LENGTH, env->ins_len);
385             }
386 
387             if (env->has_error_code) {
388                 wvmcs(cs->accel->fd, VMCS_ENTRY_EXCEPTION_ERROR,
389                       env->error_code);
390                 /* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */
391                 info |= VMCS_INTR_DEL_ERRCODE;
392             }
393             /*printf("reinject  %lx err %d\n", info, err);*/
394             wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info);
395         };
396     }
397 
398     if (cs->interrupt_request & CPU_INTERRUPT_NMI) {
399         if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) {
400             cs->interrupt_request &= ~CPU_INTERRUPT_NMI;
401             info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI;
402             wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info);
403         } else {
404             vmx_set_nmi_window_exiting(cs);
405         }
406     }
407 
408     if (!(env->hflags & HF_INHIBIT_IRQ_MASK) &&
409         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
410         (env->eflags & IF_MASK) && !(info & VMCS_INTR_VALID)) {
411         int line = cpu_get_pic_interrupt(&x86cpu->env);
412         cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
413         if (line >= 0) {
414             wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, line |
415                   VMCS_INTR_VALID | VMCS_INTR_T_HWINTR);
416         }
417     }
418     if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
419         vmx_set_int_window_exiting(cs);
420     }
421     return (cs->interrupt_request
422             & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR));
423 }
424 
425 int hvf_process_events(CPUState *cs)
426 {
427     X86CPU *cpu = X86_CPU(cs);
428     CPUX86State *env = &cpu->env;
429 
430     if (!cs->vcpu_dirty) {
431         /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */
432         env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
433     }
434 
435     if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
436         cpu_synchronize_state(cs);
437         do_cpu_init(cpu);
438     }
439 
440     if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
441         cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
442         apic_poll_irq(cpu->apic_state);
443     }
444     if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
445         (env->eflags & IF_MASK)) ||
446         (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
447         cs->halted = 0;
448     }
449     if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
450         cpu_synchronize_state(cs);
451         do_cpu_sipi(cpu);
452     }
453     if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
454         cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
455         cpu_synchronize_state(cs);
456         apic_handle_tpr_access_report(cpu->apic_state, env->eip,
457                                       env->tpr_access_type);
458     }
459     return cs->halted;
460 }
461