xref: /openbmc/qemu/target/i386/nvmm/nvmm-all.c (revision 98107c5d4c1c0a16f1a02a5efbfe01b567215cc6)
1 /*
2  * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
3  *
4  * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "cpu.h"
12 #include "system/address-spaces.h"
13 #include "system/ioport.h"
14 #include "qemu/accel.h"
15 #include "accel/accel-ops.h"
16 #include "system/nvmm.h"
17 #include "system/cpus.h"
18 #include "system/runstate.h"
19 #include "qemu/main-loop.h"
20 #include "qemu/error-report.h"
21 #include "qapi/error.h"
22 #include "qemu/queue.h"
23 #include "accel/accel-cpu-target.h"
24 #include "host-cpu.h"
25 #include "migration/blocker.h"
26 #include "strings.h"
27 
28 #include "nvmm-accel-ops.h"
29 
30 #include <nvmm.h>
31 
/*
 * Per-vCPU accelerator state; this is what CPUState::accel points to for
 * every function in this file.
 */
struct AccelCPUState {
    /* libnvmm handle (state, event and exit areas) for this vCPU. */
    struct nvmm_vcpu vcpu;
    /* Last TPR value synchronized between the APIC model and guest CR8. */
    uint8_t tpr;
    /* When set, the inner VCPU loop bails out with EXCP_INTERRUPT. */
    bool stop;

    /* Window-exiting for INTs/NMIs. */
    bool int_window_exit;
    bool nmi_window_exit;

    /* The guest is in an interrupt shadow (POP SS, etc). */
    bool int_shadow;
};
44 
/*
 * Bundles the NVMM machine handle with the capability record that the
 * vCPU setup code consults (cap.arch.vcpu_conf_support).
 */
struct qemu_machine {
    struct nvmm_capability cap;
    struct nvmm_machine mach;
};
49 
50 /* -------------------------------------------------------------------------- */
51 
/* Non-static flag; it is not written anywhere in this part of the file. */
bool nvmm_allowed;
/* File-wide machine instance; get_nvmm_mach() hands out its mach member. */
static struct qemu_machine qemu_mach;
54 
55 static struct nvmm_machine *
get_nvmm_mach(void)56 get_nvmm_mach(void)
57 {
58     return &qemu_mach.mach;
59 }
60 
61 /* -------------------------------------------------------------------------- */
62 
63 static void
nvmm_set_segment(struct nvmm_x64_state_seg * nseg,const SegmentCache * qseg)64 nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
65 {
66     uint32_t attrib = qseg->flags;
67 
68     nseg->selector = qseg->selector;
69     nseg->limit = qseg->limit;
70     nseg->base = qseg->base;
71     nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
72     nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
73     nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
74     nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
75     nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
76     nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
77     nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
78     nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
79 }
80 
81 static void
nvmm_set_registers(CPUState * cpu)82 nvmm_set_registers(CPUState *cpu)
83 {
84     CPUX86State *env = cpu_env(cpu);
85     struct nvmm_machine *mach = get_nvmm_mach();
86     AccelCPUState *qcpu = cpu->accel;
87     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
88     struct nvmm_x64_state *state = vcpu->state;
89     uint64_t bitmap;
90     size_t i;
91     int ret;
92 
93     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
94 
95     /* GPRs. */
96     state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
97     state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
98     state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
99     state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
100     state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
101     state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
102     state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
103     state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
104 #ifdef TARGET_X86_64
105     state->gprs[NVMM_X64_GPR_R8]  = env->regs[R_R8];
106     state->gprs[NVMM_X64_GPR_R9]  = env->regs[R_R9];
107     state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
108     state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
109     state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
110     state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
111     state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
112     state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
113 #endif
114 
115     /* RIP and RFLAGS. */
116     state->gprs[NVMM_X64_GPR_RIP] = env->eip;
117     state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;
118 
119     /* Segments. */
120     nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
121     nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
122     nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
123     nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
124     nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
125     nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);
126 
127     /* Special segments. */
128     nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
129     nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
130     nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
131     nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);
132 
133     /* Control registers. */
134     state->crs[NVMM_X64_CR_CR0] = env->cr[0];
135     state->crs[NVMM_X64_CR_CR2] = env->cr[2];
136     state->crs[NVMM_X64_CR_CR3] = env->cr[3];
137     state->crs[NVMM_X64_CR_CR4] = env->cr[4];
138     state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
139     state->crs[NVMM_X64_CR_XCR0] = env->xcr0;
140 
141     /* Debug registers. */
142     state->drs[NVMM_X64_DR_DR0] = env->dr[0];
143     state->drs[NVMM_X64_DR_DR1] = env->dr[1];
144     state->drs[NVMM_X64_DR_DR2] = env->dr[2];
145     state->drs[NVMM_X64_DR_DR3] = env->dr[3];
146     state->drs[NVMM_X64_DR_DR6] = env->dr[6];
147     state->drs[NVMM_X64_DR_DR7] = env->dr[7];
148 
149     /* FPU. */
150     state->fpu.fx_cw = env->fpuc;
151     state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
152     state->fpu.fx_tw = 0;
153     for (i = 0; i < 8; i++) {
154         state->fpu.fx_tw |= (!env->fptags[i]) << i;
155     }
156     state->fpu.fx_opcode = env->fpop;
157     state->fpu.fx_ip.fa_64 = env->fpip;
158     state->fpu.fx_dp.fa_64 = env->fpdp;
159     state->fpu.fx_mxcsr = env->mxcsr;
160     state->fpu.fx_mxcsr_mask = 0x0000FFFF;
161     assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
162     memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
163     for (i = 0; i < CPU_NB_REGS; i++) {
164         memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
165             &env->xmm_regs[i].ZMM_Q(0), 8);
166         memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
167             &env->xmm_regs[i].ZMM_Q(1), 8);
168     }
169 
170     /* MSRs. */
171     state->msrs[NVMM_X64_MSR_EFER] = env->efer;
172     state->msrs[NVMM_X64_MSR_STAR] = env->star;
173 #ifdef TARGET_X86_64
174     state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
175     state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
176     state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
177     state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
178 #endif
179     state->msrs[NVMM_X64_MSR_SYSENTER_CS]  = env->sysenter_cs;
180     state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
181     state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
182     state->msrs[NVMM_X64_MSR_PAT] = env->pat;
183     state->msrs[NVMM_X64_MSR_TSC] = env->tsc;
184 
185     bitmap =
186         NVMM_X64_STATE_SEGS |
187         NVMM_X64_STATE_GPRS |
188         NVMM_X64_STATE_CRS  |
189         NVMM_X64_STATE_DRS  |
190         NVMM_X64_STATE_MSRS |
191         NVMM_X64_STATE_FPU;
192 
193     ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
194     if (ret == -1) {
195         error_report("NVMM: Failed to set virtual processor context,"
196             " error=%d", errno);
197     }
198 }
199 
200 static void
nvmm_get_segment(SegmentCache * qseg,const struct nvmm_x64_state_seg * nseg)201 nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
202 {
203     qseg->selector = nseg->selector;
204     qseg->limit = nseg->limit;
205     qseg->base = nseg->base;
206 
207     qseg->flags =
208         __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
209         __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
210         __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
211         __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
212         __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
213         __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
214         __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
215         __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
216 }
217 
218 static void
nvmm_get_registers(CPUState * cpu)219 nvmm_get_registers(CPUState *cpu)
220 {
221     CPUX86State *env = cpu_env(cpu);
222     struct nvmm_machine *mach = get_nvmm_mach();
223     AccelCPUState *qcpu = cpu->accel;
224     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
225     X86CPU *x86_cpu = X86_CPU(cpu);
226     struct nvmm_x64_state *state = vcpu->state;
227     uint64_t bitmap, tpr;
228     size_t i;
229     int ret;
230 
231     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
232 
233     bitmap =
234         NVMM_X64_STATE_SEGS |
235         NVMM_X64_STATE_GPRS |
236         NVMM_X64_STATE_CRS  |
237         NVMM_X64_STATE_DRS  |
238         NVMM_X64_STATE_MSRS |
239         NVMM_X64_STATE_FPU;
240 
241     ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
242     if (ret == -1) {
243         error_report("NVMM: Failed to get virtual processor context,"
244             " error=%d", errno);
245     }
246 
247     /* GPRs. */
248     env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
249     env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
250     env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
251     env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
252     env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
253     env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
254     env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
255     env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
256 #ifdef TARGET_X86_64
257     env->regs[R_R8]  = state->gprs[NVMM_X64_GPR_R8];
258     env->regs[R_R9]  = state->gprs[NVMM_X64_GPR_R9];
259     env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
260     env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
261     env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
262     env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
263     env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
264     env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
265 #endif
266 
267     /* RIP and RFLAGS. */
268     env->eip = state->gprs[NVMM_X64_GPR_RIP];
269     env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];
270 
271     /* Segments. */
272     nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
273     nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
274     nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
275     nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
276     nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
277     nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);
278 
279     /* Special segments. */
280     nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
281     nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
282     nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
283     nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);
284 
285     /* Control registers. */
286     env->cr[0] = state->crs[NVMM_X64_CR_CR0];
287     env->cr[2] = state->crs[NVMM_X64_CR_CR2];
288     env->cr[3] = state->crs[NVMM_X64_CR_CR3];
289     env->cr[4] = state->crs[NVMM_X64_CR_CR4];
290     tpr = state->crs[NVMM_X64_CR_CR8];
291     if (tpr != qcpu->tpr) {
292         qcpu->tpr = tpr;
293         cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
294     }
295     env->xcr0 = state->crs[NVMM_X64_CR_XCR0];
296 
297     /* Debug registers. */
298     env->dr[0] = state->drs[NVMM_X64_DR_DR0];
299     env->dr[1] = state->drs[NVMM_X64_DR_DR1];
300     env->dr[2] = state->drs[NVMM_X64_DR_DR2];
301     env->dr[3] = state->drs[NVMM_X64_DR_DR3];
302     env->dr[6] = state->drs[NVMM_X64_DR_DR6];
303     env->dr[7] = state->drs[NVMM_X64_DR_DR7];
304 
305     /* FPU. */
306     env->fpuc = state->fpu.fx_cw;
307     env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
308     env->fpus = state->fpu.fx_sw & ~0x3800;
309     for (i = 0; i < 8; i++) {
310         env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
311     }
312     env->fpop = state->fpu.fx_opcode;
313     env->fpip = state->fpu.fx_ip.fa_64;
314     env->fpdp = state->fpu.fx_dp.fa_64;
315     env->mxcsr = state->fpu.fx_mxcsr;
316     assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
317     memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
318     for (i = 0; i < CPU_NB_REGS; i++) {
319         memcpy(&env->xmm_regs[i].ZMM_Q(0),
320             &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
321         memcpy(&env->xmm_regs[i].ZMM_Q(1),
322             &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
323     }
324 
325     /* MSRs. */
326     env->efer = state->msrs[NVMM_X64_MSR_EFER];
327     env->star = state->msrs[NVMM_X64_MSR_STAR];
328 #ifdef TARGET_X86_64
329     env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
330     env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
331     env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
332     env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
333 #endif
334     env->sysenter_cs  = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
335     env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
336     env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
337     env->pat = state->msrs[NVMM_X64_MSR_PAT];
338     env->tsc = state->msrs[NVMM_X64_MSR_TSC];
339 
340     x86_update_hflags(env);
341 }
342 
343 static bool
nvmm_can_take_int(CPUState * cpu)344 nvmm_can_take_int(CPUState *cpu)
345 {
346     AccelCPUState *qcpu = cpu->accel;
347     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
348     struct nvmm_machine *mach = get_nvmm_mach();
349 
350     if (qcpu->int_window_exit) {
351         return false;
352     }
353 
354     if (qcpu->int_shadow || !(cpu_env(cpu)->eflags & IF_MASK)) {
355         struct nvmm_x64_state *state = vcpu->state;
356 
357         /* Exit on interrupt window. */
358         nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
359         state->intr.int_window_exiting = 1;
360         nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);
361 
362         return false;
363     }
364 
365     return true;
366 }
367 
368 static bool
nvmm_can_take_nmi(CPUState * cpu)369 nvmm_can_take_nmi(CPUState *cpu)
370 {
371     AccelCPUState *qcpu = cpu->accel;
372 
373     /*
374      * Contrary to INTs, NMIs always schedule an exit when they are
375      * completed. Therefore, if window-exiting is enabled, it means
376      * NMIs are blocked.
377      */
378     if (qcpu->nmi_window_exit) {
379         return false;
380     }
381 
382     return true;
383 }
384 
385 /*
386  * Called before the VCPU is run. We inject events generated by the I/O
387  * thread, and synchronize the guest TPR.
388  */
389 static void
nvmm_vcpu_pre_run(CPUState * cpu)390 nvmm_vcpu_pre_run(CPUState *cpu)
391 {
392     CPUX86State *env = cpu_env(cpu);
393     struct nvmm_machine *mach = get_nvmm_mach();
394     AccelCPUState *qcpu = cpu->accel;
395     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
396     X86CPU *x86_cpu = X86_CPU(cpu);
397     struct nvmm_x64_state *state = vcpu->state;
398     struct nvmm_vcpu_event *event = vcpu->event;
399     bool has_event = false;
400     bool sync_tpr = false;
401     uint8_t tpr;
402     int ret;
403 
404     bql_lock();
405 
406     tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
407     if (tpr != qcpu->tpr) {
408         qcpu->tpr = tpr;
409         sync_tpr = true;
410     }
411 
412     /*
413      * Force the VCPU out of its inner loop to process any INIT requests
414      * or commit pending TPR access.
415      */
416     if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
417         cpu->exit_request = 1;
418     }
419 
420     if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
421         if (nvmm_can_take_nmi(cpu)) {
422             cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
423             event->type = NVMM_VCPU_EVENT_INTR;
424             event->vector = 2;
425             has_event = true;
426         }
427     }
428 
429     if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
430         if (nvmm_can_take_int(cpu)) {
431             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
432             event->type = NVMM_VCPU_EVENT_INTR;
433             event->vector = cpu_get_pic_interrupt(env);
434             has_event = true;
435         }
436     }
437 
438     /* Don't want SMIs. */
439     if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
440         cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
441     }
442 
443     if (sync_tpr) {
444         ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
445         if (ret == -1) {
446             error_report("NVMM: Failed to get CPU state,"
447                 " error=%d", errno);
448         }
449 
450         state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
451 
452         ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
453         if (ret == -1) {
454             error_report("NVMM: Failed to set CPU state,"
455                 " error=%d", errno);
456         }
457     }
458 
459     if (has_event) {
460         ret = nvmm_vcpu_inject(mach, vcpu);
461         if (ret == -1) {
462             error_report("NVMM: Failed to inject event,"
463                 " error=%d", errno);
464         }
465     }
466 
467     bql_unlock();
468 }
469 
470 /*
471  * Called after the VCPU ran. We synchronize the host view of the TPR and
472  * RFLAGS.
473  */
474 static void
nvmm_vcpu_post_run(CPUState * cpu,struct nvmm_vcpu_exit * exit)475 nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
476 {
477     AccelCPUState *qcpu = cpu->accel;
478     X86CPU *x86_cpu = X86_CPU(cpu);
479     CPUX86State *env = &x86_cpu->env;
480     uint64_t tpr;
481 
482     env->eflags = exit->exitstate.rflags;
483     qcpu->int_shadow = exit->exitstate.int_shadow;
484     qcpu->int_window_exit = exit->exitstate.int_window_exiting;
485     qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;
486 
487     tpr = exit->exitstate.cr8;
488     if (qcpu->tpr != tpr) {
489         qcpu->tpr = tpr;
490         bql_lock();
491         cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
492         bql_unlock();
493     }
494 }
495 
496 /* -------------------------------------------------------------------------- */
497 
498 static void
nvmm_io_callback(struct nvmm_io * io)499 nvmm_io_callback(struct nvmm_io *io)
500 {
501     MemTxAttrs attrs = { 0 };
502     int ret;
503 
504     ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
505         io->size, !io->in);
506     if (ret != MEMTX_OK) {
507         error_report("NVMM: I/O Transaction Failed "
508             "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
509             io->port, io->size);
510     }
511 
512     /* Needed, otherwise infinite loop. */
513     current_cpu->vcpu_dirty = false;
514 }
515 
516 static void
nvmm_mem_callback(struct nvmm_mem * mem)517 nvmm_mem_callback(struct nvmm_mem *mem)
518 {
519     cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
520 
521     /* Needed, otherwise infinite loop. */
522     current_cpu->vcpu_dirty = false;
523 }
524 
/*
 * Assist callbacks; nvmm_init_vcpu() registers this table on each vCPU
 * with NVMM_VCPU_CONF_CALLBACKS.
 */
static struct nvmm_assist_callbacks nvmm_callbacks = {
    .io = nvmm_io_callback,
    .mem = nvmm_mem_callback
};
529 
530 /* -------------------------------------------------------------------------- */
531 
532 static int
nvmm_handle_mem(struct nvmm_machine * mach,struct nvmm_vcpu * vcpu)533 nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
534 {
535     int ret;
536 
537     ret = nvmm_assist_mem(mach, vcpu);
538     if (ret == -1) {
539         error_report("NVMM: Mem Assist Failed [gpa=%p]",
540             (void *)vcpu->exit->u.mem.gpa);
541     }
542 
543     return ret;
544 }
545 
546 static int
nvmm_handle_io(struct nvmm_machine * mach,struct nvmm_vcpu * vcpu)547 nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
548 {
549     int ret;
550 
551     ret = nvmm_assist_io(mach, vcpu);
552     if (ret == -1) {
553         error_report("NVMM: I/O Assist Failed [port=%d]",
554             (int)vcpu->exit->u.io.port);
555     }
556 
557     return ret;
558 }
559 
560 static int
nvmm_handle_rdmsr(struct nvmm_machine * mach,CPUState * cpu,struct nvmm_vcpu_exit * exit)561 nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
562     struct nvmm_vcpu_exit *exit)
563 {
564     AccelCPUState *qcpu = cpu->accel;
565     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
566     X86CPU *x86_cpu = X86_CPU(cpu);
567     struct nvmm_x64_state *state = vcpu->state;
568     uint64_t val;
569     int ret;
570 
571     switch (exit->u.rdmsr.msr) {
572     case MSR_IA32_APICBASE:
573         val = cpu_get_apic_base(x86_cpu->apic_state);
574         break;
575     case MSR_MTRRcap:
576     case MSR_MTRRdefType:
577     case MSR_MCG_CAP:
578     case MSR_MCG_STATUS:
579         val = 0;
580         break;
581     default: /* More MSRs to add? */
582         val = 0;
583         error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
584             exit->u.rdmsr.msr);
585         break;
586     }
587 
588     ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
589     if (ret == -1) {
590         return -1;
591     }
592 
593     state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
594     state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
595     state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;
596 
597     ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
598     if (ret == -1) {
599         return -1;
600     }
601 
602     return 0;
603 }
604 
605 static int
nvmm_handle_wrmsr(struct nvmm_machine * mach,CPUState * cpu,struct nvmm_vcpu_exit * exit)606 nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
607     struct nvmm_vcpu_exit *exit)
608 {
609     AccelCPUState *qcpu = cpu->accel;
610     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
611     X86CPU *x86_cpu = X86_CPU(cpu);
612     struct nvmm_x64_state *state = vcpu->state;
613     uint64_t val;
614     int ret;
615 
616     val = exit->u.wrmsr.val;
617 
618     switch (exit->u.wrmsr.msr) {
619     case MSR_IA32_APICBASE:
620         cpu_set_apic_base(x86_cpu->apic_state, val);
621         break;
622     case MSR_MTRRdefType:
623     case MSR_MCG_STATUS:
624         break;
625     default: /* More MSRs to add? */
626         error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
627             exit->u.wrmsr.msr, val);
628         break;
629     }
630 
631     ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
632     if (ret == -1) {
633         return -1;
634     }
635 
636     state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;
637 
638     ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
639     if (ret == -1) {
640         return -1;
641     }
642 
643     return 0;
644 }
645 
646 static int
nvmm_handle_halted(struct nvmm_machine * mach,CPUState * cpu,struct nvmm_vcpu_exit * exit)647 nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
648     struct nvmm_vcpu_exit *exit)
649 {
650     int ret = 0;
651 
652     bql_lock();
653 
654     if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
655           (cpu_env(cpu)->eflags & IF_MASK)) &&
656         !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
657         cpu->exception_index = EXCP_HLT;
658         cpu->halted = true;
659         ret = 1;
660     }
661 
662     bql_unlock();
663 
664     return ret;
665 }
666 
667 static int
nvmm_inject_ud(struct nvmm_machine * mach,struct nvmm_vcpu * vcpu)668 nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
669 {
670     struct nvmm_vcpu_event *event = vcpu->event;
671 
672     event->type = NVMM_VCPU_EVENT_EXCP;
673     event->vector = 6;
674     event->u.excp.error = 0;
675 
676     return nvmm_vcpu_inject(mach, vcpu);
677 }
678 
679 static int
nvmm_vcpu_loop(CPUState * cpu)680 nvmm_vcpu_loop(CPUState *cpu)
681 {
682     struct nvmm_machine *mach = get_nvmm_mach();
683     AccelCPUState *qcpu = cpu->accel;
684     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
685     X86CPU *x86_cpu = X86_CPU(cpu);
686     CPUX86State *env = &x86_cpu->env;
687     struct nvmm_vcpu_exit *exit = vcpu->exit;
688     int ret;
689 
690     /*
691      * Some asynchronous events must be handled outside of the inner
692      * VCPU loop. They are handled here.
693      */
694     if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
695         nvmm_cpu_synchronize_state(cpu);
696         do_cpu_init(x86_cpu);
697         /* set int/nmi windows back to the reset state */
698     }
699     if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
700         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
701         apic_poll_irq(x86_cpu->apic_state);
702     }
703     if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
704          (env->eflags & IF_MASK)) ||
705         (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
706         cpu->halted = false;
707     }
708     if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
709         cpu_reset_interrupt(cpu, CPU_INTERRUPT_SIPI);
710         nvmm_cpu_synchronize_state(cpu);
711         do_cpu_sipi(x86_cpu);
712     }
713     if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
714         cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
715         nvmm_cpu_synchronize_state(cpu);
716         apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
717             env->tpr_access_type);
718     }
719 
720     if (cpu->halted) {
721         cpu->exception_index = EXCP_HLT;
722         qatomic_set(&cpu->exit_request, false);
723         return 0;
724     }
725 
726     bql_unlock();
727     cpu_exec_start(cpu);
728 
729     /*
730      * Inner VCPU loop.
731      */
732     do {
733         if (cpu->vcpu_dirty) {
734             nvmm_set_registers(cpu);
735             cpu->vcpu_dirty = false;
736         }
737 
738         if (qcpu->stop) {
739             cpu->exception_index = EXCP_INTERRUPT;
740             qcpu->stop = false;
741             ret = 1;
742             break;
743         }
744 
745         nvmm_vcpu_pre_run(cpu);
746 
747         if (qatomic_read(&cpu->exit_request)) {
748 #if NVMM_USER_VERSION >= 2
749             nvmm_vcpu_stop(vcpu);
750 #else
751             qemu_cpu_kick_self();
752 #endif
753         }
754 
755         /* Read exit_request before the kernel reads the immediate exit flag */
756         smp_rmb();
757         ret = nvmm_vcpu_run(mach, vcpu);
758         if (ret == -1) {
759             error_report("NVMM: Failed to exec a virtual processor,"
760                 " error=%d", errno);
761             break;
762         }
763 
764         nvmm_vcpu_post_run(cpu, exit);
765 
766         switch (exit->reason) {
767         case NVMM_VCPU_EXIT_NONE:
768             break;
769 #if NVMM_USER_VERSION >= 2
770         case NVMM_VCPU_EXIT_STOPPED:
771             /*
772              * The kernel cleared the immediate exit flag; cpu->exit_request
773              * must be cleared after
774              */
775             smp_wmb();
776             qcpu->stop = true;
777             break;
778 #endif
779         case NVMM_VCPU_EXIT_MEMORY:
780             ret = nvmm_handle_mem(mach, vcpu);
781             break;
782         case NVMM_VCPU_EXIT_IO:
783             ret = nvmm_handle_io(mach, vcpu);
784             break;
785         case NVMM_VCPU_EXIT_INT_READY:
786         case NVMM_VCPU_EXIT_NMI_READY:
787         case NVMM_VCPU_EXIT_TPR_CHANGED:
788             break;
789         case NVMM_VCPU_EXIT_HALTED:
790             ret = nvmm_handle_halted(mach, cpu, exit);
791             break;
792         case NVMM_VCPU_EXIT_SHUTDOWN:
793             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
794             cpu->exception_index = EXCP_INTERRUPT;
795             ret = 1;
796             break;
797         case NVMM_VCPU_EXIT_RDMSR:
798             ret = nvmm_handle_rdmsr(mach, cpu, exit);
799             break;
800         case NVMM_VCPU_EXIT_WRMSR:
801             ret = nvmm_handle_wrmsr(mach, cpu, exit);
802             break;
803         case NVMM_VCPU_EXIT_MONITOR:
804         case NVMM_VCPU_EXIT_MWAIT:
805             ret = nvmm_inject_ud(mach, vcpu);
806             break;
807         default:
808             error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
809                 exit->reason, exit->u.inv.hwcode);
810             nvmm_get_registers(cpu);
811             bql_lock();
812             qemu_system_guest_panicked(cpu_get_crash_info(cpu));
813             bql_unlock();
814             ret = -1;
815             break;
816         }
817     } while (ret == 0);
818 
819     cpu_exec_end(cpu);
820     bql_lock();
821 
822     qatomic_set(&cpu->exit_request, false);
823 
824     return ret < 0;
825 }
826 
827 /* -------------------------------------------------------------------------- */
828 
829 static void
do_nvmm_cpu_synchronize_state(CPUState * cpu,run_on_cpu_data arg)830 do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
831 {
832     nvmm_get_registers(cpu);
833     cpu->vcpu_dirty = true;
834 }
835 
836 static void
do_nvmm_cpu_synchronize_post_reset(CPUState * cpu,run_on_cpu_data arg)837 do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
838 {
839     nvmm_set_registers(cpu);
840     cpu->vcpu_dirty = false;
841 }
842 
843 static void
do_nvmm_cpu_synchronize_post_init(CPUState * cpu,run_on_cpu_data arg)844 do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
845 {
846     nvmm_set_registers(cpu);
847     cpu->vcpu_dirty = false;
848 }
849 
850 static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState * cpu,run_on_cpu_data arg)851 do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
852 {
853     cpu->vcpu_dirty = true;
854 }
855 
/*
 * Make env reflect the vCPU state. Nothing is done when the QEMU copy is
 * already marked dirty; otherwise the fetch is dispatched through
 * run_on_cpu().
 */
void nvmm_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
862 
/*
 * Dispatch do_nvmm_cpu_synchronize_post_reset() through run_on_cpu(),
 * which writes env into the vCPU.
 */
void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset,
               RUN_ON_CPU_NULL);
}
867 
/*
 * Dispatch do_nvmm_cpu_synchronize_post_init() through run_on_cpu(),
 * which writes env into the vCPU.
 */
void nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init,
               RUN_ON_CPU_NULL);
}
872 
/*
 * Dispatch do_nvmm_cpu_synchronize_pre_loadvm() through run_on_cpu(),
 * which marks the QEMU register copy dirty.
 */
void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm,
               RUN_ON_CPU_NULL);
}
877 
878 /* -------------------------------------------------------------------------- */
879 
/*
 * Migration blocker; created by nvmm_init_vcpu() while it is still NULL
 * and registered with migrate_add_blocker().
 */
static Error *nvmm_migration_blocker;
881 
882 /*
883  * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
884  * and another thread signaling the vCPU thread to exit.
885  */
886 
887 static void
nvmm_ipi_signal(int sigcpu)888 nvmm_ipi_signal(int sigcpu)
889 {
890     if (current_cpu) {
891         AccelCPUState *qcpu = current_cpu->accel;
892 #if NVMM_USER_VERSION >= 2
893         struct nvmm_vcpu *vcpu = &qcpu->vcpu;
894         nvmm_vcpu_stop(vcpu);
895 #else
896         qcpu->stop = true;
897 #endif
898     }
899 }
900 
901 static void
nvmm_init_cpu_signals(void)902 nvmm_init_cpu_signals(void)
903 {
904     struct sigaction sigact;
905     sigset_t set;
906 
907     /* Install the IPI handler. */
908     memset(&sigact, 0, sizeof(sigact));
909     sigact.sa_handler = nvmm_ipi_signal;
910     sigaction(SIG_IPI, &sigact, NULL);
911 
912     /* Allow IPIs on the current thread. */
913     sigprocmask(SIG_BLOCK, NULL, &set);
914     sigdelset(&set, SIG_IPI);
915     pthread_sigmask(SIG_SETMASK, &set, NULL);
916 }
917 
918 int
nvmm_init_vcpu(CPUState * cpu)919 nvmm_init_vcpu(CPUState *cpu)
920 {
921     struct nvmm_machine *mach = get_nvmm_mach();
922     struct nvmm_vcpu_conf_cpuid cpuid;
923     struct nvmm_vcpu_conf_tpr tpr;
924     Error *local_error = NULL;
925     AccelCPUState *qcpu;
926     int ret, err;
927 
928     nvmm_init_cpu_signals();
929 
930     if (nvmm_migration_blocker == NULL) {
931         error_setg(&nvmm_migration_blocker,
932             "NVMM: Migration not supported");
933 
934         if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) {
935             error_report_err(local_error);
936             return -EINVAL;
937         }
938     }
939 
940     qcpu = g_new0(AccelCPUState, 1);
941 
942     ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
943     if (ret == -1) {
944         err = errno;
945         error_report("NVMM: Failed to create a virtual processor,"
946             " error=%d", err);
947         g_free(qcpu);
948         return -err;
949     }
950 
951     memset(&cpuid, 0, sizeof(cpuid));
952     cpuid.mask = 1;
953     cpuid.leaf = 0x00000001;
954     cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
955     ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
956         &cpuid);
957     if (ret == -1) {
958         err = errno;
959         error_report("NVMM: Failed to configure a virtual processor,"
960             " error=%d", err);
961         g_free(qcpu);
962         return -err;
963     }
964 
965     ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
966         &nvmm_callbacks);
967     if (ret == -1) {
968         err = errno;
969         error_report("NVMM: Failed to configure a virtual processor,"
970             " error=%d", err);
971         g_free(qcpu);
972         return -err;
973     }
974 
975     if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
976         memset(&tpr, 0, sizeof(tpr));
977         tpr.exit_changed = 1;
978         ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
979         if (ret == -1) {
980             err = errno;
981             error_report("NVMM: Failed to configure a virtual processor,"
982                 " error=%d", err);
983             g_free(qcpu);
984             return -err;
985         }
986     }
987 
988     qcpu->vcpu_dirty = true;
989     cpu->accel = qcpu;
990 
991     return 0;
992 }
993 
994 int
nvmm_vcpu_exec(CPUState * cpu)995 nvmm_vcpu_exec(CPUState *cpu)
996 {
997     int ret, fatal;
998 
999     while (1) {
1000         if (cpu->exception_index >= EXCP_INTERRUPT) {
1001             ret = cpu->exception_index;
1002             cpu->exception_index = -1;
1003             break;
1004         }
1005 
1006         fatal = nvmm_vcpu_loop(cpu);
1007 
1008         if (fatal) {
1009             error_report("NVMM: Failed to execute a VCPU.");
1010             abort();
1011         }
1012     }
1013 
1014     return ret;
1015 }
1016 
1017 void
nvmm_destroy_vcpu(CPUState * cpu)1018 nvmm_destroy_vcpu(CPUState *cpu)
1019 {
1020     struct nvmm_machine *mach = get_nvmm_mach();
1021     AccelCPUState *qcpu = cpu->accel;
1022 
1023     nvmm_vcpu_destroy(mach, &qcpu->vcpu);
1024     g_free(cpu->accel);
1025 }
1026 
1027 /* -------------------------------------------------------------------------- */
1028 
1029 static void
nvmm_update_mapping(hwaddr start_pa,ram_addr_t size,uintptr_t hva,bool add,bool rom,const char * name)1030 nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
1031     bool add, bool rom, const char *name)
1032 {
1033     struct nvmm_machine *mach = get_nvmm_mach();
1034     int ret, prot;
1035 
1036     if (add) {
1037         prot = PROT_READ | PROT_EXEC;
1038         if (!rom) {
1039             prot |= PROT_WRITE;
1040         }
1041         ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
1042     } else {
1043         ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
1044     }
1045 
1046     if (ret == -1) {
1047         error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
1048             "Size:%p bytes, HostVA:%p, error=%d",
1049             (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
1050             (void *)size, (void *)hva, errno);
1051     }
1052 }
1053 
1054 static void
nvmm_process_section(MemoryRegionSection * section,int add)1055 nvmm_process_section(MemoryRegionSection *section, int add)
1056 {
1057     MemoryRegion *mr = section->mr;
1058     hwaddr start_pa = section->offset_within_address_space;
1059     ram_addr_t size = int128_get64(section->size);
1060     unsigned int delta;
1061     uintptr_t hva;
1062 
1063     if (!memory_region_is_ram(mr)) {
1064         return;
1065     }
1066 
1067     /* Adjust start_pa and size so that they are page-aligned. */
1068     delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
1069     delta &= ~qemu_real_host_page_mask();
1070     if (delta > size) {
1071         return;
1072     }
1073     start_pa += delta;
1074     size -= delta;
1075     size &= qemu_real_host_page_mask();
1076     if (!size || (start_pa & ~qemu_real_host_page_mask())) {
1077         return;
1078     }
1079 
1080     hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
1081         section->offset_within_region + delta;
1082 
1083     nvmm_update_mapping(start_pa, size, hva, add,
1084         memory_region_is_rom(mr), mr->name);
1085 }
1086 
1087 static void
nvmm_region_add(MemoryListener * listener,MemoryRegionSection * section)1088 nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
1089 {
1090     memory_region_ref(section->mr);
1091     nvmm_process_section(section, 1);
1092 }
1093 
1094 static void
nvmm_region_del(MemoryListener * listener,MemoryRegionSection * section)1095 nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
1096 {
1097     nvmm_process_section(section, 0);
1098     memory_region_unref(section->mr);
1099 }
1100 
1101 static void
nvmm_transaction_begin(MemoryListener * listener)1102 nvmm_transaction_begin(MemoryListener *listener)
1103 {
1104     /* nothing */
1105 }
1106 
1107 static void
nvmm_transaction_commit(MemoryListener * listener)1108 nvmm_transaction_commit(MemoryListener *listener)
1109 {
1110     /* nothing */
1111 }
1112 
1113 static void
nvmm_log_sync(MemoryListener * listener,MemoryRegionSection * section)1114 nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
1115 {
1116     MemoryRegion *mr = section->mr;
1117 
1118     if (!memory_region_is_ram(mr)) {
1119         return;
1120     }
1121 
1122     memory_region_set_dirty(mr, 0, int128_get64(section->size));
1123 }
1124 
1125 static MemoryListener nvmm_memory_listener = {
1126     .name = "nvmm",
1127     .begin = nvmm_transaction_begin,
1128     .commit = nvmm_transaction_commit,
1129     .region_add = nvmm_region_add,
1130     .region_del = nvmm_region_del,
1131     .log_sync = nvmm_log_sync,
1132     .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
1133 };
1134 
1135 static void
nvmm_ram_block_added(RAMBlockNotifier * n,void * host,size_t size,size_t max_size)1136 nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
1137                      size_t max_size)
1138 {
1139     struct nvmm_machine *mach = get_nvmm_mach();
1140     uintptr_t hva = (uintptr_t)host;
1141     int ret;
1142 
1143     ret = nvmm_hva_map(mach, hva, max_size);
1144 
1145     if (ret == -1) {
1146         error_report("NVMM: Failed to map HVA, HostVA:%p "
1147             "Size:%p bytes, error=%d",
1148             (void *)hva, (void *)size, errno);
1149     }
1150 }
1151 
1152 static struct RAMBlockNotifier nvmm_ram_notifier = {
1153     .ram_block_added = nvmm_ram_block_added
1154 };
1155 
1156 /* -------------------------------------------------------------------------- */
1157 
1158 static int
nvmm_accel_init(AccelState * as,MachineState * ms)1159 nvmm_accel_init(AccelState *as, MachineState *ms)
1160 {
1161     int ret, err;
1162 
1163     ret = nvmm_init();
1164     if (ret == -1) {
1165         err = errno;
1166         error_report("NVMM: Initialization failed, error=%d", errno);
1167         return -err;
1168     }
1169 
1170     ret = nvmm_capability(&qemu_mach.cap);
1171     if (ret == -1) {
1172         err = errno;
1173         error_report("NVMM: Unable to fetch capability, error=%d", errno);
1174         return -err;
1175     }
1176     if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
1177         error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
1178         return -EPROGMISMATCH;
1179     }
1180     if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
1181         error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
1182         return -EPROGMISMATCH;
1183     }
1184 
1185     ret = nvmm_machine_create(&qemu_mach.mach);
1186     if (ret == -1) {
1187         err = errno;
1188         error_report("NVMM: Machine creation failed, error=%d", errno);
1189         return -err;
1190     }
1191 
1192     memory_listener_register(&nvmm_memory_listener, &address_space_memory);
1193     ram_block_notifier_add(&nvmm_ram_notifier);
1194 
1195     printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
1196     return 0;
1197 }
1198 
1199 static void
nvmm_accel_class_init(ObjectClass * oc,const void * data)1200 nvmm_accel_class_init(ObjectClass *oc, const void *data)
1201 {
1202     AccelClass *ac = ACCEL_CLASS(oc);
1203     ac->name = "NVMM";
1204     ac->init_machine = nvmm_accel_init;
1205     ac->allowed = &nvmm_allowed;
1206 }
1207 
1208 static const TypeInfo nvmm_accel_type = {
1209     .name = ACCEL_CLASS_NAME("nvmm"),
1210     .parent = TYPE_ACCEL,
1211     .class_init = nvmm_accel_class_init,
1212 };
1213 
/*
 * Per-CPU instance init hook for NVMM: cast @cs to X86CPU and delegate to
 * host_cpu_instance_init().
 */
static void nvmm_cpu_instance_init(CPUState *cs)
{
    host_cpu_instance_init(X86_CPU(cs));
}
1220 
/*
 * Class initializer for the NVMM AccelCPUClass: installs
 * nvmm_cpu_instance_init() as the CPU instance-init hook.
 */
static void nvmm_cpu_accel_class_init(ObjectClass *oc, const void *data)
{
    AccelCPUClass *cpu_class = ACCEL_CPU_CLASS(oc);

    cpu_class->cpu_instance_init = nvmm_cpu_instance_init;
}
1227 
1228 static const TypeInfo nvmm_cpu_accel_type = {
1229     .name = ACCEL_CPU_NAME("nvmm"),
1230 
1231     .parent = TYPE_ACCEL_CPU,
1232     .class_init = nvmm_cpu_accel_class_init,
1233     .abstract = true,
1234 };
1235 
1236 static void
nvmm_type_init(void)1237 nvmm_type_init(void)
1238 {
1239     type_register_static(&nvmm_accel_type);
1240     type_register_static(&nvmm_cpu_accel_type);
1241 }
1242 
1243 type_init(nvmm_type_init);
1244