xref: /openbmc/qemu/target/i386/nvmm/nvmm-all.c (revision f96b157ebb93f94cd56ebbc99bc20982b8fd86ef)
1 /*
2  * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
3  *
4  * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "cpu.h"
12 #include "system/address-spaces.h"
13 #include "system/ioport.h"
14 #include "qemu/accel.h"
15 #include "accel/accel-ops.h"
16 #include "system/nvmm.h"
17 #include "system/cpus.h"
18 #include "system/runstate.h"
19 #include "qemu/main-loop.h"
20 #include "qemu/error-report.h"
21 #include "qapi/error.h"
22 #include "qemu/queue.h"
23 #include "accel/accel-cpu-target.h"
24 #include "host-cpu.h"
25 #include "migration/blocker.h"
26 #include "strings.h"
27 
28 #include "nvmm-accel-ops.h"
29 
30 #include <nvmm.h>
31 
32 struct AccelCPUState {
33     struct nvmm_vcpu vcpu;
34     uint8_t tpr;
35     bool stop;
36 
37     /* Window-exiting for INTs/NMIs. */
38     bool int_window_exit;
39     bool nmi_window_exit;
40 
41     /* The guest is in an interrupt shadow (POP SS, etc). */
42     bool int_shadow;
43 };
44 
45 struct qemu_machine {
46     struct nvmm_capability cap;
47     struct nvmm_machine mach;
48 };
49 
50 /* -------------------------------------------------------------------------- */
51 
52 bool nvmm_allowed;
53 static struct qemu_machine qemu_mach;
54 
55 static struct nvmm_machine *
get_nvmm_mach(void)56 get_nvmm_mach(void)
57 {
58     return &qemu_mach.mach;
59 }
60 
61 /* -------------------------------------------------------------------------- */
62 
63 static void
nvmm_set_segment(struct nvmm_x64_state_seg * nseg,const SegmentCache * qseg)64 nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
65 {
66     uint32_t attrib = qseg->flags;
67 
68     nseg->selector = qseg->selector;
69     nseg->limit = qseg->limit;
70     nseg->base = qseg->base;
71     nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
72     nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
73     nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
74     nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
75     nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
76     nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
77     nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
78     nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
79 }
80 
81 static void
nvmm_set_registers(CPUState * cpu)82 nvmm_set_registers(CPUState *cpu)
83 {
84     CPUX86State *env = cpu_env(cpu);
85     struct nvmm_machine *mach = get_nvmm_mach();
86     AccelCPUState *qcpu = cpu->accel;
87     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
88     struct nvmm_x64_state *state = vcpu->state;
89     uint64_t bitmap;
90     size_t i;
91     int ret;
92 
93     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
94 
95     /* GPRs. */
96     state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
97     state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
98     state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
99     state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
100     state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
101     state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
102     state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
103     state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
104 #ifdef TARGET_X86_64
105     state->gprs[NVMM_X64_GPR_R8]  = env->regs[R_R8];
106     state->gprs[NVMM_X64_GPR_R9]  = env->regs[R_R9];
107     state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
108     state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
109     state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
110     state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
111     state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
112     state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
113 #endif
114 
115     /* RIP and RFLAGS. */
116     state->gprs[NVMM_X64_GPR_RIP] = env->eip;
117     state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;
118 
119     /* Segments. */
120     nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
121     nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
122     nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
123     nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
124     nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
125     nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);
126 
127     /* Special segments. */
128     nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
129     nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
130     nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
131     nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);
132 
133     /* Control registers. */
134     state->crs[NVMM_X64_CR_CR0] = env->cr[0];
135     state->crs[NVMM_X64_CR_CR2] = env->cr[2];
136     state->crs[NVMM_X64_CR_CR3] = env->cr[3];
137     state->crs[NVMM_X64_CR_CR4] = env->cr[4];
138     state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
139     state->crs[NVMM_X64_CR_XCR0] = env->xcr0;
140 
141     /* Debug registers. */
142     state->drs[NVMM_X64_DR_DR0] = env->dr[0];
143     state->drs[NVMM_X64_DR_DR1] = env->dr[1];
144     state->drs[NVMM_X64_DR_DR2] = env->dr[2];
145     state->drs[NVMM_X64_DR_DR3] = env->dr[3];
146     state->drs[NVMM_X64_DR_DR6] = env->dr[6];
147     state->drs[NVMM_X64_DR_DR7] = env->dr[7];
148 
149     /* FPU. */
150     state->fpu.fx_cw = env->fpuc;
151     state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
152     state->fpu.fx_tw = 0;
153     for (i = 0; i < 8; i++) {
154         state->fpu.fx_tw |= (!env->fptags[i]) << i;
155     }
156     state->fpu.fx_opcode = env->fpop;
157     state->fpu.fx_ip.fa_64 = env->fpip;
158     state->fpu.fx_dp.fa_64 = env->fpdp;
159     state->fpu.fx_mxcsr = env->mxcsr;
160     state->fpu.fx_mxcsr_mask = 0x0000FFFF;
161     assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
162     memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
163     for (i = 0; i < CPU_NB_REGS; i++) {
164         memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
165             &env->xmm_regs[i].ZMM_Q(0), 8);
166         memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
167             &env->xmm_regs[i].ZMM_Q(1), 8);
168     }
169 
170     /* MSRs. */
171     state->msrs[NVMM_X64_MSR_EFER] = env->efer;
172     state->msrs[NVMM_X64_MSR_STAR] = env->star;
173 #ifdef TARGET_X86_64
174     state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
175     state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
176     state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
177     state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
178 #endif
179     state->msrs[NVMM_X64_MSR_SYSENTER_CS]  = env->sysenter_cs;
180     state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
181     state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
182     state->msrs[NVMM_X64_MSR_PAT] = env->pat;
183     state->msrs[NVMM_X64_MSR_TSC] = env->tsc;
184 
185     bitmap =
186         NVMM_X64_STATE_SEGS |
187         NVMM_X64_STATE_GPRS |
188         NVMM_X64_STATE_CRS  |
189         NVMM_X64_STATE_DRS  |
190         NVMM_X64_STATE_MSRS |
191         NVMM_X64_STATE_FPU;
192 
193     ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
194     if (ret == -1) {
195         error_report("NVMM: Failed to set virtual processor context,"
196             " error=%d", errno);
197     }
198 }
199 
200 static void
nvmm_get_segment(SegmentCache * qseg,const struct nvmm_x64_state_seg * nseg)201 nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
202 {
203     qseg->selector = nseg->selector;
204     qseg->limit = nseg->limit;
205     qseg->base = nseg->base;
206 
207     qseg->flags =
208         __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
209         __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
210         __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
211         __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
212         __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
213         __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
214         __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
215         __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
216 }
217 
218 static void
nvmm_get_registers(CPUState * cpu)219 nvmm_get_registers(CPUState *cpu)
220 {
221     CPUX86State *env = cpu_env(cpu);
222     struct nvmm_machine *mach = get_nvmm_mach();
223     AccelCPUState *qcpu = cpu->accel;
224     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
225     X86CPU *x86_cpu = X86_CPU(cpu);
226     struct nvmm_x64_state *state = vcpu->state;
227     uint64_t bitmap, tpr;
228     size_t i;
229     int ret;
230 
231     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
232 
233     bitmap =
234         NVMM_X64_STATE_SEGS |
235         NVMM_X64_STATE_GPRS |
236         NVMM_X64_STATE_CRS  |
237         NVMM_X64_STATE_DRS  |
238         NVMM_X64_STATE_MSRS |
239         NVMM_X64_STATE_FPU;
240 
241     ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
242     if (ret == -1) {
243         error_report("NVMM: Failed to get virtual processor context,"
244             " error=%d", errno);
245     }
246 
247     /* GPRs. */
248     env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
249     env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
250     env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
251     env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
252     env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
253     env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
254     env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
255     env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
256 #ifdef TARGET_X86_64
257     env->regs[R_R8]  = state->gprs[NVMM_X64_GPR_R8];
258     env->regs[R_R9]  = state->gprs[NVMM_X64_GPR_R9];
259     env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
260     env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
261     env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
262     env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
263     env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
264     env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
265 #endif
266 
267     /* RIP and RFLAGS. */
268     env->eip = state->gprs[NVMM_X64_GPR_RIP];
269     env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];
270 
271     /* Segments. */
272     nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
273     nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
274     nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
275     nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
276     nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
277     nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);
278 
279     /* Special segments. */
280     nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
281     nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
282     nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
283     nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);
284 
285     /* Control registers. */
286     env->cr[0] = state->crs[NVMM_X64_CR_CR0];
287     env->cr[2] = state->crs[NVMM_X64_CR_CR2];
288     env->cr[3] = state->crs[NVMM_X64_CR_CR3];
289     env->cr[4] = state->crs[NVMM_X64_CR_CR4];
290     tpr = state->crs[NVMM_X64_CR_CR8];
291     if (tpr != qcpu->tpr) {
292         qcpu->tpr = tpr;
293         cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
294     }
295     env->xcr0 = state->crs[NVMM_X64_CR_XCR0];
296 
297     /* Debug registers. */
298     env->dr[0] = state->drs[NVMM_X64_DR_DR0];
299     env->dr[1] = state->drs[NVMM_X64_DR_DR1];
300     env->dr[2] = state->drs[NVMM_X64_DR_DR2];
301     env->dr[3] = state->drs[NVMM_X64_DR_DR3];
302     env->dr[6] = state->drs[NVMM_X64_DR_DR6];
303     env->dr[7] = state->drs[NVMM_X64_DR_DR7];
304 
305     /* FPU. */
306     env->fpuc = state->fpu.fx_cw;
307     env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
308     env->fpus = state->fpu.fx_sw & ~0x3800;
309     for (i = 0; i < 8; i++) {
310         env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
311     }
312     env->fpop = state->fpu.fx_opcode;
313     env->fpip = state->fpu.fx_ip.fa_64;
314     env->fpdp = state->fpu.fx_dp.fa_64;
315     env->mxcsr = state->fpu.fx_mxcsr;
316     assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
317     memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
318     for (i = 0; i < CPU_NB_REGS; i++) {
319         memcpy(&env->xmm_regs[i].ZMM_Q(0),
320             &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
321         memcpy(&env->xmm_regs[i].ZMM_Q(1),
322             &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
323     }
324 
325     /* MSRs. */
326     env->efer = state->msrs[NVMM_X64_MSR_EFER];
327     env->star = state->msrs[NVMM_X64_MSR_STAR];
328 #ifdef TARGET_X86_64
329     env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
330     env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
331     env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
332     env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
333 #endif
334     env->sysenter_cs  = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
335     env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
336     env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
337     env->pat = state->msrs[NVMM_X64_MSR_PAT];
338     env->tsc = state->msrs[NVMM_X64_MSR_TSC];
339 
340     x86_update_hflags(env);
341 }
342 
343 static bool
nvmm_can_take_int(CPUState * cpu)344 nvmm_can_take_int(CPUState *cpu)
345 {
346     AccelCPUState *qcpu = cpu->accel;
347     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
348     struct nvmm_machine *mach = get_nvmm_mach();
349 
350     if (qcpu->int_window_exit) {
351         return false;
352     }
353 
354     if (qcpu->int_shadow || !(cpu_env(cpu)->eflags & IF_MASK)) {
355         struct nvmm_x64_state *state = vcpu->state;
356 
357         /* Exit on interrupt window. */
358         nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
359         state->intr.int_window_exiting = 1;
360         nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);
361 
362         return false;
363     }
364 
365     return true;
366 }
367 
368 static bool
nvmm_can_take_nmi(CPUState * cpu)369 nvmm_can_take_nmi(CPUState *cpu)
370 {
371     AccelCPUState *qcpu = cpu->accel;
372 
373     /*
374      * Contrary to INTs, NMIs always schedule an exit when they are
375      * completed. Therefore, if window-exiting is enabled, it means
376      * NMIs are blocked.
377      */
378     if (qcpu->nmi_window_exit) {
379         return false;
380     }
381 
382     return true;
383 }
384 
385 /*
386  * Called before the VCPU is run. We inject events generated by the I/O
387  * thread, and synchronize the guest TPR.
388  */
389 static void
nvmm_vcpu_pre_run(CPUState * cpu)390 nvmm_vcpu_pre_run(CPUState *cpu)
391 {
392     CPUX86State *env = cpu_env(cpu);
393     struct nvmm_machine *mach = get_nvmm_mach();
394     AccelCPUState *qcpu = cpu->accel;
395     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
396     X86CPU *x86_cpu = X86_CPU(cpu);
397     struct nvmm_x64_state *state = vcpu->state;
398     struct nvmm_vcpu_event *event = vcpu->event;
399     bool has_event = false;
400     bool sync_tpr = false;
401     uint8_t tpr;
402     int ret;
403 
404     bql_lock();
405 
406     tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
407     if (tpr != qcpu->tpr) {
408         qcpu->tpr = tpr;
409         sync_tpr = true;
410     }
411 
412     /*
413      * Force the VCPU out of its inner loop to process any INIT requests
414      * or commit pending TPR access.
415      */
416     if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
417         cpu->exit_request = 1;
418     }
419 
420     if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
421         if (nvmm_can_take_nmi(cpu)) {
422             cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
423             event->type = NVMM_VCPU_EVENT_INTR;
424             event->vector = 2;
425             has_event = true;
426         }
427     }
428 
429     if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
430         if (nvmm_can_take_int(cpu)) {
431             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
432             event->type = NVMM_VCPU_EVENT_INTR;
433             event->vector = cpu_get_pic_interrupt(env);
434             has_event = true;
435         }
436     }
437 
438     /* Don't want SMIs. */
439     if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
440         cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
441     }
442 
443     if (sync_tpr) {
444         ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
445         if (ret == -1) {
446             error_report("NVMM: Failed to get CPU state,"
447                 " error=%d", errno);
448         }
449 
450         state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
451 
452         ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
453         if (ret == -1) {
454             error_report("NVMM: Failed to set CPU state,"
455                 " error=%d", errno);
456         }
457     }
458 
459     if (has_event) {
460         ret = nvmm_vcpu_inject(mach, vcpu);
461         if (ret == -1) {
462             error_report("NVMM: Failed to inject event,"
463                 " error=%d", errno);
464         }
465     }
466 
467     bql_unlock();
468 }
469 
470 /*
471  * Called after the VCPU ran. We synchronize the host view of the TPR and
472  * RFLAGS.
473  */
474 static void
nvmm_vcpu_post_run(CPUState * cpu,struct nvmm_vcpu_exit * exit)475 nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
476 {
477     AccelCPUState *qcpu = cpu->accel;
478     X86CPU *x86_cpu = X86_CPU(cpu);
479     CPUX86State *env = &x86_cpu->env;
480     uint64_t tpr;
481 
482     env->eflags = exit->exitstate.rflags;
483     qcpu->int_shadow = exit->exitstate.int_shadow;
484     qcpu->int_window_exit = exit->exitstate.int_window_exiting;
485     qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;
486 
487     tpr = exit->exitstate.cr8;
488     if (qcpu->tpr != tpr) {
489         qcpu->tpr = tpr;
490         bql_lock();
491         cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
492         bql_unlock();
493     }
494 }
495 
496 /* -------------------------------------------------------------------------- */
497 
498 static void
nvmm_io_callback(struct nvmm_io * io)499 nvmm_io_callback(struct nvmm_io *io)
500 {
501     MemTxAttrs attrs = { 0 };
502     int ret;
503 
504     ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
505         io->size, !io->in);
506     if (ret != MEMTX_OK) {
507         error_report("NVMM: I/O Transaction Failed "
508             "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
509             io->port, io->size);
510     }
511 
512     /* Needed, otherwise infinite loop. */
513     current_cpu->vcpu_dirty = false;
514 }
515 
516 static void
nvmm_mem_callback(struct nvmm_mem * mem)517 nvmm_mem_callback(struct nvmm_mem *mem)
518 {
519     cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
520 
521     /* Needed, otherwise infinite loop. */
522     current_cpu->vcpu_dirty = false;
523 }
524 
525 static struct nvmm_assist_callbacks nvmm_callbacks = {
526     .io = nvmm_io_callback,
527     .mem = nvmm_mem_callback
528 };
529 
530 /* -------------------------------------------------------------------------- */
531 
532 static int
nvmm_handle_mem(struct nvmm_machine * mach,struct nvmm_vcpu * vcpu)533 nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
534 {
535     int ret;
536 
537     ret = nvmm_assist_mem(mach, vcpu);
538     if (ret == -1) {
539         error_report("NVMM: Mem Assist Failed [gpa=%p]",
540             (void *)vcpu->exit->u.mem.gpa);
541     }
542 
543     return ret;
544 }
545 
546 static int
nvmm_handle_io(struct nvmm_machine * mach,struct nvmm_vcpu * vcpu)547 nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
548 {
549     int ret;
550 
551     ret = nvmm_assist_io(mach, vcpu);
552     if (ret == -1) {
553         error_report("NVMM: I/O Assist Failed [port=%d]",
554             (int)vcpu->exit->u.io.port);
555     }
556 
557     return ret;
558 }
559 
560 static int
nvmm_handle_rdmsr(struct nvmm_machine * mach,CPUState * cpu,struct nvmm_vcpu_exit * exit)561 nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
562     struct nvmm_vcpu_exit *exit)
563 {
564     AccelCPUState *qcpu = cpu->accel;
565     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
566     X86CPU *x86_cpu = X86_CPU(cpu);
567     struct nvmm_x64_state *state = vcpu->state;
568     uint64_t val;
569     int ret;
570 
571     switch (exit->u.rdmsr.msr) {
572     case MSR_IA32_APICBASE:
573         val = cpu_get_apic_base(x86_cpu->apic_state);
574         break;
575     case MSR_MTRRcap:
576     case MSR_MTRRdefType:
577     case MSR_MCG_CAP:
578     case MSR_MCG_STATUS:
579         val = 0;
580         break;
581     default: /* More MSRs to add? */
582         val = 0;
583         error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
584             exit->u.rdmsr.msr);
585         break;
586     }
587 
588     ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
589     if (ret == -1) {
590         return -1;
591     }
592 
593     state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
594     state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
595     state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;
596 
597     ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
598     if (ret == -1) {
599         return -1;
600     }
601 
602     return 0;
603 }
604 
605 static int
nvmm_handle_wrmsr(struct nvmm_machine * mach,CPUState * cpu,struct nvmm_vcpu_exit * exit)606 nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
607     struct nvmm_vcpu_exit *exit)
608 {
609     AccelCPUState *qcpu = cpu->accel;
610     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
611     X86CPU *x86_cpu = X86_CPU(cpu);
612     struct nvmm_x64_state *state = vcpu->state;
613     uint64_t val;
614     int ret;
615 
616     val = exit->u.wrmsr.val;
617 
618     switch (exit->u.wrmsr.msr) {
619     case MSR_IA32_APICBASE:
620         cpu_set_apic_base(x86_cpu->apic_state, val);
621         break;
622     case MSR_MTRRdefType:
623     case MSR_MCG_STATUS:
624         break;
625     default: /* More MSRs to add? */
626         error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
627             exit->u.wrmsr.msr, val);
628         break;
629     }
630 
631     ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
632     if (ret == -1) {
633         return -1;
634     }
635 
636     state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;
637 
638     ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
639     if (ret == -1) {
640         return -1;
641     }
642 
643     return 0;
644 }
645 
646 static int
nvmm_handle_halted(struct nvmm_machine * mach,CPUState * cpu,struct nvmm_vcpu_exit * exit)647 nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
648     struct nvmm_vcpu_exit *exit)
649 {
650     int ret = 0;
651 
652     bql_lock();
653 
654     if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
655           (cpu_env(cpu)->eflags & IF_MASK)) &&
656         !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
657         cpu->exception_index = EXCP_HLT;
658         cpu->halted = true;
659         ret = 1;
660     }
661 
662     bql_unlock();
663 
664     return ret;
665 }
666 
667 static int
nvmm_inject_ud(struct nvmm_machine * mach,struct nvmm_vcpu * vcpu)668 nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
669 {
670     struct nvmm_vcpu_event *event = vcpu->event;
671 
672     event->type = NVMM_VCPU_EVENT_EXCP;
673     event->vector = 6;
674     event->u.excp.error = 0;
675 
676     return nvmm_vcpu_inject(mach, vcpu);
677 }
678 
679 static int
nvmm_vcpu_loop(CPUState * cpu)680 nvmm_vcpu_loop(CPUState *cpu)
681 {
682     struct nvmm_machine *mach = get_nvmm_mach();
683     AccelCPUState *qcpu = cpu->accel;
684     struct nvmm_vcpu *vcpu = &qcpu->vcpu;
685     X86CPU *x86_cpu = X86_CPU(cpu);
686     CPUX86State *env = &x86_cpu->env;
687     struct nvmm_vcpu_exit *exit = vcpu->exit;
688     int ret;
689 
690     /*
691      * Some asynchronous events must be handled outside of the inner
692      * VCPU loop. They are handled here.
693      */
694     if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
695         nvmm_cpu_synchronize_state(cpu);
696         do_cpu_init(x86_cpu);
697         /* set int/nmi windows back to the reset state */
698     }
699     if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
700         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
701         apic_poll_irq(x86_cpu->apic_state);
702     }
703     if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
704          (env->eflags & IF_MASK)) ||
705         (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
706         cpu->halted = false;
707     }
708     if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
709         nvmm_cpu_synchronize_state(cpu);
710         do_cpu_sipi(x86_cpu);
711     }
712     if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
713         cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
714         nvmm_cpu_synchronize_state(cpu);
715         apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
716             env->tpr_access_type);
717     }
718 
719     if (cpu->halted) {
720         cpu->exception_index = EXCP_HLT;
721         qatomic_set(&cpu->exit_request, false);
722         return 0;
723     }
724 
725     bql_unlock();
726     cpu_exec_start(cpu);
727 
728     /*
729      * Inner VCPU loop.
730      */
731     do {
732         if (cpu->vcpu_dirty) {
733             nvmm_set_registers(cpu);
734             cpu->vcpu_dirty = false;
735         }
736 
737         if (qcpu->stop) {
738             cpu->exception_index = EXCP_INTERRUPT;
739             qcpu->stop = false;
740             ret = 1;
741             break;
742         }
743 
744         nvmm_vcpu_pre_run(cpu);
745 
746         if (qatomic_read(&cpu->exit_request)) {
747 #if NVMM_USER_VERSION >= 2
748             nvmm_vcpu_stop(vcpu);
749 #else
750             qemu_cpu_kick_self();
751 #endif
752         }
753 
754         /* Read exit_request before the kernel reads the immediate exit flag */
755         smp_rmb();
756         ret = nvmm_vcpu_run(mach, vcpu);
757         if (ret == -1) {
758             error_report("NVMM: Failed to exec a virtual processor,"
759                 " error=%d", errno);
760             break;
761         }
762 
763         nvmm_vcpu_post_run(cpu, exit);
764 
765         switch (exit->reason) {
766         case NVMM_VCPU_EXIT_NONE:
767             break;
768 #if NVMM_USER_VERSION >= 2
769         case NVMM_VCPU_EXIT_STOPPED:
770             /*
771              * The kernel cleared the immediate exit flag; cpu->exit_request
772              * must be cleared after
773              */
774             smp_wmb();
775             qcpu->stop = true;
776             break;
777 #endif
778         case NVMM_VCPU_EXIT_MEMORY:
779             ret = nvmm_handle_mem(mach, vcpu);
780             break;
781         case NVMM_VCPU_EXIT_IO:
782             ret = nvmm_handle_io(mach, vcpu);
783             break;
784         case NVMM_VCPU_EXIT_INT_READY:
785         case NVMM_VCPU_EXIT_NMI_READY:
786         case NVMM_VCPU_EXIT_TPR_CHANGED:
787             break;
788         case NVMM_VCPU_EXIT_HALTED:
789             ret = nvmm_handle_halted(mach, cpu, exit);
790             break;
791         case NVMM_VCPU_EXIT_SHUTDOWN:
792             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
793             cpu->exception_index = EXCP_INTERRUPT;
794             ret = 1;
795             break;
796         case NVMM_VCPU_EXIT_RDMSR:
797             ret = nvmm_handle_rdmsr(mach, cpu, exit);
798             break;
799         case NVMM_VCPU_EXIT_WRMSR:
800             ret = nvmm_handle_wrmsr(mach, cpu, exit);
801             break;
802         case NVMM_VCPU_EXIT_MONITOR:
803         case NVMM_VCPU_EXIT_MWAIT:
804             ret = nvmm_inject_ud(mach, vcpu);
805             break;
806         default:
807             error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
808                 exit->reason, exit->u.inv.hwcode);
809             nvmm_get_registers(cpu);
810             bql_lock();
811             qemu_system_guest_panicked(cpu_get_crash_info(cpu));
812             bql_unlock();
813             ret = -1;
814             break;
815         }
816     } while (ret == 0);
817 
818     cpu_exec_end(cpu);
819     bql_lock();
820 
821     qatomic_set(&cpu->exit_request, false);
822 
823     return ret < 0;
824 }
825 
826 /* -------------------------------------------------------------------------- */
827 
828 static void
do_nvmm_cpu_synchronize_state(CPUState * cpu,run_on_cpu_data arg)829 do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
830 {
831     nvmm_get_registers(cpu);
832     cpu->vcpu_dirty = true;
833 }
834 
835 static void
do_nvmm_cpu_synchronize_post_reset(CPUState * cpu,run_on_cpu_data arg)836 do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
837 {
838     nvmm_set_registers(cpu);
839     cpu->vcpu_dirty = false;
840 }
841 
842 static void
do_nvmm_cpu_synchronize_post_init(CPUState * cpu,run_on_cpu_data arg)843 do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
844 {
845     nvmm_set_registers(cpu);
846     cpu->vcpu_dirty = false;
847 }
848 
849 static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState * cpu,run_on_cpu_data arg)850 do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
851 {
852     cpu->vcpu_dirty = true;
853 }
854 
/*
 * Make QEMU's register view current. Nothing is done when the view is
 * already dirty, i.e. QEMU's copy is the one that will be written back.
 */
void
nvmm_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
861 
/* After a reset: push QEMU's registers to the vCPU, on the vCPU's thread. */
void
nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
866 
/* After init: push QEMU's registers to the vCPU, on the vCPU's thread. */
void
nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
871 
/* Before loading VM state: mark QEMU's register view dirty on the vCPU. */
void
nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
876 
877 /* -------------------------------------------------------------------------- */
878 
879 static Error *nvmm_migration_blocker;
880 
881 /*
882  * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
883  * and another thread signaling the vCPU thread to exit.
884  */
885 
886 static void
nvmm_ipi_signal(int sigcpu)887 nvmm_ipi_signal(int sigcpu)
888 {
889     if (current_cpu) {
890         AccelCPUState *qcpu = current_cpu->accel;
891 #if NVMM_USER_VERSION >= 2
892         struct nvmm_vcpu *vcpu = &qcpu->vcpu;
893         nvmm_vcpu_stop(vcpu);
894 #else
895         qcpu->stop = true;
896 #endif
897     }
898 }
899 
900 static void
nvmm_init_cpu_signals(void)901 nvmm_init_cpu_signals(void)
902 {
903     struct sigaction sigact;
904     sigset_t set;
905 
906     /* Install the IPI handler. */
907     memset(&sigact, 0, sizeof(sigact));
908     sigact.sa_handler = nvmm_ipi_signal;
909     sigaction(SIG_IPI, &sigact, NULL);
910 
911     /* Allow IPIs on the current thread. */
912     sigprocmask(SIG_BLOCK, NULL, &set);
913     sigdelset(&set, SIG_IPI);
914     pthread_sigmask(SIG_SETMASK, &set, NULL);
915 }
916 
917 int
nvmm_init_vcpu(CPUState * cpu)918 nvmm_init_vcpu(CPUState *cpu)
919 {
920     struct nvmm_machine *mach = get_nvmm_mach();
921     struct nvmm_vcpu_conf_cpuid cpuid;
922     struct nvmm_vcpu_conf_tpr tpr;
923     Error *local_error = NULL;
924     AccelCPUState *qcpu;
925     int ret, err;
926 
927     nvmm_init_cpu_signals();
928 
929     if (nvmm_migration_blocker == NULL) {
930         error_setg(&nvmm_migration_blocker,
931             "NVMM: Migration not supported");
932 
933         if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) {
934             error_report_err(local_error);
935             return -EINVAL;
936         }
937     }
938 
939     qcpu = g_new0(AccelCPUState, 1);
940 
941     ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
942     if (ret == -1) {
943         err = errno;
944         error_report("NVMM: Failed to create a virtual processor,"
945             " error=%d", err);
946         g_free(qcpu);
947         return -err;
948     }
949 
950     memset(&cpuid, 0, sizeof(cpuid));
951     cpuid.mask = 1;
952     cpuid.leaf = 0x00000001;
953     cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
954     ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
955         &cpuid);
956     if (ret == -1) {
957         err = errno;
958         error_report("NVMM: Failed to configure a virtual processor,"
959             " error=%d", err);
960         g_free(qcpu);
961         return -err;
962     }
963 
964     ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
965         &nvmm_callbacks);
966     if (ret == -1) {
967         err = errno;
968         error_report("NVMM: Failed to configure a virtual processor,"
969             " error=%d", err);
970         g_free(qcpu);
971         return -err;
972     }
973 
974     if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
975         memset(&tpr, 0, sizeof(tpr));
976         tpr.exit_changed = 1;
977         ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
978         if (ret == -1) {
979             err = errno;
980             error_report("NVMM: Failed to configure a virtual processor,"
981                 " error=%d", err);
982             g_free(qcpu);
983             return -err;
984         }
985     }
986 
987     qcpu->vcpu_dirty = true;
988     cpu->accel = qcpu;
989 
990     return 0;
991 }
992 
993 int
nvmm_vcpu_exec(CPUState * cpu)994 nvmm_vcpu_exec(CPUState *cpu)
995 {
996     int ret, fatal;
997 
998     while (1) {
999         if (cpu->exception_index >= EXCP_INTERRUPT) {
1000             ret = cpu->exception_index;
1001             cpu->exception_index = -1;
1002             break;
1003         }
1004 
1005         fatal = nvmm_vcpu_loop(cpu);
1006 
1007         if (fatal) {
1008             error_report("NVMM: Failed to execute a VCPU.");
1009             abort();
1010         }
1011     }
1012 
1013     return ret;
1014 }
1015 
1016 void
nvmm_destroy_vcpu(CPUState * cpu)1017 nvmm_destroy_vcpu(CPUState *cpu)
1018 {
1019     struct nvmm_machine *mach = get_nvmm_mach();
1020     AccelCPUState *qcpu = cpu->accel;
1021 
1022     nvmm_vcpu_destroy(mach, &qcpu->vcpu);
1023     g_free(cpu->accel);
1024 }
1025 
1026 /* -------------------------------------------------------------------------- */
1027 
1028 static void
nvmm_update_mapping(hwaddr start_pa,ram_addr_t size,uintptr_t hva,bool add,bool rom,const char * name)1029 nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
1030     bool add, bool rom, const char *name)
1031 {
1032     struct nvmm_machine *mach = get_nvmm_mach();
1033     int ret, prot;
1034 
1035     if (add) {
1036         prot = PROT_READ | PROT_EXEC;
1037         if (!rom) {
1038             prot |= PROT_WRITE;
1039         }
1040         ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
1041     } else {
1042         ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
1043     }
1044 
1045     if (ret == -1) {
1046         error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
1047             "Size:%p bytes, HostVA:%p, error=%d",
1048             (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
1049             (void *)size, (void *)hva, errno);
1050     }
1051 }
1052 
1053 static void
nvmm_process_section(MemoryRegionSection * section,int add)1054 nvmm_process_section(MemoryRegionSection *section, int add)
1055 {
1056     MemoryRegion *mr = section->mr;
1057     hwaddr start_pa = section->offset_within_address_space;
1058     ram_addr_t size = int128_get64(section->size);
1059     unsigned int delta;
1060     uintptr_t hva;
1061 
1062     if (!memory_region_is_ram(mr)) {
1063         return;
1064     }
1065 
1066     /* Adjust start_pa and size so that they are page-aligned. */
1067     delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
1068     delta &= ~qemu_real_host_page_mask();
1069     if (delta > size) {
1070         return;
1071     }
1072     start_pa += delta;
1073     size -= delta;
1074     size &= qemu_real_host_page_mask();
1075     if (!size || (start_pa & ~qemu_real_host_page_mask())) {
1076         return;
1077     }
1078 
1079     hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
1080         section->offset_within_region + delta;
1081 
1082     nvmm_update_mapping(start_pa, size, hva, add,
1083         memory_region_is_rom(mr), mr->name);
1084 }
1085 
1086 static void
nvmm_region_add(MemoryListener * listener,MemoryRegionSection * section)1087 nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
1088 {
1089     memory_region_ref(section->mr);
1090     nvmm_process_section(section, 1);
1091 }
1092 
1093 static void
nvmm_region_del(MemoryListener * listener,MemoryRegionSection * section)1094 nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
1095 {
1096     nvmm_process_section(section, 0);
1097     memory_region_unref(section->mr);
1098 }
1099 
1100 static void
nvmm_transaction_begin(MemoryListener * listener)1101 nvmm_transaction_begin(MemoryListener *listener)
1102 {
1103     /* nothing */
1104 }
1105 
1106 static void
nvmm_transaction_commit(MemoryListener * listener)1107 nvmm_transaction_commit(MemoryListener *listener)
1108 {
1109     /* nothing */
1110 }
1111 
1112 static void
nvmm_log_sync(MemoryListener * listener,MemoryRegionSection * section)1113 nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
1114 {
1115     MemoryRegion *mr = section->mr;
1116 
1117     if (!memory_region_is_ram(mr)) {
1118         return;
1119     }
1120 
1121     memory_region_set_dirty(mr, 0, int128_get64(section->size));
1122 }
1123 
1124 static MemoryListener nvmm_memory_listener = {
1125     .name = "nvmm",
1126     .begin = nvmm_transaction_begin,
1127     .commit = nvmm_transaction_commit,
1128     .region_add = nvmm_region_add,
1129     .region_del = nvmm_region_del,
1130     .log_sync = nvmm_log_sync,
1131     .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
1132 };
1133 
1134 static void
nvmm_ram_block_added(RAMBlockNotifier * n,void * host,size_t size,size_t max_size)1135 nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
1136                      size_t max_size)
1137 {
1138     struct nvmm_machine *mach = get_nvmm_mach();
1139     uintptr_t hva = (uintptr_t)host;
1140     int ret;
1141 
1142     ret = nvmm_hva_map(mach, hva, max_size);
1143 
1144     if (ret == -1) {
1145         error_report("NVMM: Failed to map HVA, HostVA:%p "
1146             "Size:%p bytes, error=%d",
1147             (void *)hva, (void *)size, errno);
1148     }
1149 }
1150 
1151 static struct RAMBlockNotifier nvmm_ram_notifier = {
1152     .ram_block_added = nvmm_ram_block_added
1153 };
1154 
1155 /* -------------------------------------------------------------------------- */
1156 
1157 static int
nvmm_accel_init(AccelState * as,MachineState * ms)1158 nvmm_accel_init(AccelState *as, MachineState *ms)
1159 {
1160     int ret, err;
1161 
1162     ret = nvmm_init();
1163     if (ret == -1) {
1164         err = errno;
1165         error_report("NVMM: Initialization failed, error=%d", errno);
1166         return -err;
1167     }
1168 
1169     ret = nvmm_capability(&qemu_mach.cap);
1170     if (ret == -1) {
1171         err = errno;
1172         error_report("NVMM: Unable to fetch capability, error=%d", errno);
1173         return -err;
1174     }
1175     if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
1176         error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
1177         return -EPROGMISMATCH;
1178     }
1179     if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
1180         error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
1181         return -EPROGMISMATCH;
1182     }
1183 
1184     ret = nvmm_machine_create(&qemu_mach.mach);
1185     if (ret == -1) {
1186         err = errno;
1187         error_report("NVMM: Machine creation failed, error=%d", errno);
1188         return -err;
1189     }
1190 
1191     memory_listener_register(&nvmm_memory_listener, &address_space_memory);
1192     ram_block_notifier_add(&nvmm_ram_notifier);
1193 
1194     printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
1195     return 0;
1196 }
1197 
1198 static void
nvmm_accel_class_init(ObjectClass * oc,const void * data)1199 nvmm_accel_class_init(ObjectClass *oc, const void *data)
1200 {
1201     AccelClass *ac = ACCEL_CLASS(oc);
1202     ac->name = "NVMM";
1203     ac->init_machine = nvmm_accel_init;
1204     ac->allowed = &nvmm_allowed;
1205 }
1206 
1207 static const TypeInfo nvmm_accel_type = {
1208     .name = ACCEL_CLASS_NAME("nvmm"),
1209     .parent = TYPE_ACCEL,
1210     .class_init = nvmm_accel_class_init,
1211 };
1212 
/* Per-CPU instance init hook: forwards to host_cpu_instance_init(). */
static void nvmm_cpu_instance_init(CPUState *cs)
{
    host_cpu_instance_init(X86_CPU(cs));
}
1219 
/* Class initializer for the NVMM accel-CPU type: sets the instance hook. */
static void nvmm_cpu_accel_class_init(ObjectClass *oc, const void *data)
{
    ACCEL_CPU_CLASS(oc)->cpu_instance_init = nvmm_cpu_instance_init;
}
1226 
1227 static const TypeInfo nvmm_cpu_accel_type = {
1228     .name = ACCEL_CPU_NAME("nvmm"),
1229 
1230     .parent = TYPE_ACCEL_CPU,
1231     .class_init = nvmm_cpu_accel_class_init,
1232     .abstract = true,
1233 };
1234 
1235 static void
nvmm_type_init(void)1236 nvmm_type_init(void)
1237 {
1238     type_register_static(&nvmm_accel_type);
1239     type_register_static(&nvmm_cpu_accel_type);
1240 }
1241 
1242 type_init(nvmm_type_init);
1243