xref: /openbmc/qemu/target/arm/kvm.c (revision 8779fccbef0c2e97fd6564ddf9f1df9fc724f2f0)
1 /*
2  * ARM implementation of KVM hooks
3  *
4  * Copyright Christoffer Dall 2009-2010
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include <sys/ioctl.h>
13 
14 #include <linux/kvm.h>
15 
16 #include "qemu-common.h"
17 #include "qemu/timer.h"
18 #include "qemu/error-report.h"
19 #include "sysemu/sysemu.h"
20 #include "sysemu/kvm.h"
21 #include "kvm_arm.h"
22 #include "cpu.h"
23 #include "internals.h"
24 #include "hw/arm/arm.h"
25 #include "exec/memattrs.h"
26 #include "exec/address-spaces.h"
27 #include "hw/boards.h"
28 #include "qemu/log.h"
29 
30 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
31     KVM_CAP_LAST_INFO
32 };
33 
34 static bool cap_has_mp_state;
35 
36 int kvm_arm_vcpu_init(CPUState *cs)
37 {
38     ARMCPU *cpu = ARM_CPU(cs);
39     struct kvm_vcpu_init init;
40 
41     init.target = cpu->kvm_target;
42     memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));
43 
44     return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
45 }
46 
47 bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
48                                       int *fdarray,
49                                       struct kvm_vcpu_init *init)
50 {
51     int ret, kvmfd = -1, vmfd = -1, cpufd = -1;
52 
53     kvmfd = qemu_open("/dev/kvm", O_RDWR);
54     if (kvmfd < 0) {
55         goto err;
56     }
57     vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
58     if (vmfd < 0) {
59         goto err;
60     }
61     cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
62     if (cpufd < 0) {
63         goto err;
64     }
65 
66     if (!init) {
67         /* Caller doesn't want the VCPU to be initialized, so skip it */
68         goto finish;
69     }
70 
71     ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
72     if (ret >= 0) {
73         ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
74         if (ret < 0) {
75             goto err;
76         }
77     } else if (cpus_to_try) {
78         /* Old kernel which doesn't know about the
79          * PREFERRED_TARGET ioctl: we know it will only support
80          * creating one kind of guest CPU which is its preferred
81          * CPU type.
82          */
83         while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
84             init->target = *cpus_to_try++;
85             memset(init->features, 0, sizeof(init->features));
86             ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
87             if (ret >= 0) {
88                 break;
89             }
90         }
91         if (ret < 0) {
92             goto err;
93         }
94     } else {
95         /* Treat a NULL cpus_to_try argument the same as an empty
96          * list, which means we will fail the call since this must
97          * be an old kernel which doesn't support PREFERRED_TARGET.
98          */
99         goto err;
100     }
101 
102 finish:
103     fdarray[0] = kvmfd;
104     fdarray[1] = vmfd;
105     fdarray[2] = cpufd;
106 
107     return true;
108 
109 err:
110     if (cpufd >= 0) {
111         close(cpufd);
112     }
113     if (vmfd >= 0) {
114         close(vmfd);
115     }
116     if (kvmfd >= 0) {
117         close(kvmfd);
118     }
119 
120     return false;
121 }
122 
/* Close the three descriptors in @fdarray, highest index first
 * (element 2, then 1, then 0).
 */
void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
{
    int slot = 3;

    while (slot-- > 0) {
        close(fdarray[slot]);
    }
}
131 
132 static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data)
133 {
134     ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc);
135 
136     /* All we really need to set up for the 'host' CPU
137      * is the feature bits -- we rely on the fact that the
138      * various ID register values in ARMCPU are only used for
139      * TCG CPUs.
140      */
141     if (!kvm_arm_get_host_cpu_features(ahcc)) {
142         fprintf(stderr, "Failed to retrieve host CPU features!\n");
143         abort();
144     }
145 }
146 
147 static void kvm_arm_host_cpu_initfn(Object *obj)
148 {
149     ARMHostCPUClass *ahcc = ARM_HOST_CPU_GET_CLASS(obj);
150     ARMCPU *cpu = ARM_CPU(obj);
151     CPUARMState *env = &cpu->env;
152 
153     cpu->kvm_target = ahcc->target;
154     cpu->dtb_compatible = ahcc->dtb_compatible;
155     env->features = ahcc->features;
156 }
157 
158 static const TypeInfo host_arm_cpu_type_info = {
159     .name = TYPE_ARM_HOST_CPU,
160 #ifdef TARGET_AARCH64
161     .parent = TYPE_AARCH64_CPU,
162 #else
163     .parent = TYPE_ARM_CPU,
164 #endif
165     .instance_init = kvm_arm_host_cpu_initfn,
166     .class_init = kvm_arm_host_cpu_class_init,
167     .class_size = sizeof(ARMHostCPUClass),
168 };
169 
170 int kvm_arch_init(MachineState *ms, KVMState *s)
171 {
172     /* For ARM interrupt delivery is always asynchronous,
173      * whether we are using an in-kernel VGIC or not.
174      */
175     kvm_async_interrupts_allowed = true;
176 
177     cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
178 
179     type_register_static(&host_arm_cpu_type_info);
180 
181     return 0;
182 }
183 
184 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
185 {
186     return cpu->cpu_index;
187 }
188 
189 /* We track all the KVM devices which need their memory addresses
190  * passing to the kernel in a list of these structures.
191  * When board init is complete we run through the list and
192  * tell the kernel the base addresses of the memory regions.
193  * We use a MemoryListener to track mapping and unmapping of
194  * the regions during board creation, so the board models don't
195  * need to do anything special for the KVM case.
196  */
197 typedef struct KVMDevice {
198     struct kvm_arm_device_addr kda;
199     struct kvm_device_attr kdattr;
200     MemoryRegion *mr;
201     QSLIST_ENTRY(KVMDevice) entries;
202     int dev_fd;
203 } KVMDevice;
204 
205 static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
206 
207 static void kvm_arm_devlistener_add(MemoryListener *listener,
208                                     MemoryRegionSection *section)
209 {
210     KVMDevice *kd;
211 
212     QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
213         if (section->mr == kd->mr) {
214             kd->kda.addr = section->offset_within_address_space;
215         }
216     }
217 }
218 
219 static void kvm_arm_devlistener_del(MemoryListener *listener,
220                                     MemoryRegionSection *section)
221 {
222     KVMDevice *kd;
223 
224     QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
225         if (section->mr == kd->mr) {
226             kd->kda.addr = -1;
227         }
228     }
229 }
230 
231 static MemoryListener devlistener = {
232     .region_add = kvm_arm_devlistener_add,
233     .region_del = kvm_arm_devlistener_del,
234 };
235 
236 static void kvm_arm_set_device_addr(KVMDevice *kd)
237 {
238     struct kvm_device_attr *attr = &kd->kdattr;
239     int ret;
240 
241     /* If the device control API is available and we have a device fd on the
242      * KVMDevice struct, let's use the newer API
243      */
244     if (kd->dev_fd >= 0) {
245         uint64_t addr = kd->kda.addr;
246         attr->addr = (uintptr_t)&addr;
247         ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
248     } else {
249         ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
250     }
251 
252     if (ret < 0) {
253         fprintf(stderr, "Failed to set device address: %s\n",
254                 strerror(-ret));
255         abort();
256     }
257 }
258 
259 static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
260 {
261     KVMDevice *kd, *tkd;
262 
263     memory_listener_unregister(&devlistener);
264     QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
265         if (kd->kda.addr != -1) {
266             kvm_arm_set_device_addr(kd);
267         }
268         memory_region_unref(kd->mr);
269         g_free(kd);
270     }
271 }
272 
273 static Notifier notify = {
274     .notify = kvm_arm_machine_init_done,
275 };
276 
277 void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
278                              uint64_t attr, int dev_fd)
279 {
280     KVMDevice *kd;
281 
282     if (!kvm_irqchip_in_kernel()) {
283         return;
284     }
285 
286     if (QSLIST_EMPTY(&kvm_devices_head)) {
287         memory_listener_register(&devlistener, &address_space_memory);
288         qemu_add_machine_init_done_notifier(&notify);
289     }
290     kd = g_new0(KVMDevice, 1);
291     kd->mr = mr;
292     kd->kda.id = devid;
293     kd->kda.addr = -1;
294     kd->kdattr.flags = 0;
295     kd->kdattr.group = group;
296     kd->kdattr.attr = attr;
297     kd->dev_fd = dev_fd;
298     QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
299     memory_region_ref(kd->mr);
300 }
301 
/* qsort()-style comparator for uint64_t values: returns 1, -1 or 0 when
 * *a is respectively greater than, less than or equal to *b.
 */
static int compare_u64(const void *a, const void *b)
{
    const uint64_t lhs = *(const uint64_t *)a;
    const uint64_t rhs = *(const uint64_t *)b;

    return (lhs > rhs) - (lhs < rhs);
}
312 
313 /* Initialize the CPUState's cpreg list according to the kernel's
314  * definition of what CPU registers it knows about (and throw away
315  * the previous TCG-created cpreg list).
316  */
317 int kvm_arm_init_cpreg_list(ARMCPU *cpu)
318 {
319     struct kvm_reg_list rl;
320     struct kvm_reg_list *rlp;
321     int i, ret, arraylen;
322     CPUState *cs = CPU(cpu);
323 
324     rl.n = 0;
325     ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
326     if (ret != -E2BIG) {
327         return ret;
328     }
329     rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
330     rlp->n = rl.n;
331     ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
332     if (ret) {
333         goto out;
334     }
335     /* Sort the list we get back from the kernel, since cpreg_tuples
336      * must be in strictly ascending order.
337      */
338     qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);
339 
340     for (i = 0, arraylen = 0; i < rlp->n; i++) {
341         if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
342             continue;
343         }
344         switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
345         case KVM_REG_SIZE_U32:
346         case KVM_REG_SIZE_U64:
347             break;
348         default:
349             fprintf(stderr, "Can't handle size of register in kernel list\n");
350             ret = -EINVAL;
351             goto out;
352         }
353 
354         arraylen++;
355     }
356 
357     cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
358     cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
359     cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
360                                          arraylen);
361     cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
362                                         arraylen);
363     cpu->cpreg_array_len = arraylen;
364     cpu->cpreg_vmstate_array_len = arraylen;
365 
366     for (i = 0, arraylen = 0; i < rlp->n; i++) {
367         uint64_t regidx = rlp->reg[i];
368         if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
369             continue;
370         }
371         cpu->cpreg_indexes[arraylen] = regidx;
372         arraylen++;
373     }
374     assert(cpu->cpreg_array_len == arraylen);
375 
376     if (!write_kvmstate_to_list(cpu)) {
377         /* Shouldn't happen unless kernel is inconsistent about
378          * what registers exist.
379          */
380         fprintf(stderr, "Initial read of kernel register state failed\n");
381         ret = -EINVAL;
382         goto out;
383     }
384 
385 out:
386     g_free(rlp);
387     return ret;
388 }
389 
390 bool write_kvmstate_to_list(ARMCPU *cpu)
391 {
392     CPUState *cs = CPU(cpu);
393     int i;
394     bool ok = true;
395 
396     for (i = 0; i < cpu->cpreg_array_len; i++) {
397         struct kvm_one_reg r;
398         uint64_t regidx = cpu->cpreg_indexes[i];
399         uint32_t v32;
400         int ret;
401 
402         r.id = regidx;
403 
404         switch (regidx & KVM_REG_SIZE_MASK) {
405         case KVM_REG_SIZE_U32:
406             r.addr = (uintptr_t)&v32;
407             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
408             if (!ret) {
409                 cpu->cpreg_values[i] = v32;
410             }
411             break;
412         case KVM_REG_SIZE_U64:
413             r.addr = (uintptr_t)(cpu->cpreg_values + i);
414             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
415             break;
416         default:
417             abort();
418         }
419         if (ret) {
420             ok = false;
421         }
422     }
423     return ok;
424 }
425 
426 bool write_list_to_kvmstate(ARMCPU *cpu, int level)
427 {
428     CPUState *cs = CPU(cpu);
429     int i;
430     bool ok = true;
431 
432     for (i = 0; i < cpu->cpreg_array_len; i++) {
433         struct kvm_one_reg r;
434         uint64_t regidx = cpu->cpreg_indexes[i];
435         uint32_t v32;
436         int ret;
437 
438         if (kvm_arm_cpreg_level(regidx) > level) {
439             continue;
440         }
441 
442         r.id = regidx;
443         switch (regidx & KVM_REG_SIZE_MASK) {
444         case KVM_REG_SIZE_U32:
445             v32 = cpu->cpreg_values[i];
446             r.addr = (uintptr_t)&v32;
447             break;
448         case KVM_REG_SIZE_U64:
449             r.addr = (uintptr_t)(cpu->cpreg_values + i);
450             break;
451         default:
452             abort();
453         }
454         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
455         if (ret) {
456             /* We might fail for "unknown register" and also for
457              * "you tried to set a register which is constant with
458              * a different value from what it actually contains".
459              */
460             ok = false;
461         }
462     }
463     return ok;
464 }
465 
466 void kvm_arm_reset_vcpu(ARMCPU *cpu)
467 {
468     int ret;
469 
470     /* Re-init VCPU so that all registers are set to
471      * their respective reset values.
472      */
473     ret = kvm_arm_vcpu_init(CPU(cpu));
474     if (ret < 0) {
475         fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
476         abort();
477     }
478     if (!write_kvmstate_to_list(cpu)) {
479         fprintf(stderr, "write_kvmstate_to_list failed\n");
480         abort();
481     }
482 }
483 
484 /*
485  * Update KVM's MP_STATE based on what QEMU thinks it is
486  */
487 int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
488 {
489     if (cap_has_mp_state) {
490         struct kvm_mp_state mp_state = {
491             .mp_state = (cpu->power_state == PSCI_OFF) ?
492             KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
493         };
494         int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
495         if (ret) {
496             fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
497                     __func__, ret, strerror(-ret));
498             return -1;
499         }
500     }
501 
502     return 0;
503 }
504 
505 /*
506  * Sync the KVM MP_STATE into QEMU
507  */
508 int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
509 {
510     if (cap_has_mp_state) {
511         struct kvm_mp_state mp_state;
512         int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
513         if (ret) {
514             fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
515                     __func__, ret, strerror(-ret));
516             abort();
517         }
518         cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
519             PSCI_OFF : PSCI_ON;
520     }
521 
522     return 0;
523 }
524 
525 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
526 {
527 }
528 
529 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
530 {
531     return MEMTXATTRS_UNSPECIFIED;
532 }
533 
534 
535 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
536 {
537     int ret = 0;
538 
539     switch (run->exit_reason) {
540     case KVM_EXIT_DEBUG:
541         if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
542             ret = EXCP_DEBUG;
543         } /* otherwise return to guest */
544         break;
545     default:
546         qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
547                       __func__, run->exit_reason);
548         break;
549     }
550     return ret;
551 }
552 
553 bool kvm_arch_stop_on_emulation_error(CPUState *cs)
554 {
555     return true;
556 }
557 
558 int kvm_arch_process_async_events(CPUState *cs)
559 {
560     return 0;
561 }
562 
563 int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr)
564 {
565     return 1;
566 }
567 
/* SIGBUS hook: takes no action and always returns 1, whatever @code
 * and @addr are.
 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
572 
573 /* The #ifdef protections are until 32bit headers are imported and can
574  * be removed once both 32 and 64 bit reach feature parity.
575  */
576 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
577 {
578 #ifdef KVM_GUESTDBG_USE_SW_BP
579     if (kvm_sw_breakpoints_active(cs)) {
580         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
581     }
582 #endif
583 #ifdef KVM_GUESTDBG_USE_HW
584     if (kvm_arm_hw_debug_active(cs)) {
585         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
586         kvm_arm_copy_hw_debug_data(&dbg->arch);
587     }
588 #endif
589 }
590 
591 void kvm_arch_init_irq_routing(KVMState *s)
592 {
593 }
594 
595 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
596 {
597      if (machine_kernel_irqchip_split(ms)) {
598          perror("-machine kernel_irqchip=split is not supported on ARM.");
599          exit(1);
600     }
601 
602     /* If we can create the VGIC using the newer device control API, we
603      * let the device do this when it initializes itself, otherwise we
604      * fall back to the old API */
605     return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
606 }
607 
608 int kvm_arm_vgic_probe(void)
609 {
610     if (kvm_create_device(kvm_state,
611                           KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
612         return 3;
613     } else if (kvm_create_device(kvm_state,
614                                  KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
615         return 2;
616     } else {
617         return 0;
618     }
619 }
620 
621 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
622                              uint64_t address, uint32_t data, PCIDevice *dev)
623 {
624     return 0;
625 }
626 
627 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
628                                 int vector, PCIDevice *dev)
629 {
630     return 0;
631 }
632 
/* Hook run after a virq is released: does no work and always
 * returns 0, whatever @virq is.
 */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
637 
/* Convert MSI data to a GSI number: subtract 32 (with unsigned 32-bit
 * wraparound) and keep the low 16 bits of the result.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t offset = data - 32;

    return offset & 0xffff;
}
642