xref: /openbmc/qemu/target/arm/kvm.c (revision 15cea92d9e8afd4472147e54efe2eef0b7754dcd)
1 /*
2  * ARM implementation of KVM hooks
3  *
4  * Copyright Christoffer Dall 2009-2010
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include <sys/ioctl.h>
13 
14 #include <linux/kvm.h>
15 
16 #include "qemu-common.h"
17 #include "qemu/timer.h"
18 #include "qemu/error-report.h"
19 #include "qemu/main-loop.h"
20 #include "sysemu/sysemu.h"
21 #include "sysemu/kvm.h"
22 #include "sysemu/kvm_int.h"
23 #include "kvm_arm.h"
24 #include "cpu.h"
25 #include "trace.h"
26 #include "internals.h"
27 #include "hw/pci/pci.h"
28 #include "exec/memattrs.h"
29 #include "exec/address-spaces.h"
30 #include "hw/boards.h"
31 #include "hw/irq.h"
32 #include "qemu/log.h"
33 
/*
 * Capabilities this architecture lists as required. The table contains
 * only the KVM_CAP_LAST_INFO entry (presumably the list terminator --
 * confirm against the generic KVM code), so nothing is listed here.
 */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
37 
/* Result of the KVM_CAP_MP_STATE check performed in kvm_arch_init(). */
static bool cap_has_mp_state;
/*
 * Result of the KVM_CAP_ARM_INJECT_SERROR_ESR check performed in
 * kvm_arm_init_serror_injection().
 */
static bool cap_has_inject_serror_esr;

/*
 * Host CPU feature description. kvm_arm_set_cpu_features_from_host()
 * fills it via kvm_arm_get_host_cpu_features() while its dtb_compatible
 * field is still unset, and copies from it into each ARMCPU.
 */
static ARMHostCPUFeatures arm_host_cpu_features;
42 
43 int kvm_arm_vcpu_init(CPUState *cs)
44 {
45     ARMCPU *cpu = ARM_CPU(cs);
46     struct kvm_vcpu_init init;
47 
48     init.target = cpu->kvm_target;
49     memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));
50 
51     return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
52 }
53 
54 int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
55 {
56     return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature);
57 }
58 
59 void kvm_arm_init_serror_injection(CPUState *cs)
60 {
61     cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
62                                     KVM_CAP_ARM_INJECT_SERROR_ESR);
63 }
64 
65 bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
66                                       int *fdarray,
67                                       struct kvm_vcpu_init *init)
68 {
69     int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
70 
71     kvmfd = qemu_open("/dev/kvm", O_RDWR);
72     if (kvmfd < 0) {
73         goto err;
74     }
75     vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
76     if (vmfd < 0) {
77         goto err;
78     }
79     cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
80     if (cpufd < 0) {
81         goto err;
82     }
83 
84     if (!init) {
85         /* Caller doesn't want the VCPU to be initialized, so skip it */
86         goto finish;
87     }
88 
89     if (init->target == -1) {
90         struct kvm_vcpu_init preferred;
91 
92         ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred);
93         if (!ret) {
94             init->target = preferred.target;
95         }
96     }
97     if (ret >= 0) {
98         ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
99         if (ret < 0) {
100             goto err;
101         }
102     } else if (cpus_to_try) {
103         /* Old kernel which doesn't know about the
104          * PREFERRED_TARGET ioctl: we know it will only support
105          * creating one kind of guest CPU which is its preferred
106          * CPU type.
107          */
108         struct kvm_vcpu_init try;
109 
110         while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
111             try.target = *cpus_to_try++;
112             memcpy(try.features, init->features, sizeof(init->features));
113             ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try);
114             if (ret >= 0) {
115                 break;
116             }
117         }
118         if (ret < 0) {
119             goto err;
120         }
121         init->target = try.target;
122     } else {
123         /* Treat a NULL cpus_to_try argument the same as an empty
124          * list, which means we will fail the call since this must
125          * be an old kernel which doesn't support PREFERRED_TARGET.
126          */
127         goto err;
128     }
129 
130 finish:
131     fdarray[0] = kvmfd;
132     fdarray[1] = vmfd;
133     fdarray[2] = cpufd;
134 
135     return true;
136 
137 err:
138     if (cpufd >= 0) {
139         close(cpufd);
140     }
141     if (vmfd >= 0) {
142         close(vmfd);
143     }
144     if (kvmfd >= 0) {
145         close(kvmfd);
146     }
147 
148     return false;
149 }
150 
/*
 * Close the three file descriptors stored in @fdarray, from slot 2 down
 * to slot 0.
 */
void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
{
    int idx = 3;

    while (idx-- > 0) {
        close(fdarray[idx]);
    }
}
159 
160 void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
161 {
162     CPUARMState *env = &cpu->env;
163 
164     if (!arm_host_cpu_features.dtb_compatible) {
165         if (!kvm_enabled() ||
166             !kvm_arm_get_host_cpu_features(&arm_host_cpu_features)) {
167             /* We can't report this error yet, so flag that we need to
168              * in arm_cpu_realizefn().
169              */
170             cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
171             cpu->host_cpu_probe_failed = true;
172             return;
173         }
174     }
175 
176     cpu->kvm_target = arm_host_cpu_features.target;
177     cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
178     cpu->isar = arm_host_cpu_features.isar;
179     env->features = arm_host_cpu_features.features;
180 }
181 
182 bool kvm_arm_pmu_supported(CPUState *cpu)
183 {
184     KVMState *s = KVM_STATE(current_machine->accelerator);
185 
186     return kvm_check_extension(s, KVM_CAP_ARM_PMU_V3);
187 }
188 
189 int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
190 {
191     KVMState *s = KVM_STATE(ms->accelerator);
192     int ret;
193 
194     ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE);
195     return ret > 0 ? ret : 40;
196 }
197 
198 int kvm_arch_init(MachineState *ms, KVMState *s)
199 {
200     int ret = 0;
201     /* For ARM interrupt delivery is always asynchronous,
202      * whether we are using an in-kernel VGIC or not.
203      */
204     kvm_async_interrupts_allowed = true;
205 
206     /*
207      * PSCI wakes up secondary cores, so we always need to
208      * have vCPUs waiting in kernel space
209      */
210     kvm_halt_in_kernel_allowed = true;
211 
212     cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
213 
214     if (ms->smp.cpus > 256 &&
215         !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) {
216         error_report("Using more than 256 vcpus requires a host kernel "
217                      "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2");
218         ret = -EINVAL;
219     }
220 
221     return ret;
222 }
223 
224 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
225 {
226     return cpu->cpu_index;
227 }
228 
/* Every KVM device whose memory address has to be passed to the kernel
 * is tracked in a list of these structures.
 * Once board init is complete the list is walked and the kernel is
 * told the base address of each memory region.
 * A MemoryListener tracks mapping and unmapping of the regions during
 * board creation, so board models need no special handling for the
 * KVM case.
 *
 * Sometimes the address must be OR'ed with some other fields
 * (for example for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION).
 * @kda_addr_ormask stores the value of those fields.
 */
typedef struct KVMDevice {
    /* Argument for KVM_ARM_SET_DEVICE_ADDR; kda.addr == -1 means unmapped */
    struct kvm_arm_device_addr kda;
    /* Argument for KVM_SET_DEVICE_ATTR, used when dev_fd >= 0 */
    struct kvm_device_attr kdattr;
    /* Bits OR'ed into kda.addr before it is handed to KVM_SET_DEVICE_ATTR */
    uint64_t kda_addr_ormask;
    /* Region whose address is tracked; a reference is held while listed */
    MemoryRegion *mr;
    QSLIST_ENTRY(KVMDevice) entries;
    /* Device fd for the device control API; negative selects the old API */
    int dev_fd;
} KVMDevice;
249 
/* Devices registered through kvm_arm_register_device(), newest first. */
static QSLIST_HEAD(, KVMDevice) kvm_devices_head;
251 
252 static void kvm_arm_devlistener_add(MemoryListener *listener,
253                                     MemoryRegionSection *section)
254 {
255     KVMDevice *kd;
256 
257     QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
258         if (section->mr == kd->mr) {
259             kd->kda.addr = section->offset_within_address_space;
260         }
261     }
262 }
263 
264 static void kvm_arm_devlistener_del(MemoryListener *listener,
265                                     MemoryRegionSection *section)
266 {
267     KVMDevice *kd;
268 
269     QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
270         if (section->mr == kd->mr) {
271             kd->kda.addr = -1;
272         }
273     }
274 }
275 
/*
 * Listener that keeps KVMDevice addresses up to date. It is registered on
 * address_space_memory by kvm_arm_register_device() and unregistered in
 * kvm_arm_machine_init_done().
 */
static MemoryListener devlistener = {
    .region_add = kvm_arm_devlistener_add,
    .region_del = kvm_arm_devlistener_del,
};
280 
281 static void kvm_arm_set_device_addr(KVMDevice *kd)
282 {
283     struct kvm_device_attr *attr = &kd->kdattr;
284     int ret;
285 
286     /* If the device control API is available and we have a device fd on the
287      * KVMDevice struct, let's use the newer API
288      */
289     if (kd->dev_fd >= 0) {
290         uint64_t addr = kd->kda.addr;
291 
292         addr |= kd->kda_addr_ormask;
293         attr->addr = (uintptr_t)&addr;
294         ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
295     } else {
296         ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
297     }
298 
299     if (ret < 0) {
300         fprintf(stderr, "Failed to set device address: %s\n",
301                 strerror(-ret));
302         abort();
303     }
304 }
305 
306 static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
307 {
308     KVMDevice *kd, *tkd;
309 
310     QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
311         if (kd->kda.addr != -1) {
312             kvm_arm_set_device_addr(kd);
313         }
314         memory_region_unref(kd->mr);
315         QSLIST_REMOVE_HEAD(&kvm_devices_head, entries);
316         g_free(kd);
317     }
318     memory_listener_unregister(&devlistener);
319 }
320 
/*
 * Notifier that runs kvm_arm_machine_init_done(); it is added with
 * qemu_add_machine_init_done_notifier() in kvm_arm_register_device().
 */
static Notifier notify = {
    .notify = kvm_arm_machine_init_done,
};
324 
325 void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
326                              uint64_t attr, int dev_fd, uint64_t addr_ormask)
327 {
328     KVMDevice *kd;
329 
330     if (!kvm_irqchip_in_kernel()) {
331         return;
332     }
333 
334     if (QSLIST_EMPTY(&kvm_devices_head)) {
335         memory_listener_register(&devlistener, &address_space_memory);
336         qemu_add_machine_init_done_notifier(&notify);
337     }
338     kd = g_new0(KVMDevice, 1);
339     kd->mr = mr;
340     kd->kda.id = devid;
341     kd->kda.addr = -1;
342     kd->kdattr.flags = 0;
343     kd->kdattr.group = group;
344     kd->kdattr.attr = attr;
345     kd->dev_fd = dev_fd;
346     kd->kda_addr_ormask = addr_ormask;
347     QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
348     memory_region_ref(kd->mr);
349 }
350 
/*
 * qsort() comparator for uint64_t elements, ascending order.
 * Returns 1, -1 or 0 when *a is greater than, less than or equal to *b.
 */
static int compare_u64(const void *a, const void *b)
{
    const uint64_t lhs = *(const uint64_t *)a;
    const uint64_t rhs = *(const uint64_t *)b;

    return (lhs > rhs) - (lhs < rhs);
}
361 
362 /* Initialize the ARMCPU cpreg list according to the kernel's
363  * definition of what CPU registers it knows about (and throw away
364  * the previous TCG-created cpreg list).
365  */
366 int kvm_arm_init_cpreg_list(ARMCPU *cpu)
367 {
368     struct kvm_reg_list rl;
369     struct kvm_reg_list *rlp;
370     int i, ret, arraylen;
371     CPUState *cs = CPU(cpu);
372 
373     rl.n = 0;
374     ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
375     if (ret != -E2BIG) {
376         return ret;
377     }
378     rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
379     rlp->n = rl.n;
380     ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
381     if (ret) {
382         goto out;
383     }
384     /* Sort the list we get back from the kernel, since cpreg_tuples
385      * must be in strictly ascending order.
386      */
387     qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);
388 
389     for (i = 0, arraylen = 0; i < rlp->n; i++) {
390         if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
391             continue;
392         }
393         switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
394         case KVM_REG_SIZE_U32:
395         case KVM_REG_SIZE_U64:
396             break;
397         default:
398             fprintf(stderr, "Can't handle size of register in kernel list\n");
399             ret = -EINVAL;
400             goto out;
401         }
402 
403         arraylen++;
404     }
405 
406     cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
407     cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
408     cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
409                                          arraylen);
410     cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
411                                         arraylen);
412     cpu->cpreg_array_len = arraylen;
413     cpu->cpreg_vmstate_array_len = arraylen;
414 
415     for (i = 0, arraylen = 0; i < rlp->n; i++) {
416         uint64_t regidx = rlp->reg[i];
417         if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
418             continue;
419         }
420         cpu->cpreg_indexes[arraylen] = regidx;
421         arraylen++;
422     }
423     assert(cpu->cpreg_array_len == arraylen);
424 
425     if (!write_kvmstate_to_list(cpu)) {
426         /* Shouldn't happen unless kernel is inconsistent about
427          * what registers exist.
428          */
429         fprintf(stderr, "Initial read of kernel register state failed\n");
430         ret = -EINVAL;
431         goto out;
432     }
433 
434 out:
435     g_free(rlp);
436     return ret;
437 }
438 
439 bool write_kvmstate_to_list(ARMCPU *cpu)
440 {
441     CPUState *cs = CPU(cpu);
442     int i;
443     bool ok = true;
444 
445     for (i = 0; i < cpu->cpreg_array_len; i++) {
446         struct kvm_one_reg r;
447         uint64_t regidx = cpu->cpreg_indexes[i];
448         uint32_t v32;
449         int ret;
450 
451         r.id = regidx;
452 
453         switch (regidx & KVM_REG_SIZE_MASK) {
454         case KVM_REG_SIZE_U32:
455             r.addr = (uintptr_t)&v32;
456             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
457             if (!ret) {
458                 cpu->cpreg_values[i] = v32;
459             }
460             break;
461         case KVM_REG_SIZE_U64:
462             r.addr = (uintptr_t)(cpu->cpreg_values + i);
463             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
464             break;
465         default:
466             abort();
467         }
468         if (ret) {
469             ok = false;
470         }
471     }
472     return ok;
473 }
474 
475 bool write_list_to_kvmstate(ARMCPU *cpu, int level)
476 {
477     CPUState *cs = CPU(cpu);
478     int i;
479     bool ok = true;
480 
481     for (i = 0; i < cpu->cpreg_array_len; i++) {
482         struct kvm_one_reg r;
483         uint64_t regidx = cpu->cpreg_indexes[i];
484         uint32_t v32;
485         int ret;
486 
487         if (kvm_arm_cpreg_level(regidx) > level) {
488             continue;
489         }
490 
491         r.id = regidx;
492         switch (regidx & KVM_REG_SIZE_MASK) {
493         case KVM_REG_SIZE_U32:
494             v32 = cpu->cpreg_values[i];
495             r.addr = (uintptr_t)&v32;
496             break;
497         case KVM_REG_SIZE_U64:
498             r.addr = (uintptr_t)(cpu->cpreg_values + i);
499             break;
500         default:
501             abort();
502         }
503         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
504         if (ret) {
505             /* We might fail for "unknown register" and also for
506              * "you tried to set a register which is constant with
507              * a different value from what it actually contains".
508              */
509             ok = false;
510         }
511     }
512     return ok;
513 }
514 
515 void kvm_arm_reset_vcpu(ARMCPU *cpu)
516 {
517     int ret;
518 
519     /* Re-init VCPU so that all registers are set to
520      * their respective reset values.
521      */
522     ret = kvm_arm_vcpu_init(CPU(cpu));
523     if (ret < 0) {
524         fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
525         abort();
526     }
527     if (!write_kvmstate_to_list(cpu)) {
528         fprintf(stderr, "write_kvmstate_to_list failed\n");
529         abort();
530     }
531     /*
532      * Sync the reset values also into the CPUState. This is necessary
533      * because the next thing we do will be a kvm_arch_put_registers()
534      * which will update the list values from the CPUState before copying
535      * the list values back to KVM. It's OK to ignore failure returns here
536      * for the same reason we do so in kvm_arch_get_registers().
537      */
538     write_list_to_cpustate(cpu);
539 }
540 
541 /*
542  * Update KVM's MP_STATE based on what QEMU thinks it is
543  */
544 int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
545 {
546     if (cap_has_mp_state) {
547         struct kvm_mp_state mp_state = {
548             .mp_state = (cpu->power_state == PSCI_OFF) ?
549             KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
550         };
551         int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
552         if (ret) {
553             fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
554                     __func__, ret, strerror(-ret));
555             return -1;
556         }
557     }
558 
559     return 0;
560 }
561 
562 /*
563  * Sync the KVM MP_STATE into QEMU
564  */
565 int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
566 {
567     if (cap_has_mp_state) {
568         struct kvm_mp_state mp_state;
569         int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
570         if (ret) {
571             fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
572                     __func__, ret, strerror(-ret));
573             abort();
574         }
575         cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
576             PSCI_OFF : PSCI_ON;
577     }
578 
579     return 0;
580 }
581 
582 int kvm_put_vcpu_events(ARMCPU *cpu)
583 {
584     CPUARMState *env = &cpu->env;
585     struct kvm_vcpu_events events;
586     int ret;
587 
588     if (!kvm_has_vcpu_events()) {
589         return 0;
590     }
591 
592     memset(&events, 0, sizeof(events));
593     events.exception.serror_pending = env->serror.pending;
594 
595     /* Inject SError to guest with specified syndrome if host kernel
596      * supports it, otherwise inject SError without syndrome.
597      */
598     if (cap_has_inject_serror_esr) {
599         events.exception.serror_has_esr = env->serror.has_esr;
600         events.exception.serror_esr = env->serror.esr;
601     }
602 
603     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
604     if (ret) {
605         error_report("failed to put vcpu events");
606     }
607 
608     return ret;
609 }
610 
611 int kvm_get_vcpu_events(ARMCPU *cpu)
612 {
613     CPUARMState *env = &cpu->env;
614     struct kvm_vcpu_events events;
615     int ret;
616 
617     if (!kvm_has_vcpu_events()) {
618         return 0;
619     }
620 
621     memset(&events, 0, sizeof(events));
622     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
623     if (ret) {
624         error_report("failed to get vcpu events");
625         return ret;
626     }
627 
628     env->serror.pending = events.exception.serror_pending;
629     env->serror.has_esr = events.exception.serror_has_esr;
630     env->serror.esr = events.exception.serror_esr;
631 
632     return 0;
633 }
634 
635 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
636 {
637 }
638 
639 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
640 {
641     ARMCPU *cpu;
642     uint32_t switched_level;
643 
644     if (kvm_irqchip_in_kernel()) {
645         /*
646          * We only need to sync timer states with user-space interrupt
647          * controllers, so return early and save cycles if we don't.
648          */
649         return MEMTXATTRS_UNSPECIFIED;
650     }
651 
652     cpu = ARM_CPU(cs);
653 
654     /* Synchronize our shadowed in-kernel device irq lines with the kvm ones */
655     if (run->s.regs.device_irq_level != cpu->device_irq_level) {
656         switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level;
657 
658         qemu_mutex_lock_iothread();
659 
660         if (switched_level & KVM_ARM_DEV_EL1_VTIMER) {
661             qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT],
662                          !!(run->s.regs.device_irq_level &
663                             KVM_ARM_DEV_EL1_VTIMER));
664             switched_level &= ~KVM_ARM_DEV_EL1_VTIMER;
665         }
666 
667         if (switched_level & KVM_ARM_DEV_EL1_PTIMER) {
668             qemu_set_irq(cpu->gt_timer_outputs[GTIMER_PHYS],
669                          !!(run->s.regs.device_irq_level &
670                             KVM_ARM_DEV_EL1_PTIMER));
671             switched_level &= ~KVM_ARM_DEV_EL1_PTIMER;
672         }
673 
674         if (switched_level & KVM_ARM_DEV_PMU) {
675             qemu_set_irq(cpu->pmu_interrupt,
676                          !!(run->s.regs.device_irq_level & KVM_ARM_DEV_PMU));
677             switched_level &= ~KVM_ARM_DEV_PMU;
678         }
679 
680         if (switched_level) {
681             qemu_log_mask(LOG_UNIMP, "%s: unhandled in-kernel device IRQ %x\n",
682                           __func__, switched_level);
683         }
684 
685         /* We also mark unknown levels as processed to not waste cycles */
686         cpu->device_irq_level = run->s.regs.device_irq_level;
687         qemu_mutex_unlock_iothread();
688     }
689 
690     return MEMTXATTRS_UNSPECIFIED;
691 }
692 
693 
694 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
695 {
696     int ret = 0;
697 
698     switch (run->exit_reason) {
699     case KVM_EXIT_DEBUG:
700         if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
701             ret = EXCP_DEBUG;
702         } /* otherwise return to guest */
703         break;
704     default:
705         qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
706                       __func__, run->exit_reason);
707         break;
708     }
709     return ret;
710 }
711 
712 bool kvm_arch_stop_on_emulation_error(CPUState *cs)
713 {
714     return true;
715 }
716 
717 int kvm_arch_process_async_events(CPUState *cs)
718 {
719     return 0;
720 }
721 
722 /* The #ifdef protections are until 32bit headers are imported and can
723  * be removed once both 32 and 64 bit reach feature parity.
724  */
725 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
726 {
727 #ifdef KVM_GUESTDBG_USE_SW_BP
728     if (kvm_sw_breakpoints_active(cs)) {
729         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
730     }
731 #endif
732 #ifdef KVM_GUESTDBG_USE_HW
733     if (kvm_arm_hw_debug_active(cs)) {
734         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
735         kvm_arm_copy_hw_debug_data(&dbg->arch);
736     }
737 #endif
738 }
739 
740 void kvm_arch_init_irq_routing(KVMState *s)
741 {
742 }
743 
744 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
745 {
746      if (machine_kernel_irqchip_split(ms)) {
747          perror("-machine kernel_irqchip=split is not supported on ARM.");
748          exit(1);
749     }
750 
751     /* If we can create the VGIC using the newer device control API, we
752      * let the device do this when it initializes itself, otherwise we
753      * fall back to the old API */
754     return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
755 }
756 
757 int kvm_arm_vgic_probe(void)
758 {
759     if (kvm_create_device(kvm_state,
760                           KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
761         return 3;
762     } else if (kvm_create_device(kvm_state,
763                                  KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
764         return 2;
765     } else {
766         return 0;
767     }
768 }
769 
770 int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level)
771 {
772     int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq;
773     int cpu_idx1 = cpu % 256;
774     int cpu_idx2 = cpu / 256;
775 
776     kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) |
777                (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT);
778 
779     return kvm_set_irq(kvm_state, kvm_irq, !!level);
780 }
781 
782 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
783                              uint64_t address, uint32_t data, PCIDevice *dev)
784 {
785     AddressSpace *as = pci_device_iommu_address_space(dev);
786     hwaddr xlat, len, doorbell_gpa;
787     MemoryRegionSection mrs;
788     MemoryRegion *mr;
789     int ret = 1;
790 
791     if (as == &address_space_memory) {
792         return 0;
793     }
794 
795     /* MSI doorbell address is translated by an IOMMU */
796 
797     rcu_read_lock();
798     mr = address_space_translate(as, address, &xlat, &len, true,
799                                  MEMTXATTRS_UNSPECIFIED);
800     if (!mr) {
801         goto unlock;
802     }
803     mrs = memory_region_find(mr, xlat, 1);
804     if (!mrs.mr) {
805         goto unlock;
806     }
807 
808     doorbell_gpa = mrs.offset_within_address_space;
809     memory_region_unref(mrs.mr);
810 
811     route->u.msi.address_lo = doorbell_gpa;
812     route->u.msi.address_hi = doorbell_gpa >> 32;
813 
814     trace_kvm_arm_fixup_msi_route(address, doorbell_gpa);
815 
816     ret = 0;
817 
818 unlock:
819     rcu_read_unlock();
820     return ret;
821 }
822 
823 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
824                                 int vector, PCIDevice *dev)
825 {
826     return 0;
827 }
828 
/* Always returns 0; @virq is not used. */
int kvm_arch_release_virq_post(int virq)
{
    const int result = 0;

    return result;
}
833 
/*
 * Convert MSI @data to a GSI number: subtract 32 (with unsigned 32-bit
 * wrap-around) and keep the low 16 bits.
 * NOTE(review): 32 is presumably the first SPI interrupt ID -- confirm.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t offset = data - 32;

    return offset & 0xffff;
}
838