xref: /openbmc/qemu/target/arm/kvm.c (revision ce7cdebd)
/*
 * ARM implementation of KVM hooks
 *
 * Copyright Christoffer Dall 2009-2010
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "cpu.h"
#include "trace.h"
#include "internals.h"
#include "hw/pci/pci.h"
#include "exec/memattrs.h"
#include "exec/address-spaces.h"
#include "hw/boards.h"
#include "hw/irq.h"
#include "qemu/log.h"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static bool cap_has_mp_state;
static bool cap_has_inject_serror_esr;

static ARMHostCPUFeatures arm_host_cpu_features;

int kvm_arm_vcpu_init(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    struct kvm_vcpu_init init;

    init.target = cpu->kvm_target;
    memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));

    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
}
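
/*
 * A minimal sketch of the caller side (hedged: the exact flags depend on the
 * CPU type and host capabilities; see kvm_arch_init_vcpu() in
 * kvm32.c/kvm64.c). kvm_init_features is a bitmap of KVM_ARM_VCPU_* flags
 * filled in before calling here, e.g.:
 *
 *     if (cpu->start_powered_off) {
 *         cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 *     }
 *     if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
 *         cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
 *     }
 *     ret = kvm_arm_vcpu_init(cs);
 */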

int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
{
    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature);
}

void kvm_arm_init_serror_injection(CPUState *cs)
{
    cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
                                    KVM_CAP_ARM_INJECT_SERROR_ESR);
}

bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
                                      int *fdarray,
                                      struct kvm_vcpu_init *init)
{
    int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;

    kvmfd = qemu_open("/dev/kvm", O_RDWR);
    if (kvmfd < 0) {
        goto err;
    }
    vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
    if (vmfd < 0) {
        goto err;
    }
    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
    if (cpufd < 0) {
        goto err;
    }

    if (!init) {
        /* Caller doesn't want the VCPU to be initialized, so skip it */
        goto finish;
    }

    if (init->target == -1) {
        struct kvm_vcpu_init preferred;

        ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred);
        if (!ret) {
            init->target = preferred.target;
        }
    }
    if (ret >= 0) {
        ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
        if (ret < 0) {
            goto err;
        }
    } else if (cpus_to_try) {
        /* Old kernel which doesn't know about the
         * PREFERRED_TARGET ioctl: we know it will only support
         * creating one kind of guest CPU which is its preferred
         * CPU type.
         */
        struct kvm_vcpu_init try;

        while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
            try.target = *cpus_to_try++;
            memcpy(try.features, init->features, sizeof(init->features));
            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try);
            if (ret >= 0) {
                break;
            }
        }
        if (ret < 0) {
            goto err;
        }
        init->target = try.target;
    } else {
        /* Treat a NULL cpus_to_try argument the same as an empty
         * list, which means we will fail the call since this must
         * be an old kernel which doesn't support PREFERRED_TARGET.
         */
        goto err;
    }

finish:
    fdarray[0] = kvmfd;
    fdarray[1] = vmfd;
    fdarray[2] = cpufd;

    return true;

err:
    if (cpufd >= 0) {
        close(cpufd);
    }
    if (vmfd >= 0) {
        close(vmfd);
    }
    if (kvmfd >= 0) {
        close(kvmfd);
    }

    return false;
}

void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
{
    int i;

    for (i = 2; i >= 0; i--) {
        close(fdarray[i]);
    }
}
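
/*
 * Illustrative usage (a sketch; the real consumer is
 * kvm_arm_get_host_cpu_features() in kvm32.c/kvm64.c): probe a host CPU
 * property by spinning up a scratch VCPU, reading a register from it, and
 * tearing everything down again:
 *
 *     int fdarray[3];
 *     struct kvm_vcpu_init init = { .target = -1 };
 *     uint64_t val;
 *     struct kvm_one_reg r = {
 *         .id   = some_reg_id,                  // hypothetical register ID
 *         .addr = (uintptr_t)&val,
 *     };
 *
 *     if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
 *         return false;
 *     }
 *     ioctl(fdarray[2], KVM_GET_ONE_REG, &r);   // fdarray[2] is the vcpu fd
 *     kvm_arm_destroy_scratch_host_vcpu(fdarray);
 *
 * where cpus_to_try is a QEMU_KVM_ARM_TARGET_NONE-terminated list of
 * candidate targets (or NULL to rely on KVM_ARM_PREFERRED_TARGET alone).
 */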

void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
{
    CPUARMState *env = &cpu->env;

    if (!arm_host_cpu_features.dtb_compatible) {
        if (!kvm_enabled() ||
            !kvm_arm_get_host_cpu_features(&arm_host_cpu_features)) {
            /* We can't report this error yet, so flag that we need to
             * report it in arm_cpu_realizefn().
             */
            cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
            cpu->host_cpu_probe_failed = true;
            return;
        }
    }

    cpu->kvm_target = arm_host_cpu_features.target;
    cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
    cpu->isar = arm_host_cpu_features.isar;
    env->features = arm_host_cpu_features.features;
}

bool kvm_arm_pmu_supported(CPUState *cpu)
{
    return kvm_check_extension(cpu->kvm_state, KVM_CAP_ARM_PMU_V3);
}

int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
{
    KVMState *s = KVM_STATE(ms->accelerator);
    int ret;

    ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE);
    /* Kernels without KVM_CAP_ARM_VM_IPA_SIZE only support a 40-bit IPA */
    return ret > 0 ? ret : 40;
}
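
/*
 * A hedged sketch of how the result is consumed (the names below come from
 * the generic KVM init path and the virt board, not from this file): the
 * board's kvm_type hook validates the requested IPA size against this limit,
 * and the value is then handed to KVM_CREATE_VM in its type argument via the
 * KVM_VM_TYPE_ARM_IPA_SIZE() macro from linux/kvm.h:
 *
 *     max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms);
 *     if (requested_pa_size > max_vm_pa_size) {
 *         // reject the configuration
 *     }
 *     vmfd = ioctl(kvmfd, KVM_CREATE_VM,
 *                  KVM_VM_TYPE_ARM_IPA_SIZE(requested_pa_size));
 */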

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    int ret = 0;
    /* For ARM, interrupt delivery is always asynchronous,
     * whether we are using an in-kernel VGIC or not.
     */
    kvm_async_interrupts_allowed = true;

    /*
     * PSCI wakes up secondary cores, so we always need to
     * have vCPUs waiting in kernel space.
     */
    kvm_halt_in_kernel_allowed = true;

    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);

    if (ms->smp.cpus > 256 &&
        !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) {
        error_report("Using more than 256 vcpus requires a host kernel "
                     "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2");
        ret = -EINVAL;
    }

    return ret;
}

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

/* We track all the KVM devices which need their memory addresses
 * passed to the kernel in a list of these structures.
 * When board init is complete we run through the list and
 * tell the kernel the base addresses of the memory regions.
 * We use a MemoryListener to track mapping and unmapping of
 * the regions during board creation, so the board models don't
 * need to do anything special for the KVM case.
 *
 * Sometimes the address must be OR'ed with some other fields
 * (for example for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION).
 * @kda_addr_ormask holds the value of those fields.
 */
typedef struct KVMDevice {
    struct kvm_arm_device_addr kda;
    struct kvm_device_attr kdattr;
    uint64_t kda_addr_ormask;
    MemoryRegion *mr;
    QSLIST_ENTRY(KVMDevice) entries;
    int dev_fd;
} KVMDevice;

static QSLIST_HEAD(, KVMDevice) kvm_devices_head;

static void kvm_arm_devlistener_add(MemoryListener *listener,
                                    MemoryRegionSection *section)
{
    KVMDevice *kd;

    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
        if (section->mr == kd->mr) {
            kd->kda.addr = section->offset_within_address_space;
        }
    }
}

static void kvm_arm_devlistener_del(MemoryListener *listener,
                                    MemoryRegionSection *section)
{
    KVMDevice *kd;

    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
        if (section->mr == kd->mr) {
            kd->kda.addr = -1;
        }
    }
}

static MemoryListener devlistener = {
    .region_add = kvm_arm_devlistener_add,
    .region_del = kvm_arm_devlistener_del,
};

static void kvm_arm_set_device_addr(KVMDevice *kd)
{
    struct kvm_device_attr *attr = &kd->kdattr;
    int ret;

    /* If the device control API is available and we have a device fd on the
     * KVMDevice struct, let's use the newer API.
     */
    if (kd->dev_fd >= 0) {
        uint64_t addr = kd->kda.addr;

        addr |= kd->kda_addr_ormask;
        attr->addr = (uintptr_t)&addr;
        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
    } else {
        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
    }

    if (ret < 0) {
        fprintf(stderr, "Failed to set device address: %s\n",
                strerror(-ret));
        abort();
    }
}

static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
{
    KVMDevice *kd, *tkd;

    QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
        if (kd->kda.addr != -1) {
            kvm_arm_set_device_addr(kd);
        }
        memory_region_unref(kd->mr);
        QSLIST_REMOVE_HEAD(&kvm_devices_head, entries);
        g_free(kd);
    }
    memory_listener_unregister(&devlistener);
}

static Notifier notify = {
    .notify = kvm_arm_machine_init_done,
};

void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
                             uint64_t attr, int dev_fd, uint64_t addr_ormask)
{
    KVMDevice *kd;

    if (!kvm_irqchip_in_kernel()) {
        return;
    }

    if (QSLIST_EMPTY(&kvm_devices_head)) {
        memory_listener_register(&devlistener, &address_space_memory);
        qemu_add_machine_init_done_notifier(&notify);
    }
    kd = g_new0(KVMDevice, 1);
    kd->mr = mr;
    kd->kda.id = devid;
    kd->kda.addr = -1;
    kd->kdattr.flags = 0;
    kd->kdattr.group = group;
    kd->kdattr.attr = attr;
    kd->dev_fd = dev_fd;
    kd->kda_addr_ormask = addr_ormask;
    QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
    memory_region_ref(kd->mr);
}
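
/*
 * Illustrative example (a sketch of a typical caller, not code from this
 * file): the in-kernel VGICv2 device model registers its distributor region
 * roughly like
 *
 *     kvm_arm_register_device(&s->iomem,
 *                             (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT)
 *                                 | KVM_VGIC_V2_ADDR_TYPE_DIST,
 *                             KVM_DEV_ARM_VGIC_GRP_ADDR,
 *                             KVM_VGIC_V2_ADDR_TYPE_DIST,
 *                             s->dev_fd, 0);
 *
 * The guest physical address itself is filled in later by the
 * MemoryListener above, once the board maps the region.
 */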

static int compare_u64(const void *a, const void *b)
{
    if (*(uint64_t *)a > *(uint64_t *)b) {
        return 1;
    }
    if (*(uint64_t *)a < *(uint64_t *)b) {
        return -1;
    }
    return 0;
}

/* Initialize the ARMCPU cpreg list according to the kernel's
 * definition of what CPU registers it knows about (and throw away
 * the previous TCG-created cpreg list).
 */
int kvm_arm_init_cpreg_list(ARMCPU *cpu)
{
    struct kvm_reg_list rl;
    struct kvm_reg_list *rlp;
    int i, ret, arraylen;
    CPUState *cs = CPU(cpu);

    /* Probe with n = 0 first: the kernel fails the call with E2BIG and
     * tells us how many registers there are, so we can allocate a big
     * enough buffer and ask again.
     */
    rl.n = 0;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
    if (ret != -E2BIG) {
        return ret;
    }
    rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
    rlp->n = rl.n;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
    if (ret) {
        goto out;
    }
    /* Sort the list we get back from the kernel, since cpreg_tuples
     * must be in strictly ascending order.
     */
    qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);

    for (i = 0, arraylen = 0; i < rlp->n; i++) {
        if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
            continue;
        }
        switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
        case KVM_REG_SIZE_U64:
            break;
        default:
            fprintf(stderr, "Can't handle size of register in kernel list\n");
            ret = -EINVAL;
            goto out;
        }

        arraylen++;
    }

    cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
    cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
    cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
                                         arraylen);
    cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
                                        arraylen);
    cpu->cpreg_array_len = arraylen;
    cpu->cpreg_vmstate_array_len = arraylen;

    for (i = 0, arraylen = 0; i < rlp->n; i++) {
        uint64_t regidx = rlp->reg[i];
        if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
            continue;
        }
        cpu->cpreg_indexes[arraylen] = regidx;
        arraylen++;
    }
    assert(cpu->cpreg_array_len == arraylen);

    if (!write_kvmstate_to_list(cpu)) {
        /* Shouldn't happen unless the kernel is inconsistent about
         * what registers exist.
         */
        fprintf(stderr, "Initial read of kernel register state failed\n");
        ret = -EINVAL;
        goto out;
    }

out:
    g_free(rlp);
    return ret;
}

bool write_kvmstate_to_list(ARMCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int i;
    bool ok = true;

    for (i = 0; i < cpu->cpreg_array_len; i++) {
        struct kvm_one_reg r;
        uint64_t regidx = cpu->cpreg_indexes[i];
        uint32_t v32;
        int ret;

        r.id = regidx;

        switch (regidx & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            /* 32-bit registers are read into v32 and then widened,
             * since cpreg_values[] is an array of uint64_t.
             */
            r.addr = (uintptr_t)&v32;
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
            if (!ret) {
                cpu->cpreg_values[i] = v32;
            }
            break;
        case KVM_REG_SIZE_U64:
            r.addr = (uintptr_t)(cpu->cpreg_values + i);
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
            break;
        default:
            abort();
        }
        if (ret) {
            ok = false;
        }
    }
    return ok;
}

bool write_list_to_kvmstate(ARMCPU *cpu, int level)
{
    CPUState *cs = CPU(cpu);
    int i;
    bool ok = true;

    for (i = 0; i < cpu->cpreg_array_len; i++) {
        struct kvm_one_reg r;
        uint64_t regidx = cpu->cpreg_indexes[i];
        uint32_t v32;
        int ret;

        if (kvm_arm_cpreg_level(regidx) > level) {
            continue;
        }

        r.id = regidx;
        switch (regidx & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            v32 = cpu->cpreg_values[i];
            r.addr = (uintptr_t)&v32;
            break;
        case KVM_REG_SIZE_U64:
            r.addr = (uintptr_t)(cpu->cpreg_values + i);
            break;
        default:
            abort();
        }
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
        if (ret) {
            /* We might fail for "unknown register" and also for
             * "you tried to set a register which is constant with
             * a different value from what it actually contains".
             */
            ok = false;
        }
    }
    return ok;
}
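
/*
 * A hedged note on the level argument: callers (see kvm_arch_put_registers())
 * pass one of the KVM_PUT_RUNTIME_STATE / KVM_PUT_RESET_STATE /
 * KVM_PUT_FULL_STATE levels from sysemu/kvm.h, so a register whose
 * kvm_arm_cpreg_level() is KVM_PUT_FULL_STATE is only written back to KVM
 * on a full-state sync:
 *
 *     if (!write_list_to_kvmstate(cpu, level)) {
 *         // roughly what kvm_arch_put_registers() does on failure
 *         return -EINVAL;
 *     }
 */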

void kvm_arm_reset_vcpu(ARMCPU *cpu)
{
    int ret;

    /* Re-init VCPU so that all registers are set to
     * their respective reset values.
     */
    ret = kvm_arm_vcpu_init(CPU(cpu));
    if (ret < 0) {
        fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
        abort();
    }
    if (!write_kvmstate_to_list(cpu)) {
        fprintf(stderr, "write_kvmstate_to_list failed\n");
        abort();
    }
    /*
     * Sync the reset values also into the CPUState. This is necessary
     * because the next thing we do will be a kvm_arch_put_registers()
     * which will update the list values from the CPUState before copying
     * the list values back to KVM. It's OK to ignore failure returns here
     * for the same reason we do so in kvm_arch_get_registers().
     */
    write_list_to_cpustate(cpu);
}

/*
 * Update KVM's MP_STATE based on what QEMU thinks it is
 */
int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
{
    if (cap_has_mp_state) {
        struct kvm_mp_state mp_state = {
            .mp_state = (cpu->power_state == PSCI_OFF) ?
            KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
        };
        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
        if (ret) {
            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
                    __func__, ret, strerror(-ret));
            return -1;
        }
    }

    return 0;
}

/*
 * Sync the KVM MP_STATE into QEMU
 */
int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
{
    if (cap_has_mp_state) {
        struct kvm_mp_state mp_state;
        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
        if (ret) {
            fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
                    __func__, ret, strerror(-ret));
            abort();
        }
        cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
            PSCI_OFF : PSCI_ON;
    }

    return 0;
}

int kvm_put_vcpu_events(ARMCPU *cpu)
{
    CPUARMState *env = &cpu->env;
    struct kvm_vcpu_events events;
    int ret;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    memset(&events, 0, sizeof(events));
    events.exception.serror_pending = env->serror.pending;

    /* Inject the SError into the guest with the specified syndrome if the
     * host kernel supports it; otherwise inject the SError without a
     * syndrome.
     */
    if (cap_has_inject_serror_esr) {
        events.exception.serror_has_esr = env->serror.has_esr;
        events.exception.serror_esr = env->serror.esr;
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
    if (ret) {
        error_report("failed to put vcpu events");
    }

    return ret;
}

int kvm_get_vcpu_events(ARMCPU *cpu)
{
    CPUARMState *env = &cpu->env;
    struct kvm_vcpu_events events;
    int ret;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    memset(&events, 0, sizeof(events));
    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
    if (ret) {
        error_report("failed to get vcpu events");
        return ret;
    }

    env->serror.pending = events.exception.serror_pending;
    env->serror.has_esr = events.exception.serror_has_esr;
    env->serror.esr = events.exception.serror_esr;

    return 0;
}

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
}

MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    ARMCPU *cpu;
    uint32_t switched_level;

    if (kvm_irqchip_in_kernel()) {
        /*
         * We only need to sync timer states with user-space interrupt
         * controllers, so return early and save cycles if we don't.
         */
        return MEMTXATTRS_UNSPECIFIED;
    }

    cpu = ARM_CPU(cs);

    /* Synchronize our shadowed in-kernel device irq lines with the kvm ones */
    if (run->s.regs.device_irq_level != cpu->device_irq_level) {
        switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level;

        qemu_mutex_lock_iothread();

        if (switched_level & KVM_ARM_DEV_EL1_VTIMER) {
            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT],
                         !!(run->s.regs.device_irq_level &
                            KVM_ARM_DEV_EL1_VTIMER));
            switched_level &= ~KVM_ARM_DEV_EL1_VTIMER;
        }

        if (switched_level & KVM_ARM_DEV_EL1_PTIMER) {
            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_PHYS],
                         !!(run->s.regs.device_irq_level &
                            KVM_ARM_DEV_EL1_PTIMER));
            switched_level &= ~KVM_ARM_DEV_EL1_PTIMER;
        }

        if (switched_level & KVM_ARM_DEV_PMU) {
            qemu_set_irq(cpu->pmu_interrupt,
                         !!(run->s.regs.device_irq_level & KVM_ARM_DEV_PMU));
            switched_level &= ~KVM_ARM_DEV_PMU;
        }

        if (switched_level) {
            qemu_log_mask(LOG_UNIMP, "%s: unhandled in-kernel device IRQ %x\n",
                          __func__, switched_level);
        }

        /* We also mark unknown levels as processed to not waste cycles */
        cpu->device_irq_level = run->s.regs.device_irq_level;
        qemu_mutex_unlock_iothread();
    }

    return MEMTXATTRS_UNSPECIFIED;
}


int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    int ret = 0;

    switch (run->exit_reason) {
    case KVM_EXIT_DEBUG:
        if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
            ret = EXCP_DEBUG;
        } /* otherwise return to guest */
        break;
    default:
        qemu_log_mask(LOG_UNIMP, "%s: unhandled exit reason %d\n",
                      __func__, run->exit_reason);
        break;
    }
    return ret;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cs)
{
    return true;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return 0;
}

/* The #ifdef protections are needed until the 32-bit headers are imported;
 * they can be removed once 32-bit and 64-bit reach feature parity.
 */
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
#ifdef KVM_GUESTDBG_USE_SW_BP
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }
#endif
#ifdef KVM_GUESTDBG_USE_HW
    if (kvm_arm_hw_debug_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
        kvm_arm_copy_hw_debug_data(&dbg->arch);
    }
#endif
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_irqchip_create(KVMState *s)
{
    if (kvm_kernel_irqchip_split()) {
        /* error_report() rather than perror(): errno is meaningless here */
        error_report("-machine kernel_irqchip=split is not supported on ARM");
        exit(1);
    }

    /* If we can create the VGIC using the newer device control API, we
     * let the device do this when it initializes itself, otherwise we
     * fall back to the old API.
     */
    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
}

int kvm_arm_vgic_probe(void)
{
    if (kvm_create_device(kvm_state,
                          KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
        return 3;
    } else if (kvm_create_device(kvm_state,
                                 KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
        return 2;
    } else {
        return 0;
    }
}

int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level)
{
    int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq;
    /* The vcpu index is split across two fields so that more than 256
     * vcpus can be addressed (see KVM_CAP_ARM_IRQ_LINE_LAYOUT_2).
     */
    int cpu_idx1 = cpu % 256;
    int cpu_idx2 = cpu / 256;

    kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) |
               (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT);

    return kvm_set_irq(kvm_state, kvm_irq, !!level);
}
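
/*
 * Worked example (illustrative only): asserting a per-vcpu interrupt
 * (irqtype KVM_ARM_IRQ_TYPE_PPI) on vcpu 300 splits the index as
 *
 *     cpu_idx1 = 300 % 256 = 44    ->  KVM_ARM_IRQ_VCPU field
 *     cpu_idx2 = 300 / 256 = 1     ->  KVM_ARM_IRQ_VCPU2 field
 *
 * This is why kvm_arch_init() rejects more than 256 vcpus on kernels
 * without KVM_CAP_ARM_IRQ_LINE_LAYOUT_2, which have no VCPU2 field.
 */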

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    AddressSpace *as = pci_device_iommu_address_space(dev);
    hwaddr xlat, len, doorbell_gpa;
    MemoryRegionSection mrs;
    MemoryRegion *mr;
    int ret = 1;

    if (as == &address_space_memory) {
        return 0;
    }

    /* MSI doorbell address is translated by an IOMMU */

    rcu_read_lock();
    mr = address_space_translate(as, address, &xlat, &len, true,
                                 MEMTXATTRS_UNSPECIFIED);
    if (!mr) {
        goto unlock;
    }
    mrs = memory_region_find(mr, xlat, 1);
    if (!mrs.mr) {
        goto unlock;
    }

    doorbell_gpa = mrs.offset_within_address_space;
    memory_region_unref(mrs.mr);

    route->u.msi.address_lo = doorbell_gpa;
    route->u.msi.address_hi = doorbell_gpa >> 32;

    trace_kvm_arm_fixup_msi_route(address, doorbell_gpa);

    ret = 0;

unlock:
    rcu_read_unlock();
    return ret;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

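/*
 * A brief note on the arithmetic below (hedged, based on the GICv2m MSI
 * wiring on the virt board): the MSI data word carries the GIC INTID of the
 * target SPI, while KVM's GSI routing numbers SPIs from 0; SPIs start at
 * INTID 32, hence the subtraction.
 */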
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return (data - 32) & 0xffff;
}