xref: /openbmc/qemu/hw/vfio/platform.c (revision 6af8037c42fdc3d20d5aa2686799ab356a9ee1a9)
/*
 * vfio based device assignment support - platform devices
 *
 * Copyright Linaro Limited, 2014
 *
 * Authors:
 *  Kim Phillips <kim.phillips@linaro.org>
 *  Eric Auger <eric.auger@linaro.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on vfio based PCI device assignment support:
 *  Copyright Red Hat, Inc. 2012
 */
160ea2730bSEric Auger 
17c6eacb1aSPeter Maydell #include "qemu/osdep.h"
18a6c50e1cSZhenzhong Duan #include CONFIG_DEVICES /* CONFIG_IOMMUFD */
19da34e65cSMarkus Armbruster #include "qapi/error.h"
200ea2730bSEric Auger #include <sys/ioctl.h>
21e2075277SLeon Alrae #include <linux/vfio.h>
220ea2730bSEric Auger 
230ea2730bSEric Auger #include "hw/vfio/vfio-platform.h"
24a6c50e1cSZhenzhong Duan #include "sysemu/iommufd.h"
25d6454270SMarkus Armbruster #include "migration/vmstate.h"
260ea2730bSEric Auger #include "qemu/error-report.h"
276e8a355dSDaniel Brodsky #include "qemu/lockable.h"
28db725815SMarkus Armbruster #include "qemu/main-loop.h"
290b8fa32fSMarkus Armbruster #include "qemu/module.h"
300ea2730bSEric Auger #include "qemu/range.h"
310ea2730bSEric Auger #include "exec/memory.h"
32d791937fSPhilippe Mathieu-Daudé #include "exec/address-spaces.h"
3338559979SEric Auger #include "qemu/queue.h"
340ea2730bSEric Auger #include "hw/sysbus.h"
350ea2730bSEric Auger #include "trace.h"
3664552b6bSMarkus Armbruster #include "hw/irq.h"
370ea2730bSEric Auger #include "hw/platform-bus.h"
38a27bd6c7SMarkus Armbruster #include "hw/qdev-properties.h"
39fb5f8164SEric Auger #include "sysemu/kvm.h"
400ea2730bSEric Auger 
/*
 * Functions used whatever the injection method
 */
4438559979SEric Auger 
/*
 * Tell whether the VFIO_IRQ_INFO_AUTOMASKED bit is set in the flags
 * recorded for this IRQ.
 */
static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
{
    return (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) != 0;
}
49a5b39cd3SEric Auger 
5038559979SEric Auger /**
5138559979SEric Auger  * vfio_init_intp - allocate, initialize the IRQ struct pointer
5238559979SEric Auger  * and add it into the list of IRQs
5338559979SEric Auger  * @vbasedev: the VFIO device handle
5438559979SEric Auger  * @info: irq info struct retrieved from VFIO driver
555ff7419dSEric Auger  * @errp: error object
5638559979SEric Auger  */
vfio_init_intp(VFIODevice * vbasedev,struct vfio_irq_info info,Error ** errp)5738559979SEric Auger static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
585ff7419dSEric Auger                                 struct vfio_irq_info info, Error **errp)
5938559979SEric Auger {
6038559979SEric Auger     int ret;
6138559979SEric Auger     VFIOPlatformDevice *vdev =
6238559979SEric Auger         container_of(vbasedev, VFIOPlatformDevice, vbasedev);
6338559979SEric Auger     SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
6438559979SEric Auger     VFIOINTp *intp;
6538559979SEric Auger 
6638559979SEric Auger     intp = g_malloc0(sizeof(*intp));
6738559979SEric Auger     intp->vdev = vdev;
6838559979SEric Auger     intp->pin = info.index;
6938559979SEric Auger     intp->flags = info.flags;
7038559979SEric Auger     intp->state = VFIO_IRQ_INACTIVE;
71fb5f8164SEric Auger     intp->kvm_accel = false;
7238559979SEric Auger 
7338559979SEric Auger     sysbus_init_irq(sbdev, &intp->qemuirq);
7438559979SEric Auger 
7538559979SEric Auger     /* Get an eventfd for trigger */
76b21e2380SMarkus Armbruster     intp->interrupt = g_new0(EventNotifier, 1);
77a22313deSEric Auger     ret = event_notifier_init(intp->interrupt, 0);
7838559979SEric Auger     if (ret) {
79a22313deSEric Auger         g_free(intp->interrupt);
8038559979SEric Auger         g_free(intp);
815ff7419dSEric Auger         error_setg_errno(errp, -ret,
82bf04ef35SLi Qiang                          "failed to initialize trigger eventfd notifier");
8338559979SEric Auger         return NULL;
8438559979SEric Auger     }
85a5b39cd3SEric Auger     if (vfio_irq_is_automasked(intp)) {
86fb5f8164SEric Auger         /* Get an eventfd for resample/unmask */
87b21e2380SMarkus Armbruster         intp->unmask = g_new0(EventNotifier, 1);
88a22313deSEric Auger         ret = event_notifier_init(intp->unmask, 0);
89fb5f8164SEric Auger         if (ret) {
90a22313deSEric Auger             g_free(intp->interrupt);
91a22313deSEric Auger             g_free(intp->unmask);
92fb5f8164SEric Auger             g_free(intp);
935ff7419dSEric Auger             error_setg_errno(errp, -ret,
94bf04ef35SLi Qiang                              "failed to initialize resample eventfd notifier");
95fb5f8164SEric Auger             return NULL;
96fb5f8164SEric Auger         }
97a5b39cd3SEric Auger     }
9838559979SEric Auger 
9938559979SEric Auger     QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
10038559979SEric Auger     return intp;
10138559979SEric Auger }
10238559979SEric Auger 
10338559979SEric Auger /**
10438559979SEric Auger  * vfio_set_trigger_eventfd - set VFIO eventfd handling
10538559979SEric Auger  *
10638559979SEric Auger  * @intp: IRQ struct handle
10738559979SEric Auger  * @handler: handler to be called on eventfd signaling
10838559979SEric Auger  *
10938559979SEric Auger  * Setup VFIO signaling and attach an optional user-side handler
11038559979SEric Auger  * to the eventfd
11138559979SEric Auger  */
vfio_set_trigger_eventfd(VFIOINTp * intp,eventfd_user_side_handler_t handler)11238559979SEric Auger static int vfio_set_trigger_eventfd(VFIOINTp *intp,
11338559979SEric Auger                                     eventfd_user_side_handler_t handler)
11438559979SEric Auger {
11538559979SEric Auger     VFIODevice *vbasedev = &intp->vdev->vbasedev;
116201a7331SEric Auger     int32_t fd = event_notifier_get_fd(intp->interrupt);
117201a7331SEric Auger     Error *err = NULL;
11838559979SEric Auger 
119201a7331SEric Auger     qemu_set_fd_handler(fd, (IOHandler *)handler, NULL, intp);
120201a7331SEric Auger 
12184e37d02SZhenzhong Duan     if (!vfio_set_irq_signaling(vbasedev, intp->pin, 0,
12284e37d02SZhenzhong Duan                                 VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
123201a7331SEric Auger         error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
124201a7331SEric Auger         qemu_set_fd_handler(fd, NULL, NULL, NULL);
12584e37d02SZhenzhong Duan         return -EINVAL;
12638559979SEric Auger     }
127201a7331SEric Auger 
12884e37d02SZhenzhong Duan     return 0;
12938559979SEric Auger }
13038559979SEric Auger 
/*
 * Functions only used when eventfds are handled on user-side,
 * i.e. without irqfd
 */
13438559979SEric Auger  */
13538559979SEric Auger 
13638559979SEric Auger /**
13738559979SEric Auger  * vfio_mmap_set_enabled - enable/disable the fast path mode
13838559979SEric Auger  * @vdev: the VFIO platform device
13938559979SEric Auger  * @enabled: the target mmap state
14038559979SEric Auger  *
14138559979SEric Auger  * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
14238559979SEric Auger  * enabled = false ~ slow path = MMIO region is trapped and region callbacks
14338559979SEric Auger  * are called; slow path enables to trap the device IRQ status register reset
14438559979SEric Auger */
14538559979SEric Auger 
vfio_mmap_set_enabled(VFIOPlatformDevice * vdev,bool enabled)14638559979SEric Auger static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
14738559979SEric Auger {
14838559979SEric Auger     int i;
14938559979SEric Auger 
15038559979SEric Auger     for (i = 0; i < vdev->vbasedev.num_regions; i++) {
151db0da029SAlex Williamson         vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
15238559979SEric Auger     }
15338559979SEric Auger }
15438559979SEric Auger 
15538559979SEric Auger /**
15638559979SEric Auger  * vfio_intp_mmap_enable - timer function, restores the fast path
15738559979SEric Auger  * if there is no more active IRQ
15838559979SEric Auger  * @opaque: actually points to the VFIO platform device
15938559979SEric Auger  *
160631ba5a1SCai Huoqing  * Called on mmap timer timeout, this function checks whether the
16138559979SEric Auger  * IRQ is still active and if not, restores the fast path.
16238559979SEric Auger  * by construction a single eventfd is handled at a time.
16338559979SEric Auger  * if the IRQ is still active, the timer is re-programmed.
16438559979SEric Auger  */
vfio_intp_mmap_enable(void * opaque)16538559979SEric Auger static void vfio_intp_mmap_enable(void *opaque)
16638559979SEric Auger {
16738559979SEric Auger     VFIOINTp *tmp;
16838559979SEric Auger     VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
16938559979SEric Auger 
17088eef597SAmey Narkhede     QEMU_LOCK_GUARD(&vdev->intp_mutex);
17138559979SEric Auger     QLIST_FOREACH(tmp, &vdev->intp_list, next) {
17238559979SEric Auger         if (tmp->state == VFIO_IRQ_ACTIVE) {
17338559979SEric Auger             trace_vfio_platform_intp_mmap_enable(tmp->pin);
17438559979SEric Auger             /* re-program the timer to check active status later */
17538559979SEric Auger             timer_mod(vdev->mmap_timer,
17638559979SEric Auger                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
17738559979SEric Auger                           vdev->mmap_timeout);
17838559979SEric Auger             return;
17938559979SEric Auger         }
18038559979SEric Auger     }
18138559979SEric Auger     vfio_mmap_set_enabled(vdev, true);
18238559979SEric Auger }
18338559979SEric Auger 
18438559979SEric Auger /**
18538559979SEric Auger  * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
18638559979SEric Auger  * @opaque: opaque pointer, in practice the VFIOINTp handle
18738559979SEric Auger  *
18838559979SEric Auger  * The function is called on a previous IRQ completion, from
18938559979SEric Auger  * vfio_platform_eoi, while the intp_mutex is locked.
19038559979SEric Auger  * Also in such situation, the slow path already is set and
19138559979SEric Auger  * the mmap timer was already programmed.
19238559979SEric Auger  */
vfio_intp_inject_pending_lockheld(VFIOINTp * intp)19338559979SEric Auger static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
19438559979SEric Auger {
19538559979SEric Auger     trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
196a22313deSEric Auger                               event_notifier_get_fd(intp->interrupt));
19738559979SEric Auger 
19838559979SEric Auger     intp->state = VFIO_IRQ_ACTIVE;
19938559979SEric Auger 
20038559979SEric Auger     /* trigger the virtual IRQ */
20138559979SEric Auger     qemu_set_irq(intp->qemuirq, 1);
20238559979SEric Auger }
20338559979SEric Auger 
20438559979SEric Auger /**
20538559979SEric Auger  * vfio_intp_interrupt - The user-side eventfd handler
20638559979SEric Auger  * @opaque: opaque pointer which in practice is the VFIOINTp handle
20738559979SEric Auger  *
20838559979SEric Auger  * the function is entered in event handler context:
20938559979SEric Auger  * the vIRQ is injected into the guest if there is no other active
21038559979SEric Auger  * or pending IRQ.
21138559979SEric Auger  */
vfio_intp_interrupt(VFIOINTp * intp)21238559979SEric Auger static void vfio_intp_interrupt(VFIOINTp *intp)
21338559979SEric Auger {
21438559979SEric Auger     int ret;
21538559979SEric Auger     VFIOINTp *tmp;
21638559979SEric Auger     VFIOPlatformDevice *vdev = intp->vdev;
21738559979SEric Auger     bool delay_handling = false;
21838559979SEric Auger 
2196e8a355dSDaniel Brodsky     QEMU_LOCK_GUARD(&vdev->intp_mutex);
22038559979SEric Auger     if (intp->state == VFIO_IRQ_INACTIVE) {
22138559979SEric Auger         QLIST_FOREACH(tmp, &vdev->intp_list, next) {
22238559979SEric Auger             if (tmp->state == VFIO_IRQ_ACTIVE ||
22338559979SEric Auger                 tmp->state == VFIO_IRQ_PENDING) {
22438559979SEric Auger                 delay_handling = true;
22538559979SEric Auger                 break;
22638559979SEric Auger             }
22738559979SEric Auger         }
22838559979SEric Auger     }
22938559979SEric Auger     if (delay_handling) {
23038559979SEric Auger         /*
23138559979SEric Auger          * the new IRQ gets a pending status and is pushed in
23238559979SEric Auger          * the pending queue
23338559979SEric Auger          */
23438559979SEric Auger         intp->state = VFIO_IRQ_PENDING;
23538559979SEric Auger         trace_vfio_intp_interrupt_set_pending(intp->pin);
23638559979SEric Auger         QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
23738559979SEric Auger                              intp, pqnext);
2389b83b004SChen Qun         event_notifier_test_and_clear(intp->interrupt);
23938559979SEric Auger         return;
24038559979SEric Auger     }
24138559979SEric Auger 
24238559979SEric Auger     trace_vfio_platform_intp_interrupt(intp->pin,
243a22313deSEric Auger                               event_notifier_get_fd(intp->interrupt));
24438559979SEric Auger 
245a22313deSEric Auger     ret = event_notifier_test_and_clear(intp->interrupt);
24638559979SEric Auger     if (!ret) {
247594fd211SJohn Snow         error_report("Error when clearing fd=%d (ret = %d)",
248a22313deSEric Auger                      event_notifier_get_fd(intp->interrupt), ret);
24938559979SEric Auger     }
25038559979SEric Auger 
25138559979SEric Auger     intp->state = VFIO_IRQ_ACTIVE;
25238559979SEric Auger 
25338559979SEric Auger     /* sets slow path */
25438559979SEric Auger     vfio_mmap_set_enabled(vdev, false);
25538559979SEric Auger 
25638559979SEric Auger     /* trigger the virtual IRQ */
25738559979SEric Auger     qemu_set_irq(intp->qemuirq, 1);
25838559979SEric Auger 
25938559979SEric Auger     /*
26038559979SEric Auger      * Schedule the mmap timer which will restore fastpath when no IRQ
26138559979SEric Auger      * is active anymore
26238559979SEric Auger      */
26338559979SEric Auger     if (vdev->mmap_timeout) {
26438559979SEric Auger         timer_mod(vdev->mmap_timer,
26538559979SEric Auger                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
26638559979SEric Auger                       vdev->mmap_timeout);
26738559979SEric Auger     }
26838559979SEric Auger }
26938559979SEric Auger 
27038559979SEric Auger /**
27138559979SEric Auger  * vfio_platform_eoi - IRQ completion routine
27238559979SEric Auger  * @vbasedev: the VFIO device handle
27338559979SEric Auger  *
27438559979SEric Auger  * De-asserts the active virtual IRQ and unmasks the physical IRQ
27538559979SEric Auger  * (effective for level sensitive IRQ auto-masked by the  VFIO driver).
27638559979SEric Auger  * Then it handles next pending IRQ if any.
27738559979SEric Auger  * eoi function is called on the first access to any MMIO region
27838559979SEric Auger  * after an IRQ was triggered, trapped since slow path was set.
27938559979SEric Auger  * It is assumed this access corresponds to the IRQ status
28038559979SEric Auger  * register reset. With such a mechanism, a single IRQ can be
28138559979SEric Auger  * handled at a time since there is no way to know which IRQ
28238559979SEric Auger  * was completed by the guest (we would need additional details
28338559979SEric Auger  * about the IRQ status register mask).
28438559979SEric Auger  */
vfio_platform_eoi(VFIODevice * vbasedev)28538559979SEric Auger static void vfio_platform_eoi(VFIODevice *vbasedev)
28638559979SEric Auger {
28738559979SEric Auger     VFIOINTp *intp;
28838559979SEric Auger     VFIOPlatformDevice *vdev =
28938559979SEric Auger         container_of(vbasedev, VFIOPlatformDevice, vbasedev);
29038559979SEric Auger 
29188eef597SAmey Narkhede     QEMU_LOCK_GUARD(&vdev->intp_mutex);
29238559979SEric Auger     QLIST_FOREACH(intp, &vdev->intp_list, next) {
29338559979SEric Auger         if (intp->state == VFIO_IRQ_ACTIVE) {
29438559979SEric Auger             trace_vfio_platform_eoi(intp->pin,
295a22313deSEric Auger                                 event_notifier_get_fd(intp->interrupt));
29638559979SEric Auger             intp->state = VFIO_IRQ_INACTIVE;
29738559979SEric Auger 
29838559979SEric Auger             /* deassert the virtual IRQ */
29938559979SEric Auger             qemu_set_irq(intp->qemuirq, 0);
30038559979SEric Auger 
301a5b39cd3SEric Auger             if (vfio_irq_is_automasked(intp)) {
30238559979SEric Auger                 /* unmasks the physical level-sensitive IRQ */
30338559979SEric Auger                 vfio_unmask_single_irqindex(vbasedev, intp->pin);
30438559979SEric Auger             }
30538559979SEric Auger 
30638559979SEric Auger             /* a single IRQ can be active at a time */
30738559979SEric Auger             break;
30838559979SEric Auger         }
30938559979SEric Auger     }
31038559979SEric Auger     /* in case there are pending IRQs, handle the first one */
31138559979SEric Auger     if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
31238559979SEric Auger         intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
31338559979SEric Auger         vfio_intp_inject_pending_lockheld(intp);
31438559979SEric Auger         QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
31538559979SEric Auger     }
31638559979SEric Auger }
31738559979SEric Auger 
31838559979SEric Auger /**
31938559979SEric Auger  * vfio_start_eventfd_injection - starts the virtual IRQ injection using
32038559979SEric Auger  * user-side handled eventfds
32158892b44SEric Auger  * @sbdev: the sysbus device handle
32258892b44SEric Auger  * @irq: the qemu irq handle
32338559979SEric Auger  */
32438559979SEric Auger 
vfio_start_eventfd_injection(SysBusDevice * sbdev,qemu_irq irq)32558892b44SEric Auger static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
32638559979SEric Auger {
32758892b44SEric Auger     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
32858892b44SEric Auger     VFIOINTp *intp;
32958892b44SEric Auger 
33058892b44SEric Auger     QLIST_FOREACH(intp, &vdev->intp_list, next) {
33158892b44SEric Auger         if (intp->qemuirq == irq) {
33258892b44SEric Auger             break;
33358892b44SEric Auger         }
33458892b44SEric Auger     }
33558892b44SEric Auger     assert(intp);
33638559979SEric Auger 
337201a7331SEric Auger     if (vfio_set_trigger_eventfd(intp, vfio_intp_interrupt)) {
33858892b44SEric Auger         abort();
33938559979SEric Auger     }
34038559979SEric Auger }
34138559979SEric Auger 
/*
 * Functions used for irqfd
 */
345fb5f8164SEric Auger 
346fb5f8164SEric Auger /**
347fb5f8164SEric Auger  * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
348fb5f8164SEric Auger  * @intp: the IRQ struct handle
349fb5f8164SEric Auger  * programs the VFIO driver to unmask this IRQ when the
350fb5f8164SEric Auger  * intp->unmask eventfd is triggered
351fb5f8164SEric Auger  */
vfio_set_resample_eventfd(VFIOINTp * intp)352fb5f8164SEric Auger static int vfio_set_resample_eventfd(VFIOINTp *intp)
353fb5f8164SEric Auger {
354201a7331SEric Auger     int32_t fd = event_notifier_get_fd(intp->unmask);
355fb5f8164SEric Auger     VFIODevice *vbasedev = &intp->vdev->vbasedev;
356201a7331SEric Auger     Error *err = NULL;
357fb5f8164SEric Auger 
358201a7331SEric Auger     qemu_set_fd_handler(fd, NULL, NULL, NULL);
35984e37d02SZhenzhong Duan     if (!vfio_set_irq_signaling(vbasedev, intp->pin, 0,
36084e37d02SZhenzhong Duan                                 VFIO_IRQ_SET_ACTION_UNMASK, fd, &err)) {
361201a7331SEric Auger         error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
36284e37d02SZhenzhong Duan         return -EINVAL;
363fb5f8164SEric Auger     }
36484e37d02SZhenzhong Duan     return 0;
365fb5f8164SEric Auger }
366fb5f8164SEric Auger 
36758892b44SEric Auger /**
36858892b44SEric Auger  * vfio_start_irqfd_injection - starts the virtual IRQ injection using
36958892b44SEric Auger  * irqfd
37058892b44SEric Auger  *
37158892b44SEric Auger  * @sbdev: the sysbus device handle
37258892b44SEric Auger  * @irq: the qemu irq handle
37358892b44SEric Auger  *
37458892b44SEric Auger  * In case the irqfd setup fails, we fallback to userspace handled eventfd
37558892b44SEric Auger  */
vfio_start_irqfd_injection(SysBusDevice * sbdev,qemu_irq irq)376fb5f8164SEric Auger static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
377fb5f8164SEric Auger {
378fb5f8164SEric Auger     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
379fb5f8164SEric Auger     VFIOINTp *intp;
380fb5f8164SEric Auger 
381fb5f8164SEric Auger     if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
382fb5f8164SEric Auger         !vdev->irqfd_allowed) {
38358892b44SEric Auger         goto fail_irqfd;
384fb5f8164SEric Auger     }
385fb5f8164SEric Auger 
386fb5f8164SEric Auger     QLIST_FOREACH(intp, &vdev->intp_list, next) {
387fb5f8164SEric Auger         if (intp->qemuirq == irq) {
388fb5f8164SEric Auger             break;
389fb5f8164SEric Auger         }
390fb5f8164SEric Auger     }
391fb5f8164SEric Auger     assert(intp);
392fb5f8164SEric Auger 
393a22313deSEric Auger     if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
394a22313deSEric Auger                                    intp->unmask, irq) < 0) {
395fb5f8164SEric Auger         goto fail_irqfd;
396fb5f8164SEric Auger     }
397fb5f8164SEric Auger 
398fb5f8164SEric Auger     if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
399fb5f8164SEric Auger         goto fail_vfio;
400fb5f8164SEric Auger     }
401a5b39cd3SEric Auger     if (vfio_irq_is_automasked(intp)) {
402fb5f8164SEric Auger         if (vfio_set_resample_eventfd(intp) < 0) {
403fb5f8164SEric Auger             goto fail_vfio;
404fb5f8164SEric Auger         }
405a5b39cd3SEric Auger         trace_vfio_platform_start_level_irqfd_injection(intp->pin,
406a5b39cd3SEric Auger                                     event_notifier_get_fd(intp->interrupt),
407a5b39cd3SEric Auger                                     event_notifier_get_fd(intp->unmask));
408a5b39cd3SEric Auger     } else {
409a5b39cd3SEric Auger         trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
410a5b39cd3SEric Auger                                     event_notifier_get_fd(intp->interrupt));
411a5b39cd3SEric Auger     }
412fb5f8164SEric Auger 
413fb5f8164SEric Auger     intp->kvm_accel = true;
414fb5f8164SEric Auger 
415fb5f8164SEric Auger     return;
416fb5f8164SEric Auger fail_vfio:
417a22313deSEric Auger     kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
41858892b44SEric Auger     abort();
419fb5f8164SEric Auger fail_irqfd:
42058892b44SEric Auger     vfio_start_eventfd_injection(sbdev, irq);
421fb5f8164SEric Auger     return;
422fb5f8164SEric Auger }
423fb5f8164SEric Auger 
/* VFIO skeleton */
4250ea2730bSEric Auger 
/*
 * vfio_compute_needs_reset callback: unconditionally flags the device
 * as needing a reset.
 */
static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
{
    vbasedev->needs_reset = true;
}
4300ea2730bSEric Auger 
4310ea2730bSEric Auger /* not implemented yet */
vfio_platform_hot_reset_multi(VFIODevice * vbasedev)4320ea2730bSEric Auger static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
4330ea2730bSEric Auger {
4340ea2730bSEric Auger     return -1;
4350ea2730bSEric Auger }
4360ea2730bSEric Auger 
4370ea2730bSEric Auger /**
4380ea2730bSEric Auger  * vfio_populate_device - Allocate and populate MMIO region
43938559979SEric Auger  * and IRQ structs according to driver returned information
4400ea2730bSEric Auger  * @vbasedev: the VFIO device handle
4415ff7419dSEric Auger  * @errp: error object
4420ea2730bSEric Auger  *
4430ea2730bSEric Auger  */
vfio_populate_device(VFIODevice * vbasedev,Error ** errp)444*958609cfSZhenzhong Duan static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp)
4450ea2730bSEric Auger {
44638559979SEric Auger     VFIOINTp *intp, *tmp;
4470ea2730bSEric Auger     int i, ret = -1;
4480ea2730bSEric Auger     VFIOPlatformDevice *vdev =
4490ea2730bSEric Auger         container_of(vbasedev, VFIOPlatformDevice, vbasedev);
4500ea2730bSEric Auger 
4510ea2730bSEric Auger     if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
4525ff7419dSEric Auger         error_setg(errp, "this isn't a platform device");
453*958609cfSZhenzhong Duan         return false;
4540ea2730bSEric Auger     }
4550ea2730bSEric Auger 
4560b70743dSEric Auger     vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
4570ea2730bSEric Auger 
4580ea2730bSEric Auger     for (i = 0; i < vbasedev->num_regions; i++) {
459db0da029SAlex Williamson         char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
4600ea2730bSEric Auger 
461bdd81addSMarkus Armbruster         vdev->regions[i] = g_new0(VFIORegion, 1);
462db0da029SAlex Williamson         ret = vfio_region_setup(OBJECT(vdev), vbasedev,
463db0da029SAlex Williamson                                 vdev->regions[i], i, name);
464db0da029SAlex Williamson         g_free(name);
4650ea2730bSEric Auger         if (ret) {
4665ff7419dSEric Auger             error_setg_errno(errp, -ret, "failed to get region %d info", i);
4670ea2730bSEric Auger             goto reg_error;
4680ea2730bSEric Auger         }
4690ea2730bSEric Auger     }
4700ea2730bSEric Auger 
47138559979SEric Auger     vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
47238559979SEric Auger                                     vfio_intp_mmap_enable, vdev);
47338559979SEric Auger 
47438559979SEric Auger     QSIMPLEQ_INIT(&vdev->pending_intp_queue);
47538559979SEric Auger 
47638559979SEric Auger     for (i = 0; i < vbasedev->num_irqs; i++) {
47738559979SEric Auger         struct vfio_irq_info irq = { .argsz = sizeof(irq) };
47838559979SEric Auger 
47938559979SEric Auger         irq.index = i;
48038559979SEric Auger         ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
48138559979SEric Auger         if (ret) {
4825ff7419dSEric Auger             error_setg_errno(errp, -ret, "failed to get device irq info");
48338559979SEric Auger             goto irq_err;
48438559979SEric Auger         } else {
48538559979SEric Auger             trace_vfio_platform_populate_interrupts(irq.index,
48638559979SEric Auger                                                     irq.count,
48738559979SEric Auger                                                     irq.flags);
4885ff7419dSEric Auger             intp = vfio_init_intp(vbasedev, irq, errp);
48938559979SEric Auger             if (!intp) {
49038559979SEric Auger                 goto irq_err;
49138559979SEric Auger             }
49238559979SEric Auger         }
49338559979SEric Auger     }
494*958609cfSZhenzhong Duan     return true;
49538559979SEric Auger irq_err:
49638559979SEric Auger     timer_del(vdev->mmap_timer);
49738559979SEric Auger     QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
49838559979SEric Auger         QLIST_REMOVE(intp, next);
49938559979SEric Auger         g_free(intp);
50038559979SEric Auger     }
5010ea2730bSEric Auger reg_error:
5020ea2730bSEric Auger     for (i = 0; i < vbasedev->num_regions; i++) {
503db0da029SAlex Williamson         if (vdev->regions[i]) {
504db0da029SAlex Williamson             vfio_region_finalize(vdev->regions[i]);
505db0da029SAlex Williamson         }
5060ea2730bSEric Auger         g_free(vdev->regions[i]);
5070ea2730bSEric Auger     }
5080ea2730bSEric Auger     g_free(vdev->regions);
509*958609cfSZhenzhong Duan     return false;
5100ea2730bSEric Auger }
5110ea2730bSEric Auger 
5120ea2730bSEric Auger /* specialized functions for VFIO Platform devices */
5130ea2730bSEric Auger static VFIODeviceOps vfio_platform_ops = {
5140ea2730bSEric Auger     .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
5150ea2730bSEric Auger     .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
51638559979SEric Auger     .vfio_eoi = vfio_platform_eoi,
5170ea2730bSEric Auger };
5180ea2730bSEric Auger 
5190ea2730bSEric Auger /**
5200ea2730bSEric Auger  * vfio_base_device_init - perform preliminary VFIO setup
5210ea2730bSEric Auger  * @vbasedev: the VFIO device handle
5229bdbfbd5SEric Auger  * @errp: error object
5230ea2730bSEric Auger  *
5240ea2730bSEric Auger  * Implement the VFIO command sequence that allows to discover
5250ea2730bSEric Auger  * assigned device resources: group extraction, device
5260ea2730bSEric Auger  * fd retrieval, resource query.
5270ea2730bSEric Auger  * Precondition: the device name must be initialized
5280ea2730bSEric Auger  */
vfio_base_device_init(VFIODevice * vbasedev,Error ** errp)529*958609cfSZhenzhong Duan static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
5300ea2730bSEric Auger {
5313016e60fSZhenzhong Duan     /* @fd takes precedence over @sysfsdev which takes precedence over @host */
5323016e60fSZhenzhong Duan     if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
5337df9381bSAlex Williamson         g_free(vbasedev->name);
5343e015d81SJulia Suvorova         vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
5353016e60fSZhenzhong Duan     } else if (vbasedev->fd < 0) {
5360ea2730bSEric Auger         if (!vbasedev->name || strchr(vbasedev->name, '/')) {
5379bdbfbd5SEric Auger             error_setg(errp, "wrong host device name");
538*958609cfSZhenzhong Duan             return false;
5390ea2730bSEric Auger         }
5400ea2730bSEric Auger 
5417df9381bSAlex Williamson         vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
5420ea2730bSEric Auger                                              vbasedev->name);
5437df9381bSAlex Williamson     }
5440ea2730bSEric Auger 
545c6c6cf91SZhenzhong Duan     if (!vfio_device_get_name(vbasedev, errp)) {
546*958609cfSZhenzhong Duan         return false;
5470ea2730bSEric Auger     }
5480ea2730bSEric Auger 
549b7754835SZhenzhong Duan     if (!vfio_attach_device(vbasedev->name, vbasedev,
550b7754835SZhenzhong Duan                             &address_space_memory, errp)) {
551*958609cfSZhenzhong Duan         return false;
5520ea2730bSEric Auger     }
5530ea2730bSEric Auger 
554*958609cfSZhenzhong Duan     if (vfio_populate_device(vbasedev, errp)) {
555*958609cfSZhenzhong Duan         return true;
556*958609cfSZhenzhong Duan     }
557*958609cfSZhenzhong Duan 
558da5ed432SEric Auger     vfio_detach_device(vbasedev);
559*958609cfSZhenzhong Duan     return false;
5600ea2730bSEric Auger }
5610ea2730bSEric Auger 
5620ea2730bSEric Auger /**
5630ea2730bSEric Auger  * vfio_platform_realize  - the device realize function
5640ea2730bSEric Auger  * @dev: device state pointer
5650ea2730bSEric Auger  * @errp: error
5660ea2730bSEric Auger  *
5670ea2730bSEric Auger  * initialize the device, its memory regions and IRQ structures
5680ea2730bSEric Auger  * IRQ are started separately
5690ea2730bSEric Auger  */
vfio_platform_realize(DeviceState * dev,Error ** errp)5700ea2730bSEric Auger static void vfio_platform_realize(DeviceState *dev, Error **errp)
5710ea2730bSEric Auger {
572498696efSZhao Liu     ERRP_GUARD();
5730ea2730bSEric Auger     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
5740ea2730bSEric Auger     SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
5750ea2730bSEric Auger     VFIODevice *vbasedev = &vdev->vbasedev;
576*958609cfSZhenzhong Duan     int i;
5770ea2730bSEric Auger 
57889202c6fSEric Auger     qemu_mutex_init(&vdev->intp_mutex);
57989202c6fSEric Auger 
5807df9381bSAlex Williamson     trace_vfio_platform_realize(vbasedev->sysfsdev ?
5817df9381bSAlex Williamson                                 vbasedev->sysfsdev : vbasedev->name,
5827df9381bSAlex Williamson                                 vdev->compat);
5830ea2730bSEric Auger 
584*958609cfSZhenzhong Duan     if (!vfio_base_device_init(vbasedev, errp)) {
585*958609cfSZhenzhong Duan         goto init_err;
5860ea2730bSEric Auger     }
5870ea2730bSEric Auger 
588a49531ebSEric Auger     if (!vdev->compat) {
589a49531ebSEric Auger         GError *gerr = NULL;
590a49531ebSEric Auger         gchar *contents;
591a49531ebSEric Auger         gsize length;
592a49531ebSEric Auger         char *path;
593a49531ebSEric Auger 
594a49531ebSEric Auger         path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
595a49531ebSEric Auger         if (!g_file_get_contents(path, &contents, &length, &gerr)) {
596a49531ebSEric Auger             error_setg(errp, "%s", gerr->message);
597a49531ebSEric Auger             g_error_free(gerr);
598a49531ebSEric Auger             g_free(path);
599a49531ebSEric Auger             return;
600a49531ebSEric Auger         }
601a49531ebSEric Auger         g_free(path);
602a49531ebSEric Auger         vdev->compat = contents;
603a49531ebSEric Auger         for (vdev->num_compat = 0; length; vdev->num_compat++) {
604a49531ebSEric Auger             size_t skip = strlen(contents) + 1;
605a49531ebSEric Auger             contents += skip;
606a49531ebSEric Auger             length -= skip;
607a49531ebSEric Auger         }
608a49531ebSEric Auger     }
609a49531ebSEric Auger 
6100ea2730bSEric Auger     for (i = 0; i < vbasedev->num_regions; i++) {
611db0da029SAlex Williamson         if (vfio_region_mmap(vdev->regions[i])) {
612e1eb292aSMarkus Armbruster             warn_report("%s mmap unsupported, performance may be slow",
613db0da029SAlex Williamson                         memory_region_name(vdev->regions[i]->mem));
614db0da029SAlex Williamson         }
615db0da029SAlex Williamson         sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
6160ea2730bSEric Auger     }
6179bdbfbd5SEric Auger     return;
6189bdbfbd5SEric Auger 
619*958609cfSZhenzhong Duan init_err:
6209bdbfbd5SEric Auger     if (vdev->vbasedev.name) {
621c3b8e3e0SMarkus Armbruster         error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
6229bdbfbd5SEric Auger     } else {
6239bdbfbd5SEric Auger         error_prepend(errp, "vfio error: ");
6249bdbfbd5SEric Auger     }
6250ea2730bSEric Auger }
6260ea2730bSEric Auger 
/*
 * VMState description for the "vfio-platform" device. It carries no
 * fields and sets .unmigratable, flagging the device as unmigratable.
 */
6270ea2730bSEric Auger static const VMStateDescription vfio_platform_vmstate = {
628da56e330SLi Qiang     .name = "vfio-platform",
6290ea2730bSEric Auger     .unmigratable = 1,
6300ea2730bSEric Auger };
6310ea2730bSEric Auger 
/*
 * qdev properties of the vfio-platform device:
 *   "host"            - string stored in vbasedev.name
 *   "sysfsdev"        - string stored in vbasedev.sysfsdev
 *   "x-no-mmap"       - bool stored in vbasedev.no_mmap, default false
 *   "mmap-timeout-ms" - uint32 stored in mmap_timeout, default 1100
 *   "x-irqfd"         - bool stored in irqfd_allowed, default true
 *   "iommufd"         - link to a TYPE_IOMMUFD_BACKEND object stored in
 *                       vbasedev.iommufd; only built with CONFIG_IOMMUFD
 */
6320ea2730bSEric Auger static Property vfio_platform_dev_properties[] = {
6330ea2730bSEric Auger     DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
6347df9381bSAlex Williamson     DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
6355e15d79bSAlex Williamson     DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
63638559979SEric Auger     DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
63738559979SEric Auger                        mmap_timeout, 1100),
638fb5f8164SEric Auger     DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
639a6c50e1cSZhenzhong Duan #ifdef CONFIG_IOMMUFD
640a6c50e1cSZhenzhong Duan     DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd,
641a6c50e1cSZhenzhong Duan                      TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
642a6c50e1cSZhenzhong Duan #endif
6430ea2730bSEric Auger     DEFINE_PROP_END_OF_LIST(),
6440ea2730bSEric Auger };
6450ea2730bSEric Auger 
/*
 * Instance initializer: sets up the embedded VFIODevice as a platform
 * device bound to vfio_platform_ops and owned by this device object.
 */
static void vfio_platform_instance_init(Object *obj)
{
    VFIOPlatformDevice *pdev = VFIO_PLATFORM_DEVICE(obj);

    vfio_device_init(&pdev->vbasedev, VFIO_DEVICE_TYPE_PLATFORM,
                     &vfio_platform_ops, DEVICE(pdev), false);
}
6543016e60fSZhenzhong Duan 
6553016e60fSZhenzhong Duan #ifdef CONFIG_IOMMUFD
vfio_platform_set_fd(Object * obj,const char * str,Error ** errp)6563016e60fSZhenzhong Duan static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp)
6573016e60fSZhenzhong Duan {
6583016e60fSZhenzhong Duan     vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp);
6593016e60fSZhenzhong Duan }
6603016e60fSZhenzhong Duan #endif
6613016e60fSZhenzhong Duan 
/*
 * Class initializer: installs the realize hook, the property table, the
 * VMState description and the sysbus IRQ-connect notifier, and fills in
 * the descriptive class metadata.
 */
static void vfio_platform_class_init(ObjectClass *klass, void *data)
{
    SysBusDeviceClass *sysbus_class = SYS_BUS_DEVICE_CLASS(klass);
    DeviceClass *dev_class = DEVICE_CLASS(klass);

    /* Behavioural hooks */
    dev_class->realize = vfio_platform_realize;
    dev_class->vmsd = &vfio_platform_vmstate;
    sysbus_class->connect_irq_notifier = vfio_start_irqfd_injection;

    /* Properties: static table first, then the optional "fd" string */
    device_class_set_props(dev_class, vfio_platform_dev_properties);
#ifdef CONFIG_IOMMUFD
    object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd);
#endif

    /* Descriptive metadata */
    dev_class->desc = "VFIO-based platform device assignment";
    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
    /* Supported by TYPE_VIRT_MACHINE */
    dev_class->user_creatable = true;
}
6790ea2730bSEric Auger 
/*
 * QOM type record for TYPE_VFIO_PLATFORM, a child of TYPE_SYS_BUS_DEVICE.
 * Instances are VFIOPlatformDevice objects set up by
 * vfio_platform_instance_init(); the class is a VFIOPlatformDeviceClass
 * set up by vfio_platform_class_init().
 */
6800ea2730bSEric Auger static const TypeInfo vfio_platform_dev_info = {
6810ea2730bSEric Auger     .name = TYPE_VFIO_PLATFORM,
6820ea2730bSEric Auger     .parent = TYPE_SYS_BUS_DEVICE,
6830ea2730bSEric Auger     .instance_size = sizeof(VFIOPlatformDevice),
6843016e60fSZhenzhong Duan     .instance_init = vfio_platform_instance_init,
6850ea2730bSEric Auger     .class_init = vfio_platform_class_init,
6860ea2730bSEric Auger     .class_size = sizeof(VFIOPlatformDeviceClass),
6870ea2730bSEric Auger };
6880ea2730bSEric Auger 
register_vfio_platform_dev_type(void)6890ea2730bSEric Auger static void register_vfio_platform_dev_type(void)
6900ea2730bSEric Auger {
6910ea2730bSEric Auger     type_register_static(&vfio_platform_dev_info);
6920ea2730bSEric Auger }
6930ea2730bSEric Auger 
6940ea2730bSEric Auger type_init(register_vfio_platform_dev_type)
695