/*
 * vfio based device assignment support - platform devices
 *
 * Copyright Linaro Limited, 2014
 *
 * Authors:
 *  Kim Phillips <kim.phillips@linaro.org>
 *  Eric Auger <eric.auger@linaro.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on vfio based PCI device assignment support:
 *  Copyright Red Hat, Inc. 2012
 */
160ea2730bSEric Auger
17c6eacb1aSPeter Maydell #include "qemu/osdep.h"
18a6c50e1cSZhenzhong Duan #include CONFIG_DEVICES /* CONFIG_IOMMUFD */
19da34e65cSMarkus Armbruster #include "qapi/error.h"
200ea2730bSEric Auger #include <sys/ioctl.h>
21e2075277SLeon Alrae #include <linux/vfio.h>
220ea2730bSEric Auger
230ea2730bSEric Auger #include "hw/vfio/vfio-platform.h"
24a6c50e1cSZhenzhong Duan #include "sysemu/iommufd.h"
25d6454270SMarkus Armbruster #include "migration/vmstate.h"
260ea2730bSEric Auger #include "qemu/error-report.h"
276e8a355dSDaniel Brodsky #include "qemu/lockable.h"
28db725815SMarkus Armbruster #include "qemu/main-loop.h"
290b8fa32fSMarkus Armbruster #include "qemu/module.h"
300ea2730bSEric Auger #include "qemu/range.h"
310ea2730bSEric Auger #include "exec/memory.h"
32d791937fSPhilippe Mathieu-Daudé #include "exec/address-spaces.h"
3338559979SEric Auger #include "qemu/queue.h"
340ea2730bSEric Auger #include "hw/sysbus.h"
350ea2730bSEric Auger #include "trace.h"
3664552b6bSMarkus Armbruster #include "hw/irq.h"
370ea2730bSEric Auger #include "hw/platform-bus.h"
38a27bd6c7SMarkus Armbruster #include "hw/qdev-properties.h"
39fb5f8164SEric Auger #include "sysemu/kvm.h"
400ea2730bSEric Auger
/*
 * Functions used whatever the injection method
 */
4438559979SEric Auger
/* Report whether the IRQ flags carry VFIO_IRQ_INFO_AUTOMASKED. */
static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
{
    return (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) != 0;
}
49a5b39cd3SEric Auger
5038559979SEric Auger /**
5138559979SEric Auger * vfio_init_intp - allocate, initialize the IRQ struct pointer
5238559979SEric Auger * and add it into the list of IRQs
5338559979SEric Auger * @vbasedev: the VFIO device handle
5438559979SEric Auger * @info: irq info struct retrieved from VFIO driver
555ff7419dSEric Auger * @errp: error object
5638559979SEric Auger */
vfio_init_intp(VFIODevice * vbasedev,struct vfio_irq_info info,Error ** errp)5738559979SEric Auger static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
585ff7419dSEric Auger struct vfio_irq_info info, Error **errp)
5938559979SEric Auger {
6038559979SEric Auger int ret;
6138559979SEric Auger VFIOPlatformDevice *vdev =
6238559979SEric Auger container_of(vbasedev, VFIOPlatformDevice, vbasedev);
6338559979SEric Auger SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
6438559979SEric Auger VFIOINTp *intp;
6538559979SEric Auger
6638559979SEric Auger intp = g_malloc0(sizeof(*intp));
6738559979SEric Auger intp->vdev = vdev;
6838559979SEric Auger intp->pin = info.index;
6938559979SEric Auger intp->flags = info.flags;
7038559979SEric Auger intp->state = VFIO_IRQ_INACTIVE;
71fb5f8164SEric Auger intp->kvm_accel = false;
7238559979SEric Auger
7338559979SEric Auger sysbus_init_irq(sbdev, &intp->qemuirq);
7438559979SEric Auger
7538559979SEric Auger /* Get an eventfd for trigger */
76b21e2380SMarkus Armbruster intp->interrupt = g_new0(EventNotifier, 1);
77a22313deSEric Auger ret = event_notifier_init(intp->interrupt, 0);
7838559979SEric Auger if (ret) {
79a22313deSEric Auger g_free(intp->interrupt);
8038559979SEric Auger g_free(intp);
815ff7419dSEric Auger error_setg_errno(errp, -ret,
82bf04ef35SLi Qiang "failed to initialize trigger eventfd notifier");
8338559979SEric Auger return NULL;
8438559979SEric Auger }
85a5b39cd3SEric Auger if (vfio_irq_is_automasked(intp)) {
86fb5f8164SEric Auger /* Get an eventfd for resample/unmask */
87b21e2380SMarkus Armbruster intp->unmask = g_new0(EventNotifier, 1);
88a22313deSEric Auger ret = event_notifier_init(intp->unmask, 0);
89fb5f8164SEric Auger if (ret) {
90a22313deSEric Auger g_free(intp->interrupt);
91a22313deSEric Auger g_free(intp->unmask);
92fb5f8164SEric Auger g_free(intp);
935ff7419dSEric Auger error_setg_errno(errp, -ret,
94bf04ef35SLi Qiang "failed to initialize resample eventfd notifier");
95fb5f8164SEric Auger return NULL;
96fb5f8164SEric Auger }
97a5b39cd3SEric Auger }
9838559979SEric Auger
9938559979SEric Auger QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
10038559979SEric Auger return intp;
10138559979SEric Auger }
10238559979SEric Auger
10338559979SEric Auger /**
10438559979SEric Auger * vfio_set_trigger_eventfd - set VFIO eventfd handling
10538559979SEric Auger *
10638559979SEric Auger * @intp: IRQ struct handle
10738559979SEric Auger * @handler: handler to be called on eventfd signaling
10838559979SEric Auger *
10938559979SEric Auger * Setup VFIO signaling and attach an optional user-side handler
11038559979SEric Auger * to the eventfd
11138559979SEric Auger */
vfio_set_trigger_eventfd(VFIOINTp * intp,eventfd_user_side_handler_t handler)11238559979SEric Auger static int vfio_set_trigger_eventfd(VFIOINTp *intp,
11338559979SEric Auger eventfd_user_side_handler_t handler)
11438559979SEric Auger {
11538559979SEric Auger VFIODevice *vbasedev = &intp->vdev->vbasedev;
116201a7331SEric Auger int32_t fd = event_notifier_get_fd(intp->interrupt);
117201a7331SEric Auger Error *err = NULL;
11838559979SEric Auger
119201a7331SEric Auger qemu_set_fd_handler(fd, (IOHandler *)handler, NULL, intp);
120201a7331SEric Auger
12184e37d02SZhenzhong Duan if (!vfio_set_irq_signaling(vbasedev, intp->pin, 0,
12284e37d02SZhenzhong Duan VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
123201a7331SEric Auger error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
124201a7331SEric Auger qemu_set_fd_handler(fd, NULL, NULL, NULL);
12584e37d02SZhenzhong Duan return -EINVAL;
12638559979SEric Auger }
127201a7331SEric Auger
12884e37d02SZhenzhong Duan return 0;
12938559979SEric Auger }
13038559979SEric Auger
/*
 * Functions only used when eventfds are handled on user-side
 * ie. without irqfd
 */
13538559979SEric Auger
13638559979SEric Auger /**
13738559979SEric Auger * vfio_mmap_set_enabled - enable/disable the fast path mode
13838559979SEric Auger * @vdev: the VFIO platform device
13938559979SEric Auger * @enabled: the target mmap state
14038559979SEric Auger *
14138559979SEric Auger * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
14238559979SEric Auger * enabled = false ~ slow path = MMIO region is trapped and region callbacks
14338559979SEric Auger * are called; slow path enables to trap the device IRQ status register reset
14438559979SEric Auger */
14538559979SEric Auger
vfio_mmap_set_enabled(VFIOPlatformDevice * vdev,bool enabled)14638559979SEric Auger static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
14738559979SEric Auger {
14838559979SEric Auger int i;
14938559979SEric Auger
15038559979SEric Auger for (i = 0; i < vdev->vbasedev.num_regions; i++) {
151db0da029SAlex Williamson vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
15238559979SEric Auger }
15338559979SEric Auger }
15438559979SEric Auger
15538559979SEric Auger /**
15638559979SEric Auger * vfio_intp_mmap_enable - timer function, restores the fast path
15738559979SEric Auger * if there is no more active IRQ
15838559979SEric Auger * @opaque: actually points to the VFIO platform device
15938559979SEric Auger *
160631ba5a1SCai Huoqing * Called on mmap timer timeout, this function checks whether the
16138559979SEric Auger * IRQ is still active and if not, restores the fast path.
16238559979SEric Auger * by construction a single eventfd is handled at a time.
16338559979SEric Auger * if the IRQ is still active, the timer is re-programmed.
16438559979SEric Auger */
vfio_intp_mmap_enable(void * opaque)16538559979SEric Auger static void vfio_intp_mmap_enable(void *opaque)
16638559979SEric Auger {
16738559979SEric Auger VFIOINTp *tmp;
16838559979SEric Auger VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
16938559979SEric Auger
17088eef597SAmey Narkhede QEMU_LOCK_GUARD(&vdev->intp_mutex);
17138559979SEric Auger QLIST_FOREACH(tmp, &vdev->intp_list, next) {
17238559979SEric Auger if (tmp->state == VFIO_IRQ_ACTIVE) {
17338559979SEric Auger trace_vfio_platform_intp_mmap_enable(tmp->pin);
17438559979SEric Auger /* re-program the timer to check active status later */
17538559979SEric Auger timer_mod(vdev->mmap_timer,
17638559979SEric Auger qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
17738559979SEric Auger vdev->mmap_timeout);
17838559979SEric Auger return;
17938559979SEric Auger }
18038559979SEric Auger }
18138559979SEric Auger vfio_mmap_set_enabled(vdev, true);
18238559979SEric Auger }
18338559979SEric Auger
18438559979SEric Auger /**
18538559979SEric Auger * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
18638559979SEric Auger * @opaque: opaque pointer, in practice the VFIOINTp handle
18738559979SEric Auger *
18838559979SEric Auger * The function is called on a previous IRQ completion, from
18938559979SEric Auger * vfio_platform_eoi, while the intp_mutex is locked.
19038559979SEric Auger * Also in such situation, the slow path already is set and
19138559979SEric Auger * the mmap timer was already programmed.
19238559979SEric Auger */
vfio_intp_inject_pending_lockheld(VFIOINTp * intp)19338559979SEric Auger static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
19438559979SEric Auger {
19538559979SEric Auger trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
196a22313deSEric Auger event_notifier_get_fd(intp->interrupt));
19738559979SEric Auger
19838559979SEric Auger intp->state = VFIO_IRQ_ACTIVE;
19938559979SEric Auger
20038559979SEric Auger /* trigger the virtual IRQ */
20138559979SEric Auger qemu_set_irq(intp->qemuirq, 1);
20238559979SEric Auger }
20338559979SEric Auger
20438559979SEric Auger /**
20538559979SEric Auger * vfio_intp_interrupt - The user-side eventfd handler
20638559979SEric Auger * @opaque: opaque pointer which in practice is the VFIOINTp handle
20738559979SEric Auger *
20838559979SEric Auger * the function is entered in event handler context:
20938559979SEric Auger * the vIRQ is injected into the guest if there is no other active
21038559979SEric Auger * or pending IRQ.
21138559979SEric Auger */
vfio_intp_interrupt(VFIOINTp * intp)21238559979SEric Auger static void vfio_intp_interrupt(VFIOINTp *intp)
21338559979SEric Auger {
21438559979SEric Auger int ret;
21538559979SEric Auger VFIOINTp *tmp;
21638559979SEric Auger VFIOPlatformDevice *vdev = intp->vdev;
21738559979SEric Auger bool delay_handling = false;
21838559979SEric Auger
2196e8a355dSDaniel Brodsky QEMU_LOCK_GUARD(&vdev->intp_mutex);
22038559979SEric Auger if (intp->state == VFIO_IRQ_INACTIVE) {
22138559979SEric Auger QLIST_FOREACH(tmp, &vdev->intp_list, next) {
22238559979SEric Auger if (tmp->state == VFIO_IRQ_ACTIVE ||
22338559979SEric Auger tmp->state == VFIO_IRQ_PENDING) {
22438559979SEric Auger delay_handling = true;
22538559979SEric Auger break;
22638559979SEric Auger }
22738559979SEric Auger }
22838559979SEric Auger }
22938559979SEric Auger if (delay_handling) {
23038559979SEric Auger /*
23138559979SEric Auger * the new IRQ gets a pending status and is pushed in
23238559979SEric Auger * the pending queue
23338559979SEric Auger */
23438559979SEric Auger intp->state = VFIO_IRQ_PENDING;
23538559979SEric Auger trace_vfio_intp_interrupt_set_pending(intp->pin);
23638559979SEric Auger QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
23738559979SEric Auger intp, pqnext);
2389b83b004SChen Qun event_notifier_test_and_clear(intp->interrupt);
23938559979SEric Auger return;
24038559979SEric Auger }
24138559979SEric Auger
24238559979SEric Auger trace_vfio_platform_intp_interrupt(intp->pin,
243a22313deSEric Auger event_notifier_get_fd(intp->interrupt));
24438559979SEric Auger
245a22313deSEric Auger ret = event_notifier_test_and_clear(intp->interrupt);
24638559979SEric Auger if (!ret) {
247594fd211SJohn Snow error_report("Error when clearing fd=%d (ret = %d)",
248a22313deSEric Auger event_notifier_get_fd(intp->interrupt), ret);
24938559979SEric Auger }
25038559979SEric Auger
25138559979SEric Auger intp->state = VFIO_IRQ_ACTIVE;
25238559979SEric Auger
25338559979SEric Auger /* sets slow path */
25438559979SEric Auger vfio_mmap_set_enabled(vdev, false);
25538559979SEric Auger
25638559979SEric Auger /* trigger the virtual IRQ */
25738559979SEric Auger qemu_set_irq(intp->qemuirq, 1);
25838559979SEric Auger
25938559979SEric Auger /*
26038559979SEric Auger * Schedule the mmap timer which will restore fastpath when no IRQ
26138559979SEric Auger * is active anymore
26238559979SEric Auger */
26338559979SEric Auger if (vdev->mmap_timeout) {
26438559979SEric Auger timer_mod(vdev->mmap_timer,
26538559979SEric Auger qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
26638559979SEric Auger vdev->mmap_timeout);
26738559979SEric Auger }
26838559979SEric Auger }
26938559979SEric Auger
27038559979SEric Auger /**
27138559979SEric Auger * vfio_platform_eoi - IRQ completion routine
27238559979SEric Auger * @vbasedev: the VFIO device handle
27338559979SEric Auger *
27438559979SEric Auger * De-asserts the active virtual IRQ and unmasks the physical IRQ
27538559979SEric Auger * (effective for level sensitive IRQ auto-masked by the VFIO driver).
27638559979SEric Auger * Then it handles next pending IRQ if any.
27738559979SEric Auger * eoi function is called on the first access to any MMIO region
27838559979SEric Auger * after an IRQ was triggered, trapped since slow path was set.
27938559979SEric Auger * It is assumed this access corresponds to the IRQ status
28038559979SEric Auger * register reset. With such a mechanism, a single IRQ can be
28138559979SEric Auger * handled at a time since there is no way to know which IRQ
28238559979SEric Auger * was completed by the guest (we would need additional details
28338559979SEric Auger * about the IRQ status register mask).
28438559979SEric Auger */
vfio_platform_eoi(VFIODevice * vbasedev)28538559979SEric Auger static void vfio_platform_eoi(VFIODevice *vbasedev)
28638559979SEric Auger {
28738559979SEric Auger VFIOINTp *intp;
28838559979SEric Auger VFIOPlatformDevice *vdev =
28938559979SEric Auger container_of(vbasedev, VFIOPlatformDevice, vbasedev);
29038559979SEric Auger
29188eef597SAmey Narkhede QEMU_LOCK_GUARD(&vdev->intp_mutex);
29238559979SEric Auger QLIST_FOREACH(intp, &vdev->intp_list, next) {
29338559979SEric Auger if (intp->state == VFIO_IRQ_ACTIVE) {
29438559979SEric Auger trace_vfio_platform_eoi(intp->pin,
295a22313deSEric Auger event_notifier_get_fd(intp->interrupt));
29638559979SEric Auger intp->state = VFIO_IRQ_INACTIVE;
29738559979SEric Auger
29838559979SEric Auger /* deassert the virtual IRQ */
29938559979SEric Auger qemu_set_irq(intp->qemuirq, 0);
30038559979SEric Auger
301a5b39cd3SEric Auger if (vfio_irq_is_automasked(intp)) {
30238559979SEric Auger /* unmasks the physical level-sensitive IRQ */
30338559979SEric Auger vfio_unmask_single_irqindex(vbasedev, intp->pin);
30438559979SEric Auger }
30538559979SEric Auger
30638559979SEric Auger /* a single IRQ can be active at a time */
30738559979SEric Auger break;
30838559979SEric Auger }
30938559979SEric Auger }
31038559979SEric Auger /* in case there are pending IRQs, handle the first one */
31138559979SEric Auger if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
31238559979SEric Auger intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
31338559979SEric Auger vfio_intp_inject_pending_lockheld(intp);
31438559979SEric Auger QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
31538559979SEric Auger }
31638559979SEric Auger }
31738559979SEric Auger
31838559979SEric Auger /**
31938559979SEric Auger * vfio_start_eventfd_injection - starts the virtual IRQ injection using
32038559979SEric Auger * user-side handled eventfds
32158892b44SEric Auger * @sbdev: the sysbus device handle
32258892b44SEric Auger * @irq: the qemu irq handle
32338559979SEric Auger */
32438559979SEric Auger
vfio_start_eventfd_injection(SysBusDevice * sbdev,qemu_irq irq)32558892b44SEric Auger static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
32638559979SEric Auger {
32758892b44SEric Auger VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
32858892b44SEric Auger VFIOINTp *intp;
32958892b44SEric Auger
33058892b44SEric Auger QLIST_FOREACH(intp, &vdev->intp_list, next) {
33158892b44SEric Auger if (intp->qemuirq == irq) {
33258892b44SEric Auger break;
33358892b44SEric Auger }
33458892b44SEric Auger }
33558892b44SEric Auger assert(intp);
33638559979SEric Auger
337201a7331SEric Auger if (vfio_set_trigger_eventfd(intp, vfio_intp_interrupt)) {
33858892b44SEric Auger abort();
33938559979SEric Auger }
34038559979SEric Auger }
34138559979SEric Auger
/*
 * Functions used for irqfd
 */
345fb5f8164SEric Auger
346fb5f8164SEric Auger /**
347fb5f8164SEric Auger * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
348fb5f8164SEric Auger * @intp: the IRQ struct handle
349fb5f8164SEric Auger * programs the VFIO driver to unmask this IRQ when the
350fb5f8164SEric Auger * intp->unmask eventfd is triggered
351fb5f8164SEric Auger */
vfio_set_resample_eventfd(VFIOINTp * intp)352fb5f8164SEric Auger static int vfio_set_resample_eventfd(VFIOINTp *intp)
353fb5f8164SEric Auger {
354201a7331SEric Auger int32_t fd = event_notifier_get_fd(intp->unmask);
355fb5f8164SEric Auger VFIODevice *vbasedev = &intp->vdev->vbasedev;
356201a7331SEric Auger Error *err = NULL;
357fb5f8164SEric Auger
358201a7331SEric Auger qemu_set_fd_handler(fd, NULL, NULL, NULL);
35984e37d02SZhenzhong Duan if (!vfio_set_irq_signaling(vbasedev, intp->pin, 0,
36084e37d02SZhenzhong Duan VFIO_IRQ_SET_ACTION_UNMASK, fd, &err)) {
361201a7331SEric Auger error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
36284e37d02SZhenzhong Duan return -EINVAL;
363fb5f8164SEric Auger }
36484e37d02SZhenzhong Duan return 0;
365fb5f8164SEric Auger }
366fb5f8164SEric Auger
36758892b44SEric Auger /**
36858892b44SEric Auger * vfio_start_irqfd_injection - starts the virtual IRQ injection using
36958892b44SEric Auger * irqfd
37058892b44SEric Auger *
37158892b44SEric Auger * @sbdev: the sysbus device handle
37258892b44SEric Auger * @irq: the qemu irq handle
37358892b44SEric Auger *
37458892b44SEric Auger * In case the irqfd setup fails, we fallback to userspace handled eventfd
37558892b44SEric Auger */
vfio_start_irqfd_injection(SysBusDevice * sbdev,qemu_irq irq)376fb5f8164SEric Auger static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
377fb5f8164SEric Auger {
378fb5f8164SEric Auger VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
379fb5f8164SEric Auger VFIOINTp *intp;
380fb5f8164SEric Auger
381fb5f8164SEric Auger if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
382fb5f8164SEric Auger !vdev->irqfd_allowed) {
38358892b44SEric Auger goto fail_irqfd;
384fb5f8164SEric Auger }
385fb5f8164SEric Auger
386fb5f8164SEric Auger QLIST_FOREACH(intp, &vdev->intp_list, next) {
387fb5f8164SEric Auger if (intp->qemuirq == irq) {
388fb5f8164SEric Auger break;
389fb5f8164SEric Auger }
390fb5f8164SEric Auger }
391fb5f8164SEric Auger assert(intp);
392fb5f8164SEric Auger
393a22313deSEric Auger if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
394a22313deSEric Auger intp->unmask, irq) < 0) {
395fb5f8164SEric Auger goto fail_irqfd;
396fb5f8164SEric Auger }
397fb5f8164SEric Auger
398fb5f8164SEric Auger if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
399fb5f8164SEric Auger goto fail_vfio;
400fb5f8164SEric Auger }
401a5b39cd3SEric Auger if (vfio_irq_is_automasked(intp)) {
402fb5f8164SEric Auger if (vfio_set_resample_eventfd(intp) < 0) {
403fb5f8164SEric Auger goto fail_vfio;
404fb5f8164SEric Auger }
405a5b39cd3SEric Auger trace_vfio_platform_start_level_irqfd_injection(intp->pin,
406a5b39cd3SEric Auger event_notifier_get_fd(intp->interrupt),
407a5b39cd3SEric Auger event_notifier_get_fd(intp->unmask));
408a5b39cd3SEric Auger } else {
409a5b39cd3SEric Auger trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
410a5b39cd3SEric Auger event_notifier_get_fd(intp->interrupt));
411a5b39cd3SEric Auger }
412fb5f8164SEric Auger
413fb5f8164SEric Auger intp->kvm_accel = true;
414fb5f8164SEric Auger
415fb5f8164SEric Auger return;
416fb5f8164SEric Auger fail_vfio:
417a22313deSEric Auger kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
41858892b44SEric Auger abort();
419fb5f8164SEric Auger fail_irqfd:
42058892b44SEric Auger vfio_start_eventfd_injection(sbdev, irq);
421fb5f8164SEric Auger return;
422fb5f8164SEric Auger }
423fb5f8164SEric Auger
/* VFIO skeleton */
4250ea2730bSEric Auger
/* Unconditionally flag the device as needing a reset. */
static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
{
    vbasedev->needs_reset = true;
}
4300ea2730bSEric Auger
4310ea2730bSEric Auger /* not implemented yet */
vfio_platform_hot_reset_multi(VFIODevice * vbasedev)4320ea2730bSEric Auger static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
4330ea2730bSEric Auger {
4340ea2730bSEric Auger return -1;
4350ea2730bSEric Auger }
4360ea2730bSEric Auger
4370ea2730bSEric Auger /**
4380ea2730bSEric Auger * vfio_populate_device - Allocate and populate MMIO region
43938559979SEric Auger * and IRQ structs according to driver returned information
4400ea2730bSEric Auger * @vbasedev: the VFIO device handle
4415ff7419dSEric Auger * @errp: error object
4420ea2730bSEric Auger *
4430ea2730bSEric Auger */
vfio_populate_device(VFIODevice * vbasedev,Error ** errp)444*958609cfSZhenzhong Duan static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp)
4450ea2730bSEric Auger {
44638559979SEric Auger VFIOINTp *intp, *tmp;
4470ea2730bSEric Auger int i, ret = -1;
4480ea2730bSEric Auger VFIOPlatformDevice *vdev =
4490ea2730bSEric Auger container_of(vbasedev, VFIOPlatformDevice, vbasedev);
4500ea2730bSEric Auger
4510ea2730bSEric Auger if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
4525ff7419dSEric Auger error_setg(errp, "this isn't a platform device");
453*958609cfSZhenzhong Duan return false;
4540ea2730bSEric Auger }
4550ea2730bSEric Auger
4560b70743dSEric Auger vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
4570ea2730bSEric Auger
4580ea2730bSEric Auger for (i = 0; i < vbasedev->num_regions; i++) {
459db0da029SAlex Williamson char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
4600ea2730bSEric Auger
461bdd81addSMarkus Armbruster vdev->regions[i] = g_new0(VFIORegion, 1);
462db0da029SAlex Williamson ret = vfio_region_setup(OBJECT(vdev), vbasedev,
463db0da029SAlex Williamson vdev->regions[i], i, name);
464db0da029SAlex Williamson g_free(name);
4650ea2730bSEric Auger if (ret) {
4665ff7419dSEric Auger error_setg_errno(errp, -ret, "failed to get region %d info", i);
4670ea2730bSEric Auger goto reg_error;
4680ea2730bSEric Auger }
4690ea2730bSEric Auger }
4700ea2730bSEric Auger
47138559979SEric Auger vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
47238559979SEric Auger vfio_intp_mmap_enable, vdev);
47338559979SEric Auger
47438559979SEric Auger QSIMPLEQ_INIT(&vdev->pending_intp_queue);
47538559979SEric Auger
47638559979SEric Auger for (i = 0; i < vbasedev->num_irqs; i++) {
47738559979SEric Auger struct vfio_irq_info irq = { .argsz = sizeof(irq) };
47838559979SEric Auger
47938559979SEric Auger irq.index = i;
48038559979SEric Auger ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
48138559979SEric Auger if (ret) {
4825ff7419dSEric Auger error_setg_errno(errp, -ret, "failed to get device irq info");
48338559979SEric Auger goto irq_err;
48438559979SEric Auger } else {
48538559979SEric Auger trace_vfio_platform_populate_interrupts(irq.index,
48638559979SEric Auger irq.count,
48738559979SEric Auger irq.flags);
4885ff7419dSEric Auger intp = vfio_init_intp(vbasedev, irq, errp);
48938559979SEric Auger if (!intp) {
49038559979SEric Auger goto irq_err;
49138559979SEric Auger }
49238559979SEric Auger }
49338559979SEric Auger }
494*958609cfSZhenzhong Duan return true;
49538559979SEric Auger irq_err:
49638559979SEric Auger timer_del(vdev->mmap_timer);
49738559979SEric Auger QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
49838559979SEric Auger QLIST_REMOVE(intp, next);
49938559979SEric Auger g_free(intp);
50038559979SEric Auger }
5010ea2730bSEric Auger reg_error:
5020ea2730bSEric Auger for (i = 0; i < vbasedev->num_regions; i++) {
503db0da029SAlex Williamson if (vdev->regions[i]) {
504db0da029SAlex Williamson vfio_region_finalize(vdev->regions[i]);
505db0da029SAlex Williamson }
5060ea2730bSEric Auger g_free(vdev->regions[i]);
5070ea2730bSEric Auger }
5080ea2730bSEric Auger g_free(vdev->regions);
509*958609cfSZhenzhong Duan return false;
5100ea2730bSEric Auger }
5110ea2730bSEric Auger
5120ea2730bSEric Auger /* specialized functions for VFIO Platform devices */
5130ea2730bSEric Auger static VFIODeviceOps vfio_platform_ops = {
5140ea2730bSEric Auger .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
5150ea2730bSEric Auger .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
51638559979SEric Auger .vfio_eoi = vfio_platform_eoi,
5170ea2730bSEric Auger };
5180ea2730bSEric Auger
5190ea2730bSEric Auger /**
5200ea2730bSEric Auger * vfio_base_device_init - perform preliminary VFIO setup
5210ea2730bSEric Auger * @vbasedev: the VFIO device handle
5229bdbfbd5SEric Auger * @errp: error object
5230ea2730bSEric Auger *
5240ea2730bSEric Auger * Implement the VFIO command sequence that allows to discover
5250ea2730bSEric Auger * assigned device resources: group extraction, device
5260ea2730bSEric Auger * fd retrieval, resource query.
5270ea2730bSEric Auger * Precondition: the device name must be initialized
5280ea2730bSEric Auger */
vfio_base_device_init(VFIODevice * vbasedev,Error ** errp)529*958609cfSZhenzhong Duan static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
5300ea2730bSEric Auger {
5313016e60fSZhenzhong Duan /* @fd takes precedence over @sysfsdev which takes precedence over @host */
5323016e60fSZhenzhong Duan if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
5337df9381bSAlex Williamson g_free(vbasedev->name);
5343e015d81SJulia Suvorova vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
5353016e60fSZhenzhong Duan } else if (vbasedev->fd < 0) {
5360ea2730bSEric Auger if (!vbasedev->name || strchr(vbasedev->name, '/')) {
5379bdbfbd5SEric Auger error_setg(errp, "wrong host device name");
538*958609cfSZhenzhong Duan return false;
5390ea2730bSEric Auger }
5400ea2730bSEric Auger
5417df9381bSAlex Williamson vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
5420ea2730bSEric Auger vbasedev->name);
5437df9381bSAlex Williamson }
5440ea2730bSEric Auger
545c6c6cf91SZhenzhong Duan if (!vfio_device_get_name(vbasedev, errp)) {
546*958609cfSZhenzhong Duan return false;
5470ea2730bSEric Auger }
5480ea2730bSEric Auger
549b7754835SZhenzhong Duan if (!vfio_attach_device(vbasedev->name, vbasedev,
550b7754835SZhenzhong Duan &address_space_memory, errp)) {
551*958609cfSZhenzhong Duan return false;
5520ea2730bSEric Auger }
5530ea2730bSEric Auger
554*958609cfSZhenzhong Duan if (vfio_populate_device(vbasedev, errp)) {
555*958609cfSZhenzhong Duan return true;
556*958609cfSZhenzhong Duan }
557*958609cfSZhenzhong Duan
558da5ed432SEric Auger vfio_detach_device(vbasedev);
559*958609cfSZhenzhong Duan return false;
5600ea2730bSEric Auger }
5610ea2730bSEric Auger
5620ea2730bSEric Auger /**
5630ea2730bSEric Auger * vfio_platform_realize - the device realize function
5640ea2730bSEric Auger * @dev: device state pointer
5650ea2730bSEric Auger * @errp: error
5660ea2730bSEric Auger *
5670ea2730bSEric Auger * initialize the device, its memory regions and IRQ structures
5680ea2730bSEric Auger * IRQ are started separately
5690ea2730bSEric Auger */
vfio_platform_realize(DeviceState * dev,Error ** errp)5700ea2730bSEric Auger static void vfio_platform_realize(DeviceState *dev, Error **errp)
5710ea2730bSEric Auger {
572498696efSZhao Liu ERRP_GUARD();
5730ea2730bSEric Auger VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
5740ea2730bSEric Auger SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
5750ea2730bSEric Auger VFIODevice *vbasedev = &vdev->vbasedev;
576*958609cfSZhenzhong Duan int i;
5770ea2730bSEric Auger
57889202c6fSEric Auger qemu_mutex_init(&vdev->intp_mutex);
57989202c6fSEric Auger
5807df9381bSAlex Williamson trace_vfio_platform_realize(vbasedev->sysfsdev ?
5817df9381bSAlex Williamson vbasedev->sysfsdev : vbasedev->name,
5827df9381bSAlex Williamson vdev->compat);
5830ea2730bSEric Auger
584*958609cfSZhenzhong Duan if (!vfio_base_device_init(vbasedev, errp)) {
585*958609cfSZhenzhong Duan goto init_err;
5860ea2730bSEric Auger }
5870ea2730bSEric Auger
588a49531ebSEric Auger if (!vdev->compat) {
589a49531ebSEric Auger GError *gerr = NULL;
590a49531ebSEric Auger gchar *contents;
591a49531ebSEric Auger gsize length;
592a49531ebSEric Auger char *path;
593a49531ebSEric Auger
594a49531ebSEric Auger path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
595a49531ebSEric Auger if (!g_file_get_contents(path, &contents, &length, &gerr)) {
596a49531ebSEric Auger error_setg(errp, "%s", gerr->message);
597a49531ebSEric Auger g_error_free(gerr);
598a49531ebSEric Auger g_free(path);
599a49531ebSEric Auger return;
600a49531ebSEric Auger }
601a49531ebSEric Auger g_free(path);
602a49531ebSEric Auger vdev->compat = contents;
603a49531ebSEric Auger for (vdev->num_compat = 0; length; vdev->num_compat++) {
604a49531ebSEric Auger size_t skip = strlen(contents) + 1;
605a49531ebSEric Auger contents += skip;
606a49531ebSEric Auger length -= skip;
607a49531ebSEric Auger }
608a49531ebSEric Auger }
609a49531ebSEric Auger
6100ea2730bSEric Auger for (i = 0; i < vbasedev->num_regions; i++) {
611db0da029SAlex Williamson if (vfio_region_mmap(vdev->regions[i])) {
612e1eb292aSMarkus Armbruster warn_report("%s mmap unsupported, performance may be slow",
613db0da029SAlex Williamson memory_region_name(vdev->regions[i]->mem));
614db0da029SAlex Williamson }
615db0da029SAlex Williamson sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
6160ea2730bSEric Auger }
6179bdbfbd5SEric Auger return;
6189bdbfbd5SEric Auger
619*958609cfSZhenzhong Duan init_err:
6209bdbfbd5SEric Auger if (vdev->vbasedev.name) {
621c3b8e3e0SMarkus Armbruster error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
6229bdbfbd5SEric Auger } else {
6239bdbfbd5SEric Auger error_prepend(errp, "vfio error: ");
6249bdbfbd5SEric Auger }
6250ea2730bSEric Auger }
6260ea2730bSEric Auger
6270ea2730bSEric Auger static const VMStateDescription vfio_platform_vmstate = {
628da56e330SLi Qiang .name = "vfio-platform",
6290ea2730bSEric Auger .unmigratable = 1,
6300ea2730bSEric Auger };
6310ea2730bSEric Auger
6320ea2730bSEric Auger static Property vfio_platform_dev_properties[] = {
6330ea2730bSEric Auger DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
6347df9381bSAlex Williamson DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
6355e15d79bSAlex Williamson DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
63638559979SEric Auger DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
63738559979SEric Auger mmap_timeout, 1100),
638fb5f8164SEric Auger DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
639a6c50e1cSZhenzhong Duan #ifdef CONFIG_IOMMUFD
640a6c50e1cSZhenzhong Duan DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd,
641a6c50e1cSZhenzhong Duan TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
642a6c50e1cSZhenzhong Duan #endif
6430ea2730bSEric Auger DEFINE_PROP_END_OF_LIST(),
6440ea2730bSEric Auger };
6450ea2730bSEric Auger
/*
 * Instance init: set up the embedded VFIODevice as a platform-type
 * device bound to vfio_platform_ops.
 */
static void vfio_platform_instance_init(Object *obj)
{
    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);

    vfio_device_init(&vdev->vbasedev, VFIO_DEVICE_TYPE_PLATFORM,
                     &vfio_platform_ops, DEVICE(vdev), false);
}
6543016e60fSZhenzhong Duan
#ifdef CONFIG_IOMMUFD
/* Setter of the "fd" property: forwards @str to vfio_device_set_fd(). */
static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp)
{
    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);

    vfio_device_set_fd(&vdev->vbasedev, str, errp);
}
#endif
6613016e60fSZhenzhong Duan
/*
 * Class init: installs the realize hook, the properties (plus the "fd"
 * string property when CONFIG_IOMMUFD is defined), the vmstate and the
 * sysbus IRQ connection notifier, and marks the device user creatable.
 */
static void vfio_platform_class_init(ObjectClass *klass, void *data)
{
    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->desc = "VFIO-based platform device assignment";
    dc->realize = vfio_platform_realize;
    dc->vmsd = &vfio_platform_vmstate;
    /* Supported by TYPE_VIRT_MACHINE */
    dc->user_creatable = true;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);

    device_class_set_props(dc, vfio_platform_dev_properties);
#ifdef CONFIG_IOMMUFD
    object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd);
#endif

    /* IRQ injection is started when the sysbus IRQ gets connected */
    sbc->connect_irq_notifier = vfio_start_irqfd_injection;
}
6790ea2730bSEric Auger
6800ea2730bSEric Auger static const TypeInfo vfio_platform_dev_info = {
6810ea2730bSEric Auger .name = TYPE_VFIO_PLATFORM,
6820ea2730bSEric Auger .parent = TYPE_SYS_BUS_DEVICE,
6830ea2730bSEric Auger .instance_size = sizeof(VFIOPlatformDevice),
6843016e60fSZhenzhong Duan .instance_init = vfio_platform_instance_init,
6850ea2730bSEric Auger .class_init = vfio_platform_class_init,
6860ea2730bSEric Auger .class_size = sizeof(VFIOPlatformDeviceClass),
6870ea2730bSEric Auger };
6880ea2730bSEric Auger
register_vfio_platform_dev_type(void)6890ea2730bSEric Auger static void register_vfio_platform_dev_type(void)
6900ea2730bSEric Auger {
6910ea2730bSEric Auger type_register_static(&vfio_platform_dev_info);
6920ea2730bSEric Auger }
6930ea2730bSEric Auger
6940ea2730bSEric Auger type_init(register_vfio_platform_dev_type)
695