xref: /openbmc/qemu/hw/vfio/platform.c (revision 0bc12c4f)
1 /*
2  * vfio based device assignment support - platform devices
3  *
4  * Copyright Linaro Limited, 2014
5  *
6  * Authors:
7  *  Kim Phillips <kim.phillips@linaro.org>
8  *  Eric Auger <eric.auger@linaro.org>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  *
13  * Based on vfio based PCI device assignment support:
14  *  Copyright Red Hat, Inc. 2012
15  */
16 
17 #include <linux/vfio.h>
18 #include <sys/ioctl.h>
19 
20 #include "hw/vfio/vfio-platform.h"
21 #include "qemu/error-report.h"
22 #include "qemu/range.h"
23 #include "sysemu/sysemu.h"
24 #include "exec/memory.h"
25 #include "qemu/queue.h"
26 #include "hw/sysbus.h"
27 #include "trace.h"
28 #include "hw/platform-bus.h"
29 
30 /*
31  * Functions used whatever the injection method
32  */
33 
34 /**
35  * vfio_init_intp - allocate, initialize the IRQ struct pointer
36  * and add it into the list of IRQs
37  * @vbasedev: the VFIO device handle
38  * @info: irq info struct retrieved from VFIO driver
39  */
static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
                                struct vfio_irq_info info)
{
    int ret;
    VFIOPlatformDevice *vdev =
        container_of(vbasedev, VFIOPlatformDevice, vbasedev);
    SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
    VFIOINTp *intp;

    intp = g_malloc0(sizeof(*intp));
    intp->vdev = vdev;
    intp->pin = info.index;   /* IRQ index as reported by the VFIO driver */
    intp->flags = info.flags; /* e.g. VFIO_IRQ_INFO_AUTOMASKED */
    intp->state = VFIO_IRQ_INACTIVE;

    /* expose the IRQ on the sysbus device so the machine code can wire it */
    sysbus_init_irq(sbdev, &intp->qemuirq);

    /* Get an eventfd for trigger */
    ret = event_notifier_init(&intp->interrupt, 0);
    if (ret) {
        /* intp was not linked anywhere yet, so a plain free suffices */
        g_free(intp);
        error_report("vfio: Error: trigger event_notifier_init failed ");
        return NULL;
    }

    QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
    return intp;
}
68 
69 /**
70  * vfio_set_trigger_eventfd - set VFIO eventfd handling
71  *
72  * @intp: IRQ struct handle
73  * @handler: handler to be called on eventfd signaling
74  *
75  * Setup VFIO signaling and attach an optional user-side handler
76  * to the eventfd
77  */
static int vfio_set_trigger_eventfd(VFIOINTp *intp,
                                    eventfd_user_side_handler_t handler)
{
    VFIODevice *vbasedev = &intp->vdev->vbasedev;
    struct vfio_irq_set *irq_set;
    int argsz, ret;
    int32_t *pfd;

    /* variable-sized vfio_irq_set carrying a single eventfd in its data[] */
    argsz = sizeof(*irq_set) + sizeof(*pfd);
    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set->index = intp->pin;
    irq_set->start = 0;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = event_notifier_get_fd(&intp->interrupt);
    /* register the user-side handler before the kernel starts signaling */
    qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
    ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
    g_free(irq_set);
    if (ret < 0) {
        error_report("vfio: Failed to set trigger eventfd: %m");
        /* roll back the handler registration on ioctl failure */
        qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
    }
    return ret;
}
104 
105 /*
106  * Functions only used when eventfds are handled on user-side
107  * ie. without irqfd
108  */
109 
110 /**
111  * vfio_mmap_set_enabled - enable/disable the fast path mode
112  * @vdev: the VFIO platform device
113  * @enabled: the target mmap state
114  *
115  * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
116  * enabled = false ~ slow path = MMIO region is trapped and region callbacks
117  * are called; slow path enables to trap the device IRQ status register reset
118 */
119 
120 static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
121 {
122     int i;
123 
124     trace_vfio_platform_mmap_set_enabled(enabled);
125 
126     for (i = 0; i < vdev->vbasedev.num_regions; i++) {
127         VFIORegion *region = vdev->regions[i];
128 
129         memory_region_set_enabled(&region->mmap_mem, enabled);
130     }
131 }
132 
133 /**
134  * vfio_intp_mmap_enable - timer function, restores the fast path
135  * if there is no more active IRQ
136  * @opaque: actually points to the VFIO platform device
137  *
 * Called on mmap timer timeout, this function checks whether the
139  * IRQ is still active and if not, restores the fast path.
140  * by construction a single eventfd is handled at a time.
141  * if the IRQ is still active, the timer is re-programmed.
142  */
static void vfio_intp_mmap_enable(void *opaque)
{
    VFIOINTp *tmp;
    VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;

    qemu_mutex_lock(&vdev->intp_mutex);
    /* any still-active IRQ keeps the slow path: defer the restore */
    QLIST_FOREACH(tmp, &vdev->intp_list, next) {
        if (tmp->state == VFIO_IRQ_ACTIVE) {
            trace_vfio_platform_intp_mmap_enable(tmp->pin);
            /* re-program the timer to check active status later */
            timer_mod(vdev->mmap_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                          vdev->mmap_timeout);
            qemu_mutex_unlock(&vdev->intp_mutex);
            return;
        }
    }
    /* no active IRQ left: re-enable mmap'd (fast path) regions */
    vfio_mmap_set_enabled(vdev, true);
    qemu_mutex_unlock(&vdev->intp_mutex);
}
163 
164 /**
165  * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
166  * @opaque: opaque pointer, in practice the VFIOINTp handle
167  *
168  * The function is called on a previous IRQ completion, from
169  * vfio_platform_eoi, while the intp_mutex is locked.
170  * Also in such situation, the slow path already is set and
171  * the mmap timer was already programmed.
172  */
/* Caller must hold vdev->intp_mutex (hence "lockheld"). */
static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
{
    trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
                              event_notifier_get_fd(&intp->interrupt));

    intp->state = VFIO_IRQ_ACTIVE;

    /* trigger the virtual IRQ */
    qemu_set_irq(intp->qemuirq, 1);
}
183 
184 /**
185  * vfio_intp_interrupt - The user-side eventfd handler
186  * @opaque: opaque pointer which in practice is the VFIOINTp handle
187  *
188  * the function is entered in event handler context:
189  * the vIRQ is injected into the guest if there is no other active
190  * or pending IRQ.
191  */
192 static void vfio_intp_interrupt(VFIOINTp *intp)
193 {
194     int ret;
195     VFIOINTp *tmp;
196     VFIOPlatformDevice *vdev = intp->vdev;
197     bool delay_handling = false;
198 
199     qemu_mutex_lock(&vdev->intp_mutex);
200     if (intp->state == VFIO_IRQ_INACTIVE) {
201         QLIST_FOREACH(tmp, &vdev->intp_list, next) {
202             if (tmp->state == VFIO_IRQ_ACTIVE ||
203                 tmp->state == VFIO_IRQ_PENDING) {
204                 delay_handling = true;
205                 break;
206             }
207         }
208     }
209     if (delay_handling) {
210         /*
211          * the new IRQ gets a pending status and is pushed in
212          * the pending queue
213          */
214         intp->state = VFIO_IRQ_PENDING;
215         trace_vfio_intp_interrupt_set_pending(intp->pin);
216         QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
217                              intp, pqnext);
218         ret = event_notifier_test_and_clear(&intp->interrupt);
219         qemu_mutex_unlock(&vdev->intp_mutex);
220         return;
221     }
222 
223     trace_vfio_platform_intp_interrupt(intp->pin,
224                               event_notifier_get_fd(&intp->interrupt));
225 
226     ret = event_notifier_test_and_clear(&intp->interrupt);
227     if (!ret) {
228         error_report("Error when clearing fd=%d (ret = %d)\n",
229                      event_notifier_get_fd(&intp->interrupt), ret);
230     }
231 
232     intp->state = VFIO_IRQ_ACTIVE;
233 
234     /* sets slow path */
235     vfio_mmap_set_enabled(vdev, false);
236 
237     /* trigger the virtual IRQ */
238     qemu_set_irq(intp->qemuirq, 1);
239 
240     /*
241      * Schedule the mmap timer which will restore fastpath when no IRQ
242      * is active anymore
243      */
244     if (vdev->mmap_timeout) {
245         timer_mod(vdev->mmap_timer,
246                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
247                       vdev->mmap_timeout);
248     }
249     qemu_mutex_unlock(&vdev->intp_mutex);
250 }
251 
252 /**
253  * vfio_platform_eoi - IRQ completion routine
254  * @vbasedev: the VFIO device handle
255  *
256  * De-asserts the active virtual IRQ and unmasks the physical IRQ
 * (effective for level-sensitive IRQs auto-masked by the VFIO driver).
258  * Then it handles next pending IRQ if any.
259  * eoi function is called on the first access to any MMIO region
260  * after an IRQ was triggered, trapped since slow path was set.
261  * It is assumed this access corresponds to the IRQ status
262  * register reset. With such a mechanism, a single IRQ can be
263  * handled at a time since there is no way to know which IRQ
264  * was completed by the guest (we would need additional details
265  * about the IRQ status register mask).
266  */
static void vfio_platform_eoi(VFIODevice *vbasedev)
{
    VFIOINTp *intp;
    VFIOPlatformDevice *vdev =
        container_of(vbasedev, VFIOPlatformDevice, vbasedev);

    qemu_mutex_lock(&vdev->intp_mutex);
    /* find the (single) active IRQ and complete it */
    QLIST_FOREACH(intp, &vdev->intp_list, next) {
        if (intp->state == VFIO_IRQ_ACTIVE) {
            trace_vfio_platform_eoi(intp->pin,
                                event_notifier_get_fd(&intp->interrupt));
            intp->state = VFIO_IRQ_INACTIVE;

            /* deassert the virtual IRQ */
            qemu_set_irq(intp->qemuirq, 0);

            if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
                /* unmasks the physical level-sensitive IRQ */
                vfio_unmask_single_irqindex(vbasedev, intp->pin);
            }

            /* a single IRQ can be active at a time */
            break;
        }
    }
    /* in case there are pending IRQs, handle the first one */
    if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
        intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
        /* intp_mutex is still held, as the helper's name requires */
        vfio_intp_inject_pending_lockheld(intp);
        QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
    }
    qemu_mutex_unlock(&vdev->intp_mutex);
}
300 
301 /**
302  * vfio_start_eventfd_injection - starts the virtual IRQ injection using
303  * user-side handled eventfds
304  * @intp: the IRQ struct pointer
305  */
306 
307 static int vfio_start_eventfd_injection(VFIOINTp *intp)
308 {
309     int ret;
310 
311     ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
312     if (ret) {
313         error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
314     }
315     return ret;
316 }
317 
318 /* VFIO skeleton */
319 
/* VFIODeviceOps hook: platform devices always request a reset. */
static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
{
    vbasedev->needs_reset = true;
}
324 
325 /* not implemented yet */
/* not implemented yet: always report failure to the caller */
static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
{
    return -1;
}
330 
331 /**
332  * vfio_populate_device - Allocate and populate MMIO region
333  * and IRQ structs according to driver returned information
334  * @vbasedev: the VFIO device handle
335  *
336  */
static int vfio_populate_device(VFIODevice *vbasedev)
{
    VFIOINTp *intp, *tmp;
    int i, ret = -1;
    VFIOPlatformDevice *vdev =
        container_of(vbasedev, VFIOPlatformDevice, vbasedev);

    /* sanity check: the kernel must have flagged this as a platform device */
    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
        error_report("vfio: Um, this isn't a platform device");
        return ret;
    }

    /* zero-filled, so reg_error can safely g_free() unallocated slots */
    vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);

    for (i = 0; i < vbasedev->num_regions; i++) {
        struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
        VFIORegion *ptr;

        vdev->regions[i] = g_malloc0(sizeof(VFIORegion));
        ptr = vdev->regions[i];
        reg_info.index = i;
        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
        if (ret) {
            error_report("vfio: Error getting region %d info: %m", i);
            goto reg_error;
        }
        ptr->flags = reg_info.flags;
        ptr->size = reg_info.size;
        ptr->fd_offset = reg_info.offset; /* mmap/pread offset into device fd */
        ptr->nr = i;
        ptr->vbasedev = vbasedev;

        trace_vfio_platform_populate_regions(ptr->nr,
                            (unsigned long)ptr->flags,
                            (unsigned long)ptr->size,
                            ptr->vbasedev->fd,
                            (unsigned long)ptr->fd_offset);
    }

    /* timer restoring the fast path once no IRQ is active anymore */
    vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                    vfio_intp_mmap_enable, vdev);

    QSIMPLEQ_INIT(&vdev->pending_intp_queue);

    for (i = 0; i < vbasedev->num_irqs; i++) {
        struct vfio_irq_info irq = { .argsz = sizeof(irq) };

        irq.index = i;
        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
        if (ret) {
            error_printf("vfio: error getting device %s irq info",
                         vbasedev->name);
            goto irq_err;
        } else {
            trace_vfio_platform_populate_interrupts(irq.index,
                                                    irq.count,
                                                    irq.flags);
            intp = vfio_init_intp(vbasedev, irq);
            if (!intp) {
                error_report("vfio: Error installing IRQ %d up", i);
                goto irq_err;
            }
        }
    }
    return 0;
irq_err:
    timer_del(vdev->mmap_timer);
    /*
     * NOTE(review): the eventfds created by vfio_init_intp are not
     * closed here (no event_notifier_cleanup) — verify whether the
     * fds leak on this error path.
     */
    QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
        QLIST_REMOVE(intp, next);
        g_free(intp);
    }
reg_error:
    /* unallocated tail slots are NULL; g_free(NULL) is a no-op */
    for (i = 0; i < vbasedev->num_regions; i++) {
        g_free(vdev->regions[i]);
    }
    g_free(vdev->regions);
    return ret;
}
415 
416 /* specialized functions for VFIO Platform devices */
/* VFIODeviceOps callbacks specialized for platform devices */
static VFIODeviceOps vfio_platform_ops = {
    .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
    .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
    .vfio_eoi = vfio_platform_eoi,
};
422 
423 /**
424  * vfio_base_device_init - perform preliminary VFIO setup
425  * @vbasedev: the VFIO device handle
426  *
427  * Implement the VFIO command sequence that allows to discover
428  * assigned device resources: group extraction, device
429  * fd retrieval, resource query.
430  * Precondition: the device name must be initialized
431  */
static int vfio_base_device_init(VFIODevice *vbasedev)
{
    VFIOGroup *group;
    VFIODevice *vbasedev_iter;
    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
    ssize_t len;
    struct stat st;
    int groupid;
    int ret;

    /* name must be set prior to the call */
    if (!vbasedev->name || strchr(vbasedev->name, '/')) {
        return -EINVAL;
    }

    /* Check that the host device exists */
    g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/",
               vbasedev->name);

    if (stat(path, &st) < 0) {
        error_report("vfio: error: no such host device: %s", path);
        return -errno;
    }

    /* resolve the device's iommu_group sysfs symlink, e.g. .../groups/12 */
    g_strlcat(path, "iommu_group", sizeof(path));
    len = readlink(path, iommu_group_path, sizeof(iommu_group_path));
    if (len < 0 || len >= sizeof(iommu_group_path)) {
        error_report("vfio: error no iommu_group for device");
        return len < 0 ? -errno : -ENAMETOOLONG;
    }

    /* readlink does not NUL-terminate */
    iommu_group_path[len] = 0;
    group_name = basename(iommu_group_path);

    /* the symlink target's last component is the numeric group id */
    if (sscanf(group_name, "%d", &groupid) != 1) {
        error_report("vfio: error reading %s: %m", path);
        return -errno;
    }

    trace_vfio_platform_base_device_init(vbasedev->name, groupid);

    group = vfio_get_group(groupid, &address_space_memory);
    if (!group) {
        error_report("vfio: failed to get group %d", groupid);
        return -ENOENT;
    }

    g_snprintf(path, sizeof(path), "%s", vbasedev->name);

    /* reject double assignment of the same host device */
    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
            error_report("vfio: error: device %s is already attached", path);
            vfio_put_group(group);
            return -EBUSY;
        }
    }
    ret = vfio_get_device(group, path, vbasedev);
    if (ret) {
        error_report("vfio: failed to get device %s", path);
        vfio_put_group(group);
        return ret;
    }

    /* query regions and IRQs now that the device fd is available */
    ret = vfio_populate_device(vbasedev);
    if (ret) {
        error_report("vfio: failed to populate device %s", path);
        vfio_put_group(group);
    }

    return ret;
}
503 
504 /**
505  * vfio_map_region - initialize the 2 memory regions for a given
506  * MMIO region index
507  * @vdev: the VFIO platform device handle
508  * @nr: the index of the region
509  *
510  * Init the top memory region and the mmapped memory region beneath
511  * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
512  * and could not be passed to memory region functions
513 */
static void vfio_map_region(VFIOPlatformDevice *vdev, int nr)
{
    VFIORegion *region = vdev->regions[nr];
    uint64_t size = region->size;
    char name[64];

    /* zero-sized regions have nothing to map */
    if (!size) {
        return;
    }

    g_snprintf(name, sizeof(name), "VFIO %s region %d",
               vdev->vbasedev.name, nr);

    /* A "slow" read/write mapping underlies all regions */
    memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
                          region, name, size);

    g_strlcat(name, " mmap", sizeof(name));

    /* overlay the mmap (fast path) region; failure only costs performance */
    if (vfio_mmap_region(OBJECT(vdev), region, &region->mem,
                         &region->mmap_mem, &region->mmap, size, 0, name)) {
        error_report("%s unsupported. Performance may be slow", name);
    }
}
538 
539 /**
540  * vfio_platform_realize  - the device realize function
541  * @dev: device state pointer
542  * @errp: error
543  *
544  * initialize the device, its memory regions and IRQ structures
545  * IRQ are started separately
546  */
static void vfio_platform_realize(DeviceState *dev, Error **errp)
{
    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
    SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
    VFIODevice *vbasedev = &vdev->vbasedev;
    VFIOINTp *intp;
    int i, ret;

    vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
    vbasedev->ops = &vfio_platform_ops;

    trace_vfio_platform_realize(vbasedev->name, vdev->compat);

    /* group/device fd retrieval + region and IRQ discovery */
    ret = vfio_base_device_init(vbasedev);
    if (ret) {
        error_setg(errp, "vfio: vfio_base_device_init failed for %s",
                   vbasedev->name);
        return;
    }

    /* expose each MMIO region through the sysbus device */
    for (i = 0; i < vbasedev->num_regions; i++) {
        vfio_map_region(vdev, i);
        sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
    }

    /* arm user-side eventfd handling for every discovered IRQ */
    QLIST_FOREACH(intp, &vdev->intp_list, next) {
        vfio_start_eventfd_injection(intp);
    }
}
576 
/* assigned devices cannot be migrated */
static const VMStateDescription vfio_platform_vmstate = {
    .name = TYPE_VFIO_PLATFORM,
    .unmigratable = 1,
};
581 
static Property vfio_platform_dev_properties[] = {
    /* host sysfs device name under /sys/bus/platform/devices/ */
    DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
    /* allow the mmap fast path (default on) */
    DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true),
    /* delay in ms before the mmap timer re-enables the fast path */
    DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
                       mmap_timeout, 1100),
    DEFINE_PROP_END_OF_LIST(),
};
589 
590 static void vfio_platform_class_init(ObjectClass *klass, void *data)
591 {
592     DeviceClass *dc = DEVICE_CLASS(klass);
593 
594     dc->realize = vfio_platform_realize;
595     dc->props = vfio_platform_dev_properties;
596     dc->vmsd = &vfio_platform_vmstate;
597     dc->desc = "VFIO-based platform device assignment";
598     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
599 }
600 
/* abstract base type: concrete platform device types derive from it */
static const TypeInfo vfio_platform_dev_info = {
    .name = TYPE_VFIO_PLATFORM,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(VFIOPlatformDevice),
    .class_init = vfio_platform_class_init,
    .class_size = sizeof(VFIOPlatformDeviceClass),
    .abstract   = true,
};
609 
/* register the QOM type at module load time */
static void register_vfio_platform_dev_type(void)
{
    type_register_static(&vfio_platform_dev_info);
}

type_init(register_vfio_platform_dev_type)
616