1 /* 2 * vfio based device assignment support - platform devices 3 * 4 * Copyright Linaro Limited, 2014 5 * 6 * Authors: 7 * Kim Phillips <kim.phillips@linaro.org> 8 * Eric Auger <eric.auger@linaro.org> 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2. See 11 * the COPYING file in the top-level directory. 12 * 13 * Based on vfio based PCI device assignment support: 14 * Copyright Red Hat, Inc. 2012 15 */ 16 17 #include <sys/ioctl.h> 18 #include <linux/vfio.h> 19 20 #include "hw/vfio/vfio-platform.h" 21 #include "qemu/error-report.h" 22 #include "qemu/range.h" 23 #include "sysemu/sysemu.h" 24 #include "exec/memory.h" 25 #include "qemu/queue.h" 26 #include "hw/sysbus.h" 27 #include "trace.h" 28 #include "hw/platform-bus.h" 29 30 /* 31 * Functions used whatever the injection method 32 */ 33 34 /** 35 * vfio_init_intp - allocate, initialize the IRQ struct pointer 36 * and add it into the list of IRQs 37 * @vbasedev: the VFIO device handle 38 * @info: irq info struct retrieved from VFIO driver 39 */ 40 static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, 41 struct vfio_irq_info info) 42 { 43 int ret; 44 VFIOPlatformDevice *vdev = 45 container_of(vbasedev, VFIOPlatformDevice, vbasedev); 46 SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev); 47 VFIOINTp *intp; 48 49 intp = g_malloc0(sizeof(*intp)); 50 intp->vdev = vdev; 51 intp->pin = info.index; 52 intp->flags = info.flags; 53 intp->state = VFIO_IRQ_INACTIVE; 54 55 sysbus_init_irq(sbdev, &intp->qemuirq); 56 57 /* Get an eventfd for trigger */ 58 ret = event_notifier_init(&intp->interrupt, 0); 59 if (ret) { 60 g_free(intp); 61 error_report("vfio: Error: trigger event_notifier_init failed "); 62 return NULL; 63 } 64 65 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next); 66 return intp; 67 } 68 69 /** 70 * vfio_set_trigger_eventfd - set VFIO eventfd handling 71 * 72 * @intp: IRQ struct handle 73 * @handler: handler to be called on eventfd signaling 74 * 75 * Setup VFIO signaling and attach an optional user-side handler 76 * to the eventfd 77 */ 78 static int vfio_set_trigger_eventfd(VFIOINTp *intp, 79 eventfd_user_side_handler_t handler) 80 { 81 VFIODevice *vbasedev = &intp->vdev->vbasedev; 82 struct vfio_irq_set *irq_set; 83 int argsz, ret; 84 int32_t *pfd; 85 86 argsz = sizeof(*irq_set) + sizeof(*pfd); 87 irq_set = g_malloc0(argsz); 88 irq_set->argsz = argsz; 89 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; 90 irq_set->index = intp->pin; 91 irq_set->start = 0; 92 irq_set->count = 1; 93 pfd = (int32_t *)&irq_set->data; 94 *pfd = event_notifier_get_fd(&intp->interrupt); 95 qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp); 96 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set); 97 g_free(irq_set); 98 if (ret < 0) { 99 error_report("vfio: Failed to set trigger eventfd: %m"); 100 qemu_set_fd_handler(*pfd, NULL, NULL, NULL); 101 } 102 return ret; 103 } 104 105 /* 106 * Functions only used when eventfds are handled on user-side 107 * ie. without irqfd 108 */ 109 110 /** 111 * vfio_mmap_set_enabled - enable/disable the fast path mode 112 * @vdev: the VFIO platform device 113 * @enabled: the target mmap state 114 * 115 * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP); 116 * enabled = false ~ slow path = MMIO region is trapped and region callbacks 117 * are called; slow path enables to trap the device IRQ status register reset 118 */ 119 120 static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) 121 { 122 int i; 123 124 trace_vfio_platform_mmap_set_enabled(enabled); 125 126 for (i = 0; i < vdev->vbasedev.num_regions; i++) { 127 VFIORegion *region = vdev->regions[i]; 128 129 memory_region_set_enabled(®ion->mmap_mem, enabled); 130 } 131 } 132 133 /** 134 * vfio_intp_mmap_enable - timer function, restores the fast path 135 * if there is no more active IRQ 136 * @opaque: actually points to the VFIO platform device 137 * 138 * Called on mmap timer timout, this function checks whether the 139 * IRQ is still active and if not, restores the fast path. 140 * by construction a single eventfd is handled at a time. 141 * if the IRQ is still active, the timer is re-programmed. 142 */ 143 static void vfio_intp_mmap_enable(void *opaque) 144 { 145 VFIOINTp *tmp; 146 VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque; 147 148 qemu_mutex_lock(&vdev->intp_mutex); 149 QLIST_FOREACH(tmp, &vdev->intp_list, next) { 150 if (tmp->state == VFIO_IRQ_ACTIVE) { 151 trace_vfio_platform_intp_mmap_enable(tmp->pin); 152 /* re-program the timer to check active status later */ 153 timer_mod(vdev->mmap_timer, 154 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 155 vdev->mmap_timeout); 156 qemu_mutex_unlock(&vdev->intp_mutex); 157 return; 158 } 159 } 160 vfio_mmap_set_enabled(vdev, true); 161 qemu_mutex_unlock(&vdev->intp_mutex); 162 } 163 164 /** 165 * vfio_intp_inject_pending_lockheld - Injects a pending IRQ 166 * @opaque: opaque pointer, in practice the VFIOINTp handle 167 * 168 * The function is called on a previous IRQ completion, from 169 * vfio_platform_eoi, while the intp_mutex is locked. 170 * Also in such situation, the slow path already is set and 171 * the mmap timer was already programmed. 172 */ 173 static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp) 174 { 175 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin, 176 event_notifier_get_fd(&intp->interrupt)); 177 178 intp->state = VFIO_IRQ_ACTIVE; 179 180 /* trigger the virtual IRQ */ 181 qemu_set_irq(intp->qemuirq, 1); 182 } 183 184 /** 185 * vfio_intp_interrupt - The user-side eventfd handler 186 * @opaque: opaque pointer which in practice is the VFIOINTp handle 187 * 188 * the function is entered in event handler context: 189 * the vIRQ is injected into the guest if there is no other active 190 * or pending IRQ. 191 */ 192 static void vfio_intp_interrupt(VFIOINTp *intp) 193 { 194 int ret; 195 VFIOINTp *tmp; 196 VFIOPlatformDevice *vdev = intp->vdev; 197 bool delay_handling = false; 198 199 qemu_mutex_lock(&vdev->intp_mutex); 200 if (intp->state == VFIO_IRQ_INACTIVE) { 201 QLIST_FOREACH(tmp, &vdev->intp_list, next) { 202 if (tmp->state == VFIO_IRQ_ACTIVE || 203 tmp->state == VFIO_IRQ_PENDING) { 204 delay_handling = true; 205 break; 206 } 207 } 208 } 209 if (delay_handling) { 210 /* 211 * the new IRQ gets a pending status and is pushed in 212 * the pending queue 213 */ 214 intp->state = VFIO_IRQ_PENDING; 215 trace_vfio_intp_interrupt_set_pending(intp->pin); 216 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue, 217 intp, pqnext); 218 ret = event_notifier_test_and_clear(&intp->interrupt); 219 qemu_mutex_unlock(&vdev->intp_mutex); 220 return; 221 } 222 223 trace_vfio_platform_intp_interrupt(intp->pin, 224 event_notifier_get_fd(&intp->interrupt)); 225 226 ret = event_notifier_test_and_clear(&intp->interrupt); 227 if (!ret) { 228 error_report("Error when clearing fd=%d (ret = %d)\n", 229 event_notifier_get_fd(&intp->interrupt), ret); 230 } 231 232 intp->state = VFIO_IRQ_ACTIVE; 233 234 /* sets slow path */ 235 vfio_mmap_set_enabled(vdev, false); 236 237 /* trigger the virtual IRQ */ 238 qemu_set_irq(intp->qemuirq, 1); 239 240 /* 241 * Schedule the mmap timer which will restore fastpath when no IRQ 242 * is active anymore 243 */ 244 if (vdev->mmap_timeout) { 245 timer_mod(vdev->mmap_timer, 246 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 247 vdev->mmap_timeout); 248 } 249 qemu_mutex_unlock(&vdev->intp_mutex); 250 } 251 252 /** 253 * vfio_platform_eoi - IRQ completion routine 254 * @vbasedev: the VFIO device handle 255 * 256 * De-asserts the active virtual IRQ and unmasks the physical IRQ 257 * (effective for level sensitive IRQ auto-masked by the VFIO driver). 258 * Then it handles next pending IRQ if any. 259 * eoi function is called on the first access to any MMIO region 260 * after an IRQ was triggered, trapped since slow path was set. 261 * It is assumed this access corresponds to the IRQ status 262 * register reset. With such a mechanism, a single IRQ can be 263 * handled at a time since there is no way to know which IRQ 264 * was completed by the guest (we would need additional details 265 * about the IRQ status register mask). 266 */ 267 static void vfio_platform_eoi(VFIODevice *vbasedev) 268 { 269 VFIOINTp *intp; 270 VFIOPlatformDevice *vdev = 271 container_of(vbasedev, VFIOPlatformDevice, vbasedev); 272 273 qemu_mutex_lock(&vdev->intp_mutex); 274 QLIST_FOREACH(intp, &vdev->intp_list, next) { 275 if (intp->state == VFIO_IRQ_ACTIVE) { 276 trace_vfio_platform_eoi(intp->pin, 277 event_notifier_get_fd(&intp->interrupt)); 278 intp->state = VFIO_IRQ_INACTIVE; 279 280 /* deassert the virtual IRQ */ 281 qemu_set_irq(intp->qemuirq, 0); 282 283 if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) { 284 /* unmasks the physical level-sensitive IRQ */ 285 vfio_unmask_single_irqindex(vbasedev, intp->pin); 286 } 287 288 /* a single IRQ can be active at a time */ 289 break; 290 } 291 } 292 /* in case there are pending IRQs, handle the first one */ 293 if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) { 294 intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue); 295 vfio_intp_inject_pending_lockheld(intp); 296 QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext); 297 } 298 qemu_mutex_unlock(&vdev->intp_mutex); 299 } 300 301 /** 302 * vfio_start_eventfd_injection - starts the virtual IRQ injection using 303 * user-side handled eventfds 304 * @intp: the IRQ struct pointer 305 */ 306 307 static int vfio_start_eventfd_injection(VFIOINTp *intp) 308 { 309 int ret; 310 311 ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt); 312 if (ret) { 313 error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m"); 314 } 315 return ret; 316 } 317 318 /* VFIO skeleton */ 319 320 static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev) 321 { 322 vbasedev->needs_reset = true; 323 } 324 325 /* not implemented yet */ 326 static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev) 327 { 328 return -1; 329 } 330 331 /** 332 * vfio_populate_device - Allocate and populate MMIO region 333 * and IRQ structs according to driver returned information 334 * @vbasedev: the VFIO device handle 335 * 336 */ 337 static int vfio_populate_device(VFIODevice *vbasedev) 338 { 339 VFIOINTp *intp, *tmp; 340 int i, ret = -1; 341 VFIOPlatformDevice *vdev = 342 container_of(vbasedev, VFIOPlatformDevice, vbasedev); 343 344 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) { 345 error_report("vfio: Um, this isn't a platform device"); 346 return ret; 347 } 348 349 vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); 350 351 for (i = 0; i < vbasedev->num_regions; i++) { 352 struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; 353 VFIORegion *ptr; 354 355 vdev->regions[i] = g_malloc0(sizeof(VFIORegion)); 356 ptr = vdev->regions[i]; 357 reg_info.index = i; 358 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); 359 if (ret) { 360 error_report("vfio: Error getting region %d info: %m", i); 361 goto reg_error; 362 } 363 ptr->flags = reg_info.flags; 364 ptr->size = reg_info.size; 365 ptr->fd_offset = reg_info.offset; 366 ptr->nr = i; 367 ptr->vbasedev = vbasedev; 368 369 trace_vfio_platform_populate_regions(ptr->nr, 370 (unsigned long)ptr->flags, 371 (unsigned long)ptr->size, 372 ptr->vbasedev->fd, 373 (unsigned long)ptr->fd_offset); 374 } 375 376 vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, 377 vfio_intp_mmap_enable, vdev); 378 379 QSIMPLEQ_INIT(&vdev->pending_intp_queue); 380 381 for (i = 0; i < vbasedev->num_irqs; i++) { 382 struct vfio_irq_info irq = { .argsz = sizeof(irq) }; 383 384 irq.index = i; 385 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); 386 if (ret) { 387 error_printf("vfio: error getting device %s irq info", 388 vbasedev->name); 389 goto irq_err; 390 } else { 391 trace_vfio_platform_populate_interrupts(irq.index, 392 irq.count, 393 irq.flags); 394 intp = vfio_init_intp(vbasedev, irq); 395 if (!intp) { 396 error_report("vfio: Error installing IRQ %d up", i); 397 goto irq_err; 398 } 399 } 400 } 401 return 0; 402 irq_err: 403 timer_del(vdev->mmap_timer); 404 QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) { 405 QLIST_REMOVE(intp, next); 406 g_free(intp); 407 } 408 reg_error: 409 for (i = 0; i < vbasedev->num_regions; i++) { 410 g_free(vdev->regions[i]); 411 } 412 g_free(vdev->regions); 413 return ret; 414 } 415 416 /* specialized functions for VFIO Platform devices */ 417 static VFIODeviceOps vfio_platform_ops = { 418 .vfio_compute_needs_reset = vfio_platform_compute_needs_reset, 419 .vfio_hot_reset_multi = vfio_platform_hot_reset_multi, 420 .vfio_eoi = vfio_platform_eoi, 421 }; 422 423 /** 424 * vfio_base_device_init - perform preliminary VFIO setup 425 * @vbasedev: the VFIO device handle 426 * 427 * Implement the VFIO command sequence that allows to discover 428 * assigned device resources: group extraction, device 429 * fd retrieval, resource query. 430 * Precondition: the device name must be initialized 431 */ 432 static int vfio_base_device_init(VFIODevice *vbasedev) 433 { 434 VFIOGroup *group; 435 VFIODevice *vbasedev_iter; 436 char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; 437 ssize_t len; 438 struct stat st; 439 int groupid; 440 int ret; 441 442 /* name must be set prior to the call */ 443 if (!vbasedev->name || strchr(vbasedev->name, '/')) { 444 return -EINVAL; 445 } 446 447 /* Check that the host device exists */ 448 g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", 449 vbasedev->name); 450 451 if (stat(path, &st) < 0) { 452 error_report("vfio: error: no such host device: %s", path); 453 return -errno; 454 } 455 456 g_strlcat(path, "iommu_group", sizeof(path)); 457 len = readlink(path, iommu_group_path, sizeof(iommu_group_path)); 458 if (len < 0 || len >= sizeof(iommu_group_path)) { 459 error_report("vfio: error no iommu_group for device"); 460 return len < 0 ? -errno : -ENAMETOOLONG; 461 } 462 463 iommu_group_path[len] = 0; 464 group_name = basename(iommu_group_path); 465 466 if (sscanf(group_name, "%d", &groupid) != 1) { 467 error_report("vfio: error reading %s: %m", path); 468 return -errno; 469 } 470 471 trace_vfio_platform_base_device_init(vbasedev->name, groupid); 472 473 group = vfio_get_group(groupid, &address_space_memory); 474 if (!group) { 475 error_report("vfio: failed to get group %d", groupid); 476 return -ENOENT; 477 } 478 479 g_snprintf(path, sizeof(path), "%s", vbasedev->name); 480 481 QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { 482 if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { 483 error_report("vfio: error: device %s is already attached", path); 484 vfio_put_group(group); 485 return -EBUSY; 486 } 487 } 488 ret = vfio_get_device(group, path, vbasedev); 489 if (ret) { 490 error_report("vfio: failed to get device %s", path); 491 vfio_put_group(group); 492 return ret; 493 } 494 495 ret = vfio_populate_device(vbasedev); 496 if (ret) { 497 error_report("vfio: failed to populate device %s", path); 498 vfio_put_group(group); 499 } 500 501 return ret; 502 } 503 504 /** 505 * vfio_map_region - initialize the 2 memory regions for a given 506 * MMIO region index 507 * @vdev: the VFIO platform device handle 508 * @nr: the index of the region 509 * 510 * Init the top memory region and the mmapped memory region beneath 511 * VFIOPlatformDevice is used since VFIODevice is not a QOM Object 512 * and could not be passed to memory region functions 513 */ 514 static void vfio_map_region(VFIOPlatformDevice *vdev, int nr) 515 { 516 VFIORegion *region = vdev->regions[nr]; 517 uint64_t size = region->size; 518 char name[64]; 519 520 if (!size) { 521 return; 522 } 523 524 g_snprintf(name, sizeof(name), "VFIO %s region %d", 525 vdev->vbasedev.name, nr); 526 527 /* A "slow" read/write mapping underlies all regions */ 528 memory_region_init_io(®ion->mem, OBJECT(vdev), &vfio_region_ops, 529 region, name, size); 530 531 g_strlcat(name, " mmap", sizeof(name)); 532 533 if (vfio_mmap_region(OBJECT(vdev), region, ®ion->mem, 534 ®ion->mmap_mem, ®ion->mmap, size, 0, name)) { 535 error_report("%s unsupported. Performance may be slow", name); 536 } 537 } 538 539 /** 540 * vfio_platform_realize - the device realize function 541 * @dev: device state pointer 542 * @errp: error 543 * 544 * initialize the device, its memory regions and IRQ structures 545 * IRQ are started separately 546 */ 547 static void vfio_platform_realize(DeviceState *dev, Error **errp) 548 { 549 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); 550 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev); 551 VFIODevice *vbasedev = &vdev->vbasedev; 552 VFIOINTp *intp; 553 int i, ret; 554 555 vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; 556 vbasedev->ops = &vfio_platform_ops; 557 558 trace_vfio_platform_realize(vbasedev->name, vdev->compat); 559 560 ret = vfio_base_device_init(vbasedev); 561 if (ret) { 562 error_setg(errp, "vfio: vfio_base_device_init failed for %s", 563 vbasedev->name); 564 return; 565 } 566 567 for (i = 0; i < vbasedev->num_regions; i++) { 568 vfio_map_region(vdev, i); 569 sysbus_init_mmio(sbdev, &vdev->regions[i]->mem); 570 } 571 572 QLIST_FOREACH(intp, &vdev->intp_list, next) { 573 vfio_start_eventfd_injection(intp); 574 } 575 } 576 577 static const VMStateDescription vfio_platform_vmstate = { 578 .name = TYPE_VFIO_PLATFORM, 579 .unmigratable = 1, 580 }; 581 582 static Property vfio_platform_dev_properties[] = { 583 DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name), 584 DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true), 585 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, 586 mmap_timeout, 1100), 587 DEFINE_PROP_END_OF_LIST(), 588 }; 589 590 static void vfio_platform_class_init(ObjectClass *klass, void *data) 591 { 592 DeviceClass *dc = DEVICE_CLASS(klass); 593 594 dc->realize = vfio_platform_realize; 595 dc->props = vfio_platform_dev_properties; 596 dc->vmsd = &vfio_platform_vmstate; 597 dc->desc = "VFIO-based platform device assignment"; 598 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 599 } 600 601 static const TypeInfo vfio_platform_dev_info = { 602 .name = TYPE_VFIO_PLATFORM, 603 .parent = TYPE_SYS_BUS_DEVICE, 604 .instance_size = sizeof(VFIOPlatformDevice), 605 .class_init = vfio_platform_class_init, 606 .class_size = sizeof(VFIOPlatformDeviceClass), 607 .abstract = true, 608 }; 609 610 static void register_vfio_platform_dev_type(void) 611 { 612 type_register_static(&vfio_platform_dev_info); 613 } 614 615 type_init(register_vfio_platform_dev_type) 616