1 /** 2 * QEMU vfio-user-server server object 3 * 4 * Copyright © 2022 Oracle and/or its affiliates. 5 * 6 * This work is licensed under the terms of the GNU GPL-v2, version 2 or later. 7 * 8 * See the COPYING file in the top-level directory. 9 * 10 */ 11 12 /** 13 * Usage: add options: 14 * -machine x-remote,vfio-user=on,auto-shutdown=on 15 * -device <PCI-device>,id=<pci-dev-id> 16 * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>, 17 * device=<pci-dev-id> 18 * 19 * Note that x-vfio-user-server object must be used with x-remote machine only. 20 * This server could only support PCI devices for now. 21 * 22 * type - SocketAddress type - presently "unix" alone is supported. Required 23 * option 24 * 25 * path - named unix socket, it will be created by the server. It is 26 * a required option 27 * 28 * device - id of a device on the server, a required option. PCI devices 29 * alone are supported presently. 30 * 31 * notes - x-vfio-user-server could block IO and monitor during the 32 * initialization phase. 33 * 34 * When x-remote machine has the auto-shutdown property 35 * enabled (default), x-vfio-user-server terminates after the last 36 * client disconnects. Otherwise, it will continue running until 37 * explicitly killed. 38 */ 39 40 #include "qemu/osdep.h" 41 42 #include "qom/object.h" 43 #include "qom/object_interfaces.h" 44 #include "qemu/error-report.h" 45 #include "trace.h" 46 #include "sysemu/runstate.h" 47 #include "hw/boards.h" 48 #include "hw/remote/machine.h" 49 #include "qapi/error.h" 50 #include "qapi/qapi-visit-sockets.h" 51 #include "qapi/qapi-events-misc.h" 52 #include "qemu/notify.h" 53 #include "qemu/thread.h" 54 #include "qemu/main-loop.h" 55 #include "sysemu/sysemu.h" 56 #include "libvfio-user.h" 57 #include "hw/qdev-core.h" 58 #include "hw/pci/pci.h" 59 #include "qemu/timer.h" 60 #include "exec/memory.h" 61 #include "hw/pci/msi.h" 62 #include "hw/pci/msix.h" 63 #include "hw/remote/vfio-user-obj.h" 64 65 #define TYPE_VFU_OBJECT "x-vfio-user-server" 66 OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) 67 68 /** 69 * VFU_OBJECT_ERROR - reports an error message. 70 * 71 * If auto_shutdown is set, it aborts the machine on error. Otherwise, 72 * it logs an error message without aborting. auto_shutdown is disabled 73 * when the server serves clients from multiple VMs; as such, an error 74 * from one VM shouldn't be able to disrupt other VM's services. 75 */ 76 #define VFU_OBJECT_ERROR(o, fmt, ...) \ 77 { \ 78 if (vfu_object_auto_shutdown()) { \ 79 error_setg(&error_abort, (fmt), ## __VA_ARGS__); \ 80 } else { \ 81 error_report((fmt), ## __VA_ARGS__); \ 82 } \ 83 } \ 84 85 struct VfuObjectClass { 86 ObjectClass parent_class; 87 88 unsigned int nr_devs; 89 }; 90 91 struct VfuObject { 92 /* private */ 93 Object parent; 94 95 SocketAddress *socket; 96 97 char *device; 98 99 Error *err; 100 101 Notifier machine_done; 102 103 vfu_ctx_t *vfu_ctx; 104 105 PCIDevice *pci_dev; 106 107 Error *unplug_blocker; 108 109 int vfu_poll_fd; 110 111 MSITriggerFunc *default_msi_trigger; 112 MSIPrepareMessageFunc *default_msi_prepare_message; 113 MSIxPrepareMessageFunc *default_msix_prepare_message; 114 }; 115 116 static void vfu_object_init_ctx(VfuObject *o, Error **errp); 117 118 static bool vfu_object_auto_shutdown(void) 119 { 120 bool auto_shutdown = true; 121 Error *local_err = NULL; 122 123 if (!current_machine) { 124 return auto_shutdown; 125 } 126 127 auto_shutdown = object_property_get_bool(OBJECT(current_machine), 128 "auto-shutdown", 129 &local_err); 130 131 /* 132 * local_err would be set if no such property exists - safe to ignore. 133 * Unlikely scenario as auto-shutdown is always defined for 134 * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with 135 * TYPE_REMOTE_MACHINE 136 */ 137 if (local_err) { 138 auto_shutdown = true; 139 error_free(local_err); 140 } 141 142 return auto_shutdown; 143 } 144 145 static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, 146 void *opaque, Error **errp) 147 { 148 VfuObject *o = VFU_OBJECT(obj); 149 150 if (o->vfu_ctx) { 151 error_setg(errp, "vfu: Unable to set socket property - server busy"); 152 return; 153 } 154 155 qapi_free_SocketAddress(o->socket); 156 157 o->socket = NULL; 158 159 visit_type_SocketAddress(v, name, &o->socket, errp); 160 161 if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { 162 error_setg(errp, "vfu: Unsupported socket type - %s", 163 SocketAddressType_str(o->socket->type)); 164 qapi_free_SocketAddress(o->socket); 165 o->socket = NULL; 166 return; 167 } 168 169 trace_vfu_prop("socket", o->socket->u.q_unix.path); 170 171 vfu_object_init_ctx(o, errp); 172 } 173 174 static void vfu_object_set_device(Object *obj, const char *str, Error **errp) 175 { 176 VfuObject *o = VFU_OBJECT(obj); 177 178 if (o->vfu_ctx) { 179 error_setg(errp, "vfu: Unable to set device property - server busy"); 180 return; 181 } 182 183 g_free(o->device); 184 185 o->device = g_strdup(str); 186 187 trace_vfu_prop("device", str); 188 189 vfu_object_init_ctx(o, errp); 190 } 191 192 static void vfu_object_ctx_run(void *opaque) 193 { 194 VfuObject *o = opaque; 195 const char *vfu_id; 196 char *vfu_path, *pci_dev_path; 197 int ret = -1; 198 199 while (ret != 0) { 200 ret = vfu_run_ctx(o->vfu_ctx); 201 if (ret < 0) { 202 if (errno == EINTR) { 203 continue; 204 } else if (errno == ENOTCONN) { 205 vfu_id = object_get_canonical_path_component(OBJECT(o)); 206 vfu_path = object_get_canonical_path(OBJECT(o)); 207 g_assert(o->pci_dev); 208 pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev)); 209 /* o->device is a required property and is non-NULL here */ 210 g_assert(o->device); 211 qapi_event_send_vfu_client_hangup(vfu_id, vfu_path, 212 o->device, pci_dev_path); 213 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); 214 o->vfu_poll_fd = -1; 215 object_unparent(OBJECT(o)); 216 g_free(vfu_path); 217 g_free(pci_dev_path); 218 break; 219 } else { 220 VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s", 221 o->device, strerror(errno)); 222 break; 223 } 224 } 225 } 226 } 227 228 static void vfu_object_attach_ctx(void *opaque) 229 { 230 VfuObject *o = opaque; 231 GPollFD pfds[1]; 232 int ret; 233 234 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); 235 236 pfds[0].fd = o->vfu_poll_fd; 237 pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; 238 239 retry_attach: 240 ret = vfu_attach_ctx(o->vfu_ctx); 241 if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { 242 /** 243 * vfu_object_attach_ctx can block QEMU's main loop 244 * during attach - the monitor and other IO 245 * could be unresponsive during this time. 246 */ 247 (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); 248 goto retry_attach; 249 } else if (ret < 0) { 250 VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s", 251 o->device, strerror(errno)); 252 return; 253 } 254 255 o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); 256 if (o->vfu_poll_fd < 0) { 257 VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device); 258 return; 259 } 260 261 qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o); 262 } 263 264 static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, 265 size_t count, loff_t offset, 266 const bool is_write) 267 { 268 VfuObject *o = vfu_get_private(vfu_ctx); 269 uint32_t pci_access_width = sizeof(uint32_t); 270 size_t bytes = count; 271 uint32_t val = 0; 272 char *ptr = buf; 273 int len; 274 275 /* 276 * Writes to the BAR registers would trigger an update to the 277 * global Memory and IO AddressSpaces. But the remote device 278 * never uses the global AddressSpaces, therefore overlapping 279 * memory regions are not a problem 280 */ 281 while (bytes > 0) { 282 len = (bytes > pci_access_width) ? pci_access_width : bytes; 283 if (is_write) { 284 memcpy(&val, ptr, len); 285 pci_host_config_write_common(o->pci_dev, offset, 286 pci_config_size(o->pci_dev), 287 val, len); 288 trace_vfu_cfg_write(offset, val); 289 } else { 290 val = pci_host_config_read_common(o->pci_dev, offset, 291 pci_config_size(o->pci_dev), len); 292 memcpy(ptr, &val, len); 293 trace_vfu_cfg_read(offset, val); 294 } 295 offset += len; 296 ptr += len; 297 bytes -= len; 298 } 299 300 return count; 301 } 302 303 static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 304 { 305 VfuObject *o = vfu_get_private(vfu_ctx); 306 AddressSpace *dma_as = NULL; 307 MemoryRegion *subregion = NULL; 308 g_autofree char *name = NULL; 309 struct iovec *iov = &info->iova; 310 311 if (!info->vaddr) { 312 return; 313 } 314 315 name = g_strdup_printf("mem-%s-%"PRIx64"", o->device, 316 (uint64_t)info->vaddr); 317 318 subregion = g_new0(MemoryRegion, 1); 319 320 memory_region_init_ram_ptr(subregion, NULL, name, 321 iov->iov_len, info->vaddr); 322 323 dma_as = pci_device_iommu_address_space(o->pci_dev); 324 325 memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion); 326 327 trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len); 328 } 329 330 static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 331 { 332 VfuObject *o = vfu_get_private(vfu_ctx); 333 AddressSpace *dma_as = NULL; 334 MemoryRegion *mr = NULL; 335 ram_addr_t offset; 336 337 mr = memory_region_from_host(info->vaddr, &offset); 338 if (!mr) { 339 return; 340 } 341 342 dma_as = pci_device_iommu_address_space(o->pci_dev); 343 344 memory_region_del_subregion(dma_as->root, mr); 345 346 object_unparent((OBJECT(mr))); 347 348 trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); 349 } 350 351 static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset, 352 hwaddr size, const bool is_write) 353 { 354 uint8_t *ptr = buf; 355 bool release_lock = false; 356 uint8_t *ram_ptr = NULL; 357 MemTxResult result; 358 int access_size; 359 uint64_t val; 360 361 if (memory_access_is_direct(mr, is_write)) { 362 /** 363 * Some devices expose a PCI expansion ROM, which could be buffer 364 * based as compared to other regions which are primarily based on 365 * MemoryRegionOps. memory_region_find() would already check 366 * for buffer overflow, we don't need to repeat it here. 367 */ 368 ram_ptr = memory_region_get_ram_ptr(mr); 369 370 if (is_write) { 371 memcpy((ram_ptr + offset), buf, size); 372 } else { 373 memcpy(buf, (ram_ptr + offset), size); 374 } 375 376 return 0; 377 } 378 379 while (size) { 380 /** 381 * The read/write logic used below is similar to the ones in 382 * flatview_read/write_continue() 383 */ 384 release_lock = prepare_mmio_access(mr); 385 386 access_size = memory_access_size(mr, size, offset); 387 388 if (is_write) { 389 val = ldn_he_p(ptr, access_size); 390 391 result = memory_region_dispatch_write(mr, offset, val, 392 size_memop(access_size), 393 MEMTXATTRS_UNSPECIFIED); 394 } else { 395 result = memory_region_dispatch_read(mr, offset, &val, 396 size_memop(access_size), 397 MEMTXATTRS_UNSPECIFIED); 398 399 stn_he_p(ptr, access_size, val); 400 } 401 402 if (release_lock) { 403 qemu_mutex_unlock_iothread(); 404 release_lock = false; 405 } 406 407 if (result != MEMTX_OK) { 408 return -1; 409 } 410 411 size -= access_size; 412 ptr += access_size; 413 offset += access_size; 414 } 415 416 return 0; 417 } 418 419 static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar, 420 hwaddr bar_offset, char * const buf, 421 hwaddr len, const bool is_write) 422 { 423 MemoryRegionSection section = { 0 }; 424 uint8_t *ptr = (uint8_t *)buf; 425 MemoryRegion *section_mr = NULL; 426 uint64_t section_size; 427 hwaddr section_offset; 428 hwaddr size = 0; 429 430 while (len) { 431 section = memory_region_find(pci_dev->io_regions[pci_bar].memory, 432 bar_offset, len); 433 434 if (!section.mr) { 435 warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset); 436 return size; 437 } 438 439 section_mr = section.mr; 440 section_offset = section.offset_within_region; 441 section_size = int128_get64(section.size); 442 443 if (is_write && section_mr->readonly) { 444 warn_report("vfu: attempting to write to readonly region in " 445 "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]", 446 pci_bar, bar_offset, 447 (bar_offset + section_size)); 448 memory_region_unref(section_mr); 449 return size; 450 } 451 452 if (vfu_object_mr_rw(section_mr, ptr, section_offset, 453 section_size, is_write)) { 454 warn_report("vfu: failed to %s " 455 "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d", 456 is_write ? "write to" : "read from", bar_offset, 457 (bar_offset + section_size), pci_bar); 458 memory_region_unref(section_mr); 459 return size; 460 } 461 462 size += section_size; 463 bar_offset += section_size; 464 ptr += section_size; 465 len -= section_size; 466 467 memory_region_unref(section_mr); 468 } 469 470 return size; 471 } 472 473 /** 474 * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs. 475 * 476 * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would 477 * define vfu_object_bar2_handler 478 */ 479 #define VFU_OBJECT_BAR_HANDLER(BAR_NO) \ 480 static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \ 481 char * const buf, size_t count, \ 482 loff_t offset, const bool is_write) \ 483 { \ 484 VfuObject *o = vfu_get_private(vfu_ctx); \ 485 PCIDevice *pci_dev = o->pci_dev; \ 486 \ 487 return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \ 488 buf, count, is_write); \ 489 } \ 490 491 VFU_OBJECT_BAR_HANDLER(0) 492 VFU_OBJECT_BAR_HANDLER(1) 493 VFU_OBJECT_BAR_HANDLER(2) 494 VFU_OBJECT_BAR_HANDLER(3) 495 VFU_OBJECT_BAR_HANDLER(4) 496 VFU_OBJECT_BAR_HANDLER(5) 497 VFU_OBJECT_BAR_HANDLER(6) 498 499 static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = { 500 &vfu_object_bar0_handler, 501 &vfu_object_bar1_handler, 502 &vfu_object_bar2_handler, 503 &vfu_object_bar3_handler, 504 &vfu_object_bar4_handler, 505 &vfu_object_bar5_handler, 506 &vfu_object_bar6_handler, 507 }; 508 509 /** 510 * vfu_object_register_bars - Identify active BAR regions of pdev and setup 511 * callbacks to handle read/write accesses 512 */ 513 static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) 514 { 515 int flags = VFU_REGION_FLAG_RW; 516 int i; 517 518 for (i = 0; i < PCI_NUM_REGIONS; i++) { 519 if (!pdev->io_regions[i].size) { 520 continue; 521 } 522 523 if ((i == VFU_PCI_DEV_ROM_REGION_IDX) || 524 pdev->io_regions[i].memory->readonly) { 525 flags &= ~VFU_REGION_FLAG_WRITE; 526 } 527 528 vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i, 529 (size_t)pdev->io_regions[i].size, 530 vfu_object_bar_handlers[i], 531 flags, NULL, 0, -1, 0); 532 533 trace_vfu_bar_register(i, pdev->io_regions[i].addr, 534 pdev->io_regions[i].size); 535 } 536 } 537 538 static int vfu_object_map_irq(PCIDevice *pci_dev, int intx) 539 { 540 int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), 541 pci_dev->devfn); 542 543 return pci_bdf; 544 } 545 546 static void vfu_object_set_irq(void *opaque, int pirq, int level) 547 { 548 PCIBus *pci_bus = opaque; 549 PCIDevice *pci_dev = NULL; 550 vfu_ctx_t *vfu_ctx = NULL; 551 int pci_bus_num, devfn; 552 553 if (level) { 554 pci_bus_num = PCI_BUS_NUM(pirq); 555 devfn = PCI_BDF_TO_DEVFN(pirq); 556 557 /* 558 * pci_find_device() performs at O(1) if the device is attached 559 * to the root PCI bus. Whereas, if the device is attached to a 560 * secondary PCI bus (such as when a root port is involved), 561 * finding the parent PCI bus could take O(n) 562 */ 563 pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn); 564 565 vfu_ctx = pci_dev->irq_opaque; 566 567 g_assert(vfu_ctx); 568 569 vfu_irq_trigger(vfu_ctx, 0); 570 } 571 } 572 573 static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev, 574 unsigned int vector) 575 { 576 MSIMessage msg; 577 578 msg.address = 0; 579 msg.data = vector; 580 581 return msg; 582 } 583 584 static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg) 585 { 586 vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque; 587 588 vfu_irq_trigger(vfu_ctx, msg.data); 589 } 590 591 static void vfu_object_setup_msi_cbs(VfuObject *o) 592 { 593 o->default_msi_trigger = o->pci_dev->msi_trigger; 594 o->default_msi_prepare_message = o->pci_dev->msi_prepare_message; 595 o->default_msix_prepare_message = o->pci_dev->msix_prepare_message; 596 597 o->pci_dev->msi_trigger = vfu_object_msi_trigger; 598 o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg; 599 o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg; 600 } 601 602 static void vfu_object_restore_msi_cbs(VfuObject *o) 603 { 604 o->pci_dev->msi_trigger = o->default_msi_trigger; 605 o->pci_dev->msi_prepare_message = o->default_msi_prepare_message; 606 o->pci_dev->msix_prepare_message = o->default_msix_prepare_message; 607 } 608 609 static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, 610 uint32_t count, bool mask) 611 { 612 VfuObject *o = vfu_get_private(vfu_ctx); 613 uint32_t vector; 614 615 for (vector = start; vector < count; vector++) { 616 msix_set_mask(o->pci_dev, vector, mask); 617 } 618 } 619 620 static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, 621 uint32_t count, bool mask) 622 { 623 VfuObject *o = vfu_get_private(vfu_ctx); 624 Error *err = NULL; 625 uint32_t vector; 626 627 for (vector = start; vector < count; vector++) { 628 msi_set_mask(o->pci_dev, vector, mask, &err); 629 if (err) { 630 VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, 631 error_get_pretty(err)); 632 error_free(err); 633 err = NULL; 634 } 635 } 636 } 637 638 static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev) 639 { 640 vfu_ctx_t *vfu_ctx = o->vfu_ctx; 641 int ret; 642 643 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); 644 if (ret < 0) { 645 return ret; 646 } 647 648 if (msix_nr_vectors_allocated(pci_dev)) { 649 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, 650 msix_nr_vectors_allocated(pci_dev)); 651 vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ, 652 &vfu_msix_irq_state); 653 } else if (msi_nr_vectors_allocated(pci_dev)) { 654 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, 655 msi_nr_vectors_allocated(pci_dev)); 656 vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ, 657 &vfu_msi_irq_state); 658 } 659 660 if (ret < 0) { 661 return ret; 662 } 663 664 vfu_object_setup_msi_cbs(o); 665 666 pci_dev->irq_opaque = vfu_ctx; 667 668 return 0; 669 } 670 671 void vfu_object_set_bus_irq(PCIBus *pci_bus) 672 { 673 int bus_num = pci_bus_num(pci_bus); 674 int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1); 675 676 pci_bus_irqs(pci_bus, vfu_object_set_irq, pci_bus, max_bdf); 677 pci_bus_map_irqs(pci_bus, vfu_object_map_irq); 678 } 679 680 static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type) 681 { 682 VfuObject *o = vfu_get_private(vfu_ctx); 683 684 /* vfu_object_ctx_run() handles lost connection */ 685 if (type == VFU_RESET_LOST_CONN) { 686 return 0; 687 } 688 689 device_cold_reset(DEVICE(o->pci_dev)); 690 691 return 0; 692 } 693 694 /* 695 * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' 696 * properties. It also depends on devices instantiated in QEMU. These 697 * dependencies are not available during the instance_init phase of this 698 * object's life-cycle. As such, the server is initialized after the 699 * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT 700 * when the machine is setup, and the dependencies are available. 701 */ 702 static void vfu_object_machine_done(Notifier *notifier, void *data) 703 { 704 VfuObject *o = container_of(notifier, VfuObject, machine_done); 705 Error *err = NULL; 706 707 vfu_object_init_ctx(o, &err); 708 709 if (err) { 710 error_propagate(&error_abort, err); 711 } 712 } 713 714 /** 715 * vfu_object_init_ctx: Create and initialize libvfio-user context. Add 716 * an unplug blocker for the associated PCI device. Setup a FD handler 717 * to process incoming messages in the context's socket. 718 * 719 * The socket and device properties are mandatory, and this function 720 * will not create the context without them - the setters for these 721 * properties should call this function when the property is set. The 722 * machine should also be ready when this function is invoked - it is 723 * because QEMU objects are initialized before devices, and the 724 * associated PCI device wouldn't be available at the object 725 * initialization time. Until these conditions are satisfied, this 726 * function would return early without performing any task. 727 */ 728 static void vfu_object_init_ctx(VfuObject *o, Error **errp) 729 { 730 DeviceState *dev = NULL; 731 vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL; 732 int ret; 733 734 if (o->vfu_ctx || !o->socket || !o->device || 735 !phase_check(PHASE_MACHINE_READY)) { 736 return; 737 } 738 739 if (o->err) { 740 error_propagate(errp, o->err); 741 o->err = NULL; 742 return; 743 } 744 745 o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 746 LIBVFIO_USER_FLAG_ATTACH_NB, 747 o, VFU_DEV_TYPE_PCI); 748 if (o->vfu_ctx == NULL) { 749 error_setg(errp, "vfu: Failed to create context - %s", strerror(errno)); 750 return; 751 } 752 753 dev = qdev_find_recursive(sysbus_get_default(), o->device); 754 if (dev == NULL) { 755 error_setg(errp, "vfu: Device %s not found", o->device); 756 goto fail; 757 } 758 759 if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { 760 error_setg(errp, "vfu: %s not a PCI device", o->device); 761 goto fail; 762 } 763 764 o->pci_dev = PCI_DEVICE(dev); 765 766 object_ref(OBJECT(o->pci_dev)); 767 768 if (pci_is_express(o->pci_dev)) { 769 pci_type = VFU_PCI_TYPE_EXPRESS; 770 } 771 772 ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0); 773 if (ret < 0) { 774 error_setg(errp, 775 "vfu: Failed to attach PCI device %s to context - %s", 776 o->device, strerror(errno)); 777 goto fail; 778 } 779 780 error_setg(&o->unplug_blocker, 781 "vfu: %s for %s must be deleted before unplugging", 782 TYPE_VFU_OBJECT, o->device); 783 qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); 784 785 ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, 786 pci_config_size(o->pci_dev), &vfu_object_cfg_access, 787 VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB, 788 NULL, 0, -1, 0); 789 if (ret < 0) { 790 error_setg(errp, 791 "vfu: Failed to setup config space handlers for %s- %s", 792 o->device, strerror(errno)); 793 goto fail; 794 } 795 796 ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister); 797 if (ret < 0) { 798 error_setg(errp, "vfu: Failed to setup DMA handlers for %s", 799 o->device); 800 goto fail; 801 } 802 803 vfu_object_register_bars(o->vfu_ctx, o->pci_dev); 804 805 ret = vfu_object_setup_irqs(o, o->pci_dev); 806 if (ret < 0) { 807 error_setg(errp, "vfu: Failed to setup interrupts for %s", 808 o->device); 809 goto fail; 810 } 811 812 ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset); 813 if (ret < 0) { 814 error_setg(errp, "vfu: Failed to setup reset callback"); 815 goto fail; 816 } 817 818 ret = vfu_realize_ctx(o->vfu_ctx); 819 if (ret < 0) { 820 error_setg(errp, "vfu: Failed to realize device %s- %s", 821 o->device, strerror(errno)); 822 goto fail; 823 } 824 825 o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); 826 if (o->vfu_poll_fd < 0) { 827 error_setg(errp, "vfu: Failed to get poll fd %s", o->device); 828 goto fail; 829 } 830 831 qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o); 832 833 return; 834 835 fail: 836 vfu_destroy_ctx(o->vfu_ctx); 837 if (o->unplug_blocker && o->pci_dev) { 838 qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); 839 error_free(o->unplug_blocker); 840 o->unplug_blocker = NULL; 841 } 842 if (o->pci_dev) { 843 vfu_object_restore_msi_cbs(o); 844 o->pci_dev->irq_opaque = NULL; 845 object_unref(OBJECT(o->pci_dev)); 846 o->pci_dev = NULL; 847 } 848 o->vfu_ctx = NULL; 849 } 850 851 static void vfu_object_init(Object *obj) 852 { 853 VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); 854 VfuObject *o = VFU_OBJECT(obj); 855 856 k->nr_devs++; 857 858 if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) { 859 error_setg(&o->err, "vfu: %s only compatible with %s machine", 860 TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); 861 return; 862 } 863 864 if (!phase_check(PHASE_MACHINE_READY)) { 865 o->machine_done.notify = vfu_object_machine_done; 866 qemu_add_machine_init_done_notifier(&o->machine_done); 867 } 868 869 o->vfu_poll_fd = -1; 870 } 871 872 static void vfu_object_finalize(Object *obj) 873 { 874 VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); 875 VfuObject *o = VFU_OBJECT(obj); 876 877 k->nr_devs--; 878 879 qapi_free_SocketAddress(o->socket); 880 881 o->socket = NULL; 882 883 if (o->vfu_poll_fd != -1) { 884 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); 885 o->vfu_poll_fd = -1; 886 } 887 888 if (o->vfu_ctx) { 889 vfu_destroy_ctx(o->vfu_ctx); 890 o->vfu_ctx = NULL; 891 } 892 893 g_free(o->device); 894 895 o->device = NULL; 896 897 if (o->unplug_blocker && o->pci_dev) { 898 qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); 899 error_free(o->unplug_blocker); 900 o->unplug_blocker = NULL; 901 } 902 903 if (o->pci_dev) { 904 vfu_object_restore_msi_cbs(o); 905 o->pci_dev->irq_opaque = NULL; 906 object_unref(OBJECT(o->pci_dev)); 907 o->pci_dev = NULL; 908 } 909 910 if (!k->nr_devs && vfu_object_auto_shutdown()) { 911 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); 912 } 913 914 if (o->machine_done.notify) { 915 qemu_remove_machine_init_done_notifier(&o->machine_done); 916 o->machine_done.notify = NULL; 917 } 918 } 919 920 static void vfu_object_class_init(ObjectClass *klass, void *data) 921 { 922 VfuObjectClass *k = VFU_OBJECT_CLASS(klass); 923 924 k->nr_devs = 0; 925 926 object_class_property_add(klass, "socket", "SocketAddress", NULL, 927 vfu_object_set_socket, NULL, NULL); 928 object_class_property_set_description(klass, "socket", 929 "SocketAddress " 930 "(ex: type=unix,path=/tmp/sock). " 931 "Only UNIX is presently supported"); 932 object_class_property_add_str(klass, "device", NULL, 933 vfu_object_set_device); 934 object_class_property_set_description(klass, "device", 935 "device ID - only PCI devices " 936 "are presently supported"); 937 } 938 939 static const TypeInfo vfu_object_info = { 940 .name = TYPE_VFU_OBJECT, 941 .parent = TYPE_OBJECT, 942 .instance_size = sizeof(VfuObject), 943 .instance_init = vfu_object_init, 944 .instance_finalize = vfu_object_finalize, 945 .class_size = sizeof(VfuObjectClass), 946 .class_init = vfu_object_class_init, 947 .interfaces = (InterfaceInfo[]) { 948 { TYPE_USER_CREATABLE }, 949 { } 950 } 951 }; 952 953 static void vfu_register_types(void) 954 { 955 type_register_static(&vfu_object_info); 956 } 957 958 type_init(vfu_register_types); 959