1 /** 2 * QEMU vfio-user-server server object 3 * 4 * Copyright © 2022 Oracle and/or its affiliates. 5 * 6 * This work is licensed under the terms of the GNU GPL-v2, version 2 or later. 7 * 8 * See the COPYING file in the top-level directory. 9 * 10 */ 11 12 /** 13 * Usage: add options: 14 * -machine x-remote,vfio-user=on,auto-shutdown=on 15 * -device <PCI-device>,id=<pci-dev-id> 16 * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>, 17 * device=<pci-dev-id> 18 * 19 * Note that x-vfio-user-server object must be used with x-remote machine only. 20 * This server could only support PCI devices for now. 21 * 22 * type - SocketAddress type - presently "unix" alone is supported. Required 23 * option 24 * 25 * path - named unix socket, it will be created by the server. It is 26 * a required option 27 * 28 * device - id of a device on the server, a required option. PCI devices 29 * alone are supported presently. 30 * 31 * notes - x-vfio-user-server could block IO and monitor during the 32 * initialization phase. 33 */ 34 35 #include "qemu/osdep.h" 36 37 #include "qom/object.h" 38 #include "qom/object_interfaces.h" 39 #include "qemu/error-report.h" 40 #include "trace.h" 41 #include "sysemu/runstate.h" 42 #include "hw/boards.h" 43 #include "hw/remote/machine.h" 44 #include "qapi/error.h" 45 #include "qapi/qapi-visit-sockets.h" 46 #include "qapi/qapi-events-misc.h" 47 #include "qemu/notify.h" 48 #include "qemu/thread.h" 49 #include "qemu/main-loop.h" 50 #include "sysemu/sysemu.h" 51 #include "libvfio-user.h" 52 #include "hw/qdev-core.h" 53 #include "hw/pci/pci.h" 54 #include "qemu/timer.h" 55 #include "exec/memory.h" 56 #include "hw/pci/msi.h" 57 #include "hw/pci/msix.h" 58 #include "hw/remote/vfio-user-obj.h" 59 60 #define TYPE_VFU_OBJECT "x-vfio-user-server" 61 OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) 62 63 /** 64 * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown 65 * is set, it aborts the machine on error. Otherwise, it logs an 66 * error message without aborting. 67 */ 68 #define VFU_OBJECT_ERROR(o, fmt, ...) \ 69 { \ 70 if (vfu_object_auto_shutdown()) { \ 71 error_setg(&error_abort, (fmt), ## __VA_ARGS__); \ 72 } else { \ 73 error_report((fmt), ## __VA_ARGS__); \ 74 } \ 75 } \ 76 77 struct VfuObjectClass { 78 ObjectClass parent_class; 79 80 unsigned int nr_devs; 81 }; 82 83 struct VfuObject { 84 /* private */ 85 Object parent; 86 87 SocketAddress *socket; 88 89 char *device; 90 91 Error *err; 92 93 Notifier machine_done; 94 95 vfu_ctx_t *vfu_ctx; 96 97 PCIDevice *pci_dev; 98 99 Error *unplug_blocker; 100 101 int vfu_poll_fd; 102 103 MSITriggerFunc *default_msi_trigger; 104 MSIPrepareMessageFunc *default_msi_prepare_message; 105 MSIxPrepareMessageFunc *default_msix_prepare_message; 106 }; 107 108 static void vfu_object_init_ctx(VfuObject *o, Error **errp); 109 110 static bool vfu_object_auto_shutdown(void) 111 { 112 bool auto_shutdown = true; 113 Error *local_err = NULL; 114 115 if (!current_machine) { 116 return auto_shutdown; 117 } 118 119 auto_shutdown = object_property_get_bool(OBJECT(current_machine), 120 "auto-shutdown", 121 &local_err); 122 123 /* 124 * local_err would be set if no such property exists - safe to ignore. 125 * Unlikely scenario as auto-shutdown is always defined for 126 * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with 127 * TYPE_REMOTE_MACHINE 128 */ 129 if (local_err) { 130 auto_shutdown = true; 131 error_free(local_err); 132 } 133 134 return auto_shutdown; 135 } 136 137 static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, 138 void *opaque, Error **errp) 139 { 140 VfuObject *o = VFU_OBJECT(obj); 141 142 if (o->vfu_ctx) { 143 error_setg(errp, "vfu: Unable to set socket property - server busy"); 144 return; 145 } 146 147 qapi_free_SocketAddress(o->socket); 148 149 o->socket = NULL; 150 151 visit_type_SocketAddress(v, name, &o->socket, errp); 152 153 if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { 154 error_setg(errp, "vfu: Unsupported socket type - %s", 155 SocketAddressType_str(o->socket->type)); 156 qapi_free_SocketAddress(o->socket); 157 o->socket = NULL; 158 return; 159 } 160 161 trace_vfu_prop("socket", o->socket->u.q_unix.path); 162 163 vfu_object_init_ctx(o, errp); 164 } 165 166 static void vfu_object_set_device(Object *obj, const char *str, Error **errp) 167 { 168 VfuObject *o = VFU_OBJECT(obj); 169 170 if (o->vfu_ctx) { 171 error_setg(errp, "vfu: Unable to set device property - server busy"); 172 return; 173 } 174 175 g_free(o->device); 176 177 o->device = g_strdup(str); 178 179 trace_vfu_prop("device", str); 180 181 vfu_object_init_ctx(o, errp); 182 } 183 184 static void vfu_object_ctx_run(void *opaque) 185 { 186 VfuObject *o = opaque; 187 const char *vfu_id; 188 char *vfu_path, *pci_dev_path; 189 int ret = -1; 190 191 while (ret != 0) { 192 ret = vfu_run_ctx(o->vfu_ctx); 193 if (ret < 0) { 194 if (errno == EINTR) { 195 continue; 196 } else if (errno == ENOTCONN) { 197 vfu_id = object_get_canonical_path_component(OBJECT(o)); 198 vfu_path = object_get_canonical_path(OBJECT(o)); 199 g_assert(o->pci_dev); 200 pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev)); 201 /* o->device is a required property and is non-NULL here */ 202 g_assert(o->device); 203 qapi_event_send_vfu_client_hangup(vfu_id, vfu_path, 204 o->device, pci_dev_path); 205 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); 206 o->vfu_poll_fd = -1; 207 object_unparent(OBJECT(o)); 208 g_free(vfu_path); 209 g_free(pci_dev_path); 210 break; 211 } else { 212 VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s", 213 o->device, strerror(errno)); 214 break; 215 } 216 } 217 } 218 } 219 220 static void vfu_object_attach_ctx(void *opaque) 221 { 222 VfuObject *o = opaque; 223 GPollFD pfds[1]; 224 int ret; 225 226 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); 227 228 pfds[0].fd = o->vfu_poll_fd; 229 pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; 230 231 retry_attach: 232 ret = vfu_attach_ctx(o->vfu_ctx); 233 if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { 234 /** 235 * vfu_object_attach_ctx can block QEMU's main loop 236 * during attach - the monitor and other IO 237 * could be unresponsive during this time. 238 */ 239 (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); 240 goto retry_attach; 241 } else if (ret < 0) { 242 VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s", 243 o->device, strerror(errno)); 244 return; 245 } 246 247 o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); 248 if (o->vfu_poll_fd < 0) { 249 VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device); 250 return; 251 } 252 253 qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o); 254 } 255 256 static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, 257 size_t count, loff_t offset, 258 const bool is_write) 259 { 260 VfuObject *o = vfu_get_private(vfu_ctx); 261 uint32_t pci_access_width = sizeof(uint32_t); 262 size_t bytes = count; 263 uint32_t val = 0; 264 char *ptr = buf; 265 int len; 266 267 /* 268 * Writes to the BAR registers would trigger an update to the 269 * global Memory and IO AddressSpaces. But the remote device 270 * never uses the global AddressSpaces, therefore overlapping 271 * memory regions are not a problem 272 */ 273 while (bytes > 0) { 274 len = (bytes > pci_access_width) ? pci_access_width : bytes; 275 if (is_write) { 276 memcpy(&val, ptr, len); 277 pci_host_config_write_common(o->pci_dev, offset, 278 pci_config_size(o->pci_dev), 279 val, len); 280 trace_vfu_cfg_write(offset, val); 281 } else { 282 val = pci_host_config_read_common(o->pci_dev, offset, 283 pci_config_size(o->pci_dev), len); 284 memcpy(ptr, &val, len); 285 trace_vfu_cfg_read(offset, val); 286 } 287 offset += len; 288 ptr += len; 289 bytes -= len; 290 } 291 292 return count; 293 } 294 295 static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 296 { 297 VfuObject *o = vfu_get_private(vfu_ctx); 298 AddressSpace *dma_as = NULL; 299 MemoryRegion *subregion = NULL; 300 g_autofree char *name = NULL; 301 struct iovec *iov = &info->iova; 302 303 if (!info->vaddr) { 304 return; 305 } 306 307 name = g_strdup_printf("mem-%s-%"PRIx64"", o->device, 308 (uint64_t)info->vaddr); 309 310 subregion = g_new0(MemoryRegion, 1); 311 312 memory_region_init_ram_ptr(subregion, NULL, name, 313 iov->iov_len, info->vaddr); 314 315 dma_as = pci_device_iommu_address_space(o->pci_dev); 316 317 memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion); 318 319 trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len); 320 } 321 322 static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 323 { 324 VfuObject *o = vfu_get_private(vfu_ctx); 325 AddressSpace *dma_as = NULL; 326 MemoryRegion *mr = NULL; 327 ram_addr_t offset; 328 329 mr = memory_region_from_host(info->vaddr, &offset); 330 if (!mr) { 331 return; 332 } 333 334 dma_as = pci_device_iommu_address_space(o->pci_dev); 335 336 memory_region_del_subregion(dma_as->root, mr); 337 338 object_unparent((OBJECT(mr))); 339 340 trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); 341 } 342 343 static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset, 344 hwaddr size, const bool is_write) 345 { 346 uint8_t *ptr = buf; 347 bool release_lock = false; 348 uint8_t *ram_ptr = NULL; 349 MemTxResult result; 350 int access_size; 351 uint64_t val; 352 353 if (memory_access_is_direct(mr, is_write)) { 354 /** 355 * Some devices expose a PCI expansion ROM, which could be buffer 356 * based as compared to other regions which are primarily based on 357 * MemoryRegionOps. memory_region_find() would already check 358 * for buffer overflow, we don't need to repeat it here. 359 */ 360 ram_ptr = memory_region_get_ram_ptr(mr); 361 362 if (is_write) { 363 memcpy((ram_ptr + offset), buf, size); 364 } else { 365 memcpy(buf, (ram_ptr + offset), size); 366 } 367 368 return 0; 369 } 370 371 while (size) { 372 /** 373 * The read/write logic used below is similar to the ones in 374 * flatview_read/write_continue() 375 */ 376 release_lock = prepare_mmio_access(mr); 377 378 access_size = memory_access_size(mr, size, offset); 379 380 if (is_write) { 381 val = ldn_he_p(ptr, access_size); 382 383 result = memory_region_dispatch_write(mr, offset, val, 384 size_memop(access_size), 385 MEMTXATTRS_UNSPECIFIED); 386 } else { 387 result = memory_region_dispatch_read(mr, offset, &val, 388 size_memop(access_size), 389 MEMTXATTRS_UNSPECIFIED); 390 391 stn_he_p(ptr, access_size, val); 392 } 393 394 if (release_lock) { 395 qemu_mutex_unlock_iothread(); 396 release_lock = false; 397 } 398 399 if (result != MEMTX_OK) { 400 return -1; 401 } 402 403 size -= access_size; 404 ptr += access_size; 405 offset += access_size; 406 } 407 408 return 0; 409 } 410 411 static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar, 412 hwaddr bar_offset, char * const buf, 413 hwaddr len, const bool is_write) 414 { 415 MemoryRegionSection section = { 0 }; 416 uint8_t *ptr = (uint8_t *)buf; 417 MemoryRegion *section_mr = NULL; 418 uint64_t section_size; 419 hwaddr section_offset; 420 hwaddr size = 0; 421 422 while (len) { 423 section = memory_region_find(pci_dev->io_regions[pci_bar].memory, 424 bar_offset, len); 425 426 if (!section.mr) { 427 warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset); 428 return size; 429 } 430 431 section_mr = section.mr; 432 section_offset = section.offset_within_region; 433 section_size = int128_get64(section.size); 434 435 if (is_write && section_mr->readonly) { 436 warn_report("vfu: attempting to write to readonly region in " 437 "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]", 438 pci_bar, bar_offset, 439 (bar_offset + section_size)); 440 memory_region_unref(section_mr); 441 return size; 442 } 443 444 if (vfu_object_mr_rw(section_mr, ptr, section_offset, 445 section_size, is_write)) { 446 warn_report("vfu: failed to %s " 447 "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d", 448 is_write ? "write to" : "read from", bar_offset, 449 (bar_offset + section_size), pci_bar); 450 memory_region_unref(section_mr); 451 return size; 452 } 453 454 size += section_size; 455 bar_offset += section_size; 456 ptr += section_size; 457 len -= section_size; 458 459 memory_region_unref(section_mr); 460 } 461 462 return size; 463 } 464 465 /** 466 * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs. 467 * 468 * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would 469 * define vfu_object_bar2_handler 470 */ 471 #define VFU_OBJECT_BAR_HANDLER(BAR_NO) \ 472 static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \ 473 char * const buf, size_t count, \ 474 loff_t offset, const bool is_write) \ 475 { \ 476 VfuObject *o = vfu_get_private(vfu_ctx); \ 477 PCIDevice *pci_dev = o->pci_dev; \ 478 \ 479 return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \ 480 buf, count, is_write); \ 481 } \ 482 483 VFU_OBJECT_BAR_HANDLER(0) 484 VFU_OBJECT_BAR_HANDLER(1) 485 VFU_OBJECT_BAR_HANDLER(2) 486 VFU_OBJECT_BAR_HANDLER(3) 487 VFU_OBJECT_BAR_HANDLER(4) 488 VFU_OBJECT_BAR_HANDLER(5) 489 VFU_OBJECT_BAR_HANDLER(6) 490 491 static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = { 492 &vfu_object_bar0_handler, 493 &vfu_object_bar1_handler, 494 &vfu_object_bar2_handler, 495 &vfu_object_bar3_handler, 496 &vfu_object_bar4_handler, 497 &vfu_object_bar5_handler, 498 &vfu_object_bar6_handler, 499 }; 500 501 /** 502 * vfu_object_register_bars - Identify active BAR regions of pdev and setup 503 * callbacks to handle read/write accesses 504 */ 505 static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) 506 { 507 int flags = VFU_REGION_FLAG_RW; 508 int i; 509 510 for (i = 0; i < PCI_NUM_REGIONS; i++) { 511 if (!pdev->io_regions[i].size) { 512 continue; 513 } 514 515 if ((i == VFU_PCI_DEV_ROM_REGION_IDX) || 516 pdev->io_regions[i].memory->readonly) { 517 flags &= ~VFU_REGION_FLAG_WRITE; 518 } 519 520 vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i, 521 (size_t)pdev->io_regions[i].size, 522 vfu_object_bar_handlers[i], 523 flags, NULL, 0, -1, 0); 524 525 trace_vfu_bar_register(i, pdev->io_regions[i].addr, 526 pdev->io_regions[i].size); 527 } 528 } 529 530 static int vfu_object_map_irq(PCIDevice *pci_dev, int intx) 531 { 532 int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), 533 pci_dev->devfn); 534 535 return pci_bdf; 536 } 537 538 static void vfu_object_set_irq(void *opaque, int pirq, int level) 539 { 540 PCIBus *pci_bus = opaque; 541 PCIDevice *pci_dev = NULL; 542 vfu_ctx_t *vfu_ctx = NULL; 543 int pci_bus_num, devfn; 544 545 if (level) { 546 pci_bus_num = PCI_BUS_NUM(pirq); 547 devfn = PCI_BDF_TO_DEVFN(pirq); 548 549 /* 550 * pci_find_device() performs at O(1) if the device is attached 551 * to the root PCI bus. Whereas, if the device is attached to a 552 * secondary PCI bus (such as when a root port is involved), 553 * finding the parent PCI bus could take O(n) 554 */ 555 pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn); 556 557 vfu_ctx = pci_dev->irq_opaque; 558 559 g_assert(vfu_ctx); 560 561 vfu_irq_trigger(vfu_ctx, 0); 562 } 563 } 564 565 static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev, 566 unsigned int vector) 567 { 568 MSIMessage msg; 569 570 msg.address = 0; 571 msg.data = vector; 572 573 return msg; 574 } 575 576 static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg) 577 { 578 vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque; 579 580 vfu_irq_trigger(vfu_ctx, msg.data); 581 } 582 583 static void vfu_object_setup_msi_cbs(VfuObject *o) 584 { 585 o->default_msi_trigger = o->pci_dev->msi_trigger; 586 o->default_msi_prepare_message = o->pci_dev->msi_prepare_message; 587 o->default_msix_prepare_message = o->pci_dev->msix_prepare_message; 588 589 o->pci_dev->msi_trigger = vfu_object_msi_trigger; 590 o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg; 591 o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg; 592 } 593 594 static void vfu_object_restore_msi_cbs(VfuObject *o) 595 { 596 o->pci_dev->msi_trigger = o->default_msi_trigger; 597 o->pci_dev->msi_prepare_message = o->default_msi_prepare_message; 598 o->pci_dev->msix_prepare_message = o->default_msix_prepare_message; 599 } 600 601 static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, 602 uint32_t count, bool mask) 603 { 604 VfuObject *o = vfu_get_private(vfu_ctx); 605 uint32_t vector; 606 607 for (vector = start; vector < count; vector++) { 608 msix_set_mask(o->pci_dev, vector, mask); 609 } 610 } 611 612 static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, 613 uint32_t count, bool mask) 614 { 615 VfuObject *o = vfu_get_private(vfu_ctx); 616 Error *err = NULL; 617 uint32_t vector; 618 619 for (vector = start; vector < count; vector++) { 620 msi_set_mask(o->pci_dev, vector, mask, &err); 621 if (err) { 622 VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, 623 error_get_pretty(err)); 624 error_free(err); 625 err = NULL; 626 } 627 } 628 } 629 630 static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev) 631 { 632 vfu_ctx_t *vfu_ctx = o->vfu_ctx; 633 int ret; 634 635 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); 636 if (ret < 0) { 637 return ret; 638 } 639 640 if (msix_nr_vectors_allocated(pci_dev)) { 641 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, 642 msix_nr_vectors_allocated(pci_dev)); 643 vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ, 644 &vfu_msix_irq_state); 645 } else if (msi_nr_vectors_allocated(pci_dev)) { 646 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, 647 msi_nr_vectors_allocated(pci_dev)); 648 vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ, 649 &vfu_msi_irq_state); 650 } 651 652 if (ret < 0) { 653 return ret; 654 } 655 656 vfu_object_setup_msi_cbs(o); 657 658 pci_dev->irq_opaque = vfu_ctx; 659 660 return 0; 661 } 662 663 void vfu_object_set_bus_irq(PCIBus *pci_bus) 664 { 665 int bus_num = pci_bus_num(pci_bus); 666 int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1); 667 668 pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus, 669 max_bdf); 670 } 671 672 static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type) 673 { 674 VfuObject *o = vfu_get_private(vfu_ctx); 675 676 /* vfu_object_ctx_run() handles lost connection */ 677 if (type == VFU_RESET_LOST_CONN) { 678 return 0; 679 } 680 681 device_cold_reset(DEVICE(o->pci_dev)); 682 683 return 0; 684 } 685 686 /* 687 * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' 688 * properties. It also depends on devices instantiated in QEMU. These 689 * dependencies are not available during the instance_init phase of this 690 * object's life-cycle. As such, the server is initialized after the 691 * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT 692 * when the machine is setup, and the dependencies are available. 693 */ 694 static void vfu_object_machine_done(Notifier *notifier, void *data) 695 { 696 VfuObject *o = container_of(notifier, VfuObject, machine_done); 697 Error *err = NULL; 698 699 vfu_object_init_ctx(o, &err); 700 701 if (err) { 702 error_propagate(&error_abort, err); 703 } 704 } 705 706 /** 707 * vfu_object_init_ctx: Create and initialize libvfio-user context. Add 708 * an unplug blocker for the associated PCI device. Setup a FD handler 709 * to process incoming messages in the context's socket. 710 * 711 * The socket and device properties are mandatory, and this function 712 * will not create the context without them - the setters for these 713 * properties should call this function when the property is set. The 714 * machine should also be ready when this function is invoked - it is 715 * because QEMU objects are initialized before devices, and the 716 * associated PCI device wouldn't be available at the object 717 * initialization time. Until these conditions are satisfied, this 718 * function would return early without performing any task. 719 */ 720 static void vfu_object_init_ctx(VfuObject *o, Error **errp) 721 { 722 DeviceState *dev = NULL; 723 vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL; 724 int ret; 725 726 if (o->vfu_ctx || !o->socket || !o->device || 727 !phase_check(PHASE_MACHINE_READY)) { 728 return; 729 } 730 731 if (o->err) { 732 error_propagate(errp, o->err); 733 o->err = NULL; 734 return; 735 } 736 737 o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 738 LIBVFIO_USER_FLAG_ATTACH_NB, 739 o, VFU_DEV_TYPE_PCI); 740 if (o->vfu_ctx == NULL) { 741 error_setg(errp, "vfu: Failed to create context - %s", strerror(errno)); 742 return; 743 } 744 745 dev = qdev_find_recursive(sysbus_get_default(), o->device); 746 if (dev == NULL) { 747 error_setg(errp, "vfu: Device %s not found", o->device); 748 goto fail; 749 } 750 751 if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { 752 error_setg(errp, "vfu: %s not a PCI device", o->device); 753 goto fail; 754 } 755 756 o->pci_dev = PCI_DEVICE(dev); 757 758 object_ref(OBJECT(o->pci_dev)); 759 760 if (pci_is_express(o->pci_dev)) { 761 pci_type = VFU_PCI_TYPE_EXPRESS; 762 } 763 764 ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0); 765 if (ret < 0) { 766 error_setg(errp, 767 "vfu: Failed to attach PCI device %s to context - %s", 768 o->device, strerror(errno)); 769 goto fail; 770 } 771 772 error_setg(&o->unplug_blocker, 773 "vfu: %s for %s must be deleted before unplugging", 774 TYPE_VFU_OBJECT, o->device); 775 qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); 776 777 ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, 778 pci_config_size(o->pci_dev), &vfu_object_cfg_access, 779 VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB, 780 NULL, 0, -1, 0); 781 if (ret < 0) { 782 error_setg(errp, 783 "vfu: Failed to setup config space handlers for %s- %s", 784 o->device, strerror(errno)); 785 goto fail; 786 } 787 788 ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister); 789 if (ret < 0) { 790 error_setg(errp, "vfu: Failed to setup DMA handlers for %s", 791 o->device); 792 goto fail; 793 } 794 795 vfu_object_register_bars(o->vfu_ctx, o->pci_dev); 796 797 ret = vfu_object_setup_irqs(o, o->pci_dev); 798 if (ret < 0) { 799 error_setg(errp, "vfu: Failed to setup interrupts for %s", 800 o->device); 801 goto fail; 802 } 803 804 ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset); 805 if (ret < 0) { 806 error_setg(errp, "vfu: Failed to setup reset callback"); 807 goto fail; 808 } 809 810 ret = vfu_realize_ctx(o->vfu_ctx); 811 if (ret < 0) { 812 error_setg(errp, "vfu: Failed to realize device %s- %s", 813 o->device, strerror(errno)); 814 goto fail; 815 } 816 817 o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); 818 if (o->vfu_poll_fd < 0) { 819 error_setg(errp, "vfu: Failed to get poll fd %s", o->device); 820 goto fail; 821 } 822 823 qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o); 824 825 return; 826 827 fail: 828 vfu_destroy_ctx(o->vfu_ctx); 829 if (o->unplug_blocker && o->pci_dev) { 830 qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); 831 error_free(o->unplug_blocker); 832 o->unplug_blocker = NULL; 833 } 834 if (o->pci_dev) { 835 vfu_object_restore_msi_cbs(o); 836 o->pci_dev->irq_opaque = NULL; 837 object_unref(OBJECT(o->pci_dev)); 838 o->pci_dev = NULL; 839 } 840 o->vfu_ctx = NULL; 841 } 842 843 static void vfu_object_init(Object *obj) 844 { 845 VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); 846 VfuObject *o = VFU_OBJECT(obj); 847 848 k->nr_devs++; 849 850 if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) { 851 error_setg(&o->err, "vfu: %s only compatible with %s machine", 852 TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); 853 return; 854 } 855 856 if (!phase_check(PHASE_MACHINE_READY)) { 857 o->machine_done.notify = vfu_object_machine_done; 858 qemu_add_machine_init_done_notifier(&o->machine_done); 859 } 860 861 o->vfu_poll_fd = -1; 862 } 863 864 static void vfu_object_finalize(Object *obj) 865 { 866 VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); 867 VfuObject *o = VFU_OBJECT(obj); 868 869 k->nr_devs--; 870 871 qapi_free_SocketAddress(o->socket); 872 873 o->socket = NULL; 874 875 if (o->vfu_poll_fd != -1) { 876 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); 877 o->vfu_poll_fd = -1; 878 } 879 880 if (o->vfu_ctx) { 881 vfu_destroy_ctx(o->vfu_ctx); 882 o->vfu_ctx = NULL; 883 } 884 885 g_free(o->device); 886 887 o->device = NULL; 888 889 if (o->unplug_blocker && o->pci_dev) { 890 qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); 891 error_free(o->unplug_blocker); 892 o->unplug_blocker = NULL; 893 } 894 895 if (o->pci_dev) { 896 vfu_object_restore_msi_cbs(o); 897 o->pci_dev->irq_opaque = NULL; 898 object_unref(OBJECT(o->pci_dev)); 899 o->pci_dev = NULL; 900 } 901 902 if (!k->nr_devs && vfu_object_auto_shutdown()) { 903 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); 904 } 905 906 if (o->machine_done.notify) { 907 qemu_remove_machine_init_done_notifier(&o->machine_done); 908 o->machine_done.notify = NULL; 909 } 910 } 911 912 static void vfu_object_class_init(ObjectClass *klass, void *data) 913 { 914 VfuObjectClass *k = VFU_OBJECT_CLASS(klass); 915 916 k->nr_devs = 0; 917 918 object_class_property_add(klass, "socket", "SocketAddress", NULL, 919 vfu_object_set_socket, NULL, NULL); 920 object_class_property_set_description(klass, "socket", 921 "SocketAddress " 922 "(ex: type=unix,path=/tmp/sock). " 923 "Only UNIX is presently supported"); 924 object_class_property_add_str(klass, "device", NULL, 925 vfu_object_set_device); 926 object_class_property_set_description(klass, "device", 927 "device ID - only PCI devices " 928 "are presently supported"); 929 } 930 931 static const TypeInfo vfu_object_info = { 932 .name = TYPE_VFU_OBJECT, 933 .parent = TYPE_OBJECT, 934 .instance_size = sizeof(VfuObject), 935 .instance_init = vfu_object_init, 936 .instance_finalize = vfu_object_finalize, 937 .class_size = sizeof(VfuObjectClass), 938 .class_init = vfu_object_class_init, 939 .interfaces = (InterfaceInfo[]) { 940 { TYPE_USER_CREATABLE }, 941 { } 942 } 943 }; 944 945 static void vfu_register_types(void) 946 { 947 type_register_static(&vfu_object_info); 948 } 949 950 type_init(vfu_register_types); 951