/*
 * iommufd container backend
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include <linux/iommufd.h>

#include "hw/vfio/vfio-device.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "migration/cpr.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
#include "vfio-listener.h"

#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
            TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"

static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                            ram_addr_t size, void *vaddr, bool readonly,
                            MemoryRegion *mr)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    return iommufd_backend_map_dma(container->be,
                                   container->ioas_id,
                                   iova, size, vaddr, readonly);
}

static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer,
                                 hwaddr iova, ram_addr_t size,
                                 int fd, unsigned long start, bool readonly)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    return iommufd_backend_map_file_dma(container->be,
                                        container->ioas_id,
                                        iova, size, fd, start, readonly);
}

static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
                              hwaddr iova, ram_addr_t size,
                              IOMMUTLBEntry *iotlb, bool unmap_all)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    /* unmap in halves: the full 64-bit size does not fit in a single call */
    if (unmap_all) {
        Int128 llsize = int128_rshift(int128_2_64(), 1);
        int ret;

        ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
                                        0, int128_get64(llsize));

        if (ret == 0) {
            ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
                                            int128_get64(llsize),
                                            int128_get64(llsize));
        }

        return ret;
    }

    /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
    return iommufd_backend_unmap_dma(container->be,
                                     container->ioas_id, iova, size);
}

static bool iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp)
{
    return !vfio_kvm_device_add_fd(vbasedev->fd, errp);
}

static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
{
    Error *err = NULL;

    if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
        error_report_err(err);
    }
}

static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
{
    IOMMUFDBackend *iommufd = vbasedev->iommufd;
    struct vfio_device_bind_iommufd bind = {
        .argsz = sizeof(bind),
        .flags = 0,
    };

    if (!iommufd_backend_connect(iommufd, errp)) {
        return false;
    }

    /*
     * Add device to kvm-vfio to be prepared for the tracking
     * in KVM. In particular, some emulated devices require the
     * kvm information to be available when the device is opened.
     */
    if (!iommufd_cdev_kvm_device_add(vbasedev, errp)) {
        goto err_kvm_device_add;
    }

    if (cpr_is_incoming()) {
        goto skip_bind;
    }

    /* Bind device to iommufd */
    bind.iommufd = iommufd->fd;
    if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
        error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d",
                         vbasedev->fd, bind.iommufd);
        goto err_bind;
    }

    vbasedev->devid = bind.out_devid;
    trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
                                        vbasedev->fd, vbasedev->devid);

skip_bind:
    return true;
err_bind:
    iommufd_cdev_kvm_device_del(vbasedev);
err_kvm_device_add:
    iommufd_backend_disconnect(iommufd);
    return false;
}

static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
{
    /* Unbind is automatically conducted when device fd is closed */
    iommufd_cdev_kvm_device_del(vbasedev);
    iommufd_backend_disconnect(vbasedev->iommufd);
}

static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
{
    return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}

static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
                                           bool start, Error **errp)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
    VFIOIOASHwpt *hwpt;

    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }

        if (!iommufd_backend_set_dirty_tracking(container->be,
                                                hwpt->hwpt_id, start, errp)) {
            goto err;
        }
    }

    return 0;

err:
    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }
        iommufd_backend_set_dirty_tracking(container->be,
                                           hwpt->hwpt_id, !start, NULL);
    }
    return -EINVAL;
}

static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                      VFIOBitmap *vbmap, hwaddr iova,
                                      hwaddr size, Error **errp)
{
    VFIOIOMMUFDContainer *container = container_of(bcontainer,
                                                   VFIOIOMMUFDContainer,
                                                   bcontainer);
    unsigned long page_size = qemu_real_host_page_size();
    VFIOIOASHwpt *hwpt;

    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }

        if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
                                              iova, size, page_size,
                                              (uint64_t *)vbmap->bitmap,
                                              errp)) {
            return -EINVAL;
        }
    }

    return 0;
}

/*
 * Look up the VFIO character device node that backs @sysfs_path and
 * return an open file descriptor to it, or a negative value on failure.
 */
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
{
    ERRP_GUARD();
    long int ret = -ENOTTY;
    g_autofree char *path = NULL;
    g_autofree char *vfio_dev_path = NULL;
    g_autofree char *vfio_path = NULL;
    DIR *dir = NULL;
    struct dirent *dent;
    g_autofree gchar *contents = NULL;
    gsize length;
    int major, minor;
    dev_t vfio_devt;

    path = g_strdup_printf("%s/vfio-dev", sysfs_path);
    dir = opendir(path);
    if (!dir) {
        error_setg_errno(errp, errno, "couldn't open directory %s", path);
        goto out;
    }

    while ((dent = readdir(dir))) {
        if (!strncmp(dent->d_name, "vfio", 4)) {
            vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name);
            break;
        }
    }

    if (!vfio_dev_path) {
        error_setg(errp, "failed to find vfio-dev/vfioX/dev");
        goto out_close_dir;
    }

    if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) {
        error_setg(errp, "failed to load \"%s\"", vfio_dev_path);
        goto out_close_dir;
    }

    if (sscanf(contents, "%d:%d", &major, &minor) != 2) {
        error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path);
        goto out_close_dir;
    }
    vfio_devt = makedev(major, minor);

    vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name);
    ret = open_cdev(vfio_path, vfio_devt);
    if (ret < 0) {
        error_setg(errp, "Failed to open %s", vfio_path);
    }

    trace_iommufd_cdev_getfd(vfio_path, ret);

out_close_dir:
    closedir(dir);
out:
    if (*errp) {
        error_prepend(errp, VFIO_MSG_PREFIX, path);
    }

    return ret;
}

static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
                                         Error **errp)
{
    int iommufd = vbasedev->iommufd->fd;
    struct vfio_device_attach_iommufd_pt attach_data = {
        .argsz = sizeof(attach_data),
        .flags = 0,
        .pt_id = id,
    };

    /* Attach device to an IOAS or hwpt within iommufd */
    if (ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) {
        error_setg_errno(errp, errno,
                         "[iommufd=%d] error attach %s (%d) to id=%d",
                         iommufd, vbasedev->name, vbasedev->fd, id);
        return -errno;
    }

    trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
                                        vbasedev->fd, id);
    return 0;
}

static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
{
    int iommufd = vbasedev->iommufd->fd;
    struct vfio_device_detach_iommufd_pt detach_data = {
        .argsz = sizeof(detach_data),
        .flags = 0,
    };

    if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data)) {
        error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
        return false;
    }

    trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
    return true;
}

static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
                                         VFIOIOMMUFDContainer *container,
                                         Error **errp)
{
    ERRP_GUARD();
    IOMMUFDBackend *iommufd = vbasedev->iommufd;
    uint32_t type, flags = 0;
    uint64_t hw_caps;
    VFIOIOASHwpt *hwpt;
    uint32_t hwpt_id;
    int ret;

    /* Try to find a domain */
    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!cpr_is_incoming()) {
            ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
        } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
            ret = 0;
        } else {
            continue;
        }

        if (ret) {
            /* -EINVAL means the domain is incompatible with the device. */
            if (ret == -EINVAL) {
                /*
                 * It is an expected failure and it just means we will try
                 * another domain, or create one if no existing compatible
                 * domain is found. Hence why the error is discarded below.
                 */
                error_free(*errp);
                *errp = NULL;
                continue;
            }

            return false;
        } else {
            vbasedev->hwpt = hwpt;
            vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
            QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
            vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
            return true;
        }
    }

    /*
     * This is quite early and VFIO Migration state isn't yet fully
     * initialized, thus rely only on IOMMU hardware capabilities as to
     * whether IOMMU dirty tracking is going to be requested. Later
     * vfio_migration_realize() may decide to use VF dirty tracking
     * instead.
     */
    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
                                         &type, NULL, 0, &hw_caps, errp)) {
        return false;
    }

    if (hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
        flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
    }

    if (cpr_is_incoming()) {
        hwpt_id = vbasedev->cpr.hwpt_id;
        goto skip_alloc;
    }

    if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
                                    container->ioas_id, flags,
                                    IOMMU_HWPT_DATA_NONE, 0, NULL,
                                    &hwpt_id, errp)) {
        return false;
    }

    ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
    if (ret) {
        iommufd_backend_free_id(container->be, hwpt_id);
        return false;
    }

skip_alloc:
    hwpt = g_malloc0(sizeof(*hwpt));
    hwpt->hwpt_id = hwpt_id;
    hwpt->hwpt_flags = flags;
    QLIST_INIT(&hwpt->device_list);

    vbasedev->hwpt = hwpt;
    vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
    vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
    QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
    QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
    container->bcontainer.dirty_pages_supported |=
                                vbasedev->iommu_dirty_tracking;
    if (container->bcontainer.dirty_pages_supported &&
        !vbasedev->iommu_dirty_tracking) {
        warn_report("IOMMU instance for device %s doesn't support dirty tracking",
                    vbasedev->name);
    }
    return true;
}

static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
                                         VFIOIOMMUFDContainer *container)
{
    VFIOIOASHwpt *hwpt = vbasedev->hwpt;

    QLIST_REMOVE(vbasedev, hwpt_next);
    vbasedev->hwpt = NULL;

    if (QLIST_EMPTY(&hwpt->device_list)) {
        QLIST_REMOVE(hwpt, next);
        iommufd_backend_free_id(container->be, hwpt->hwpt_id);
        g_free(hwpt);
    }
}

static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
                                          VFIOIOMMUFDContainer *container,
                                          Error **errp)
{
    /* mdevs aren't physical devices and will fail with auto domains */
    if (!vbasedev->mdev) {
        return iommufd_cdev_autodomains_get(vbasedev, container, errp);
    }

    /* If CPR, we are already attached to ioas_id. */
    return cpr_is_incoming() ||
           !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}

static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
                                          VFIOIOMMUFDContainer *container)
{
    Error *err = NULL;

    if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
        error_report_err(err);
    }

    if (vbasedev->hwpt) {
        iommufd_cdev_autodomains_put(vbasedev, container);
    }
}

static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    if (!QLIST_EMPTY(&bcontainer->device_list)) {
        return;
    }
    vfio_iommufd_cpr_unregister_container(container);
    vfio_listener_unregister(bcontainer);
    iommufd_backend_free_id(container->be, container->ioas_id);
    object_unref(container);
}

static int iommufd_cdev_ram_block_discard_disable(bool state)
{
    /*
     * We support coordinated discarding of RAM via the RamDiscardManager.
     */
    return ram_block_uncoordinated_discard_disable(state);
}

static bool iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
                                             uint32_t ioas_id, Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    g_autofree struct iommu_ioas_iova_ranges *info = NULL;
    struct iommu_iova_range *iova_ranges;
    int sz, fd = container->be->fd;

    info = g_malloc0(sizeof(*info));
    info->size = sizeof(*info);
    info->ioas_id = ioas_id;

    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info) && errno != EMSGSIZE) {
        goto error;
    }

    sz = info->num_iovas * sizeof(struct iommu_iova_range);
    info = g_realloc(info, sizeof(*info) + sz);
    info->allowed_iovas = (uintptr_t)(info + 1);

    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info)) {
        goto error;
    }

    iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas;

    for (int i = 0; i < info->num_iovas; i++) {
        Range *range = g_new(Range, 1);

        range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last);
        bcontainer->iova_ranges =
            range_list_insert(bcontainer->iova_ranges, range);
    }
    bcontainer->pgsizes = info->out_iova_alignment;

    return true;

error:
    error_setg_errno(errp, errno, "Cannot get IOVA ranges");
    return false;
}

static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
                                AddressSpace *as, Error **errp)
{
    VFIOContainerBase *bcontainer;
    VFIOIOMMUFDContainer *container;
    VFIOAddressSpace *space;
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    int ret, devfd;
    bool res;
    uint32_t ioas_id;
    Error *err = NULL;
    const VFIOIOMMUClass *iommufd_vioc =
        VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));

    vfio_cpr_load_device(vbasedev);

    if (vbasedev->fd < 0) {
        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
        if (devfd < 0) {
            return false;
        }
        vbasedev->fd = devfd;
    } else {
        devfd = vbasedev->fd;
    }

    if (!iommufd_cdev_connect_and_bind(vbasedev, errp)) {
        goto err_connect_bind;
    }

    space = vfio_address_space_get(as);

    /* try to attach to an existing container in this space */
    QLIST_FOREACH(bcontainer, &space->containers, next) {
        container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
        if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc ||
            vbasedev->iommufd != container->be) {
            continue;
        }

        if (!cpr_is_incoming()) {
            res = iommufd_cdev_attach_container(vbasedev, container, &err);
        } else if (vbasedev->cpr.ioas_id == container->ioas_id) {
            res = true;
        } else {
            continue;
        }

        if (!res) {
            const char *msg = error_get_pretty(err);

            trace_iommufd_cdev_fail_attach_existing_container(msg);
            error_free(err);
            err = NULL;
        } else {
            ret = iommufd_cdev_ram_block_discard_disable(true);
            if (ret) {
                error_setg_errno(errp, -ret,
                                 "Cannot set discarding of RAM broken");
                goto err_discard_disable;
            }
            goto found_container;
        }
    }

    if (cpr_is_incoming()) {
        ioas_id = vbasedev->cpr.ioas_id;
        goto skip_ioas_alloc;
    }

    /* Need to allocate a new dedicated container */
    if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
        goto err_alloc_ioas;
    }

    trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);

skip_ioas_alloc:
    container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
    container->be = vbasedev->iommufd;
    container->ioas_id = ioas_id;
    QLIST_INIT(&container->hwpt_list);
    vbasedev->cpr.ioas_id = ioas_id;

    bcontainer = &container->bcontainer;
    vfio_address_space_insert(space, bcontainer);

    if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
        goto err_attach_container;
    }

    ret = iommufd_cdev_ram_block_discard_disable(true);
    if (ret) {
        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
        goto err_discard_disable;
    }

    if (!iommufd_cdev_get_info_iova_range(container, ioas_id, &err)) {
        error_append_hint(&err,
                   "Fallback to default 64bit IOVA range and 4K page size\n");
        warn_report_err(err);
        err = NULL;
        bcontainer->pgsizes = qemu_real_host_page_size();
    }

    if (!vfio_listener_register(bcontainer, errp)) {
        goto err_listener_register;
    }

    if (!vfio_iommufd_cpr_register_container(container, errp)) {
        goto err_listener_register;
    }

    bcontainer->initialized = true;

found_container:
    ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info);
    if (ret) {
        error_setg_errno(errp, errno, "error getting device info");
        goto err_listener_register;
    }

    /*
     * Do not move this code before attachment! The nested IOMMU support
     * needs the device and hwpt IDs, which are generated only after
     * attachment.
     */
    if (!vfio_device_hiod_create_and_realize(vbasedev,
                     TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
        goto err_listener_register;
    }

    /*
     * TODO: examine RAM_BLOCK_DISCARD handling; should the discard
     * incompatibility check also be done at the group level?
     */
    if (vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    vfio_device_prepare(vbasedev, bcontainer, &dev_info);
    vfio_iommufd_cpr_register_device(vbasedev);

    trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
                                   vbasedev->num_regions, vbasedev->flags);
    return true;

err_listener_register:
    iommufd_cdev_ram_block_discard_disable(false);
err_discard_disable:
    iommufd_cdev_detach_container(vbasedev, container);
err_attach_container:
    iommufd_cdev_container_destroy(container);
err_alloc_ioas:
    vfio_address_space_put(space);
    iommufd_cdev_unbind_and_disconnect(vbasedev);
err_connect_bind:
    close(vbasedev->fd);
    return false;
}

static void iommufd_cdev_detach(VFIODevice *vbasedev)
{
    VFIOContainerBase *bcontainer = vbasedev->bcontainer;
    VFIOAddressSpace *space = bcontainer->space;
    VFIOIOMMUFDContainer *container = container_of(bcontainer,
                                                   VFIOIOMMUFDContainer,
                                                   bcontainer);
    vfio_device_unprepare(vbasedev);

    if (!vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    object_unref(vbasedev->hiod);
    iommufd_cdev_detach_container(vbasedev, container);
    iommufd_cdev_container_destroy(container);
    vfio_address_space_put(space);

    vfio_iommufd_cpr_unregister_device(vbasedev);
    iommufd_cdev_unbind_and_disconnect(vbasedev);
    close(vbasedev->fd);
}

static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
{
    VFIODevice *vbasedev_iter;
    const VFIOIOMMUClass *iommufd_vioc =
        VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));

    QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
        if (VFIO_IOMMU_GET_CLASS(vbasedev_iter->bcontainer) != iommufd_vioc) {
            continue;
        }
        if (devid == vbasedev_iter->devid) {
            return vbasedev_iter;
        }
    }
    return NULL;
}

static VFIOPCIDevice *
iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
                                    VFIODevice *reset_dev)
{
    VFIODevice *vbasedev_tmp;

    if (dep_dev->devid == reset_dev->devid ||
        dep_dev->devid == VFIO_PCI_DEVID_OWNED) {
        return NULL;
    }

    vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
    if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
        vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev);
}

static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
    struct vfio_pci_hot_reset_info *info = NULL;
    struct vfio_pci_dependent_device *devices;
    struct vfio_pci_hot_reset *reset;
    int ret, i;
    bool multi = false;

    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");

    if (!single) {
        vfio_pci_pre_reset(vdev);
    }
    vdev->vbasedev.needs_reset = false;

    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);

    if (ret) {
        goto out_single;
    }

    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);

    devices = &info->devices[0];

    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
        if (!vdev->has_pm_reset) {
            for (i = 0; i < info->count; i++) {
                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
                    error_report("vfio: Cannot reset device %s, "
                                 "depends on device %04x:%02x:%02x.%x "
                                 "which is not owned.",
                                 vdev->vbasedev.name, devices[i].segment,
                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
                                 PCI_FUNC(devices[i].devfn));
                }
            }
        }
        ret = -EPERM;
        goto out_single;
    }

    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);

    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
                                                     devices[i].bus,
                                                     PCI_SLOT(devices[i].devfn),
                                                     PCI_FUNC(devices[i].devfn),
                                                     devices[i].devid);

        /*
         * If a VFIO cdev device is resettable, all the dependent devices
         * are either bound to the same iommufd or within the same
         * iommu_group as one of the iommufd-bound devices.
         */
        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }

        if (single) {
            ret = -EINVAL;
            goto out_single;
        }
        vfio_pci_pre_reset(tmp);
        tmp->vbasedev.needs_reset = false;
        multi = true;
    }

    if (!single && !multi) {
        ret = -EINVAL;
        goto out_single;
    }

    /* Use a zero-length array for hot reset with the iommufd backend */
    reset = g_malloc0(sizeof(*reset));
    reset->argsz = sizeof(*reset);

    /* Bus reset! */
    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
    g_free(reset);
    if (ret) {
        ret = -errno;
    }

    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                    ret ? strerror(errno) : "Success");

    /* Re-enable INTx on affected devices */
    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }
        vfio_pci_post_reset(tmp);
    }
out_single:
    if (!single) {
        vfio_pci_post_reset(vdev);
    }
    g_free(info);

    return ret;
}

static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->dma_map = iommufd_cdev_map;
    vioc->dma_map_file = iommufd_cdev_map_file;
    vioc->dma_unmap = iommufd_cdev_unmap;
    vioc->attach_device = iommufd_cdev_attach;
    vioc->detach_device = iommufd_cdev_detach;
    vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
    vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
    vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
}

static bool
host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           uint32_t hwpt_id, Error **errp)
{
    VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;

    return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
}

static bool
host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           Error **errp)
{
    VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;

    return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
}

static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
                                      Error **errp)
{
    VFIODevice *vdev = opaque;
    HostIOMMUDeviceIOMMUFD *idev;
    HostIOMMUDeviceCaps *caps = &hiod->caps;
    VendorCaps *vendor_caps = &caps->vendor_caps;
    enum iommu_hw_info_type type;
    uint64_t hw_caps;

    hiod->agent = opaque;

    if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type,
                                         vendor_caps, sizeof(*vendor_caps),
                                         &hw_caps, errp)) {
        return false;
    }

    hiod->name = g_strdup(vdev->name);
    caps->type = type;
    caps->hw_caps = hw_caps;

    idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
    idev->iommufd = vdev->iommufd;
    idev->devid = vdev->devid;
    idev->hwpt_id = vdev->hwpt->hwpt_id;

    return true;
}

static GList *
hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
{
    VFIODevice *vdev = hiod->agent;

    g_assert(vdev);
    return vfio_container_get_iova_ranges(vdev->bcontainer);
}

static uint64_t
hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
{
    VFIODevice *vdev = hiod->agent;

    g_assert(vdev);
    return vfio_container_get_page_size_mask(vdev->bcontainer);
}

static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
    HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc);

    hiodc->realize = hiod_iommufd_vfio_realize;
    hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
    hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;

    idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt;
    idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt;
}

static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU_IOMMUFD,
        .parent = TYPE_VFIO_IOMMU,
        .instance_size = sizeof(VFIOIOMMUFDContainer),
        .class_init = vfio_iommu_iommufd_class_init,
    }, {
        .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
        .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
        .class_init = hiod_iommufd_vfio_class_init,
    }
};

DEFINE_TYPES(types)