/*
 * VFIO utility
 *
 * Copyright 2016 - 2018 Red Hat, Inc.
 *
 * Authors:
 *   Fam Zheng <famz@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qapi/error.h"
#include "exec/ramlist.h"
#include "exec/cpu-common.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "standard-headers/linux/pci_regs.h"
#include "qemu/event_notifier.h"
#include "qemu/vfio-helpers.h"

#define QEMU_VFIO_DEBUG 0

#define QEMU_VFIO_IOVA_MIN 0x10000ULL
/* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
 * we can use a runtime limit; alternatively it's also possible to do platform
 * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
 **/
#define QEMU_VFIO_IOVA_MAX (1ULL << 39)

typedef struct {
    /* Page aligned addr. */
    void *host;
    size_t size;
    uint64_t iova;
} IOVAMapping;

struct QEMUVFIOState {
    QemuMutex lock;

    /* These fields are protected by BQL */
    int container;
    int group;
    int device;
    RAMBlockNotifier ram_notifier;
    struct vfio_region_info config_region_info, bar_region_info[6];

    /* These fields are protected by @lock */
    /* VFIO's IO virtual address space is managed by splitting it into a few
     * sections:
     *
     * ---------------       <= 0
     * |xxxxxxxxxxxxx|
     * |-------------|       <= QEMU_VFIO_IOVA_MIN
     * |             |
     * |    Fixed    |
     * |             |
     * |-------------|       <= low_water_mark
     * |             |
     * |    Free     |
     * |             |
     * |-------------|       <= high_water_mark
     * |             |
     * |    Temp     |
     * |             |
     * |-------------|       <= QEMU_VFIO_IOVA_MAX
     * |xxxxxxxxxxxxx|
     * |xxxxxxxxxxxxx|
     * ---------------
     *
     * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
     *
     * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
     *   [QEMU_VFIO_IOVA_MIN, low_water_mark).  Once allocated they will not be
     *   reclaimed - low_water_mark never shrinks;
     *
     * - IOVAs in range [low_water_mark, high_water_mark) are free;
     *
     * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
     *   mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
     *   is recycled. The caller should make sure I/O's depending on these
     *   mappings are completed before calling.
     **/
    uint64_t low_water_mark;
    uint64_t high_water_mark;
    IOVAMapping *mappings;
    int nr_mappings;
};

/**
 * Find the VFIO group file for the PCI device whose address is given in
 * @device, and return the path. The returned string is owned by the caller
 * and should be g_free'ed later.
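 *
 * For example (illustrative values): for @device "0000:44:00.0" whose
 * iommu_group sysfs link resolves to ".../iommu_groups/15", the returned
 * path would be "/dev/vfio/15".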
 */
static char *sysfs_find_group_file(const char *device, Error **errp)
{
    char *sysfs_link;
    char *sysfs_group;
    char *p;
    char *path = NULL;

    sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group",
                                 device);
    sysfs_group = g_malloc0(PATH_MAX);
    if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) {
        error_setg_errno(errp, errno, "Failed to find iommu group sysfs path");
        goto out;
    }
    p = strrchr(sysfs_group, '/');
    if (!p) {
        error_setg(errp, "Failed to find iommu group number");
        goto out;
    }

    path = g_strdup_printf("/dev/vfio/%s", p + 1);
out:
    g_free(sysfs_link);
    g_free(sysfs_group);
    return path;
}

static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
{
    assert(index >= 0 && index < ARRAY_SIZE(s->bar_region_info));
}

static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
{
    assert_bar_index_valid(s, index);
    s->bar_region_info[index] = (struct vfio_region_info) {
        .index = VFIO_PCI_BAR0_REGION_INDEX + index,
        .argsz = sizeof(struct vfio_region_info),
    };
    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO,
              &s->bar_region_info[index])) {
        error_setg_errno(errp, errno, "Failed to get BAR region info");
        return -errno;
    }

    return 0;
}

/**
 * Map a PCI bar area.
 */
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
                            uint64_t offset, uint64_t size,
                            Error **errp)
{
    void *p;
    assert_bar_index_valid(s, index);
    p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
             PROT_READ | PROT_WRITE, MAP_SHARED,
             s->device, s->bar_region_info[index].offset + offset);
    if (p == MAP_FAILED) {
        error_setg_errno(errp, errno, "Failed to map BAR region");
        p = NULL;
    }
    return p;
}

/**
 * Unmap a PCI bar area.
 */
void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
                             uint64_t offset, uint64_t size)
{
    if (bar) {
        munmap(bar, MIN(size, s->bar_region_info[index].size - offset));
    }
}

/**
 * Initialize device IRQ with @irq_type and register an event notifier.
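 * @irq_type is a VFIO IRQ index such as VFIO_PCI_MSIX_IRQ_INDEX; this helper
 * wires up a single eventfd-backed vector for that index.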
 */
int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
                           int irq_type, Error **errp)
{
    int r;
    struct vfio_irq_set *irq_set;
    size_t irq_set_size;
    struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };

    irq_info.index = irq_type;
    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
        error_setg_errno(errp, errno, "Failed to get device interrupt info");
        return -errno;
    }
    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
        error_setg(errp, "Device interrupt doesn't support eventfd");
        return -EINVAL;
    }

    irq_set_size = sizeof(*irq_set) + sizeof(int);
    irq_set = g_malloc0(irq_set_size);

    /* Get to a known IRQ state */
    *irq_set = (struct vfio_irq_set) {
        .argsz = irq_set_size,
        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = irq_info.index,
        .start = 0,
        .count = 1,
    };

    *(int *)&irq_set->data = event_notifier_get_fd(e);
    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
    g_free(irq_set);
    if (r) {
        error_setg_errno(errp, errno, "Failed to setup device interrupt");
        return -errno;
    }
    return 0;
}

static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
                                     int size, int ofs)
{
    int ret;

    do {
        ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
    } while (ret == -1 && errno == EINTR);
    return ret == size ? 0 : -errno;
}

static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf,
                                      int size, int ofs)
{
    int ret;

    do {
        ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
    } while (ret == -1 && errno == EINTR);
    return ret == size ? 0 : -errno;
}

static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
                              Error **errp)
{
    int ret;
    int i;
    uint16_t pci_cmd;
    struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
    struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
    struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
    char *group_file = NULL;

    /* Create a new container */
    s->container = open("/dev/vfio/vfio", O_RDWR);

    if (s->container == -1) {
        error_setg_errno(errp, errno, "Failed to open /dev/vfio/vfio");
        return -errno;
    }
    if (ioctl(s->container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) {
        error_setg(errp, "Invalid VFIO version");
        ret = -EINVAL;
        goto fail_container;
    }

    if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
        error_setg_errno(errp, errno, "VFIO IOMMU check failed");
        ret = -EINVAL;
        goto fail_container;
    }

    /* Open the group */
    group_file = sysfs_find_group_file(device, errp);
    if (!group_file) {
        ret = -EINVAL;
        goto fail_container;
    }

    s->group = open(group_file, O_RDWR);
    if (s->group == -1) {
        error_setg_errno(errp, errno, "Failed to open VFIO group file: %s",
                         group_file);
        g_free(group_file);
        ret = -errno;
        goto fail_container;
    }
    g_free(group_file);

    /* Test whether the group is viable and available */
    if (ioctl(s->group, VFIO_GROUP_GET_STATUS, &group_status)) {
        error_setg_errno(errp, errno, "Failed to get VFIO group status");
        ret = -errno;
        goto fail;
    }

    if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
        error_setg(errp, "VFIO group is not viable");
        ret = -EINVAL;
        goto fail;
    }
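
    /*
     * Viable here means every device in the group is bound to a VFIO driver
     * or to no driver at all, so DMA isolation for the whole group can be
     * guaranteed.
     */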

    /* Add the group to the container */
    if (ioctl(s->group, VFIO_GROUP_SET_CONTAINER, &s->container)) {
        error_setg_errno(errp, errno, "Failed to add group to VFIO container");
        ret = -errno;
        goto fail;
    }

    /* Enable the IOMMU model we want */
    if (ioctl(s->container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)) {
        error_setg_errno(errp, errno, "Failed to set VFIO IOMMU type");
        ret = -errno;
        goto fail;
    }

    /* Get additional IOMMU info */
    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
        error_setg_errno(errp, errno, "Failed to get IOMMU info");
        ret = -errno;
        goto fail;
    }

    s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);

    if (s->device < 0) {
        error_setg_errno(errp, errno, "Failed to get device fd");
        ret = -errno;
        goto fail;
    }

    /* Test and setup the device */
    if (ioctl(s->device, VFIO_DEVICE_GET_INFO, &device_info)) {
        error_setg_errno(errp, errno, "Failed to get device info");
        ret = -errno;
        goto fail;
    }

    if (device_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX) {
        error_setg(errp, "Invalid device regions");
        ret = -EINVAL;
        goto fail;
    }

    s->config_region_info = (struct vfio_region_info) {
        .index = VFIO_PCI_CONFIG_REGION_INDEX,
        .argsz = sizeof(struct vfio_region_info),
    };
    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->config_region_info)) {
        error_setg_errno(errp, errno, "Failed to get config region info");
        ret = -errno;
        goto fail;
    }

    for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) {
        ret = qemu_vfio_pci_init_bar(s, i, errp);
        if (ret) {
            goto fail;
        }
    }

    /* Enable bus master */
    ret = qemu_vfio_pci_read_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
    if (ret) {
        goto fail;
    }
    pci_cmd |= PCI_COMMAND_MASTER;
    ret = qemu_vfio_pci_write_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
    if (ret) {
        goto fail;
    }
    return 0;
fail:
    close(s->group);
fail_container:
    close(s->container);
    return ret;
}

static void qemu_vfio_ram_block_added(RAMBlockNotifier *n,
                                      void *host, size_t size)
{
    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
    trace_qemu_vfio_ram_block_added(s, host, size);
    qemu_vfio_dma_map(s, host, size, false, NULL);
}

static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n,
                                        void *host, size_t size)
{
    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
    if (host) {
        trace_qemu_vfio_ram_block_removed(s, host, size);
        qemu_vfio_dma_unmap(s, host);
    }
}

static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque)
{
    void *host_addr = qemu_ram_get_host_addr(rb);
    ram_addr_t length = qemu_ram_get_used_length(rb);
    int ret;
    QEMUVFIOState *s = opaque;

    if (!host_addr) {
        return 0;
    }
    ret = qemu_vfio_dma_map(s, host_addr, length, false, NULL);
    if (ret) {
        fprintf(stderr, "qemu_vfio_init_ramblock: failed %p %" PRIu64 "\n",
                host_addr, (uint64_t)length);
    }
    return 0;
}

static void qemu_vfio_open_common(QEMUVFIOState *s)
{
    qemu_mutex_init(&s->lock);
    s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
    s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
    ram_block_notifier_add(&s->ram_notifier);
    s->low_water_mark = QEMU_VFIO_IOVA_MIN;
    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
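
    /*
     * Pre-map all existing RAM blocks; blocks added or removed later are
     * handled by the RAM block notifier registered above.
     */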
    qemu_ram_foreach_block(qemu_vfio_init_ramblock, s);
}

/**
 * Open a PCI device, e.g. "0000:00:01.0".
 */
QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
{
    int r;
    QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);

    r = qemu_vfio_init_pci(s, device, errp);
    if (r) {
        g_free(s);
        return NULL;
    }
    qemu_vfio_open_common(s);
    return s;
}

static void qemu_vfio_dump_mapping(IOVAMapping *m)
{
    if (QEMU_VFIO_DEBUG) {
        printf("  vfio mapping %p %" PRIx64 " to %" PRIx64 "\n", m->host,
               (uint64_t)m->size, (uint64_t)m->iova);
    }
}

static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
{
    int i;

    if (QEMU_VFIO_DEBUG) {
        printf("vfio mappings\n");
        for (i = 0; i < s->nr_mappings; ++i) {
            qemu_vfio_dump_mapping(&s->mappings[i]);
        }
    }
}

/**
 * Find the mapping entry that contains [host, host + size) and set @index to
 * the position. If no entry contains it, @index is the position _after_ which
 * to insert the new mapping. IOW, it is the index of the largest element that
 * is smaller than @host, or -1 if no entry is.
 */
static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host,
                                           int *index)
{
    IOVAMapping *p = s->mappings;
    IOVAMapping *q = p ? p + s->nr_mappings - 1 : NULL;
    IOVAMapping *mid;
    trace_qemu_vfio_find_mapping(s, host);
    if (!p) {
        *index = -1;
        return NULL;
    }
    while (true) {
        mid = p + (q - p) / 2;
        if (mid == p) {
            break;
        }
        if (mid->host > host) {
            q = mid;
        } else if (mid->host < host) {
            p = mid;
        } else {
            break;
        }
    }
    if (mid->host > host) {
        mid--;
    } else if (mid < &s->mappings[s->nr_mappings - 1]
               && (mid + 1)->host <= host) {
        mid++;
    }
    *index = mid - &s->mappings[0];
    if (mid >= &s->mappings[0] &&
        mid->host <= host && mid->host + mid->size > host) {
        assert(mid < &s->mappings[s->nr_mappings]);
        return mid;
    }
    /* At this point *index + 1 is the right position to insert the new
     * mapping.*/
    return NULL;
}

/**
 * Allocate IOVA and create a new mapping record and insert it in @s.
 */
static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
                                          void *host, size_t size,
                                          int index, uint64_t iova)
{
    int shift;
    IOVAMapping m = {.host = host, .size = size, .iova = iova};
    IOVAMapping *insert;

    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
    assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
    assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
    trace_qemu_vfio_new_mapping(s, host, size, index, iova);

    assert(index >= 0);
    s->nr_mappings++;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
    insert = &s->mappings[index];
    shift = s->nr_mappings - index - 1;
    if (shift) {
        memmove(insert + 1, insert, shift * sizeof(s->mappings[0]));
    }
    *insert = m;
    return insert;
}

/* Do the DMA mapping with VFIO.
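 * The kernel pins the host pages backing [host, host + size) for as long as
 * the mapping exists, and the IOVA range must not overlap an existing DMA
 * mapping in the container.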
 */
static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
                                uint64_t iova)
{
    struct vfio_iommu_type1_dma_map dma_map = {
        .argsz = sizeof(dma_map),
        .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
        .iova = iova,
        .vaddr = (uintptr_t)host,
        .size = size,
    };
    trace_qemu_vfio_do_mapping(s, host, size, iova);

    if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
        error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
        return -errno;
    }
    return 0;
}

/**
 * Undo the DMA mapping from @s with VFIO, and remove from mapping list.
 */
static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
                                   Error **errp)
{
    int index;
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = mapping->iova,
        .size = mapping->size,
    };

    index = mapping - s->mappings;
    assert(mapping->size > 0);
    assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
    assert(index >= 0 && index < s->nr_mappings);
    if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
    }
    memmove(mapping, &s->mappings[index + 1],
            sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
    s->nr_mappings--;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
}

/* Check if the mapping list is (ascending) ordered. */
static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
{
    int i;
    if (QEMU_VFIO_DEBUG) {
        for (i = 0; i < s->nr_mappings - 1; ++i) {
            if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
                fprintf(stderr, "item %d not sorted!\n", i);
                qemu_vfio_dump_mappings(s);
                return false;
            }
            if (!(s->mappings[i].host + s->mappings[i].size <=
                  s->mappings[i + 1].host)) {
                fprintf(stderr, "item %d overlap with next!\n", i);
                qemu_vfio_dump_mappings(s);
                return false;
            }
        }
    }
    return true;
}

/* Map [host, host + size) area into a contiguous IOVA address space, and store
 * the result in @iova if not NULL. The caller needs to make sure the area is
 * aligned to page size, and must not straddle existing mappings: the area has
 * to be either entirely within one existing mapping or not mapped at all.
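 *
 * If @temporary is false, the mapping is permanent and allocated from the
 * "low" (Fixed) IOVA range; if true, it comes from the "high" (Temp) range
 * and is only valid until the next qemu_vfio_dma_reset_temporary() call.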
607 */ 608 int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size, 609 bool temporary, uint64_t *iova) 610 { 611 int ret = 0; 612 int index; 613 IOVAMapping *mapping; 614 uint64_t iova0; 615 616 assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size)); 617 assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size)); 618 trace_qemu_vfio_dma_map(s, host, size, temporary, iova); 619 qemu_mutex_lock(&s->lock); 620 mapping = qemu_vfio_find_mapping(s, host, &index); 621 if (mapping) { 622 iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host); 623 } else { 624 if (s->high_water_mark - s->low_water_mark + 1 < size) { 625 ret = -ENOMEM; 626 goto out; 627 } 628 if (!temporary) { 629 iova0 = s->low_water_mark; 630 mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0); 631 if (!mapping) { 632 ret = -ENOMEM; 633 goto out; 634 } 635 assert(qemu_vfio_verify_mappings(s)); 636 ret = qemu_vfio_do_mapping(s, host, size, iova0); 637 if (ret) { 638 qemu_vfio_undo_mapping(s, mapping, NULL); 639 goto out; 640 } 641 s->low_water_mark += size; 642 qemu_vfio_dump_mappings(s); 643 } else { 644 iova0 = s->high_water_mark - size; 645 ret = qemu_vfio_do_mapping(s, host, size, iova0); 646 if (ret) { 647 goto out; 648 } 649 s->high_water_mark -= size; 650 } 651 } 652 if (iova) { 653 *iova = iova0; 654 } 655 out: 656 qemu_mutex_unlock(&s->lock); 657 return ret; 658 } 659 660 /* Reset the high watermark and free all "temporary" mappings. */ 661 int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s) 662 { 663 struct vfio_iommu_type1_dma_unmap unmap = { 664 .argsz = sizeof(unmap), 665 .flags = 0, 666 .iova = s->high_water_mark, 667 .size = QEMU_VFIO_IOVA_MAX - s->high_water_mark, 668 }; 669 trace_qemu_vfio_dma_reset_temporary(s); 670 qemu_mutex_lock(&s->lock); 671 if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) { 672 error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); 673 qemu_mutex_unlock(&s->lock); 674 return -errno; 675 } 676 s->high_water_mark = QEMU_VFIO_IOVA_MAX; 677 qemu_mutex_unlock(&s->lock); 678 return 0; 679 } 680 681 /* Unmapping the whole area that was previously mapped with 682 * qemu_vfio_dma_map(). */ 683 void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host) 684 { 685 int index = 0; 686 IOVAMapping *m; 687 688 if (!host) { 689 return; 690 } 691 692 trace_qemu_vfio_dma_unmap(s, host); 693 qemu_mutex_lock(&s->lock); 694 m = qemu_vfio_find_mapping(s, host, &index); 695 if (!m) { 696 goto out; 697 } 698 qemu_vfio_undo_mapping(s, m, NULL); 699 out: 700 qemu_mutex_unlock(&s->lock); 701 } 702 703 static void qemu_vfio_reset(QEMUVFIOState *s) 704 { 705 ioctl(s->device, VFIO_DEVICE_RESET); 706 } 707 708 /* Close and free the VFIO resources. */ 709 void qemu_vfio_close(QEMUVFIOState *s) 710 { 711 int i; 712 713 if (!s) { 714 return; 715 } 716 for (i = 0; i < s->nr_mappings; ++i) { 717 qemu_vfio_undo_mapping(s, &s->mappings[i], NULL); 718 } 719 ram_block_notifier_remove(&s->ram_notifier); 720 qemu_vfio_reset(s); 721 close(s->device); 722 close(s->group); 723 close(s->container); 724 } 725