/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "qemu/range.h"
#include "qemu/reserved-region.h"
#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/sysemu.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"

/* Max size */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512

typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;

struct hiod_key {
    PCIBus *bus;
    uint8_t devfn;
};

static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}

static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
{
    uint32_t sid;
    bool bypassed;
    VirtIOIOMMU *s = sdev->viommu;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);
    /* need to check bypass before system reset */
    if (!s->endpoints) {
        bypassed = s->config.bypass;
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        bypassed = s->config.bypass;
    } else {
        bypassed = ep->domain->bypass;
    }

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return bypassed;
}
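/*
 * Each endpoint is modelled with two exclusive subregions under its root
 * memory region: an alias of system memory ("bypass") and an
 * IOMMUMemoryRegion providing translation. Switching the address space
 * simply toggles which of the two is enabled.
 */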
/* Return whether the device is using IOMMU translation. */
static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
{
    bool use_remapping;

    assert(sdev);

    use_remapping = !virtio_iommu_device_bypassed(sdev);

    trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
                                            PCI_SLOT(sdev->devfn),
                                            PCI_FUNC(sdev->devfn),
                                            use_remapping);

    /* Turn off first then on the other */
    if (use_remapping) {
        memory_region_set_enabled(&sdev->bypass_mr, false);
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
    } else {
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
        memory_region_set_enabled(&sdev->bypass_mr, true);
    }

    return use_remapping;
}

static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
{
    GHashTableIter iter;
    IOMMUPciBus *iommu_pci_bus;
    int i;

    g_hash_table_iter_init(&iter, s->as_by_busptr);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
        for (i = 0; i < PCI_DEVFN_MAX; i++) {
            if (!iommu_pci_bus->pbdev[i]) {
                continue;
            }
            virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
        }
    }
}

/**
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not be always initialized yet.
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}

static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}

static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}

static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
                                          IOMMUTLBEvent *event,
                                          hwaddr virt_start, hwaddr virt_end)
{
    uint64_t delta = virt_end - virt_start;

    event->entry.iova = virt_start;
    event->entry.addr_mask = delta;

    if (delta == UINT64_MAX) {
        memory_region_notify_iommu(mr, 0, *event);
    }

    while (virt_start != virt_end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);

        event->entry.addr_mask = mask;
        event->entry.iova = virt_start;
        memory_region_notify_iommu(mr, 0, *event);
        virt_start += mask + 1;
        if (event->entry.perm != IOMMU_NONE) {
            event->entry.translated_addr += mask + 1;
        }
    }
}
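/*
 * For example, notifying a MAP of [0x1000, 0x3fff] with the helper above
 * produces two events, [0x1000, 0x1fff] and [0x2000, 0x3fff]: each chunk
 * must be naturally aligned and a power-of-2 in size, which is what
 * dma_aligned_pow2_mask() computes.
 */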
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEvent event;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = perm;
    event.entry.translated_addr = paddr;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = IOMMU_NONE;
    event.entry.translated_addr = 0;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}

static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;
    IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);

    if (!ep->domain) {
        return;
    }
    trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id);
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
    virtio_iommu_switch_address_space(sdev);
}

static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}

static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}
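/*
 * Look up a domain by ID, creating it on first use. Returns NULL when the
 * domain already exists but was created with a different bypass setting
 * than the one requested.
 */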
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id,
                                                  bool bypass)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        if (domain->bypass != bypass) {
            return NULL;
        }
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, (GDestroyNotify)g_free,
                                       (GDestroyNotify)g_free);
    domain->bypass = bypass;
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}

static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}

static void add_prop_resv_regions(IOMMUDevice *sdev)
{
    VirtIOIOMMU *s = sdev->viommu;
    int i;

    for (i = 0; i < s->nr_prop_resv_regions; i++) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);

        *reg = s->prop_resv_regions[i];
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
    }
}
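/*
 * PCIIOMMUOps get_address_space() callback: return (and lazily create) the
 * per-endpoint address space. Its root contains two overlapping subregions,
 * a bypass alias of system memory and an IOMMU memory region, of which only
 * one is enabled at any time.
 */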
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
        address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
        add_prop_resv_regions(sdev);

        /*
         * Build the IOMMU disabled container with aliases to the
         * shared MRs. Note that aliasing to a shared memory region
         * could help the memory API to detect same FlatViews so we
         * can have devices to share the same FlatView when in bypass
         * mode. (either by not configuring virtio-iommu driver or with
         * "iommu=pt"). It will greatly reduce the total number of
         * FlatViews of the system hence VM runs faster.
         */
        memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
                                 "system", get_system_memory(), 0,
                                 memory_region_size(get_system_memory()));

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);

        /*
         * Hook both the containers under the root container, we
         * switch between iommu & bypass MRs by enable/disable
         * corresponding sub-containers
         */
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            MEMORY_REGION(&sdev->iommu_mr),
                                            0);
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            &sdev->bypass_mr, 0);

        virtio_iommu_switch_address_space(sdev);
        g_free(name);
    }
    return &sdev->as;
}

static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
{
    const struct hiod_key *key1 = v1;
    const struct hiod_key *key2 = v2;

    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
}

static guint hiod_hash(gconstpointer v)
{
    const struct hiod_key *key = v;
    guint value = (guint)(uintptr_t)key->bus;

    return (guint)(value << 8 | key->devfn);
}

static void hiod_destroy(gpointer v)
{
    object_unref(v);
}

static HostIOMMUDevice *
get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn)
{
    struct hiod_key key = {
        .bus = bus,
        .devfn = devfn,
    };

    return g_hash_table_lookup(viommu->host_iommu_devices, &key);
}

/**
 * rebuild_resv_regions: rebuild resv regions with both the
 * info of host resv ranges and property set resv ranges
 */
static int rebuild_resv_regions(IOMMUDevice *sdev)
{
    GList *l;
    int i = 0;

    /* free the existing list and rebuild it from scratch */
    g_list_free_full(sdev->resv_regions, g_free);
    sdev->resv_regions = NULL;

    /* First add host reserved regions if any, all tagged as RESERVED */
    for (l = sdev->host_resv_ranges; l; l = l->next) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);
        Range *r = (Range *)l->data;

        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
                                             range_lob(&reg->range),
                                             range_upb(&reg->range));
        i++;
    }
    /*
     * then add higher priority reserved regions set by the machine
     * through properties
     */
    add_prop_resv_regions(sdev);
    return 0;
}

static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
                                             int devfn, GList *iova_ranges,
                                             Error **errp)
{
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    IOMMUDevice *sdev;
    int ret = -EINVAL;

    if (!sbus) {
        error_setg(errp, "%s: no IOMMUPciBus found!", __func__);
        return ret;
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        error_setg(errp, "%s: no IOMMUDevice found!", __func__);
        return ret;
    }

    if (sdev->host_resv_ranges) {
        error_setg(errp, "%s virtio-iommu does not support aliased BDF",
                   __func__);
        return ret;
    }

    range_inverse_array(iova_ranges,
                        &sdev->host_resv_ranges,
                        0, UINT64_MAX);
    rebuild_resv_regions(sdev);

    return 0;
}
static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
                                                int devfn)
{
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    IOMMUDevice *sdev;

    if (!sbus) {
        return;
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        return;
    }

    g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free);
    g_list_free_full(sdev->resv_regions, g_free);
    sdev->host_resv_ranges = NULL;
    sdev->resv_regions = NULL;
    add_prop_resv_regions(sdev);
}

static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask,
                                 Error **errp)
{
    uint64_t cur_mask = viommu->config.page_size_mask;

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu reports a page size mask 0x%"PRIx64
                   " incompatible with currently supported mask 0x%"PRIx64,
                   new_mask, cur_mask);
        return false;
    }
    /*
     * Once the granule is frozen we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it.
     */
    if (viommu->granule_frozen) {
        int cur_granule = ctz64(cur_mask);

        if (!(BIT_ULL(cur_granule) & new_mask)) {
            error_setg(errp,
                       "virtio-iommu does not support frozen granule 0x%llx",
                       BIT_ULL(cur_granule));
            return false;
        }
    }
    return true;
}
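/*
 * PCIIOMMUOps set_iommu_device() callback: record the HostIOMMUDevice that
 * backs this endpoint (e.g. a passthrough device) and fold its usable IOVA
 * ranges and page size mask into the virtio-iommu configuration.
 */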
static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
                                          HostIOMMUDevice *hiod, Error **errp)
{
    ERRP_GUARD();
    VirtIOIOMMU *viommu = opaque;
    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
    struct hiod_key *new_key;
    GList *host_iova_ranges = NULL;

    assert(hiod);

    if (get_host_iommu_device(viommu, bus, devfn)) {
        error_setg(errp, "Host IOMMU device already exists");
        return false;
    }

    if (hiodc->get_iova_ranges) {
        int ret;
        host_iova_ranges = hiodc->get_iova_ranges(hiod);
        if (!host_iova_ranges) {
            return true; /* some old kernels may not support that capability */
        }
        ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus,
                                                hiod->aliased_devfn,
                                                host_iova_ranges, errp);
        if (ret) {
            goto error;
        }
    }
    if (hiodc->get_page_size_mask) {
        uint64_t new_mask = hiodc->get_page_size_mask(hiod);

        if (check_page_size_mask(viommu, new_mask, errp)) {
            /*
             * The default mask depends on the "granule" property. For example,
             * with 4k granule, it is -(4 * KiB). When an assigned device has
             * page size restrictions due to the hardware IOMMU configuration,
             * apply this restriction to the mask.
             */
            trace_virtio_iommu_update_page_size_mask(hiod->name,
                                                     viommu->config.page_size_mask,
                                                     new_mask);
            if (!viommu->granule_frozen) {
                viommu->config.page_size_mask &= new_mask;
            }
        } else {
            error_prepend(errp, "%s: ", hiod->name);
            goto error;
        }
    }

    new_key = g_malloc(sizeof(*new_key));
    new_key->bus = bus;
    new_key->devfn = devfn;

    object_ref(hiod);
    g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod);
    g_list_free_full(host_iova_ranges, g_free);

    return true;
error:
    g_list_free_full(host_iova_ranges, g_free);
    return false;
}

static void
virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
{
    VirtIOIOMMU *viommu = opaque;
    HostIOMMUDevice *hiod;
    struct hiod_key key = {
        .bus = bus,
        .devfn = devfn,
    };

    hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key);
    if (!hiod) {
        return;
    }
    virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus,
                                        hiod->aliased_devfn);

    g_hash_table_remove(viommu->host_iommu_devices, &key);
}

static const PCIIOMMUOps virtio_iommu_ops = {
    .get_address_space = virtio_iommu_find_add_as,
    .set_iommu_device = virtio_iommu_set_iommu_device,
    .unset_iommu_device = virtio_iommu_unset_iommu_device,
};

static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    IOMMUDevice *sdev;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * the device is already attached to a domain,
         * detach it first
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;
    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
    virtio_iommu_switch_address_space(sdev);

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}
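/*
 * DETACH request: remove the endpoint from its domain. The domain itself is
 * freed once its last endpoint has been detached.
 */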
static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id));
    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void**)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}
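/*
 * Serialize the endpoint's reserved regions as VIRTIO_IOMMU_PROBE_T_RESV_MEM
 * properties into @buf. Returns the number of bytes written, or -ENOSPC if
 * the probe buffer cannot hold them all.
 */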
static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    GList *l;

    total = size * g_list_length(sdev->resv_regions);
    if (total > free) {
        return -ENOSPC;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;
        unsigned subtype = reg->type;
        Range *range = &reg->range;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(range_lob(range));
        prop.end = cpu_to_le64(range_upb(range));

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}

/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id);
    size_t free = VIOMMU_PROBE_SIZE;
    IOMMUDevice *sdev;
    ssize_t count;

    if (!iommu_mr) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr);

    count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t payload_sz)
{
    size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);

    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}

#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req,               \
                    sizeof(req) - sizeof(struct virtio_iommu_req_tail));\
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)
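/*
 * For instance, virtio_iommu_handle_req(map) above expands to a
 * virtio_iommu_handle_map() helper that copies the request payload (minus
 * the tail) out of the descriptor chain and then calls virtio_iommu_map().
 */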
static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}
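/*
 * Request virtqueue handler: pop each element, dispatch on the request head
 * type and write back either the tail status alone or, for PROBE, the probe
 * buffer followed by the tail.
 */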
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;
    size_t sz;

    for (;;) {
        size_t output_size = sizeof(tail);

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: read %zu bytes from command head "
                          "but expected %zu\n", __func__, sz, sizeof(head));
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_rec_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = buf + s->config.probe_size;
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_rec_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        if (unlikely(sz != output_size)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: wrote %zu bytes to command response "
                          "but response size is %zu\n",
                          __func__, sz, output_size);
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            /*
             * We checked that sizeof(tail) can fit to elem->in_sg at the
             * beginning of the loop
             */
            output_size = sizeof(tail);
            g_free(buf);
            buf = NULL;
            sz = iov_from_buf(elem->in_sg,
                              elem->in_num,
                              0,
                              &tail,
                              output_size);
        }
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}

static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}
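/*
 * IOMMU translate callback. Resolution order: unknown endpoints follow the
 * global config.bypass policy; accesses hitting a reserved region are only
 * allowed for MSI windows; endpoints without a domain again follow
 * config.bypass; bypass domains pass everything through; otherwise the
 * access is checked against the domain's mapping tree and permissions.
 */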
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    int granule;
    bool found;
    GList *l;

    interval.low = addr;
    interval.high = addr + 1;
    granule = ctz64(s->config.page_size_mask);

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = BIT_ULL(granule) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_rec_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));

    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;

        if (range_contains(&reg->range, addr)) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    } else if (ep->domain->bypass) {
        entry.perm = flag;
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return entry;
}

static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    struct virtio_iommu_config *out_config = (void *)config_data;

    out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
    out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
    out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
    out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
    out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
    out_config->probe_size = cpu_to_le32(dev_config->probe_size);
    out_config->bypass = dev_config->bypass;

    trace_virtio_iommu_get_config(dev_config->page_size_mask,
                                  dev_config->input_range.start,
                                  dev_config->input_range.end,
                                  dev_config->domain_range.start,
                                  dev_config->domain_range.end,
                                  dev_config->probe_size,
                                  dev_config->bypass);
}
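/*
 * Only the bypass field of the config space is guest-writable, and only when
 * VIRTIO_IOMMU_F_BYPASS_CONFIG has been negotiated.
 */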
static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    const struct virtio_iommu_config *in_config = (void *)config_data;

    if (in_config->bypass != dev_config->bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        } else if (in_config->bypass != 0 && in_config->bypass != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         in_config->bypass);
            return;
        }
        dev_config->bypass = in_config->bypass;
        virtio_iommu_switch_address_space_all(dev);
    }

    trace_virtio_iommu_set_config(in_config->bypass);
}

static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}

static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}

static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
}

static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
        return -EINVAL;
    }

    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}

static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
     */
    s->config.bypass = s->boot_bypass;
    virtio_iommu_switch_address_space_all(s);
}

static void virtio_iommu_freeze_granule(Notifier *notifier, void *data)
{
    VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done);
    int granule;

    s->granule_frozen = true;
    granule = ctz64(s->config.page_size_mask);
    trace_virtio_iommu_freeze_granule(BIT_ULL(granule));
}
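/*
 * Realize: create the request and event virtqueues, derive the initial
 * config space (bypass, input range, page size mask) from the device
 * properties, advertise the feature bits, and register the PCIIOMMUOps on
 * the primary PCI bus.
 */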
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                                 virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    /*
     * config.bypass is needed to get initial address space early, such as
     * in vfio realize
     */
    s->config.bypass = s->boot_bypass;
    if (s->aw_bits < 32 || s->aw_bits > 64) {
        error_setg(errp, "aw-bits must be within [32,64]");
        return;
    }
    s->config.input_range.end =
        s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;

    switch (s->granule_mode) {
    case GRANULE_MODE_4K:
        s->config.page_size_mask = -(4 * KiB);
        break;
    case GRANULE_MODE_8K:
        s->config.page_size_mask = -(8 * KiB);
        break;
    case GRANULE_MODE_16K:
        s->config.page_size_mask = -(16 * KiB);
        break;
    case GRANULE_MODE_64K:
        s->config.page_size_mask = -(64 * KiB);
        break;
    case GRANULE_MODE_HOST:
        s->config.page_size_mask = qemu_real_host_page_mask();
        break;
    default:
        error_setg(errp, "Unsupported granule mode");
    }
    s->config.domain_range.end = UINT32_MAX;
    s->config.probe_size = VIOMMU_PROBE_SIZE;

    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);

    qemu_rec_mutex_init(&s->mutex);

    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);

    s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal,
                                                  g_free, hiod_destroy);

    if (s->primary_bus) {
        pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
    } else {
        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
    }

    s->machine_done.notify = virtio_iommu_freeze_granule;
    qemu_add_machine_init_done_notifier(&s->machine_done);

    qemu_register_reset(virtio_iommu_system_reset, s);
}

static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    qemu_unregister_reset(virtio_iommu_system_reset, s);
    qemu_remove_machine_init_done_notifier(&s->machine_done);

    g_hash_table_destroy(s->as_by_busptr);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    qemu_rec_mutex_destroy(&s->mutex);

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}
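/*
 * Device reset drops all domains and endpoints (their destroy notifiers
 * detach endpoints and free the mapping trees) and recreates empty trees;
 * config.bypass is deliberately preserved and only restored on system reset.
 */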
static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);

    trace_virtio_iommu_device_reset();

    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }
    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
                                 NULL, NULL, virtio_iommu_put_domain);
    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
                                   NULL, NULL, virtio_iommu_put_endpoint);
}

static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}

static void virtio_iommu_instance_init(Object *obj)
{
}

#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (const VMStateField[]) {                 \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

#define VMSTATE_MAPPING                               \
{                                                     \
    .name = "mapping",                                \
    .version_id = 1,                                  \
    .minimum_version_id = 1,                          \
    .fields = (const VMStateField[]) {                \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
        VMSTATE_END_OF_LIST()                         \
    },                                                \
}

static const VMStateDescription vmstate_interval_mapping[2] = {
    VMSTATE_MAPPING,   /* value */
    VMSTATE_INTERVAL   /* key   */
};

static int domain_preload(void *opaque)
{
    VirtIOIOMMUDomain *domain = opaque;

    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, g_free, g_free);
    return 0;
}

static const VMStateDescription vmstate_endpoint = {
    .name = "endpoint",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_domain = {
    .name = "domain",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_load = domain_preload,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
                        vmstate_interval_mapping,
                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
        VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
        VMSTATE_END_OF_LIST()
    }
};

static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}
static int iommu_post_load(void *opaque, int version_id)
{
    VirtIOIOMMU *s = opaque;

    g_tree_foreach(s->domains, reconstruct_endpoints, s);

    /*
     * Memory regions are dynamically turned on/off depending on
     * 'config.bypass' and attached domain type if there is. After
     * migration, we need to make sure the memory regions are
     * still correct.
     */
    virtio_iommu_switch_address_space_all(s);
    return 0;
}

static const VMStateDescription vmstate_virtio_iommu_device = {
    .name = "virtio-iommu-device",
    .minimum_version_id = 2,
    .version_id = 2,
    .post_load = iommu_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
                                   &vmstate_domain, VirtIOIOMMUDomain),
        VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_iommu = {
    .name = "virtio-iommu",
    .minimum_version_id = 2,
    .priority = MIG_PRI_IOMMU,
    .version_id = 2,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_iommu_properties[] = {
    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
                     TYPE_PCI_BUS, PCIBus *),
    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
    DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
                             GRANULE_MODE_HOST),
    DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_iommu_properties);
    dc->vmsd = &vmstate_virtio_iommu;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->get_features = virtio_iommu_get_features;
    vdc->set_status = virtio_iommu_set_status;
    vdc->vmsd = &vmstate_virtio_iommu_device;
}

static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
}

static const TypeInfo virtio_iommu_info = {
    .name = TYPE_VIRTIO_IOMMU,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOIOMMU),
    .instance_init = virtio_iommu_instance_init,
    .class_init = virtio_iommu_class_init,
};

static const TypeInfo virtio_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
    .class_init = virtio_iommu_memory_region_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_iommu_info);
    type_register_static(&virtio_iommu_memory_region_info);
}

type_init(virtio_register_types)