/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/sysemu.h"
#include "qemu/reserved-region.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"

/* Max size */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512

typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;

static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}

static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
{
    uint32_t sid;
    bool bypassed;
    VirtIOIOMMU *s = sdev->viommu;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);
    /* need to check bypass before system reset */
    if (!s->endpoints) {
        bypassed = s->config.bypass;
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        bypassed = s->config.bypass;
    } else {
        bypassed = ep->domain->bypass;
    }

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return bypassed;
}
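
/*
 * Each IOMMUDevice exposes two alternative sub-regions under its root
 * container: a bypass alias of system memory and an IOMMU memory region
 * (both created in virtio_iommu_find_add_as()). The helper below enables
 * exactly one of them, depending on whether the endpoint is currently
 * translated or bypassed.
 */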
/* Return whether the device is using IOMMU translation. */
static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
{
    bool use_remapping;

    assert(sdev);

    use_remapping = !virtio_iommu_device_bypassed(sdev);

    trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
                                            PCI_SLOT(sdev->devfn),
                                            PCI_FUNC(sdev->devfn),
                                            use_remapping);

    /* Turn off first then on the other */
    if (use_remapping) {
        memory_region_set_enabled(&sdev->bypass_mr, false);
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
    } else {
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
        memory_region_set_enabled(&sdev->bypass_mr, true);
    }

    return use_remapping;
}

static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
{
    GHashTableIter iter;
    IOMMUPciBus *iommu_pci_bus;
    int i;

    g_hash_table_iter_init(&iter, s->as_by_busptr);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
        for (i = 0; i < PCI_DEVFN_MAX; i++) {
            if (!iommu_pci_bus->pbdev[i]) {
                continue;
            }
            virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
        }
    }
}

/**
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not always be initialized yet.
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}

static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}

static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}

static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
                                          IOMMUTLBEvent *event,
                                          hwaddr virt_start, hwaddr virt_end)
{
    uint64_t delta = virt_end - virt_start;

    event->entry.iova = virt_start;
    event->entry.addr_mask = delta;

    if (delta == UINT64_MAX) {
        memory_region_notify_iommu(mr, 0, *event);
    }

    while (virt_start != virt_end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);

        event->entry.addr_mask = mask;
        event->entry.iova = virt_start;
        memory_region_notify_iommu(mr, 0, *event);
        virt_start += mask + 1;
        if (event->entry.perm != IOMMU_NONE) {
            event->entry.translated_addr += mask + 1;
        }
    }
}
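
/*
 * Note: dma_aligned_pow2_mask() is assumed to return the largest
 * power-of-two-sized, naturally aligned block that starts at virt_start
 * and does not extend past virt_end, so the loop above splits an arbitrary
 * range into such chunks. For example, [0x1000, 0x4fff] would be notified
 * as [0x1000, 0x1fff], [0x2000, 0x3fff] and [0x4000, 0x4fff].
 */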
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEvent event;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = perm;
    event.entry.translated_addr = paddr;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = IOMMU_NONE;
    event.entry.translated_addr = 0;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}

static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;
    IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);

    if (!ep->domain) {
        return;
    }
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
    virtio_iommu_switch_address_space(sdev);
}

static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}

static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}
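
/*
 * Return the domain with identifier @domain_id, creating it with the
 * requested bypass mode if it does not exist yet. Return NULL if the
 * domain already exists with a conflicting bypass setting.
 */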
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id,
                                                  bool bypass)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        if (domain->bypass != bypass) {
            return NULL;
        }
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, (GDestroyNotify)g_free,
                                       (GDestroyNotify)g_free);
    domain->bypass = bypass;
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}

static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}

static void add_prop_resv_regions(IOMMUDevice *sdev)
{
    VirtIOIOMMU *s = sdev->viommu;
    int i;

    for (i = 0; i < s->nr_prop_resv_regions; i++) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);

        *reg = s->prop_resv_regions[i];
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
    }
}
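
/*
 * Callback registered through pci_setup_iommu(): return the DMA address
 * space for (bus, devfn), lazily allocating the per-device IOMMUDevice
 * together with its root container, which holds both the bypass alias and
 * the IOMMU memory region created below.
 */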
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
        address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
        add_prop_resv_regions(sdev);

        /*
         * Build the IOMMU-disabled container with an alias to the shared
         * system memory MR. Aliasing to a shared memory region helps the
         * memory API detect identical FlatViews, so devices in bypass mode
         * (either because the virtio-iommu driver is not configured or
         * because of "iommu=pt") can share the same FlatView. This greatly
         * reduces the total number of FlatViews in the system and makes
         * the VM run faster.
         */
        memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
                                 "system", get_system_memory(), 0,
                                 memory_region_size(get_system_memory()));

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);

        /*
         * Hook both containers under the root container; we switch between
         * the iommu and bypass MRs by enabling/disabling the corresponding
         * sub-container.
         */
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            MEMORY_REGION(&sdev->iommu_mr),
                                            0);
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            &sdev->bypass_mr, 0);

        virtio_iommu_switch_address_space(sdev);
        g_free(name);
    }
    return &sdev->as;
}

static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    IOMMUDevice *sdev;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * the device is already attached to a domain,
         * detach it first
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;
    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
    virtio_iommu_switch_address_space(sdev);

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    return VIRTIO_IOMMU_S_OK;
}
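
/*
 * Handle a MAP request: record the [virt_start, virt_end] -> phys_start
 * mapping in the domain's interval tree and propagate it to the IOMMU
 * memory regions of all endpoints attached to the domain. Any request
 * that overlaps an existing mapping is rejected (interval_cmp() treats
 * overlapping ranges as equal keys).
 */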
static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void **)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}

static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    IOMMUDevice *sdev;
    GList *l;

    sdev = container_of(virtio_iommu_mr(s, ep), IOMMUDevice, iommu_mr);
    if (!sdev) {
        return -EINVAL;
    }

    total = size * g_list_length(sdev->resv_regions);
    if (total > free) {
        return -ENOSPC;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;
        unsigned subtype = reg->type;
        Range *range = &reg->range;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(range_lob(range));
        prop.end = cpu_to_le64(range_upb(range));

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}
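
/*
 * Layout of the PROBE output produced by virtio_iommu_fill_resv_mem_prop()
 * above: one struct virtio_iommu_probe_resv_mem per reserved region, packed
 * back to back, with head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM, head.length
 * covering the payload after the header, and little-endian start/end bounds.
 */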
/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    size_t free = VIOMMU_PROBE_SIZE;
    ssize_t count;

    if (!virtio_iommu_mr(s, ep_id)) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t payload_sz)
{
    size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);

    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}

#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req,               \
                    sizeof(req) - sizeof(struct virtio_iommu_req_tail));\
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)

static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}
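
/*
 * Request queue handler: each virtqueue element carries a device-readable
 * buffer (request head + type-specific payload) and a device-writable
 * buffer which receives the optional PROBE output followed by the request
 * tail holding the status.
 */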
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;
    size_t sz;

    for (;;) {
        size_t output_size = sizeof(tail);

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_rec_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = buf + s->config.probe_size;
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_rec_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}

static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}
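
/*
 * Translation callback for the IOMMU memory region. The lookup order is:
 * unknown endpoint (full access only if config.bypass is set), per-device
 * reserved regions (MSI windows are passed through, others fault),
 * unattached or bypass domains, and finally the domain's mapping tree with
 * a permission check against the MAP flags.
 */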
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    int granule;
    bool found;
    GList *l;

    interval.low = addr;
    interval.high = addr + 1;
    granule = ctz64(s->config.page_size_mask);

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = BIT_ULL(granule) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_rec_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));

    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;

        if (range_contains(&reg->range, addr)) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    } else if (ep->domain->bypass) {
        entry.perm = flag;
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                  !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return entry;
}

static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    struct virtio_iommu_config *out_config = (void *)config_data;

    out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
    out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
    out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
    out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
    out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
    out_config->probe_size = cpu_to_le32(dev_config->probe_size);
    out_config->bypass = dev_config->bypass;

    trace_virtio_iommu_get_config(dev_config->page_size_mask,
                                  dev_config->input_range.start,
                                  dev_config->input_range.end,
                                  dev_config->domain_range.start,
                                  dev_config->domain_range.end,
                                  dev_config->probe_size,
                                  dev_config->bypass);
}

static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    const struct virtio_iommu_config *in_config = (void *)config_data;

    if (in_config->bypass != dev_config->bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        } else if (in_config->bypass != 0 && in_config->bypass != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         in_config->bypass);
            return;
        }
        dev_config->bypass = in_config->bypass;
        virtio_iommu_switch_address_space_all(dev);
    }

    trace_virtio_iommu_set_config(in_config->bypass);
}
static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}

static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}

static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
}

static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
        return -EINVAL;
    }

    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}
/*
 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
 * for example 0xfffffffffffff000. When an assigned device has page size
 * restrictions due to the hardware IOMMU configuration, apply this restriction
 * to the mask.
 */
static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
                                           uint64_t new_mask,
                                           Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint64_t cur_mask = s->config.page_size_mask;

    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
                                          new_mask);

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64
                   " incompatible with currently supported mask 0x%"PRIx64,
                   mr->parent_obj.name, new_mask, cur_mask);
        return -1;
    }

    /*
     * Once the granule is frozen we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it.
     */
    if (s->granule_frozen) {
        int cur_granule = ctz64(cur_mask);

        if (!(BIT_ULL(cur_granule) & new_mask)) {
            error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx",
                       mr->parent_obj.name, BIT_ULL(cur_granule));
            return -1;
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}

static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
     */
    s->config.bypass = s->boot_bypass;
    virtio_iommu_switch_address_space_all(s);
}

static void virtio_iommu_freeze_granule(Notifier *notifier, void *data)
{
    VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done);
    int granule;

    if (likely(s->config.bypass)) {
        /*
         * Transiently enable the IOMMU MRs to collect page_size_mask
         * requirements through memory_region_iommu_set_page_size_mask(),
         * called by the VFIO region_add() callback.
         */
        s->config.bypass = false;
        virtio_iommu_switch_address_space_all(s);
        /* restore default */
        s->config.bypass = true;
        virtio_iommu_switch_address_space_all(s);
    }
    s->granule_frozen = true;
    granule = ctz64(s->config.page_size_mask);
    trace_virtio_iommu_freeze_granule(BIT_ULL(granule));
}

static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                                 virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    /*
     * config.bypass is needed to get initial address space early, such as
     * in vfio realize
     */
    s->config.bypass = s->boot_bypass;
    s->config.page_size_mask = qemu_target_page_mask();
    s->config.input_range.end = UINT64_MAX;
    s->config.domain_range.end = UINT32_MAX;
    s->config.probe_size = VIOMMU_PROBE_SIZE;

    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);

    qemu_rec_mutex_init(&s->mutex);

    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);

    if (s->primary_bus) {
        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
    } else {
        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
    }

    s->machine_done.notify = virtio_iommu_freeze_granule;
    qemu_add_machine_init_done_notifier(&s->machine_done);

    qemu_register_reset(virtio_iommu_system_reset, s);
}
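
/*
 * Tear down in roughly the reverse order of realize: drop the reset and
 * machine-done hooks, free the per-bus, domain and endpoint tables, then
 * the virtqueues and the virtio device state.
 */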
static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    qemu_unregister_reset(virtio_iommu_system_reset, s);
    qemu_remove_machine_init_done_notifier(&s->machine_done);

    g_hash_table_destroy(s->as_by_busptr);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    qemu_rec_mutex_destroy(&s->mutex);

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}

static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);

    trace_virtio_iommu_device_reset();

    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }
    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
                                 NULL, NULL, virtio_iommu_put_domain);
    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
                                   NULL, NULL, virtio_iommu_put_endpoint);
}

static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}

static void virtio_iommu_instance_init(Object *obj)
{
}

#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

#define VMSTATE_MAPPING                                \
{                                                      \
    .name = "mapping",                                 \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping), \
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),     \
        VMSTATE_END_OF_LIST()                          \
    },                                                 \
}

static const VMStateDescription vmstate_interval_mapping[2] = {
    VMSTATE_MAPPING,   /* value */
    VMSTATE_INTERVAL   /* key   */
};

static int domain_preload(void *opaque)
{
    VirtIOIOMMUDomain *domain = opaque;

    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, g_free, g_free);
    return 0;
}

static const VMStateDescription vmstate_endpoint = {
    .name = "endpoint",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_domain = {
    .name = "domain",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_load = domain_preload,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
                        vmstate_interval_mapping,
                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
        VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
        VMSTATE_END_OF_LIST()
    }
};
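
/*
 * Only the domain tree is migrated (see vmstate_virtio_iommu_device); the
 * endpoint tree is rebuilt at post-load time by walking each domain's
 * endpoint list and re-resolving the IOMMU memory region from the
 * endpoint ID.
 */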
static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}

static int iommu_post_load(void *opaque, int version_id)
{
    VirtIOIOMMU *s = opaque;

    g_tree_foreach(s->domains, reconstruct_endpoints, s);

    /*
     * Memory regions are dynamically turned on/off depending on
     * 'config.bypass' and the type of the attached domain, if any.
     * After migration, we need to make sure the memory regions are
     * still correct.
     */
    virtio_iommu_switch_address_space_all(s);
    return 0;
}

static const VMStateDescription vmstate_virtio_iommu_device = {
    .name = "virtio-iommu-device",
    .minimum_version_id = 2,
    .version_id = 2,
    .post_load = iommu_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
                                   &vmstate_domain, VirtIOIOMMUDomain),
        VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_iommu = {
    .name = "virtio-iommu",
    .minimum_version_id = 2,
    .priority = MIG_PRI_IOMMU,
    .version_id = 2,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_iommu_properties[] = {
    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
                     TYPE_PCI_BUS, PCIBus *),
    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_iommu_properties);
    dc->vmsd = &vmstate_virtio_iommu;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->get_features = virtio_iommu_get_features;
    vdc->set_status = virtio_iommu_set_status;
    vdc->vmsd = &vmstate_virtio_iommu_device;
}

static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
}

static const TypeInfo virtio_iommu_info = {
    .name = TYPE_VIRTIO_IOMMU,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOIOMMU),
    .instance_init = virtio_iommu_instance_init,
    .class_init = virtio_iommu_class_init,
};

static const TypeInfo virtio_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
    .class_init = virtio_iommu_memory_region_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_iommu_info);
    type_register_static(&virtio_iommu_memory_region_info);
}

type_init(virtio_register_types)