1 /* 2 * virtio-iommu device 3 * 4 * Copyright (c) 2020 Red Hat, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/log.h" 22 #include "qemu/iov.h" 23 #include "hw/qdev-properties.h" 24 #include "hw/virtio/virtio.h" 25 #include "sysemu/kvm.h" 26 #include "sysemu/reset.h" 27 #include "qapi/error.h" 28 #include "qemu/error-report.h" 29 #include "trace.h" 30 31 #include "standard-headers/linux/virtio_ids.h" 32 33 #include "hw/virtio/virtio-bus.h" 34 #include "hw/virtio/virtio-access.h" 35 #include "hw/virtio/virtio-iommu.h" 36 #include "hw/pci/pci_bus.h" 37 #include "hw/pci/pci.h" 38 39 /* Max size */ 40 #define VIOMMU_DEFAULT_QUEUE_SIZE 256 41 #define VIOMMU_PROBE_SIZE 512 42 43 typedef struct VirtIOIOMMUDomain { 44 uint32_t id; 45 bool bypass; 46 GTree *mappings; 47 QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list; 48 } VirtIOIOMMUDomain; 49 50 typedef struct VirtIOIOMMUEndpoint { 51 uint32_t id; 52 VirtIOIOMMUDomain *domain; 53 IOMMUMemoryRegion *iommu_mr; 54 QLIST_ENTRY(VirtIOIOMMUEndpoint) next; 55 } VirtIOIOMMUEndpoint; 56 57 typedef struct VirtIOIOMMUInterval { 58 uint64_t low; 59 uint64_t high; 60 } VirtIOIOMMUInterval; 61 62 typedef struct VirtIOIOMMUMapping { 63 uint64_t phys_addr; 64 uint32_t flags; 65 } VirtIOIOMMUMapping; 66 67 static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) 68 { 69 return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); 70 } 71 72 static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) 73 { 74 uint32_t sid; 75 bool bypassed; 76 VirtIOIOMMU *s = sdev->viommu; 77 VirtIOIOMMUEndpoint *ep; 78 79 sid = virtio_iommu_get_bdf(sdev); 80 81 qemu_rec_mutex_lock(&s->mutex); 82 /* need to check bypass before system reset */ 83 if (!s->endpoints) { 84 bypassed = s->config.bypass; 85 goto unlock; 86 } 87 88 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); 89 if (!ep || !ep->domain) { 90 bypassed = s->config.bypass; 91 } else { 92 bypassed = ep->domain->bypass; 93 } 94 95 unlock: 96 qemu_rec_mutex_unlock(&s->mutex); 97 return bypassed; 98 } 99 100 /* Return whether the device is using IOMMU translation. */ 101 static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) 102 { 103 bool use_remapping; 104 105 assert(sdev); 106 107 use_remapping = !virtio_iommu_device_bypassed(sdev); 108 109 trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), 110 PCI_SLOT(sdev->devfn), 111 PCI_FUNC(sdev->devfn), 112 use_remapping); 113 114 /* Turn off first then on the other */ 115 if (use_remapping) { 116 memory_region_set_enabled(&sdev->bypass_mr, false); 117 memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); 118 } else { 119 memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); 120 memory_region_set_enabled(&sdev->bypass_mr, true); 121 } 122 123 return use_remapping; 124 } 125 126 static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) 127 { 128 GHashTableIter iter; 129 IOMMUPciBus *iommu_pci_bus; 130 int i; 131 132 g_hash_table_iter_init(&iter, s->as_by_busptr); 133 while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { 134 for (i = 0; i < PCI_DEVFN_MAX; i++) { 135 if (!iommu_pci_bus->pbdev[i]) { 136 continue; 137 } 138 virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); 139 } 140 } 141 } 142 143 /** 144 * The bus number is used for lookup when SID based operations occur. 145 * In that case we lazily populate the IOMMUPciBus array from the bus hash 146 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus 147 * numbers may not be always initialized yet. 148 */ 149 static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num) 150 { 151 IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num]; 152 153 if (!iommu_pci_bus) { 154 GHashTableIter iter; 155 156 g_hash_table_iter_init(&iter, s->as_by_busptr); 157 while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { 158 if (pci_bus_num(iommu_pci_bus->bus) == bus_num) { 159 s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus; 160 return iommu_pci_bus; 161 } 162 } 163 return NULL; 164 } 165 return iommu_pci_bus; 166 } 167 168 static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid) 169 { 170 uint8_t bus_n, devfn; 171 IOMMUPciBus *iommu_pci_bus; 172 IOMMUDevice *dev; 173 174 bus_n = PCI_BUS_NUM(sid); 175 iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n); 176 if (iommu_pci_bus) { 177 devfn = sid & (PCI_DEVFN_MAX - 1); 178 dev = iommu_pci_bus->pbdev[devfn]; 179 if (dev) { 180 return &dev->iommu_mr; 181 } 182 } 183 return NULL; 184 } 185 186 static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data) 187 { 188 VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a; 189 VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b; 190 191 if (inta->high < intb->low) { 192 return -1; 193 } else if (intb->high < inta->low) { 194 return 1; 195 } else { 196 return 0; 197 } 198 } 199 200 static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr, 201 IOMMUTLBEvent *event, 202 hwaddr virt_start, hwaddr virt_end) 203 { 204 uint64_t delta = virt_end - virt_start; 205 206 event->entry.iova = virt_start; 207 event->entry.addr_mask = delta; 208 209 if (delta == UINT64_MAX) { 210 memory_region_notify_iommu(mr, 0, *event); 211 } 212 213 while (virt_start != virt_end + 1) { 214 uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64); 215 216 event->entry.addr_mask = mask; 217 event->entry.iova = virt_start; 218 memory_region_notify_iommu(mr, 0, *event); 219 virt_start += mask + 1; 220 if (event->entry.perm != IOMMU_NONE) { 221 event->entry.translated_addr += mask + 1; 222 } 223 } 224 } 225 226 static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, 227 hwaddr virt_end, hwaddr paddr, 228 uint32_t flags) 229 { 230 IOMMUTLBEvent event; 231 IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ, 232 flags & VIRTIO_IOMMU_MAP_F_WRITE); 233 234 if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) || 235 (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) { 236 return; 237 } 238 239 trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end, 240 paddr, perm); 241 242 event.type = IOMMU_NOTIFIER_MAP; 243 event.entry.target_as = &address_space_memory; 244 event.entry.perm = perm; 245 event.entry.translated_addr = paddr; 246 247 virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end); 248 } 249 250 static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, 251 hwaddr virt_end) 252 { 253 IOMMUTLBEvent event; 254 255 if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) { 256 return; 257 } 258 259 trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end); 260 261 event.type = IOMMU_NOTIFIER_UNMAP; 262 event.entry.target_as = &address_space_memory; 263 event.entry.perm = IOMMU_NONE; 264 event.entry.translated_addr = 0; 265 266 virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end); 267 } 268 269 static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value, 270 gpointer data) 271 { 272 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key; 273 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data; 274 275 virtio_iommu_notify_unmap(mr, interval->low, interval->high); 276 277 return false; 278 } 279 280 static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value, 281 gpointer data) 282 { 283 VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value; 284 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key; 285 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data; 286 287 virtio_iommu_notify_map(mr, interval->low, interval->high, 288 mapping->phys_addr, mapping->flags); 289 290 return false; 291 } 292 293 static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) 294 { 295 VirtIOIOMMUDomain *domain = ep->domain; 296 IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); 297 298 if (!ep->domain) { 299 return; 300 } 301 g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb, 302 ep->iommu_mr); 303 QLIST_REMOVE(ep, next); 304 ep->domain = NULL; 305 virtio_iommu_switch_address_space(sdev); 306 } 307 308 static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, 309 uint32_t ep_id) 310 { 311 VirtIOIOMMUEndpoint *ep; 312 IOMMUMemoryRegion *mr; 313 314 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); 315 if (ep) { 316 return ep; 317 } 318 mr = virtio_iommu_mr(s, ep_id); 319 if (!mr) { 320 return NULL; 321 } 322 ep = g_malloc0(sizeof(*ep)); 323 ep->id = ep_id; 324 ep->iommu_mr = mr; 325 trace_virtio_iommu_get_endpoint(ep_id); 326 g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep); 327 return ep; 328 } 329 330 static void virtio_iommu_put_endpoint(gpointer data) 331 { 332 VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data; 333 334 if (ep->domain) { 335 virtio_iommu_detach_endpoint_from_domain(ep); 336 } 337 338 trace_virtio_iommu_put_endpoint(ep->id); 339 g_free(ep); 340 } 341 342 static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s, 343 uint32_t domain_id, 344 bool bypass) 345 { 346 VirtIOIOMMUDomain *domain; 347 348 domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); 349 if (domain) { 350 if (domain->bypass != bypass) { 351 return NULL; 352 } 353 return domain; 354 } 355 domain = g_malloc0(sizeof(*domain)); 356 domain->id = domain_id; 357 domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, 358 NULL, (GDestroyNotify)g_free, 359 (GDestroyNotify)g_free); 360 domain->bypass = bypass; 361 g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain); 362 QLIST_INIT(&domain->endpoint_list); 363 trace_virtio_iommu_get_domain(domain_id); 364 return domain; 365 } 366 367 static void virtio_iommu_put_domain(gpointer data) 368 { 369 VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data; 370 VirtIOIOMMUEndpoint *iter, *tmp; 371 372 QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) { 373 virtio_iommu_detach_endpoint_from_domain(iter); 374 } 375 g_tree_destroy(domain->mappings); 376 trace_virtio_iommu_put_domain(domain->id); 377 g_free(domain); 378 } 379 380 static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, 381 int devfn) 382 { 383 VirtIOIOMMU *s = opaque; 384 IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); 385 static uint32_t mr_index; 386 IOMMUDevice *sdev; 387 388 if (!sbus) { 389 sbus = g_malloc0(sizeof(IOMMUPciBus) + 390 sizeof(IOMMUDevice *) * PCI_DEVFN_MAX); 391 sbus->bus = bus; 392 g_hash_table_insert(s->as_by_busptr, bus, sbus); 393 } 394 395 sdev = sbus->pbdev[devfn]; 396 if (!sdev) { 397 char *name = g_strdup_printf("%s-%d-%d", 398 TYPE_VIRTIO_IOMMU_MEMORY_REGION, 399 mr_index++, devfn); 400 sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1); 401 402 sdev->viommu = s; 403 sdev->bus = bus; 404 sdev->devfn = devfn; 405 406 trace_virtio_iommu_init_iommu_mr(name); 407 408 memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); 409 address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); 410 411 /* 412 * Build the IOMMU disabled container with aliases to the 413 * shared MRs. Note that aliasing to a shared memory region 414 * could help the memory API to detect same FlatViews so we 415 * can have devices to share the same FlatView when in bypass 416 * mode. (either by not configuring virtio-iommu driver or with 417 * "iommu=pt"). It will greatly reduce the total number of 418 * FlatViews of the system hence VM runs faster. 419 */ 420 memory_region_init_alias(&sdev->bypass_mr, OBJECT(s), 421 "system", get_system_memory(), 0, 422 memory_region_size(get_system_memory())); 423 424 memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), 425 TYPE_VIRTIO_IOMMU_MEMORY_REGION, 426 OBJECT(s), name, 427 UINT64_MAX); 428 429 /* 430 * Hook both the containers under the root container, we 431 * switch between iommu & bypass MRs by enable/disable 432 * corresponding sub-containers 433 */ 434 memory_region_add_subregion_overlap(&sdev->root, 0, 435 MEMORY_REGION(&sdev->iommu_mr), 436 0); 437 memory_region_add_subregion_overlap(&sdev->root, 0, 438 &sdev->bypass_mr, 0); 439 440 virtio_iommu_switch_address_space(sdev); 441 g_free(name); 442 } 443 return &sdev->as; 444 } 445 446 static int virtio_iommu_attach(VirtIOIOMMU *s, 447 struct virtio_iommu_req_attach *req) 448 { 449 uint32_t domain_id = le32_to_cpu(req->domain); 450 uint32_t ep_id = le32_to_cpu(req->endpoint); 451 uint32_t flags = le32_to_cpu(req->flags); 452 VirtIOIOMMUDomain *domain; 453 VirtIOIOMMUEndpoint *ep; 454 IOMMUDevice *sdev; 455 456 trace_virtio_iommu_attach(domain_id, ep_id); 457 458 if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) { 459 return VIRTIO_IOMMU_S_INVAL; 460 } 461 462 ep = virtio_iommu_get_endpoint(s, ep_id); 463 if (!ep) { 464 return VIRTIO_IOMMU_S_NOENT; 465 } 466 467 if (ep->domain) { 468 VirtIOIOMMUDomain *previous_domain = ep->domain; 469 /* 470 * the device is already attached to a domain, 471 * detach it first 472 */ 473 virtio_iommu_detach_endpoint_from_domain(ep); 474 if (QLIST_EMPTY(&previous_domain->endpoint_list)) { 475 g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id)); 476 } 477 } 478 479 domain = virtio_iommu_get_domain(s, domain_id, 480 flags & VIRTIO_IOMMU_ATTACH_F_BYPASS); 481 if (!domain) { 482 /* Incompatible bypass flag */ 483 return VIRTIO_IOMMU_S_INVAL; 484 } 485 QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); 486 487 ep->domain = domain; 488 sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); 489 virtio_iommu_switch_address_space(sdev); 490 491 /* Replay domain mappings on the associated memory region */ 492 g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb, 493 ep->iommu_mr); 494 495 return VIRTIO_IOMMU_S_OK; 496 } 497 498 static int virtio_iommu_detach(VirtIOIOMMU *s, 499 struct virtio_iommu_req_detach *req) 500 { 501 uint32_t domain_id = le32_to_cpu(req->domain); 502 uint32_t ep_id = le32_to_cpu(req->endpoint); 503 VirtIOIOMMUDomain *domain; 504 VirtIOIOMMUEndpoint *ep; 505 506 trace_virtio_iommu_detach(domain_id, ep_id); 507 508 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); 509 if (!ep) { 510 return VIRTIO_IOMMU_S_NOENT; 511 } 512 513 domain = ep->domain; 514 515 if (!domain || domain->id != domain_id) { 516 return VIRTIO_IOMMU_S_INVAL; 517 } 518 519 virtio_iommu_detach_endpoint_from_domain(ep); 520 521 if (QLIST_EMPTY(&domain->endpoint_list)) { 522 g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id)); 523 } 524 return VIRTIO_IOMMU_S_OK; 525 } 526 527 static int virtio_iommu_map(VirtIOIOMMU *s, 528 struct virtio_iommu_req_map *req) 529 { 530 uint32_t domain_id = le32_to_cpu(req->domain); 531 uint64_t phys_start = le64_to_cpu(req->phys_start); 532 uint64_t virt_start = le64_to_cpu(req->virt_start); 533 uint64_t virt_end = le64_to_cpu(req->virt_end); 534 uint32_t flags = le32_to_cpu(req->flags); 535 VirtIOIOMMUDomain *domain; 536 VirtIOIOMMUInterval *interval; 537 VirtIOIOMMUMapping *mapping; 538 VirtIOIOMMUEndpoint *ep; 539 540 if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) { 541 return VIRTIO_IOMMU_S_INVAL; 542 } 543 544 domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); 545 if (!domain) { 546 return VIRTIO_IOMMU_S_NOENT; 547 } 548 549 if (domain->bypass) { 550 return VIRTIO_IOMMU_S_INVAL; 551 } 552 553 interval = g_malloc0(sizeof(*interval)); 554 555 interval->low = virt_start; 556 interval->high = virt_end; 557 558 mapping = g_tree_lookup(domain->mappings, (gpointer)interval); 559 if (mapping) { 560 g_free(interval); 561 return VIRTIO_IOMMU_S_INVAL; 562 } 563 564 trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags); 565 566 mapping = g_malloc0(sizeof(*mapping)); 567 mapping->phys_addr = phys_start; 568 mapping->flags = flags; 569 570 g_tree_insert(domain->mappings, interval, mapping); 571 572 QLIST_FOREACH(ep, &domain->endpoint_list, next) { 573 virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start, 574 flags); 575 } 576 577 return VIRTIO_IOMMU_S_OK; 578 } 579 580 static int virtio_iommu_unmap(VirtIOIOMMU *s, 581 struct virtio_iommu_req_unmap *req) 582 { 583 uint32_t domain_id = le32_to_cpu(req->domain); 584 uint64_t virt_start = le64_to_cpu(req->virt_start); 585 uint64_t virt_end = le64_to_cpu(req->virt_end); 586 VirtIOIOMMUMapping *iter_val; 587 VirtIOIOMMUInterval interval, *iter_key; 588 VirtIOIOMMUDomain *domain; 589 VirtIOIOMMUEndpoint *ep; 590 int ret = VIRTIO_IOMMU_S_OK; 591 592 trace_virtio_iommu_unmap(domain_id, virt_start, virt_end); 593 594 domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); 595 if (!domain) { 596 return VIRTIO_IOMMU_S_NOENT; 597 } 598 599 if (domain->bypass) { 600 return VIRTIO_IOMMU_S_INVAL; 601 } 602 603 interval.low = virt_start; 604 interval.high = virt_end; 605 606 while (g_tree_lookup_extended(domain->mappings, &interval, 607 (void **)&iter_key, (void**)&iter_val)) { 608 uint64_t current_low = iter_key->low; 609 uint64_t current_high = iter_key->high; 610 611 if (interval.low <= current_low && interval.high >= current_high) { 612 QLIST_FOREACH(ep, &domain->endpoint_list, next) { 613 virtio_iommu_notify_unmap(ep->iommu_mr, current_low, 614 current_high); 615 } 616 g_tree_remove(domain->mappings, iter_key); 617 trace_virtio_iommu_unmap_done(domain_id, current_low, current_high); 618 } else { 619 ret = VIRTIO_IOMMU_S_RANGE; 620 break; 621 } 622 } 623 return ret; 624 } 625 626 static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep, 627 uint8_t *buf, size_t free) 628 { 629 struct virtio_iommu_probe_resv_mem prop = {}; 630 size_t size = sizeof(prop), length = size - sizeof(prop.head), total; 631 int i; 632 633 total = size * s->nb_reserved_regions; 634 635 if (total > free) { 636 return -ENOSPC; 637 } 638 639 for (i = 0; i < s->nb_reserved_regions; i++) { 640 unsigned subtype = s->reserved_regions[i].type; 641 642 assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED || 643 subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI); 644 prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM); 645 prop.head.length = cpu_to_le16(length); 646 prop.subtype = subtype; 647 prop.start = cpu_to_le64(s->reserved_regions[i].low); 648 prop.end = cpu_to_le64(s->reserved_regions[i].high); 649 650 memcpy(buf, &prop, size); 651 652 trace_virtio_iommu_fill_resv_property(ep, prop.subtype, 653 prop.start, prop.end); 654 buf += size; 655 } 656 return total; 657 } 658 659 /** 660 * virtio_iommu_probe - Fill the probe request buffer with 661 * the properties the device is able to return 662 */ 663 static int virtio_iommu_probe(VirtIOIOMMU *s, 664 struct virtio_iommu_req_probe *req, 665 uint8_t *buf) 666 { 667 uint32_t ep_id = le32_to_cpu(req->endpoint); 668 size_t free = VIOMMU_PROBE_SIZE; 669 ssize_t count; 670 671 if (!virtio_iommu_mr(s, ep_id)) { 672 return VIRTIO_IOMMU_S_NOENT; 673 } 674 675 count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free); 676 if (count < 0) { 677 return VIRTIO_IOMMU_S_INVAL; 678 } 679 buf += count; 680 free -= count; 681 682 return VIRTIO_IOMMU_S_OK; 683 } 684 685 static int virtio_iommu_iov_to_req(struct iovec *iov, 686 unsigned int iov_cnt, 687 void *req, size_t payload_sz) 688 { 689 size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); 690 691 if (unlikely(sz != payload_sz)) { 692 return VIRTIO_IOMMU_S_INVAL; 693 } 694 return 0; 695 } 696 697 #define virtio_iommu_handle_req(__req) \ 698 static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ 699 struct iovec *iov, \ 700 unsigned int iov_cnt) \ 701 { \ 702 struct virtio_iommu_req_ ## __req req; \ 703 int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \ 704 sizeof(req) - sizeof(struct virtio_iommu_req_tail));\ 705 \ 706 return ret ? ret : virtio_iommu_ ## __req(s, &req); \ 707 } 708 709 virtio_iommu_handle_req(attach) 710 virtio_iommu_handle_req(detach) 711 virtio_iommu_handle_req(map) 712 virtio_iommu_handle_req(unmap) 713 714 static int virtio_iommu_handle_probe(VirtIOIOMMU *s, 715 struct iovec *iov, 716 unsigned int iov_cnt, 717 uint8_t *buf) 718 { 719 struct virtio_iommu_req_probe req; 720 int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); 721 722 return ret ? ret : virtio_iommu_probe(s, &req, buf); 723 } 724 725 static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) 726 { 727 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); 728 struct virtio_iommu_req_head head; 729 struct virtio_iommu_req_tail tail = {}; 730 size_t output_size = sizeof(tail), sz; 731 VirtQueueElement *elem; 732 unsigned int iov_cnt; 733 struct iovec *iov; 734 void *buf = NULL; 735 736 for (;;) { 737 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 738 if (!elem) { 739 return; 740 } 741 742 if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) || 743 iov_size(elem->out_sg, elem->out_num) < sizeof(head)) { 744 virtio_error(vdev, "virtio-iommu bad head/tail size"); 745 virtqueue_detach_element(vq, elem, 0); 746 g_free(elem); 747 break; 748 } 749 750 iov_cnt = elem->out_num; 751 iov = elem->out_sg; 752 sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head)); 753 if (unlikely(sz != sizeof(head))) { 754 tail.status = VIRTIO_IOMMU_S_DEVERR; 755 goto out; 756 } 757 qemu_rec_mutex_lock(&s->mutex); 758 switch (head.type) { 759 case VIRTIO_IOMMU_T_ATTACH: 760 tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); 761 break; 762 case VIRTIO_IOMMU_T_DETACH: 763 tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt); 764 break; 765 case VIRTIO_IOMMU_T_MAP: 766 tail.status = virtio_iommu_handle_map(s, iov, iov_cnt); 767 break; 768 case VIRTIO_IOMMU_T_UNMAP: 769 tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt); 770 break; 771 case VIRTIO_IOMMU_T_PROBE: 772 { 773 struct virtio_iommu_req_tail *ptail; 774 775 output_size = s->config.probe_size + sizeof(tail); 776 buf = g_malloc0(output_size); 777 778 ptail = buf + s->config.probe_size; 779 ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf); 780 break; 781 } 782 default: 783 tail.status = VIRTIO_IOMMU_S_UNSUPP; 784 } 785 qemu_rec_mutex_unlock(&s->mutex); 786 787 out: 788 sz = iov_from_buf(elem->in_sg, elem->in_num, 0, 789 buf ? buf : &tail, output_size); 790 assert(sz == output_size); 791 792 virtqueue_push(vq, elem, sz); 793 virtio_notify(vdev, vq); 794 g_free(elem); 795 g_free(buf); 796 buf = NULL; 797 } 798 } 799 800 static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason, 801 int flags, uint32_t endpoint, 802 uint64_t address) 803 { 804 VirtIODevice *vdev = &viommu->parent_obj; 805 VirtQueue *vq = viommu->event_vq; 806 struct virtio_iommu_fault fault; 807 VirtQueueElement *elem; 808 size_t sz; 809 810 memset(&fault, 0, sizeof(fault)); 811 fault.reason = reason; 812 fault.flags = cpu_to_le32(flags); 813 fault.endpoint = cpu_to_le32(endpoint); 814 fault.address = cpu_to_le64(address); 815 816 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 817 818 if (!elem) { 819 error_report_once( 820 "no buffer available in event queue to report event"); 821 return; 822 } 823 824 if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) { 825 virtio_error(vdev, "error buffer of wrong size"); 826 virtqueue_detach_element(vq, elem, 0); 827 g_free(elem); 828 return; 829 } 830 831 sz = iov_from_buf(elem->in_sg, elem->in_num, 0, 832 &fault, sizeof(fault)); 833 assert(sz == sizeof(fault)); 834 835 trace_virtio_iommu_report_fault(reason, flags, endpoint, address); 836 virtqueue_push(vq, elem, sz); 837 virtio_notify(vdev, vq); 838 g_free(elem); 839 840 } 841 842 static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, 843 IOMMUAccessFlags flag, 844 int iommu_idx) 845 { 846 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); 847 VirtIOIOMMUInterval interval, *mapping_key; 848 VirtIOIOMMUMapping *mapping_value; 849 VirtIOIOMMU *s = sdev->viommu; 850 bool read_fault, write_fault; 851 VirtIOIOMMUEndpoint *ep; 852 uint32_t sid, flags; 853 bool bypass_allowed; 854 bool found; 855 int i; 856 857 interval.low = addr; 858 interval.high = addr + 1; 859 860 IOMMUTLBEntry entry = { 861 .target_as = &address_space_memory, 862 .iova = addr, 863 .translated_addr = addr, 864 .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1, 865 .perm = IOMMU_NONE, 866 }; 867 868 bypass_allowed = s->config.bypass; 869 870 sid = virtio_iommu_get_bdf(sdev); 871 872 trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); 873 qemu_rec_mutex_lock(&s->mutex); 874 875 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); 876 877 if (bypass_allowed) 878 assert(ep && ep->domain && !ep->domain->bypass); 879 880 if (!ep) { 881 if (!bypass_allowed) { 882 error_report_once("%s sid=%d is not known!!", __func__, sid); 883 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN, 884 VIRTIO_IOMMU_FAULT_F_ADDRESS, 885 sid, addr); 886 } else { 887 entry.perm = flag; 888 } 889 goto unlock; 890 } 891 892 for (i = 0; i < s->nb_reserved_regions; i++) { 893 ReservedRegion *reg = &s->reserved_regions[i]; 894 895 if (addr >= reg->low && addr <= reg->high) { 896 switch (reg->type) { 897 case VIRTIO_IOMMU_RESV_MEM_T_MSI: 898 entry.perm = flag; 899 break; 900 case VIRTIO_IOMMU_RESV_MEM_T_RESERVED: 901 default: 902 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING, 903 VIRTIO_IOMMU_FAULT_F_ADDRESS, 904 sid, addr); 905 break; 906 } 907 goto unlock; 908 } 909 } 910 911 if (!ep->domain) { 912 if (!bypass_allowed) { 913 error_report_once("%s %02x:%02x.%01x not attached to any domain", 914 __func__, PCI_BUS_NUM(sid), 915 PCI_SLOT(sid), PCI_FUNC(sid)); 916 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN, 917 VIRTIO_IOMMU_FAULT_F_ADDRESS, 918 sid, addr); 919 } else { 920 entry.perm = flag; 921 } 922 goto unlock; 923 } else if (ep->domain->bypass) { 924 entry.perm = flag; 925 goto unlock; 926 } 927 928 found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval), 929 (void **)&mapping_key, 930 (void **)&mapping_value); 931 if (!found) { 932 error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d", 933 __func__, addr, sid); 934 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING, 935 VIRTIO_IOMMU_FAULT_F_ADDRESS, 936 sid, addr); 937 goto unlock; 938 } 939 940 read_fault = (flag & IOMMU_RO) && 941 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ); 942 write_fault = (flag & IOMMU_WO) && 943 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE); 944 945 flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0; 946 flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0; 947 if (flags) { 948 error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d", 949 __func__, addr, flag, mapping_value->flags); 950 flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS; 951 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING, 952 flags | VIRTIO_IOMMU_FAULT_F_ADDRESS, 953 sid, addr); 954 goto unlock; 955 } 956 entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr; 957 entry.perm = flag; 958 trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); 959 960 unlock: 961 qemu_rec_mutex_unlock(&s->mutex); 962 return entry; 963 } 964 965 static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data) 966 { 967 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); 968 struct virtio_iommu_config *dev_config = &dev->config; 969 struct virtio_iommu_config *out_config = (void *)config_data; 970 971 out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask); 972 out_config->input_range.start = cpu_to_le64(dev_config->input_range.start); 973 out_config->input_range.end = cpu_to_le64(dev_config->input_range.end); 974 out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start); 975 out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end); 976 out_config->probe_size = cpu_to_le32(dev_config->probe_size); 977 out_config->bypass = dev_config->bypass; 978 979 trace_virtio_iommu_get_config(dev_config->page_size_mask, 980 dev_config->input_range.start, 981 dev_config->input_range.end, 982 dev_config->domain_range.start, 983 dev_config->domain_range.end, 984 dev_config->probe_size, 985 dev_config->bypass); 986 } 987 988 static void virtio_iommu_set_config(VirtIODevice *vdev, 989 const uint8_t *config_data) 990 { 991 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); 992 struct virtio_iommu_config *dev_config = &dev->config; 993 const struct virtio_iommu_config *in_config = (void *)config_data; 994 995 if (in_config->bypass != dev_config->bypass) { 996 if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) { 997 virtio_error(vdev, "cannot set config.bypass"); 998 return; 999 } else if (in_config->bypass != 0 && in_config->bypass != 1) { 1000 virtio_error(vdev, "invalid config.bypass value '%u'", 1001 in_config->bypass); 1002 return; 1003 } 1004 dev_config->bypass = in_config->bypass; 1005 virtio_iommu_switch_address_space_all(dev); 1006 } 1007 1008 trace_virtio_iommu_set_config(in_config->bypass); 1009 } 1010 1011 static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f, 1012 Error **errp) 1013 { 1014 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); 1015 1016 f |= dev->features; 1017 trace_virtio_iommu_get_features(f); 1018 return f; 1019 } 1020 1021 static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) 1022 { 1023 guint ua = GPOINTER_TO_UINT(a); 1024 guint ub = GPOINTER_TO_UINT(b); 1025 return (ua > ub) - (ua < ub); 1026 } 1027 1028 static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data) 1029 { 1030 VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value; 1031 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key; 1032 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data; 1033 1034 trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high, 1035 mapping->phys_addr); 1036 virtio_iommu_notify_map(mr, interval->low, interval->high, 1037 mapping->phys_addr, mapping->flags); 1038 return false; 1039 } 1040 1041 static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) 1042 { 1043 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); 1044 VirtIOIOMMU *s = sdev->viommu; 1045 uint32_t sid; 1046 VirtIOIOMMUEndpoint *ep; 1047 1048 sid = virtio_iommu_get_bdf(sdev); 1049 1050 qemu_rec_mutex_lock(&s->mutex); 1051 1052 if (!s->endpoints) { 1053 goto unlock; 1054 } 1055 1056 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); 1057 if (!ep || !ep->domain) { 1058 goto unlock; 1059 } 1060 1061 g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); 1062 1063 unlock: 1064 qemu_rec_mutex_unlock(&s->mutex); 1065 } 1066 1067 static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, 1068 IOMMUNotifierFlag old, 1069 IOMMUNotifierFlag new, 1070 Error **errp) 1071 { 1072 if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { 1073 error_setg(errp, "Virtio-iommu does not support dev-iotlb yet"); 1074 return -EINVAL; 1075 } 1076 1077 if (old == IOMMU_NOTIFIER_NONE) { 1078 trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name); 1079 } else if (new == IOMMU_NOTIFIER_NONE) { 1080 trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name); 1081 } 1082 return 0; 1083 } 1084 1085 /* 1086 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule, 1087 * for example 0xfffffffffffff000. When an assigned device has page size 1088 * restrictions due to the hardware IOMMU configuration, apply this restriction 1089 * to the mask. 1090 */ 1091 static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, 1092 uint64_t new_mask, 1093 Error **errp) 1094 { 1095 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); 1096 VirtIOIOMMU *s = sdev->viommu; 1097 uint64_t cur_mask = s->config.page_size_mask; 1098 1099 trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask, 1100 new_mask); 1101 1102 if ((cur_mask & new_mask) == 0) { 1103 error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 1104 " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); 1105 return -1; 1106 } 1107 1108 /* 1109 * After the machine is finalized, we can't change the mask anymore. If by 1110 * chance the hotplugged device supports the same granule, we can still 1111 * accept it. Having a different masks is possible but the guest will use 1112 * sub-optimal block sizes, so warn about it. 1113 */ 1114 if (phase_check(PHASE_MACHINE_READY)) { 1115 int new_granule = ctz64(new_mask); 1116 int cur_granule = ctz64(cur_mask); 1117 1118 if (new_granule != cur_granule) { 1119 error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 1120 " is incompatible with mask 0x%"PRIx64, cur_mask, 1121 new_mask); 1122 return -1; 1123 } else if (new_mask != cur_mask) { 1124 warn_report("virtio-iommu page mask 0x%"PRIx64 1125 " does not match 0x%"PRIx64, cur_mask, new_mask); 1126 } 1127 return 0; 1128 } 1129 1130 s->config.page_size_mask &= new_mask; 1131 return 0; 1132 } 1133 1134 static void virtio_iommu_system_reset(void *opaque) 1135 { 1136 VirtIOIOMMU *s = opaque; 1137 1138 trace_virtio_iommu_system_reset(); 1139 1140 /* 1141 * config.bypass is sticky across device reset, but should be restored on 1142 * system reset 1143 */ 1144 s->config.bypass = s->boot_bypass; 1145 virtio_iommu_switch_address_space_all(s); 1146 1147 } 1148 1149 static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) 1150 { 1151 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1152 VirtIOIOMMU *s = VIRTIO_IOMMU(dev); 1153 1154 virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config)); 1155 1156 memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num)); 1157 1158 s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, 1159 virtio_iommu_handle_command); 1160 s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); 1161 1162 /* 1163 * config.bypass is needed to get initial address space early, such as 1164 * in vfio realize 1165 */ 1166 s->config.bypass = s->boot_bypass; 1167 s->config.page_size_mask = TARGET_PAGE_MASK; 1168 s->config.input_range.end = UINT64_MAX; 1169 s->config.domain_range.end = UINT32_MAX; 1170 s->config.probe_size = VIOMMU_PROBE_SIZE; 1171 1172 virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX); 1173 virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC); 1174 virtio_add_feature(&s->features, VIRTIO_F_VERSION_1); 1175 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE); 1176 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE); 1177 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP); 1178 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); 1179 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); 1180 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); 1181 1182 qemu_rec_mutex_init(&s->mutex); 1183 1184 s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); 1185 1186 if (s->primary_bus) { 1187 pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s); 1188 } else { 1189 error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); 1190 } 1191 1192 qemu_register_reset(virtio_iommu_system_reset, s); 1193 } 1194 1195 static void virtio_iommu_device_unrealize(DeviceState *dev) 1196 { 1197 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1198 VirtIOIOMMU *s = VIRTIO_IOMMU(dev); 1199 1200 qemu_unregister_reset(virtio_iommu_system_reset, s); 1201 1202 g_hash_table_destroy(s->as_by_busptr); 1203 if (s->domains) { 1204 g_tree_destroy(s->domains); 1205 } 1206 if (s->endpoints) { 1207 g_tree_destroy(s->endpoints); 1208 } 1209 1210 qemu_rec_mutex_destroy(&s->mutex); 1211 1212 virtio_delete_queue(s->req_vq); 1213 virtio_delete_queue(s->event_vq); 1214 virtio_cleanup(vdev); 1215 } 1216 1217 static void virtio_iommu_device_reset(VirtIODevice *vdev) 1218 { 1219 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); 1220 1221 trace_virtio_iommu_device_reset(); 1222 1223 if (s->domains) { 1224 g_tree_destroy(s->domains); 1225 } 1226 if (s->endpoints) { 1227 g_tree_destroy(s->endpoints); 1228 } 1229 s->domains = g_tree_new_full((GCompareDataFunc)int_cmp, 1230 NULL, NULL, virtio_iommu_put_domain); 1231 s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp, 1232 NULL, NULL, virtio_iommu_put_endpoint); 1233 } 1234 1235 static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) 1236 { 1237 trace_virtio_iommu_device_status(status); 1238 } 1239 1240 static void virtio_iommu_instance_init(Object *obj) 1241 { 1242 } 1243 1244 #define VMSTATE_INTERVAL \ 1245 { \ 1246 .name = "interval", \ 1247 .version_id = 1, \ 1248 .minimum_version_id = 1, \ 1249 .fields = (VMStateField[]) { \ 1250 VMSTATE_UINT64(low, VirtIOIOMMUInterval), \ 1251 VMSTATE_UINT64(high, VirtIOIOMMUInterval), \ 1252 VMSTATE_END_OF_LIST() \ 1253 } \ 1254 } 1255 1256 #define VMSTATE_MAPPING \ 1257 { \ 1258 .name = "mapping", \ 1259 .version_id = 1, \ 1260 .minimum_version_id = 1, \ 1261 .fields = (VMStateField[]) { \ 1262 VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\ 1263 VMSTATE_UINT32(flags, VirtIOIOMMUMapping), \ 1264 VMSTATE_END_OF_LIST() \ 1265 }, \ 1266 } 1267 1268 static const VMStateDescription vmstate_interval_mapping[2] = { 1269 VMSTATE_MAPPING, /* value */ 1270 VMSTATE_INTERVAL /* key */ 1271 }; 1272 1273 static int domain_preload(void *opaque) 1274 { 1275 VirtIOIOMMUDomain *domain = opaque; 1276 1277 domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, 1278 NULL, g_free, g_free); 1279 return 0; 1280 } 1281 1282 static const VMStateDescription vmstate_endpoint = { 1283 .name = "endpoint", 1284 .version_id = 1, 1285 .minimum_version_id = 1, 1286 .fields = (VMStateField[]) { 1287 VMSTATE_UINT32(id, VirtIOIOMMUEndpoint), 1288 VMSTATE_END_OF_LIST() 1289 } 1290 }; 1291 1292 static const VMStateDescription vmstate_domain = { 1293 .name = "domain", 1294 .version_id = 2, 1295 .minimum_version_id = 2, 1296 .pre_load = domain_preload, 1297 .fields = (VMStateField[]) { 1298 VMSTATE_UINT32(id, VirtIOIOMMUDomain), 1299 VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1, 1300 vmstate_interval_mapping, 1301 VirtIOIOMMUInterval, VirtIOIOMMUMapping), 1302 VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1, 1303 vmstate_endpoint, VirtIOIOMMUEndpoint, next), 1304 VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2), 1305 VMSTATE_END_OF_LIST() 1306 } 1307 }; 1308 1309 static gboolean reconstruct_endpoints(gpointer key, gpointer value, 1310 gpointer data) 1311 { 1312 VirtIOIOMMU *s = (VirtIOIOMMU *)data; 1313 VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value; 1314 VirtIOIOMMUEndpoint *iter; 1315 IOMMUMemoryRegion *mr; 1316 1317 QLIST_FOREACH(iter, &d->endpoint_list, next) { 1318 mr = virtio_iommu_mr(s, iter->id); 1319 assert(mr); 1320 1321 iter->domain = d; 1322 iter->iommu_mr = mr; 1323 g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter); 1324 } 1325 return false; /* continue the domain traversal */ 1326 } 1327 1328 static int iommu_post_load(void *opaque, int version_id) 1329 { 1330 VirtIOIOMMU *s = opaque; 1331 1332 g_tree_foreach(s->domains, reconstruct_endpoints, s); 1333 1334 /* 1335 * Memory regions are dynamically turned on/off depending on 1336 * 'config.bypass' and attached domain type if there is. After 1337 * migration, we need to make sure the memory regions are 1338 * still correct. 1339 */ 1340 virtio_iommu_switch_address_space_all(s); 1341 return 0; 1342 } 1343 1344 static const VMStateDescription vmstate_virtio_iommu_device = { 1345 .name = "virtio-iommu-device", 1346 .minimum_version_id = 2, 1347 .version_id = 2, 1348 .post_load = iommu_post_load, 1349 .fields = (VMStateField[]) { 1350 VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2, 1351 &vmstate_domain, VirtIOIOMMUDomain), 1352 VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2), 1353 VMSTATE_END_OF_LIST() 1354 }, 1355 }; 1356 1357 static const VMStateDescription vmstate_virtio_iommu = { 1358 .name = "virtio-iommu", 1359 .minimum_version_id = 2, 1360 .priority = MIG_PRI_IOMMU, 1361 .version_id = 2, 1362 .fields = (VMStateField[]) { 1363 VMSTATE_VIRTIO_DEVICE, 1364 VMSTATE_END_OF_LIST() 1365 }, 1366 }; 1367 1368 static Property virtio_iommu_properties[] = { 1369 DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, 1370 TYPE_PCI_BUS, PCIBus *), 1371 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true), 1372 DEFINE_PROP_END_OF_LIST(), 1373 }; 1374 1375 static void virtio_iommu_class_init(ObjectClass *klass, void *data) 1376 { 1377 DeviceClass *dc = DEVICE_CLASS(klass); 1378 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 1379 1380 device_class_set_props(dc, virtio_iommu_properties); 1381 dc->vmsd = &vmstate_virtio_iommu; 1382 1383 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1384 vdc->realize = virtio_iommu_device_realize; 1385 vdc->unrealize = virtio_iommu_device_unrealize; 1386 vdc->reset = virtio_iommu_device_reset; 1387 vdc->get_config = virtio_iommu_get_config; 1388 vdc->set_config = virtio_iommu_set_config; 1389 vdc->get_features = virtio_iommu_get_features; 1390 vdc->set_status = virtio_iommu_set_status; 1391 vdc->vmsd = &vmstate_virtio_iommu_device; 1392 } 1393 1394 static void virtio_iommu_memory_region_class_init(ObjectClass *klass, 1395 void *data) 1396 { 1397 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 1398 1399 imrc->translate = virtio_iommu_translate; 1400 imrc->replay = virtio_iommu_replay; 1401 imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; 1402 imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; 1403 } 1404 1405 static const TypeInfo virtio_iommu_info = { 1406 .name = TYPE_VIRTIO_IOMMU, 1407 .parent = TYPE_VIRTIO_DEVICE, 1408 .instance_size = sizeof(VirtIOIOMMU), 1409 .instance_init = virtio_iommu_instance_init, 1410 .class_init = virtio_iommu_class_init, 1411 }; 1412 1413 static const TypeInfo virtio_iommu_memory_region_info = { 1414 .parent = TYPE_IOMMU_MEMORY_REGION, 1415 .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION, 1416 .class_init = virtio_iommu_memory_region_class_init, 1417 }; 1418 1419 static void virtio_register_types(void) 1420 { 1421 type_register_static(&virtio_iommu_info); 1422 type_register_static(&virtio_iommu_memory_region_info); 1423 } 1424 1425 type_init(virtio_register_types) 1426