/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"

/* Max size */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512

typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;

static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}
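
/*
 * Return whether DMA from this device currently bypasses translation:
 * the global config.bypass applies as long as the endpoint is not
 * attached to a domain, otherwise the domain's own bypass flag wins.
 */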
static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
{
    uint32_t sid;
    bool bypassed;
    VirtIOIOMMU *s = sdev->viommu;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);
    /* need to check bypass before system reset */
    if (!s->endpoints) {
        bypassed = s->config.bypass;
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        bypassed = s->config.bypass;
    } else {
        bypassed = ep->domain->bypass;
    }

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return bypassed;
}

/* Return whether the device is using IOMMU translation. */
static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
{
    bool use_remapping;

    assert(sdev);

    use_remapping = !virtio_iommu_device_bypassed(sdev);

    trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
                                            PCI_SLOT(sdev->devfn),
                                            PCI_FUNC(sdev->devfn),
                                            use_remapping);

    /* Turn one memory region off before turning the other one on */
    if (use_remapping) {
        memory_region_set_enabled(&sdev->bypass_mr, false);
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
    } else {
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
        memory_region_set_enabled(&sdev->bypass_mr, true);
    }

    return use_remapping;
}

static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
{
    GHashTableIter iter;
    IOMMUPciBus *iommu_pci_bus;
    int i;

    g_hash_table_iter_init(&iter, s->as_by_busptr);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
        for (i = 0; i < PCI_DEVFN_MAX; i++) {
            if (!iommu_pci_bus->pbdev[i]) {
                continue;
            }
            virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
        }
    }
}

/**
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not yet be initialized.
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}

static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}

static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}
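
/*
 * Notify listeners about a change in the [virt_start, virt_end] range.
 * IOMMUTLBEntry.addr_mask describes a naturally aligned, power-of-two
 * sized region, so the range is split into such chunks with
 * dma_aligned_pow2_mask() and one notification is sent per chunk.
 */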
static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
                                          IOMMUTLBEvent *event,
                                          hwaddr virt_start, hwaddr virt_end)
{
    uint64_t delta = virt_end - virt_start;

    event->entry.iova = virt_start;
    event->entry.addr_mask = delta;

    if (delta == UINT64_MAX) {
        memory_region_notify_iommu(mr, 0, *event);
    }

    while (virt_start != virt_end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);

        event->entry.addr_mask = mask;
        event->entry.iova = virt_start;
        memory_region_notify_iommu(mr, 0, *event);
        virt_start += mask + 1;
        if (event->entry.perm != IOMMU_NONE) {
            event->entry.translated_addr += mask + 1;
        }
    }
}

static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEvent event;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = perm;
    event.entry.translated_addr = paddr;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = IOMMU_NONE;
    event.entry.translated_addr = 0;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}

static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;
    IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);

    if (!ep->domain) {
        return;
    }
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
    virtio_iommu_switch_address_space(sdev);
}

static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}

static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}
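
/*
 * Return the domain identified by @domain_id, creating it (with the
 * requested bypass mode) if it does not exist yet. Return NULL if the
 * domain already exists with a different bypass setting.
 */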
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id,
                                                  bool bypass)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        if (domain->bypass != bypass) {
            return NULL;
        }
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, (GDestroyNotify)g_free,
                                       (GDestroyNotify)g_free);
    domain->bypass = bypass;
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}

static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}
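
/*
 * pci_setup_iommu() callback: return the AddressSpace the device at
 * @devfn on @bus uses for DMA, lazily creating the per-device IOMMUDevice
 * together with its bypass and IOMMU memory regions.
 */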
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
        address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);

        /*
         * Build the IOMMU disabled container with aliases to the
         * shared MRs. Aliasing to a shared memory region can help the
         * memory API detect identical FlatViews, so that devices in
         * bypass mode (either because the virtio-iommu driver is not
         * configured or because of "iommu=pt") can share the same
         * FlatView. This greatly reduces the total number of FlatViews
         * in the system and makes the VM run faster.
         */
        memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
                                 "system", get_system_memory(), 0,
                                 memory_region_size(get_system_memory()));

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);

        /*
         * Hook both containers under the root container; we switch
         * between the IOMMU and bypass MRs by enabling/disabling the
         * corresponding sub-containers.
         */
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            MEMORY_REGION(&sdev->iommu_mr),
                                            0);
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            &sdev->bypass_mr, 0);

        virtio_iommu_switch_address_space(sdev);
        g_free(name);
    }
    return &sdev->as;
}

static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    IOMMUDevice *sdev;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * The device is already attached to a domain,
         * detach it first.
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;
    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
    virtio_iommu_switch_address_space(sdev);

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    return VIRTIO_IOMMU_S_OK;
}
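
/*
 * Handle a MAP request: reject ranges that overlap an existing mapping
 * (interval_cmp() considers any overlapping intervals equal), record the
 * new mapping in the domain and notify every endpoint attached to it.
 */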
static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void **)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}
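
/*
 * Write one RESV_MEM probe property per reserved region into @buf.
 * Return the number of bytes written, or -ENOSPC if they do not fit in
 * the @free bytes remaining in the probe buffer.
 */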
static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    int i;

    total = size * s->nb_reserved_regions;

    if (total > free) {
        return -ENOSPC;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        unsigned subtype = s->reserved_regions[i].type;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(s->reserved_regions[i].low);
        prop.end = cpu_to_le64(s->reserved_regions[i].high);

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}

/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    size_t free = VIOMMU_PROBE_SIZE;
    ssize_t count;

    if (!virtio_iommu_mr(s, ep_id)) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t payload_sz)
{
    size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);

    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}

#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req,               \
                    sizeof(req) - sizeof(struct virtio_iommu_req_tail));\
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)

static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}
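
/*
 * Request virtqueue handler: pop each element, read the request head,
 * dispatch on the request type and return the status in the tail. For
 * PROBE requests the reply also carries a config.probe_size byte buffer
 * in front of the tail.
 */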
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    size_t output_size = sizeof(tail), sz;
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_rec_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = (struct virtio_iommu_req_tail *)
                        (buf + s->config.probe_size);
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_rec_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}

static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}
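
/*
 * Translation callback of the IOMMU memory region. Look up the endpoint
 * by its SID and, in order: fall back to bypass when allowed and the
 * endpoint is unknown or unattached, handle reserved regions (MSI windows
 * translate 1:1, others fault), then search the domain mappings and check
 * permissions. Faults are reported to the guest on the event queue.
 */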
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    bool found;
    int i;

    interval.low = addr;
    interval.high = addr + 1;

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_rec_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));

    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        ReservedRegion *reg = &s->reserved_regions[i];

        if (addr >= reg->low && addr <= reg->high) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    } else if (ep->domain->bypass) {
        entry.perm = flag;
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                  !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return entry;
}

static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    struct virtio_iommu_config *out_config = (void *)config_data;

    out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
    out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
    out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
    out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
    out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
    out_config->probe_size = cpu_to_le32(dev_config->probe_size);
    out_config->bypass = dev_config->bypass;

    trace_virtio_iommu_get_config(dev_config->page_size_mask,
                                  dev_config->input_range.start,
                                  dev_config->input_range.end,
                                  dev_config->domain_range.start,
                                  dev_config->domain_range.end,
                                  dev_config->probe_size,
                                  dev_config->bypass);
}
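
/*
 * Only the 'bypass' field is guest-writable, and only when the
 * VIRTIO_IOMMU_F_BYPASS_CONFIG feature has been negotiated.
 */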
static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    const struct virtio_iommu_config *in_config = (void *)config_data;

    if (in_config->bypass != dev_config->bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        } else if (in_config->bypass != 0 && in_config->bypass != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         in_config->bypass);
            return;
        }
        dev_config->bypass = in_config->bypass;
        virtio_iommu_switch_address_space_all(dev);
    }

    trace_virtio_iommu_set_config(in_config->bypass);
}

static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}

static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}
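
/*
 * replay() callback: re-send MAP notifications for all mappings of the
 * domain this endpoint is attached to, e.g. when a new notifier is
 * registered on this memory region.
 */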
static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
}

static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
        return -EINVAL;
    }

    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}

/*
 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
 * for example 0xfffffffffffff000. When an assigned device has page size
 * restrictions due to the hardware IOMMU configuration, apply this restriction
 * to the mask.
 */
static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
                                           uint64_t new_mask,
                                           Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint64_t cur_mask = s->config.page_size_mask;

    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
                                          new_mask);

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                   " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
        return -1;
    }

    /*
     * After the machine is finalized, we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it. Having different masks is possible but the guest will use
     * sub-optimal block sizes, so warn about it.
     */
    if (phase_check(PHASE_MACHINE_READY)) {
        int new_granule = ctz64(new_mask);
        int cur_granule = ctz64(cur_mask);

        if (new_granule != cur_granule) {
            error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                       " is incompatible with mask 0x%"PRIx64, cur_mask,
                       new_mask);
            return -1;
        } else if (new_mask != cur_mask) {
            warn_report("virtio-iommu page mask 0x%"PRIx64
                        " does not match 0x%"PRIx64, cur_mask, new_mask);
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}

static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
     */
    s->config.bypass = s->boot_bypass;
    virtio_iommu_switch_address_space_all(s);
}

static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                                 virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    /*
     * config.bypass is needed to get the initial address space early, e.g.
     * for the vfio device realize.
     */
    s->config.bypass = s->boot_bypass;
    s->config.page_size_mask = TARGET_PAGE_MASK;
    s->config.input_range.end = UINT64_MAX;
    s->config.domain_range.end = UINT32_MAX;
    s->config.probe_size = VIOMMU_PROBE_SIZE;

    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);

    qemu_rec_mutex_init(&s->mutex);

    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);

    if (s->primary_bus) {
        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
    } else {
        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
    }

    qemu_register_reset(virtio_iommu_system_reset, s);
}

static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    qemu_unregister_reset(virtio_iommu_system_reset, s);

    g_hash_table_destroy(s->as_by_busptr);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    qemu_rec_mutex_destroy(&s->mutex);

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}
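
/*
 * Device reset drops all domains and endpoints by recreating the trees.
 * config.bypass is deliberately left untouched here; it is only restored
 * to boot_bypass on system reset.
 */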
static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);

    trace_virtio_iommu_device_reset();

    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }
    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
                                 NULL, NULL, virtio_iommu_put_domain);
    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
                                   NULL, NULL, virtio_iommu_put_endpoint);
}

static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}

static void virtio_iommu_instance_init(Object *obj)
{
}

#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

#define VMSTATE_MAPPING                               \
{                                                     \
    .name = "mapping",                                \
    .version_id = 1,                                  \
    .minimum_version_id = 1,                          \
    .fields = (VMStateField[]) {                      \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
        VMSTATE_END_OF_LIST()                         \
    },                                                \
}

static const VMStateDescription vmstate_interval_mapping[2] = {
    VMSTATE_MAPPING,   /* value */
    VMSTATE_INTERVAL   /* key   */
};

static int domain_preload(void *opaque)
{
    VirtIOIOMMUDomain *domain = opaque;

    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, g_free, g_free);
    return 0;
}

static const VMStateDescription vmstate_endpoint = {
    .name = "endpoint",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_domain = {
    .name = "domain",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_load = domain_preload,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
                        vmstate_interval_mapping,
                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
        VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
        VMSTATE_END_OF_LIST()
    }
};
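
/*
 * Endpoints are only migrated as part of their domain's endpoint list, so
 * after migration the global endpoint tree is rebuilt from the domains
 * and each endpoint's domain and iommu_mr pointers are restored.
 */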
static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}

static int iommu_post_load(void *opaque, int version_id)
{
    VirtIOIOMMU *s = opaque;

    g_tree_foreach(s->domains, reconstruct_endpoints, s);

    /*
     * Memory regions are dynamically turned on/off depending on
     * 'config.bypass' and the type of the attached domain, if any. After
     * migration, we need to make sure the memory regions are still correct.
     */
    virtio_iommu_switch_address_space_all(s);
    return 0;
}

static const VMStateDescription vmstate_virtio_iommu_device = {
    .name = "virtio-iommu-device",
    .minimum_version_id = 2,
    .version_id = 2,
    .post_load = iommu_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
                                   &vmstate_domain, VirtIOIOMMUDomain),
        VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_iommu = {
    .name = "virtio-iommu",
    .minimum_version_id = 2,
    .priority = MIG_PRI_IOMMU,
    .version_id = 2,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_iommu_properties[] = {
    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_iommu_properties);
    dc->vmsd = &vmstate_virtio_iommu;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->get_features = virtio_iommu_get_features;
    vdc->set_status = virtio_iommu_set_status;
    vdc->vmsd = &vmstate_virtio_iommu_device;
}

static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
}

static const TypeInfo virtio_iommu_info = {
    .name = TYPE_VIRTIO_IOMMU,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOIOMMU),
    .instance_init = virtio_iommu_instance_init,
    .class_init = virtio_iommu_class_init,
};

static const TypeInfo virtio_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
    .class_init = virtio_iommu_memory_region_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_iommu_info);
    type_register_static(&virtio_iommu_memory_region_info);
}

type_init(virtio_register_types)