/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "qemu/range.h"
#include "qemu/reserved-region.h"
#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/sysemu.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"

/* Max size */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512

typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;

static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}

static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
{
    uint32_t sid;
    bool bypassed;
    VirtIOIOMMU *s = sdev->viommu;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);
    /* need to check bypass before system reset */
    if (!s->endpoints) {
        bypassed = s->config.bypass;
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        bypassed = s->config.bypass;
    } else {
        bypassed = ep->domain->bypass;
    }

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return bypassed;
}

/* Return whether the device is using IOMMU translation.
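 *
 * Translation is in use unless the endpoint is bypassed: either the global
 * config.bypass applies (endpoint not attached to any domain) or the attached
 * domain itself was created with VIRTIO_IOMMU_ATTACH_F_BYPASS, as computed by
 * virtio_iommu_device_bypassed() above.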
*/ 106 static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) 107 { 108 bool use_remapping; 109 110 assert(sdev); 111 112 use_remapping = !virtio_iommu_device_bypassed(sdev); 113 114 trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), 115 PCI_SLOT(sdev->devfn), 116 PCI_FUNC(sdev->devfn), 117 use_remapping); 118 119 /* Turn off first then on the other */ 120 if (use_remapping) { 121 memory_region_set_enabled(&sdev->bypass_mr, false); 122 memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); 123 } else { 124 memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); 125 memory_region_set_enabled(&sdev->bypass_mr, true); 126 } 127 128 return use_remapping; 129 } 130 131 static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) 132 { 133 GHashTableIter iter; 134 IOMMUPciBus *iommu_pci_bus; 135 int i; 136 137 g_hash_table_iter_init(&iter, s->as_by_busptr); 138 while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { 139 for (i = 0; i < PCI_DEVFN_MAX; i++) { 140 if (!iommu_pci_bus->pbdev[i]) { 141 continue; 142 } 143 virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); 144 } 145 } 146 } 147 148 /** 149 * The bus number is used for lookup when SID based operations occur. 150 * In that case we lazily populate the IOMMUPciBus array from the bus hash 151 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus 152 * numbers may not be always initialized yet. 153 */ 154 static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num) 155 { 156 IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num]; 157 158 if (!iommu_pci_bus) { 159 GHashTableIter iter; 160 161 g_hash_table_iter_init(&iter, s->as_by_busptr); 162 while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { 163 if (pci_bus_num(iommu_pci_bus->bus) == bus_num) { 164 s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus; 165 return iommu_pci_bus; 166 } 167 } 168 return NULL; 169 } 170 return iommu_pci_bus; 171 } 172 173 static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid) 174 { 175 uint8_t bus_n, devfn; 176 IOMMUPciBus *iommu_pci_bus; 177 IOMMUDevice *dev; 178 179 bus_n = PCI_BUS_NUM(sid); 180 iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n); 181 if (iommu_pci_bus) { 182 devfn = sid & (PCI_DEVFN_MAX - 1); 183 dev = iommu_pci_bus->pbdev[devfn]; 184 if (dev) { 185 return &dev->iommu_mr; 186 } 187 } 188 return NULL; 189 } 190 191 static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data) 192 { 193 VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a; 194 VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b; 195 196 if (inta->high < intb->low) { 197 return -1; 198 } else if (intb->high < inta->low) { 199 return 1; 200 } else { 201 return 0; 202 } 203 } 204 205 static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr, 206 IOMMUTLBEvent *event, 207 hwaddr virt_start, hwaddr virt_end) 208 { 209 uint64_t delta = virt_end - virt_start; 210 211 event->entry.iova = virt_start; 212 event->entry.addr_mask = delta; 213 214 if (delta == UINT64_MAX) { 215 memory_region_notify_iommu(mr, 0, *event); 216 } 217 218 while (virt_start != virt_end + 1) { 219 uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64); 220 221 event->entry.addr_mask = mask; 222 event->entry.iova = virt_start; 223 memory_region_notify_iommu(mr, 0, *event); 224 virt_start += mask + 1; 225 if (event->entry.perm != IOMMU_NONE) { 226 event->entry.translated_addr += mask + 1; 227 } 228 } 229 } 230 231 static void 
virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, 232 hwaddr virt_end, hwaddr paddr, 233 uint32_t flags) 234 { 235 IOMMUTLBEvent event; 236 IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ, 237 flags & VIRTIO_IOMMU_MAP_F_WRITE); 238 239 if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) || 240 (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) { 241 return; 242 } 243 244 trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end, 245 paddr, perm); 246 247 event.type = IOMMU_NOTIFIER_MAP; 248 event.entry.target_as = &address_space_memory; 249 event.entry.perm = perm; 250 event.entry.translated_addr = paddr; 251 252 virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end); 253 } 254 255 static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, 256 hwaddr virt_end) 257 { 258 IOMMUTLBEvent event; 259 260 if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) { 261 return; 262 } 263 264 trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end); 265 266 event.type = IOMMU_NOTIFIER_UNMAP; 267 event.entry.target_as = &address_space_memory; 268 event.entry.perm = IOMMU_NONE; 269 event.entry.translated_addr = 0; 270 271 virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end); 272 } 273 274 static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value, 275 gpointer data) 276 { 277 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key; 278 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data; 279 280 virtio_iommu_notify_unmap(mr, interval->low, interval->high); 281 282 return false; 283 } 284 285 static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value, 286 gpointer data) 287 { 288 VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value; 289 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key; 290 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data; 291 292 virtio_iommu_notify_map(mr, interval->low, interval->high, 293 mapping->phys_addr, mapping->flags); 294 295 return false; 296 } 297 298 static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) 299 { 300 VirtIOIOMMUDomain *domain = ep->domain; 301 IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); 302 303 if (!ep->domain) { 304 return; 305 } 306 g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb, 307 ep->iommu_mr); 308 QLIST_REMOVE(ep, next); 309 ep->domain = NULL; 310 virtio_iommu_switch_address_space(sdev); 311 } 312 313 static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, 314 uint32_t ep_id) 315 { 316 VirtIOIOMMUEndpoint *ep; 317 IOMMUMemoryRegion *mr; 318 319 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); 320 if (ep) { 321 return ep; 322 } 323 mr = virtio_iommu_mr(s, ep_id); 324 if (!mr) { 325 return NULL; 326 } 327 ep = g_malloc0(sizeof(*ep)); 328 ep->id = ep_id; 329 ep->iommu_mr = mr; 330 trace_virtio_iommu_get_endpoint(ep_id); 331 g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep); 332 return ep; 333 } 334 335 static void virtio_iommu_put_endpoint(gpointer data) 336 { 337 VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data; 338 339 if (ep->domain) { 340 virtio_iommu_detach_endpoint_from_domain(ep); 341 } 342 343 trace_virtio_iommu_put_endpoint(ep->id); 344 g_free(ep); 345 } 346 347 static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s, 348 uint32_t domain_id, 349 bool bypass) 350 { 351 VirtIOIOMMUDomain *domain; 352 353 domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); 354 if (domain) { 355 
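        /*
         * A cached domain can only be reused if it was created with the same
         * bypass setting; otherwise return NULL so that the ATTACH request
         * fails with VIRTIO_IOMMU_S_INVAL (incompatible bypass flag).
         */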
if (domain->bypass != bypass) { 356 return NULL; 357 } 358 return domain; 359 } 360 domain = g_malloc0(sizeof(*domain)); 361 domain->id = domain_id; 362 domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, 363 NULL, (GDestroyNotify)g_free, 364 (GDestroyNotify)g_free); 365 domain->bypass = bypass; 366 g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain); 367 QLIST_INIT(&domain->endpoint_list); 368 trace_virtio_iommu_get_domain(domain_id); 369 return domain; 370 } 371 372 static void virtio_iommu_put_domain(gpointer data) 373 { 374 VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data; 375 VirtIOIOMMUEndpoint *iter, *tmp; 376 377 QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) { 378 virtio_iommu_detach_endpoint_from_domain(iter); 379 } 380 g_tree_destroy(domain->mappings); 381 trace_virtio_iommu_put_domain(domain->id); 382 g_free(domain); 383 } 384 385 static void add_prop_resv_regions(IOMMUDevice *sdev) 386 { 387 VirtIOIOMMU *s = sdev->viommu; 388 int i; 389 390 for (i = 0; i < s->nr_prop_resv_regions; i++) { 391 ReservedRegion *reg = g_new0(ReservedRegion, 1); 392 393 *reg = s->prop_resv_regions[i]; 394 sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); 395 } 396 } 397 398 static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, 399 int devfn) 400 { 401 VirtIOIOMMU *s = opaque; 402 IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); 403 static uint32_t mr_index; 404 IOMMUDevice *sdev; 405 406 if (!sbus) { 407 sbus = g_malloc0(sizeof(IOMMUPciBus) + 408 sizeof(IOMMUDevice *) * PCI_DEVFN_MAX); 409 sbus->bus = bus; 410 g_hash_table_insert(s->as_by_busptr, bus, sbus); 411 } 412 413 sdev = sbus->pbdev[devfn]; 414 if (!sdev) { 415 char *name = g_strdup_printf("%s-%d-%d", 416 TYPE_VIRTIO_IOMMU_MEMORY_REGION, 417 mr_index++, devfn); 418 sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1); 419 420 sdev->viommu = s; 421 sdev->bus = bus; 422 sdev->devfn = devfn; 423 424 trace_virtio_iommu_init_iommu_mr(name); 425 426 memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); 427 address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); 428 add_prop_resv_regions(sdev); 429 430 /* 431 * Build the IOMMU disabled container with aliases to the 432 * shared MRs. Note that aliasing to a shared memory region 433 * could help the memory API to detect same FlatViews so we 434 * can have devices to share the same FlatView when in bypass 435 * mode. (either by not configuring virtio-iommu driver or with 436 * "iommu=pt"). It will greatly reduce the total number of 437 * FlatViews of the system hence VM runs faster. 
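         *
         * Only one of the two sub-containers is enabled at any time; the
         * selection is done by virtio_iommu_switch_address_space() right
         * after both are added below.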
438 */ 439 memory_region_init_alias(&sdev->bypass_mr, OBJECT(s), 440 "system", get_system_memory(), 0, 441 memory_region_size(get_system_memory())); 442 443 memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), 444 TYPE_VIRTIO_IOMMU_MEMORY_REGION, 445 OBJECT(s), name, 446 UINT64_MAX); 447 448 /* 449 * Hook both the containers under the root container, we 450 * switch between iommu & bypass MRs by enable/disable 451 * corresponding sub-containers 452 */ 453 memory_region_add_subregion_overlap(&sdev->root, 0, 454 MEMORY_REGION(&sdev->iommu_mr), 455 0); 456 memory_region_add_subregion_overlap(&sdev->root, 0, 457 &sdev->bypass_mr, 0); 458 459 virtio_iommu_switch_address_space(sdev); 460 g_free(name); 461 } 462 return &sdev->as; 463 } 464 465 static const PCIIOMMUOps virtio_iommu_ops = { 466 .get_address_space = virtio_iommu_find_add_as, 467 }; 468 469 static int virtio_iommu_attach(VirtIOIOMMU *s, 470 struct virtio_iommu_req_attach *req) 471 { 472 uint32_t domain_id = le32_to_cpu(req->domain); 473 uint32_t ep_id = le32_to_cpu(req->endpoint); 474 uint32_t flags = le32_to_cpu(req->flags); 475 VirtIOIOMMUDomain *domain; 476 VirtIOIOMMUEndpoint *ep; 477 IOMMUDevice *sdev; 478 479 trace_virtio_iommu_attach(domain_id, ep_id); 480 481 if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) { 482 return VIRTIO_IOMMU_S_INVAL; 483 } 484 485 ep = virtio_iommu_get_endpoint(s, ep_id); 486 if (!ep) { 487 return VIRTIO_IOMMU_S_NOENT; 488 } 489 490 if (ep->domain) { 491 VirtIOIOMMUDomain *previous_domain = ep->domain; 492 /* 493 * the device is already attached to a domain, 494 * detach it first 495 */ 496 virtio_iommu_detach_endpoint_from_domain(ep); 497 if (QLIST_EMPTY(&previous_domain->endpoint_list)) { 498 g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id)); 499 } 500 } 501 502 domain = virtio_iommu_get_domain(s, domain_id, 503 flags & VIRTIO_IOMMU_ATTACH_F_BYPASS); 504 if (!domain) { 505 /* Incompatible bypass flag */ 506 return VIRTIO_IOMMU_S_INVAL; 507 } 508 QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); 509 510 ep->domain = domain; 511 sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); 512 virtio_iommu_switch_address_space(sdev); 513 514 /* Replay domain mappings on the associated memory region */ 515 g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb, 516 ep->iommu_mr); 517 518 return VIRTIO_IOMMU_S_OK; 519 } 520 521 static int virtio_iommu_detach(VirtIOIOMMU *s, 522 struct virtio_iommu_req_detach *req) 523 { 524 uint32_t domain_id = le32_to_cpu(req->domain); 525 uint32_t ep_id = le32_to_cpu(req->endpoint); 526 VirtIOIOMMUDomain *domain; 527 VirtIOIOMMUEndpoint *ep; 528 529 trace_virtio_iommu_detach(domain_id, ep_id); 530 531 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); 532 if (!ep) { 533 return VIRTIO_IOMMU_S_NOENT; 534 } 535 536 domain = ep->domain; 537 538 if (!domain || domain->id != domain_id) { 539 return VIRTIO_IOMMU_S_INVAL; 540 } 541 542 virtio_iommu_detach_endpoint_from_domain(ep); 543 544 if (QLIST_EMPTY(&domain->endpoint_list)) { 545 g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id)); 546 } 547 return VIRTIO_IOMMU_S_OK; 548 } 549 550 static int virtio_iommu_map(VirtIOIOMMU *s, 551 struct virtio_iommu_req_map *req) 552 { 553 uint32_t domain_id = le32_to_cpu(req->domain); 554 uint64_t phys_start = le64_to_cpu(req->phys_start); 555 uint64_t virt_start = le64_to_cpu(req->virt_start); 556 uint64_t virt_end = le64_to_cpu(req->virt_end); 557 uint32_t flags = le32_to_cpu(req->flags); 558 VirtIOIOMMUDomain *domain; 559 VirtIOIOMMUInterval 
                           *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void**)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}

static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    GList *l;

    total = size * g_list_length(sdev->resv_regions);
    if (total > free) {
        return -ENOSPC;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;
        unsigned subtype = reg->type;
        Range *range = &reg->range;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(range_lob(range));
        prop.end = cpu_to_le64(range_upb(range));

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}

/**
 * virtio_iommu_probe - Fill the probe request
buffer with 685 * the properties the device is able to return 686 */ 687 static int virtio_iommu_probe(VirtIOIOMMU *s, 688 struct virtio_iommu_req_probe *req, 689 uint8_t *buf) 690 { 691 uint32_t ep_id = le32_to_cpu(req->endpoint); 692 IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id); 693 size_t free = VIOMMU_PROBE_SIZE; 694 IOMMUDevice *sdev; 695 ssize_t count; 696 697 if (!iommu_mr) { 698 return VIRTIO_IOMMU_S_NOENT; 699 } 700 701 sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr); 702 703 count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free); 704 if (count < 0) { 705 return VIRTIO_IOMMU_S_INVAL; 706 } 707 buf += count; 708 free -= count; 709 sdev->probe_done = true; 710 711 return VIRTIO_IOMMU_S_OK; 712 } 713 714 static int virtio_iommu_iov_to_req(struct iovec *iov, 715 unsigned int iov_cnt, 716 void *req, size_t payload_sz) 717 { 718 size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); 719 720 if (unlikely(sz != payload_sz)) { 721 return VIRTIO_IOMMU_S_INVAL; 722 } 723 return 0; 724 } 725 726 #define virtio_iommu_handle_req(__req) \ 727 static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ 728 struct iovec *iov, \ 729 unsigned int iov_cnt) \ 730 { \ 731 struct virtio_iommu_req_ ## __req req; \ 732 int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \ 733 sizeof(req) - sizeof(struct virtio_iommu_req_tail));\ 734 \ 735 return ret ? ret : virtio_iommu_ ## __req(s, &req); \ 736 } 737 738 virtio_iommu_handle_req(attach) 739 virtio_iommu_handle_req(detach) 740 virtio_iommu_handle_req(map) 741 virtio_iommu_handle_req(unmap) 742 743 static int virtio_iommu_handle_probe(VirtIOIOMMU *s, 744 struct iovec *iov, 745 unsigned int iov_cnt, 746 uint8_t *buf) 747 { 748 struct virtio_iommu_req_probe req; 749 int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); 750 751 return ret ? 
ret : virtio_iommu_probe(s, &req, buf); 752 } 753 754 static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) 755 { 756 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); 757 struct virtio_iommu_req_head head; 758 struct virtio_iommu_req_tail tail = {}; 759 VirtQueueElement *elem; 760 unsigned int iov_cnt; 761 struct iovec *iov; 762 void *buf = NULL; 763 size_t sz; 764 765 for (;;) { 766 size_t output_size = sizeof(tail); 767 768 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 769 if (!elem) { 770 return; 771 } 772 773 if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) || 774 iov_size(elem->out_sg, elem->out_num) < sizeof(head)) { 775 virtio_error(vdev, "virtio-iommu bad head/tail size"); 776 virtqueue_detach_element(vq, elem, 0); 777 g_free(elem); 778 break; 779 } 780 781 iov_cnt = elem->out_num; 782 iov = elem->out_sg; 783 sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head)); 784 if (unlikely(sz != sizeof(head))) { 785 tail.status = VIRTIO_IOMMU_S_DEVERR; 786 goto out; 787 } 788 qemu_rec_mutex_lock(&s->mutex); 789 switch (head.type) { 790 case VIRTIO_IOMMU_T_ATTACH: 791 tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); 792 break; 793 case VIRTIO_IOMMU_T_DETACH: 794 tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt); 795 break; 796 case VIRTIO_IOMMU_T_MAP: 797 tail.status = virtio_iommu_handle_map(s, iov, iov_cnt); 798 break; 799 case VIRTIO_IOMMU_T_UNMAP: 800 tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt); 801 break; 802 case VIRTIO_IOMMU_T_PROBE: 803 { 804 struct virtio_iommu_req_tail *ptail; 805 806 output_size = s->config.probe_size + sizeof(tail); 807 buf = g_malloc0(output_size); 808 809 ptail = buf + s->config.probe_size; 810 ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf); 811 break; 812 } 813 default: 814 tail.status = VIRTIO_IOMMU_S_UNSUPP; 815 } 816 qemu_rec_mutex_unlock(&s->mutex); 817 818 out: 819 sz = iov_from_buf(elem->in_sg, elem->in_num, 0, 820 buf ? 
                          buf : &tail, output_size);
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}

static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);

}

static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    int granule;
    bool found;
    GList *l;

    interval.low = addr;
    interval.high = addr + 1;
    granule = ctz64(s->config.page_size_mask);

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = BIT_ULL(granule) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_rec_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));

    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;

        if (range_contains(&reg->range, addr)) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm =
flag; 954 } 955 goto unlock; 956 } else if (ep->domain->bypass) { 957 entry.perm = flag; 958 goto unlock; 959 } 960 961 found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval), 962 (void **)&mapping_key, 963 (void **)&mapping_value); 964 if (!found) { 965 error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d", 966 __func__, addr, sid); 967 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING, 968 VIRTIO_IOMMU_FAULT_F_ADDRESS, 969 sid, addr); 970 goto unlock; 971 } 972 973 read_fault = (flag & IOMMU_RO) && 974 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ); 975 write_fault = (flag & IOMMU_WO) && 976 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE); 977 978 flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0; 979 flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0; 980 if (flags) { 981 error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d", 982 __func__, addr, flag, mapping_value->flags); 983 flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS; 984 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING, 985 flags | VIRTIO_IOMMU_FAULT_F_ADDRESS, 986 sid, addr); 987 goto unlock; 988 } 989 entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr; 990 entry.perm = flag; 991 trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); 992 993 unlock: 994 qemu_rec_mutex_unlock(&s->mutex); 995 return entry; 996 } 997 998 static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data) 999 { 1000 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); 1001 struct virtio_iommu_config *dev_config = &dev->config; 1002 struct virtio_iommu_config *out_config = (void *)config_data; 1003 1004 out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask); 1005 out_config->input_range.start = cpu_to_le64(dev_config->input_range.start); 1006 out_config->input_range.end = cpu_to_le64(dev_config->input_range.end); 1007 out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start); 1008 out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end); 1009 out_config->probe_size = cpu_to_le32(dev_config->probe_size); 1010 out_config->bypass = dev_config->bypass; 1011 1012 trace_virtio_iommu_get_config(dev_config->page_size_mask, 1013 dev_config->input_range.start, 1014 dev_config->input_range.end, 1015 dev_config->domain_range.start, 1016 dev_config->domain_range.end, 1017 dev_config->probe_size, 1018 dev_config->bypass); 1019 } 1020 1021 static void virtio_iommu_set_config(VirtIODevice *vdev, 1022 const uint8_t *config_data) 1023 { 1024 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); 1025 struct virtio_iommu_config *dev_config = &dev->config; 1026 const struct virtio_iommu_config *in_config = (void *)config_data; 1027 1028 if (in_config->bypass != dev_config->bypass) { 1029 if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) { 1030 virtio_error(vdev, "cannot set config.bypass"); 1031 return; 1032 } else if (in_config->bypass != 0 && in_config->bypass != 1) { 1033 virtio_error(vdev, "invalid config.bypass value '%u'", 1034 in_config->bypass); 1035 return; 1036 } 1037 dev_config->bypass = in_config->bypass; 1038 virtio_iommu_switch_address_space_all(dev); 1039 } 1040 1041 trace_virtio_iommu_set_config(in_config->bypass); 1042 } 1043 1044 static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f, 1045 Error **errp) 1046 { 1047 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); 1048 1049 f |= dev->features; 1050 trace_virtio_iommu_get_features(f); 1051 return f; 1052 } 1053 1054 static gint int_cmp(gconstpointer a, 
gconstpointer b, gpointer user_data) 1055 { 1056 guint ua = GPOINTER_TO_UINT(a); 1057 guint ub = GPOINTER_TO_UINT(b); 1058 return (ua > ub) - (ua < ub); 1059 } 1060 1061 static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data) 1062 { 1063 VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value; 1064 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key; 1065 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data; 1066 1067 trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high, 1068 mapping->phys_addr); 1069 virtio_iommu_notify_map(mr, interval->low, interval->high, 1070 mapping->phys_addr, mapping->flags); 1071 return false; 1072 } 1073 1074 static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) 1075 { 1076 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); 1077 VirtIOIOMMU *s = sdev->viommu; 1078 uint32_t sid; 1079 VirtIOIOMMUEndpoint *ep; 1080 1081 sid = virtio_iommu_get_bdf(sdev); 1082 1083 qemu_rec_mutex_lock(&s->mutex); 1084 1085 if (!s->endpoints) { 1086 goto unlock; 1087 } 1088 1089 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); 1090 if (!ep || !ep->domain) { 1091 goto unlock; 1092 } 1093 1094 g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); 1095 1096 unlock: 1097 qemu_rec_mutex_unlock(&s->mutex); 1098 } 1099 1100 static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, 1101 IOMMUNotifierFlag old, 1102 IOMMUNotifierFlag new, 1103 Error **errp) 1104 { 1105 if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { 1106 error_setg(errp, "Virtio-iommu does not support dev-iotlb yet"); 1107 return -EINVAL; 1108 } 1109 1110 if (old == IOMMU_NOTIFIER_NONE) { 1111 trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name); 1112 } else if (new == IOMMU_NOTIFIER_NONE) { 1113 trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name); 1114 } 1115 return 0; 1116 } 1117 1118 /* 1119 * The default mask depends on the "granule" property. For example, with 1120 * 4k granule, it is -(4 * KiB). When an assigned device has page size 1121 * restrictions due to the hardware IOMMU configuration, apply this restriction 1122 * to the mask. 1123 */ 1124 static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, 1125 uint64_t new_mask, 1126 Error **errp) 1127 { 1128 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); 1129 VirtIOIOMMU *s = sdev->viommu; 1130 uint64_t cur_mask = s->config.page_size_mask; 1131 1132 trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask, 1133 new_mask); 1134 1135 if ((cur_mask & new_mask) == 0) { 1136 error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 1137 " incompatible with currently supported mask 0x%"PRIx64, 1138 mr->parent_obj.name, new_mask, cur_mask); 1139 return -1; 1140 } 1141 1142 /* 1143 * Once the granule is frozen we can't change the mask anymore. If by 1144 * chance the hotplugged device supports the same granule, we can still 1145 * accept it. 
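     * The granule is frozen by virtio_iommu_freeze_granule() at machine init
     * done time, after cold-plugged devices (e.g. VFIO) have had a chance to
     * restrict the supported page size mask.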
     */
    if (s->granule_frozen) {
        int cur_granule = ctz64(cur_mask);

        if (!(BIT_ULL(cur_granule) & new_mask)) {
            error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx",
                       mr->parent_obj.name, BIT_ULL(cur_granule));
            return -1;
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}

/**
 * rebuild_resv_regions: rebuild resv regions with both the
 * info of host resv ranges and property set resv ranges
 */
static int rebuild_resv_regions(IOMMUDevice *sdev)
{
    GList *l;
    int i = 0;

    /* free the existing list and rebuild it from scratch */
    g_list_free_full(sdev->resv_regions, g_free);
    sdev->resv_regions = NULL;

    /* First add host reserved regions if any, all tagged as RESERVED */
    for (l = sdev->host_resv_ranges; l; l = l->next) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);
        Range *r = (Range *)l->data;

        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
                                             range_lob(&reg->range),
                                             range_upb(&reg->range));
        i++;
    }
    /*
     * then add higher priority reserved regions set by the machine
     * through properties
     */
    add_prop_resv_regions(sdev);
    return 0;
}

/**
 * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
 *
 * The function turns those into reserved ranges. Once some
 * reserved ranges have been set, new reserved regions cannot be
 * added outside of the original ones.
 *
 * @mr: IOMMU MR
 * @iova_ranges: list of usable IOVA ranges
 * @errp: error handle
 */
static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr,
                                        GList *iova_ranges,
                                        Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    GList *current_ranges = sdev->host_resv_ranges;
    GList *l, *tmp, *new_ranges = NULL;
    int ret = -EINVAL;

    /* check that each new resv region is included in an existing one */
    if (sdev->host_resv_ranges) {
        range_inverse_array(iova_ranges,
                            &new_ranges,
                            0, UINT64_MAX);

        for (tmp = new_ranges; tmp; tmp = tmp->next) {
            Range *newr = (Range *)tmp->data;
            bool included = false;

            for (l = current_ranges; l; l = l->next) {
                Range *r = (Range *)l->data;

                if (range_contains_range(r, newr)) {
                    included = true;
                    break;
                }
            }
            if (!included) {
                goto error;
            }
        }
        /* all new reserved ranges are included in existing ones */
        ret = 0;
        goto out;
    }

    if (sdev->probe_done) {
        warn_report("%s: Notified about new host reserved regions after probe",
                    mr->parent_obj.name);
    }

    range_inverse_array(iova_ranges,
                        &sdev->host_resv_ranges,
                        0, UINT64_MAX);
    rebuild_resv_regions(sdev);

    return 0;
error:
    error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!",
               mr->parent_obj.name);
out:
    g_list_free_full(new_ranges, g_free);
    return ret;
}

static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    /*
* config.bypass is sticky across device reset, but should be restored on 1272 * system reset 1273 */ 1274 s->config.bypass = s->boot_bypass; 1275 virtio_iommu_switch_address_space_all(s); 1276 1277 } 1278 1279 static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) 1280 { 1281 VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); 1282 int granule; 1283 1284 if (likely(s->config.bypass)) { 1285 /* 1286 * Transient IOMMU MR enable to collect page_size_mask requirements 1287 * through memory_region_iommu_set_page_size_mask() called by 1288 * VFIO region_add() callback 1289 */ 1290 s->config.bypass = false; 1291 virtio_iommu_switch_address_space_all(s); 1292 /* restore default */ 1293 s->config.bypass = true; 1294 virtio_iommu_switch_address_space_all(s); 1295 } 1296 s->granule_frozen = true; 1297 granule = ctz64(s->config.page_size_mask); 1298 trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); 1299 } 1300 1301 static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) 1302 { 1303 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1304 VirtIOIOMMU *s = VIRTIO_IOMMU(dev); 1305 1306 virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config)); 1307 1308 s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, 1309 virtio_iommu_handle_command); 1310 s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); 1311 1312 /* 1313 * config.bypass is needed to get initial address space early, such as 1314 * in vfio realize 1315 */ 1316 s->config.bypass = s->boot_bypass; 1317 if (s->aw_bits < 32 || s->aw_bits > 64) { 1318 error_setg(errp, "aw-bits must be within [32,64]"); 1319 return; 1320 } 1321 s->config.input_range.end = 1322 s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1; 1323 1324 switch (s->granule_mode) { 1325 case GRANULE_MODE_4K: 1326 s->config.page_size_mask = -(4 * KiB); 1327 break; 1328 case GRANULE_MODE_8K: 1329 s->config.page_size_mask = -(8 * KiB); 1330 break; 1331 case GRANULE_MODE_16K: 1332 s->config.page_size_mask = -(16 * KiB); 1333 break; 1334 case GRANULE_MODE_64K: 1335 s->config.page_size_mask = -(64 * KiB); 1336 break; 1337 case GRANULE_MODE_HOST: 1338 s->config.page_size_mask = qemu_real_host_page_mask(); 1339 break; 1340 default: 1341 error_setg(errp, "Unsupported granule mode"); 1342 } 1343 s->config.domain_range.end = UINT32_MAX; 1344 s->config.probe_size = VIOMMU_PROBE_SIZE; 1345 1346 virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX); 1347 virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC); 1348 virtio_add_feature(&s->features, VIRTIO_F_VERSION_1); 1349 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE); 1350 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE); 1351 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP); 1352 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); 1353 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); 1354 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); 1355 1356 qemu_rec_mutex_init(&s->mutex); 1357 1358 s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); 1359 1360 if (s->primary_bus) { 1361 pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); 1362 } else { 1363 error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); 1364 } 1365 1366 s->machine_done.notify = virtio_iommu_freeze_granule; 1367 qemu_add_machine_init_done_notifier(&s->machine_done); 1368 1369 qemu_register_reset(virtio_iommu_system_reset, s); 1370 } 1371 1372 static void virtio_iommu_device_unrealize(DeviceState *dev) 
1373 { 1374 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1375 VirtIOIOMMU *s = VIRTIO_IOMMU(dev); 1376 1377 qemu_unregister_reset(virtio_iommu_system_reset, s); 1378 qemu_remove_machine_init_done_notifier(&s->machine_done); 1379 1380 g_hash_table_destroy(s->as_by_busptr); 1381 if (s->domains) { 1382 g_tree_destroy(s->domains); 1383 } 1384 if (s->endpoints) { 1385 g_tree_destroy(s->endpoints); 1386 } 1387 1388 qemu_rec_mutex_destroy(&s->mutex); 1389 1390 virtio_delete_queue(s->req_vq); 1391 virtio_delete_queue(s->event_vq); 1392 virtio_cleanup(vdev); 1393 } 1394 1395 static void virtio_iommu_device_reset(VirtIODevice *vdev) 1396 { 1397 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); 1398 1399 trace_virtio_iommu_device_reset(); 1400 1401 if (s->domains) { 1402 g_tree_destroy(s->domains); 1403 } 1404 if (s->endpoints) { 1405 g_tree_destroy(s->endpoints); 1406 } 1407 s->domains = g_tree_new_full((GCompareDataFunc)int_cmp, 1408 NULL, NULL, virtio_iommu_put_domain); 1409 s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp, 1410 NULL, NULL, virtio_iommu_put_endpoint); 1411 } 1412 1413 static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) 1414 { 1415 trace_virtio_iommu_device_status(status); 1416 } 1417 1418 static void virtio_iommu_instance_init(Object *obj) 1419 { 1420 } 1421 1422 #define VMSTATE_INTERVAL \ 1423 { \ 1424 .name = "interval", \ 1425 .version_id = 1, \ 1426 .minimum_version_id = 1, \ 1427 .fields = (const VMStateField[]) { \ 1428 VMSTATE_UINT64(low, VirtIOIOMMUInterval), \ 1429 VMSTATE_UINT64(high, VirtIOIOMMUInterval), \ 1430 VMSTATE_END_OF_LIST() \ 1431 } \ 1432 } 1433 1434 #define VMSTATE_MAPPING \ 1435 { \ 1436 .name = "mapping", \ 1437 .version_id = 1, \ 1438 .minimum_version_id = 1, \ 1439 .fields = (const VMStateField[]) { \ 1440 VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\ 1441 VMSTATE_UINT32(flags, VirtIOIOMMUMapping), \ 1442 VMSTATE_END_OF_LIST() \ 1443 }, \ 1444 } 1445 1446 static const VMStateDescription vmstate_interval_mapping[2] = { 1447 VMSTATE_MAPPING, /* value */ 1448 VMSTATE_INTERVAL /* key */ 1449 }; 1450 1451 static int domain_preload(void *opaque) 1452 { 1453 VirtIOIOMMUDomain *domain = opaque; 1454 1455 domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, 1456 NULL, g_free, g_free); 1457 return 0; 1458 } 1459 1460 static const VMStateDescription vmstate_endpoint = { 1461 .name = "endpoint", 1462 .version_id = 1, 1463 .minimum_version_id = 1, 1464 .fields = (const VMStateField[]) { 1465 VMSTATE_UINT32(id, VirtIOIOMMUEndpoint), 1466 VMSTATE_END_OF_LIST() 1467 } 1468 }; 1469 1470 static const VMStateDescription vmstate_domain = { 1471 .name = "domain", 1472 .version_id = 2, 1473 .minimum_version_id = 2, 1474 .pre_load = domain_preload, 1475 .fields = (const VMStateField[]) { 1476 VMSTATE_UINT32(id, VirtIOIOMMUDomain), 1477 VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1, 1478 vmstate_interval_mapping, 1479 VirtIOIOMMUInterval, VirtIOIOMMUMapping), 1480 VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1, 1481 vmstate_endpoint, VirtIOIOMMUEndpoint, next), 1482 VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2), 1483 VMSTATE_END_OF_LIST() 1484 } 1485 }; 1486 1487 static gboolean reconstruct_endpoints(gpointer key, gpointer value, 1488 gpointer data) 1489 { 1490 VirtIOIOMMU *s = (VirtIOIOMMU *)data; 1491 VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value; 1492 VirtIOIOMMUEndpoint *iter; 1493 IOMMUMemoryRegion *mr; 1494 1495 QLIST_FOREACH(iter, &d->endpoint_list, next) { 1496 mr = virtio_iommu_mr(s, iter->id); 1497 assert(mr); 1498 1499 iter->domain 
= d; 1500 iter->iommu_mr = mr; 1501 g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter); 1502 } 1503 return false; /* continue the domain traversal */ 1504 } 1505 1506 static int iommu_post_load(void *opaque, int version_id) 1507 { 1508 VirtIOIOMMU *s = opaque; 1509 1510 g_tree_foreach(s->domains, reconstruct_endpoints, s); 1511 1512 /* 1513 * Memory regions are dynamically turned on/off depending on 1514 * 'config.bypass' and attached domain type if there is. After 1515 * migration, we need to make sure the memory regions are 1516 * still correct. 1517 */ 1518 virtio_iommu_switch_address_space_all(s); 1519 return 0; 1520 } 1521 1522 static const VMStateDescription vmstate_virtio_iommu_device = { 1523 .name = "virtio-iommu-device", 1524 .minimum_version_id = 2, 1525 .version_id = 2, 1526 .post_load = iommu_post_load, 1527 .fields = (const VMStateField[]) { 1528 VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2, 1529 &vmstate_domain, VirtIOIOMMUDomain), 1530 VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2), 1531 VMSTATE_END_OF_LIST() 1532 }, 1533 }; 1534 1535 static const VMStateDescription vmstate_virtio_iommu = { 1536 .name = "virtio-iommu", 1537 .minimum_version_id = 2, 1538 .priority = MIG_PRI_IOMMU, 1539 .version_id = 2, 1540 .fields = (const VMStateField[]) { 1541 VMSTATE_VIRTIO_DEVICE, 1542 VMSTATE_END_OF_LIST() 1543 }, 1544 }; 1545 1546 static Property virtio_iommu_properties[] = { 1547 DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, 1548 TYPE_PCI_BUS, PCIBus *), 1549 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true), 1550 DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode, 1551 GRANULE_MODE_HOST), 1552 DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64), 1553 DEFINE_PROP_END_OF_LIST(), 1554 }; 1555 1556 static void virtio_iommu_class_init(ObjectClass *klass, void *data) 1557 { 1558 DeviceClass *dc = DEVICE_CLASS(klass); 1559 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 1560 1561 device_class_set_props(dc, virtio_iommu_properties); 1562 dc->vmsd = &vmstate_virtio_iommu; 1563 1564 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1565 vdc->realize = virtio_iommu_device_realize; 1566 vdc->unrealize = virtio_iommu_device_unrealize; 1567 vdc->reset = virtio_iommu_device_reset; 1568 vdc->get_config = virtio_iommu_get_config; 1569 vdc->set_config = virtio_iommu_set_config; 1570 vdc->get_features = virtio_iommu_get_features; 1571 vdc->set_status = virtio_iommu_set_status; 1572 vdc->vmsd = &vmstate_virtio_iommu_device; 1573 } 1574 1575 static void virtio_iommu_memory_region_class_init(ObjectClass *klass, 1576 void *data) 1577 { 1578 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 1579 1580 imrc->translate = virtio_iommu_translate; 1581 imrc->replay = virtio_iommu_replay; 1582 imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; 1583 imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; 1584 imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; 1585 } 1586 1587 static const TypeInfo virtio_iommu_info = { 1588 .name = TYPE_VIRTIO_IOMMU, 1589 .parent = TYPE_VIRTIO_DEVICE, 1590 .instance_size = sizeof(VirtIOIOMMU), 1591 .instance_init = virtio_iommu_instance_init, 1592 .class_init = virtio_iommu_class_init, 1593 }; 1594 1595 static const TypeInfo virtio_iommu_memory_region_info = { 1596 .parent = TYPE_IOMMU_MEMORY_REGION, 1597 .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION, 1598 .class_init = virtio_iommu_memory_region_class_init, 1599 }; 1600 1601 static void virtio_register_types(void) 1602 { 1603 
type_register_static(&virtio_iommu_info); 1604 type_register_static(&virtio_iommu_memory_region_info); 1605 } 1606 1607 type_init(virtio_register_types) 1608
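
/*
 * Informational summary (comment only, kept out of the functional code):
 *
 *   VIRTIO_IOMMU_T_ATTACH/DETACH  ->  virtio_iommu_attach()/detach()
 *   VIRTIO_IOMMU_T_MAP/UNMAP      ->  virtio_iommu_map()/unmap()
 *   VIRTIO_IOMMU_T_PROBE          ->  virtio_iommu_probe()
 *   DMA accesses                  ->  virtio_iommu_translate()
 *   translation faults            <-  virtio_iommu_report_fault()
 *
 * All requests and translations are serialized under s->mutex by
 * virtio_iommu_handle_command() and virtio_iommu_translate(). The device is
 * typically instantiated through the "virtio-iommu-pci" proxy; the
 * "boot-bypass", "granule" and "aw-bits" properties defined above control the
 * initial bypass state, the page size mask and the input address width.
 */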