/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "qemu-common.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"

/* Max size */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512

typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;

static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}

/**
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not yet be initialized.
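 * The iommu_pcibus_by_bus_num[] array therefore acts as a cache, filled on
 * the first SID based lookup for a given bus number.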
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}

static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}

static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}

static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEntry entry;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    entry.target_as = &address_space_memory;
    entry.addr_mask = virt_end - virt_start;
    entry.iova = virt_start;
    entry.perm = perm;
    entry.translated_addr = paddr;

    memory_region_notify_iommu(mr, 0, entry);
}

static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEntry entry;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    entry.target_as = &address_space_memory;
    entry.addr_mask = virt_end - virt_start;
    entry.iova = virt_start;
    entry.perm = IOMMU_NONE;
    entry.translated_addr = 0;

    memory_region_notify_iommu(mr, 0, entry);
}

static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}

static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;

    if (!ep->domain) {
        return;
    }
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
}

static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}

static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}

static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, (GDestroyNotify)g_free,
                                       (GDestroyNotify)g_free);
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}

static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}

static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice));

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);
        address_space_init(&sdev->as,
                           MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
        g_free(name);
    }
    return &sdev->as;
}

static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_attach(domain_id, ep_id);

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * the device is already attached to a domain,
         * detach it first
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id);
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }
    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void **)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}

static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    int i;

    total = size * s->nb_reserved_regions;

    if (total > free) {
        return -ENOSPC;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        unsigned subtype = s->reserved_regions[i].type;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(s->reserved_regions[i].low);
        prop.end = cpu_to_le64(s->reserved_regions[i].high);

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}

/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    size_t free = VIOMMU_PROBE_SIZE;
    ssize_t count;

    if (!virtio_iommu_mr(s, ep_id)) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t req_sz)
{
    size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail);

    sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}

#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)

static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}

static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    size_t output_size = sizeof(tail), sz;
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = (struct virtio_iommu_req_tail *)
                        (buf + s->config.probe_size);
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
    }
}

static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}

static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    bool found;
    int i;

    interval.low = addr;
    interval.high = addr + 1;

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = virtio_vdev_has_feature(&s->parent_obj,
                                             VIRTIO_IOMMU_F_BYPASS);

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        ReservedRegion *reg = &s->reserved_regions[i];

        if (addr >= reg->low && addr <= reg->high) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_mutex_unlock(&s->mutex);
    return entry;
}

static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *config = &dev->config;

    trace_virtio_iommu_get_config(config->page_size_mask,
                                  config->input_range.start,
                                  config->input_range.end,
                                  config->domain_range.end,
                                  config->probe_size);
    memcpy(config_data, &dev->config, sizeof(struct virtio_iommu_config));
}

static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    struct virtio_iommu_config config;

    memcpy(&config, config_data, sizeof(struct virtio_iommu_config));
    trace_virtio_iommu_set_config(config.page_size_mask,
                                  config.input_range.start,
                                  config.input_range.end,
                                  config.domain_range.end,
                                  config.probe_size);
}

static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}

static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}

static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

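    /* Re-issue MAP notifications for every mapping of the endpoint's domain */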
    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_mutex_unlock(&s->mutex);
}

static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}

/*
 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
 * for example 0xfffffffffffff000. When an assigned device has page size
 * restrictions due to the hardware IOMMU configuration, apply this restriction
 * to the mask.
 */
static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
                                           uint64_t new_mask,
                                           Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint64_t cur_mask = s->config.page_size_mask;

    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
                                          new_mask);

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                   " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
        return -1;
    }

    /*
     * After the machine is finalized, we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it. Having a different mask is possible but the guest will use
     * sub-optimal block sizes, so warn about it.
     */
    if (qdev_hotplug) {
        int new_granule = ctz64(new_mask);
        int cur_granule = ctz64(cur_mask);

        if (new_granule != cur_granule) {
            error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                       " is incompatible with mask 0x%"PRIx64, cur_mask,
                       new_mask);
            return -1;
        } else if (new_mask != cur_mask) {
            warn_report("virtio-iommu page mask 0x%"PRIx64
                        " does not match 0x%"PRIx64, cur_mask, new_mask);
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}

static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
                sizeof(struct virtio_iommu_config));

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                                 virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    s->config.page_size_mask = TARGET_PAGE_MASK;
    s->config.input_range.end = -1UL;
    s->config.domain_range.end = 32;
    s->config.probe_size = VIOMMU_PROBE_SIZE;

    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);

    qemu_mutex_init(&s->mutex);

    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);

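    /*
     * Register the hook that creates a per-device IOMMU address space
     * (virtio_iommu_find_add_as) on the bus this device protects.
     */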
    if (s->primary_bus) {
        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
    } else {
        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
    }
}

static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    g_hash_table_destroy(s->as_by_busptr);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}

static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);

    trace_virtio_iommu_device_reset();

    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }
    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
                                 NULL, NULL, virtio_iommu_put_domain);
    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
                                   NULL, NULL, virtio_iommu_put_endpoint);
}

static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}

static void virtio_iommu_instance_init(Object *obj)
{
}

#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

#define VMSTATE_MAPPING                               \
{                                                     \
    .name = "mapping",                                \
    .version_id = 1,                                  \
    .minimum_version_id = 1,                          \
    .fields = (VMStateField[]) {                      \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
        VMSTATE_END_OF_LIST()                         \
    },                                                \
}

static const VMStateDescription vmstate_interval_mapping[2] = {
    VMSTATE_MAPPING,   /* value */
    VMSTATE_INTERVAL   /* key   */
};

static int domain_preload(void *opaque)
{
    VirtIOIOMMUDomain *domain = opaque;

    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, g_free, g_free);
    return 0;
}

static const VMStateDescription vmstate_endpoint = {
    .name = "endpoint",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_domain = {
    .name = "domain",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = domain_preload,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
                        vmstate_interval_mapping,
                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
        VMSTATE_END_OF_LIST()
    }
};

static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}

static int iommu_post_load(void *opaque, int version_id)
{
    VirtIOIOMMU *s = opaque;

    g_tree_foreach(s->domains, reconstruct_endpoints, s);
    return 0;
}

static const VMStateDescription vmstate_virtio_iommu_device = {
    .name = "virtio-iommu-device",
    .minimum_version_id = 1,
    .version_id = 1,
    .post_load = iommu_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 1,
                                   &vmstate_domain, VirtIOIOMMUDomain),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_iommu = {
    .name = "virtio-iommu",
    .minimum_version_id = 1,
    .priority = MIG_PRI_IOMMU,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_iommu_properties[] = {
    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_iommu_properties);
    dc->vmsd = &vmstate_virtio_iommu;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->get_features = virtio_iommu_get_features;
    vdc->set_status = virtio_iommu_set_status;
    vdc->vmsd = &vmstate_virtio_iommu_device;
}

static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
}

static const TypeInfo virtio_iommu_info = {
    .name = TYPE_VIRTIO_IOMMU,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOIOMMU),
    .instance_init = virtio_iommu_instance_init,
    .class_init = virtio_iommu_class_init,
};

static const TypeInfo virtio_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
    .class_init = virtio_iommu_memory_region_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_iommu_info);
    type_register_static(&virtio_iommu_memory_region_info);
}

type_init(virtio_register_types)