1 /* 2 * VFIO: IOMMU DMA mapping support for Type1 IOMMU 3 * 4 * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 5 * Author: Alex Williamson <alex.williamson@redhat.com> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 * 11 * Derived from original vfio: 12 * Copyright 2010 Cisco Systems, Inc. All rights reserved. 13 * Author: Tom Lyon, pugs@cisco.com 14 * 15 * We arbitrarily define a Type1 IOMMU as one matching the below code. 16 * It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel 17 * VT-d, but that makes it harder to re-use as theoretically anyone 18 * implementing a similar IOMMU could make use of this. We expect the 19 * IOMMU to support the IOMMU API and have few to no restrictions around 20 * the IOVA range that can be mapped. The Type1 IOMMU is currently 21 * optimized for relatively static mappings of a userspace process with 22 * userpsace pages pinned into memory. We also assume devices and IOMMU 23 * domains are PCI based as the IOMMU API is still centered around a 24 * device/bus interface rather than a group interface. 25 */ 26 27 #include <linux/compat.h> 28 #include <linux/device.h> 29 #include <linux/fs.h> 30 #include <linux/iommu.h> 31 #include <linux/module.h> 32 #include <linux/mm.h> 33 #include <linux/rbtree.h> 34 #include <linux/sched.h> 35 #include <linux/slab.h> 36 #include <linux/uaccess.h> 37 #include <linux/vfio.h> 38 #include <linux/workqueue.h> 39 40 #define DRIVER_VERSION "0.2" 41 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 42 #define DRIVER_DESC "Type1 IOMMU driver for VFIO" 43 44 static bool allow_unsafe_interrupts; 45 module_param_named(allow_unsafe_interrupts, 46 allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR); 47 MODULE_PARM_DESC(allow_unsafe_interrupts, 48 "Enable VFIO IOMMU support for on platforms without interrupt remapping support."); 49 50 static bool disable_hugepages; 51 module_param_named(disable_hugepages, 52 disable_hugepages, bool, S_IRUGO | S_IWUSR); 53 MODULE_PARM_DESC(disable_hugepages, 54 "Disable VFIO IOMMU support for IOMMU hugepages."); 55 56 struct vfio_iommu { 57 struct list_head domain_list; 58 struct mutex lock; 59 struct rb_root dma_list; 60 bool v2; 61 }; 62 63 struct vfio_domain { 64 struct iommu_domain *domain; 65 struct list_head next; 66 struct list_head group_list; 67 int prot; /* IOMMU_CACHE */ 68 }; 69 70 struct vfio_dma { 71 struct rb_node node; 72 dma_addr_t iova; /* Device address */ 73 unsigned long vaddr; /* Process virtual addr */ 74 size_t size; /* Map size (bytes) */ 75 int prot; /* IOMMU_READ/WRITE */ 76 }; 77 78 struct vfio_group { 79 struct iommu_group *iommu_group; 80 struct list_head next; 81 }; 82 83 /* 84 * This code handles mapping and unmapping of user data buffers 85 * into DMA'ble space using the IOMMU 86 */ 87 88 static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, 89 dma_addr_t start, size_t size) 90 { 91 struct rb_node *node = iommu->dma_list.rb_node; 92 93 while (node) { 94 struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); 95 96 if (start + size <= dma->iova) 97 node = node->rb_left; 98 else if (start >= dma->iova + dma->size) 99 node = node->rb_right; 100 else 101 return dma; 102 } 103 104 return NULL; 105 } 106 107 static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new) 108 { 109 struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL; 110 struct vfio_dma *dma; 111 112 while (*link) { 113 parent = *link; 114 dma = rb_entry(parent, struct vfio_dma, node); 115 116 if (new->iova + new->size <= dma->iova) 117 link = &(*link)->rb_left; 118 else 119 link = &(*link)->rb_right; 120 } 121 122 rb_link_node(&new->node, parent, link); 123 rb_insert_color(&new->node, &iommu->dma_list); 124 } 125 126 static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old) 127 { 128 rb_erase(&old->node, &iommu->dma_list); 129 } 130 131 struct vwork { 132 struct mm_struct *mm; 133 long npage; 134 struct work_struct work; 135 }; 136 137 /* delayed decrement/increment for locked_vm */ 138 static void vfio_lock_acct_bg(struct work_struct *work) 139 { 140 struct vwork *vwork = container_of(work, struct vwork, work); 141 struct mm_struct *mm; 142 143 mm = vwork->mm; 144 down_write(&mm->mmap_sem); 145 mm->locked_vm += vwork->npage; 146 up_write(&mm->mmap_sem); 147 mmput(mm); 148 kfree(vwork); 149 } 150 151 static void vfio_lock_acct(long npage) 152 { 153 struct vwork *vwork; 154 struct mm_struct *mm; 155 156 if (!current->mm || !npage) 157 return; /* process exited or nothing to do */ 158 159 if (down_write_trylock(¤t->mm->mmap_sem)) { 160 current->mm->locked_vm += npage; 161 up_write(¤t->mm->mmap_sem); 162 return; 163 } 164 165 /* 166 * Couldn't get mmap_sem lock, so must setup to update 167 * mm->locked_vm later. If locked_vm were atomic, we 168 * wouldn't need this silliness 169 */ 170 vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); 171 if (!vwork) 172 return; 173 mm = get_task_mm(current); 174 if (!mm) { 175 kfree(vwork); 176 return; 177 } 178 INIT_WORK(&vwork->work, vfio_lock_acct_bg); 179 vwork->mm = mm; 180 vwork->npage = npage; 181 schedule_work(&vwork->work); 182 } 183 184 /* 185 * Some mappings aren't backed by a struct page, for example an mmap'd 186 * MMIO range for our own or another device. These use a different 187 * pfn conversion and shouldn't be tracked as locked pages. 188 */ 189 static bool is_invalid_reserved_pfn(unsigned long pfn) 190 { 191 if (pfn_valid(pfn)) { 192 bool reserved; 193 struct page *tail = pfn_to_page(pfn); 194 struct page *head = compound_head(tail); 195 reserved = !!(PageReserved(head)); 196 if (head != tail) { 197 /* 198 * "head" is not a dangling pointer 199 * (compound_head takes care of that) 200 * but the hugepage may have been split 201 * from under us (and we may not hold a 202 * reference count on the head page so it can 203 * be reused before we run PageReferenced), so 204 * we've to check PageTail before returning 205 * what we just read. 206 */ 207 smp_rmb(); 208 if (PageTail(tail)) 209 return reserved; 210 } 211 return PageReserved(tail); 212 } 213 214 return true; 215 } 216 217 static int put_pfn(unsigned long pfn, int prot) 218 { 219 if (!is_invalid_reserved_pfn(pfn)) { 220 struct page *page = pfn_to_page(pfn); 221 if (prot & IOMMU_WRITE) 222 SetPageDirty(page); 223 put_page(page); 224 return 1; 225 } 226 return 0; 227 } 228 229 static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) 230 { 231 struct page *page[1]; 232 struct vm_area_struct *vma; 233 int ret = -EFAULT; 234 235 if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { 236 *pfn = page_to_pfn(page[0]); 237 return 0; 238 } 239 240 down_read(¤t->mm->mmap_sem); 241 242 vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); 243 244 if (vma && vma->vm_flags & VM_PFNMAP) { 245 *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 246 if (is_invalid_reserved_pfn(*pfn)) 247 ret = 0; 248 } 249 250 up_read(¤t->mm->mmap_sem); 251 252 return ret; 253 } 254 255 /* 256 * Attempt to pin pages. We really don't want to track all the pfns and 257 * the iommu can only map chunks of consecutive pfns anyway, so get the 258 * first page and all consecutive pages with the same locking. 259 */ 260 static long vfio_pin_pages(unsigned long vaddr, long npage, 261 int prot, unsigned long *pfn_base) 262 { 263 unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 264 bool lock_cap = capable(CAP_IPC_LOCK); 265 long ret, i; 266 267 if (!current->mm) 268 return -ENODEV; 269 270 ret = vaddr_get_pfn(vaddr, prot, pfn_base); 271 if (ret) 272 return ret; 273 274 if (is_invalid_reserved_pfn(*pfn_base)) 275 return 1; 276 277 if (!lock_cap && current->mm->locked_vm + 1 > limit) { 278 put_pfn(*pfn_base, prot); 279 pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, 280 limit << PAGE_SHIFT); 281 return -ENOMEM; 282 } 283 284 if (unlikely(disable_hugepages)) { 285 vfio_lock_acct(1); 286 return 1; 287 } 288 289 /* Lock all the consecutive pages from pfn_base */ 290 for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { 291 unsigned long pfn = 0; 292 293 ret = vaddr_get_pfn(vaddr, prot, &pfn); 294 if (ret) 295 break; 296 297 if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { 298 put_pfn(pfn, prot); 299 break; 300 } 301 302 if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { 303 put_pfn(pfn, prot); 304 pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", 305 __func__, limit << PAGE_SHIFT); 306 break; 307 } 308 } 309 310 vfio_lock_acct(i); 311 312 return i; 313 } 314 315 static long vfio_unpin_pages(unsigned long pfn, long npage, 316 int prot, bool do_accounting) 317 { 318 unsigned long unlocked = 0; 319 long i; 320 321 for (i = 0; i < npage; i++) 322 unlocked += put_pfn(pfn++, prot); 323 324 if (do_accounting) 325 vfio_lock_acct(-unlocked); 326 327 return unlocked; 328 } 329 330 static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma) 331 { 332 dma_addr_t iova = dma->iova, end = dma->iova + dma->size; 333 struct vfio_domain *domain, *d; 334 long unlocked = 0; 335 336 if (!dma->size) 337 return; 338 /* 339 * We use the IOMMU to track the physical addresses, otherwise we'd 340 * need a much more complicated tracking system. Unfortunately that 341 * means we need to use one of the iommu domains to figure out the 342 * pfns to unpin. The rest need to be unmapped in advance so we have 343 * no iommu translations remaining when the pages are unpinned. 344 */ 345 domain = d = list_first_entry(&iommu->domain_list, 346 struct vfio_domain, next); 347 348 list_for_each_entry_continue(d, &iommu->domain_list, next) 349 iommu_unmap(d->domain, dma->iova, dma->size); 350 351 while (iova < end) { 352 size_t unmapped; 353 phys_addr_t phys; 354 355 phys = iommu_iova_to_phys(domain->domain, iova); 356 if (WARN_ON(!phys)) { 357 iova += PAGE_SIZE; 358 continue; 359 } 360 361 unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE); 362 if (WARN_ON(!unmapped)) 363 break; 364 365 unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT, 366 unmapped >> PAGE_SHIFT, 367 dma->prot, false); 368 iova += unmapped; 369 } 370 371 vfio_lock_acct(-unlocked); 372 } 373 374 static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) 375 { 376 vfio_unmap_unpin(iommu, dma); 377 vfio_unlink_dma(iommu, dma); 378 kfree(dma); 379 } 380 381 static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) 382 { 383 struct vfio_domain *domain; 384 unsigned long bitmap = PAGE_MASK; 385 386 mutex_lock(&iommu->lock); 387 list_for_each_entry(domain, &iommu->domain_list, next) 388 bitmap &= domain->domain->ops->pgsize_bitmap; 389 mutex_unlock(&iommu->lock); 390 391 return bitmap; 392 } 393 394 static int vfio_dma_do_unmap(struct vfio_iommu *iommu, 395 struct vfio_iommu_type1_dma_unmap *unmap) 396 { 397 uint64_t mask; 398 struct vfio_dma *dma; 399 size_t unmapped = 0; 400 int ret = 0; 401 402 mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; 403 404 if (unmap->iova & mask) 405 return -EINVAL; 406 if (!unmap->size || unmap->size & mask) 407 return -EINVAL; 408 409 WARN_ON(mask & PAGE_MASK); 410 411 mutex_lock(&iommu->lock); 412 413 /* 414 * vfio-iommu-type1 (v1) - User mappings were coalesced together to 415 * avoid tracking individual mappings. This means that the granularity 416 * of the original mapping was lost and the user was allowed to attempt 417 * to unmap any range. Depending on the contiguousness of physical 418 * memory and page sizes supported by the IOMMU, arbitrary unmaps may 419 * or may not have worked. We only guaranteed unmap granularity 420 * matching the original mapping; even though it was untracked here, 421 * the original mappings are reflected in IOMMU mappings. This 422 * resulted in a couple unusual behaviors. First, if a range is not 423 * able to be unmapped, ex. a set of 4k pages that was mapped as a 424 * 2M hugepage into the IOMMU, the unmap ioctl returns success but with 425 * a zero sized unmap. Also, if an unmap request overlaps the first 426 * address of a hugepage, the IOMMU will unmap the entire hugepage. 427 * This also returns success and the returned unmap size reflects the 428 * actual size unmapped. 429 * 430 * We attempt to maintain compatibility with this "v1" interface, but 431 * we take control out of the hands of the IOMMU. Therefore, an unmap 432 * request offset from the beginning of the original mapping will 433 * return success with zero sized unmap. And an unmap request covering 434 * the first iova of mapping will unmap the entire range. 435 * 436 * The v2 version of this interface intends to be more deterministic. 437 * Unmap requests must fully cover previous mappings. Multiple 438 * mappings may still be unmaped by specifying large ranges, but there 439 * must not be any previous mappings bisected by the range. An error 440 * will be returned if these conditions are not met. The v2 interface 441 * will only return success and a size of zero if there were no 442 * mappings within the range. 443 */ 444 if (iommu->v2) { 445 dma = vfio_find_dma(iommu, unmap->iova, 0); 446 if (dma && dma->iova != unmap->iova) { 447 ret = -EINVAL; 448 goto unlock; 449 } 450 dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0); 451 if (dma && dma->iova + dma->size != unmap->iova + unmap->size) { 452 ret = -EINVAL; 453 goto unlock; 454 } 455 } 456 457 while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) { 458 if (!iommu->v2 && unmap->iova > dma->iova) 459 break; 460 unmapped += dma->size; 461 vfio_remove_dma(iommu, dma); 462 } 463 464 unlock: 465 mutex_unlock(&iommu->lock); 466 467 /* Report how much was unmapped */ 468 unmap->size = unmapped; 469 470 return ret; 471 } 472 473 /* 474 * Turns out AMD IOMMU has a page table bug where it won't map large pages 475 * to a region that previously mapped smaller pages. This should be fixed 476 * soon, so this is just a temporary workaround to break mappings down into 477 * PAGE_SIZE. Better to map smaller pages than nothing. 478 */ 479 static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova, 480 unsigned long pfn, long npage, int prot) 481 { 482 long i; 483 int ret; 484 485 for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) { 486 ret = iommu_map(domain->domain, iova, 487 (phys_addr_t)pfn << PAGE_SHIFT, 488 PAGE_SIZE, prot | domain->prot); 489 if (ret) 490 break; 491 } 492 493 for (; i < npage && i > 0; i--, iova -= PAGE_SIZE) 494 iommu_unmap(domain->domain, iova, PAGE_SIZE); 495 496 return ret; 497 } 498 499 static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, 500 unsigned long pfn, long npage, int prot) 501 { 502 struct vfio_domain *d; 503 int ret; 504 505 list_for_each_entry(d, &iommu->domain_list, next) { 506 ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, 507 npage << PAGE_SHIFT, prot | d->prot); 508 if (ret) { 509 if (ret != -EBUSY || 510 map_try_harder(d, iova, pfn, npage, prot)) 511 goto unwind; 512 } 513 } 514 515 return 0; 516 517 unwind: 518 list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) 519 iommu_unmap(d->domain, iova, npage << PAGE_SHIFT); 520 521 return ret; 522 } 523 524 static int vfio_dma_do_map(struct vfio_iommu *iommu, 525 struct vfio_iommu_type1_dma_map *map) 526 { 527 dma_addr_t iova = map->iova; 528 unsigned long vaddr = map->vaddr; 529 size_t size = map->size; 530 long npage; 531 int ret = 0, prot = 0; 532 uint64_t mask; 533 struct vfio_dma *dma; 534 unsigned long pfn; 535 536 /* Verify that none of our __u64 fields overflow */ 537 if (map->size != size || map->vaddr != vaddr || map->iova != iova) 538 return -EINVAL; 539 540 mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; 541 542 WARN_ON(mask & PAGE_MASK); 543 544 /* READ/WRITE from device perspective */ 545 if (map->flags & VFIO_DMA_MAP_FLAG_WRITE) 546 prot |= IOMMU_WRITE; 547 if (map->flags & VFIO_DMA_MAP_FLAG_READ) 548 prot |= IOMMU_READ; 549 550 if (!prot || !size || (size | iova | vaddr) & mask) 551 return -EINVAL; 552 553 /* Don't allow IOVA or virtual address wrap */ 554 if (iova + size - 1 < iova || vaddr + size - 1 < vaddr) 555 return -EINVAL; 556 557 mutex_lock(&iommu->lock); 558 559 if (vfio_find_dma(iommu, iova, size)) { 560 mutex_unlock(&iommu->lock); 561 return -EEXIST; 562 } 563 564 dma = kzalloc(sizeof(*dma), GFP_KERNEL); 565 if (!dma) { 566 mutex_unlock(&iommu->lock); 567 return -ENOMEM; 568 } 569 570 dma->iova = iova; 571 dma->vaddr = vaddr; 572 dma->prot = prot; 573 574 /* Insert zero-sized and grow as we map chunks of it */ 575 vfio_link_dma(iommu, dma); 576 577 while (size) { 578 /* Pin a contiguous chunk of memory */ 579 npage = vfio_pin_pages(vaddr + dma->size, 580 size >> PAGE_SHIFT, prot, &pfn); 581 if (npage <= 0) { 582 WARN_ON(!npage); 583 ret = (int)npage; 584 break; 585 } 586 587 /* Map it! */ 588 ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot); 589 if (ret) { 590 vfio_unpin_pages(pfn, npage, prot, true); 591 break; 592 } 593 594 size -= npage << PAGE_SHIFT; 595 dma->size += npage << PAGE_SHIFT; 596 } 597 598 if (ret) 599 vfio_remove_dma(iommu, dma); 600 601 mutex_unlock(&iommu->lock); 602 return ret; 603 } 604 605 static int vfio_bus_type(struct device *dev, void *data) 606 { 607 struct bus_type **bus = data; 608 609 if (*bus && *bus != dev->bus) 610 return -EINVAL; 611 612 *bus = dev->bus; 613 614 return 0; 615 } 616 617 static int vfio_iommu_replay(struct vfio_iommu *iommu, 618 struct vfio_domain *domain) 619 { 620 struct vfio_domain *d; 621 struct rb_node *n; 622 int ret; 623 624 /* Arbitrarily pick the first domain in the list for lookups */ 625 d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); 626 n = rb_first(&iommu->dma_list); 627 628 /* If there's not a domain, there better not be any mappings */ 629 if (WARN_ON(n && !d)) 630 return -EINVAL; 631 632 for (; n; n = rb_next(n)) { 633 struct vfio_dma *dma; 634 dma_addr_t iova; 635 636 dma = rb_entry(n, struct vfio_dma, node); 637 iova = dma->iova; 638 639 while (iova < dma->iova + dma->size) { 640 phys_addr_t phys = iommu_iova_to_phys(d->domain, iova); 641 size_t size; 642 643 if (WARN_ON(!phys)) { 644 iova += PAGE_SIZE; 645 continue; 646 } 647 648 size = PAGE_SIZE; 649 650 while (iova + size < dma->iova + dma->size && 651 phys + size == iommu_iova_to_phys(d->domain, 652 iova + size)) 653 size += PAGE_SIZE; 654 655 ret = iommu_map(domain->domain, iova, phys, 656 size, dma->prot | domain->prot); 657 if (ret) 658 return ret; 659 660 iova += size; 661 } 662 } 663 664 return 0; 665 } 666 667 static int vfio_iommu_type1_attach_group(void *iommu_data, 668 struct iommu_group *iommu_group) 669 { 670 struct vfio_iommu *iommu = iommu_data; 671 struct vfio_group *group, *g; 672 struct vfio_domain *domain, *d; 673 struct bus_type *bus = NULL; 674 int ret; 675 676 mutex_lock(&iommu->lock); 677 678 list_for_each_entry(d, &iommu->domain_list, next) { 679 list_for_each_entry(g, &d->group_list, next) { 680 if (g->iommu_group != iommu_group) 681 continue; 682 683 mutex_unlock(&iommu->lock); 684 return -EINVAL; 685 } 686 } 687 688 group = kzalloc(sizeof(*group), GFP_KERNEL); 689 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 690 if (!group || !domain) { 691 ret = -ENOMEM; 692 goto out_free; 693 } 694 695 group->iommu_group = iommu_group; 696 697 /* Determine bus_type in order to allocate a domain */ 698 ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type); 699 if (ret) 700 goto out_free; 701 702 domain->domain = iommu_domain_alloc(bus); 703 if (!domain->domain) { 704 ret = -EIO; 705 goto out_free; 706 } 707 708 ret = iommu_attach_group(domain->domain, iommu_group); 709 if (ret) 710 goto out_domain; 711 712 INIT_LIST_HEAD(&domain->group_list); 713 list_add(&group->next, &domain->group_list); 714 715 if (!allow_unsafe_interrupts && 716 !iommu_domain_has_cap(domain->domain, IOMMU_CAP_INTR_REMAP)) { 717 pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", 718 __func__); 719 ret = -EPERM; 720 goto out_detach; 721 } 722 723 if (iommu_domain_has_cap(domain->domain, IOMMU_CAP_CACHE_COHERENCY)) 724 domain->prot |= IOMMU_CACHE; 725 726 /* 727 * Try to match an existing compatible domain. We don't want to 728 * preclude an IOMMU driver supporting multiple bus_types and being 729 * able to include different bus_types in the same IOMMU domain, so 730 * we test whether the domains use the same iommu_ops rather than 731 * testing if they're on the same bus_type. 732 */ 733 list_for_each_entry(d, &iommu->domain_list, next) { 734 if (d->domain->ops == domain->domain->ops && 735 d->prot == domain->prot) { 736 iommu_detach_group(domain->domain, iommu_group); 737 if (!iommu_attach_group(d->domain, iommu_group)) { 738 list_add(&group->next, &d->group_list); 739 iommu_domain_free(domain->domain); 740 kfree(domain); 741 mutex_unlock(&iommu->lock); 742 return 0; 743 } 744 745 ret = iommu_attach_group(domain->domain, iommu_group); 746 if (ret) 747 goto out_domain; 748 } 749 } 750 751 /* replay mappings on new domains */ 752 ret = vfio_iommu_replay(iommu, domain); 753 if (ret) 754 goto out_detach; 755 756 list_add(&domain->next, &iommu->domain_list); 757 758 mutex_unlock(&iommu->lock); 759 760 return 0; 761 762 out_detach: 763 iommu_detach_group(domain->domain, iommu_group); 764 out_domain: 765 iommu_domain_free(domain->domain); 766 out_free: 767 kfree(domain); 768 kfree(group); 769 mutex_unlock(&iommu->lock); 770 return ret; 771 } 772 773 static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu) 774 { 775 struct rb_node *node; 776 777 while ((node = rb_first(&iommu->dma_list))) 778 vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node)); 779 } 780 781 static void vfio_iommu_type1_detach_group(void *iommu_data, 782 struct iommu_group *iommu_group) 783 { 784 struct vfio_iommu *iommu = iommu_data; 785 struct vfio_domain *domain; 786 struct vfio_group *group; 787 788 mutex_lock(&iommu->lock); 789 790 list_for_each_entry(domain, &iommu->domain_list, next) { 791 list_for_each_entry(group, &domain->group_list, next) { 792 if (group->iommu_group != iommu_group) 793 continue; 794 795 iommu_detach_group(domain->domain, iommu_group); 796 list_del(&group->next); 797 kfree(group); 798 /* 799 * Group ownership provides privilege, if the group 800 * list is empty, the domain goes away. If it's the 801 * last domain, then all the mappings go away too. 802 */ 803 if (list_empty(&domain->group_list)) { 804 if (list_is_singular(&iommu->domain_list)) 805 vfio_iommu_unmap_unpin_all(iommu); 806 iommu_domain_free(domain->domain); 807 list_del(&domain->next); 808 kfree(domain); 809 } 810 goto done; 811 } 812 } 813 814 done: 815 mutex_unlock(&iommu->lock); 816 } 817 818 static void *vfio_iommu_type1_open(unsigned long arg) 819 { 820 struct vfio_iommu *iommu; 821 822 if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU) 823 return ERR_PTR(-EINVAL); 824 825 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); 826 if (!iommu) 827 return ERR_PTR(-ENOMEM); 828 829 INIT_LIST_HEAD(&iommu->domain_list); 830 iommu->dma_list = RB_ROOT; 831 mutex_init(&iommu->lock); 832 iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU); 833 834 return iommu; 835 } 836 837 static void vfio_iommu_type1_release(void *iommu_data) 838 { 839 struct vfio_iommu *iommu = iommu_data; 840 struct vfio_domain *domain, *domain_tmp; 841 struct vfio_group *group, *group_tmp; 842 843 vfio_iommu_unmap_unpin_all(iommu); 844 845 list_for_each_entry_safe(domain, domain_tmp, 846 &iommu->domain_list, next) { 847 list_for_each_entry_safe(group, group_tmp, 848 &domain->group_list, next) { 849 iommu_detach_group(domain->domain, group->iommu_group); 850 list_del(&group->next); 851 kfree(group); 852 } 853 iommu_domain_free(domain->domain); 854 list_del(&domain->next); 855 kfree(domain); 856 } 857 858 kfree(iommu); 859 } 860 861 static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) 862 { 863 struct vfio_domain *domain; 864 int ret = 1; 865 866 mutex_lock(&iommu->lock); 867 list_for_each_entry(domain, &iommu->domain_list, next) { 868 if (!(domain->prot & IOMMU_CACHE)) { 869 ret = 0; 870 break; 871 } 872 } 873 mutex_unlock(&iommu->lock); 874 875 return ret; 876 } 877 878 static long vfio_iommu_type1_ioctl(void *iommu_data, 879 unsigned int cmd, unsigned long arg) 880 { 881 struct vfio_iommu *iommu = iommu_data; 882 unsigned long minsz; 883 884 if (cmd == VFIO_CHECK_EXTENSION) { 885 switch (arg) { 886 case VFIO_TYPE1_IOMMU: 887 case VFIO_TYPE1v2_IOMMU: 888 return 1; 889 case VFIO_DMA_CC_IOMMU: 890 if (!iommu) 891 return 0; 892 return vfio_domains_have_iommu_cache(iommu); 893 default: 894 return 0; 895 } 896 } else if (cmd == VFIO_IOMMU_GET_INFO) { 897 struct vfio_iommu_type1_info info; 898 899 minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); 900 901 if (copy_from_user(&info, (void __user *)arg, minsz)) 902 return -EFAULT; 903 904 if (info.argsz < minsz) 905 return -EINVAL; 906 907 info.flags = 0; 908 909 info.iova_pgsizes = vfio_pgsize_bitmap(iommu); 910 911 return copy_to_user((void __user *)arg, &info, minsz); 912 913 } else if (cmd == VFIO_IOMMU_MAP_DMA) { 914 struct vfio_iommu_type1_dma_map map; 915 uint32_t mask = VFIO_DMA_MAP_FLAG_READ | 916 VFIO_DMA_MAP_FLAG_WRITE; 917 918 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); 919 920 if (copy_from_user(&map, (void __user *)arg, minsz)) 921 return -EFAULT; 922 923 if (map.argsz < minsz || map.flags & ~mask) 924 return -EINVAL; 925 926 return vfio_dma_do_map(iommu, &map); 927 928 } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { 929 struct vfio_iommu_type1_dma_unmap unmap; 930 long ret; 931 932 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); 933 934 if (copy_from_user(&unmap, (void __user *)arg, minsz)) 935 return -EFAULT; 936 937 if (unmap.argsz < minsz || unmap.flags) 938 return -EINVAL; 939 940 ret = vfio_dma_do_unmap(iommu, &unmap); 941 if (ret) 942 return ret; 943 944 return copy_to_user((void __user *)arg, &unmap, minsz); 945 } 946 947 return -ENOTTY; 948 } 949 950 static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { 951 .name = "vfio-iommu-type1", 952 .owner = THIS_MODULE, 953 .open = vfio_iommu_type1_open, 954 .release = vfio_iommu_type1_release, 955 .ioctl = vfio_iommu_type1_ioctl, 956 .attach_group = vfio_iommu_type1_attach_group, 957 .detach_group = vfio_iommu_type1_detach_group, 958 }; 959 960 static int __init vfio_iommu_type1_init(void) 961 { 962 return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1); 963 } 964 965 static void __exit vfio_iommu_type1_cleanup(void) 966 { 967 vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1); 968 } 969 970 module_init(vfio_iommu_type1_init); 971 module_exit(vfio_iommu_type1_cleanup); 972 973 MODULE_VERSION(DRIVER_VERSION); 974 MODULE_LICENSE("GPL v2"); 975 MODULE_AUTHOR(DRIVER_AUTHOR); 976 MODULE_DESCRIPTION(DRIVER_DESC); 977