// SPDX-License-Identifier: GPL-2.0-only
/*
 * A fairly generic DMA-API to IOMMU-API glue layer.
 *
 * Copyright (C) 2014-2015 ARM Ltd.
 *
 * based in part on arch/arm/mm/dma-mapping.c:
 * Copyright (C) 2000-2004 Russell King
 */

#include <linux/acpi_iort.h>
#include <linux/device.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-iommu.h>
#include <linux/dma-noncoherent.h>
#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
#include <linux/iova.h>
#include <linux/irq.h>
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
#include <linux/crash_dump.h>

struct iommu_dma_msi_page {
	struct list_head	list;
	dma_addr_t		iova;
	phys_addr_t		phys;
};

enum iommu_dma_cookie_type {
	IOMMU_DMA_IOVA_COOKIE,
	IOMMU_DMA_MSI_COOKIE,
};

struct iommu_dma_cookie {
	enum iommu_dma_cookie_type	type;
	union {
		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
		struct iova_domain	iovad;
		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
		dma_addr_t		msi_iova;
	};
	struct list_head		msi_page_list;
	spinlock_t			msi_lock;

	/* Domain for flush queue callback; NULL if flush queue not in use */
	struct iommu_domain		*fq_domain;
};

static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
{
	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
		return cookie->iovad.granule;
	return PAGE_SIZE;
}

static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
{
	struct iommu_dma_cookie *cookie;

	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
	if (cookie) {
		spin_lock_init(&cookie->msi_lock);
		INIT_LIST_HEAD(&cookie->msi_page_list);
		cookie->type = type;
	}
	return cookie;
}

/**
 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
 * @domain: IOMMU domain to prepare for DMA-API usage
 *
 * IOMMU drivers should normally call this from their domain_alloc
 * callback when domain->type == IOMMU_DOMAIN_DMA.
 */
int iommu_get_dma_cookie(struct iommu_domain *domain)
{
	if (domain->iova_cookie)
		return -EEXIST;

	domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
	if (!domain->iova_cookie)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(iommu_get_dma_cookie);
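
/*
 * Illustrative sketch (not part of this file): an IOMMU driver would
 * typically acquire the cookie from its domain_alloc() callback, roughly
 * like the hypothetical driver below ("struct my_domain" and
 * my_domain_alloc() are made-up names):
 *
 *	static struct iommu_domain *my_domain_alloc(unsigned type)
 *	{
 *		struct my_domain *md = kzalloc(sizeof(*md), GFP_KERNEL);
 *
 *		if (!md)
 *			return NULL;
 *		if (type == IOMMU_DOMAIN_DMA &&
 *		    iommu_get_dma_cookie(&md->domain)) {
 *			kfree(md);
 *			return NULL;
 *		}
 *		return &md->domain;
 *	}
 *
 * The matching domain_free() callback is then expected to call
 * iommu_put_dma_cookie() before freeing the domain.
 */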

/**
 * iommu_get_msi_cookie - Acquire just MSI remapping resources
 * @domain: IOMMU domain to prepare
 * @base: Start address of IOVA region for MSI mappings
 *
 * Users who manage their own IOVA allocation and do not want DMA API support,
 * but would still like to take advantage of automatic MSI remapping, can use
 * this to initialise their own domain appropriately. Users should reserve a
 * contiguous IOVA region, starting at @base, large enough to accommodate the
 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
 * used by the devices attached to @domain.
 */
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
{
	struct iommu_dma_cookie *cookie;

	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;

	if (domain->iova_cookie)
		return -EEXIST;

	cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
	if (!cookie)
		return -ENOMEM;

	cookie->msi_iova = base;
	domain->iova_cookie = cookie;
	return 0;
}
EXPORT_SYMBOL(iommu_get_msi_cookie);

/**
 * iommu_put_dma_cookie - Release a domain's DMA mapping resources
 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
 *          iommu_get_msi_cookie()
 *
 * IOMMU drivers should normally call this from their domain_free callback.
 */
void iommu_put_dma_cookie(struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iommu_dma_msi_page *msi, *tmp;

	if (!cookie)
		return;

	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
		put_iova_domain(&cookie->iovad);

	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
		list_del(&msi->list);
		kfree(msi);
	}
	kfree(cookie);
	domain->iova_cookie = NULL;
}
EXPORT_SYMBOL(iommu_put_dma_cookie);

/**
 * iommu_dma_get_resv_regions - Reserved region driver helper
 * @dev: Device from iommu_get_resv_regions()
 * @list: Reserved region list from iommu_get_resv_regions()
 *
 * IOMMU drivers can use this to implement their .get_resv_regions callback
 * for general non-IOMMU-specific reservations. Currently, this covers GICv3
 * ITS region reservation on ACPI based ARM platforms that may require HW MSI
 * reservation.
 */
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
	if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
		iort_iommu_msi_get_resv_regions(dev, list);
}
EXPORT_SYMBOL(iommu_dma_get_resv_regions);
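
/*
 * Illustrative sketch (hypothetical driver code): a driver's
 * .get_resv_regions callback would usually register its own software-managed
 * MSI window and then chain to the helper above, e.g.:
 *
 *	#define MY_MSI_IOVA_BASE	0x08000000
 *	#define MY_MSI_IOVA_LENGTH	0x100000
 *
 *	static void my_get_resv_regions(struct device *dev,
 *					struct list_head *head)
 *	{
 *		int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 *		struct iommu_resv_region *region;
 *
 *		region = iommu_alloc_resv_region(MY_MSI_IOVA_BASE,
 *						 MY_MSI_IOVA_LENGTH,
 *						 prot, IOMMU_RESV_SW_MSI);
 *		if (!region)
 *			return;
 *
 *		list_add_tail(&region->list, head);
 *		iommu_dma_get_resv_regions(dev, head);
 *	}
 */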

static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
		phys_addr_t start, phys_addr_t end)
{
	struct iova_domain *iovad = &cookie->iovad;
	struct iommu_dma_msi_page *msi_page;
	int i, num_pages;

	start -= iova_offset(iovad, start);
	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);

	msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
	if (!msi_page)
		return -ENOMEM;

	for (i = 0; i < num_pages; i++) {
		msi_page[i].phys = start;
		msi_page[i].iova = start;
		INIT_LIST_HEAD(&msi_page[i].list);
		list_add(&msi_page[i].list, &cookie->msi_page_list);
		start += iovad->granule;
	}

	return 0;
}

static int iova_reserve_pci_windows(struct pci_dev *dev,
		struct iova_domain *iovad)
{
	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
	struct resource_entry *window;
	unsigned long lo, hi;
	phys_addr_t start = 0, end;

	resource_list_for_each_entry(window, &bridge->windows) {
		if (resource_type(window->res) != IORESOURCE_MEM)
			continue;

		lo = iova_pfn(iovad, window->res->start - window->offset);
		hi = iova_pfn(iovad, window->res->end - window->offset);
		reserve_iova(iovad, lo, hi);
	}

	/* Get reserved DMA windows from host bridge */
	resource_list_for_each_entry(window, &bridge->dma_ranges) {
		end = window->res->start - window->offset;
resv_iova:
		if (end > start) {
			lo = iova_pfn(iovad, start);
			hi = iova_pfn(iovad, end);
			reserve_iova(iovad, lo, hi);
		} else {
			/* dma_ranges list should be sorted */
			dev_err(&dev->dev, "Failed to reserve IOVA\n");
			return -EINVAL;
		}

		start = window->res->end - window->offset + 1;
		/* If window is last entry */
		if (window->node.next == &bridge->dma_ranges &&
		    end != ~(phys_addr_t)0) {
			end = ~(phys_addr_t)0;
			goto resv_iova;
		}
	}

	return 0;
}

static int iova_reserve_iommu_regions(struct device *dev,
		struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct iommu_resv_region *region;
	LIST_HEAD(resv_regions);
	int ret = 0;

	if (dev_is_pci(dev)) {
		ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
		if (ret)
			return ret;
	}

	iommu_get_resv_regions(dev, &resv_regions);
	list_for_each_entry(region, &resv_regions, list) {
		unsigned long lo, hi;

		/* We ARE the software that manages these! */
		if (region->type == IOMMU_RESV_SW_MSI)
			continue;

		lo = iova_pfn(iovad, region->start);
		hi = iova_pfn(iovad, region->start + region->length - 1);
		reserve_iova(iovad, lo, hi);

		if (region->type == IOMMU_RESV_MSI)
			ret = cookie_init_hw_msi_region(cookie, region->start,
					region->start + region->length);
		if (ret)
			break;
	}
	iommu_put_resv_regions(dev, &resv_regions);

	return ret;
}

static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
{
	struct iommu_dma_cookie *cookie;
	struct iommu_domain *domain;

	cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
	domain = cookie->fq_domain;
	/*
	 * An IOMMU driver that supports DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE is
	 * required to provide a non-NULL ops->flush_iotlb_all.
	 */
	domain->ops->flush_iotlb_all(domain);
}
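
/*
 * Note (illustrative): iommu_dma_init_domain() below only installs the
 * callback above when the IOMMU driver reports the flush-queue attribute
 * as enabled, typically from a domain_get_attr() implementation along the
 * lines of (hypothetical "my_domain" field names):
 *
 *	case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
 *		*(int *)data = my_domain->non_strict;
 *		return 0;
 *
 * where "non_strict" records whether lazy (non-strict) TLB invalidation
 * was chosen for the domain.
 */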

/**
 * iommu_dma_init_domain - Initialise a DMA mapping domain
 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
 * @base: IOVA at which the mappable address space starts
 * @size: Size of IOVA space
 * @dev: Device the domain is being initialised for
 *
 * @base and @size should be exact multiples of IOMMU page granularity to
 * avoid rounding surprises. If necessary, we reserve the page at address 0
 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
 * any change which could make prior IOVAs invalid will fail.
 */
static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
		u64 size, struct device *dev)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	unsigned long order, base_pfn;
	struct iova_domain *iovad;
	int attr;

	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
		return -EINVAL;

	iovad = &cookie->iovad;

	/* Use the smallest supported page size for IOVA granularity */
	order = __ffs(domain->pgsize_bitmap);
	base_pfn = max_t(unsigned long, 1, base >> order);

	/* Check the domain allows at least some access to the device... */
	if (domain->geometry.force_aperture) {
		if (base > domain->geometry.aperture_end ||
		    base + size <= domain->geometry.aperture_start) {
			pr_warn("specified DMA range outside IOMMU capability\n");
			return -EFAULT;
		}
		/* ...then finally give it a kicking to make sure it fits */
		base_pfn = max_t(unsigned long, base_pfn,
				domain->geometry.aperture_start >> order);
	}

	/* start_pfn is always nonzero for an already-initialised domain */
	if (iovad->start_pfn) {
		if (1UL << order != iovad->granule ||
		    base_pfn != iovad->start_pfn) {
			pr_warn("Incompatible range for DMA domain\n");
			return -EFAULT;
		}

		return 0;
	}

	init_iova_domain(iovad, 1UL << order, base_pfn);

	if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
			DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
		cookie->fq_domain = domain;
		init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
	}

	if (!dev)
		return 0;

	return iova_reserve_iommu_regions(dev, domain);
}

static int iommu_dma_deferred_attach(struct device *dev,
		struct iommu_domain *domain)
{
	const struct iommu_ops *ops = domain->ops;

	if (!is_kdump_kernel())
		return 0;

	if (unlikely(ops->is_attach_deferred &&
			ops->is_attach_deferred(domain, dev)))
		return iommu_attach_device(domain, dev);

	return 0;
}

/**
 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
 *                    page flags.
 * @dir: Direction of DMA transfer
 * @coherent: Is the DMA master cache-coherent?
 * @attrs: DMA attributes for the mapping
 *
 * Return: corresponding IOMMU API page protection flags
 */
static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
		unsigned long attrs)
{
	int prot = coherent ? IOMMU_CACHE : 0;

	if (attrs & DMA_ATTR_PRIVILEGED)
		prot |= IOMMU_PRIV;

	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return prot | IOMMU_READ | IOMMU_WRITE;
	case DMA_TO_DEVICE:
		return prot | IOMMU_READ;
	case DMA_FROM_DEVICE:
		return prot | IOMMU_WRITE;
	default:
		return 0;
	}
}

static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
		size_t size, dma_addr_t dma_limit, struct device *dev)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	unsigned long shift, iova_len, iova = 0;

	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
		cookie->msi_iova += size;
		return cookie->msi_iova - size;
	}

	shift = iova_shift(iovad);
	iova_len = size >> shift;
	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		iova_len = roundup_pow_of_two(iova_len);

	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);

	if (domain->geometry.force_aperture)
		dma_limit = min(dma_limit, domain->geometry.aperture_end);

	/* Try to get PCI devices a SAC address */
	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
		iova = alloc_iova_fast(iovad, iova_len,
				       DMA_BIT_MASK(32) >> shift, false);

	if (!iova)
		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
				       true);

	return (dma_addr_t)iova << shift;
}
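
/*
 * Worked example (illustrative): with a 4 KiB IOVA granule, a 24 KiB
 * (6-granule) request is rounded up to 8 granules above, so the range
 * freed later falls back into the same power-of-two rcache bucket it was
 * allocated from; requests too large for the rcaches are left unrounded.
 * For a PCI device with a 64-bit DMA mask, the first alloc_iova_fast()
 * call tries to return a SAC address below 4 GiB, and only the retry may
 * spill into the rest of the aperture.
 */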

static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
		dma_addr_t iova, size_t size)
{
	struct iova_domain *iovad = &cookie->iovad;

	/* The MSI case is only ever cleaning up its most recent allocation */
	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
		cookie->msi_iova -= size;
	else if (cookie->fq_domain)	/* non-strict mode */
		queue_iova(iovad, iova_pfn(iovad, iova),
				size >> iova_shift(iovad), 0);
	else
		free_iova_fast(iovad, iova_pfn(iovad, iova),
				size >> iova_shift(iovad));
}

static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t iova_off = iova_offset(iovad, dma_addr);
	struct iommu_iotlb_gather iotlb_gather;
	size_t unmapped;

	dma_addr -= iova_off;
	size = iova_align(iovad, size + iova_off);
	iommu_iotlb_gather_init(&iotlb_gather);

	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
	WARN_ON(unmapped != size);

	if (!cookie->fq_domain)
		iommu_tlb_sync(domain, &iotlb_gather);
	iommu_dma_free_iova(cookie, dma_addr, size);
}

static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
		size_t size, int prot, dma_addr_t dma_mask)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t iova_off = iova_offset(iovad, phys);
	dma_addr_t iova;

	if (unlikely(iommu_dma_deferred_attach(dev, domain)))
		return DMA_MAPPING_ERROR;

	size = iova_align(iovad, size + iova_off);

	iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
	if (!iova)
		return DMA_MAPPING_ERROR;

	if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
		iommu_dma_free_iova(cookie, iova, size);
		return DMA_MAPPING_ERROR;
	}
	return iova + iova_off;
}

static void __iommu_dma_free_pages(struct page **pages, int count)
{
	while (count--)
		__free_page(pages[count]);
	kvfree(pages);
}

static struct page **__iommu_dma_alloc_pages(struct device *dev,
		unsigned int count, unsigned long order_mask, gfp_t gfp)
{
	struct page **pages;
	unsigned int i = 0, nid = dev_to_node(dev);

	order_mask &= (2U << MAX_ORDER) - 1;
	if (!order_mask)
		return NULL;

	pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;

	/* IOMMU can map any pages, so highmem can also be used here */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	while (count) {
		struct page *page = NULL;
		unsigned int order_size;

		/*
		 * Higher-order allocations are a convenience rather
		 * than a necessity, hence using __GFP_NORETRY until
		 * falling back to minimum-order allocations.
		 */
		for (order_mask &= (2U << __fls(count)) - 1;
		     order_mask; order_mask &= ~order_size) {
			unsigned int order = __fls(order_mask);
			gfp_t alloc_flags = gfp;

			order_size = 1U << order;
			if (order_mask > order_size)
				alloc_flags |= __GFP_NORETRY;
			page = alloc_pages_node(nid, alloc_flags, order);
			if (!page)
				continue;
			if (!order)
				break;
			if (!PageCompound(page)) {
				split_page(page, order);
				break;
			} else if (!split_huge_page(page)) {
				break;
			}
			__free_pages(page, order);
		}
		if (!page) {
			__iommu_dma_free_pages(pages, i);
			return NULL;
		}
		count -= order_size;
		while (order_size--)
			pages[i++] = page++;
	}
	return pages;
}
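
/*
 * Worked example (illustrative, assuming all block sizes are allowed by
 * the order mask): for a 20-page request, __iommu_dma_alloc_pages() first
 * attempts one order-4 (16-page) block with __GFP_NORETRY, stepping down
 * through the remaining permitted orders if that fails; each block it does
 * get is split into individual struct pages, and the leftover 4 pages are
 * then allocated the same way.
 */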

/**
 * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
 * @dev: Device to allocate memory for. Must be a real device
 *	 attached to an iommu_dma_domain
 * @size: Size of buffer in bytes
 * @dma_handle: Out argument for allocated DMA handle
 * @gfp: Allocation flags
 * @attrs: DMA attributes for this allocation
 *
 * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
 * but an IOMMU which supports smaller pages might not map the whole thing.
 *
 * Return: Mapped virtual address, or NULL on failure.
 */
static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
	struct page **pages;
	struct sg_table sgt;
	dma_addr_t iova;
	void *vaddr;

	*dma_handle = DMA_MAPPING_ERROR;

	if (unlikely(iommu_dma_deferred_attach(dev, domain)))
		return NULL;

	min_size = alloc_sizes & -alloc_sizes;
	if (min_size < PAGE_SIZE) {
		min_size = PAGE_SIZE;
		alloc_sizes |= PAGE_SIZE;
	} else {
		size = ALIGN(size, min_size);
	}
	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
		alloc_sizes = min_size;

	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
					gfp);
	if (!pages)
		return NULL;

	size = iova_align(iovad, size);
	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
	if (!iova)
		goto out_free_pages;

	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
		goto out_free_iova;

	if (!(ioprot & IOMMU_CACHE)) {
		struct scatterlist *sg;
		int i;

		for_each_sg(sgt.sgl, sg, sgt.orig_nents, i)
			arch_dma_prep_coherent(sg_page(sg), sg->length);
	}

	if (iommu_map_sg_atomic(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
			< size)
		goto out_free_sg;

	vaddr = dma_common_pages_remap(pages, size, prot,
			__builtin_return_address(0));
	if (!vaddr)
		goto out_unmap;

	*dma_handle = iova;
	sg_free_table(&sgt);
	return vaddr;

out_unmap:
	__iommu_dma_unmap(dev, iova, size);
out_free_sg:
	sg_free_table(&sgt);
out_free_iova:
	iommu_dma_free_iova(cookie, iova, size);
out_free_pages:
	__iommu_dma_free_pages(pages, count);
	return NULL;
}

/**
 * __iommu_dma_mmap - Map a buffer into provided user VMA
 * @pages: Array representing buffer from __iommu_dma_alloc()
 * @size: Size of buffer in bytes
 * @vma: VMA describing requested userspace mapping
 *
 * Maps the pages of the buffer in @pages into @vma. The caller is responsible
 * for verifying the correct size and protection of @vma beforehand.
 */
static int __iommu_dma_mmap(struct page **pages, size_t size,
		struct vm_area_struct *vma)
{
	return vm_map_pages(vma, pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
}

static void iommu_dma_sync_single_for_cpu(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
	arch_sync_dma_for_cpu(phys, size, dir);
}

static void iommu_dma_sync_single_for_device(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
	arch_sync_dma_for_device(phys, size, dir);
}

static void iommu_dma_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_is_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
}

static void iommu_dma_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_is_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}

static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	bool coherent = dev_is_dma_coherent(dev);
	int prot = dma_info_to_prot(dir, coherent, attrs);
	dma_addr_t dma_handle;

	dma_handle = __iommu_dma_map(dev, phys, size, prot, dma_get_mask(dev));
	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    dma_handle != DMA_MAPPING_ERROR)
		arch_sync_dma_for_device(phys, size, dir);
	return dma_handle;
}

static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
	__iommu_dma_unmap(dev, dma_handle, size);
}

/*
 * Prepare a successfully-mapped scatterlist to give back to the caller.
 *
 * At this point the segments are already laid out by iommu_dma_map_sg() to
 * avoid individually crossing any boundaries, so we merely need to check a
 * segment's start address to avoid concatenating across one.
 */
static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
		dma_addr_t dma_addr)
{
	struct scatterlist *s, *cur = sg;
	unsigned long seg_mask = dma_get_seg_boundary(dev);
	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
	int i, count = 0;

	for_each_sg(sg, s, nents, i) {
		/* Restore this segment's original unaligned fields first */
		unsigned int s_iova_off = sg_dma_address(s);
		unsigned int s_length = sg_dma_len(s);
		unsigned int s_iova_len = s->length;

		s->offset += s_iova_off;
		s->length = s_length;
		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;

		/*
		 * Now fill in the real DMA data. If...
		 * - there is a valid output segment to append to
		 * - and this segment starts on an IOVA page boundary
		 * - but doesn't fall at a segment boundary
		 * - and wouldn't make the resulting output segment too long
		 */
		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
		    (max_len - cur_len >= s_length)) {
			/* ...then concatenate it with the previous one */
			cur_len += s_length;
		} else {
			/* Otherwise start the next output segment */
			if (i > 0)
				cur = sg_next(cur);
			cur_len = s_length;
			count++;

			sg_dma_address(cur) = dma_addr + s_iova_off;
		}

		sg_dma_len(cur) = cur_len;
		dma_addr += s_iova_len;

		if (s_length + s_iova_off < s_iova_len)
			cur_len = 0;
	}
	return count;
}

/*
 * If mapping failed, then just restore the original list, but make sure
 * the DMA fields are invalidated.
 */
static void __invalidate_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (sg_dma_address(s) != DMA_MAPPING_ERROR)
			s->offset += sg_dma_address(s);
		if (sg_dma_len(s))
			s->length = sg_dma_len(s);
		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;
	}
}
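
/*
 * Worked example (illustrative): with a 4 KiB IOVA granule, a scatterlist
 * of two page-sized, page-aligned segments is padded and mapped by
 * iommu_dma_map_sg() below into one contiguous IOVA block; __finalise_sg()
 * then hands the caller a single 8 KiB DMA segment, provided the device's
 * maximum segment size and boundary mask allow it. A segment that does not
 * start on an IOVA page boundary always begins a new DMA segment, with its
 * original offset and length restored.
 */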

/*
 * The DMA API client is passing in a scatterlist which could describe
 * any old buffer layout, but the IOMMU API requires everything to be
 * aligned to IOMMU pages. Hence the need for this complicated bit of
 * impedance-matching, to be able to hand off a suitably-aligned list,
 * but still preserve the original offsets and sizes for the caller.
 */
static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct scatterlist *s, *prev = NULL;
	int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
	dma_addr_t iova;
	size_t iova_len = 0;
	unsigned long mask = dma_get_seg_boundary(dev);
	int i;

	if (unlikely(iommu_dma_deferred_attach(dev, domain)))
		return 0;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);

	/*
	 * Work out how much IOVA space we need, and align the segments to
	 * IOVA granules for the IOMMU driver to handle. With some clever
	 * trickery we can modify the list in-place, but reversibly, by
	 * stashing the unaligned parts in the as-yet-unused DMA fields.
	 */
	for_each_sg(sg, s, nents, i) {
		size_t s_iova_off = iova_offset(iovad, s->offset);
		size_t s_length = s->length;
		size_t pad_len = (mask - iova_len + 1) & mask;

		sg_dma_address(s) = s_iova_off;
		sg_dma_len(s) = s_length;
		s->offset -= s_iova_off;
		s_length = iova_align(iovad, s_length + s_iova_off);
		s->length = s_length;

		/*
		 * Due to the alignment of our single IOVA allocation, we can
		 * depend on these assumptions about the segment boundary mask:
		 * - If mask size >= IOVA size, then the IOVA range cannot
		 *   possibly fall across a boundary, so we don't care.
		 * - If mask size < IOVA size, then the IOVA range must start
		 *   exactly on a boundary, therefore we can lay things out
		 *   based purely on segment lengths without needing to know
		 *   the actual addresses beforehand.
		 * - The mask must be a power of 2, so pad_len == 0 if
		 *   iova_len == 0, thus we cannot dereference prev the first
		 *   time through here (i.e. before it has a meaningful value).
		 */
		if (pad_len && pad_len < s_length - 1) {
			prev->length += pad_len;
			iova_len += pad_len;
		}

		iova_len += s_length;
		prev = s;
	}

	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
	if (!iova)
		goto out_restore_sg;

	/*
	 * We'll leave any physical concatenation to the IOMMU driver's
	 * implementation - it knows better than we do.
	 */
	if (iommu_map_sg_atomic(domain, iova, sg, nents, prot) < iova_len)
		goto out_free_iova;

	return __finalise_sg(dev, sg, nents, iova);

out_free_iova:
	iommu_dma_free_iova(cookie, iova, iova_len);
out_restore_sg:
	__invalidate_sg(sg, nents);
	return 0;
}

static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t start, end;
	struct scatterlist *tmp;
	int i;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);

	/*
	 * The scatterlist segments are mapped into a single
	 * contiguous IOVA allocation, so this is incredibly easy.
	 */
	start = sg_dma_address(sg);
	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
		if (sg_dma_len(tmp) == 0)
			break;
		sg = tmp;
	}
	end = sg_dma_address(sg) + sg_dma_len(sg);
	__iommu_dma_unmap(dev, start, end - start);
}

static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	return __iommu_dma_map(dev, phys, size,
			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
			dma_get_mask(dev));
}

static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
}

static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
{
	size_t alloc_size = PAGE_ALIGN(size);
	int count = alloc_size >> PAGE_SHIFT;
	struct page *page = NULL, **pages = NULL;

	/* Non-coherent atomic allocation? Easy */
	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    dma_free_from_pool(cpu_addr, alloc_size))
		return;

	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
		/*
		 * If the address is remapped, then it's either non-coherent
		 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
		 */
		pages = dma_common_find_pages(cpu_addr);
		if (!pages)
			page = vmalloc_to_page(cpu_addr);
		dma_common_free_remap(cpu_addr, alloc_size);
	} else {
		/* Lowmem means a coherent atomic or CMA allocation */
		page = virt_to_page(cpu_addr);
	}

	if (pages)
		__iommu_dma_free_pages(pages, count);
	if (page)
		dma_free_contiguous(dev, page, alloc_size);
}

static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t handle, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
	__iommu_dma_free(dev, size, cpu_addr);
}

static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
		struct page **pagep, gfp_t gfp, unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	size_t alloc_size = PAGE_ALIGN(size);
	int node = dev_to_node(dev);
	struct page *page = NULL;
	void *cpu_addr;

	page = dma_alloc_contiguous(dev, alloc_size, gfp);
	if (!page)
		page = alloc_pages_node(node, gfp, get_order(alloc_size));
	if (!page)
		return NULL;

	if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) {
		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);

		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
				prot, __builtin_return_address(0));
		if (!cpu_addr)
			goto out_free_pages;

		if (!coherent)
			arch_dma_prep_coherent(page, size);
	} else {
		cpu_addr = page_address(page);
	}

	*pagep = page;
	memset(cpu_addr, 0, alloc_size);
	return cpu_addr;
out_free_pages:
	dma_free_contiguous(dev, page, alloc_size);
	return NULL;
}

static void *iommu_dma_alloc(struct device *dev, size_t size,
		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	struct page *page = NULL;
	void *cpu_addr;

	gfp |= __GFP_ZERO;

	if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) &&
	    !(attrs & DMA_ATTR_FORCE_CONTIGUOUS))
		return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs);

	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    !gfpflags_allow_blocking(gfp) && !coherent)
		cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
	else
		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
	if (!cpu_addr)
		return NULL;

	*handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
			dev->coherent_dma_mask);
	if (*handle == DMA_MAPPING_ERROR) {
		__iommu_dma_free(dev, size, cpu_addr);
		return NULL;
	}

	return cpu_addr;
}

static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn, off = vma->vm_pgoff;
	int ret;

	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
		return -ENXIO;

	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
		struct page **pages = dma_common_find_pages(cpu_addr);

		if (pages)
			return __iommu_dma_mmap(pages, size, vma);
		pfn = vmalloc_to_pfn(cpu_addr);
	} else {
		pfn = page_to_pfn(virt_to_page(cpu_addr));
	}

	return remap_pfn_range(vma, vma->vm_start, pfn + off,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}

static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
	struct page *page;
	int ret;

	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
		struct page **pages = dma_common_find_pages(cpu_addr);

		if (pages) {
			return sg_alloc_table_from_pages(sgt, pages,
					PAGE_ALIGN(size) >> PAGE_SHIFT,
					0, size, GFP_KERNEL);
		}

		page = vmalloc_to_page(cpu_addr);
	} else {
		page = virt_to_page(cpu_addr);
	}

	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
	if (!ret)
		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
	return ret;
}

static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);

	return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
}

static const struct dma_map_ops iommu_dma_ops = {
	.alloc			= iommu_dma_alloc,
	.free			= iommu_dma_free,
	.mmap			= iommu_dma_mmap,
	.get_sgtable		= iommu_dma_get_sgtable,
	.map_page		= iommu_dma_map_page,
	.unmap_page		= iommu_dma_unmap_page,
	.map_sg			= iommu_dma_map_sg,
	.unmap_sg		= iommu_dma_unmap_sg,
	.sync_single_for_cpu	= iommu_dma_sync_single_for_cpu,
	.sync_single_for_device	= iommu_dma_sync_single_for_device,
	.sync_sg_for_cpu	= iommu_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= iommu_dma_sync_sg_for_device,
	.map_resource		= iommu_dma_map_resource,
	.unmap_resource		= iommu_dma_unmap_resource,
	.get_merge_boundary	= iommu_dma_get_merge_boundary,
};

/*
 * The IOMMU core code allocates the default DMA domain, which the underlying
 * IOMMU driver needs to support via the dma-iommu layer.
 */
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (!domain)
		goto out_err;

	if (domain->type == IOMMU_DOMAIN_DMA) {
		if (iommu_dma_init_domain(domain, dma_base, size, dev))
			goto out_err;
		dev->dma_ops = &iommu_dma_ops;
	}

	return;
out_err:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
}
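
/*
 * Illustrative sketch: architecture glue is expected to call
 * iommu_setup_dma_ops() when a device is being configured for DMA,
 * roughly as arm64's arch_setup_dma_ops() does:
 *
 *	void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 *				const struct iommu_ops *iommu, bool coherent)
 *	{
 *		dev->dma_coherent = coherent;
 *		if (iommu)
 *			iommu_setup_dma_ops(dev, dma_base, size);
 *	}
 */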

static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
		phys_addr_t msi_addr, struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iommu_dma_msi_page *msi_page;
	dma_addr_t iova;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
	size_t size = cookie_msi_granule(cookie);

	msi_addr &= ~(phys_addr_t)(size - 1);
	list_for_each_entry(msi_page, &cookie->msi_page_list, list)
		if (msi_page->phys == msi_addr)
			return msi_page;

	msi_page = kzalloc(sizeof(*msi_page), GFP_ATOMIC);
	if (!msi_page)
		return NULL;

	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
	if (!iova)
		goto out_free_page;

	if (iommu_map(domain, iova, msi_addr, size, prot))
		goto out_free_iova;

	INIT_LIST_HEAD(&msi_page->list);
	msi_page->phys = msi_addr;
	msi_page->iova = iova;
	list_add(&msi_page->list, &cookie->msi_page_list);
	return msi_page;

out_free_iova:
	iommu_dma_free_iova(cookie, iova, size);
out_free_page:
	kfree(msi_page);
	return NULL;
}

int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
{
	struct device *dev = msi_desc_to_dev(desc);
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	struct iommu_dma_cookie *cookie;
	struct iommu_dma_msi_page *msi_page;
	unsigned long flags;

	if (!domain || !domain->iova_cookie) {
		desc->iommu_cookie = NULL;
		return 0;
	}

	cookie = domain->iova_cookie;

	/*
	 * We disable IRQs to rule out a possible inversion against
	 * irq_desc_lock if, say, someone tries to retarget the affinity
	 * of an MSI from within an IPI handler.
	 */
	spin_lock_irqsave(&cookie->msi_lock, flags);
	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
	spin_unlock_irqrestore(&cookie->msi_lock, flags);

	msi_desc_set_iommu_cookie(desc, msi_page);

	if (!msi_page)
		return -ENOMEM;
	return 0;
}

void iommu_dma_compose_msi_msg(struct msi_desc *desc,
			       struct msi_msg *msg)
{
	struct device *dev = msi_desc_to_dev(desc);
	const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	const struct iommu_dma_msi_page *msi_page;

	msi_page = msi_desc_get_iommu_cookie(desc);

	if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
		return;

	msg->address_hi = upper_32_bits(msi_page->iova);
	msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
	msg->address_lo += lower_32_bits(msi_page->iova);
}

static int iommu_dma_init(void)
{
	return iova_cache_get();
}
arch_initcall(iommu_dma_init);
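
/*
 * Illustrative sketch: an MSI irqchip driver consumes
 * iommu_dma_prepare_msi() and iommu_dma_compose_msi_msg() above by
 * preparing the translation when the MSI is allocated and by fixing up
 * the doorbell address when the message is composed, along the lines of
 * (hypothetical "my_" names):
 *
 *	err = iommu_dma_prepare_msi(desc, my_doorbell_phys);
 *
 *	...
 *
 *	msg->address_lo = lower_32_bits(my_doorbell_phys);
 *	msg->address_hi = upper_32_bits(my_doorbell_phys);
 *	msg->data = my_event_id;
 *	iommu_dma_compose_msi_msg(desc, msg);
 */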