1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * A fairly generic DMA-API to IOMMU-API glue layer. 4 * 5 * Copyright (C) 2014-2015 ARM Ltd. 6 * 7 * based in part on arch/arm/mm/dma-mapping.c: 8 * Copyright (C) 2000-2004 Russell King 9 */ 10 11 #include <linux/acpi_iort.h> 12 #include <linux/atomic.h> 13 #include <linux/crash_dump.h> 14 #include <linux/device.h> 15 #include <linux/dma-direct.h> 16 #include <linux/dma-iommu.h> 17 #include <linux/dma-map-ops.h> 18 #include <linux/gfp.h> 19 #include <linux/huge_mm.h> 20 #include <linux/iommu.h> 21 #include <linux/iova.h> 22 #include <linux/irq.h> 23 #include <linux/list_sort.h> 24 #include <linux/mm.h> 25 #include <linux/mutex.h> 26 #include <linux/pci.h> 27 #include <linux/scatterlist.h> 28 #include <linux/spinlock.h> 29 #include <linux/swiotlb.h> 30 #include <linux/vmalloc.h> 31 32 struct iommu_dma_msi_page { 33 struct list_head list; 34 dma_addr_t iova; 35 phys_addr_t phys; 36 }; 37 38 enum iommu_dma_cookie_type { 39 IOMMU_DMA_IOVA_COOKIE, 40 IOMMU_DMA_MSI_COOKIE, 41 }; 42 43 struct iommu_dma_cookie { 44 enum iommu_dma_cookie_type type; 45 union { 46 /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ 47 struct { 48 struct iova_domain iovad; 49 50 struct iova_fq __percpu *fq; /* Flush queue */ 51 /* Number of TLB flushes that have been started */ 52 atomic64_t fq_flush_start_cnt; 53 /* Number of TLB flushes that have been finished */ 54 atomic64_t fq_flush_finish_cnt; 55 /* Timer to regularily empty the flush queues */ 56 struct timer_list fq_timer; 57 /* 1 when timer is active, 0 when not */ 58 atomic_t fq_timer_on; 59 }; 60 /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ 61 dma_addr_t msi_iova; 62 }; 63 struct list_head msi_page_list; 64 65 /* Domain for flush queue callback; NULL if flush queue not in use */ 66 struct iommu_domain *fq_domain; 67 }; 68 69 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); 70 bool iommu_dma_forcedac __read_mostly; 71 72 static int __init iommu_dma_forcedac_setup(char *str) 73 { 74 int ret = kstrtobool(str, &iommu_dma_forcedac); 75 76 if (!ret && iommu_dma_forcedac) 77 pr_info("Forcing DAC for PCI devices\n"); 78 return ret; 79 } 80 early_param("iommu.forcedac", iommu_dma_forcedac_setup); 81 82 /* Number of entries per flush queue */ 83 #define IOVA_FQ_SIZE 256 84 85 /* Timeout (in ms) after which entries are flushed from the queue */ 86 #define IOVA_FQ_TIMEOUT 10 87 88 /* Flush queue entry for deferred flushing */ 89 struct iova_fq_entry { 90 unsigned long iova_pfn; 91 unsigned long pages; 92 struct list_head freelist; 93 u64 counter; /* Flush counter when this entry was added */ 94 }; 95 96 /* Per-CPU flush queue structure */ 97 struct iova_fq { 98 struct iova_fq_entry entries[IOVA_FQ_SIZE]; 99 unsigned int head, tail; 100 spinlock_t lock; 101 }; 102 103 #define fq_ring_for_each(i, fq) \ 104 for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) 105 106 static inline bool fq_full(struct iova_fq *fq) 107 { 108 assert_spin_locked(&fq->lock); 109 return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); 110 } 111 112 static inline unsigned int fq_ring_add(struct iova_fq *fq) 113 { 114 unsigned int idx = fq->tail; 115 116 assert_spin_locked(&fq->lock); 117 118 fq->tail = (idx + 1) % IOVA_FQ_SIZE; 119 120 return idx; 121 } 122 123 static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) 124 { 125 u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); 126 unsigned int idx; 127 128 assert_spin_locked(&fq->lock); 129 130 fq_ring_for_each(idx, fq) { 131 132 if (fq->entries[idx].counter >= counter) 133 break; 134 135 put_pages_list(&fq->entries[idx].freelist); 136 free_iova_fast(&cookie->iovad, 137 fq->entries[idx].iova_pfn, 138 fq->entries[idx].pages); 139 140 fq->head = (fq->head + 1) % IOVA_FQ_SIZE; 141 } 142 } 143 144 static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) 145 { 146 atomic64_inc(&cookie->fq_flush_start_cnt); 147 cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain); 148 atomic64_inc(&cookie->fq_flush_finish_cnt); 149 } 150 151 static void fq_flush_timeout(struct timer_list *t) 152 { 153 struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer); 154 int cpu; 155 156 atomic_set(&cookie->fq_timer_on, 0); 157 fq_flush_iotlb(cookie); 158 159 for_each_possible_cpu(cpu) { 160 unsigned long flags; 161 struct iova_fq *fq; 162 163 fq = per_cpu_ptr(cookie->fq, cpu); 164 spin_lock_irqsave(&fq->lock, flags); 165 fq_ring_free(cookie, fq); 166 spin_unlock_irqrestore(&fq->lock, flags); 167 } 168 } 169 170 static void queue_iova(struct iommu_dma_cookie *cookie, 171 unsigned long pfn, unsigned long pages, 172 struct list_head *freelist) 173 { 174 struct iova_fq *fq; 175 unsigned long flags; 176 unsigned int idx; 177 178 /* 179 * Order against the IOMMU driver's pagetable update from unmapping 180 * @pte, to guarantee that fq_flush_iotlb() observes that if called 181 * from a different CPU before we release the lock below. Full barrier 182 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially 183 * written fq state here. 184 */ 185 smp_mb(); 186 187 fq = raw_cpu_ptr(cookie->fq); 188 spin_lock_irqsave(&fq->lock, flags); 189 190 /* 191 * First remove all entries from the flush queue that have already been 192 * flushed out on another CPU. This makes the fq_full() check below less 193 * likely to be true. 194 */ 195 fq_ring_free(cookie, fq); 196 197 if (fq_full(fq)) { 198 fq_flush_iotlb(cookie); 199 fq_ring_free(cookie, fq); 200 } 201 202 idx = fq_ring_add(fq); 203 204 fq->entries[idx].iova_pfn = pfn; 205 fq->entries[idx].pages = pages; 206 fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); 207 list_splice(freelist, &fq->entries[idx].freelist); 208 209 spin_unlock_irqrestore(&fq->lock, flags); 210 211 /* Avoid false sharing as much as possible. */ 212 if (!atomic_read(&cookie->fq_timer_on) && 213 !atomic_xchg(&cookie->fq_timer_on, 1)) 214 mod_timer(&cookie->fq_timer, 215 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 216 } 217 218 static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) 219 { 220 int cpu, idx; 221 222 if (!cookie->fq) 223 return; 224 225 del_timer_sync(&cookie->fq_timer); 226 /* The IOVAs will be torn down separately, so just free our queued pages */ 227 for_each_possible_cpu(cpu) { 228 struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu); 229 230 fq_ring_for_each(idx, fq) 231 put_pages_list(&fq->entries[idx].freelist); 232 } 233 234 free_percpu(cookie->fq); 235 } 236 237 /* sysfs updates are serialised by the mutex of the group owning @domain */ 238 int iommu_dma_init_fq(struct iommu_domain *domain) 239 { 240 struct iommu_dma_cookie *cookie = domain->iova_cookie; 241 struct iova_fq __percpu *queue; 242 int i, cpu; 243 244 if (cookie->fq_domain) 245 return 0; 246 247 atomic64_set(&cookie->fq_flush_start_cnt, 0); 248 atomic64_set(&cookie->fq_flush_finish_cnt, 0); 249 250 queue = alloc_percpu(struct iova_fq); 251 if (!queue) { 252 pr_warn("iova flush queue initialization failed\n"); 253 return -ENOMEM; 254 } 255 256 for_each_possible_cpu(cpu) { 257 struct iova_fq *fq = per_cpu_ptr(queue, cpu); 258 259 fq->head = 0; 260 fq->tail = 0; 261 262 spin_lock_init(&fq->lock); 263 264 for (i = 0; i < IOVA_FQ_SIZE; i++) 265 INIT_LIST_HEAD(&fq->entries[i].freelist); 266 } 267 268 cookie->fq = queue; 269 270 timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); 271 atomic_set(&cookie->fq_timer_on, 0); 272 /* 273 * Prevent incomplete fq state being observable. Pairs with path from 274 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() 275 */ 276 smp_wmb(); 277 WRITE_ONCE(cookie->fq_domain, domain); 278 return 0; 279 } 280 281 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) 282 { 283 if (cookie->type == IOMMU_DMA_IOVA_COOKIE) 284 return cookie->iovad.granule; 285 return PAGE_SIZE; 286 } 287 288 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) 289 { 290 struct iommu_dma_cookie *cookie; 291 292 cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); 293 if (cookie) { 294 INIT_LIST_HEAD(&cookie->msi_page_list); 295 cookie->type = type; 296 } 297 return cookie; 298 } 299 300 /** 301 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain 302 * @domain: IOMMU domain to prepare for DMA-API usage 303 */ 304 int iommu_get_dma_cookie(struct iommu_domain *domain) 305 { 306 if (domain->iova_cookie) 307 return -EEXIST; 308 309 domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE); 310 if (!domain->iova_cookie) 311 return -ENOMEM; 312 313 return 0; 314 } 315 316 /** 317 * iommu_get_msi_cookie - Acquire just MSI remapping resources 318 * @domain: IOMMU domain to prepare 319 * @base: Start address of IOVA region for MSI mappings 320 * 321 * Users who manage their own IOVA allocation and do not want DMA API support, 322 * but would still like to take advantage of automatic MSI remapping, can use 323 * this to initialise their own domain appropriately. Users should reserve a 324 * contiguous IOVA region, starting at @base, large enough to accommodate the 325 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address 326 * used by the devices attached to @domain. 327 */ 328 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) 329 { 330 struct iommu_dma_cookie *cookie; 331 332 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 333 return -EINVAL; 334 335 if (domain->iova_cookie) 336 return -EEXIST; 337 338 cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE); 339 if (!cookie) 340 return -ENOMEM; 341 342 cookie->msi_iova = base; 343 domain->iova_cookie = cookie; 344 return 0; 345 } 346 EXPORT_SYMBOL(iommu_get_msi_cookie); 347 348 /** 349 * iommu_put_dma_cookie - Release a domain's DMA mapping resources 350 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or 351 * iommu_get_msi_cookie() 352 */ 353 void iommu_put_dma_cookie(struct iommu_domain *domain) 354 { 355 struct iommu_dma_cookie *cookie = domain->iova_cookie; 356 struct iommu_dma_msi_page *msi, *tmp; 357 358 if (!cookie) 359 return; 360 361 if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) { 362 iommu_dma_free_fq(cookie); 363 put_iova_domain(&cookie->iovad); 364 } 365 366 list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { 367 list_del(&msi->list); 368 kfree(msi); 369 } 370 kfree(cookie); 371 domain->iova_cookie = NULL; 372 } 373 374 /** 375 * iommu_dma_get_resv_regions - Reserved region driver helper 376 * @dev: Device from iommu_get_resv_regions() 377 * @list: Reserved region list from iommu_get_resv_regions() 378 * 379 * IOMMU drivers can use this to implement their .get_resv_regions callback 380 * for general non-IOMMU-specific reservations. Currently, this covers GICv3 381 * ITS region reservation on ACPI based ARM platforms that may require HW MSI 382 * reservation. 383 */ 384 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) 385 { 386 387 if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode)) 388 iort_iommu_msi_get_resv_regions(dev, list); 389 390 } 391 EXPORT_SYMBOL(iommu_dma_get_resv_regions); 392 393 static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, 394 phys_addr_t start, phys_addr_t end) 395 { 396 struct iova_domain *iovad = &cookie->iovad; 397 struct iommu_dma_msi_page *msi_page; 398 int i, num_pages; 399 400 start -= iova_offset(iovad, start); 401 num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); 402 403 for (i = 0; i < num_pages; i++) { 404 msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); 405 if (!msi_page) 406 return -ENOMEM; 407 408 msi_page->phys = start; 409 msi_page->iova = start; 410 INIT_LIST_HEAD(&msi_page->list); 411 list_add(&msi_page->list, &cookie->msi_page_list); 412 start += iovad->granule; 413 } 414 415 return 0; 416 } 417 418 static int iommu_dma_ranges_sort(void *priv, const struct list_head *a, 419 const struct list_head *b) 420 { 421 struct resource_entry *res_a = list_entry(a, typeof(*res_a), node); 422 struct resource_entry *res_b = list_entry(b, typeof(*res_b), node); 423 424 return res_a->res->start > res_b->res->start; 425 } 426 427 static int iova_reserve_pci_windows(struct pci_dev *dev, 428 struct iova_domain *iovad) 429 { 430 struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus); 431 struct resource_entry *window; 432 unsigned long lo, hi; 433 phys_addr_t start = 0, end; 434 435 resource_list_for_each_entry(window, &bridge->windows) { 436 if (resource_type(window->res) != IORESOURCE_MEM) 437 continue; 438 439 lo = iova_pfn(iovad, window->res->start - window->offset); 440 hi = iova_pfn(iovad, window->res->end - window->offset); 441 reserve_iova(iovad, lo, hi); 442 } 443 444 /* Get reserved DMA windows from host bridge */ 445 list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort); 446 resource_list_for_each_entry(window, &bridge->dma_ranges) { 447 end = window->res->start - window->offset; 448 resv_iova: 449 if (end > start) { 450 lo = iova_pfn(iovad, start); 451 hi = iova_pfn(iovad, end); 452 reserve_iova(iovad, lo, hi); 453 } else if (end < start) { 454 /* DMA ranges should be non-overlapping */ 455 dev_err(&dev->dev, 456 "Failed to reserve IOVA [%pa-%pa]\n", 457 &start, &end); 458 return -EINVAL; 459 } 460 461 start = window->res->end - window->offset + 1; 462 /* If window is last entry */ 463 if (window->node.next == &bridge->dma_ranges && 464 end != ~(phys_addr_t)0) { 465 end = ~(phys_addr_t)0; 466 goto resv_iova; 467 } 468 } 469 470 return 0; 471 } 472 473 static int iova_reserve_iommu_regions(struct device *dev, 474 struct iommu_domain *domain) 475 { 476 struct iommu_dma_cookie *cookie = domain->iova_cookie; 477 struct iova_domain *iovad = &cookie->iovad; 478 struct iommu_resv_region *region; 479 LIST_HEAD(resv_regions); 480 int ret = 0; 481 482 if (dev_is_pci(dev)) { 483 ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad); 484 if (ret) 485 return ret; 486 } 487 488 iommu_get_resv_regions(dev, &resv_regions); 489 list_for_each_entry(region, &resv_regions, list) { 490 unsigned long lo, hi; 491 492 /* We ARE the software that manages these! */ 493 if (region->type == IOMMU_RESV_SW_MSI) 494 continue; 495 496 lo = iova_pfn(iovad, region->start); 497 hi = iova_pfn(iovad, region->start + region->length - 1); 498 reserve_iova(iovad, lo, hi); 499 500 if (region->type == IOMMU_RESV_MSI) 501 ret = cookie_init_hw_msi_region(cookie, region->start, 502 region->start + region->length); 503 if (ret) 504 break; 505 } 506 iommu_put_resv_regions(dev, &resv_regions); 507 508 return ret; 509 } 510 511 static bool dev_is_untrusted(struct device *dev) 512 { 513 return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; 514 } 515 516 static bool dev_use_swiotlb(struct device *dev) 517 { 518 return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev); 519 } 520 521 /** 522 * iommu_dma_init_domain - Initialise a DMA mapping domain 523 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() 524 * @base: IOVA at which the mappable address space starts 525 * @limit: Last address of the IOVA space 526 * @dev: Device the domain is being initialised for 527 * 528 * @base and @limit + 1 should be exact multiples of IOMMU page granularity to 529 * avoid rounding surprises. If necessary, we reserve the page at address 0 530 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but 531 * any change which could make prior IOVAs invalid will fail. 532 */ 533 static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, 534 dma_addr_t limit, struct device *dev) 535 { 536 struct iommu_dma_cookie *cookie = domain->iova_cookie; 537 unsigned long order, base_pfn; 538 struct iova_domain *iovad; 539 int ret; 540 541 if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) 542 return -EINVAL; 543 544 iovad = &cookie->iovad; 545 546 /* Use the smallest supported page size for IOVA granularity */ 547 order = __ffs(domain->pgsize_bitmap); 548 base_pfn = max_t(unsigned long, 1, base >> order); 549 550 /* Check the domain allows at least some access to the device... */ 551 if (domain->geometry.force_aperture) { 552 if (base > domain->geometry.aperture_end || 553 limit < domain->geometry.aperture_start) { 554 pr_warn("specified DMA range outside IOMMU capability\n"); 555 return -EFAULT; 556 } 557 /* ...then finally give it a kicking to make sure it fits */ 558 base_pfn = max_t(unsigned long, base_pfn, 559 domain->geometry.aperture_start >> order); 560 } 561 562 /* start_pfn is always nonzero for an already-initialised domain */ 563 if (iovad->start_pfn) { 564 if (1UL << order != iovad->granule || 565 base_pfn != iovad->start_pfn) { 566 pr_warn("Incompatible range for DMA domain\n"); 567 return -EFAULT; 568 } 569 570 return 0; 571 } 572 573 init_iova_domain(iovad, 1UL << order, base_pfn); 574 ret = iova_domain_init_rcaches(iovad); 575 if (ret) 576 return ret; 577 578 /* If the FQ fails we can simply fall back to strict mode */ 579 if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain)) 580 domain->type = IOMMU_DOMAIN_DMA; 581 582 return iova_reserve_iommu_regions(dev, domain); 583 } 584 585 /** 586 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API 587 * page flags. 588 * @dir: Direction of DMA transfer 589 * @coherent: Is the DMA master cache-coherent? 590 * @attrs: DMA attributes for the mapping 591 * 592 * Return: corresponding IOMMU API page protection flags 593 */ 594 static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, 595 unsigned long attrs) 596 { 597 int prot = coherent ? IOMMU_CACHE : 0; 598 599 if (attrs & DMA_ATTR_PRIVILEGED) 600 prot |= IOMMU_PRIV; 601 602 switch (dir) { 603 case DMA_BIDIRECTIONAL: 604 return prot | IOMMU_READ | IOMMU_WRITE; 605 case DMA_TO_DEVICE: 606 return prot | IOMMU_READ; 607 case DMA_FROM_DEVICE: 608 return prot | IOMMU_WRITE; 609 default: 610 return 0; 611 } 612 } 613 614 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, 615 size_t size, u64 dma_limit, struct device *dev) 616 { 617 struct iommu_dma_cookie *cookie = domain->iova_cookie; 618 struct iova_domain *iovad = &cookie->iovad; 619 unsigned long shift, iova_len, iova = 0; 620 621 if (cookie->type == IOMMU_DMA_MSI_COOKIE) { 622 cookie->msi_iova += size; 623 return cookie->msi_iova - size; 624 } 625 626 shift = iova_shift(iovad); 627 iova_len = size >> shift; 628 629 dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); 630 631 if (domain->geometry.force_aperture) 632 dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end); 633 634 /* Try to get PCI devices a SAC address */ 635 if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev)) 636 iova = alloc_iova_fast(iovad, iova_len, 637 DMA_BIT_MASK(32) >> shift, false); 638 639 if (!iova) 640 iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, 641 true); 642 643 return (dma_addr_t)iova << shift; 644 } 645 646 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, 647 dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) 648 { 649 struct iova_domain *iovad = &cookie->iovad; 650 651 /* The MSI case is only ever cleaning up its most recent allocation */ 652 if (cookie->type == IOMMU_DMA_MSI_COOKIE) 653 cookie->msi_iova -= size; 654 else if (gather && gather->queued) 655 queue_iova(cookie, iova_pfn(iovad, iova), 656 size >> iova_shift(iovad), 657 &gather->freelist); 658 else 659 free_iova_fast(iovad, iova_pfn(iovad, iova), 660 size >> iova_shift(iovad)); 661 } 662 663 static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, 664 size_t size) 665 { 666 struct iommu_domain *domain = iommu_get_dma_domain(dev); 667 struct iommu_dma_cookie *cookie = domain->iova_cookie; 668 struct iova_domain *iovad = &cookie->iovad; 669 size_t iova_off = iova_offset(iovad, dma_addr); 670 struct iommu_iotlb_gather iotlb_gather; 671 size_t unmapped; 672 673 dma_addr -= iova_off; 674 size = iova_align(iovad, size + iova_off); 675 iommu_iotlb_gather_init(&iotlb_gather); 676 iotlb_gather.queued = READ_ONCE(cookie->fq_domain); 677 678 unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); 679 WARN_ON(unmapped != size); 680 681 if (!iotlb_gather.queued) 682 iommu_iotlb_sync(domain, &iotlb_gather); 683 iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); 684 } 685 686 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, 687 size_t size, int prot, u64 dma_mask) 688 { 689 struct iommu_domain *domain = iommu_get_dma_domain(dev); 690 struct iommu_dma_cookie *cookie = domain->iova_cookie; 691 struct iova_domain *iovad = &cookie->iovad; 692 size_t iova_off = iova_offset(iovad, phys); 693 dma_addr_t iova; 694 695 if (static_branch_unlikely(&iommu_deferred_attach_enabled) && 696 iommu_deferred_attach(dev, domain)) 697 return DMA_MAPPING_ERROR; 698 699 size = iova_align(iovad, size + iova_off); 700 701 iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev); 702 if (!iova) 703 return DMA_MAPPING_ERROR; 704 705 if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) { 706 iommu_dma_free_iova(cookie, iova, size, NULL); 707 return DMA_MAPPING_ERROR; 708 } 709 return iova + iova_off; 710 } 711 712 static void __iommu_dma_free_pages(struct page **pages, int count) 713 { 714 while (count--) 715 __free_page(pages[count]); 716 kvfree(pages); 717 } 718 719 static struct page **__iommu_dma_alloc_pages(struct device *dev, 720 unsigned int count, unsigned long order_mask, gfp_t gfp) 721 { 722 struct page **pages; 723 unsigned int i = 0, nid = dev_to_node(dev); 724 725 order_mask &= (2U << MAX_ORDER) - 1; 726 if (!order_mask) 727 return NULL; 728 729 pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL); 730 if (!pages) 731 return NULL; 732 733 /* IOMMU can map any pages, so himem can also be used here */ 734 gfp |= __GFP_NOWARN | __GFP_HIGHMEM; 735 736 /* It makes no sense to muck about with huge pages */ 737 gfp &= ~__GFP_COMP; 738 739 while (count) { 740 struct page *page = NULL; 741 unsigned int order_size; 742 743 /* 744 * Higher-order allocations are a convenience rather 745 * than a necessity, hence using __GFP_NORETRY until 746 * falling back to minimum-order allocations. 747 */ 748 for (order_mask &= (2U << __fls(count)) - 1; 749 order_mask; order_mask &= ~order_size) { 750 unsigned int order = __fls(order_mask); 751 gfp_t alloc_flags = gfp; 752 753 order_size = 1U << order; 754 if (order_mask > order_size) 755 alloc_flags |= __GFP_NORETRY; 756 page = alloc_pages_node(nid, alloc_flags, order); 757 if (!page) 758 continue; 759 if (order) 760 split_page(page, order); 761 break; 762 } 763 if (!page) { 764 __iommu_dma_free_pages(pages, i); 765 return NULL; 766 } 767 count -= order_size; 768 while (order_size--) 769 pages[i++] = page++; 770 } 771 return pages; 772 } 773 774 /* 775 * If size is less than PAGE_SIZE, then a full CPU page will be allocated, 776 * but an IOMMU which supports smaller pages might not map the whole thing. 777 */ 778 static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, 779 size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot, 780 unsigned long attrs) 781 { 782 struct iommu_domain *domain = iommu_get_dma_domain(dev); 783 struct iommu_dma_cookie *cookie = domain->iova_cookie; 784 struct iova_domain *iovad = &cookie->iovad; 785 bool coherent = dev_is_dma_coherent(dev); 786 int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); 787 unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; 788 struct page **pages; 789 dma_addr_t iova; 790 ssize_t ret; 791 792 if (static_branch_unlikely(&iommu_deferred_attach_enabled) && 793 iommu_deferred_attach(dev, domain)) 794 return NULL; 795 796 min_size = alloc_sizes & -alloc_sizes; 797 if (min_size < PAGE_SIZE) { 798 min_size = PAGE_SIZE; 799 alloc_sizes |= PAGE_SIZE; 800 } else { 801 size = ALIGN(size, min_size); 802 } 803 if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) 804 alloc_sizes = min_size; 805 806 count = PAGE_ALIGN(size) >> PAGE_SHIFT; 807 pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT, 808 gfp); 809 if (!pages) 810 return NULL; 811 812 size = iova_align(iovad, size); 813 iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev); 814 if (!iova) 815 goto out_free_pages; 816 817 if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL)) 818 goto out_free_iova; 819 820 if (!(ioprot & IOMMU_CACHE)) { 821 struct scatterlist *sg; 822 int i; 823 824 for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) 825 arch_dma_prep_coherent(sg_page(sg), sg->length); 826 } 827 828 ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot); 829 if (ret < 0 || ret < size) 830 goto out_free_sg; 831 832 sgt->sgl->dma_address = iova; 833 sgt->sgl->dma_length = size; 834 return pages; 835 836 out_free_sg: 837 sg_free_table(sgt); 838 out_free_iova: 839 iommu_dma_free_iova(cookie, iova, size, NULL); 840 out_free_pages: 841 __iommu_dma_free_pages(pages, count); 842 return NULL; 843 } 844 845 static void *iommu_dma_alloc_remap(struct device *dev, size_t size, 846 dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot, 847 unsigned long attrs) 848 { 849 struct page **pages; 850 struct sg_table sgt; 851 void *vaddr; 852 853 pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot, 854 attrs); 855 if (!pages) 856 return NULL; 857 *dma_handle = sgt.sgl->dma_address; 858 sg_free_table(&sgt); 859 vaddr = dma_common_pages_remap(pages, size, prot, 860 __builtin_return_address(0)); 861 if (!vaddr) 862 goto out_unmap; 863 return vaddr; 864 865 out_unmap: 866 __iommu_dma_unmap(dev, *dma_handle, size); 867 __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); 868 return NULL; 869 } 870 871 static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, 872 size_t size, enum dma_data_direction dir, gfp_t gfp, 873 unsigned long attrs) 874 { 875 struct dma_sgt_handle *sh; 876 877 sh = kmalloc(sizeof(*sh), gfp); 878 if (!sh) 879 return NULL; 880 881 sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, 882 PAGE_KERNEL, attrs); 883 if (!sh->pages) { 884 kfree(sh); 885 return NULL; 886 } 887 return &sh->sgt; 888 } 889 890 static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, 891 struct sg_table *sgt, enum dma_data_direction dir) 892 { 893 struct dma_sgt_handle *sh = sgt_handle(sgt); 894 895 __iommu_dma_unmap(dev, sgt->sgl->dma_address, size); 896 __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT); 897 sg_free_table(&sh->sgt); 898 kfree(sh); 899 } 900 901 static void iommu_dma_sync_single_for_cpu(struct device *dev, 902 dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) 903 { 904 phys_addr_t phys; 905 906 if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev)) 907 return; 908 909 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); 910 if (!dev_is_dma_coherent(dev)) 911 arch_sync_dma_for_cpu(phys, size, dir); 912 913 if (is_swiotlb_buffer(dev, phys)) 914 swiotlb_sync_single_for_cpu(dev, phys, size, dir); 915 } 916 917 static void iommu_dma_sync_single_for_device(struct device *dev, 918 dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) 919 { 920 phys_addr_t phys; 921 922 if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev)) 923 return; 924 925 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); 926 if (is_swiotlb_buffer(dev, phys)) 927 swiotlb_sync_single_for_device(dev, phys, size, dir); 928 929 if (!dev_is_dma_coherent(dev)) 930 arch_sync_dma_for_device(phys, size, dir); 931 } 932 933 static void iommu_dma_sync_sg_for_cpu(struct device *dev, 934 struct scatterlist *sgl, int nelems, 935 enum dma_data_direction dir) 936 { 937 struct scatterlist *sg; 938 int i; 939 940 if (dev_use_swiotlb(dev)) 941 for_each_sg(sgl, sg, nelems, i) 942 iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg), 943 sg->length, dir); 944 else if (!dev_is_dma_coherent(dev)) 945 for_each_sg(sgl, sg, nelems, i) 946 arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); 947 } 948 949 static void iommu_dma_sync_sg_for_device(struct device *dev, 950 struct scatterlist *sgl, int nelems, 951 enum dma_data_direction dir) 952 { 953 struct scatterlist *sg; 954 int i; 955 956 if (dev_use_swiotlb(dev)) 957 for_each_sg(sgl, sg, nelems, i) 958 iommu_dma_sync_single_for_device(dev, 959 sg_dma_address(sg), 960 sg->length, dir); 961 else if (!dev_is_dma_coherent(dev)) 962 for_each_sg(sgl, sg, nelems, i) 963 arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); 964 } 965 966 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, 967 unsigned long offset, size_t size, enum dma_data_direction dir, 968 unsigned long attrs) 969 { 970 phys_addr_t phys = page_to_phys(page) + offset; 971 bool coherent = dev_is_dma_coherent(dev); 972 int prot = dma_info_to_prot(dir, coherent, attrs); 973 struct iommu_domain *domain = iommu_get_dma_domain(dev); 974 struct iommu_dma_cookie *cookie = domain->iova_cookie; 975 struct iova_domain *iovad = &cookie->iovad; 976 dma_addr_t iova, dma_mask = dma_get_mask(dev); 977 978 /* 979 * If both the physical buffer start address and size are 980 * page aligned, we don't need to use a bounce page. 981 */ 982 if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) { 983 void *padding_start; 984 size_t padding_size, aligned_size; 985 986 if (!is_swiotlb_active(dev)) { 987 dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); 988 return DMA_MAPPING_ERROR; 989 } 990 991 aligned_size = iova_align(iovad, size); 992 phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, 993 iova_mask(iovad), dir, attrs); 994 995 if (phys == DMA_MAPPING_ERROR) 996 return DMA_MAPPING_ERROR; 997 998 /* Cleanup the padding area. */ 999 padding_start = phys_to_virt(phys); 1000 padding_size = aligned_size; 1001 1002 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 1003 (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) { 1004 padding_start += size; 1005 padding_size -= size; 1006 } 1007 1008 memset(padding_start, 0, padding_size); 1009 } 1010 1011 if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1012 arch_sync_dma_for_device(phys, size, dir); 1013 1014 iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); 1015 if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) 1016 swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); 1017 return iova; 1018 } 1019 1020 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, 1021 size_t size, enum dma_data_direction dir, unsigned long attrs) 1022 { 1023 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1024 phys_addr_t phys; 1025 1026 phys = iommu_iova_to_phys(domain, dma_handle); 1027 if (WARN_ON(!phys)) 1028 return; 1029 1030 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) 1031 arch_sync_dma_for_cpu(phys, size, dir); 1032 1033 __iommu_dma_unmap(dev, dma_handle, size); 1034 1035 if (unlikely(is_swiotlb_buffer(dev, phys))) 1036 swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); 1037 } 1038 1039 /* 1040 * Prepare a successfully-mapped scatterlist to give back to the caller. 1041 * 1042 * At this point the segments are already laid out by iommu_dma_map_sg() to 1043 * avoid individually crossing any boundaries, so we merely need to check a 1044 * segment's start address to avoid concatenating across one. 1045 */ 1046 static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, 1047 dma_addr_t dma_addr) 1048 { 1049 struct scatterlist *s, *cur = sg; 1050 unsigned long seg_mask = dma_get_seg_boundary(dev); 1051 unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); 1052 int i, count = 0; 1053 1054 for_each_sg(sg, s, nents, i) { 1055 /* Restore this segment's original unaligned fields first */ 1056 unsigned int s_iova_off = sg_dma_address(s); 1057 unsigned int s_length = sg_dma_len(s); 1058 unsigned int s_iova_len = s->length; 1059 1060 s->offset += s_iova_off; 1061 s->length = s_length; 1062 sg_dma_address(s) = DMA_MAPPING_ERROR; 1063 sg_dma_len(s) = 0; 1064 1065 /* 1066 * Now fill in the real DMA data. If... 1067 * - there is a valid output segment to append to 1068 * - and this segment starts on an IOVA page boundary 1069 * - but doesn't fall at a segment boundary 1070 * - and wouldn't make the resulting output segment too long 1071 */ 1072 if (cur_len && !s_iova_off && (dma_addr & seg_mask) && 1073 (max_len - cur_len >= s_length)) { 1074 /* ...then concatenate it with the previous one */ 1075 cur_len += s_length; 1076 } else { 1077 /* Otherwise start the next output segment */ 1078 if (i > 0) 1079 cur = sg_next(cur); 1080 cur_len = s_length; 1081 count++; 1082 1083 sg_dma_address(cur) = dma_addr + s_iova_off; 1084 } 1085 1086 sg_dma_len(cur) = cur_len; 1087 dma_addr += s_iova_len; 1088 1089 if (s_length + s_iova_off < s_iova_len) 1090 cur_len = 0; 1091 } 1092 return count; 1093 } 1094 1095 /* 1096 * If mapping failed, then just restore the original list, 1097 * but making sure the DMA fields are invalidated. 1098 */ 1099 static void __invalidate_sg(struct scatterlist *sg, int nents) 1100 { 1101 struct scatterlist *s; 1102 int i; 1103 1104 for_each_sg(sg, s, nents, i) { 1105 if (sg_dma_address(s) != DMA_MAPPING_ERROR) 1106 s->offset += sg_dma_address(s); 1107 if (sg_dma_len(s)) 1108 s->length = sg_dma_len(s); 1109 sg_dma_address(s) = DMA_MAPPING_ERROR; 1110 sg_dma_len(s) = 0; 1111 } 1112 } 1113 1114 static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg, 1115 int nents, enum dma_data_direction dir, unsigned long attrs) 1116 { 1117 struct scatterlist *s; 1118 int i; 1119 1120 for_each_sg(sg, s, nents, i) 1121 iommu_dma_unmap_page(dev, sg_dma_address(s), 1122 sg_dma_len(s), dir, attrs); 1123 } 1124 1125 static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg, 1126 int nents, enum dma_data_direction dir, unsigned long attrs) 1127 { 1128 struct scatterlist *s; 1129 int i; 1130 1131 for_each_sg(sg, s, nents, i) { 1132 sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s), 1133 s->offset, s->length, dir, attrs); 1134 if (sg_dma_address(s) == DMA_MAPPING_ERROR) 1135 goto out_unmap; 1136 sg_dma_len(s) = s->length; 1137 } 1138 1139 return nents; 1140 1141 out_unmap: 1142 iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); 1143 return -EIO; 1144 } 1145 1146 /* 1147 * The DMA API client is passing in a scatterlist which could describe 1148 * any old buffer layout, but the IOMMU API requires everything to be 1149 * aligned to IOMMU pages. Hence the need for this complicated bit of 1150 * impedance-matching, to be able to hand off a suitably-aligned list, 1151 * but still preserve the original offsets and sizes for the caller. 1152 */ 1153 static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, 1154 int nents, enum dma_data_direction dir, unsigned long attrs) 1155 { 1156 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1157 struct iommu_dma_cookie *cookie = domain->iova_cookie; 1158 struct iova_domain *iovad = &cookie->iovad; 1159 struct scatterlist *s, *prev = NULL; 1160 int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); 1161 dma_addr_t iova; 1162 size_t iova_len = 0; 1163 unsigned long mask = dma_get_seg_boundary(dev); 1164 ssize_t ret; 1165 int i; 1166 1167 if (static_branch_unlikely(&iommu_deferred_attach_enabled)) { 1168 ret = iommu_deferred_attach(dev, domain); 1169 if (ret) 1170 goto out; 1171 } 1172 1173 if (dev_use_swiotlb(dev)) 1174 return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs); 1175 1176 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1177 iommu_dma_sync_sg_for_device(dev, sg, nents, dir); 1178 1179 /* 1180 * Work out how much IOVA space we need, and align the segments to 1181 * IOVA granules for the IOMMU driver to handle. With some clever 1182 * trickery we can modify the list in-place, but reversibly, by 1183 * stashing the unaligned parts in the as-yet-unused DMA fields. 1184 */ 1185 for_each_sg(sg, s, nents, i) { 1186 size_t s_iova_off = iova_offset(iovad, s->offset); 1187 size_t s_length = s->length; 1188 size_t pad_len = (mask - iova_len + 1) & mask; 1189 1190 sg_dma_address(s) = s_iova_off; 1191 sg_dma_len(s) = s_length; 1192 s->offset -= s_iova_off; 1193 s_length = iova_align(iovad, s_length + s_iova_off); 1194 s->length = s_length; 1195 1196 /* 1197 * Due to the alignment of our single IOVA allocation, we can 1198 * depend on these assumptions about the segment boundary mask: 1199 * - If mask size >= IOVA size, then the IOVA range cannot 1200 * possibly fall across a boundary, so we don't care. 1201 * - If mask size < IOVA size, then the IOVA range must start 1202 * exactly on a boundary, therefore we can lay things out 1203 * based purely on segment lengths without needing to know 1204 * the actual addresses beforehand. 1205 * - The mask must be a power of 2, so pad_len == 0 if 1206 * iova_len == 0, thus we cannot dereference prev the first 1207 * time through here (i.e. before it has a meaningful value). 1208 */ 1209 if (pad_len && pad_len < s_length - 1) { 1210 prev->length += pad_len; 1211 iova_len += pad_len; 1212 } 1213 1214 iova_len += s_length; 1215 prev = s; 1216 } 1217 1218 iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); 1219 if (!iova) { 1220 ret = -ENOMEM; 1221 goto out_restore_sg; 1222 } 1223 1224 /* 1225 * We'll leave any physical concatenation to the IOMMU driver's 1226 * implementation - it knows better than we do. 1227 */ 1228 ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot); 1229 if (ret < 0 || ret < iova_len) 1230 goto out_free_iova; 1231 1232 return __finalise_sg(dev, sg, nents, iova); 1233 1234 out_free_iova: 1235 iommu_dma_free_iova(cookie, iova, iova_len, NULL); 1236 out_restore_sg: 1237 __invalidate_sg(sg, nents); 1238 out: 1239 if (ret != -ENOMEM) 1240 return -EINVAL; 1241 return ret; 1242 } 1243 1244 static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 1245 int nents, enum dma_data_direction dir, unsigned long attrs) 1246 { 1247 dma_addr_t start, end; 1248 struct scatterlist *tmp; 1249 int i; 1250 1251 if (dev_use_swiotlb(dev)) { 1252 iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs); 1253 return; 1254 } 1255 1256 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1257 iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir); 1258 1259 /* 1260 * The scatterlist segments are mapped into a single 1261 * contiguous IOVA allocation, so this is incredibly easy. 1262 */ 1263 start = sg_dma_address(sg); 1264 for_each_sg(sg_next(sg), tmp, nents - 1, i) { 1265 if (sg_dma_len(tmp) == 0) 1266 break; 1267 sg = tmp; 1268 } 1269 end = sg_dma_address(sg) + sg_dma_len(sg); 1270 __iommu_dma_unmap(dev, start, end - start); 1271 } 1272 1273 static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, 1274 size_t size, enum dma_data_direction dir, unsigned long attrs) 1275 { 1276 return __iommu_dma_map(dev, phys, size, 1277 dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO, 1278 dma_get_mask(dev)); 1279 } 1280 1281 static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, 1282 size_t size, enum dma_data_direction dir, unsigned long attrs) 1283 { 1284 __iommu_dma_unmap(dev, handle, size); 1285 } 1286 1287 static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) 1288 { 1289 size_t alloc_size = PAGE_ALIGN(size); 1290 int count = alloc_size >> PAGE_SHIFT; 1291 struct page *page = NULL, **pages = NULL; 1292 1293 /* Non-coherent atomic allocation? Easy */ 1294 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 1295 dma_free_from_pool(dev, cpu_addr, alloc_size)) 1296 return; 1297 1298 if (is_vmalloc_addr(cpu_addr)) { 1299 /* 1300 * If it the address is remapped, then it's either non-coherent 1301 * or highmem CMA, or an iommu_dma_alloc_remap() construction. 1302 */ 1303 pages = dma_common_find_pages(cpu_addr); 1304 if (!pages) 1305 page = vmalloc_to_page(cpu_addr); 1306 dma_common_free_remap(cpu_addr, alloc_size); 1307 } else { 1308 /* Lowmem means a coherent atomic or CMA allocation */ 1309 page = virt_to_page(cpu_addr); 1310 } 1311 1312 if (pages) 1313 __iommu_dma_free_pages(pages, count); 1314 if (page) 1315 dma_free_contiguous(dev, page, alloc_size); 1316 } 1317 1318 static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, 1319 dma_addr_t handle, unsigned long attrs) 1320 { 1321 __iommu_dma_unmap(dev, handle, size); 1322 __iommu_dma_free(dev, size, cpu_addr); 1323 } 1324 1325 static void *iommu_dma_alloc_pages(struct device *dev, size_t size, 1326 struct page **pagep, gfp_t gfp, unsigned long attrs) 1327 { 1328 bool coherent = dev_is_dma_coherent(dev); 1329 size_t alloc_size = PAGE_ALIGN(size); 1330 int node = dev_to_node(dev); 1331 struct page *page = NULL; 1332 void *cpu_addr; 1333 1334 page = dma_alloc_contiguous(dev, alloc_size, gfp); 1335 if (!page) 1336 page = alloc_pages_node(node, gfp, get_order(alloc_size)); 1337 if (!page) 1338 return NULL; 1339 1340 if (!coherent || PageHighMem(page)) { 1341 pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); 1342 1343 cpu_addr = dma_common_contiguous_remap(page, alloc_size, 1344 prot, __builtin_return_address(0)); 1345 if (!cpu_addr) 1346 goto out_free_pages; 1347 1348 if (!coherent) 1349 arch_dma_prep_coherent(page, size); 1350 } else { 1351 cpu_addr = page_address(page); 1352 } 1353 1354 *pagep = page; 1355 memset(cpu_addr, 0, alloc_size); 1356 return cpu_addr; 1357 out_free_pages: 1358 dma_free_contiguous(dev, page, alloc_size); 1359 return NULL; 1360 } 1361 1362 static void *iommu_dma_alloc(struct device *dev, size_t size, 1363 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1364 { 1365 bool coherent = dev_is_dma_coherent(dev); 1366 int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); 1367 struct page *page = NULL; 1368 void *cpu_addr; 1369 1370 gfp |= __GFP_ZERO; 1371 1372 if (gfpflags_allow_blocking(gfp) && 1373 !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { 1374 return iommu_dma_alloc_remap(dev, size, handle, gfp, 1375 dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); 1376 } 1377 1378 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 1379 !gfpflags_allow_blocking(gfp) && !coherent) 1380 page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr, 1381 gfp, NULL); 1382 else 1383 cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs); 1384 if (!cpu_addr) 1385 return NULL; 1386 1387 *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot, 1388 dev->coherent_dma_mask); 1389 if (*handle == DMA_MAPPING_ERROR) { 1390 __iommu_dma_free(dev, size, cpu_addr); 1391 return NULL; 1392 } 1393 1394 return cpu_addr; 1395 } 1396 1397 static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, 1398 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1399 unsigned long attrs) 1400 { 1401 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 1402 unsigned long pfn, off = vma->vm_pgoff; 1403 int ret; 1404 1405 vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); 1406 1407 if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 1408 return ret; 1409 1410 if (off >= nr_pages || vma_pages(vma) > nr_pages - off) 1411 return -ENXIO; 1412 1413 if (is_vmalloc_addr(cpu_addr)) { 1414 struct page **pages = dma_common_find_pages(cpu_addr); 1415 1416 if (pages) 1417 return vm_map_pages(vma, pages, nr_pages); 1418 pfn = vmalloc_to_pfn(cpu_addr); 1419 } else { 1420 pfn = page_to_pfn(virt_to_page(cpu_addr)); 1421 } 1422 1423 return remap_pfn_range(vma, vma->vm_start, pfn + off, 1424 vma->vm_end - vma->vm_start, 1425 vma->vm_page_prot); 1426 } 1427 1428 static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 1429 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1430 unsigned long attrs) 1431 { 1432 struct page *page; 1433 int ret; 1434 1435 if (is_vmalloc_addr(cpu_addr)) { 1436 struct page **pages = dma_common_find_pages(cpu_addr); 1437 1438 if (pages) { 1439 return sg_alloc_table_from_pages(sgt, pages, 1440 PAGE_ALIGN(size) >> PAGE_SHIFT, 1441 0, size, GFP_KERNEL); 1442 } 1443 1444 page = vmalloc_to_page(cpu_addr); 1445 } else { 1446 page = virt_to_page(cpu_addr); 1447 } 1448 1449 ret = sg_alloc_table(sgt, 1, GFP_KERNEL); 1450 if (!ret) 1451 sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); 1452 return ret; 1453 } 1454 1455 static unsigned long iommu_dma_get_merge_boundary(struct device *dev) 1456 { 1457 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1458 1459 return (1UL << __ffs(domain->pgsize_bitmap)) - 1; 1460 } 1461 1462 static const struct dma_map_ops iommu_dma_ops = { 1463 .alloc = iommu_dma_alloc, 1464 .free = iommu_dma_free, 1465 .alloc_pages = dma_common_alloc_pages, 1466 .free_pages = dma_common_free_pages, 1467 .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, 1468 .free_noncontiguous = iommu_dma_free_noncontiguous, 1469 .mmap = iommu_dma_mmap, 1470 .get_sgtable = iommu_dma_get_sgtable, 1471 .map_page = iommu_dma_map_page, 1472 .unmap_page = iommu_dma_unmap_page, 1473 .map_sg = iommu_dma_map_sg, 1474 .unmap_sg = iommu_dma_unmap_sg, 1475 .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, 1476 .sync_single_for_device = iommu_dma_sync_single_for_device, 1477 .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, 1478 .sync_sg_for_device = iommu_dma_sync_sg_for_device, 1479 .map_resource = iommu_dma_map_resource, 1480 .unmap_resource = iommu_dma_unmap_resource, 1481 .get_merge_boundary = iommu_dma_get_merge_boundary, 1482 }; 1483 1484 /* 1485 * The IOMMU core code allocates the default DMA domain, which the underlying 1486 * IOMMU driver needs to support via the dma-iommu layer. 1487 */ 1488 void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) 1489 { 1490 struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1491 1492 if (!domain) 1493 goto out_err; 1494 1495 /* 1496 * The IOMMU core code allocates the default DMA domain, which the 1497 * underlying IOMMU driver needs to support via the dma-iommu layer. 1498 */ 1499 if (iommu_is_dma_domain(domain)) { 1500 if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) 1501 goto out_err; 1502 dev->dma_ops = &iommu_dma_ops; 1503 } 1504 1505 return; 1506 out_err: 1507 pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", 1508 dev_name(dev)); 1509 } 1510 EXPORT_SYMBOL_GPL(iommu_setup_dma_ops); 1511 1512 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, 1513 phys_addr_t msi_addr, struct iommu_domain *domain) 1514 { 1515 struct iommu_dma_cookie *cookie = domain->iova_cookie; 1516 struct iommu_dma_msi_page *msi_page; 1517 dma_addr_t iova; 1518 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; 1519 size_t size = cookie_msi_granule(cookie); 1520 1521 msi_addr &= ~(phys_addr_t)(size - 1); 1522 list_for_each_entry(msi_page, &cookie->msi_page_list, list) 1523 if (msi_page->phys == msi_addr) 1524 return msi_page; 1525 1526 msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL); 1527 if (!msi_page) 1528 return NULL; 1529 1530 iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); 1531 if (!iova) 1532 goto out_free_page; 1533 1534 if (iommu_map(domain, iova, msi_addr, size, prot)) 1535 goto out_free_iova; 1536 1537 INIT_LIST_HEAD(&msi_page->list); 1538 msi_page->phys = msi_addr; 1539 msi_page->iova = iova; 1540 list_add(&msi_page->list, &cookie->msi_page_list); 1541 return msi_page; 1542 1543 out_free_iova: 1544 iommu_dma_free_iova(cookie, iova, size, NULL); 1545 out_free_page: 1546 kfree(msi_page); 1547 return NULL; 1548 } 1549 1550 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 1551 { 1552 struct device *dev = msi_desc_to_dev(desc); 1553 struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1554 struct iommu_dma_msi_page *msi_page; 1555 static DEFINE_MUTEX(msi_prepare_lock); /* see below */ 1556 1557 if (!domain || !domain->iova_cookie) { 1558 desc->iommu_cookie = NULL; 1559 return 0; 1560 } 1561 1562 /* 1563 * In fact the whole prepare operation should already be serialised by 1564 * irq_domain_mutex further up the callchain, but that's pretty subtle 1565 * on its own, so consider this locking as failsafe documentation... 1566 */ 1567 mutex_lock(&msi_prepare_lock); 1568 msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); 1569 mutex_unlock(&msi_prepare_lock); 1570 1571 msi_desc_set_iommu_cookie(desc, msi_page); 1572 1573 if (!msi_page) 1574 return -ENOMEM; 1575 return 0; 1576 } 1577 1578 void iommu_dma_compose_msi_msg(struct msi_desc *desc, 1579 struct msi_msg *msg) 1580 { 1581 struct device *dev = msi_desc_to_dev(desc); 1582 const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1583 const struct iommu_dma_msi_page *msi_page; 1584 1585 msi_page = msi_desc_get_iommu_cookie(desc); 1586 1587 if (!domain || !domain->iova_cookie || WARN_ON(!msi_page)) 1588 return; 1589 1590 msg->address_hi = upper_32_bits(msi_page->iova); 1591 msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; 1592 msg->address_lo += lower_32_bits(msi_page->iova); 1593 } 1594 1595 static int iommu_dma_init(void) 1596 { 1597 if (is_kdump_kernel()) 1598 static_branch_enable(&iommu_deferred_attach_enabled); 1599 1600 return iova_cache_get(); 1601 } 1602 arch_initcall(iommu_dma_init); 1603