// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/arch/arm/mm/dma-mapping.c
 *
 *  Copyright (C) 2000-2004 Russell King
 *
 *  DMA uncached mapping support.
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/genalloc.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/cma.h>

#include <asm/page.h>
#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/mach/arch.h>
#include <asm/dma-iommu.h>
#include <asm/mach/map.h>
#include <asm/system_info.h>
#include <asm/xen/xen-ops.h>

#include "dma.h"
#include "mm.h"

struct arm_dma_alloc_args {
	struct device *dev;
	size_t size;
	gfp_t gfp;
	pgprot_t prot;
	const void *caller;
	bool want_vaddr;
	int coherent_flag;
};

struct arm_dma_free_args {
	struct device *dev;
	size_t size;
	void *cpu_addr;
	struct page *page;
	bool want_vaddr;
};

#define NORMAL		0
#define COHERENT	1

struct arm_dma_allocator {
	void *(*alloc)(struct arm_dma_alloc_args *args,
		       struct page **ret_page);
	void (*free)(struct arm_dma_free_args *args);
};

struct arm_dma_buffer {
	struct list_head list;
	void *virt;
	struct arm_dma_allocator *allocator;
};

static LIST_HEAD(arm_dma_bufs);
static DEFINE_SPINLOCK(arm_dma_bufs_lock);

static struct arm_dma_buffer *arm_dma_buffer_find(void *virt)
{
	struct arm_dma_buffer *buf, *found = NULL;
	unsigned long flags;

	spin_lock_irqsave(&arm_dma_bufs_lock, flags);
	list_for_each_entry(buf, &arm_dma_bufs, list) {
		if (buf->virt == virt) {
			list_del(&buf->list);
			found = buf;
			break;
		}
	}
	spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
	return found;
}

/*
 * The DMA API is built upon the notion of "buffer ownership". A buffer
 * is either exclusively owned by the CPU (and therefore may be accessed
 * by it) or exclusively owned by the DMA device. These helper functions
 * represent the transitions between these two ownership states.
 *
 * Note, however, that on later ARMs, this notion does not work due to
 * speculative prefetches. We model our approach on the assumption that
 * the CPU does do speculative prefetches, which means we clean caches
 * before transfers and delay cache invalidation until transfer completion.
 *
 */
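/*
 * Illustrative sketch (not part of this file; device and buffer are
 * hypothetical): a driver using the streaming API sees these ownership
 * transitions.
 *
 *	buf = kmalloc(len, GFP_KERNEL);
 *	dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *				// device owns buf: caches cleaned
 *	... hardware performs the transfer ...
 *	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
 *				// CPU owns buf again: safe to touch
 */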
static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag)
{
	/*
	 * Ensure that the allocated pages are zeroed, and that any data
	 * lurking in the kernel direct-mapped region is invalidated.
	 */
	if (PageHighMem(page)) {
		phys_addr_t base = __pfn_to_phys(page_to_pfn(page));
		phys_addr_t end = base + size;
		while (size > 0) {
			void *ptr = kmap_atomic(page);
			memset(ptr, 0, PAGE_SIZE);
			if (coherent_flag != COHERENT)
				dmac_flush_range(ptr, ptr + PAGE_SIZE);
			kunmap_atomic(ptr);
			page++;
			size -= PAGE_SIZE;
		}
		if (coherent_flag != COHERENT)
			outer_flush_range(base, end);
	} else {
		void *ptr = page_address(page);
		memset(ptr, 0, size);
		if (coherent_flag != COHERENT) {
			dmac_flush_range(ptr, ptr + size);
			outer_flush_range(__pa(ptr), __pa(ptr) + size);
		}
	}
}

/*
 * Allocate a DMA buffer for 'dev' of size 'size' using the
 * specified gfp mask. Note that 'size' must be page aligned.
 */
static struct page *__dma_alloc_buffer(struct device *dev, size_t size,
				       gfp_t gfp, int coherent_flag)
{
	unsigned long order = get_order(size);
	struct page *page, *p, *e;

	page = alloc_pages(gfp, order);
	if (!page)
		return NULL;

	/*
	 * Now split the huge page and free the excess pages
	 */
	split_page(page, order);
	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
		__free_page(p);

	__dma_clear_buffer(page, size, coherent_flag);

	return page;
}

/*
 * Free a DMA buffer. 'size' must be page aligned.
 */
static void __dma_free_buffer(struct page *page, size_t size)
{
	struct page *e = page + (size >> PAGE_SHIFT);

	while (page < e) {
		__free_page(page);
		page++;
	}
}

static void *__alloc_from_contiguous(struct device *dev, size_t size,
				     pgprot_t prot, struct page **ret_page,
				     const void *caller, bool want_vaddr,
				     int coherent_flag, gfp_t gfp);

static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
				  pgprot_t prot, struct page **ret_page,
				  const void *caller, bool want_vaddr);

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static struct gen_pool *atomic_pool __ro_after_init;

static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);
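/*
 * Example (kernel command line): the pool size accepts the usual
 * memparse() suffixes, so a board needing a larger atomic pool can boot
 * with e.g.:
 *
 *	coherent_pool=4M
 *
 * The default remains DEFAULT_DMA_COHERENT_POOL_SIZE (256 KiB).
 */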
/*
 * Initialise the coherent pool for atomic allocations.
 */
static int __init atomic_pool_init(void)
{
	pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL);
	gfp_t gfp = GFP_KERNEL | GFP_DMA;
	struct page *page;
	void *ptr;

	atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
	if (!atomic_pool)
		goto out;
	/*
	 * The atomic pool is only used for non-coherent allocations
	 * so we must pass NORMAL for coherent_flag.
	 */
	if (dev_get_cma_area(NULL))
		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
				      &page, atomic_pool_init, true, NORMAL,
				      GFP_KERNEL);
	else
		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
					   &page, atomic_pool_init, true);
	if (ptr) {
		int ret;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto destroy_genpool;

		gen_pool_set_algo(atomic_pool,
				gen_pool_first_fit_order_align,
				NULL);
		pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n",
		       atomic_pool_size / 1024);
		return 0;
	}

destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}
/*
 * CMA is activated by core_initcall, so we must be called after it.
 */
postcore_initcall(atomic_pool_init);
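/*
 * Illustrative sketch (hypothetical call site): an allocation made in
 * atomic context must not block, so for a non-coherent device
 * __dma_alloc() below serves it from this pool via pool_allocator
 * instead of remapping pages:
 *
 *	void *cpu;
 *	dma_addr_t handle;
 *
 *	cpu = dma_alloc_coherent(dev, SZ_4K, &handle, GFP_ATOMIC);
 *
 * If the pool is exhausted the allocation fails, which is why its size
 * is tunable with the coherent_pool= parameter above.
 */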
#ifdef CONFIG_CMA_AREAS
struct dma_contig_early_reserve {
	phys_addr_t base;
	unsigned long size;
};

static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;

static int dma_mmu_remap_num __initdata;

#ifdef CONFIG_DMA_CMA
void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
{
	dma_mmu_remap[dma_mmu_remap_num].base = base;
	dma_mmu_remap[dma_mmu_remap_num].size = size;
	dma_mmu_remap_num++;
}
#endif

void __init dma_contiguous_remap(void)
{
	int i;
	for (i = 0; i < dma_mmu_remap_num; i++) {
		phys_addr_t start = dma_mmu_remap[i].base;
		phys_addr_t end = start + dma_mmu_remap[i].size;
		struct map_desc map;
		unsigned long addr;

		if (end > arm_lowmem_limit)
			end = arm_lowmem_limit;
		if (start >= end)
			continue;

		map.pfn = __phys_to_pfn(start);
		map.virtual = __phys_to_virt(start);
		map.length = end - start;
		map.type = MT_MEMORY_DMA_READY;

		/*
		 * Clear the previous low-memory mapping to ensure that the
		 * TLB does not see any conflicting entries, then flush
		 * the TLB of the old entries before creating new mappings.
		 *
		 * This ensures that any speculatively loaded TLB entries
		 * (even though they may be rare) cannot cause any problems,
		 * and ensures that this code is architecturally compliant.
		 */
		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
		     addr += PMD_SIZE)
			pmd_clear(pmd_off_k(addr));

		flush_tlb_kernel_range(__phys_to_virt(start),
				       __phys_to_virt(end));

		iotable_init(&map, 1);
	}
}
#endif

static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data)
{
	struct page *page = virt_to_page((void *)addr);
	pgprot_t prot = *(pgprot_t *)data;

	set_pte_ext(pte, mk_pte(page, prot), 0);
	return 0;
}

static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
{
	unsigned long start = (unsigned long) page_address(page);
	unsigned long end = start + size;

	apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
	flush_tlb_kernel_range(start, end);
}

static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
				  pgprot_t prot, struct page **ret_page,
				  const void *caller, bool want_vaddr)
{
	struct page *page;
	void *ptr = NULL;
	/*
	 * __alloc_remap_buffer is only called when the device is
	 * non-coherent
	 */
	page = __dma_alloc_buffer(dev, size, gfp, NORMAL);
	if (!page)
		return NULL;
	if (!want_vaddr)
		goto out;

	ptr = dma_common_contiguous_remap(page, size, prot, caller);
	if (!ptr) {
		__dma_free_buffer(page, size);
		return NULL;
	}

 out:
	*ret_page = page;
	return ptr;
}

static void *__alloc_from_pool(size_t size, struct page **ret_page)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

static void *__alloc_from_contiguous(struct device *dev, size_t size,
				     pgprot_t prot, struct page **ret_page,
				     const void *caller, bool want_vaddr,
				     int coherent_flag, gfp_t gfp)
{
	unsigned long order = get_order(size);
	size_t count = size >> PAGE_SHIFT;
	struct page *page;
	void *ptr = NULL;

	page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN);
	if (!page)
		return NULL;

	__dma_clear_buffer(page, size, coherent_flag);

	if (!want_vaddr)
		goto out;

	if (PageHighMem(page)) {
		ptr = dma_common_contiguous_remap(page, size, prot, caller);
		if (!ptr) {
			dma_release_from_contiguous(dev, page, count);
			return NULL;
		}
	} else {
		__dma_remap(page, size, prot);
		ptr = page_address(page);
	}

 out:
	*ret_page = page;
	return ptr;
}

static void __free_from_contiguous(struct device *dev, struct page *page,
				   void *cpu_addr, size_t size, bool want_vaddr)
{
	if (want_vaddr) {
		if (PageHighMem(page))
			dma_common_free_remap(cpu_addr, size);
		else
			__dma_remap(page, size, PAGE_KERNEL);
	}
	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
}

static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot)
{
	prot = (attrs & DMA_ATTR_WRITE_COMBINE) ?
			pgprot_writecombine(prot) :
			pgprot_dmacoherent(prot);
	return prot;
}
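/*
 * Worked example: with no attrs, __get_dma_pgprot() yields an uncached,
 * unbuffered mapping via pgprot_dmacoherent(). A driver that wants a
 * bufferable (write-combining) CPU mapping instead passes
 * DMA_ATTR_WRITE_COMBINE, e.g. (hypothetical call):
 *
 *	cpu = dma_alloc_attrs(dev, size, &handle, GFP_KERNEL,
 *			      DMA_ATTR_WRITE_COMBINE);
 */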
static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
				   struct page **ret_page)
{
	struct page *page;
	/* __alloc_simple_buffer is only called when the device is coherent */
	page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
	if (!page)
		return NULL;

	*ret_page = page;
	return page_address(page);
}

static void *simple_allocator_alloc(struct arm_dma_alloc_args *args,
				    struct page **ret_page)
{
	return __alloc_simple_buffer(args->dev, args->size, args->gfp,
				     ret_page);
}

static void simple_allocator_free(struct arm_dma_free_args *args)
{
	__dma_free_buffer(args->page, args->size);
}

static struct arm_dma_allocator simple_allocator = {
	.alloc = simple_allocator_alloc,
	.free = simple_allocator_free,
};

static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
				 struct page **ret_page)
{
	return __alloc_from_contiguous(args->dev, args->size, args->prot,
				       ret_page, args->caller,
				       args->want_vaddr, args->coherent_flag,
				       args->gfp);
}

static void cma_allocator_free(struct arm_dma_free_args *args)
{
	__free_from_contiguous(args->dev, args->page, args->cpu_addr,
			       args->size, args->want_vaddr);
}

static struct arm_dma_allocator cma_allocator = {
	.alloc = cma_allocator_alloc,
	.free = cma_allocator_free,
};

static void *pool_allocator_alloc(struct arm_dma_alloc_args *args,
				  struct page **ret_page)
{
	return __alloc_from_pool(args->size, ret_page);
}

static void pool_allocator_free(struct arm_dma_free_args *args)
{
	__free_from_pool(args->cpu_addr, args->size);
}

static struct arm_dma_allocator pool_allocator = {
	.alloc = pool_allocator_alloc,
	.free = pool_allocator_free,
};

static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
				   struct page **ret_page)
{
	return __alloc_remap_buffer(args->dev, args->size, args->gfp,
				    args->prot, ret_page, args->caller,
				    args->want_vaddr);
}

static void remap_allocator_free(struct arm_dma_free_args *args)
{
	if (args->want_vaddr)
		dma_common_free_remap(args->cpu_addr, args->size);

	__dma_free_buffer(args->page, args->size);
}

static struct arm_dma_allocator remap_allocator = {
	.alloc = remap_allocator_alloc,
	.free = remap_allocator_free,
};
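/*
 * How __dma_alloc() below picks one of the four allocators, summarised
 * (CMA is only usable when the gfp flags allow blocking):
 *
 *	CMA area + may block		-> cma_allocator
 *	coherent device			-> simple_allocator
 *	non-coherent + may block	-> remap_allocator
 *	non-coherent + atomic		-> pool_allocator
 */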
static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
			 gfp_t gfp, pgprot_t prot, bool is_coherent,
			 unsigned long attrs, const void *caller)
{
	u64 mask = min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
	struct page *page = NULL;
	void *addr;
	bool allowblock, cma;
	struct arm_dma_buffer *buf;
	struct arm_dma_alloc_args args = {
		.dev = dev,
		.size = PAGE_ALIGN(size),
		.gfp = gfp,
		.prot = prot,
		.caller = caller,
		.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
		.coherent_flag = is_coherent ? COHERENT : NORMAL,
	};

#ifdef CONFIG_DMA_API_DEBUG
	u64 limit = (mask + 1) & ~mask;
	if (limit && size >= limit) {
		dev_warn(dev, "coherent allocation too big (requested %#zx mask %#llx)\n",
			size, mask);
		return NULL;
	}
#endif

	buf = kzalloc(sizeof(*buf),
		      gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM));
	if (!buf)
		return NULL;

	if (mask < 0xffffffffULL)
		gfp |= GFP_DMA;

	args.gfp = gfp;

	*handle = DMA_MAPPING_ERROR;
	allowblock = gfpflags_allow_blocking(gfp);
	cma = allowblock ? dev_get_cma_area(dev) : NULL;

	if (cma)
		buf->allocator = &cma_allocator;
	else if (is_coherent)
		buf->allocator = &simple_allocator;
	else if (allowblock)
		buf->allocator = &remap_allocator;
	else
		buf->allocator = &pool_allocator;

	addr = buf->allocator->alloc(&args, &page);

	if (page) {
		unsigned long flags;

		*handle = phys_to_dma(dev, page_to_phys(page));
		buf->virt = args.want_vaddr ? addr : page;

		spin_lock_irqsave(&arm_dma_bufs_lock, flags);
		list_add(&buf->list, &arm_dma_bufs);
		spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
	} else {
		kfree(buf);
	}

	return args.want_vaddr ? addr : page;
}

/*
 * Free a buffer as defined by the above mapping.
 */
static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
			   dma_addr_t handle, unsigned long attrs,
			   bool is_coherent)
{
	struct page *page = phys_to_page(dma_to_phys(dev, handle));
	struct arm_dma_buffer *buf;
	struct arm_dma_free_args args = {
		.dev = dev,
		.size = PAGE_ALIGN(size),
		.cpu_addr = cpu_addr,
		.page = page,
		.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
	};

	buf = arm_dma_buffer_find(cpu_addr);
	if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr))
		return;

	buf->allocator->free(&args);
	kfree(buf);
}

static void dma_cache_maint_page(struct page *page, unsigned long offset,
	size_t size, enum dma_data_direction dir,
	void (*op)(const void *, size_t, int))
{
	unsigned long pfn;
	size_t left = size;

	pfn = page_to_pfn(page) + offset / PAGE_SIZE;
	offset %= PAGE_SIZE;

	/*
	 * A single sg entry may refer to multiple physically contiguous
	 * pages. But we still need to process highmem pages individually.
	 * If highmem is not configured then the bulk of this loop gets
	 * optimized out.
	 */
	do {
		size_t len = left;
		void *vaddr;

		page = pfn_to_page(pfn);

		if (PageHighMem(page)) {
			if (len + offset > PAGE_SIZE)
				len = PAGE_SIZE - offset;

			if (cache_is_vipt_nonaliasing()) {
				vaddr = kmap_atomic(page);
				op(vaddr + offset, len, dir);
				kunmap_atomic(vaddr);
			} else {
				vaddr = kmap_high_get(page);
				if (vaddr) {
					op(vaddr + offset, len, dir);
					kunmap_high(page);
				}
			}
		} else {
			vaddr = page_address(page) + offset;
			op(vaddr, len, dir);
		}
		offset = 0;
		pfn++;
		left -= len;
	} while (left);
}
/*
 * Make an area consistent for devices.
 * Note: Drivers should NOT use this function directly.
 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 */
static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr;

	dma_cache_maint_page(page, off, size, dir, dmac_map_area);

	paddr = page_to_phys(page) + off;
	if (dir == DMA_FROM_DEVICE) {
		outer_inv_range(paddr, paddr + size);
	} else {
		outer_clean_range(paddr, paddr + size);
	}
	/* FIXME: non-speculating: flush on bidirectional mappings? */
}

static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = page_to_phys(page) + off;

	/* FIXME: non-speculating: not required */
	/* in any case, don't bother invalidating if DMA to device */
	if (dir != DMA_TO_DEVICE) {
		outer_inv_range(paddr, paddr + size);

		dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
	}

	/*
	 * Mark the D-cache clean for these pages to avoid extra flushing.
	 */
	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
		struct folio *folio = pfn_folio(paddr / PAGE_SIZE);
		size_t offset = offset_in_folio(folio, paddr);

		for (;;) {
			size_t sz = folio_size(folio) - offset;

			if (size < sz)
				break;
			if (!offset)
				set_bit(PG_dcache_clean, &folio->flags);
			offset = 0;
			size -= sz;
			if (!size)
				break;
			folio = folio_next(folio);
		}
	}
}

#ifdef CONFIG_ARM_DMA_USE_IOMMU

static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
{
	int prot = 0;

	if (attrs & DMA_ATTR_PRIVILEGED)
		prot |= IOMMU_PRIV;

	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return prot | IOMMU_READ | IOMMU_WRITE;
	case DMA_TO_DEVICE:
		return prot | IOMMU_READ;
	case DMA_FROM_DEVICE:
		return prot | IOMMU_WRITE;
	default:
		return prot;
	}
}
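/*
 * Worked example: __dma_info_to_prot(DMA_BIDIRECTIONAL,
 * DMA_ATTR_PRIVILEGED) returns IOMMU_PRIV | IOMMU_READ | IOMMU_WRITE,
 * i.e. a read/write IOMMU mapping reachable only from privileged
 * (supervisor) transactions, while a plain DMA_TO_DEVICE mapping is
 * read-only from the device's point of view (IOMMU_READ).
 */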
/* IOMMU */

static int extend_iommu_mapping(struct dma_iommu_mapping *mapping);

static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
				      size_t size)
{
	unsigned int order = get_order(size);
	unsigned int align = 0;
	unsigned int count, start;
	size_t mapping_size = mapping->bits << PAGE_SHIFT;
	unsigned long flags;
	dma_addr_t iova;
	int i;

	if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT)
		order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT;

	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	align = (1 << order) - 1;

	spin_lock_irqsave(&mapping->lock, flags);
	for (i = 0; i < mapping->nr_bitmaps; i++) {
		start = bitmap_find_next_zero_area(mapping->bitmaps[i],
				mapping->bits, 0, count, align);

		if (start > mapping->bits)
			continue;

		bitmap_set(mapping->bitmaps[i], start, count);
		break;
	}

	/*
	 * No unused range found. Try to extend the existing mapping
	 * and perform a second attempt to reserve an IO virtual
	 * address range of size bytes.
	 */
	if (i == mapping->nr_bitmaps) {
		if (extend_iommu_mapping(mapping)) {
			spin_unlock_irqrestore(&mapping->lock, flags);
			return DMA_MAPPING_ERROR;
		}

		start = bitmap_find_next_zero_area(mapping->bitmaps[i],
				mapping->bits, 0, count, align);

		if (start > mapping->bits) {
			spin_unlock_irqrestore(&mapping->lock, flags);
			return DMA_MAPPING_ERROR;
		}

		bitmap_set(mapping->bitmaps[i], start, count);
	}
	spin_unlock_irqrestore(&mapping->lock, flags);

	iova = mapping->base + (mapping_size * i);
	iova += start << PAGE_SHIFT;

	return iova;
}

static inline void __free_iova(struct dma_iommu_mapping *mapping,
			       dma_addr_t addr, size_t size)
{
	unsigned int start, count;
	size_t mapping_size = mapping->bits << PAGE_SHIFT;
	unsigned long flags;
	dma_addr_t bitmap_base;
	u32 bitmap_index;

	if (!size)
		return;

	bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size;
	BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions);

	bitmap_base = mapping->base + mapping_size * bitmap_index;

	start = (addr - bitmap_base) >> PAGE_SHIFT;

	if (addr + size > bitmap_base + mapping_size) {
		/*
		 * The address range to be freed reaches into the iova
		 * range of the next bitmap. This should not happen as
		 * we don't allow this in __alloc_iova (at the
		 * moment).
		 */
		BUG();
	} else
		count = size >> PAGE_SHIFT;

	spin_lock_irqsave(&mapping->lock, flags);
	bitmap_clear(mapping->bitmaps[bitmap_index], start, count);
	spin_unlock_irqrestore(&mapping->lock, flags);
}
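/*
 * Worked example of the bitmap geometry: arm_iommu_create_mapping()
 * below caps each bitmap at PAGE_SIZE bytes. With 4 KiB pages that is
 * 32768 bits, each covering one IOVA page, so a single bitmap spans
 * 128 MiB of IO virtual address space. A 1 GiB mapping therefore uses
 * up to eight bitmaps, allocated lazily by extend_iommu_mapping() as
 * the earlier ones fill up.
 */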
/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */
static const int iommu_order_array[] = { 9, 8, 4, 0 };

static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
					  gfp_t gfp, unsigned long attrs,
					  int coherent_flag)
{
	struct page **pages;
	int count = size >> PAGE_SHIFT;
	int array_size = count * sizeof(struct page *);
	int i = 0;
	int order_idx = 0;

	if (array_size <= PAGE_SIZE)
		pages = kzalloc(array_size, GFP_KERNEL);
	else
		pages = vzalloc(array_size);
	if (!pages)
		return NULL;

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		unsigned long order = get_order(size);
		struct page *page;

		page = dma_alloc_from_contiguous(dev, count, order,
						 gfp & __GFP_NOWARN);
		if (!page)
			goto error;

		__dma_clear_buffer(page, size, coherent_flag);

		for (i = 0; i < count; i++)
			pages[i] = page + i;

		return pages;
	}

	/* Go straight to 4K chunks if caller says it's OK. */
	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
		order_idx = ARRAY_SIZE(iommu_order_array) - 1;

	/*
	 * IOMMU can map any pages, so highmem can also be used here
	 */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	while (count) {
		int j, order;

		order = iommu_order_array[order_idx];

		/* Drop down when we get small */
		if (__fls(count) < order) {
			order_idx++;
			continue;
		}

		if (order) {
			/* See if it's easy to allocate a high-order chunk */
			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);

			/* Go down a notch at first sign of pressure */
			if (!pages[i]) {
				order_idx++;
				continue;
			}
		} else {
			pages[i] = alloc_pages(gfp, 0);
			if (!pages[i])
				goto error;
		}

		if (order) {
			split_page(pages[i], order);
			j = 1 << order;
			while (--j)
				pages[i + j] = pages[i] + j;
		}

		__dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag);
		i += 1 << order;
		count -= 1 << order;
	}

	return pages;
error:
	while (i--)
		if (pages[i])
			__free_pages(pages[i], 0);
	kvfree(pages);
	return NULL;
}

static int __iommu_free_buffer(struct device *dev, struct page **pages,
			       size_t size, unsigned long attrs)
{
	int count = size >> PAGE_SHIFT;
	int i;

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		dma_release_from_contiguous(dev, pages[0], count);
	} else {
		for (i = 0; i < count; i++)
			if (pages[i])
				__free_pages(pages[i], 0);
	}

	kvfree(pages);
	return 0;
}

/*
 * Create a mapping in device IO address space for specified pages
 */
static dma_addr_t
__iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
		       unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	dma_addr_t dma_addr, iova;
	int i;

	dma_addr = __alloc_iova(mapping, size);
	if (dma_addr == DMA_MAPPING_ERROR)
		return dma_addr;

	iova = dma_addr;
	for (i = 0; i < count; ) {
		int ret;

		unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
		phys_addr_t phys = page_to_phys(pages[i]);
		unsigned int len, j;

		for (j = i + 1; j < count; j++, next_pfn++)
			if (page_to_pfn(pages[j]) != next_pfn)
				break;

		len = (j - i) << PAGE_SHIFT;
		ret = iommu_map(mapping->domain, iova, phys, len,
				__dma_info_to_prot(DMA_BIDIRECTIONAL, attrs),
				GFP_KERNEL);
		if (ret < 0)
			goto fail;
		iova += len;
		i = j;
	}
	return dma_addr;
fail:
	iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
	__free_iova(mapping, dma_addr, size);
	return DMA_MAPPING_ERROR;
}

static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);

	/*
	 * add optional in-page offset from iova to size and align
	 * result to page size
	 */
	size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
	iova &= PAGE_MASK;

	iommu_unmap(mapping->domain, iova, size);
	__free_iova(mapping, iova, size);
	return 0;
}
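/*
 * Illustrative walk-through of the run-merging loop above: if pages[]
 * holds pfns 100, 101, 102, 200, the first iteration finds the
 * three-page run and issues a single iommu_map() of 12 KiB, and the
 * second maps the lone page at pfn 200. Fewer, larger iommu_map()
 * calls give the IOMMU driver a chance to use larger page table
 * entries.
 */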
static struct page **__atomic_get_pages(void *addr)
{
	struct page *page;
	phys_addr_t phys;

	phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr);
	page = phys_to_page(phys);

	return (struct page **)page;
}

static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
{
	if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
		return __atomic_get_pages(cpu_addr);

	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
		return cpu_addr;

	return dma_common_find_pages(cpu_addr);
}

static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
				  dma_addr_t *handle, int coherent_flag,
				  unsigned long attrs)
{
	struct page *page;
	void *addr;

	if (coherent_flag == COHERENT)
		addr = __alloc_simple_buffer(dev, size, gfp, &page);
	else
		addr = __alloc_from_pool(size, &page);
	if (!addr)
		return NULL;

	*handle = __iommu_create_mapping(dev, &page, size, attrs);
	if (*handle == DMA_MAPPING_ERROR)
		goto err_mapping;

	return addr;

err_mapping:
	__free_from_pool(addr, size);
	return NULL;
}

static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
			dma_addr_t handle, size_t size, int coherent_flag)
{
	__iommu_remove_mapping(dev, handle, size);
	if (coherent_flag == COHERENT)
		__dma_free_buffer(virt_to_page(cpu_addr), size);
	else
		__free_from_pool(cpu_addr, size);
}

static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
	    dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
	struct page **pages;
	void *addr = NULL;
	int coherent_flag = dev->dma_coherent ? COHERENT : NORMAL;

	*handle = DMA_MAPPING_ERROR;
	size = PAGE_ALIGN(size);

	if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp))
		return __iommu_alloc_simple(dev, size, gfp, handle,
					    coherent_flag, attrs);

	pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag);
	if (!pages)
		return NULL;

	*handle = __iommu_create_mapping(dev, pages, size, attrs);
	if (*handle == DMA_MAPPING_ERROR)
		goto err_buffer;

	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
		return pages;

	addr = dma_common_pages_remap(pages, size, prot,
				      __builtin_return_address(0));
	if (!addr)
		goto err_mapping;

	return addr;

err_mapping:
	__iommu_remove_mapping(dev, *handle, size);
err_buffer:
	__iommu_free_buffer(dev, pages, size, attrs);
	return NULL;
}

static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
		    unsigned long attrs)
{
	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	int err;

	if (!pages)
		return -ENXIO;

	if (vma->vm_pgoff >= nr_pages)
		return -ENXIO;

	if (!dev->dma_coherent)
		vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);

	err = vm_map_pages(vma, pages, nr_pages);
	if (err)
		pr_err("Remapping memory failed: %d\n", err);

	return err;
}
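/*
 * Illustrative sketch (hypothetical driver): arm_iommu_mmap_attrs() is
 * reached through the generic dma_mmap_coherent() helper from a
 * driver's .mmap file operation:
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct foo_dev *fd = file->private_data;
 *
 *		return dma_mmap_coherent(fd->dev, vma, fd->cpu_addr,
 *					 fd->dma_handle, fd->size);
 *	}
 */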
/*
 * free a page as defined by the above mapping.
 * Must not be called with IRQs disabled.
 */
static void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
	dma_addr_t handle, unsigned long attrs)
{
	int coherent_flag = dev->dma_coherent ? COHERENT : NORMAL;
	struct page **pages;
	size = PAGE_ALIGN(size);

	if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) {
		__iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag);
		return;
	}

	pages = __iommu_get_pages(cpu_addr, attrs);
	if (!pages) {
		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
		return;
	}

	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
		dma_common_free_remap(cpu_addr, size);

	__iommu_remove_mapping(dev, handle, size);
	__iommu_free_buffer(dev, pages, size, attrs);
}

static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t dma_addr,
				 size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct page **pages = __iommu_get_pages(cpu_addr, attrs);

	if (!pages)
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, pages, count, 0, size,
					 GFP_KERNEL);
}

/*
 * Map a part of the scatter-gather list into contiguous io address space
 */
static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
			  size_t size, dma_addr_t *handle,
			  enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova, iova_base;
	int ret = 0;
	unsigned int count;
	struct scatterlist *s;
	int prot;

	size = PAGE_ALIGN(size);
	*handle = DMA_MAPPING_ERROR;

	iova_base = iova = __alloc_iova(mapping, size);
	if (iova == DMA_MAPPING_ERROR)
		return -ENOMEM;

	for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
		phys_addr_t phys = page_to_phys(sg_page(s));
		unsigned int len = PAGE_ALIGN(s->offset + s->length);

		if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);

		prot = __dma_info_to_prot(dir, attrs);

		ret = iommu_map(mapping->domain, iova, phys, len, prot,
				GFP_KERNEL);
		if (ret < 0)
			goto fail;
		count += len >> PAGE_SHIFT;
		iova += len;
	}
	*handle = iova_base;

	return 0;
fail:
	iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
	__free_iova(mapping, iova_base, size);
	return ret;
}
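/*
 * Merge criteria used by arm_iommu_map_sg() below, spelled out: a new
 * chunk is started (and the current one flushed to __map_sg_chunk())
 * whenever the next sg entry has a non-zero offset, the running size is
 * not page aligned, or adding the entry would exceed the device's
 * maximum segment size from dma_get_max_seg_size(). Everything else is
 * merged into one contiguous IOVA range.
 */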
/**
 * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to map
 * @dir: DMA transfer direction
 *
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * The scatter gather list elements are merged together (if possible) and
 * tagged with the appropriate dma address and length. They are obtained via
 * sg_dma_{address,length}.
 */
static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s = sg, *dma = sg, *start = sg;
	int i, count = 0, ret;
	unsigned int offset = s->offset;
	unsigned int size = s->offset + s->length;
	unsigned int max = dma_get_max_seg_size(dev);

	for (i = 1; i < nents; i++) {
		s = sg_next(s);

		s->dma_length = 0;

		if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
			ret = __map_sg_chunk(dev, start, size,
					     &dma->dma_address, dir, attrs);
			if (ret < 0)
				goto bad_mapping;

			dma->dma_address += offset;
			dma->dma_length = size - offset;

			size = offset = s->offset;
			start = s;
			dma = sg_next(dma);
			count += 1;
		}
		size += s->length;
	}
	ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs);
	if (ret < 0)
		goto bad_mapping;

	dma->dma_address += offset;
	dma->dma_length = size - offset;

	return count+1;

bad_mapping:
	for_each_sg(sg, s, count, i)
		__iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
	if (ret == -ENOMEM)
		return ret;
	return -EINVAL;
}

/**
 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 *
 * Unmap a set of streaming mode DMA translations. Again, CPU access
 * rules concerning calls here are the same as for dma_unmap_single().
 */
static void arm_iommu_unmap_sg(struct device *dev,
			       struct scatterlist *sg, int nents,
			       enum dma_data_direction dir,
			       unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (sg_dma_len(s))
			__iommu_remove_mapping(dev, sg_dma_address(s),
					       sg_dma_len(s));
		if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
			__dma_page_dev_to_cpu(sg_page(s), s->offset,
					      s->length, dir);
	}
}

/**
 * arm_iommu_sync_sg_for_cpu
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
static void arm_iommu_sync_sg_for_cpu(struct device *dev,
			struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	if (dev->dma_coherent)
		return;

	for_each_sg(sg, s, nents, i)
		__dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
}

/**
 * arm_iommu_sync_sg_for_device
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
static void arm_iommu_sync_sg_for_device(struct device *dev,
			struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	if (dev->dma_coherent)
		return;

	for_each_sg(sg, s, nents, i)
		__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
}
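/*
 * Illustrative sketch (hypothetical driver): consuming the merged
 * segments produced by arm_iommu_map_sg() through the generic API.
 * program_hw_descriptor() stands in for device-specific code.
 *
 *	struct scatterlist *s;
 *	int i, n;
 *
 *	n = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
 *	for_each_sg(sgl, s, n, i)
 *		program_hw_descriptor(sg_dma_address(s), sg_dma_len(s));
 *	...
 *	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
 */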
/**
 * arm_iommu_map_page
 * @dev: valid struct device pointer
 * @page: page that buffer resides in
 * @offset: offset into page for start of buffer
 * @size: size of buffer to map
 * @dir: DMA transfer direction
 *
 * IOMMU aware version of arm_dma_map_page()
 */
static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
	     unsigned long offset, size_t size, enum dma_data_direction dir,
	     unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t dma_addr;
	int ret, prot, len = PAGE_ALIGN(size + offset);

	if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		__dma_page_cpu_to_dev(page, offset, size, dir);

	dma_addr = __alloc_iova(mapping, len);
	if (dma_addr == DMA_MAPPING_ERROR)
		return dma_addr;

	prot = __dma_info_to_prot(dir, attrs);

	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len,
			prot, GFP_KERNEL);
	if (ret < 0)
		goto fail;

	return dma_addr + offset;
fail:
	__free_iova(mapping, dma_addr, len);
	return DMA_MAPPING_ERROR;
}

/**
 * arm_iommu_unmap_page
 * @dev: valid struct device pointer
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 *
 * IOMMU aware version of arm_dma_unmap_page()
 */
static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page;
	int offset = handle & ~PAGE_MASK;
	int len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
		__dma_page_dev_to_cpu(page, offset, size, dir);
	}

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}
/**
 * arm_iommu_map_resource - map a device resource for DMA
 * @dev: valid struct device pointer
 * @phys_addr: physical address of resource
 * @size: size of resource to map
 * @dir: DMA transfer direction
 */
static dma_addr_t arm_iommu_map_resource(struct device *dev,
		phys_addr_t phys_addr, size_t size,
		enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t dma_addr;
	int ret, prot;
	phys_addr_t addr = phys_addr & PAGE_MASK;
	unsigned int offset = phys_addr & ~PAGE_MASK;
	size_t len = PAGE_ALIGN(size + offset);

	dma_addr = __alloc_iova(mapping, len);
	if (dma_addr == DMA_MAPPING_ERROR)
		return dma_addr;

	prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;

	ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL);
	if (ret < 0)
		goto fail;

	return dma_addr + offset;
fail:
	__free_iova(mapping, dma_addr, len);
	return DMA_MAPPING_ERROR;
}

/**
 * arm_iommu_unmap_resource - unmap a device DMA resource
 * @dev: valid struct device pointer
 * @dma_handle: DMA address to resource
 * @size: size of resource to map
 * @dir: DMA transfer direction
 */
static void arm_iommu_unmap_resource(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = dma_handle & PAGE_MASK;
	unsigned int offset = dma_handle & ~PAGE_MASK;
	size_t len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}

static void arm_iommu_sync_single_for_cpu(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page;
	unsigned int offset = handle & ~PAGE_MASK;

	if (dev->dma_coherent || !iova)
		return;

	page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	__dma_page_dev_to_cpu(page, offset, size, dir);
}

static void arm_iommu_sync_single_for_device(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page;
	unsigned int offset = handle & ~PAGE_MASK;

	if (dev->dma_coherent || !iova)
		return;

	page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	__dma_page_cpu_to_dev(page, offset, size, dir);
}

static const struct dma_map_ops iommu_ops = {
	.alloc		= arm_iommu_alloc_attrs,
	.free		= arm_iommu_free_attrs,
	.mmap		= arm_iommu_mmap_attrs,
	.get_sgtable	= arm_iommu_get_sgtable,

	.map_page		= arm_iommu_map_page,
	.unmap_page		= arm_iommu_unmap_page,
	.sync_single_for_cpu	= arm_iommu_sync_single_for_cpu,
	.sync_single_for_device	= arm_iommu_sync_single_for_device,

	.map_sg			= arm_iommu_map_sg,
	.unmap_sg		= arm_iommu_unmap_sg,
	.sync_sg_for_cpu	= arm_iommu_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_iommu_sync_sg_for_device,

	.map_resource		= arm_iommu_map_resource,
	.unmap_resource		= arm_iommu_unmap_resource,
};
/**
 * arm_iommu_create_mapping
 * @bus: pointer to the bus holding the client device (for IOMMU calls)
 * @base: start address of the valid IO address space
 * @size: maximum size of the valid IO address space
 *
 * Creates a mapping structure which holds information about used/unused
 * IO address ranges, which is required to perform memory allocation and
 * mapping with IOMMU aware functions.
 *
 * The client device needs to be attached to the mapping with the
 * arm_iommu_attach_device function.
 */
struct dma_iommu_mapping *
arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size)
{
	unsigned int bits = size >> PAGE_SHIFT;
	unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
	struct dma_iommu_mapping *mapping;
	int extensions = 1;
	int err = -ENOMEM;

	/* currently only 32-bit DMA address space is supported */
	if (size > DMA_BIT_MASK(32) + 1)
		return ERR_PTR(-ERANGE);

	if (!bitmap_size)
		return ERR_PTR(-EINVAL);

	if (bitmap_size > PAGE_SIZE) {
		extensions = bitmap_size / PAGE_SIZE;
		bitmap_size = PAGE_SIZE;
	}

	mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
	if (!mapping)
		goto err;

	mapping->bitmap_size = bitmap_size;
	mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *),
				   GFP_KERNEL);
	if (!mapping->bitmaps)
		goto err2;

	mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL);
	if (!mapping->bitmaps[0])
		goto err3;

	mapping->nr_bitmaps = 1;
	mapping->extensions = extensions;
	mapping->base = base;
	mapping->bits = BITS_PER_BYTE * bitmap_size;

	spin_lock_init(&mapping->lock);

	mapping->domain = iommu_domain_alloc(bus);
	if (!mapping->domain)
		goto err4;

	kref_init(&mapping->kref);
	return mapping;
err4:
	kfree(mapping->bitmaps[0]);
err3:
	kfree(mapping->bitmaps);
err2:
	kfree(mapping);
err:
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(arm_iommu_create_mapping);

static void release_iommu_mapping(struct kref *kref)
{
	int i;
	struct dma_iommu_mapping *mapping =
		container_of(kref, struct dma_iommu_mapping, kref);

	iommu_domain_free(mapping->domain);
	for (i = 0; i < mapping->nr_bitmaps; i++)
		kfree(mapping->bitmaps[i]);
	kfree(mapping->bitmaps);
	kfree(mapping);
}

static int extend_iommu_mapping(struct dma_iommu_mapping *mapping)
{
	int next_bitmap;

	if (mapping->nr_bitmaps >= mapping->extensions)
		return -EINVAL;

	next_bitmap = mapping->nr_bitmaps;
	mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size,
						GFP_ATOMIC);
	if (!mapping->bitmaps[next_bitmap])
		return -ENOMEM;

	mapping->nr_bitmaps++;

	return 0;
}

void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
{
	if (mapping)
		kref_put(&mapping->kref, release_iommu_mapping);
}
EXPORT_SYMBOL_GPL(arm_iommu_release_mapping);

static int __arm_iommu_attach_device(struct device *dev,
				     struct dma_iommu_mapping *mapping)
{
	int err;

	err = iommu_attach_device(mapping->domain, dev);
	if (err)
		return err;

	kref_get(&mapping->kref);
	to_dma_iommu_mapping(dev) = mapping;

	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
	return 0;
}
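/*
 * Illustrative sketch (hypothetical bus glue; the base address and size
 * are made up): the expected pairing of the exported
 * create/attach/detach/release calls.
 *
 *	struct dma_iommu_mapping *map;
 *
 *	map = arm_iommu_create_mapping(&platform_bus_type, 0x80000000,
 *				       SZ_256M);
 *	if (IS_ERR(map))
 *		return PTR_ERR(map);
 *	if (arm_iommu_attach_device(dev, map)) {
 *		arm_iommu_release_mapping(map);
 *		return -ENODEV;
 *	}
 *	...
 *	arm_iommu_detach_device(dev);
 *	arm_iommu_release_mapping(map);
 */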
/**
 * arm_iommu_attach_device
 * @dev: valid struct device pointer
 * @mapping: io address space mapping structure (returned from
 *	arm_iommu_create_mapping)
 *
 * Attaches specified io address space mapping to the provided device.
 * This replaces the dma operations (dma_map_ops pointer) with the
 * IOMMU aware version.
 *
 * More than one client might be attached to the same io address space
 * mapping.
 */
int arm_iommu_attach_device(struct device *dev,
			    struct dma_iommu_mapping *mapping)
{
	int err;

	err = __arm_iommu_attach_device(dev, mapping);
	if (err)
		return err;

	set_dma_ops(dev, &iommu_ops);
	return 0;
}
EXPORT_SYMBOL_GPL(arm_iommu_attach_device);

/**
 * arm_iommu_detach_device
 * @dev: valid struct device pointer
 *
 * Detaches the provided device from a previously attached map.
 * This overwrites the dma_ops pointer with appropriate non-IOMMU ops.
 */
void arm_iommu_detach_device(struct device *dev)
{
	struct dma_iommu_mapping *mapping;

	mapping = to_dma_iommu_mapping(dev);
	if (!mapping) {
		dev_warn(dev, "Not attached\n");
		return;
	}

	iommu_detach_device(mapping->domain, dev);
	kref_put(&mapping->kref, release_iommu_mapping);
	to_dma_iommu_mapping(dev) = NULL;
	set_dma_ops(dev, NULL);

	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
}
EXPORT_SYMBOL_GPL(arm_iommu_detach_device);

static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
				    const struct iommu_ops *iommu, bool coherent)
{
	struct dma_iommu_mapping *mapping;

	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
	if (IS_ERR(mapping)) {
		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
				size, dev_name(dev));
		return;
	}

	if (__arm_iommu_attach_device(dev, mapping)) {
		pr_warn("Failed to attach device %s to IOMMU mapping\n",
				dev_name(dev));
		arm_iommu_release_mapping(mapping);
		return;
	}

	set_dma_ops(dev, &iommu_ops);
}

static void arm_teardown_iommu_dma_ops(struct device *dev)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);

	if (!mapping)
		return;

	arm_iommu_detach_device(dev);
	arm_iommu_release_mapping(mapping);
}

#else

static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
				    const struct iommu_ops *iommu, bool coherent)
{
}

static void arm_teardown_iommu_dma_ops(struct device *dev) { }

#endif	/* CONFIG_ARM_DMA_USE_IOMMU */
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	/*
	 * Due to legacy code that sets the ->dma_coherent flag from a bus
	 * notifier we can't just assign coherent to the ->dma_coherent flag
	 * here, but instead have to make sure we only set but never clear it
	 * for now.
	 */
	if (coherent)
		dev->dma_coherent = true;

	/*
	 * Don't override the dma_ops if they have already been set. Ideally
	 * this should be the only location where dma_ops are set, remove this
	 * check when all other callers of set_dma_ops will have disappeared.
	 */
	if (dev->dma_ops)
		return;

	if (iommu)
		arm_setup_iommu_dma_ops(dev, dma_base, size, iommu, coherent);

	xen_setup_dma_ops(dev);
	dev->archdata.dma_ops_setup = true;
}

void arch_teardown_dma_ops(struct device *dev)
{
	if (!dev->archdata.dma_ops_setup)
		return;

	arm_teardown_iommu_dma_ops(dev);
	/* Let arch_setup_dma_ops() start again from scratch upon re-probe */
	set_dma_ops(dev, NULL);
}

void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
		enum dma_data_direction dir)
{
	__dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
			      size, dir);
}

void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
		enum dma_data_direction dir)
{
	__dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
			      size, dir);
}

void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
		gfp_t gfp, unsigned long attrs)
{
	return __dma_alloc(dev, size, dma_handle, gfp,
			   __get_dma_pgprot(attrs, PAGE_KERNEL), false,
			   attrs, __builtin_return_address(0));
}

void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
		   dma_addr_t dma_handle, unsigned long attrs)
{
	__arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false);
}