1 /* 2 * linux/arch/arm/mm/dma-mapping.c 3 * 4 * Copyright (C) 2000-2004 Russell King 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 * 10 * DMA uncached mapping support. 11 */ 12 #include <linux/bootmem.h> 13 #include <linux/module.h> 14 #include <linux/mm.h> 15 #include <linux/genalloc.h> 16 #include <linux/gfp.h> 17 #include <linux/errno.h> 18 #include <linux/list.h> 19 #include <linux/init.h> 20 #include <linux/device.h> 21 #include <linux/dma-mapping.h> 22 #include <linux/dma-contiguous.h> 23 #include <linux/highmem.h> 24 #include <linux/memblock.h> 25 #include <linux/slab.h> 26 #include <linux/iommu.h> 27 #include <linux/io.h> 28 #include <linux/vmalloc.h> 29 #include <linux/sizes.h> 30 #include <linux/cma.h> 31 32 #include <asm/memory.h> 33 #include <asm/highmem.h> 34 #include <asm/cacheflush.h> 35 #include <asm/tlbflush.h> 36 #include <asm/mach/arch.h> 37 #include <asm/dma-iommu.h> 38 #include <asm/mach/map.h> 39 #include <asm/system_info.h> 40 #include <asm/dma-contiguous.h> 41 42 #include "dma.h" 43 #include "mm.h" 44 45 struct arm_dma_alloc_args { 46 struct device *dev; 47 size_t size; 48 gfp_t gfp; 49 pgprot_t prot; 50 const void *caller; 51 bool want_vaddr; 52 int coherent_flag; 53 }; 54 55 struct arm_dma_free_args { 56 struct device *dev; 57 size_t size; 58 void *cpu_addr; 59 struct page *page; 60 bool want_vaddr; 61 }; 62 63 #define NORMAL 0 64 #define COHERENT 1 65 66 struct arm_dma_allocator { 67 void *(*alloc)(struct arm_dma_alloc_args *args, 68 struct page **ret_page); 69 void (*free)(struct arm_dma_free_args *args); 70 }; 71 72 struct arm_dma_buffer { 73 struct list_head list; 74 void *virt; 75 struct arm_dma_allocator *allocator; 76 }; 77 78 static LIST_HEAD(arm_dma_bufs); 79 static DEFINE_SPINLOCK(arm_dma_bufs_lock); 80 81 static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) 82 { 83 struct 
arm_dma_buffer *buf, *found = NULL; 84 unsigned long flags; 85 86 spin_lock_irqsave(&arm_dma_bufs_lock, flags); 87 list_for_each_entry(buf, &arm_dma_bufs, list) { 88 if (buf->virt == virt) { 89 list_del(&buf->list); 90 found = buf; 91 break; 92 } 93 } 94 spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); 95 return found; 96 } 97 98 /* 99 * The DMA API is built upon the notion of "buffer ownership". A buffer 100 * is either exclusively owned by the CPU (and therefore may be accessed 101 * by it) or exclusively owned by the DMA device. These helper functions 102 * represent the transitions between these two ownership states. 103 * 104 * Note, however, that on later ARMs, this notion does not work due to 105 * speculative prefetches. We model our approach on the assumption that 106 * the CPU does do speculative prefetches, which means we clean caches 107 * before transfers and delay cache invalidation until transfer completion. 108 * 109 */ 110 static void __dma_page_cpu_to_dev(struct page *, unsigned long, 111 size_t, enum dma_data_direction); 112 static void __dma_page_dev_to_cpu(struct page *, unsigned long, 113 size_t, enum dma_data_direction); 114 115 /** 116 * arm_dma_map_page - map a portion of a page for streaming DMA 117 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 118 * @page: page that buffer resides in 119 * @offset: offset into page for start of buffer 120 * @size: size of buffer to map 121 * @dir: DMA transfer direction 122 * 123 * Ensure that any data held in the cache is appropriately discarded 124 * or written back. 125 * 126 * The device owns this memory once this call has completed. The CPU 127 * can regain ownership by calling dma_unmap_page(). 
128 */ 129 static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, 130 unsigned long offset, size_t size, enum dma_data_direction dir, 131 unsigned long attrs) 132 { 133 if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 134 __dma_page_cpu_to_dev(page, offset, size, dir); 135 return pfn_to_dma(dev, page_to_pfn(page)) + offset; 136 } 137 138 static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, 139 unsigned long offset, size_t size, enum dma_data_direction dir, 140 unsigned long attrs) 141 { 142 return pfn_to_dma(dev, page_to_pfn(page)) + offset; 143 } 144 145 /** 146 * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() 147 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 148 * @handle: DMA address of buffer 149 * @size: size of buffer (same as passed to dma_map_page) 150 * @dir: DMA transfer direction (same as passed to dma_map_page) 151 * 152 * Unmap a page streaming mode DMA translation. The handle and size 153 * must match what was provided in the previous dma_map_page() call. 154 * All other usages are undefined. 155 * 156 * After this call, reads by the CPU to the buffer are guaranteed to see 157 * whatever the device wrote there. 
158 */ 159 static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, 160 size_t size, enum dma_data_direction dir, unsigned long attrs) 161 { 162 if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 163 __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), 164 handle & ~PAGE_MASK, size, dir); 165 } 166 167 static void arm_dma_sync_single_for_cpu(struct device *dev, 168 dma_addr_t handle, size_t size, enum dma_data_direction dir) 169 { 170 unsigned int offset = handle & (PAGE_SIZE - 1); 171 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); 172 __dma_page_dev_to_cpu(page, offset, size, dir); 173 } 174 175 static void arm_dma_sync_single_for_device(struct device *dev, 176 dma_addr_t handle, size_t size, enum dma_data_direction dir) 177 { 178 unsigned int offset = handle & (PAGE_SIZE - 1); 179 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); 180 __dma_page_cpu_to_dev(page, offset, size, dir); 181 } 182 183 struct dma_map_ops arm_dma_ops = { 184 .alloc = arm_dma_alloc, 185 .free = arm_dma_free, 186 .mmap = arm_dma_mmap, 187 .get_sgtable = arm_dma_get_sgtable, 188 .map_page = arm_dma_map_page, 189 .unmap_page = arm_dma_unmap_page, 190 .map_sg = arm_dma_map_sg, 191 .unmap_sg = arm_dma_unmap_sg, 192 .sync_single_for_cpu = arm_dma_sync_single_for_cpu, 193 .sync_single_for_device = arm_dma_sync_single_for_device, 194 .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, 195 .sync_sg_for_device = arm_dma_sync_sg_for_device, 196 }; 197 EXPORT_SYMBOL(arm_dma_ops); 198 199 static void *arm_coherent_dma_alloc(struct device *dev, size_t size, 200 dma_addr_t *handle, gfp_t gfp, unsigned long attrs); 201 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, 202 dma_addr_t handle, unsigned long attrs); 203 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, 204 void *cpu_addr, dma_addr_t dma_addr, size_t size, 205 unsigned long attrs); 206 207 struct dma_map_ops arm_coherent_dma_ops = { 208 .alloc 
= arm_coherent_dma_alloc, 209 .free = arm_coherent_dma_free, 210 .mmap = arm_coherent_dma_mmap, 211 .get_sgtable = arm_dma_get_sgtable, 212 .map_page = arm_coherent_dma_map_page, 213 .map_sg = arm_dma_map_sg, 214 }; 215 EXPORT_SYMBOL(arm_coherent_dma_ops); 216 217 static int __dma_supported(struct device *dev, u64 mask, bool warn) 218 { 219 unsigned long max_dma_pfn; 220 221 /* 222 * If the mask allows for more memory than we can address, 223 * and we actually have that much memory, then we must 224 * indicate that DMA to this device is not supported. 225 */ 226 if (sizeof(mask) != sizeof(dma_addr_t) && 227 mask > (dma_addr_t)~0 && 228 dma_to_pfn(dev, ~0) < max_pfn - 1) { 229 if (warn) { 230 dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", 231 mask); 232 dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); 233 } 234 return 0; 235 } 236 237 max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); 238 239 /* 240 * Translate the device's DMA mask to a PFN limit. This 241 * PFN number includes the page which we can DMA to. 242 */ 243 if (dma_to_pfn(dev, mask) < max_dma_pfn) { 244 if (warn) 245 dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", 246 mask, 247 dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, 248 max_dma_pfn + 1); 249 return 0; 250 } 251 252 return 1; 253 } 254 255 static u64 get_coherent_dma_mask(struct device *dev) 256 { 257 u64 mask = (u64)DMA_BIT_MASK(32); 258 259 if (dev) { 260 mask = dev->coherent_dma_mask; 261 262 /* 263 * Sanity check the DMA mask - it must be non-zero, and 264 * must be able to be satisfied by a DMA allocation. 
265 */ 266 if (mask == 0) { 267 dev_warn(dev, "coherent DMA mask is unset\n"); 268 return 0; 269 } 270 271 if (!__dma_supported(dev, mask, true)) 272 return 0; 273 } 274 275 return mask; 276 } 277 278 static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag) 279 { 280 /* 281 * Ensure that the allocated pages are zeroed, and that any data 282 * lurking in the kernel direct-mapped region is invalidated. 283 */ 284 if (PageHighMem(page)) { 285 phys_addr_t base = __pfn_to_phys(page_to_pfn(page)); 286 phys_addr_t end = base + size; 287 while (size > 0) { 288 void *ptr = kmap_atomic(page); 289 memset(ptr, 0, PAGE_SIZE); 290 if (coherent_flag != COHERENT) 291 dmac_flush_range(ptr, ptr + PAGE_SIZE); 292 kunmap_atomic(ptr); 293 page++; 294 size -= PAGE_SIZE; 295 } 296 if (coherent_flag != COHERENT) 297 outer_flush_range(base, end); 298 } else { 299 void *ptr = page_address(page); 300 memset(ptr, 0, size); 301 if (coherent_flag != COHERENT) { 302 dmac_flush_range(ptr, ptr + size); 303 outer_flush_range(__pa(ptr), __pa(ptr) + size); 304 } 305 } 306 } 307 308 /* 309 * Allocate a DMA buffer for 'dev' of size 'size' using the 310 * specified gfp mask. Note that 'size' must be page aligned. 311 */ 312 static struct page *__dma_alloc_buffer(struct device *dev, size_t size, 313 gfp_t gfp, int coherent_flag) 314 { 315 unsigned long order = get_order(size); 316 struct page *page, *p, *e; 317 318 page = alloc_pages(gfp, order); 319 if (!page) 320 return NULL; 321 322 /* 323 * Now split the huge page and free the excess pages 324 */ 325 split_page(page, order); 326 for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) 327 __free_page(p); 328 329 __dma_clear_buffer(page, size, coherent_flag); 330 331 return page; 332 } 333 334 /* 335 * Free a DMA buffer. 'size' must be page aligned. 
336 */ 337 static void __dma_free_buffer(struct page *page, size_t size) 338 { 339 struct page *e = page + (size >> PAGE_SHIFT); 340 341 while (page < e) { 342 __free_page(page); 343 page++; 344 } 345 } 346 347 #ifdef CONFIG_MMU 348 349 static void *__alloc_from_contiguous(struct device *dev, size_t size, 350 pgprot_t prot, struct page **ret_page, 351 const void *caller, bool want_vaddr, 352 int coherent_flag); 353 354 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, 355 pgprot_t prot, struct page **ret_page, 356 const void *caller, bool want_vaddr); 357 358 static void * 359 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, 360 const void *caller) 361 { 362 /* 363 * DMA allocation can be mapped to user space, so lets 364 * set VM_USERMAP flags too. 365 */ 366 return dma_common_contiguous_remap(page, size, 367 VM_ARM_DMA_CONSISTENT | VM_USERMAP, 368 prot, caller); 369 } 370 371 static void __dma_free_remap(void *cpu_addr, size_t size) 372 { 373 dma_common_free_remap(cpu_addr, size, 374 VM_ARM_DMA_CONSISTENT | VM_USERMAP); 375 } 376 377 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K 378 static struct gen_pool *atomic_pool; 379 380 static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; 381 382 static int __init early_coherent_pool(char *p) 383 { 384 atomic_pool_size = memparse(p, &p); 385 return 0; 386 } 387 early_param("coherent_pool", early_coherent_pool); 388 389 void __init init_dma_coherent_pool_size(unsigned long size) 390 { 391 /* 392 * Catch any attempt to set the pool size too late. 393 */ 394 BUG_ON(atomic_pool); 395 396 /* 397 * Set architecture specific coherent pool size only if 398 * it has not been changed by kernel command line parameter. 399 */ 400 if (atomic_pool_size == DEFAULT_DMA_COHERENT_POOL_SIZE) 401 atomic_pool_size = size; 402 } 403 404 /* 405 * Initialise the coherent pool for atomic allocations. 
406 */ 407 static int __init atomic_pool_init(void) 408 { 409 pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL); 410 gfp_t gfp = GFP_KERNEL | GFP_DMA; 411 struct page *page; 412 void *ptr; 413 414 atomic_pool = gen_pool_create(PAGE_SHIFT, -1); 415 if (!atomic_pool) 416 goto out; 417 /* 418 * The atomic pool is only used for non-coherent allocations 419 * so we must pass NORMAL for coherent_flag. 420 */ 421 if (dev_get_cma_area(NULL)) 422 ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot, 423 &page, atomic_pool_init, true, NORMAL); 424 else 425 ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot, 426 &page, atomic_pool_init, true); 427 if (ptr) { 428 int ret; 429 430 ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr, 431 page_to_phys(page), 432 atomic_pool_size, -1); 433 if (ret) 434 goto destroy_genpool; 435 436 gen_pool_set_algo(atomic_pool, 437 gen_pool_first_fit_order_align, 438 (void *)PAGE_SHIFT); 439 pr_info("DMA: preallocated %zd KiB pool for atomic coherent allocations\n", 440 atomic_pool_size / 1024); 441 return 0; 442 } 443 444 destroy_genpool: 445 gen_pool_destroy(atomic_pool); 446 atomic_pool = NULL; 447 out: 448 pr_err("DMA: failed to allocate %zx KiB pool for atomic coherent allocation\n", 449 atomic_pool_size / 1024); 450 return -ENOMEM; 451 } 452 /* 453 * CMA is activated by core_initcall, so we must be called after it. 
454 */ 455 postcore_initcall(atomic_pool_init); 456 457 struct dma_contig_early_reserve { 458 phys_addr_t base; 459 unsigned long size; 460 }; 461 462 static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; 463 464 static int dma_mmu_remap_num __initdata; 465 466 void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) 467 { 468 dma_mmu_remap[dma_mmu_remap_num].base = base; 469 dma_mmu_remap[dma_mmu_remap_num].size = size; 470 dma_mmu_remap_num++; 471 } 472 473 void __init dma_contiguous_remap(void) 474 { 475 int i; 476 for (i = 0; i < dma_mmu_remap_num; i++) { 477 phys_addr_t start = dma_mmu_remap[i].base; 478 phys_addr_t end = start + dma_mmu_remap[i].size; 479 struct map_desc map; 480 unsigned long addr; 481 482 if (end > arm_lowmem_limit) 483 end = arm_lowmem_limit; 484 if (start >= end) 485 continue; 486 487 map.pfn = __phys_to_pfn(start); 488 map.virtual = __phys_to_virt(start); 489 map.length = end - start; 490 map.type = MT_MEMORY_DMA_READY; 491 492 /* 493 * Clear previous low-memory mapping to ensure that the 494 * TLB does not see any conflicting entries, then flush 495 * the TLB of the old entries before creating new mappings. 496 * 497 * This ensures that any speculatively loaded TLB entries 498 * (even though they may be rare) can not cause any problems, 499 * and ensures that this code is architecturally compliant. 
500 */ 501 for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); 502 addr += PMD_SIZE) 503 pmd_clear(pmd_off_k(addr)); 504 505 flush_tlb_kernel_range(__phys_to_virt(start), 506 __phys_to_virt(end)); 507 508 iotable_init(&map, 1); 509 } 510 } 511 512 static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, 513 void *data) 514 { 515 struct page *page = virt_to_page(addr); 516 pgprot_t prot = *(pgprot_t *)data; 517 518 set_pte_ext(pte, mk_pte(page, prot), 0); 519 return 0; 520 } 521 522 static void __dma_remap(struct page *page, size_t size, pgprot_t prot) 523 { 524 unsigned long start = (unsigned long) page_address(page); 525 unsigned end = start + size; 526 527 apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); 528 flush_tlb_kernel_range(start, end); 529 } 530 531 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, 532 pgprot_t prot, struct page **ret_page, 533 const void *caller, bool want_vaddr) 534 { 535 struct page *page; 536 void *ptr = NULL; 537 /* 538 * __alloc_remap_buffer is only called when the device is 539 * non-coherent 540 */ 541 page = __dma_alloc_buffer(dev, size, gfp, NORMAL); 542 if (!page) 543 return NULL; 544 if (!want_vaddr) 545 goto out; 546 547 ptr = __dma_alloc_remap(page, size, gfp, prot, caller); 548 if (!ptr) { 549 __dma_free_buffer(page, size); 550 return NULL; 551 } 552 553 out: 554 *ret_page = page; 555 return ptr; 556 } 557 558 static void *__alloc_from_pool(size_t size, struct page **ret_page) 559 { 560 unsigned long val; 561 void *ptr = NULL; 562 563 if (!atomic_pool) { 564 WARN(1, "coherent pool not initialised!\n"); 565 return NULL; 566 } 567 568 val = gen_pool_alloc(atomic_pool, size); 569 if (val) { 570 phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); 571 572 *ret_page = phys_to_page(phys); 573 ptr = (void *)val; 574 } 575 576 return ptr; 577 } 578 579 static bool __in_atomic_pool(void *start, size_t size) 580 { 581 return 
addr_in_gen_pool(atomic_pool, (unsigned long)start, size); 582 } 583 584 static int __free_from_pool(void *start, size_t size) 585 { 586 if (!__in_atomic_pool(start, size)) 587 return 0; 588 589 gen_pool_free(atomic_pool, (unsigned long)start, size); 590 591 return 1; 592 } 593 594 static void *__alloc_from_contiguous(struct device *dev, size_t size, 595 pgprot_t prot, struct page **ret_page, 596 const void *caller, bool want_vaddr, 597 int coherent_flag) 598 { 599 unsigned long order = get_order(size); 600 size_t count = size >> PAGE_SHIFT; 601 struct page *page; 602 void *ptr = NULL; 603 604 page = dma_alloc_from_contiguous(dev, count, order); 605 if (!page) 606 return NULL; 607 608 __dma_clear_buffer(page, size, coherent_flag); 609 610 if (!want_vaddr) 611 goto out; 612 613 if (PageHighMem(page)) { 614 ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); 615 if (!ptr) { 616 dma_release_from_contiguous(dev, page, count); 617 return NULL; 618 } 619 } else { 620 __dma_remap(page, size, prot); 621 ptr = page_address(page); 622 } 623 624 out: 625 *ret_page = page; 626 return ptr; 627 } 628 629 static void __free_from_contiguous(struct device *dev, struct page *page, 630 void *cpu_addr, size_t size, bool want_vaddr) 631 { 632 if (want_vaddr) { 633 if (PageHighMem(page)) 634 __dma_free_remap(cpu_addr, size); 635 else 636 __dma_remap(page, size, PAGE_KERNEL); 637 } 638 dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); 639 } 640 641 static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot) 642 { 643 prot = (attrs & DMA_ATTR_WRITE_COMBINE) ? 
644 pgprot_writecombine(prot) : 645 pgprot_dmacoherent(prot); 646 return prot; 647 } 648 649 #define nommu() 0 650 651 #else /* !CONFIG_MMU */ 652 653 #define nommu() 1 654 655 #define __get_dma_pgprot(attrs, prot) __pgprot(0) 656 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL 657 #define __alloc_from_pool(size, ret_page) NULL 658 #define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag) NULL 659 #define __free_from_pool(cpu_addr, size) do { } while (0) 660 #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) 661 #define __dma_free_remap(cpu_addr, size) do { } while (0) 662 663 #endif /* CONFIG_MMU */ 664 665 static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, 666 struct page **ret_page) 667 { 668 struct page *page; 669 /* __alloc_simple_buffer is only called when the device is coherent */ 670 page = __dma_alloc_buffer(dev, size, gfp, COHERENT); 671 if (!page) 672 return NULL; 673 674 *ret_page = page; 675 return page_address(page); 676 } 677 678 static void *simple_allocator_alloc(struct arm_dma_alloc_args *args, 679 struct page **ret_page) 680 { 681 return __alloc_simple_buffer(args->dev, args->size, args->gfp, 682 ret_page); 683 } 684 685 static void simple_allocator_free(struct arm_dma_free_args *args) 686 { 687 __dma_free_buffer(args->page, args->size); 688 } 689 690 static struct arm_dma_allocator simple_allocator = { 691 .alloc = simple_allocator_alloc, 692 .free = simple_allocator_free, 693 }; 694 695 static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, 696 struct page **ret_page) 697 { 698 return __alloc_from_contiguous(args->dev, args->size, args->prot, 699 ret_page, args->caller, 700 args->want_vaddr, args->coherent_flag); 701 } 702 703 static void cma_allocator_free(struct arm_dma_free_args *args) 704 { 705 __free_from_contiguous(args->dev, args->page, args->cpu_addr, 706 args->size, args->want_vaddr); 707 } 708 709 static struct arm_dma_allocator 
cma_allocator = { 710 .alloc = cma_allocator_alloc, 711 .free = cma_allocator_free, 712 }; 713 714 static void *pool_allocator_alloc(struct arm_dma_alloc_args *args, 715 struct page **ret_page) 716 { 717 return __alloc_from_pool(args->size, ret_page); 718 } 719 720 static void pool_allocator_free(struct arm_dma_free_args *args) 721 { 722 __free_from_pool(args->cpu_addr, args->size); 723 } 724 725 static struct arm_dma_allocator pool_allocator = { 726 .alloc = pool_allocator_alloc, 727 .free = pool_allocator_free, 728 }; 729 730 static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, 731 struct page **ret_page) 732 { 733 return __alloc_remap_buffer(args->dev, args->size, args->gfp, 734 args->prot, ret_page, args->caller, 735 args->want_vaddr); 736 } 737 738 static void remap_allocator_free(struct arm_dma_free_args *args) 739 { 740 if (args->want_vaddr) 741 __dma_free_remap(args->cpu_addr, args->size); 742 743 __dma_free_buffer(args->page, args->size); 744 } 745 746 static struct arm_dma_allocator remap_allocator = { 747 .alloc = remap_allocator_alloc, 748 .free = remap_allocator_free, 749 }; 750 751 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 752 gfp_t gfp, pgprot_t prot, bool is_coherent, 753 unsigned long attrs, const void *caller) 754 { 755 u64 mask = get_coherent_dma_mask(dev); 756 struct page *page = NULL; 757 void *addr; 758 bool allowblock, cma; 759 struct arm_dma_buffer *buf; 760 struct arm_dma_alloc_args args = { 761 .dev = dev, 762 .size = PAGE_ALIGN(size), 763 .gfp = gfp, 764 .prot = prot, 765 .caller = caller, 766 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), 767 .coherent_flag = is_coherent ? 
COHERENT : NORMAL, 768 }; 769 770 #ifdef CONFIG_DMA_API_DEBUG 771 u64 limit = (mask + 1) & ~mask; 772 if (limit && size >= limit) { 773 dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", 774 size, mask); 775 return NULL; 776 } 777 #endif 778 779 if (!mask) 780 return NULL; 781 782 buf = kzalloc(sizeof(*buf), 783 gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)); 784 if (!buf) 785 return NULL; 786 787 if (mask < 0xffffffffULL) 788 gfp |= GFP_DMA; 789 790 /* 791 * Following is a work-around (a.k.a. hack) to prevent pages 792 * with __GFP_COMP being passed to split_page() which cannot 793 * handle them. The real problem is that this flag probably 794 * should be 0 on ARM as it is not supported on this 795 * platform; see CONFIG_HUGETLBFS. 796 */ 797 gfp &= ~(__GFP_COMP); 798 args.gfp = gfp; 799 800 *handle = DMA_ERROR_CODE; 801 allowblock = gfpflags_allow_blocking(gfp); 802 cma = allowblock ? dev_get_cma_area(dev) : false; 803 804 if (cma) 805 buf->allocator = &cma_allocator; 806 else if (nommu() || is_coherent) 807 buf->allocator = &simple_allocator; 808 else if (allowblock) 809 buf->allocator = &remap_allocator; 810 else 811 buf->allocator = &pool_allocator; 812 813 addr = buf->allocator->alloc(&args, &page); 814 815 if (page) { 816 unsigned long flags; 817 818 *handle = pfn_to_dma(dev, page_to_pfn(page)); 819 buf->virt = args.want_vaddr ? addr : page; 820 821 spin_lock_irqsave(&arm_dma_bufs_lock, flags); 822 list_add(&buf->list, &arm_dma_bufs); 823 spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); 824 } else { 825 kfree(buf); 826 } 827 828 return args.want_vaddr ? addr : page; 829 } 830 831 /* 832 * Allocate DMA-coherent memory space and return both the kernel remapped 833 * virtual and bus address for that space. 
834 */ 835 void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 836 gfp_t gfp, unsigned long attrs) 837 { 838 pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); 839 840 return __dma_alloc(dev, size, handle, gfp, prot, false, 841 attrs, __builtin_return_address(0)); 842 } 843 844 static void *arm_coherent_dma_alloc(struct device *dev, size_t size, 845 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 846 { 847 return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, 848 attrs, __builtin_return_address(0)); 849 } 850 851 static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, 852 void *cpu_addr, dma_addr_t dma_addr, size_t size, 853 unsigned long attrs) 854 { 855 int ret = -ENXIO; 856 #ifdef CONFIG_MMU 857 unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 858 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 859 unsigned long pfn = dma_to_pfn(dev, dma_addr); 860 unsigned long off = vma->vm_pgoff; 861 862 if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) 863 return ret; 864 865 if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { 866 ret = remap_pfn_range(vma, vma->vm_start, 867 pfn + off, 868 vma->vm_end - vma->vm_start, 869 vma->vm_page_prot); 870 } 871 #endif /* CONFIG_MMU */ 872 873 return ret; 874 } 875 876 /* 877 * Create userspace mapping for the DMA-coherent memory. 
878 */ 879 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, 880 void *cpu_addr, dma_addr_t dma_addr, size_t size, 881 unsigned long attrs) 882 { 883 return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 884 } 885 886 int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, 887 void *cpu_addr, dma_addr_t dma_addr, size_t size, 888 unsigned long attrs) 889 { 890 #ifdef CONFIG_MMU 891 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 892 #endif /* CONFIG_MMU */ 893 return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 894 } 895 896 /* 897 * Free a buffer as defined by the above mapping. 898 */ 899 static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 900 dma_addr_t handle, unsigned long attrs, 901 bool is_coherent) 902 { 903 struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); 904 struct arm_dma_buffer *buf; 905 struct arm_dma_free_args args = { 906 .dev = dev, 907 .size = PAGE_ALIGN(size), 908 .cpu_addr = cpu_addr, 909 .page = page, 910 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), 911 }; 912 913 buf = arm_dma_buffer_find(cpu_addr); 914 if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) 915 return; 916 917 buf->allocator->free(&args); 918 kfree(buf); 919 } 920 921 void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 922 dma_addr_t handle, unsigned long attrs) 923 { 924 __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); 925 } 926 927 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, 928 dma_addr_t handle, unsigned long attrs) 929 { 930 __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); 931 } 932 933 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 934 void *cpu_addr, dma_addr_t handle, size_t size, 935 unsigned long attrs) 936 { 937 struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); 938 int ret; 939 940 ret = sg_alloc_table(sgt, 1, GFP_KERNEL); 941 if 
(unlikely(ret)) 942 return ret; 943 944 sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); 945 return 0; 946 } 947 948 static void dma_cache_maint_page(struct page *page, unsigned long offset, 949 size_t size, enum dma_data_direction dir, 950 void (*op)(const void *, size_t, int)) 951 { 952 unsigned long pfn; 953 size_t left = size; 954 955 pfn = page_to_pfn(page) + offset / PAGE_SIZE; 956 offset %= PAGE_SIZE; 957 958 /* 959 * A single sg entry may refer to multiple physically contiguous 960 * pages. But we still need to process highmem pages individually. 961 * If highmem is not configured then the bulk of this loop gets 962 * optimized out. 963 */ 964 do { 965 size_t len = left; 966 void *vaddr; 967 968 page = pfn_to_page(pfn); 969 970 if (PageHighMem(page)) { 971 if (len + offset > PAGE_SIZE) 972 len = PAGE_SIZE - offset; 973 974 if (cache_is_vipt_nonaliasing()) { 975 vaddr = kmap_atomic(page); 976 op(vaddr + offset, len, dir); 977 kunmap_atomic(vaddr); 978 } else { 979 vaddr = kmap_high_get(page); 980 if (vaddr) { 981 op(vaddr + offset, len, dir); 982 kunmap_high(page); 983 } 984 } 985 } else { 986 vaddr = page_address(page) + offset; 987 op(vaddr, len, dir); 988 } 989 offset = 0; 990 pfn++; 991 left -= len; 992 } while (left); 993 } 994 995 /* 996 * Make an area consistent for devices. 997 * Note: Drivers should NOT use this function directly, as it will break 998 * platforms with CONFIG_DMABOUNCE. 999 * Use the driver DMA support - see dma-mapping.h (dma_sync_*) 1000 */ 1001 static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, 1002 size_t size, enum dma_data_direction dir) 1003 { 1004 phys_addr_t paddr; 1005 1006 dma_cache_maint_page(page, off, size, dir, dmac_map_area); 1007 1008 paddr = page_to_phys(page) + off; 1009 if (dir == DMA_FROM_DEVICE) { 1010 outer_inv_range(paddr, paddr + size); 1011 } else { 1012 outer_clean_range(paddr, paddr + size); 1013 } 1014 /* FIXME: non-speculating: flush on bidirectional mappings? 
*/ 1015 } 1016 1017 static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, 1018 size_t size, enum dma_data_direction dir) 1019 { 1020 phys_addr_t paddr = page_to_phys(page) + off; 1021 1022 /* FIXME: non-speculating: not required */ 1023 /* in any case, don't bother invalidating if DMA to device */ 1024 if (dir != DMA_TO_DEVICE) { 1025 outer_inv_range(paddr, paddr + size); 1026 1027 dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); 1028 } 1029 1030 /* 1031 * Mark the D-cache clean for these pages to avoid extra flushing. 1032 */ 1033 if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { 1034 unsigned long pfn; 1035 size_t left = size; 1036 1037 pfn = page_to_pfn(page) + off / PAGE_SIZE; 1038 off %= PAGE_SIZE; 1039 if (off) { 1040 pfn++; 1041 left -= PAGE_SIZE - off; 1042 } 1043 while (left >= PAGE_SIZE) { 1044 page = pfn_to_page(pfn++); 1045 set_bit(PG_dcache_clean, &page->flags); 1046 left -= PAGE_SIZE; 1047 } 1048 } 1049 } 1050 1051 /** 1052 * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA 1053 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1054 * @sg: list of buffers 1055 * @nents: number of buffers to map 1056 * @dir: DMA transfer direction 1057 * 1058 * Map a set of buffers described by scatterlist in streaming mode for DMA. 1059 * This is the scatter-gather version of the dma_map_single interface. 1060 * Here the scatter gather list elements are each tagged with the 1061 * appropriate dma address and length. They are obtained via 1062 * sg_dma_{address,length}. 1063 * 1064 * Device ownership issues as mentioned for dma_map_single are the same 1065 * here. 
1066 */ 1067 int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 1068 enum dma_data_direction dir, unsigned long attrs) 1069 { 1070 struct dma_map_ops *ops = get_dma_ops(dev); 1071 struct scatterlist *s; 1072 int i, j; 1073 1074 for_each_sg(sg, s, nents, i) { 1075 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1076 s->dma_length = s->length; 1077 #endif 1078 s->dma_address = ops->map_page(dev, sg_page(s), s->offset, 1079 s->length, dir, attrs); 1080 if (dma_mapping_error(dev, s->dma_address)) 1081 goto bad_mapping; 1082 } 1083 return nents; 1084 1085 bad_mapping: 1086 for_each_sg(sg, s, i, j) 1087 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); 1088 return 0; 1089 } 1090 1091 /** 1092 * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1093 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1094 * @sg: list of buffers 1095 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1096 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1097 * 1098 * Unmap a set of streaming mode DMA translations. Again, CPU access 1099 * rules concerning calls here are the same as for dma_unmap_single(). 
1100 */ 1101 void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 1102 enum dma_data_direction dir, unsigned long attrs) 1103 { 1104 struct dma_map_ops *ops = get_dma_ops(dev); 1105 struct scatterlist *s; 1106 1107 int i; 1108 1109 for_each_sg(sg, s, nents, i) 1110 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); 1111 } 1112 1113 /** 1114 * arm_dma_sync_sg_for_cpu 1115 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1116 * @sg: list of buffers 1117 * @nents: number of buffers to map (returned from dma_map_sg) 1118 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1119 */ 1120 void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 1121 int nents, enum dma_data_direction dir) 1122 { 1123 struct dma_map_ops *ops = get_dma_ops(dev); 1124 struct scatterlist *s; 1125 int i; 1126 1127 for_each_sg(sg, s, nents, i) 1128 ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, 1129 dir); 1130 } 1131 1132 /** 1133 * arm_dma_sync_sg_for_device 1134 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1135 * @sg: list of buffers 1136 * @nents: number of buffers to map (returned from dma_map_sg) 1137 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1138 */ 1139 void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 1140 int nents, enum dma_data_direction dir) 1141 { 1142 struct dma_map_ops *ops = get_dma_ops(dev); 1143 struct scatterlist *s; 1144 int i; 1145 1146 for_each_sg(sg, s, nents, i) 1147 ops->sync_single_for_device(dev, sg_dma_address(s), s->length, 1148 dir); 1149 } 1150 1151 /* 1152 * Return whether the given device DMA address mask can be supported 1153 * properly. For example, if your device can only drive the low 24-bits 1154 * during bus mastering, then you would pass 0x00ffffff as the mask 1155 * to this function. 
1156 */ 1157 int dma_supported(struct device *dev, u64 mask) 1158 { 1159 return __dma_supported(dev, mask, false); 1160 } 1161 EXPORT_SYMBOL(dma_supported); 1162 1163 #define PREALLOC_DMA_DEBUG_ENTRIES 4096 1164 1165 static int __init dma_debug_do_init(void) 1166 { 1167 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); 1168 return 0; 1169 } 1170 fs_initcall(dma_debug_do_init); 1171 1172 #ifdef CONFIG_ARM_DMA_USE_IOMMU 1173 1174 /* IOMMU */ 1175 1176 static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); 1177 1178 static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, 1179 size_t size) 1180 { 1181 unsigned int order = get_order(size); 1182 unsigned int align = 0; 1183 unsigned int count, start; 1184 size_t mapping_size = mapping->bits << PAGE_SHIFT; 1185 unsigned long flags; 1186 dma_addr_t iova; 1187 int i; 1188 1189 if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) 1190 order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; 1191 1192 count = PAGE_ALIGN(size) >> PAGE_SHIFT; 1193 align = (1 << order) - 1; 1194 1195 spin_lock_irqsave(&mapping->lock, flags); 1196 for (i = 0; i < mapping->nr_bitmaps; i++) { 1197 start = bitmap_find_next_zero_area(mapping->bitmaps[i], 1198 mapping->bits, 0, count, align); 1199 1200 if (start > mapping->bits) 1201 continue; 1202 1203 bitmap_set(mapping->bitmaps[i], start, count); 1204 break; 1205 } 1206 1207 /* 1208 * No unused range found. Try to extend the existing mapping 1209 * and perform a second attempt to reserve an IO virtual 1210 * address range of size bytes. 
1211 */ 1212 if (i == mapping->nr_bitmaps) { 1213 if (extend_iommu_mapping(mapping)) { 1214 spin_unlock_irqrestore(&mapping->lock, flags); 1215 return DMA_ERROR_CODE; 1216 } 1217 1218 start = bitmap_find_next_zero_area(mapping->bitmaps[i], 1219 mapping->bits, 0, count, align); 1220 1221 if (start > mapping->bits) { 1222 spin_unlock_irqrestore(&mapping->lock, flags); 1223 return DMA_ERROR_CODE; 1224 } 1225 1226 bitmap_set(mapping->bitmaps[i], start, count); 1227 } 1228 spin_unlock_irqrestore(&mapping->lock, flags); 1229 1230 iova = mapping->base + (mapping_size * i); 1231 iova += start << PAGE_SHIFT; 1232 1233 return iova; 1234 } 1235 1236 static inline void __free_iova(struct dma_iommu_mapping *mapping, 1237 dma_addr_t addr, size_t size) 1238 { 1239 unsigned int start, count; 1240 size_t mapping_size = mapping->bits << PAGE_SHIFT; 1241 unsigned long flags; 1242 dma_addr_t bitmap_base; 1243 u32 bitmap_index; 1244 1245 if (!size) 1246 return; 1247 1248 bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; 1249 BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); 1250 1251 bitmap_base = mapping->base + mapping_size * bitmap_index; 1252 1253 start = (addr - bitmap_base) >> PAGE_SHIFT; 1254 1255 if (addr + size > bitmap_base + mapping_size) { 1256 /* 1257 * The address range to be freed reaches into the iova 1258 * range of the next bitmap. This should not happen as 1259 * we don't allow this in __alloc_iova (at the 1260 * moment). 1261 */ 1262 BUG(); 1263 } else 1264 count = size >> PAGE_SHIFT; 1265 1266 spin_lock_irqsave(&mapping->lock, flags); 1267 bitmap_clear(mapping->bitmaps[bitmap_index], start, count); 1268 spin_unlock_irqrestore(&mapping->lock, flags); 1269 } 1270 1271 /* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! 
*/ 1272 static const int iommu_order_array[] = { 9, 8, 4, 0 }; 1273 1274 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, 1275 gfp_t gfp, unsigned long attrs, 1276 int coherent_flag) 1277 { 1278 struct page **pages; 1279 int count = size >> PAGE_SHIFT; 1280 int array_size = count * sizeof(struct page *); 1281 int i = 0; 1282 int order_idx = 0; 1283 1284 if (array_size <= PAGE_SIZE) 1285 pages = kzalloc(array_size, GFP_KERNEL); 1286 else 1287 pages = vzalloc(array_size); 1288 if (!pages) 1289 return NULL; 1290 1291 if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) 1292 { 1293 unsigned long order = get_order(size); 1294 struct page *page; 1295 1296 page = dma_alloc_from_contiguous(dev, count, order); 1297 if (!page) 1298 goto error; 1299 1300 __dma_clear_buffer(page, size, coherent_flag); 1301 1302 for (i = 0; i < count; i++) 1303 pages[i] = page + i; 1304 1305 return pages; 1306 } 1307 1308 /* Go straight to 4K chunks if caller says it's OK. */ 1309 if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) 1310 order_idx = ARRAY_SIZE(iommu_order_array) - 1; 1311 1312 /* 1313 * IOMMU can map any pages, so himem can also be used here 1314 */ 1315 gfp |= __GFP_NOWARN | __GFP_HIGHMEM; 1316 1317 while (count) { 1318 int j, order; 1319 1320 order = iommu_order_array[order_idx]; 1321 1322 /* Drop down when we get small */ 1323 if (__fls(count) < order) { 1324 order_idx++; 1325 continue; 1326 } 1327 1328 if (order) { 1329 /* See if it's easy to allocate a high-order chunk */ 1330 pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); 1331 1332 /* Go down a notch at first sign of pressure */ 1333 if (!pages[i]) { 1334 order_idx++; 1335 continue; 1336 } 1337 } else { 1338 pages[i] = alloc_pages(gfp, 0); 1339 if (!pages[i]) 1340 goto error; 1341 } 1342 1343 if (order) { 1344 split_page(pages[i], order); 1345 j = 1 << order; 1346 while (--j) 1347 pages[i + j] = pages[i] + j; 1348 } 1349 1350 __dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag); 1351 i += 1 << order; 1352 
count -= 1 << order; 1353 } 1354 1355 return pages; 1356 error: 1357 while (i--) 1358 if (pages[i]) 1359 __free_pages(pages[i], 0); 1360 kvfree(pages); 1361 return NULL; 1362 } 1363 1364 static int __iommu_free_buffer(struct device *dev, struct page **pages, 1365 size_t size, unsigned long attrs) 1366 { 1367 int count = size >> PAGE_SHIFT; 1368 int i; 1369 1370 if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { 1371 dma_release_from_contiguous(dev, pages[0], count); 1372 } else { 1373 for (i = 0; i < count; i++) 1374 if (pages[i]) 1375 __free_pages(pages[i], 0); 1376 } 1377 1378 kvfree(pages); 1379 return 0; 1380 } 1381 1382 /* 1383 * Create a CPU mapping for a specified pages 1384 */ 1385 static void * 1386 __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, 1387 const void *caller) 1388 { 1389 return dma_common_pages_remap(pages, size, 1390 VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller); 1391 } 1392 1393 /* 1394 * Create a mapping in device IO address space for specified pages 1395 */ 1396 static dma_addr_t 1397 __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) 1398 { 1399 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1400 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; 1401 dma_addr_t dma_addr, iova; 1402 int i; 1403 1404 dma_addr = __alloc_iova(mapping, size); 1405 if (dma_addr == DMA_ERROR_CODE) 1406 return dma_addr; 1407 1408 iova = dma_addr; 1409 for (i = 0; i < count; ) { 1410 int ret; 1411 1412 unsigned int next_pfn = page_to_pfn(pages[i]) + 1; 1413 phys_addr_t phys = page_to_phys(pages[i]); 1414 unsigned int len, j; 1415 1416 for (j = i + 1; j < count; j++, next_pfn++) 1417 if (page_to_pfn(pages[j]) != next_pfn) 1418 break; 1419 1420 len = (j - i) << PAGE_SHIFT; 1421 ret = iommu_map(mapping->domain, iova, phys, len, 1422 IOMMU_READ|IOMMU_WRITE); 1423 if (ret < 0) 1424 goto fail; 1425 iova += len; 1426 i = j; 1427 } 1428 return dma_addr; 1429 fail: 1430 iommu_unmap(mapping->domain, 
dma_addr, iova-dma_addr); 1431 __free_iova(mapping, dma_addr, size); 1432 return DMA_ERROR_CODE; 1433 } 1434 1435 static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) 1436 { 1437 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1438 1439 /* 1440 * add optional in-page offset from iova to size and align 1441 * result to page size 1442 */ 1443 size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); 1444 iova &= PAGE_MASK; 1445 1446 iommu_unmap(mapping->domain, iova, size); 1447 __free_iova(mapping, iova, size); 1448 return 0; 1449 } 1450 1451 static struct page **__atomic_get_pages(void *addr) 1452 { 1453 struct page *page; 1454 phys_addr_t phys; 1455 1456 phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr); 1457 page = phys_to_page(phys); 1458 1459 return (struct page **)page; 1460 } 1461 1462 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs) 1463 { 1464 struct vm_struct *area; 1465 1466 if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) 1467 return __atomic_get_pages(cpu_addr); 1468 1469 if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) 1470 return cpu_addr; 1471 1472 area = find_vm_area(cpu_addr); 1473 if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) 1474 return area->pages; 1475 return NULL; 1476 } 1477 1478 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, 1479 dma_addr_t *handle, int coherent_flag) 1480 { 1481 struct page *page; 1482 void *addr; 1483 1484 if (coherent_flag == COHERENT) 1485 addr = __alloc_simple_buffer(dev, size, gfp, &page); 1486 else 1487 addr = __alloc_from_pool(size, &page); 1488 if (!addr) 1489 return NULL; 1490 1491 *handle = __iommu_create_mapping(dev, &page, size); 1492 if (*handle == DMA_ERROR_CODE) 1493 goto err_mapping; 1494 1495 return addr; 1496 1497 err_mapping: 1498 __free_from_pool(addr, size); 1499 return NULL; 1500 } 1501 1502 static void __iommu_free_atomic(struct device *dev, void *cpu_addr, 1503 dma_addr_t handle, size_t size, int 
coherent_flag) 1504 { 1505 __iommu_remove_mapping(dev, handle, size); 1506 if (coherent_flag == COHERENT) 1507 __dma_free_buffer(virt_to_page(cpu_addr), size); 1508 else 1509 __free_from_pool(cpu_addr, size); 1510 } 1511 1512 static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, 1513 dma_addr_t *handle, gfp_t gfp, unsigned long attrs, 1514 int coherent_flag) 1515 { 1516 pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); 1517 struct page **pages; 1518 void *addr = NULL; 1519 1520 *handle = DMA_ERROR_CODE; 1521 size = PAGE_ALIGN(size); 1522 1523 if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) 1524 return __iommu_alloc_simple(dev, size, gfp, handle, 1525 coherent_flag); 1526 1527 /* 1528 * Following is a work-around (a.k.a. hack) to prevent pages 1529 * with __GFP_COMP being passed to split_page() which cannot 1530 * handle them. The real problem is that this flag probably 1531 * should be 0 on ARM as it is not supported on this 1532 * platform; see CONFIG_HUGETLBFS. 
1533 */ 1534 gfp &= ~(__GFP_COMP); 1535 1536 pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag); 1537 if (!pages) 1538 return NULL; 1539 1540 *handle = __iommu_create_mapping(dev, pages, size); 1541 if (*handle == DMA_ERROR_CODE) 1542 goto err_buffer; 1543 1544 if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) 1545 return pages; 1546 1547 addr = __iommu_alloc_remap(pages, size, gfp, prot, 1548 __builtin_return_address(0)); 1549 if (!addr) 1550 goto err_mapping; 1551 1552 return addr; 1553 1554 err_mapping: 1555 __iommu_remove_mapping(dev, *handle, size); 1556 err_buffer: 1557 __iommu_free_buffer(dev, pages, size, attrs); 1558 return NULL; 1559 } 1560 1561 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, 1562 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1563 { 1564 return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL); 1565 } 1566 1567 static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size, 1568 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1569 { 1570 return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT); 1571 } 1572 1573 static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 1574 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1575 unsigned long attrs) 1576 { 1577 unsigned long uaddr = vma->vm_start; 1578 unsigned long usize = vma->vm_end - vma->vm_start; 1579 struct page **pages = __iommu_get_pages(cpu_addr, attrs); 1580 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 1581 unsigned long off = vma->vm_pgoff; 1582 1583 if (!pages) 1584 return -ENXIO; 1585 1586 if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) 1587 return -ENXIO; 1588 1589 pages += off; 1590 1591 do { 1592 int ret = vm_insert_page(vma, uaddr, *pages++); 1593 if (ret) { 1594 pr_err("Remapping memory failed: %d\n", ret); 1595 return ret; 1596 } 1597 uaddr += PAGE_SIZE; 1598 usize -= PAGE_SIZE; 1599 } while (usize > 0); 1600 1601 return 0; 1602 } 1603 static 
int arm_iommu_mmap_attrs(struct device *dev, 1604 struct vm_area_struct *vma, void *cpu_addr, 1605 dma_addr_t dma_addr, size_t size, unsigned long attrs) 1606 { 1607 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 1608 1609 return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); 1610 } 1611 1612 static int arm_coherent_iommu_mmap_attrs(struct device *dev, 1613 struct vm_area_struct *vma, void *cpu_addr, 1614 dma_addr_t dma_addr, size_t size, unsigned long attrs) 1615 { 1616 return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); 1617 } 1618 1619 /* 1620 * free a page as defined by the above mapping. 1621 * Must not be called with IRQs disabled. 1622 */ 1623 void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, 1624 dma_addr_t handle, unsigned long attrs, int coherent_flag) 1625 { 1626 struct page **pages; 1627 size = PAGE_ALIGN(size); 1628 1629 if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) { 1630 __iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag); 1631 return; 1632 } 1633 1634 pages = __iommu_get_pages(cpu_addr, attrs); 1635 if (!pages) { 1636 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); 1637 return; 1638 } 1639 1640 if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) { 1641 dma_common_free_remap(cpu_addr, size, 1642 VM_ARM_DMA_CONSISTENT | VM_USERMAP); 1643 } 1644 1645 __iommu_remove_mapping(dev, handle, size); 1646 __iommu_free_buffer(dev, pages, size, attrs); 1647 } 1648 1649 void arm_iommu_free_attrs(struct device *dev, size_t size, 1650 void *cpu_addr, dma_addr_t handle, unsigned long attrs) 1651 { 1652 __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL); 1653 } 1654 1655 void arm_coherent_iommu_free_attrs(struct device *dev, size_t size, 1656 void *cpu_addr, dma_addr_t handle, unsigned long attrs) 1657 { 1658 __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT); 1659 } 1660 1661 static int 
arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, 1662 void *cpu_addr, dma_addr_t dma_addr, 1663 size_t size, unsigned long attrs) 1664 { 1665 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; 1666 struct page **pages = __iommu_get_pages(cpu_addr, attrs); 1667 1668 if (!pages) 1669 return -ENXIO; 1670 1671 return sg_alloc_table_from_pages(sgt, pages, count, 0, size, 1672 GFP_KERNEL); 1673 } 1674 1675 static int __dma_direction_to_prot(enum dma_data_direction dir) 1676 { 1677 int prot; 1678 1679 switch (dir) { 1680 case DMA_BIDIRECTIONAL: 1681 prot = IOMMU_READ | IOMMU_WRITE; 1682 break; 1683 case DMA_TO_DEVICE: 1684 prot = IOMMU_READ; 1685 break; 1686 case DMA_FROM_DEVICE: 1687 prot = IOMMU_WRITE; 1688 break; 1689 default: 1690 prot = 0; 1691 } 1692 1693 return prot; 1694 } 1695 1696 /* 1697 * Map a part of the scatter-gather list into contiguous io address space 1698 */ 1699 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, 1700 size_t size, dma_addr_t *handle, 1701 enum dma_data_direction dir, unsigned long attrs, 1702 bool is_coherent) 1703 { 1704 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1705 dma_addr_t iova, iova_base; 1706 int ret = 0; 1707 unsigned int count; 1708 struct scatterlist *s; 1709 int prot; 1710 1711 size = PAGE_ALIGN(size); 1712 *handle = DMA_ERROR_CODE; 1713 1714 iova_base = iova = __alloc_iova(mapping, size); 1715 if (iova == DMA_ERROR_CODE) 1716 return -ENOMEM; 1717 1718 for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { 1719 phys_addr_t phys = page_to_phys(sg_page(s)); 1720 unsigned int len = PAGE_ALIGN(s->offset + s->length); 1721 1722 if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1723 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); 1724 1725 prot = __dma_direction_to_prot(dir); 1726 1727 ret = iommu_map(mapping->domain, iova, phys, len, prot); 1728 if (ret < 0) 1729 goto fail; 1730 count += len >> PAGE_SHIFT; 1731 iova += len; 1732 } 
1733 *handle = iova_base; 1734 1735 return 0; 1736 fail: 1737 iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); 1738 __free_iova(mapping, iova_base, size); 1739 return ret; 1740 } 1741 1742 static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, 1743 enum dma_data_direction dir, unsigned long attrs, 1744 bool is_coherent) 1745 { 1746 struct scatterlist *s = sg, *dma = sg, *start = sg; 1747 int i, count = 0; 1748 unsigned int offset = s->offset; 1749 unsigned int size = s->offset + s->length; 1750 unsigned int max = dma_get_max_seg_size(dev); 1751 1752 for (i = 1; i < nents; i++) { 1753 s = sg_next(s); 1754 1755 s->dma_address = DMA_ERROR_CODE; 1756 s->dma_length = 0; 1757 1758 if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { 1759 if (__map_sg_chunk(dev, start, size, &dma->dma_address, 1760 dir, attrs, is_coherent) < 0) 1761 goto bad_mapping; 1762 1763 dma->dma_address += offset; 1764 dma->dma_length = size - offset; 1765 1766 size = offset = s->offset; 1767 start = s; 1768 dma = sg_next(dma); 1769 count += 1; 1770 } 1771 size += s->length; 1772 } 1773 if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, 1774 is_coherent) < 0) 1775 goto bad_mapping; 1776 1777 dma->dma_address += offset; 1778 dma->dma_length = size - offset; 1779 1780 return count+1; 1781 1782 bad_mapping: 1783 for_each_sg(sg, s, count, i) 1784 __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); 1785 return 0; 1786 } 1787 1788 /** 1789 * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1790 * @dev: valid struct device pointer 1791 * @sg: list of buffers 1792 * @nents: number of buffers to map 1793 * @dir: DMA transfer direction 1794 * 1795 * Map a set of i/o coherent buffers described by scatterlist in streaming 1796 * mode for DMA. The scatter gather list elements are merged together (if 1797 * possible) and tagged with the appropriate dma address and length. 
They are 1798 * obtained via sg_dma_{address,length}. 1799 */ 1800 int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1801 int nents, enum dma_data_direction dir, unsigned long attrs) 1802 { 1803 return __iommu_map_sg(dev, sg, nents, dir, attrs, true); 1804 } 1805 1806 /** 1807 * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1808 * @dev: valid struct device pointer 1809 * @sg: list of buffers 1810 * @nents: number of buffers to map 1811 * @dir: DMA transfer direction 1812 * 1813 * Map a set of buffers described by scatterlist in streaming mode for DMA. 1814 * The scatter gather list elements are merged together (if possible) and 1815 * tagged with the appropriate dma address and length. They are obtained via 1816 * sg_dma_{address,length}. 1817 */ 1818 int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1819 int nents, enum dma_data_direction dir, unsigned long attrs) 1820 { 1821 return __iommu_map_sg(dev, sg, nents, dir, attrs, false); 1822 } 1823 1824 static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, 1825 int nents, enum dma_data_direction dir, 1826 unsigned long attrs, bool is_coherent) 1827 { 1828 struct scatterlist *s; 1829 int i; 1830 1831 for_each_sg(sg, s, nents, i) { 1832 if (sg_dma_len(s)) 1833 __iommu_remove_mapping(dev, sg_dma_address(s), 1834 sg_dma_len(s)); 1835 if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1836 __dma_page_dev_to_cpu(sg_page(s), s->offset, 1837 s->length, dir); 1838 } 1839 } 1840 1841 /** 1842 * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1843 * @dev: valid struct device pointer 1844 * @sg: list of buffers 1845 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1846 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1847 * 1848 * Unmap a set of streaming mode DMA translations. Again, CPU access 1849 * rules concerning calls here are the same as for dma_unmap_single(). 
1850 */ 1851 void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, 1852 int nents, enum dma_data_direction dir, 1853 unsigned long attrs) 1854 { 1855 __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); 1856 } 1857 1858 /** 1859 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1860 * @dev: valid struct device pointer 1861 * @sg: list of buffers 1862 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1863 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1864 * 1865 * Unmap a set of streaming mode DMA translations. Again, CPU access 1866 * rules concerning calls here are the same as for dma_unmap_single(). 1867 */ 1868 void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 1869 enum dma_data_direction dir, 1870 unsigned long attrs) 1871 { 1872 __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); 1873 } 1874 1875 /** 1876 * arm_iommu_sync_sg_for_cpu 1877 * @dev: valid struct device pointer 1878 * @sg: list of buffers 1879 * @nents: number of buffers to map (returned from dma_map_sg) 1880 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1881 */ 1882 void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 1883 int nents, enum dma_data_direction dir) 1884 { 1885 struct scatterlist *s; 1886 int i; 1887 1888 for_each_sg(sg, s, nents, i) 1889 __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); 1890 1891 } 1892 1893 /** 1894 * arm_iommu_sync_sg_for_device 1895 * @dev: valid struct device pointer 1896 * @sg: list of buffers 1897 * @nents: number of buffers to map (returned from dma_map_sg) 1898 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1899 */ 1900 void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 1901 int nents, enum dma_data_direction dir) 1902 { 1903 struct scatterlist *s; 1904 int i; 1905 1906 for_each_sg(sg, s, nents, i) 1907 __dma_page_cpu_to_dev(sg_page(s), s->offset, 
s->length, dir); 1908 } 1909 1910 1911 /** 1912 * arm_coherent_iommu_map_page 1913 * @dev: valid struct device pointer 1914 * @page: page that buffer resides in 1915 * @offset: offset into page for start of buffer 1916 * @size: size of buffer to map 1917 * @dir: DMA transfer direction 1918 * 1919 * Coherent IOMMU aware version of arm_dma_map_page() 1920 */ 1921 static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, 1922 unsigned long offset, size_t size, enum dma_data_direction dir, 1923 unsigned long attrs) 1924 { 1925 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1926 dma_addr_t dma_addr; 1927 int ret, prot, len = PAGE_ALIGN(size + offset); 1928 1929 dma_addr = __alloc_iova(mapping, len); 1930 if (dma_addr == DMA_ERROR_CODE) 1931 return dma_addr; 1932 1933 prot = __dma_direction_to_prot(dir); 1934 1935 ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); 1936 if (ret < 0) 1937 goto fail; 1938 1939 return dma_addr + offset; 1940 fail: 1941 __free_iova(mapping, dma_addr, len); 1942 return DMA_ERROR_CODE; 1943 } 1944 1945 /** 1946 * arm_iommu_map_page 1947 * @dev: valid struct device pointer 1948 * @page: page that buffer resides in 1949 * @offset: offset into page for start of buffer 1950 * @size: size of buffer to map 1951 * @dir: DMA transfer direction 1952 * 1953 * IOMMU aware version of arm_dma_map_page() 1954 */ 1955 static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, 1956 unsigned long offset, size_t size, enum dma_data_direction dir, 1957 unsigned long attrs) 1958 { 1959 if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1960 __dma_page_cpu_to_dev(page, offset, size, dir); 1961 1962 return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); 1963 } 1964 1965 /** 1966 * arm_coherent_iommu_unmap_page 1967 * @dev: valid struct device pointer 1968 * @handle: DMA address of buffer 1969 * @size: size of buffer (same as passed to dma_map_page) 1970 * @dir: DMA transfer 
direction (same as passed to dma_map_page)
 *
 * Coherent IOMMU aware version of arm_dma_unmap_page()
 *
 * The handle is split into its page-aligned IO address and in-page
 * offset; a handle whose page-aligned part is 0 is ignored.
 */
static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	int offset = handle & ~PAGE_MASK;
	int len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}

/**
 * arm_iommu_unmap_page
 * @dev: valid struct device pointer
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 *
 * IOMMU aware version of arm_dma_unmap_page()
 *
 * Unless DMA_ATTR_SKIP_CPU_SYNC is set, the dev-to-cpu cache maintenance
 * runs before the IO mapping is removed.
 */
static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	/* the page is looked up while the IO mapping still exists */
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	int offset = handle & ~PAGE_MASK;
	int len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__dma_page_dev_to_cpu(page, offset, size, dir);

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}

/*
 * .sync_single_for_cpu hook of iommu_ops: resolve the handle to its
 * page through the IOMMU domain and apply the dev-to-cpu cache
 * maintenance.  A handle whose page-aligned part is 0 is ignored.
 */
static void arm_iommu_sync_single_for_cpu(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	unsigned int offset = handle & ~PAGE_MASK;

	if (!iova)
		return;

	__dma_page_dev_to_cpu(page, offset, size, dir);
}

/*
 * .sync_single_for_device hook of iommu_ops: resolve the handle to its
 * page through the IOMMU domain and apply the cpu-to-dev cache
 * maintenance.  A handle whose page-aligned part is 0 is ignored.
 */
static void arm_iommu_sync_single_for_device(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	unsigned int offset = handle & ~PAGE_MASK;

	if (!iova)
		return;

	__dma_page_cpu_to_dev(page, offset, size, dir);
}

/* DMA operations using the variants that perform CPU cache maintenance. */
struct dma_map_ops iommu_ops = {
	.alloc		= arm_iommu_alloc_attrs,
	.free		= arm_iommu_free_attrs,
	.mmap		= arm_iommu_mmap_attrs,
	.get_sgtable	= arm_iommu_get_sgtable,

	.map_page		= arm_iommu_map_page,
	.unmap_page		= arm_iommu_unmap_page,
	.sync_single_for_cpu	= arm_iommu_sync_single_for_cpu,
	.sync_single_for_device	= arm_iommu_sync_single_for_device,

	.map_sg			= arm_iommu_map_sg,
	.unmap_sg		= arm_iommu_unmap_sg,
	.sync_sg_for_cpu	= arm_iommu_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_iommu_sync_sg_for_device,
};

/*
 * DMA operations using the coherent variants; no sync_* hooks are
 * installed in this table.
 */
struct dma_map_ops iommu_coherent_ops = {
	.alloc		= arm_coherent_iommu_alloc_attrs,
	.free		= arm_coherent_iommu_free_attrs,
	.mmap		= arm_coherent_iommu_mmap_attrs,
	.get_sgtable	= arm_iommu_get_sgtable,

	.map_page	= arm_coherent_iommu_map_page,
	.unmap_page	= arm_coherent_iommu_unmap_page,

	.map_sg		= arm_coherent_iommu_map_sg,
	.unmap_sg	= arm_coherent_iommu_unmap_sg,
};

/**
 * arm_iommu_create_mapping
 * @bus: pointer to the bus holding the client device (for IOMMU calls)
 * @base: start address of the valid IO address space
 * @size: maximum size of the valid IO address space
 *
 * Creates a mapping structure which holds information about used/unused
 * IO address ranges, which is required to perform memory allocation and
 * mapping
with IOMMU aware functions. 2084 * 2085 * The client device need to be attached to the mapping with 2086 * arm_iommu_attach_device function. 2087 */ 2088 struct dma_iommu_mapping * 2089 arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size) 2090 { 2091 unsigned int bits = size >> PAGE_SHIFT; 2092 unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); 2093 struct dma_iommu_mapping *mapping; 2094 int extensions = 1; 2095 int err = -ENOMEM; 2096 2097 /* currently only 32-bit DMA address space is supported */ 2098 if (size > DMA_BIT_MASK(32) + 1) 2099 return ERR_PTR(-ERANGE); 2100 2101 if (!bitmap_size) 2102 return ERR_PTR(-EINVAL); 2103 2104 if (bitmap_size > PAGE_SIZE) { 2105 extensions = bitmap_size / PAGE_SIZE; 2106 bitmap_size = PAGE_SIZE; 2107 } 2108 2109 mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); 2110 if (!mapping) 2111 goto err; 2112 2113 mapping->bitmap_size = bitmap_size; 2114 mapping->bitmaps = kzalloc(extensions * sizeof(unsigned long *), 2115 GFP_KERNEL); 2116 if (!mapping->bitmaps) 2117 goto err2; 2118 2119 mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); 2120 if (!mapping->bitmaps[0]) 2121 goto err3; 2122 2123 mapping->nr_bitmaps = 1; 2124 mapping->extensions = extensions; 2125 mapping->base = base; 2126 mapping->bits = BITS_PER_BYTE * bitmap_size; 2127 2128 spin_lock_init(&mapping->lock); 2129 2130 mapping->domain = iommu_domain_alloc(bus); 2131 if (!mapping->domain) 2132 goto err4; 2133 2134 kref_init(&mapping->kref); 2135 return mapping; 2136 err4: 2137 kfree(mapping->bitmaps[0]); 2138 err3: 2139 kfree(mapping->bitmaps); 2140 err2: 2141 kfree(mapping); 2142 err: 2143 return ERR_PTR(err); 2144 } 2145 EXPORT_SYMBOL_GPL(arm_iommu_create_mapping); 2146 2147 static void release_iommu_mapping(struct kref *kref) 2148 { 2149 int i; 2150 struct dma_iommu_mapping *mapping = 2151 container_of(kref, struct dma_iommu_mapping, kref); 2152 2153 iommu_domain_free(mapping->domain); 2154 for (i = 0; i < 
mapping->nr_bitmaps; i++) 2155 kfree(mapping->bitmaps[i]); 2156 kfree(mapping->bitmaps); 2157 kfree(mapping); 2158 } 2159 2160 static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) 2161 { 2162 int next_bitmap; 2163 2164 if (mapping->nr_bitmaps >= mapping->extensions) 2165 return -EINVAL; 2166 2167 next_bitmap = mapping->nr_bitmaps; 2168 mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, 2169 GFP_ATOMIC); 2170 if (!mapping->bitmaps[next_bitmap]) 2171 return -ENOMEM; 2172 2173 mapping->nr_bitmaps++; 2174 2175 return 0; 2176 } 2177 2178 void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) 2179 { 2180 if (mapping) 2181 kref_put(&mapping->kref, release_iommu_mapping); 2182 } 2183 EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); 2184 2185 static int __arm_iommu_attach_device(struct device *dev, 2186 struct dma_iommu_mapping *mapping) 2187 { 2188 int err; 2189 2190 err = iommu_attach_device(mapping->domain, dev); 2191 if (err) 2192 return err; 2193 2194 kref_get(&mapping->kref); 2195 to_dma_iommu_mapping(dev) = mapping; 2196 2197 pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); 2198 return 0; 2199 } 2200 2201 /** 2202 * arm_iommu_attach_device 2203 * @dev: valid struct device pointer 2204 * @mapping: io address space mapping structure (returned from 2205 * arm_iommu_create_mapping) 2206 * 2207 * Attaches specified io address space mapping to the provided device. 2208 * This replaces the dma operations (dma_map_ops pointer) with the 2209 * IOMMU aware version. 2210 * 2211 * More than one client might be attached to the same io address space 2212 * mapping. 
2213 */ 2214 int arm_iommu_attach_device(struct device *dev, 2215 struct dma_iommu_mapping *mapping) 2216 { 2217 int err; 2218 2219 err = __arm_iommu_attach_device(dev, mapping); 2220 if (err) 2221 return err; 2222 2223 set_dma_ops(dev, &iommu_ops); 2224 return 0; 2225 } 2226 EXPORT_SYMBOL_GPL(arm_iommu_attach_device); 2227 2228 static void __arm_iommu_detach_device(struct device *dev) 2229 { 2230 struct dma_iommu_mapping *mapping; 2231 2232 mapping = to_dma_iommu_mapping(dev); 2233 if (!mapping) { 2234 dev_warn(dev, "Not attached\n"); 2235 return; 2236 } 2237 2238 iommu_detach_device(mapping->domain, dev); 2239 kref_put(&mapping->kref, release_iommu_mapping); 2240 to_dma_iommu_mapping(dev) = NULL; 2241 2242 pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); 2243 } 2244 2245 /** 2246 * arm_iommu_detach_device 2247 * @dev: valid struct device pointer 2248 * 2249 * Detaches the provided device from a previously attached map. 2250 * This voids the dma operations (dma_map_ops pointer) 2251 */ 2252 void arm_iommu_detach_device(struct device *dev) 2253 { 2254 __arm_iommu_detach_device(dev); 2255 set_dma_ops(dev, NULL); 2256 } 2257 EXPORT_SYMBOL_GPL(arm_iommu_detach_device); 2258 2259 static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) 2260 { 2261 return coherent ? 
&iommu_coherent_ops : &iommu_ops; 2262 } 2263 2264 static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, 2265 const struct iommu_ops *iommu) 2266 { 2267 struct dma_iommu_mapping *mapping; 2268 2269 if (!iommu) 2270 return false; 2271 2272 mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); 2273 if (IS_ERR(mapping)) { 2274 pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", 2275 size, dev_name(dev)); 2276 return false; 2277 } 2278 2279 if (__arm_iommu_attach_device(dev, mapping)) { 2280 pr_warn("Failed to attached device %s to IOMMU_mapping\n", 2281 dev_name(dev)); 2282 arm_iommu_release_mapping(mapping); 2283 return false; 2284 } 2285 2286 return true; 2287 } 2288 2289 static void arm_teardown_iommu_dma_ops(struct device *dev) 2290 { 2291 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 2292 2293 if (!mapping) 2294 return; 2295 2296 __arm_iommu_detach_device(dev); 2297 arm_iommu_release_mapping(mapping); 2298 } 2299 2300 #else 2301 2302 static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, 2303 const struct iommu_ops *iommu) 2304 { 2305 return false; 2306 } 2307 2308 static void arm_teardown_iommu_dma_ops(struct device *dev) { } 2309 2310 #define arm_get_iommu_dma_map_ops arm_get_dma_map_ops 2311 2312 #endif /* CONFIG_ARM_DMA_USE_IOMMU */ 2313 2314 static struct dma_map_ops *arm_get_dma_map_ops(bool coherent) 2315 { 2316 return coherent ? 
&arm_coherent_dma_ops : &arm_dma_ops; 2317 } 2318 2319 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 2320 const struct iommu_ops *iommu, bool coherent) 2321 { 2322 struct dma_map_ops *dma_ops; 2323 2324 dev->archdata.dma_coherent = coherent; 2325 if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu)) 2326 dma_ops = arm_get_iommu_dma_map_ops(coherent); 2327 else 2328 dma_ops = arm_get_dma_map_ops(coherent); 2329 2330 set_dma_ops(dev, dma_ops); 2331 } 2332 2333 void arch_teardown_dma_ops(struct device *dev) 2334 { 2335 arm_teardown_iommu_dma_ops(dev); 2336 } 2337