1 /* 2 * linux/arch/arm/mm/dma-mapping.c 3 * 4 * Copyright (C) 2000-2004 Russell King 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 * 10 * DMA uncached mapping support. 11 */ 12 #include <linux/module.h> 13 #include <linux/mm.h> 14 #include <linux/genalloc.h> 15 #include <linux/gfp.h> 16 #include <linux/errno.h> 17 #include <linux/list.h> 18 #include <linux/init.h> 19 #include <linux/device.h> 20 #include <linux/dma-mapping.h> 21 #include <linux/dma-contiguous.h> 22 #include <linux/highmem.h> 23 #include <linux/memblock.h> 24 #include <linux/slab.h> 25 #include <linux/iommu.h> 26 #include <linux/io.h> 27 #include <linux/vmalloc.h> 28 #include <linux/sizes.h> 29 #include <linux/cma.h> 30 31 #include <asm/memory.h> 32 #include <asm/highmem.h> 33 #include <asm/cacheflush.h> 34 #include <asm/tlbflush.h> 35 #include <asm/mach/arch.h> 36 #include <asm/dma-iommu.h> 37 #include <asm/mach/map.h> 38 #include <asm/system_info.h> 39 #include <asm/dma-contiguous.h> 40 41 #include "dma.h" 42 #include "mm.h" 43 44 struct arm_dma_alloc_args { 45 struct device *dev; 46 size_t size; 47 gfp_t gfp; 48 pgprot_t prot; 49 const void *caller; 50 bool want_vaddr; 51 int coherent_flag; 52 }; 53 54 struct arm_dma_free_args { 55 struct device *dev; 56 size_t size; 57 void *cpu_addr; 58 struct page *page; 59 bool want_vaddr; 60 }; 61 62 #define NORMAL 0 63 #define COHERENT 1 64 65 struct arm_dma_allocator { 66 void *(*alloc)(struct arm_dma_alloc_args *args, 67 struct page **ret_page); 68 void (*free)(struct arm_dma_free_args *args); 69 }; 70 71 struct arm_dma_buffer { 72 struct list_head list; 73 void *virt; 74 struct arm_dma_allocator *allocator; 75 }; 76 77 static LIST_HEAD(arm_dma_bufs); 78 static DEFINE_SPINLOCK(arm_dma_bufs_lock); 79 80 static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) 81 { 82 struct arm_dma_buffer *buf, *found = NULL; 83 unsigned long flags; 84 85 spin_lock_irqsave(&arm_dma_bufs_lock, flags); 86 list_for_each_entry(buf, &arm_dma_bufs, list) { 87 if (buf->virt == virt) { 88 list_del(&buf->list); 89 found = buf; 90 break; 91 } 92 } 93 spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); 94 return found; 95 } 96 97 /* 98 * The DMA API is built upon the notion of "buffer ownership". A buffer 99 * is either exclusively owned by the CPU (and therefore may be accessed 100 * by it) or exclusively owned by the DMA device. These helper functions 101 * represent the transitions between these two ownership states. 102 * 103 * Note, however, that on later ARMs, this notion does not work due to 104 * speculative prefetches. We model our approach on the assumption that 105 * the CPU does do speculative prefetches, which means we clean caches 106 * before transfers and delay cache invalidation until transfer completion. 107 * 108 */ 109 static void __dma_page_cpu_to_dev(struct page *, unsigned long, 110 size_t, enum dma_data_direction); 111 static void __dma_page_dev_to_cpu(struct page *, unsigned long, 112 size_t, enum dma_data_direction); 113 114 /** 115 * arm_dma_map_page - map a portion of a page for streaming DMA 116 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 117 * @page: page that buffer resides in 118 * @offset: offset into page for start of buffer 119 * @size: size of buffer to map 120 * @dir: DMA transfer direction 121 * 122 * Ensure that any data held in the cache is appropriately discarded 123 * or written back. 
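 *
 * A hypothetical driver-side sketch (not part of this file), assuming
 * "dev" is a valid struct device and "buf" is a kmalloc'd buffer of
 * "len" bytes, reaching this code via the generic DMA API wrappers:
 *
 *	dma_addr_t dma = dma_map_page(dev, virt_to_page(buf),
 *				      offset_in_page(buf), len,
 *				      DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, dma))
 *		return -ENOMEM;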
124 * 125 * The device owns this memory once this call has completed. The CPU 126 * can regain ownership by calling dma_unmap_page(). 127 */ 128 static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, 129 unsigned long offset, size_t size, enum dma_data_direction dir, 130 unsigned long attrs) 131 { 132 if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 133 __dma_page_cpu_to_dev(page, offset, size, dir); 134 return pfn_to_dma(dev, page_to_pfn(page)) + offset; 135 } 136 137 static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, 138 unsigned long offset, size_t size, enum dma_data_direction dir, 139 unsigned long attrs) 140 { 141 return pfn_to_dma(dev, page_to_pfn(page)) + offset; 142 } 143 144 /** 145 * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() 146 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 147 * @handle: DMA address of buffer 148 * @size: size of buffer (same as passed to dma_map_page) 149 * @dir: DMA transfer direction (same as passed to dma_map_page) 150 * 151 * Unmap a page streaming mode DMA translation. The handle and size 152 * must match what was provided in the previous dma_map_page() call. 153 * All other usages are undefined. 154 * 155 * After this call, reads by the CPU to the buffer are guaranteed to see 156 * whatever the device wrote there. 157 */ 158 static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, 159 size_t size, enum dma_data_direction dir, unsigned long attrs) 160 { 161 if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 162 __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), 163 handle & ~PAGE_MASK, size, dir); 164 } 165 166 static void arm_dma_sync_single_for_cpu(struct device *dev, 167 dma_addr_t handle, size_t size, enum dma_data_direction dir) 168 { 169 unsigned int offset = handle & (PAGE_SIZE - 1); 170 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); 171 __dma_page_dev_to_cpu(page, offset, size, dir); 172 } 173 174 static void arm_dma_sync_single_for_device(struct device *dev, 175 dma_addr_t handle, size_t size, enum dma_data_direction dir) 176 { 177 unsigned int offset = handle & (PAGE_SIZE - 1); 178 struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); 179 __dma_page_cpu_to_dev(page, offset, size, dir); 180 } 181 182 static int arm_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 183 { 184 return dma_addr == ARM_MAPPING_ERROR; 185 } 186 187 const struct dma_map_ops arm_dma_ops = { 188 .alloc = arm_dma_alloc, 189 .free = arm_dma_free, 190 .mmap = arm_dma_mmap, 191 .get_sgtable = arm_dma_get_sgtable, 192 .map_page = arm_dma_map_page, 193 .unmap_page = arm_dma_unmap_page, 194 .map_sg = arm_dma_map_sg, 195 .unmap_sg = arm_dma_unmap_sg, 196 .sync_single_for_cpu = arm_dma_sync_single_for_cpu, 197 .sync_single_for_device = arm_dma_sync_single_for_device, 198 .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, 199 .sync_sg_for_device = arm_dma_sync_sg_for_device, 200 .mapping_error = arm_dma_mapping_error, 201 .dma_supported = arm_dma_supported, 202 }; 203 EXPORT_SYMBOL(arm_dma_ops); 204 205 static void *arm_coherent_dma_alloc(struct device *dev, size_t size, 206 dma_addr_t *handle, gfp_t gfp, unsigned long attrs); 207 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, 208 dma_addr_t handle, unsigned long attrs); 209 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, 210 void *cpu_addr, dma_addr_t dma_addr, size_t size, 211 unsigned long attrs); 212 213 const 
struct dma_map_ops arm_coherent_dma_ops = { 214 .alloc = arm_coherent_dma_alloc, 215 .free = arm_coherent_dma_free, 216 .mmap = arm_coherent_dma_mmap, 217 .get_sgtable = arm_dma_get_sgtable, 218 .map_page = arm_coherent_dma_map_page, 219 .map_sg = arm_dma_map_sg, 220 .mapping_error = arm_dma_mapping_error, 221 .dma_supported = arm_dma_supported, 222 }; 223 EXPORT_SYMBOL(arm_coherent_dma_ops); 224 225 static int __dma_supported(struct device *dev, u64 mask, bool warn) 226 { 227 unsigned long max_dma_pfn; 228 229 /* 230 * If the mask allows for more memory than we can address, 231 * and we actually have that much memory, then we must 232 * indicate that DMA to this device is not supported. 233 */ 234 if (sizeof(mask) != sizeof(dma_addr_t) && 235 mask > (dma_addr_t)~0 && 236 dma_to_pfn(dev, ~0) < max_pfn - 1) { 237 if (warn) { 238 dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", 239 mask); 240 dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); 241 } 242 return 0; 243 } 244 245 max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); 246 247 /* 248 * Translate the device's DMA mask to a PFN limit. This 249 * PFN number includes the page which we can DMA to. 250 */ 251 if (dma_to_pfn(dev, mask) < max_dma_pfn) { 252 if (warn) 253 dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", 254 mask, 255 dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, 256 max_dma_pfn + 1); 257 return 0; 258 } 259 260 return 1; 261 } 262 263 static u64 get_coherent_dma_mask(struct device *dev) 264 { 265 u64 mask = (u64)DMA_BIT_MASK(32); 266 267 if (dev) { 268 mask = dev->coherent_dma_mask; 269 270 /* 271 * Sanity check the DMA mask - it must be non-zero, and 272 * must be able to be satisfied by a DMA allocation. 273 */ 274 if (mask == 0) { 275 dev_warn(dev, "coherent DMA mask is unset\n"); 276 return 0; 277 } 278 279 if (!__dma_supported(dev, mask, true)) 280 return 0; 281 } 282 283 return mask; 284 } 285 286 static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag) 287 { 288 /* 289 * Ensure that the allocated pages are zeroed, and that any data 290 * lurking in the kernel direct-mapped region is invalidated. 291 */ 292 if (PageHighMem(page)) { 293 phys_addr_t base = __pfn_to_phys(page_to_pfn(page)); 294 phys_addr_t end = base + size; 295 while (size > 0) { 296 void *ptr = kmap_atomic(page); 297 memset(ptr, 0, PAGE_SIZE); 298 if (coherent_flag != COHERENT) 299 dmac_flush_range(ptr, ptr + PAGE_SIZE); 300 kunmap_atomic(ptr); 301 page++; 302 size -= PAGE_SIZE; 303 } 304 if (coherent_flag != COHERENT) 305 outer_flush_range(base, end); 306 } else { 307 void *ptr = page_address(page); 308 memset(ptr, 0, size); 309 if (coherent_flag != COHERENT) { 310 dmac_flush_range(ptr, ptr + size); 311 outer_flush_range(__pa(ptr), __pa(ptr) + size); 312 } 313 } 314 } 315 316 /* 317 * Allocate a DMA buffer for 'dev' of size 'size' using the 318 * specified gfp mask. Note that 'size' must be page aligned. 
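 * Callers in this file guarantee this by passing PAGE_ALIGN()ed sizes
 * (see for example __dma_alloc() below).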
 */
static struct page *__dma_alloc_buffer(struct device *dev, size_t size,
				       gfp_t gfp, int coherent_flag)
{
	unsigned long order = get_order(size);
	struct page *page, *p, *e;

	page = alloc_pages(gfp, order);
	if (!page)
		return NULL;

	/*
	 * Now split the huge page and free the excess pages
	 */
	split_page(page, order);
	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
		__free_page(p);

	__dma_clear_buffer(page, size, coherent_flag);

	return page;
}

/*
 * Free a DMA buffer. 'size' must be page aligned.
 */
static void __dma_free_buffer(struct page *page, size_t size)
{
	struct page *e = page + (size >> PAGE_SHIFT);

	while (page < e) {
		__free_page(page);
		page++;
	}
}

static void *__alloc_from_contiguous(struct device *dev, size_t size,
				     pgprot_t prot, struct page **ret_page,
				     const void *caller, bool want_vaddr,
				     int coherent_flag, gfp_t gfp);

static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
				  pgprot_t prot, struct page **ret_page,
				  const void *caller, bool want_vaddr);

static void *
__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
		  const void *caller)
{
	/*
	 * DMA allocation can be mapped to user space, so let's
	 * set VM_USERMAP flags too.
	 */
	return dma_common_contiguous_remap(page, size,
					   VM_ARM_DMA_CONSISTENT | VM_USERMAP,
					   prot, caller);
}

static void __dma_free_remap(void *cpu_addr, size_t size)
{
	dma_common_free_remap(cpu_addr, size,
			      VM_ARM_DMA_CONSISTENT | VM_USERMAP);
}

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static struct gen_pool *atomic_pool __ro_after_init;

static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);

/*
 * Initialise the coherent pool for atomic allocations.
 */
static int __init atomic_pool_init(void)
{
	pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL);
	gfp_t gfp = GFP_KERNEL | GFP_DMA;
	struct page *page;
	void *ptr;

	atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
	if (!atomic_pool)
		goto out;
	/*
	 * The atomic pool is only used for non-coherent allocations
	 * so we must pass NORMAL for coherent_flag.
	 */
	if (dev_get_cma_area(NULL))
		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
				      &page, atomic_pool_init, true, NORMAL,
				      GFP_KERNEL);
	else
		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
					   &page, atomic_pool_init, true);
	if (ptr) {
		int ret;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto destroy_genpool;

		gen_pool_set_algo(atomic_pool,
				gen_pool_first_fit_order_align,
				NULL);
		pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}

destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}
/*
 * CMA is activated by core_initcall, so we must be called after it.
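 *
 * The pool defaults to 256 KiB (DEFAULT_DMA_COHERENT_POOL_SIZE) and can be
 * resized with the "coherent_pool=" boot parameter, parsed by memparse()
 * above; e.g. booting with "coherent_pool=1M" requests a 1 MiB pool.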
446 */ 447 postcore_initcall(atomic_pool_init); 448 449 struct dma_contig_early_reserve { 450 phys_addr_t base; 451 unsigned long size; 452 }; 453 454 static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; 455 456 static int dma_mmu_remap_num __initdata; 457 458 void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) 459 { 460 dma_mmu_remap[dma_mmu_remap_num].base = base; 461 dma_mmu_remap[dma_mmu_remap_num].size = size; 462 dma_mmu_remap_num++; 463 } 464 465 void __init dma_contiguous_remap(void) 466 { 467 int i; 468 for (i = 0; i < dma_mmu_remap_num; i++) { 469 phys_addr_t start = dma_mmu_remap[i].base; 470 phys_addr_t end = start + dma_mmu_remap[i].size; 471 struct map_desc map; 472 unsigned long addr; 473 474 if (end > arm_lowmem_limit) 475 end = arm_lowmem_limit; 476 if (start >= end) 477 continue; 478 479 map.pfn = __phys_to_pfn(start); 480 map.virtual = __phys_to_virt(start); 481 map.length = end - start; 482 map.type = MT_MEMORY_DMA_READY; 483 484 /* 485 * Clear previous low-memory mapping to ensure that the 486 * TLB does not see any conflicting entries, then flush 487 * the TLB of the old entries before creating new mappings. 488 * 489 * This ensures that any speculatively loaded TLB entries 490 * (even though they may be rare) can not cause any problems, 491 * and ensures that this code is architecturally compliant. 492 */ 493 for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); 494 addr += PMD_SIZE) 495 pmd_clear(pmd_off_k(addr)); 496 497 flush_tlb_kernel_range(__phys_to_virt(start), 498 __phys_to_virt(end)); 499 500 iotable_init(&map, 1); 501 } 502 } 503 504 static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, 505 void *data) 506 { 507 struct page *page = virt_to_page(addr); 508 pgprot_t prot = *(pgprot_t *)data; 509 510 set_pte_ext(pte, mk_pte(page, prot), 0); 511 return 0; 512 } 513 514 static void __dma_remap(struct page *page, size_t size, pgprot_t prot) 515 { 516 unsigned long start = (unsigned long) page_address(page); 517 unsigned end = start + size; 518 519 apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); 520 flush_tlb_kernel_range(start, end); 521 } 522 523 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, 524 pgprot_t prot, struct page **ret_page, 525 const void *caller, bool want_vaddr) 526 { 527 struct page *page; 528 void *ptr = NULL; 529 /* 530 * __alloc_remap_buffer is only called when the device is 531 * non-coherent 532 */ 533 page = __dma_alloc_buffer(dev, size, gfp, NORMAL); 534 if (!page) 535 return NULL; 536 if (!want_vaddr) 537 goto out; 538 539 ptr = __dma_alloc_remap(page, size, gfp, prot, caller); 540 if (!ptr) { 541 __dma_free_buffer(page, size); 542 return NULL; 543 } 544 545 out: 546 *ret_page = page; 547 return ptr; 548 } 549 550 static void *__alloc_from_pool(size_t size, struct page **ret_page) 551 { 552 unsigned long val; 553 void *ptr = NULL; 554 555 if (!atomic_pool) { 556 WARN(1, "coherent pool not initialised!\n"); 557 return NULL; 558 } 559 560 val = gen_pool_alloc(atomic_pool, size); 561 if (val) { 562 phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); 563 564 *ret_page = phys_to_page(phys); 565 ptr = (void *)val; 566 } 567 568 return ptr; 569 } 570 571 static bool __in_atomic_pool(void *start, size_t size) 572 { 573 return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); 574 } 575 576 static int __free_from_pool(void *start, size_t size) 577 { 578 if (!__in_atomic_pool(start, size)) 579 return 
0; 580 581 gen_pool_free(atomic_pool, (unsigned long)start, size); 582 583 return 1; 584 } 585 586 static void *__alloc_from_contiguous(struct device *dev, size_t size, 587 pgprot_t prot, struct page **ret_page, 588 const void *caller, bool want_vaddr, 589 int coherent_flag, gfp_t gfp) 590 { 591 unsigned long order = get_order(size); 592 size_t count = size >> PAGE_SHIFT; 593 struct page *page; 594 void *ptr = NULL; 595 596 page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN); 597 if (!page) 598 return NULL; 599 600 __dma_clear_buffer(page, size, coherent_flag); 601 602 if (!want_vaddr) 603 goto out; 604 605 if (PageHighMem(page)) { 606 ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); 607 if (!ptr) { 608 dma_release_from_contiguous(dev, page, count); 609 return NULL; 610 } 611 } else { 612 __dma_remap(page, size, prot); 613 ptr = page_address(page); 614 } 615 616 out: 617 *ret_page = page; 618 return ptr; 619 } 620 621 static void __free_from_contiguous(struct device *dev, struct page *page, 622 void *cpu_addr, size_t size, bool want_vaddr) 623 { 624 if (want_vaddr) { 625 if (PageHighMem(page)) 626 __dma_free_remap(cpu_addr, size); 627 else 628 __dma_remap(page, size, PAGE_KERNEL); 629 } 630 dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); 631 } 632 633 static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot) 634 { 635 prot = (attrs & DMA_ATTR_WRITE_COMBINE) ? 636 pgprot_writecombine(prot) : 637 pgprot_dmacoherent(prot); 638 return prot; 639 } 640 641 static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, 642 struct page **ret_page) 643 { 644 struct page *page; 645 /* __alloc_simple_buffer is only called when the device is coherent */ 646 page = __dma_alloc_buffer(dev, size, gfp, COHERENT); 647 if (!page) 648 return NULL; 649 650 *ret_page = page; 651 return page_address(page); 652 } 653 654 static void *simple_allocator_alloc(struct arm_dma_alloc_args *args, 655 struct page **ret_page) 656 { 657 return __alloc_simple_buffer(args->dev, args->size, args->gfp, 658 ret_page); 659 } 660 661 static void simple_allocator_free(struct arm_dma_free_args *args) 662 { 663 __dma_free_buffer(args->page, args->size); 664 } 665 666 static struct arm_dma_allocator simple_allocator = { 667 .alloc = simple_allocator_alloc, 668 .free = simple_allocator_free, 669 }; 670 671 static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, 672 struct page **ret_page) 673 { 674 return __alloc_from_contiguous(args->dev, args->size, args->prot, 675 ret_page, args->caller, 676 args->want_vaddr, args->coherent_flag, 677 args->gfp); 678 } 679 680 static void cma_allocator_free(struct arm_dma_free_args *args) 681 { 682 __free_from_contiguous(args->dev, args->page, args->cpu_addr, 683 args->size, args->want_vaddr); 684 } 685 686 static struct arm_dma_allocator cma_allocator = { 687 .alloc = cma_allocator_alloc, 688 .free = cma_allocator_free, 689 }; 690 691 static void *pool_allocator_alloc(struct arm_dma_alloc_args *args, 692 struct page **ret_page) 693 { 694 return __alloc_from_pool(args->size, ret_page); 695 } 696 697 static void pool_allocator_free(struct arm_dma_free_args *args) 698 { 699 __free_from_pool(args->cpu_addr, args->size); 700 } 701 702 static struct arm_dma_allocator pool_allocator = { 703 .alloc = pool_allocator_alloc, 704 .free = pool_allocator_free, 705 }; 706 707 static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, 708 struct page **ret_page) 709 { 710 return __alloc_remap_buffer(args->dev, 
args->size, args->gfp, 711 args->prot, ret_page, args->caller, 712 args->want_vaddr); 713 } 714 715 static void remap_allocator_free(struct arm_dma_free_args *args) 716 { 717 if (args->want_vaddr) 718 __dma_free_remap(args->cpu_addr, args->size); 719 720 __dma_free_buffer(args->page, args->size); 721 } 722 723 static struct arm_dma_allocator remap_allocator = { 724 .alloc = remap_allocator_alloc, 725 .free = remap_allocator_free, 726 }; 727 728 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 729 gfp_t gfp, pgprot_t prot, bool is_coherent, 730 unsigned long attrs, const void *caller) 731 { 732 u64 mask = get_coherent_dma_mask(dev); 733 struct page *page = NULL; 734 void *addr; 735 bool allowblock, cma; 736 struct arm_dma_buffer *buf; 737 struct arm_dma_alloc_args args = { 738 .dev = dev, 739 .size = PAGE_ALIGN(size), 740 .gfp = gfp, 741 .prot = prot, 742 .caller = caller, 743 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), 744 .coherent_flag = is_coherent ? COHERENT : NORMAL, 745 }; 746 747 #ifdef CONFIG_DMA_API_DEBUG 748 u64 limit = (mask + 1) & ~mask; 749 if (limit && size >= limit) { 750 dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", 751 size, mask); 752 return NULL; 753 } 754 #endif 755 756 if (!mask) 757 return NULL; 758 759 buf = kzalloc(sizeof(*buf), 760 gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)); 761 if (!buf) 762 return NULL; 763 764 if (mask < 0xffffffffULL) 765 gfp |= GFP_DMA; 766 767 /* 768 * Following is a work-around (a.k.a. hack) to prevent pages 769 * with __GFP_COMP being passed to split_page() which cannot 770 * handle them. The real problem is that this flag probably 771 * should be 0 on ARM as it is not supported on this 772 * platform; see CONFIG_HUGETLBFS. 773 */ 774 gfp &= ~(__GFP_COMP); 775 args.gfp = gfp; 776 777 *handle = ARM_MAPPING_ERROR; 778 allowblock = gfpflags_allow_blocking(gfp); 779 cma = allowblock ? dev_get_cma_area(dev) : false; 780 781 if (cma) 782 buf->allocator = &cma_allocator; 783 else if (is_coherent) 784 buf->allocator = &simple_allocator; 785 else if (allowblock) 786 buf->allocator = &remap_allocator; 787 else 788 buf->allocator = &pool_allocator; 789 790 addr = buf->allocator->alloc(&args, &page); 791 792 if (page) { 793 unsigned long flags; 794 795 *handle = pfn_to_dma(dev, page_to_pfn(page)); 796 buf->virt = args.want_vaddr ? addr : page; 797 798 spin_lock_irqsave(&arm_dma_bufs_lock, flags); 799 list_add(&buf->list, &arm_dma_bufs); 800 spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); 801 } else { 802 kfree(buf); 803 } 804 805 return args.want_vaddr ? addr : page; 806 } 807 808 /* 809 * Allocate DMA-coherent memory space and return both the kernel remapped 810 * virtual and bus address for that space. 
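 *
 * A hypothetical caller-side sketch (not part of this file), assuming
 * "dev" is a valid struct device; drivers reach this path through the
 * generic dma_alloc_coherent()/dma_free_coherent() wrappers:
 *
 *	dma_addr_t dma;
 *	void *cpu = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_KERNEL);
 *
 *	if (!cpu)
 *		return -ENOMEM;
 *	(use "cpu" for CPU accesses and hand "dma" to the device, then)
 *	dma_free_coherent(dev, SZ_4K, cpu, dma);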
811 */ 812 void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 813 gfp_t gfp, unsigned long attrs) 814 { 815 pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); 816 817 return __dma_alloc(dev, size, handle, gfp, prot, false, 818 attrs, __builtin_return_address(0)); 819 } 820 821 static void *arm_coherent_dma_alloc(struct device *dev, size_t size, 822 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 823 { 824 return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, 825 attrs, __builtin_return_address(0)); 826 } 827 828 static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, 829 void *cpu_addr, dma_addr_t dma_addr, size_t size, 830 unsigned long attrs) 831 { 832 int ret = -ENXIO; 833 unsigned long nr_vma_pages = vma_pages(vma); 834 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 835 unsigned long pfn = dma_to_pfn(dev, dma_addr); 836 unsigned long off = vma->vm_pgoff; 837 838 if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 839 return ret; 840 841 if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { 842 ret = remap_pfn_range(vma, vma->vm_start, 843 pfn + off, 844 vma->vm_end - vma->vm_start, 845 vma->vm_page_prot); 846 } 847 848 return ret; 849 } 850 851 /* 852 * Create userspace mapping for the DMA-coherent memory. 853 */ 854 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, 855 void *cpu_addr, dma_addr_t dma_addr, size_t size, 856 unsigned long attrs) 857 { 858 return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 859 } 860 861 int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, 862 void *cpu_addr, dma_addr_t dma_addr, size_t size, 863 unsigned long attrs) 864 { 865 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 866 return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 867 } 868 869 /* 870 * Free a buffer as defined by the above mapping. 871 */ 872 static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 873 dma_addr_t handle, unsigned long attrs, 874 bool is_coherent) 875 { 876 struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); 877 struct arm_dma_buffer *buf; 878 struct arm_dma_free_args args = { 879 .dev = dev, 880 .size = PAGE_ALIGN(size), 881 .cpu_addr = cpu_addr, 882 .page = page, 883 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), 884 }; 885 886 buf = arm_dma_buffer_find(cpu_addr); 887 if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) 888 return; 889 890 buf->allocator->free(&args); 891 kfree(buf); 892 } 893 894 void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 895 dma_addr_t handle, unsigned long attrs) 896 { 897 __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); 898 } 899 900 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, 901 dma_addr_t handle, unsigned long attrs) 902 { 903 __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); 904 } 905 906 /* 907 * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems 908 * that the intention is to allow exporting memory allocated via the 909 * coherent DMA APIs through the dma_buf API, which only accepts a 910 * scattertable. This presents a couple of problems: 911 * 1. Not all memory allocated via the coherent DMA APIs is backed by 912 * a struct page 913 * 2. Passing coherent DMA memory into the streaming APIs is not allowed 914 * as we will try to flush the memory through a different alias to that 915 * actually being used (and the flushes are redundant.) 
916 */ 917 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 918 void *cpu_addr, dma_addr_t handle, size_t size, 919 unsigned long attrs) 920 { 921 unsigned long pfn = dma_to_pfn(dev, handle); 922 struct page *page; 923 int ret; 924 925 /* If the PFN is not valid, we do not have a struct page */ 926 if (!pfn_valid(pfn)) 927 return -ENXIO; 928 929 page = pfn_to_page(pfn); 930 931 ret = sg_alloc_table(sgt, 1, GFP_KERNEL); 932 if (unlikely(ret)) 933 return ret; 934 935 sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); 936 return 0; 937 } 938 939 static void dma_cache_maint_page(struct page *page, unsigned long offset, 940 size_t size, enum dma_data_direction dir, 941 void (*op)(const void *, size_t, int)) 942 { 943 unsigned long pfn; 944 size_t left = size; 945 946 pfn = page_to_pfn(page) + offset / PAGE_SIZE; 947 offset %= PAGE_SIZE; 948 949 /* 950 * A single sg entry may refer to multiple physically contiguous 951 * pages. But we still need to process highmem pages individually. 952 * If highmem is not configured then the bulk of this loop gets 953 * optimized out. 954 */ 955 do { 956 size_t len = left; 957 void *vaddr; 958 959 page = pfn_to_page(pfn); 960 961 if (PageHighMem(page)) { 962 if (len + offset > PAGE_SIZE) 963 len = PAGE_SIZE - offset; 964 965 if (cache_is_vipt_nonaliasing()) { 966 vaddr = kmap_atomic(page); 967 op(vaddr + offset, len, dir); 968 kunmap_atomic(vaddr); 969 } else { 970 vaddr = kmap_high_get(page); 971 if (vaddr) { 972 op(vaddr + offset, len, dir); 973 kunmap_high(page); 974 } 975 } 976 } else { 977 vaddr = page_address(page) + offset; 978 op(vaddr, len, dir); 979 } 980 offset = 0; 981 pfn++; 982 left -= len; 983 } while (left); 984 } 985 986 /* 987 * Make an area consistent for devices. 988 * Note: Drivers should NOT use this function directly, as it will break 989 * platforms with CONFIG_DMABOUNCE. 990 * Use the driver DMA support - see dma-mapping.h (dma_sync_*) 991 */ 992 static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, 993 size_t size, enum dma_data_direction dir) 994 { 995 phys_addr_t paddr; 996 997 dma_cache_maint_page(page, off, size, dir, dmac_map_area); 998 999 paddr = page_to_phys(page) + off; 1000 if (dir == DMA_FROM_DEVICE) { 1001 outer_inv_range(paddr, paddr + size); 1002 } else { 1003 outer_clean_range(paddr, paddr + size); 1004 } 1005 /* FIXME: non-speculating: flush on bidirectional mappings? */ 1006 } 1007 1008 static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, 1009 size_t size, enum dma_data_direction dir) 1010 { 1011 phys_addr_t paddr = page_to_phys(page) + off; 1012 1013 /* FIXME: non-speculating: not required */ 1014 /* in any case, don't bother invalidating if DMA to device */ 1015 if (dir != DMA_TO_DEVICE) { 1016 outer_inv_range(paddr, paddr + size); 1017 1018 dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); 1019 } 1020 1021 /* 1022 * Mark the D-cache clean for these pages to avoid extra flushing. 
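	 * Only pages that are fully covered by the transfer are marked;
	 * a partially covered first or last page is skipped below.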
1023 */ 1024 if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { 1025 unsigned long pfn; 1026 size_t left = size; 1027 1028 pfn = page_to_pfn(page) + off / PAGE_SIZE; 1029 off %= PAGE_SIZE; 1030 if (off) { 1031 pfn++; 1032 left -= PAGE_SIZE - off; 1033 } 1034 while (left >= PAGE_SIZE) { 1035 page = pfn_to_page(pfn++); 1036 set_bit(PG_dcache_clean, &page->flags); 1037 left -= PAGE_SIZE; 1038 } 1039 } 1040 } 1041 1042 /** 1043 * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA 1044 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1045 * @sg: list of buffers 1046 * @nents: number of buffers to map 1047 * @dir: DMA transfer direction 1048 * 1049 * Map a set of buffers described by scatterlist in streaming mode for DMA. 1050 * This is the scatter-gather version of the dma_map_single interface. 1051 * Here the scatter gather list elements are each tagged with the 1052 * appropriate dma address and length. They are obtained via 1053 * sg_dma_{address,length}. 1054 * 1055 * Device ownership issues as mentioned for dma_map_single are the same 1056 * here. 1057 */ 1058 int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 1059 enum dma_data_direction dir, unsigned long attrs) 1060 { 1061 const struct dma_map_ops *ops = get_dma_ops(dev); 1062 struct scatterlist *s; 1063 int i, j; 1064 1065 for_each_sg(sg, s, nents, i) { 1066 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1067 s->dma_length = s->length; 1068 #endif 1069 s->dma_address = ops->map_page(dev, sg_page(s), s->offset, 1070 s->length, dir, attrs); 1071 if (dma_mapping_error(dev, s->dma_address)) 1072 goto bad_mapping; 1073 } 1074 return nents; 1075 1076 bad_mapping: 1077 for_each_sg(sg, s, i, j) 1078 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); 1079 return 0; 1080 } 1081 1082 /** 1083 * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1084 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1085 * @sg: list of buffers 1086 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1087 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1088 * 1089 * Unmap a set of streaming mode DMA translations. Again, CPU access 1090 * rules concerning calls here are the same as for dma_unmap_single(). 
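 *
 * A hypothetical caller-side sketch (not part of this file), assuming a
 * scatterlist "sgl" with "nents" entries has already been set up:
 *
 *	int count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
 *
 *	if (!count)
 *		return -ENOMEM;
 *	(program the device using sg_dma_address()/sg_dma_len() on the
 *	 first "count" entries, then)
 *	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);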
1091 */ 1092 void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 1093 enum dma_data_direction dir, unsigned long attrs) 1094 { 1095 const struct dma_map_ops *ops = get_dma_ops(dev); 1096 struct scatterlist *s; 1097 1098 int i; 1099 1100 for_each_sg(sg, s, nents, i) 1101 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); 1102 } 1103 1104 /** 1105 * arm_dma_sync_sg_for_cpu 1106 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1107 * @sg: list of buffers 1108 * @nents: number of buffers to map (returned from dma_map_sg) 1109 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1110 */ 1111 void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 1112 int nents, enum dma_data_direction dir) 1113 { 1114 const struct dma_map_ops *ops = get_dma_ops(dev); 1115 struct scatterlist *s; 1116 int i; 1117 1118 for_each_sg(sg, s, nents, i) 1119 ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, 1120 dir); 1121 } 1122 1123 /** 1124 * arm_dma_sync_sg_for_device 1125 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 1126 * @sg: list of buffers 1127 * @nents: number of buffers to map (returned from dma_map_sg) 1128 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1129 */ 1130 void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 1131 int nents, enum dma_data_direction dir) 1132 { 1133 const struct dma_map_ops *ops = get_dma_ops(dev); 1134 struct scatterlist *s; 1135 int i; 1136 1137 for_each_sg(sg, s, nents, i) 1138 ops->sync_single_for_device(dev, sg_dma_address(s), s->length, 1139 dir); 1140 } 1141 1142 /* 1143 * Return whether the given device DMA address mask can be supported 1144 * properly. For example, if your device can only drive the low 24-bits 1145 * during bus mastering, then you would pass 0x00ffffff as the mask 1146 * to this function. 1147 */ 1148 int arm_dma_supported(struct device *dev, u64 mask) 1149 { 1150 return __dma_supported(dev, mask, false); 1151 } 1152 1153 static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) 1154 { 1155 return coherent ? 
&arm_coherent_dma_ops : &arm_dma_ops; 1156 } 1157 1158 #ifdef CONFIG_ARM_DMA_USE_IOMMU 1159 1160 static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs) 1161 { 1162 int prot = 0; 1163 1164 if (attrs & DMA_ATTR_PRIVILEGED) 1165 prot |= IOMMU_PRIV; 1166 1167 switch (dir) { 1168 case DMA_BIDIRECTIONAL: 1169 return prot | IOMMU_READ | IOMMU_WRITE; 1170 case DMA_TO_DEVICE: 1171 return prot | IOMMU_READ; 1172 case DMA_FROM_DEVICE: 1173 return prot | IOMMU_WRITE; 1174 default: 1175 return prot; 1176 } 1177 } 1178 1179 /* IOMMU */ 1180 1181 static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); 1182 1183 static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, 1184 size_t size) 1185 { 1186 unsigned int order = get_order(size); 1187 unsigned int align = 0; 1188 unsigned int count, start; 1189 size_t mapping_size = mapping->bits << PAGE_SHIFT; 1190 unsigned long flags; 1191 dma_addr_t iova; 1192 int i; 1193 1194 if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) 1195 order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; 1196 1197 count = PAGE_ALIGN(size) >> PAGE_SHIFT; 1198 align = (1 << order) - 1; 1199 1200 spin_lock_irqsave(&mapping->lock, flags); 1201 for (i = 0; i < mapping->nr_bitmaps; i++) { 1202 start = bitmap_find_next_zero_area(mapping->bitmaps[i], 1203 mapping->bits, 0, count, align); 1204 1205 if (start > mapping->bits) 1206 continue; 1207 1208 bitmap_set(mapping->bitmaps[i], start, count); 1209 break; 1210 } 1211 1212 /* 1213 * No unused range found. Try to extend the existing mapping 1214 * and perform a second attempt to reserve an IO virtual 1215 * address range of size bytes. 1216 */ 1217 if (i == mapping->nr_bitmaps) { 1218 if (extend_iommu_mapping(mapping)) { 1219 spin_unlock_irqrestore(&mapping->lock, flags); 1220 return ARM_MAPPING_ERROR; 1221 } 1222 1223 start = bitmap_find_next_zero_area(mapping->bitmaps[i], 1224 mapping->bits, 0, count, align); 1225 1226 if (start > mapping->bits) { 1227 spin_unlock_irqrestore(&mapping->lock, flags); 1228 return ARM_MAPPING_ERROR; 1229 } 1230 1231 bitmap_set(mapping->bitmaps[i], start, count); 1232 } 1233 spin_unlock_irqrestore(&mapping->lock, flags); 1234 1235 iova = mapping->base + (mapping_size * i); 1236 iova += start << PAGE_SHIFT; 1237 1238 return iova; 1239 } 1240 1241 static inline void __free_iova(struct dma_iommu_mapping *mapping, 1242 dma_addr_t addr, size_t size) 1243 { 1244 unsigned int start, count; 1245 size_t mapping_size = mapping->bits << PAGE_SHIFT; 1246 unsigned long flags; 1247 dma_addr_t bitmap_base; 1248 u32 bitmap_index; 1249 1250 if (!size) 1251 return; 1252 1253 bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; 1254 BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); 1255 1256 bitmap_base = mapping->base + mapping_size * bitmap_index; 1257 1258 start = (addr - bitmap_base) >> PAGE_SHIFT; 1259 1260 if (addr + size > bitmap_base + mapping_size) { 1261 /* 1262 * The address range to be freed reaches into the iova 1263 * range of the next bitmap. This should not happen as 1264 * we don't allow this in __alloc_iova (at the 1265 * moment). 1266 */ 1267 BUG(); 1268 } else 1269 count = size >> PAGE_SHIFT; 1270 1271 spin_lock_irqsave(&mapping->lock, flags); 1272 bitmap_clear(mapping->bitmaps[bitmap_index], start, count); 1273 spin_unlock_irqrestore(&mapping->lock, flags); 1274 } 1275 1276 /* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! 
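 * (with 4K pages, orders 9, 8, 4 and 0 give 2MB, 1MB, 64KB and 4KB chunks)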
 */
static const int iommu_order_array[] = { 9, 8, 4, 0 };

static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
					  gfp_t gfp, unsigned long attrs,
					  int coherent_flag)
{
	struct page **pages;
	int count = size >> PAGE_SHIFT;
	int array_size = count * sizeof(struct page *);
	int i = 0;
	int order_idx = 0;

	if (array_size <= PAGE_SIZE)
		pages = kzalloc(array_size, GFP_KERNEL);
	else
		pages = vzalloc(array_size);
	if (!pages)
		return NULL;

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		unsigned long order = get_order(size);
		struct page *page;

		page = dma_alloc_from_contiguous(dev, count, order,
						 gfp & __GFP_NOWARN);
		if (!page)
			goto error;

		__dma_clear_buffer(page, size, coherent_flag);

		for (i = 0; i < count; i++)
			pages[i] = page + i;

		return pages;
	}

	/* Go straight to 4K chunks if caller says it's OK. */
	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
		order_idx = ARRAY_SIZE(iommu_order_array) - 1;

	/*
	 * IOMMU can map any pages, so highmem can also be used here
	 */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	while (count) {
		int j, order;

		order = iommu_order_array[order_idx];

		/* Drop down when we get small */
		if (__fls(count) < order) {
			order_idx++;
			continue;
		}

		if (order) {
			/* See if it's easy to allocate a high-order chunk */
			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);

			/* Go down a notch at first sign of pressure */
			if (!pages[i]) {
				order_idx++;
				continue;
			}
		} else {
			pages[i] = alloc_pages(gfp, 0);
			if (!pages[i])
				goto error;
		}

		if (order) {
			split_page(pages[i], order);
			j = 1 << order;
			while (--j)
				pages[i + j] = pages[i] + j;
		}

		__dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag);
		i += 1 << order;
		count -= 1 << order;
	}

	return pages;
error:
	while (i--)
		if (pages[i])
			__free_pages(pages[i], 0);
	kvfree(pages);
	return NULL;
}

static int __iommu_free_buffer(struct device *dev, struct page **pages,
			       size_t size, unsigned long attrs)
{
	int count = size >> PAGE_SHIFT;
	int i;

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		dma_release_from_contiguous(dev, pages[0], count);
	} else {
		for (i = 0; i < count; i++)
			if (pages[i])
				__free_pages(pages[i], 0);
	}

	kvfree(pages);
	return 0;
}

/*
 * Create a CPU mapping for the specified pages
 */
static void *
__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
		    const void *caller)
{
	return dma_common_pages_remap(pages, size,
			VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
}

/*
 * Create a mapping in device IO address space for the specified pages
 */
static dma_addr_t
__iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
		       unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	dma_addr_t dma_addr, iova;
	int i;

	dma_addr = __alloc_iova(mapping, size);
	if (dma_addr == ARM_MAPPING_ERROR)
		return dma_addr;

	iova = dma_addr;
	for (i = 0; i < count; ) {
		int ret;

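		/*
		 * Batch up runs of physically contiguous pages so that each
		 * run can be mapped with a single iommu_map() call.
		 */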
unsigned int next_pfn = page_to_pfn(pages[i]) + 1; 1420 phys_addr_t phys = page_to_phys(pages[i]); 1421 unsigned int len, j; 1422 1423 for (j = i + 1; j < count; j++, next_pfn++) 1424 if (page_to_pfn(pages[j]) != next_pfn) 1425 break; 1426 1427 len = (j - i) << PAGE_SHIFT; 1428 ret = iommu_map(mapping->domain, iova, phys, len, 1429 __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs)); 1430 if (ret < 0) 1431 goto fail; 1432 iova += len; 1433 i = j; 1434 } 1435 return dma_addr; 1436 fail: 1437 iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); 1438 __free_iova(mapping, dma_addr, size); 1439 return ARM_MAPPING_ERROR; 1440 } 1441 1442 static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) 1443 { 1444 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1445 1446 /* 1447 * add optional in-page offset from iova to size and align 1448 * result to page size 1449 */ 1450 size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); 1451 iova &= PAGE_MASK; 1452 1453 iommu_unmap(mapping->domain, iova, size); 1454 __free_iova(mapping, iova, size); 1455 return 0; 1456 } 1457 1458 static struct page **__atomic_get_pages(void *addr) 1459 { 1460 struct page *page; 1461 phys_addr_t phys; 1462 1463 phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr); 1464 page = phys_to_page(phys); 1465 1466 return (struct page **)page; 1467 } 1468 1469 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs) 1470 { 1471 struct vm_struct *area; 1472 1473 if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) 1474 return __atomic_get_pages(cpu_addr); 1475 1476 if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) 1477 return cpu_addr; 1478 1479 area = find_vm_area(cpu_addr); 1480 if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) 1481 return area->pages; 1482 return NULL; 1483 } 1484 1485 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, 1486 dma_addr_t *handle, int coherent_flag, 1487 unsigned long attrs) 1488 { 1489 struct page *page; 1490 void *addr; 1491 1492 if (coherent_flag == COHERENT) 1493 addr = __alloc_simple_buffer(dev, size, gfp, &page); 1494 else 1495 addr = __alloc_from_pool(size, &page); 1496 if (!addr) 1497 return NULL; 1498 1499 *handle = __iommu_create_mapping(dev, &page, size, attrs); 1500 if (*handle == ARM_MAPPING_ERROR) 1501 goto err_mapping; 1502 1503 return addr; 1504 1505 err_mapping: 1506 __free_from_pool(addr, size); 1507 return NULL; 1508 } 1509 1510 static void __iommu_free_atomic(struct device *dev, void *cpu_addr, 1511 dma_addr_t handle, size_t size, int coherent_flag) 1512 { 1513 __iommu_remove_mapping(dev, handle, size); 1514 if (coherent_flag == COHERENT) 1515 __dma_free_buffer(virt_to_page(cpu_addr), size); 1516 else 1517 __free_from_pool(cpu_addr, size); 1518 } 1519 1520 static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, 1521 dma_addr_t *handle, gfp_t gfp, unsigned long attrs, 1522 int coherent_flag) 1523 { 1524 pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); 1525 struct page **pages; 1526 void *addr = NULL; 1527 1528 *handle = ARM_MAPPING_ERROR; 1529 size = PAGE_ALIGN(size); 1530 1531 if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) 1532 return __iommu_alloc_simple(dev, size, gfp, handle, 1533 coherent_flag, attrs); 1534 1535 /* 1536 * Following is a work-around (a.k.a. hack) to prevent pages 1537 * with __GFP_COMP being passed to split_page() which cannot 1538 * handle them. 
The real problem is that this flag probably 1539 * should be 0 on ARM as it is not supported on this 1540 * platform; see CONFIG_HUGETLBFS. 1541 */ 1542 gfp &= ~(__GFP_COMP); 1543 1544 pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag); 1545 if (!pages) 1546 return NULL; 1547 1548 *handle = __iommu_create_mapping(dev, pages, size, attrs); 1549 if (*handle == ARM_MAPPING_ERROR) 1550 goto err_buffer; 1551 1552 if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) 1553 return pages; 1554 1555 addr = __iommu_alloc_remap(pages, size, gfp, prot, 1556 __builtin_return_address(0)); 1557 if (!addr) 1558 goto err_mapping; 1559 1560 return addr; 1561 1562 err_mapping: 1563 __iommu_remove_mapping(dev, *handle, size); 1564 err_buffer: 1565 __iommu_free_buffer(dev, pages, size, attrs); 1566 return NULL; 1567 } 1568 1569 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, 1570 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1571 { 1572 return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL); 1573 } 1574 1575 static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size, 1576 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1577 { 1578 return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT); 1579 } 1580 1581 static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 1582 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1583 unsigned long attrs) 1584 { 1585 unsigned long uaddr = vma->vm_start; 1586 unsigned long usize = vma->vm_end - vma->vm_start; 1587 struct page **pages = __iommu_get_pages(cpu_addr, attrs); 1588 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 1589 unsigned long off = vma->vm_pgoff; 1590 1591 if (!pages) 1592 return -ENXIO; 1593 1594 if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) 1595 return -ENXIO; 1596 1597 pages += off; 1598 1599 do { 1600 int ret = vm_insert_page(vma, uaddr, *pages++); 1601 if (ret) { 1602 pr_err("Remapping memory failed: %d\n", ret); 1603 return ret; 1604 } 1605 uaddr += PAGE_SIZE; 1606 usize -= PAGE_SIZE; 1607 } while (usize > 0); 1608 1609 return 0; 1610 } 1611 static int arm_iommu_mmap_attrs(struct device *dev, 1612 struct vm_area_struct *vma, void *cpu_addr, 1613 dma_addr_t dma_addr, size_t size, unsigned long attrs) 1614 { 1615 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 1616 1617 return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); 1618 } 1619 1620 static int arm_coherent_iommu_mmap_attrs(struct device *dev, 1621 struct vm_area_struct *vma, void *cpu_addr, 1622 dma_addr_t dma_addr, size_t size, unsigned long attrs) 1623 { 1624 return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); 1625 } 1626 1627 /* 1628 * free a page as defined by the above mapping. 1629 * Must not be called with IRQs disabled. 
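 * (tearing down the kernel remap via dma_common_free_remap() may sleep)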
1630 */ 1631 void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, 1632 dma_addr_t handle, unsigned long attrs, int coherent_flag) 1633 { 1634 struct page **pages; 1635 size = PAGE_ALIGN(size); 1636 1637 if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) { 1638 __iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag); 1639 return; 1640 } 1641 1642 pages = __iommu_get_pages(cpu_addr, attrs); 1643 if (!pages) { 1644 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); 1645 return; 1646 } 1647 1648 if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) { 1649 dma_common_free_remap(cpu_addr, size, 1650 VM_ARM_DMA_CONSISTENT | VM_USERMAP); 1651 } 1652 1653 __iommu_remove_mapping(dev, handle, size); 1654 __iommu_free_buffer(dev, pages, size, attrs); 1655 } 1656 1657 void arm_iommu_free_attrs(struct device *dev, size_t size, 1658 void *cpu_addr, dma_addr_t handle, unsigned long attrs) 1659 { 1660 __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL); 1661 } 1662 1663 void arm_coherent_iommu_free_attrs(struct device *dev, size_t size, 1664 void *cpu_addr, dma_addr_t handle, unsigned long attrs) 1665 { 1666 __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT); 1667 } 1668 1669 static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, 1670 void *cpu_addr, dma_addr_t dma_addr, 1671 size_t size, unsigned long attrs) 1672 { 1673 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; 1674 struct page **pages = __iommu_get_pages(cpu_addr, attrs); 1675 1676 if (!pages) 1677 return -ENXIO; 1678 1679 return sg_alloc_table_from_pages(sgt, pages, count, 0, size, 1680 GFP_KERNEL); 1681 } 1682 1683 /* 1684 * Map a part of the scatter-gather list into contiguous io address space 1685 */ 1686 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, 1687 size_t size, dma_addr_t *handle, 1688 enum dma_data_direction dir, unsigned long attrs, 1689 bool is_coherent) 1690 { 1691 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1692 dma_addr_t iova, iova_base; 1693 int ret = 0; 1694 unsigned int count; 1695 struct scatterlist *s; 1696 int prot; 1697 1698 size = PAGE_ALIGN(size); 1699 *handle = ARM_MAPPING_ERROR; 1700 1701 iova_base = iova = __alloc_iova(mapping, size); 1702 if (iova == ARM_MAPPING_ERROR) 1703 return -ENOMEM; 1704 1705 for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { 1706 phys_addr_t phys = page_to_phys(sg_page(s)); 1707 unsigned int len = PAGE_ALIGN(s->offset + s->length); 1708 1709 if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1710 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); 1711 1712 prot = __dma_info_to_prot(dir, attrs); 1713 1714 ret = iommu_map(mapping->domain, iova, phys, len, prot); 1715 if (ret < 0) 1716 goto fail; 1717 count += len >> PAGE_SHIFT; 1718 iova += len; 1719 } 1720 *handle = iova_base; 1721 1722 return 0; 1723 fail: 1724 iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); 1725 __free_iova(mapping, iova_base, size); 1726 return ret; 1727 } 1728 1729 static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, 1730 enum dma_data_direction dir, unsigned long attrs, 1731 bool is_coherent) 1732 { 1733 struct scatterlist *s = sg, *dma = sg, *start = sg; 1734 int i, count = 0; 1735 unsigned int offset = s->offset; 1736 unsigned int size = s->offset + s->length; 1737 unsigned int max = dma_get_max_seg_size(dev); 1738 1739 for (i = 1; i < nents; i++) { 1740 s = sg_next(s); 1741 1742 
s->dma_address = ARM_MAPPING_ERROR; 1743 s->dma_length = 0; 1744 1745 if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { 1746 if (__map_sg_chunk(dev, start, size, &dma->dma_address, 1747 dir, attrs, is_coherent) < 0) 1748 goto bad_mapping; 1749 1750 dma->dma_address += offset; 1751 dma->dma_length = size - offset; 1752 1753 size = offset = s->offset; 1754 start = s; 1755 dma = sg_next(dma); 1756 count += 1; 1757 } 1758 size += s->length; 1759 } 1760 if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, 1761 is_coherent) < 0) 1762 goto bad_mapping; 1763 1764 dma->dma_address += offset; 1765 dma->dma_length = size - offset; 1766 1767 return count+1; 1768 1769 bad_mapping: 1770 for_each_sg(sg, s, count, i) 1771 __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); 1772 return 0; 1773 } 1774 1775 /** 1776 * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1777 * @dev: valid struct device pointer 1778 * @sg: list of buffers 1779 * @nents: number of buffers to map 1780 * @dir: DMA transfer direction 1781 * 1782 * Map a set of i/o coherent buffers described by scatterlist in streaming 1783 * mode for DMA. The scatter gather list elements are merged together (if 1784 * possible) and tagged with the appropriate dma address and length. They are 1785 * obtained via sg_dma_{address,length}. 1786 */ 1787 int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1788 int nents, enum dma_data_direction dir, unsigned long attrs) 1789 { 1790 return __iommu_map_sg(dev, sg, nents, dir, attrs, true); 1791 } 1792 1793 /** 1794 * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1795 * @dev: valid struct device pointer 1796 * @sg: list of buffers 1797 * @nents: number of buffers to map 1798 * @dir: DMA transfer direction 1799 * 1800 * Map a set of buffers described by scatterlist in streaming mode for DMA. 1801 * The scatter gather list elements are merged together (if possible) and 1802 * tagged with the appropriate dma address and length. They are obtained via 1803 * sg_dma_{address,length}. 1804 */ 1805 int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1806 int nents, enum dma_data_direction dir, unsigned long attrs) 1807 { 1808 return __iommu_map_sg(dev, sg, nents, dir, attrs, false); 1809 } 1810 1811 static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, 1812 int nents, enum dma_data_direction dir, 1813 unsigned long attrs, bool is_coherent) 1814 { 1815 struct scatterlist *s; 1816 int i; 1817 1818 for_each_sg(sg, s, nents, i) { 1819 if (sg_dma_len(s)) 1820 __iommu_remove_mapping(dev, sg_dma_address(s), 1821 sg_dma_len(s)); 1822 if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1823 __dma_page_dev_to_cpu(sg_page(s), s->offset, 1824 s->length, dir); 1825 } 1826 } 1827 1828 /** 1829 * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1830 * @dev: valid struct device pointer 1831 * @sg: list of buffers 1832 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1833 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1834 * 1835 * Unmap a set of streaming mode DMA translations. Again, CPU access 1836 * rules concerning calls here are the same as for dma_unmap_single(). 
1837 */ 1838 void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, 1839 int nents, enum dma_data_direction dir, 1840 unsigned long attrs) 1841 { 1842 __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); 1843 } 1844 1845 /** 1846 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1847 * @dev: valid struct device pointer 1848 * @sg: list of buffers 1849 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1850 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1851 * 1852 * Unmap a set of streaming mode DMA translations. Again, CPU access 1853 * rules concerning calls here are the same as for dma_unmap_single(). 1854 */ 1855 void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 1856 enum dma_data_direction dir, 1857 unsigned long attrs) 1858 { 1859 __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); 1860 } 1861 1862 /** 1863 * arm_iommu_sync_sg_for_cpu 1864 * @dev: valid struct device pointer 1865 * @sg: list of buffers 1866 * @nents: number of buffers to map (returned from dma_map_sg) 1867 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1868 */ 1869 void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 1870 int nents, enum dma_data_direction dir) 1871 { 1872 struct scatterlist *s; 1873 int i; 1874 1875 for_each_sg(sg, s, nents, i) 1876 __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); 1877 1878 } 1879 1880 /** 1881 * arm_iommu_sync_sg_for_device 1882 * @dev: valid struct device pointer 1883 * @sg: list of buffers 1884 * @nents: number of buffers to map (returned from dma_map_sg) 1885 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1886 */ 1887 void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 1888 int nents, enum dma_data_direction dir) 1889 { 1890 struct scatterlist *s; 1891 int i; 1892 1893 for_each_sg(sg, s, nents, i) 1894 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); 1895 } 1896 1897 1898 /** 1899 * arm_coherent_iommu_map_page 1900 * @dev: valid struct device pointer 1901 * @page: page that buffer resides in 1902 * @offset: offset into page for start of buffer 1903 * @size: size of buffer to map 1904 * @dir: DMA transfer direction 1905 * 1906 * Coherent IOMMU aware version of arm_dma_map_page() 1907 */ 1908 static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, 1909 unsigned long offset, size_t size, enum dma_data_direction dir, 1910 unsigned long attrs) 1911 { 1912 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1913 dma_addr_t dma_addr; 1914 int ret, prot, len = PAGE_ALIGN(size + offset); 1915 1916 dma_addr = __alloc_iova(mapping, len); 1917 if (dma_addr == ARM_MAPPING_ERROR) 1918 return dma_addr; 1919 1920 prot = __dma_info_to_prot(dir, attrs); 1921 1922 ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); 1923 if (ret < 0) 1924 goto fail; 1925 1926 return dma_addr + offset; 1927 fail: 1928 __free_iova(mapping, dma_addr, len); 1929 return ARM_MAPPING_ERROR; 1930 } 1931 1932 /** 1933 * arm_iommu_map_page 1934 * @dev: valid struct device pointer 1935 * @page: page that buffer resides in 1936 * @offset: offset into page for start of buffer 1937 * @size: size of buffer to map 1938 * @dir: DMA transfer direction 1939 * 1940 * IOMMU aware version of arm_dma_map_page() 1941 */ 1942 static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, 1943 unsigned long offset, size_t size, enum 

/**
 * arm_iommu_map_page
 * @dev: valid struct device pointer
 * @page: page that buffer resides in
 * @offset: offset into page for start of buffer
 * @size: size of buffer to map
 * @dir: DMA transfer direction
 *
 * IOMMU aware version of arm_dma_map_page()
 */
static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
	     unsigned long offset, size_t size, enum dma_data_direction dir,
	     unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__dma_page_cpu_to_dev(page, offset, size, dir);

	return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
}

/**
 * arm_coherent_iommu_unmap_page
 * @dev: valid struct device pointer
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 *
 * Coherent IOMMU aware version of arm_dma_unmap_page()
 */
static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	int offset = handle & ~PAGE_MASK;
	int len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}

/**
 * arm_iommu_unmap_page
 * @dev: valid struct device pointer
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 *
 * IOMMU aware version of arm_dma_unmap_page()
 */
static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	int offset = handle & ~PAGE_MASK;
	int len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__dma_page_dev_to_cpu(page, offset, size, dir);

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}

/**
 * arm_iommu_map_resource - map a device resource for DMA
 * @dev: valid struct device pointer
 * @phys_addr: physical address of resource
 * @size: size of resource to map
 * @dir: DMA transfer direction
 */
static dma_addr_t arm_iommu_map_resource(struct device *dev,
		phys_addr_t phys_addr, size_t size,
		enum dma_data_direction dir, unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t dma_addr;
	int ret, prot;
	phys_addr_t addr = phys_addr & PAGE_MASK;
	unsigned int offset = phys_addr & ~PAGE_MASK;
	size_t len = PAGE_ALIGN(size + offset);

	dma_addr = __alloc_iova(mapping, len);
	if (dma_addr == ARM_MAPPING_ERROR)
		return dma_addr;

	prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;

	ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
	if (ret < 0)
		goto fail;

	return dma_addr + offset;
fail:
	__free_iova(mapping, dma_addr, len);
	return ARM_MAPPING_ERROR;
}
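
/*
 * Illustrative sketch only (not used by this file): dma_map_resource()
 * backs onto arm_iommu_map_resource() above and is used to make a
 * physical MMIO region (for example another device's FIFO register)
 * reachable by this device for slave DMA.  The names and the
 * DMA_BIDIRECTIONAL direction are made up for the example.
 */
static dma_addr_t __maybe_unused __example_map_resource(struct device *dev,
							phys_addr_t fifo_phys,
							size_t len)
{
	dma_addr_t dma;

	/* map the slave FIFO so the device can DMA to/from it */
	dma = dma_map_resource(dev, fifo_phys, len, DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, dma))
		dev_err(dev, "failed to map slave FIFO\n");

	/* caller checks dma_mapping_error() and later uses dma_unmap_resource() */
	return dma;
}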

/**
 * arm_iommu_unmap_resource - unmap a device DMA resource
 * @dev: valid struct device pointer
 * @dma_handle: DMA address to resource
 * @size: size of resource to unmap (same as was passed to the map routine)
 * @dir: DMA transfer direction
 */
static void arm_iommu_unmap_resource(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = dma_handle & PAGE_MASK;
	unsigned int offset = dma_handle & ~PAGE_MASK;
	size_t len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}

static void arm_iommu_sync_single_for_cpu(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	unsigned int offset = handle & ~PAGE_MASK;

	if (!iova)
		return;

	__dma_page_dev_to_cpu(page, offset, size, dir);
}

static void arm_iommu_sync_single_for_device(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	unsigned int offset = handle & ~PAGE_MASK;

	if (!iova)
		return;

	__dma_page_cpu_to_dev(page, offset, size, dir);
}

const struct dma_map_ops iommu_ops = {
	.alloc		= arm_iommu_alloc_attrs,
	.free		= arm_iommu_free_attrs,
	.mmap		= arm_iommu_mmap_attrs,
	.get_sgtable	= arm_iommu_get_sgtable,

	.map_page		= arm_iommu_map_page,
	.unmap_page		= arm_iommu_unmap_page,
	.sync_single_for_cpu	= arm_iommu_sync_single_for_cpu,
	.sync_single_for_device	= arm_iommu_sync_single_for_device,

	.map_sg			= arm_iommu_map_sg,
	.unmap_sg		= arm_iommu_unmap_sg,
	.sync_sg_for_cpu	= arm_iommu_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_iommu_sync_sg_for_device,

	.map_resource		= arm_iommu_map_resource,
	.unmap_resource		= arm_iommu_unmap_resource,

	.mapping_error		= arm_dma_mapping_error,
	.dma_supported		= arm_dma_supported,
};

const struct dma_map_ops iommu_coherent_ops = {
	.alloc		= arm_coherent_iommu_alloc_attrs,
	.free		= arm_coherent_iommu_free_attrs,
	.mmap		= arm_coherent_iommu_mmap_attrs,
	.get_sgtable	= arm_iommu_get_sgtable,

	.map_page	= arm_coherent_iommu_map_page,
	.unmap_page	= arm_coherent_iommu_unmap_page,

	.map_sg		= arm_coherent_iommu_map_sg,
	.unmap_sg	= arm_coherent_iommu_unmap_sg,

	.map_resource	= arm_iommu_map_resource,
	.unmap_resource	= arm_iommu_unmap_resource,

	.mapping_error	= arm_dma_mapping_error,
	.dma_supported	= arm_dma_supported,
};
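
/*
 * Illustrative sketch only (not used by this file): the driver-side
 * pattern behind the sync_single callbacks wired up above.  A long-lived
 * streaming mapping is handed back and forth between CPU and device with
 * dma_sync_single_for_cpu()/dma_sync_single_for_device() instead of being
 * remapped for every transfer.  The helper name and the direction are
 * made up for the example.
 */
static void __maybe_unused __example_reuse_mapping(struct device *dev,
						   dma_addr_t dma, size_t len)
{
	/* give the buffer back to the CPU so it may inspect the received data */
	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);

	/* ... CPU reads/writes the buffer here ... */

	/* hand it to the device again before restarting DMA */
	dma_sync_single_for_device(dev, dma, len, DMA_FROM_DEVICE);
}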

/**
 * arm_iommu_create_mapping
 * @bus: pointer to the bus holding the client device (for IOMMU calls)
 * @base: start address of the valid IO address space
 * @size: maximum size of the valid IO address space
 *
 * Creates a mapping structure which holds information about used/unused
 * IO address ranges, which is required to perform memory allocation and
 * mapping with IOMMU aware functions.
 *
 * The client device needs to be attached to the mapping with the
 * arm_iommu_attach_device() function.
 */
struct dma_iommu_mapping *
arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
{
	unsigned int bits = size >> PAGE_SHIFT;
	unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
	struct dma_iommu_mapping *mapping;
	int extensions = 1;
	int err = -ENOMEM;

	/* currently only 32-bit DMA address space is supported */
	if (size > DMA_BIT_MASK(32) + 1)
		return ERR_PTR(-ERANGE);

	if (!bitmap_size)
		return ERR_PTR(-EINVAL);

	if (bitmap_size > PAGE_SIZE) {
		extensions = bitmap_size / PAGE_SIZE;
		bitmap_size = PAGE_SIZE;
	}

	mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
	if (!mapping)
		goto err;

	mapping->bitmap_size = bitmap_size;
	mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *),
				   GFP_KERNEL);
	if (!mapping->bitmaps)
		goto err2;

	mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL);
	if (!mapping->bitmaps[0])
		goto err3;

	mapping->nr_bitmaps = 1;
	mapping->extensions = extensions;
	mapping->base = base;
	mapping->bits = BITS_PER_BYTE * bitmap_size;

	spin_lock_init(&mapping->lock);

	mapping->domain = iommu_domain_alloc(bus);
	if (!mapping->domain)
		goto err4;

	kref_init(&mapping->kref);
	return mapping;
err4:
	kfree(mapping->bitmaps[0]);
err3:
	kfree(mapping->bitmaps);
err2:
	kfree(mapping);
err:
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(arm_iommu_create_mapping);

static void release_iommu_mapping(struct kref *kref)
{
	int i;
	struct dma_iommu_mapping *mapping =
		container_of(kref, struct dma_iommu_mapping, kref);

	iommu_domain_free(mapping->domain);
	for (i = 0; i < mapping->nr_bitmaps; i++)
		kfree(mapping->bitmaps[i]);
	kfree(mapping->bitmaps);
	kfree(mapping);
}

static int extend_iommu_mapping(struct dma_iommu_mapping *mapping)
{
	int next_bitmap;

	if (mapping->nr_bitmaps >= mapping->extensions)
		return -EINVAL;

	next_bitmap = mapping->nr_bitmaps;
	mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size,
						GFP_ATOMIC);
	if (!mapping->bitmaps[next_bitmap])
		return -ENOMEM;

	mapping->nr_bitmaps++;

	return 0;
}

void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
{
	if (mapping)
		kref_put(&mapping->kref, release_iommu_mapping);
}
EXPORT_SYMBOL_GPL(arm_iommu_release_mapping);

static int __arm_iommu_attach_device(struct device *dev,
				     struct dma_iommu_mapping *mapping)
{
	int err;

	err = iommu_attach_device(mapping->domain, dev);
	if (err)
		return err;

	kref_get(&mapping->kref);
	to_dma_iommu_mapping(dev) = mapping;

	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
	return 0;
}
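
/*
 * Illustrative sketch only (not used by this file): the usual lifecycle
 * seen from bus or driver code.  A mapping is created for the device's
 * bus, the device is attached to it (which switches its dma_map_ops to
 * the IOMMU-aware set defined in this file), and arm_iommu_detach_device()
 * plus arm_iommu_release_mapping() later undo both steps.  The IOVA base
 * and window size are made up for the example.
 */
static int __maybe_unused __example_attach_iommu(struct device *dev)
{
	struct dma_iommu_mapping *mapping;
	int err;

	/* 128MiB of IO virtual address space starting at 0x80000000 */
	mapping = arm_iommu_create_mapping(dev->bus, 0x80000000, SZ_128M);
	if (IS_ERR(mapping))
		return PTR_ERR(mapping);

	err = arm_iommu_attach_device(dev, mapping);
	if (err) {
		arm_iommu_release_mapping(mapping);
		return err;
	}

	return 0;
}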

/**
 * arm_iommu_attach_device
 * @dev: valid struct device pointer
 * @mapping: io address space mapping structure (returned from
 *	arm_iommu_create_mapping)
 *
 * Attaches the specified I/O address space mapping to the provided device.
 * This replaces the dma operations (dma_map_ops pointer) with the
 * IOMMU aware version.
 *
 * More than one client might be attached to the same io address space
 * mapping.
 */
int arm_iommu_attach_device(struct device *dev,
			    struct dma_iommu_mapping *mapping)
{
	int err;

	err = __arm_iommu_attach_device(dev, mapping);
	if (err)
		return err;

	set_dma_ops(dev, &iommu_ops);
	return 0;
}
EXPORT_SYMBOL_GPL(arm_iommu_attach_device);

/**
 * arm_iommu_detach_device
 * @dev: valid struct device pointer
 *
 * Detaches the provided device from a previously attached map.
 * This restores the default (non-IOMMU) dma operations (dma_map_ops pointer).
 */
void arm_iommu_detach_device(struct device *dev)
{
	struct dma_iommu_mapping *mapping;

	mapping = to_dma_iommu_mapping(dev);
	if (!mapping) {
		dev_warn(dev, "Not attached\n");
		return;
	}

	iommu_detach_device(mapping->domain, dev);
	kref_put(&mapping->kref, release_iommu_mapping);
	to_dma_iommu_mapping(dev) = NULL;
	set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent));

	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
}
EXPORT_SYMBOL_GPL(arm_iommu_detach_device);

static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
{
	return coherent ? &iommu_coherent_ops : &iommu_ops;
}

static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
				    const struct iommu_ops *iommu)
{
	struct dma_iommu_mapping *mapping;

	if (!iommu)
		return false;

	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
	if (IS_ERR(mapping)) {
		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
			size, dev_name(dev));
		return false;
	}

	if (__arm_iommu_attach_device(dev, mapping)) {
		pr_warn("Failed to attach device %s to IOMMU mapping\n",
			dev_name(dev));
		arm_iommu_release_mapping(mapping);
		return false;
	}

	return true;
}

static void arm_teardown_iommu_dma_ops(struct device *dev)
{
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);

	if (!mapping)
		return;

	arm_iommu_detach_device(dev);
	arm_iommu_release_mapping(mapping);
}

#else

static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
				    const struct iommu_ops *iommu)
{
	return false;
}

static void arm_teardown_iommu_dma_ops(struct device *dev) { }

#define arm_get_iommu_dma_map_ops arm_get_dma_map_ops

#endif	/* CONFIG_ARM_DMA_USE_IOMMU */
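
/*
 * Illustrative sketch only (not used by this file): arch_setup_dma_ops()
 * below is normally invoked by bus code (for instance of_dma_configure()
 * for devicetree devices) rather than by drivers.  A call for a
 * cache-coherent device with a full 32-bit DMA window and no IOMMU would
 * look roughly like this; the values are made up for the example.
 */
static void __maybe_unused __example_setup_dma_ops(struct device *dev)
{
	arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32) + 1, NULL, true);
}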

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	const struct dma_map_ops *dma_ops;

	dev->archdata.dma_coherent = coherent;

	/*
	 * Don't override the dma_ops if they have already been set. Ideally
	 * this should be the only location where dma_ops are set; remove this
	 * check once all other callers of set_dma_ops have disappeared.
	 */
	if (dev->dma_ops)
		return;

	if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
		dma_ops = arm_get_iommu_dma_map_ops(coherent);
	else
		dma_ops = arm_get_dma_map_ops(coherent);

	set_dma_ops(dev, dma_ops);

#ifdef CONFIG_XEN
	if (xen_initial_domain()) {
		dev->archdata.dev_dma_ops = dev->dma_ops;
		dev->dma_ops = xen_dma_ops;
	}
#endif
	dev->archdata.dma_ops_setup = true;
}

void arch_teardown_dma_ops(struct device *dev)
{
	if (!dev->archdata.dma_ops_setup)
		return;

	arm_teardown_iommu_dma_ops(dev);
}