/*
 *  linux/arch/arm/mm/dma-mapping.c
 *
 *  Copyright (C) 2000-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  DMA uncached mapping support.
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/vmalloc.h>

#include <asm/memory.h>
#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/sizes.h>
#include <asm/mach/arch.h>
#include <asm/dma-iommu.h>
#include <asm/mach/map.h>
#include <asm/system_info.h>
#include <asm/dma-contiguous.h>

#include "mm.h"

/*
 * The DMA API is built upon the notion of "buffer ownership".  A buffer
 * is either exclusively owned by the CPU (and therefore may be accessed
 * by it) or exclusively owned by the DMA device.  These helper functions
 * represent the transitions between these two ownership states.
 *
 * Note, however, that on later ARMs, this notion does not work due to
 * speculative prefetches.  We model our approach on the assumption that
 * the CPU does do speculative prefetches, which means we clean caches
 * before transfers and delay cache invalidation until transfer completion.
 */
static void __dma_page_cpu_to_dev(struct page *, unsigned long,
		size_t, enum dma_data_direction);
static void __dma_page_dev_to_cpu(struct page *, unsigned long,
		size_t, enum dma_data_direction);

/**
 * arm_dma_map_page - map a portion of a page for streaming DMA
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @page: page that buffer resides in
 * @offset: offset into page for start of buffer
 * @size: size of buffer to map
 * @dir: DMA transfer direction
 *
 * Ensure that any data held in the cache is appropriately discarded
 * or written back.
 *
 * The device owns this memory once this call has completed.  The CPU
 * can regain ownership by calling dma_unmap_page().
 */
static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
	     unsigned long offset, size_t size, enum dma_data_direction dir,
	     struct dma_attrs *attrs)
{
	if (!arch_is_coherent())
		__dma_page_cpu_to_dev(page, offset, size, dir);
	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}
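
/*
 * Illustrative driver-side sketch of the ownership hand-off performed by
 * the map/unmap operations above ("dev", "page" and "nbytes" are
 * placeholder names; error handling trimmed):
 *
 *	dma_addr_t dma = dma_map_page(dev, page, 0, nbytes, DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, dma))
 *		return -ENOMEM;
 *	...the device owns the buffer; start the transfer here...
 *	dma_unmap_page(dev, dma, nbytes, DMA_TO_DEVICE);
 *	...the CPU owns the buffer again and may read what the device wrote...
 */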

/**
 * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 *
 * Unmap a page streaming mode DMA translation.  The handle and size
 * must match what was provided in the previous dma_map_page() call.
 * All other usages are undefined.
 *
 * After this call, reads by the CPU to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir,
		struct dma_attrs *attrs)
{
	if (!arch_is_coherent())
		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
				      handle & ~PAGE_MASK, size, dir);
}

static void arm_dma_sync_single_for_cpu(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	unsigned int offset = handle & (PAGE_SIZE - 1);
	struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
	if (!arch_is_coherent())
		__dma_page_dev_to_cpu(page, offset, size, dir);
}

static void arm_dma_sync_single_for_device(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	unsigned int offset = handle & (PAGE_SIZE - 1);
	struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
	if (!arch_is_coherent())
		__dma_page_cpu_to_dev(page, offset, size, dir);
}

static int arm_dma_set_mask(struct device *dev, u64 dma_mask);

struct dma_map_ops arm_dma_ops = {
	.alloc			= arm_dma_alloc,
	.free			= arm_dma_free,
	.mmap			= arm_dma_mmap,
	.map_page		= arm_dma_map_page,
	.unmap_page		= arm_dma_unmap_page,
	.map_sg			= arm_dma_map_sg,
	.unmap_sg		= arm_dma_unmap_sg,
	.sync_single_for_cpu	= arm_dma_sync_single_for_cpu,
	.sync_single_for_device	= arm_dma_sync_single_for_device,
	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
	.set_dma_mask		= arm_dma_set_mask,
};
EXPORT_SYMBOL(arm_dma_ops);

static u64 get_coherent_dma_mask(struct device *dev)
{
	u64 mask = (u64)arm_dma_limit;

	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (mask == 0) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			return 0;
		}

		if ((~mask) & (u64)arm_dma_limit) {
			dev_warn(dev, "coherent DMA mask %#llx is smaller "
				 "than system GFP_DMA mask %#llx\n",
				 mask, (u64)arm_dma_limit);
			return 0;
		}
	}

	return mask;
}

static void __dma_clear_buffer(struct page *page, size_t size)
{
	void *ptr;
	/*
	 * Ensure that the allocated pages are zeroed, and that any data
	 * lurking in the kernel direct-mapped region is invalidated.
	 */
	ptr = page_address(page);
	if (ptr) {
		memset(ptr, 0, size);
		dmac_flush_range(ptr, ptr + size);
		outer_flush_range(__pa(ptr), __pa(ptr) + size);
	}
}

/*
 * Allocate a DMA buffer for 'dev' of size 'size' using the
 * specified gfp mask.  Note that 'size' must be page aligned.
 */
static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
{
	unsigned long order = get_order(size);
	struct page *page, *p, *e;

	page = alloc_pages(gfp, order);
	if (!page)
		return NULL;

	/*
	 * Now split the huge page and free the excess pages
	 */
	split_page(page, order);
	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
		__free_page(p);

	__dma_clear_buffer(page, size);

	return page;
}

/*
 * Free a DMA buffer.  'size' must be page aligned.
 */
static void __dma_free_buffer(struct page *page, size_t size)
{
	struct page *e = page + (size >> PAGE_SHIFT);

	while (page < e) {
		__free_page(page);
		page++;
	}
}

#ifdef CONFIG_MMU

#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - consistent_base) >> PAGE_SHIFT)
#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)

/*
 * These are the page tables (2MB each) covering uncached, DMA consistent allocations
 */
static pte_t **consistent_pte;

#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M

static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE;

void __init init_consistent_dma_size(unsigned long size)
{
	unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M);

	BUG_ON(consistent_pte); /* Check we're called before DMA region init */
	BUG_ON(base < VMALLOC_END);

	/* Grow region to accommodate specified size */
	if (base < consistent_base)
		consistent_base = base;
}

#include "vmregion.h"

static struct arm_vmregion_head consistent_head = {
	.vm_lock	= __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_end		= CONSISTENT_END,
};

#ifdef CONFIG_HUGETLB_PAGE
#error ARM Coherent DMA allocator does not (yet) support huge TLB
#endif

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
	int ret = 0;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int i = 0;
	unsigned long base = consistent_base;
	unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;

	if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
		return 0;

	consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
	if (!consistent_pte) {
		pr_err("%s: no memory\n", __func__);
		return -ENOMEM;
	}

	pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END);
	consistent_head.vm_start = base;

	do {
		pgd = pgd_offset(&init_mm, base);

		pud = pud_alloc(&init_mm, pgd, base);
		if (!pud) {
			pr_err("%s: no pud tables\n", __func__);
			ret = -ENOMEM;
			break;
		}

		pmd = pmd_alloc(&init_mm, pud, base);
		if (!pmd) {
			pr_err("%s: no pmd tables\n", __func__);
			ret = -ENOMEM;
			break;
		}
		WARN_ON(!pmd_none(*pmd));

		pte = pte_alloc_kernel(pmd, base);
		if (!pte) {
			pr_err("%s: no pte tables\n", __func__);
			ret = -ENOMEM;
			break;
		}

		consistent_pte[i++] = pte;
		base += PMD_SIZE;
	} while (base < CONSISTENT_END);

	return ret;
}
core_initcall(consistent_init);

static void *__alloc_from_contiguous(struct device *dev, size_t size,
				     pgprot_t prot, struct page **ret_page);

static struct arm_vmregion_head coherent_head = {
	.vm_lock	= __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
	.vm_list	= LIST_HEAD_INIT(coherent_head.vm_list),
};

static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;

static int __init early_coherent_pool(char *p)
{
	coherent_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);
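
/*
 * The size of this pool can be set on the kernel command line, e.g.
 * "coherent_pool=1M"; memparse() accepts the usual K/M/G suffixes.
 */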

/*
 * Initialise the coherent pool for atomic allocations.
 */
static int __init coherent_init(void)
{
	pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
	size_t size = coherent_pool_size;
	struct page *page;
	void *ptr;

	if (!IS_ENABLED(CONFIG_CMA))
		return 0;

	ptr = __alloc_from_contiguous(NULL, size, prot, &page);
	if (ptr) {
		coherent_head.vm_start = (unsigned long) ptr;
		coherent_head.vm_end = (unsigned long) ptr + size;
		printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
		       (unsigned)size / 1024);
		return 0;
	}
	printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
	       (unsigned)size / 1024);
	return -ENOMEM;
}
/*
 * CMA is activated by core_initcall, so we must be called after it.
 */
postcore_initcall(coherent_init);

struct dma_contig_early_reserve {
	phys_addr_t base;
	unsigned long size;
};

static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;

static int dma_mmu_remap_num __initdata;

void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
{
	dma_mmu_remap[dma_mmu_remap_num].base = base;
	dma_mmu_remap[dma_mmu_remap_num].size = size;
	dma_mmu_remap_num++;
}

void __init dma_contiguous_remap(void)
{
	int i;
	for (i = 0; i < dma_mmu_remap_num; i++) {
		phys_addr_t start = dma_mmu_remap[i].base;
		phys_addr_t end = start + dma_mmu_remap[i].size;
		struct map_desc map;
		unsigned long addr;

		if (end > arm_lowmem_limit)
			end = arm_lowmem_limit;
		if (start >= end)
			return;

		map.pfn = __phys_to_pfn(start);
		map.virtual = __phys_to_virt(start);
		map.length = end - start;
		map.type = MT_MEMORY_DMA_READY;

		/*
		 * Clear previous low-memory mapping
		 */
		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
		     addr += PMD_SIZE)
			pmd_clear(pmd_off_k(addr));

		iotable_init(&map, 1);
	}
}

static void *
__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
	const void *caller)
{
	struct arm_vmregion *c;
	size_t align;
	int bit;

	if (!consistent_pte) {
		pr_err("%s: not initialised\n", __func__);
		dump_stack();
		return NULL;
	}

	/*
	 * Align the virtual region allocation - maximum alignment is
	 * a section size, minimum is a page size.  This helps reduce
	 * fragmentation of the DMA space, and also prevents allocations
	 * smaller than a section from crossing a section boundary.
	 */
	bit = fls(size - 1);
	if (bit > SECTION_SHIFT)
		bit = SECTION_SHIFT;
	align = 1 << bit;

	/*
	 * Allocate a virtual address in the consistent mapping region.
	 */
	c = arm_vmregion_alloc(&consistent_head, align, size,
			       gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
	if (c) {
		pte_t *pte;
		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);

		pte = consistent_pte[idx] + off;
		c->priv = page;

		do {
			BUG_ON(!pte_none(*pte));

			set_pte_ext(pte, mk_pte(page, prot), 0);
			page++;
			pte++;
			off++;
			if (off >= PTRS_PER_PTE) {
				off = 0;
				pte = consistent_pte[++idx];
			}
		} while (size -= PAGE_SIZE);

		dsb();

		return (void *)c->vm_start;
	}
	return NULL;
}

static void __dma_free_remap(void *cpu_addr, size_t size)
{
	struct arm_vmregion *c;
	unsigned long addr;
	pte_t *ptep;
	int idx;
	u32 off;

	c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
	if (!c) {
		pr_err("%s: trying to free invalid coherent area: %p\n",
		       __func__, cpu_addr);
		dump_stack();
		return;
	}

	if ((c->vm_end - c->vm_start) != size) {
		pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
		       __func__, c->vm_end - c->vm_start, size);
		dump_stack();
		size = c->vm_end - c->vm_start;
	}

	idx = CONSISTENT_PTE_INDEX(c->vm_start);
	off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
	ptep = consistent_pte[idx] + off;
	addr = c->vm_start;
	do {
		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);

		ptep++;
		addr += PAGE_SIZE;
		off++;
		if (off >= PTRS_PER_PTE) {
			off = 0;
			ptep = consistent_pte[++idx];
		}

		if (pte_none(pte) || !pte_present(pte))
			pr_crit("%s: bad page in kernel page table\n",
				__func__);
	} while (size -= PAGE_SIZE);

	flush_tlb_kernel_range(c->vm_start, c->vm_end);

	arm_vmregion_free(&consistent_head, c);
}

static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
			    void *data)
{
	struct page *page = virt_to_page(addr);
	pgprot_t prot = *(pgprot_t *)data;

	set_pte_ext(pte, mk_pte(page, prot), 0);
	return 0;
}

static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
{
	unsigned long start = (unsigned long) page_address(page);
	unsigned end = start + size;

	apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
	dsb();
	flush_tlb_kernel_range(start, end);
}

static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
				 pgprot_t prot, struct page **ret_page,
				 const void *caller)
{
	struct page *page;
	void *ptr;
	page = __dma_alloc_buffer(dev, size, gfp);
	if (!page)
		return NULL;

	ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
	if (!ptr) {
		__dma_free_buffer(page, size);
		return NULL;
	}

	*ret_page = page;
	return ptr;
}

static void *__alloc_from_pool(struct device *dev, size_t size,
			       struct page **ret_page, const void *caller)
{
	struct arm_vmregion *c;
	size_t align;

	if (!coherent_head.vm_start) {
		printk(KERN_ERR "%s: coherent pool not initialised!\n",
		       __func__);
		dump_stack();
		return NULL;
	}

	/*
	 * Align the region allocation - allocations from pool are rather
	 * small, so align them to their order in pages, minimum is a page
	 * size.  This helps reduce fragmentation of the DMA space.
	 */
	align = PAGE_SIZE << get_order(size);
	c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
	if (c) {
		void *ptr = (void *)c->vm_start;
		struct page *page = virt_to_page(ptr);
		*ret_page = page;
		return ptr;
	}
	return NULL;
}

static int __free_from_pool(void *cpu_addr, size_t size)
{
	unsigned long start = (unsigned long)cpu_addr;
	unsigned long end = start + size;
	struct arm_vmregion *c;

	if (start < coherent_head.vm_start || end > coherent_head.vm_end)
		return 0;

	c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);

	if ((c->vm_end - c->vm_start) != size) {
		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
		       __func__, c->vm_end - c->vm_start, size);
		dump_stack();
		size = c->vm_end - c->vm_start;
	}

	arm_vmregion_free(&coherent_head, c);
	return 1;
}

static void *__alloc_from_contiguous(struct device *dev, size_t size,
				     pgprot_t prot, struct page **ret_page)
{
	unsigned long order = get_order(size);
	size_t count = size >> PAGE_SHIFT;
	struct page *page;

	page = dma_alloc_from_contiguous(dev, count, order);
	if (!page)
		return NULL;

	__dma_clear_buffer(page, size);
	__dma_remap(page, size, prot);

	*ret_page = page;
	return page_address(page);
}

static void __free_from_contiguous(struct device *dev, struct page *page,
				   size_t size)
{
	__dma_remap(page, size, pgprot_kernel);
	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
}

static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
{
	prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
			    pgprot_writecombine(prot) :
			    pgprot_dmacoherent(prot);
	return prot;
}

#define nommu() 0

#else	/* !CONFIG_MMU */

#define nommu() 1

#define __get_dma_pgprot(attrs, prot)				__pgprot(0)
#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c)	NULL
#define __alloc_from_pool(dev, size, ret_page, c)		NULL
#define __alloc_from_contiguous(dev, size, prot, ret)		NULL
#define __free_from_pool(cpu_addr, size)			0
#define __free_from_contiguous(dev, page, size)			do { } while (0)
#define __dma_free_remap(cpu_addr, size)			do { } while (0)

#endif	/* CONFIG_MMU */

static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
				   struct page **ret_page)
{
	struct page *page;
	page = __dma_alloc_buffer(dev, size, gfp);
	if (!page)
		return NULL;

	*ret_page = page;
	return page_address(page);
}

static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
			 gfp_t gfp, pgprot_t prot, const void *caller)
{
	u64 mask = get_coherent_dma_mask(dev);
	struct page *page;
	void *addr;

#ifdef CONFIG_DMA_API_DEBUG
	u64 limit = (mask + 1) & ~mask;
	if (limit && size >= limit) {
		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
			size, mask);
		return NULL;
	}
#endif

	if (!mask)
		return NULL;

	if (mask < 0xffffffffULL)
		gfp |= GFP_DMA;

	/*
	 * Following is a work-around (a.k.a. hack) to prevent pages
	 * with __GFP_COMP being passed to split_page() which cannot
	 * handle them.  The real problem is that this flag probably
	 * should be 0 on ARM as it is not supported on this
	 * platform; see CONFIG_HUGETLBFS.
	 */
	gfp &= ~(__GFP_COMP);

	*handle = DMA_ERROR_CODE;
	size = PAGE_ALIGN(size);

	if (arch_is_coherent() || nommu())
		addr = __alloc_simple_buffer(dev, size, gfp, &page);
	else if (!IS_ENABLED(CONFIG_CMA))
		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
	else if (gfp & GFP_ATOMIC)
		addr = __alloc_from_pool(dev, size, &page, caller);
	else
		addr = __alloc_from_contiguous(dev, size, prot, &page);

	if (addr)
		*handle = pfn_to_dma(dev, page_to_pfn(page));

	return addr;
}

/*
 * Allocate DMA-coherent memory space and return both the kernel remapped
 * virtual and bus address for that space.
 */
void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
		    gfp_t gfp, struct dma_attrs *attrs)
{
	pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
	void *memory;

	if (dma_alloc_from_coherent(dev, size, handle, &memory))
		return memory;

	return __dma_alloc(dev, size, handle, gfp, prot,
			   __builtin_return_address(0));
}

/*
 * Create userspace mapping for the DMA-coherent memory.
 */
int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
		 struct dma_attrs *attrs)
{
	int ret = -ENXIO;
#ifdef CONFIG_MMU
	unsigned long pfn = dma_to_pfn(dev, dma_addr);
	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	ret = remap_pfn_range(vma, vma->vm_start,
			      pfn + vma->vm_pgoff,
			      vma->vm_end - vma->vm_start,
			      vma->vm_page_prot);
#endif	/* CONFIG_MMU */

	return ret;
}

/*
 * Free a buffer as defined by the above mapping.
 */
void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
		  dma_addr_t handle, struct dma_attrs *attrs)
{
	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));

	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
		return;

	size = PAGE_ALIGN(size);

	if (arch_is_coherent() || nommu()) {
		__dma_free_buffer(page, size);
	} else if (!IS_ENABLED(CONFIG_CMA)) {
		__dma_free_remap(cpu_addr, size);
		__dma_free_buffer(page, size);
	} else {
		if (__free_from_pool(cpu_addr, size))
			return;
		/*
		 * Non-atomic allocations cannot be freed with IRQs disabled
		 */
		WARN_ON(irqs_disabled());
		__free_from_contiguous(dev, page, size);
	}
}
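
/*
 * Illustrative driver-side use of the coherent allocator above through the
 * generic DMA API ("dev" and "bus" are placeholder names; a real driver
 * would also set the coherent DMA mask first):
 *
 *	dma_addr_t bus;
 *	void *cpu = dma_alloc_coherent(dev, SZ_4K, &bus, GFP_KERNEL);
 *	if (!cpu)
 *		return -ENOMEM;
 *	...use "cpu" for CPU accesses and hand "bus" to the device...
 *	dma_free_coherent(dev, SZ_4K, cpu, bus);
 */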

static void dma_cache_maint_page(struct page *page, unsigned long offset,
	size_t size, enum dma_data_direction dir,
	void (*op)(const void *, size_t, int))
{
	/*
	 * A single sg entry may refer to multiple physically contiguous
	 * pages.  But we still need to process highmem pages individually.
	 * If highmem is not configured then the bulk of this loop gets
	 * optimized out.
	 */
	size_t left = size;
	do {
		size_t len = left;
		void *vaddr;

		if (PageHighMem(page)) {
			if (len + offset > PAGE_SIZE) {
				if (offset >= PAGE_SIZE) {
					page += offset / PAGE_SIZE;
					offset %= PAGE_SIZE;
				}
				len = PAGE_SIZE - offset;
			}
			vaddr = kmap_high_get(page);
			if (vaddr) {
				vaddr += offset;
				op(vaddr, len, dir);
				kunmap_high(page);
			} else if (cache_is_vipt()) {
				/* unmapped pages might still be cached */
				vaddr = kmap_atomic(page);
				op(vaddr + offset, len, dir);
				kunmap_atomic(vaddr);
			}
		} else {
			vaddr = page_address(page) + offset;
			op(vaddr, len, dir);
		}
		offset = 0;
		page++;
		left -= len;
	} while (left);
}

/*
 * Make an area consistent for devices.
 * Note: Drivers should NOT use this function directly, as it will break
 * platforms with CONFIG_DMABOUNCE.
 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 */
static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	unsigned long paddr;

	dma_cache_maint_page(page, off, size, dir, dmac_map_area);

	paddr = page_to_phys(page) + off;
	if (dir == DMA_FROM_DEVICE) {
		outer_inv_range(paddr, paddr + size);
	} else {
		outer_clean_range(paddr, paddr + size);
	}
	/* FIXME: non-speculating: flush on bidirectional mappings? */
}

static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	unsigned long paddr = page_to_phys(page) + off;

	/* FIXME: non-speculating: not required */
	/* don't bother invalidating if DMA to device */
	if (dir != DMA_TO_DEVICE)
		outer_inv_range(paddr, paddr + size);

	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);

	/*
	 * Mark the D-cache clean for this page to avoid extra flushing.
	 */
	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
		set_bit(PG_dcache_clean, &page->flags);
}

/**
 * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map
 * @dir: DMA transfer direction
 *
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the dma_map_single interface.
 * Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}.
 *
 * Device ownership issues as mentioned for dma_map_single are the same
 * here.
 */
int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct dma_map_ops *ops = get_dma_ops(dev);
	struct scatterlist *s;
	int i, j;

	for_each_sg(sg, s, nents, i) {
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		s->dma_length = s->length;
#endif
		s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
						s->length, dir, attrs);
		if (dma_mapping_error(dev, s->dma_address))
			goto bad_mapping;
	}
	return nents;

bad_mapping:
	for_each_sg(sg, s, i, j)
		ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
	return 0;
}

/**
 * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 *
 * Unmap a set of streaming mode DMA translations.  Again, CPU access
 * rules concerning calls here are the same as for dma_unmap_single().
 */
void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct dma_map_ops *ops = get_dma_ops(dev);
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
}

/**
 * arm_dma_sync_sg_for_cpu
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct dma_map_ops *ops = get_dma_ops(dev);
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length,
					 dir);
}

/**
 * arm_dma_sync_sg_for_device
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct dma_map_ops *ops = get_dma_ops(dev);
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		ops->sync_single_for_device(dev, sg_dma_address(s), s->length,
					    dir);
}
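
/*
 * Illustrative scatter-gather usage of the operations above (placeholder
 * names, page-sized entries, error handling trimmed; program_device() is
 * a hypothetical driver helper):
 *
 *	struct scatterlist sgl[2], *s;
 *	int i, mapped;
 *
 *	sg_init_table(sgl, 2);
 *	sg_set_page(&sgl[0], page0, PAGE_SIZE, 0);
 *	sg_set_page(&sgl[1], page1, PAGE_SIZE, 0);
 *	mapped = dma_map_sg(dev, sgl, 2, DMA_FROM_DEVICE);
 *	for_each_sg(sgl, s, mapped, i)
 *		program_device(sg_dma_address(s), sg_dma_len(s));
 *	dma_unmap_sg(dev, sgl, 2, DMA_FROM_DEVICE);
 */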

/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask
 * to this function.
 */
int dma_supported(struct device *dev, u64 mask)
{
	if (mask < (u64)arm_dma_limit)
		return 0;
	return 1;
}
EXPORT_SYMBOL(dma_supported);

static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
{
	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
		return -EIO;

	*dev->dma_mask = dma_mask;

	return 0;
}

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
#ifdef CONFIG_MMU
	arm_vmregion_create_proc("dma-mappings", &consistent_head);
#endif
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

#ifdef CONFIG_ARM_DMA_USE_IOMMU

/* IOMMU */

static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
				      size_t size)
{
	unsigned int order = get_order(size);
	unsigned int align = 0;
	unsigned int count, start;
	unsigned long flags;

	count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
		 (1 << mapping->order) - 1) >> mapping->order;

	if (order > mapping->order)
		align = (1 << (order - mapping->order)) - 1;

	spin_lock_irqsave(&mapping->lock, flags);
	start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
					   count, align);
	if (start > mapping->bits) {
		spin_unlock_irqrestore(&mapping->lock, flags);
		return DMA_ERROR_CODE;
	}

	bitmap_set(mapping->bitmap, start, count);
	spin_unlock_irqrestore(&mapping->lock, flags);

	return mapping->base + (start << (mapping->order + PAGE_SHIFT));
}

static inline void __free_iova(struct dma_iommu_mapping *mapping,
			       dma_addr_t addr, size_t size)
{
	unsigned int start = (addr - mapping->base) >>
			     (mapping->order + PAGE_SHIFT);
	unsigned int count = ((size >> PAGE_SHIFT) +
			      (1 << mapping->order) - 1) >> mapping->order;
	unsigned long flags;

	spin_lock_irqsave(&mapping->lock, flags);
	bitmap_clear(mapping->bitmap, start, count);
	spin_unlock_irqrestore(&mapping->lock, flags);
}

static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
{
	struct page **pages;
	int count = size >> PAGE_SHIFT;
	int array_size = count * sizeof(struct page *);
	int i = 0;

	if (array_size <= PAGE_SIZE)
		pages = kzalloc(array_size, gfp);
	else
		pages = vzalloc(array_size);
	if (!pages)
		return NULL;

	while (count) {
		int j, order = __fls(count);

		pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
		while (!pages[i] && order)
			pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
		if (!pages[i])
			goto error;

		if (order)
			split_page(pages[i], order);
		j = 1 << order;
		while (--j)
			pages[i + j] = pages[i] + j;

		__dma_clear_buffer(pages[i], PAGE_SIZE << order);
		i += 1 << order;
		count -= 1 << order;
	}

	return pages;
error:
	while (--i)
		if (pages[i])
			__free_pages(pages[i], 0);
	if (array_size <= PAGE_SIZE)
		kfree(pages);
	else
		vfree(pages);
	return NULL;
}

static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
{
	int count = size >> PAGE_SHIFT;
	int array_size = count * sizeof(struct page *);
	int i;
	for (i = 0; i < count; i++)
		if (pages[i])
			__free_pages(pages[i], 0);
	if (array_size <= PAGE_SIZE)
		kfree(pages);
	else
		vfree(pages);
	return 0;
}

/*
 * Create a CPU mapping for specified pages
 */
static void *
__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
{
	struct arm_vmregion *c;
	size_t align;
	size_t count = size >> PAGE_SHIFT;
	int bit;

	if (!consistent_pte[0]) {
		pr_err("%s: not initialised\n", __func__);
		dump_stack();
		return NULL;
	}

	/*
	 * Align the virtual region allocation - maximum alignment is
	 * a section size, minimum is a page size.  This helps reduce
	 * fragmentation of the DMA space, and also prevents allocations
	 * smaller than a section from crossing a section boundary.
	 */
	bit = fls(size - 1);
	if (bit > SECTION_SHIFT)
		bit = SECTION_SHIFT;
	align = 1 << bit;

	/*
	 * Allocate a virtual address in the consistent mapping region.
	 */
	c = arm_vmregion_alloc(&consistent_head, align, size,
			       gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
	if (c) {
		pte_t *pte;
		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
		int i = 0;
		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);

		pte = consistent_pte[idx] + off;
		c->priv = pages;

		do {
			BUG_ON(!pte_none(*pte));

			set_pte_ext(pte, mk_pte(pages[i], prot), 0);
			pte++;
			off++;
			i++;
			if (off >= PTRS_PER_PTE) {
				off = 0;
				pte = consistent_pte[++idx];
			}
		} while (i < count);

		dsb();

		return (void *)c->vm_start;
	}
	return NULL;
}

/*
 * Create a mapping in device IO address space for specified pages
 */
static dma_addr_t
__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	dma_addr_t dma_addr, iova;
	int i, ret = DMA_ERROR_CODE;

	dma_addr = __alloc_iova(mapping, size);
	if (dma_addr == DMA_ERROR_CODE)
		return dma_addr;

	iova = dma_addr;
	for (i = 0; i < count; ) {
		unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
		phys_addr_t phys = page_to_phys(pages[i]);
		unsigned int len, j;

		for (j = i + 1; j < count; j++, next_pfn++)
			if (page_to_pfn(pages[j]) != next_pfn)
				break;

		len = (j - i) << PAGE_SHIFT;
		ret = iommu_map(mapping->domain, iova, phys, len, 0);
		if (ret < 0)
			goto fail;
		iova += len;
		i = j;
	}
	return dma_addr;
fail:
	iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
	__free_iova(mapping, dma_addr, size);
	return DMA_ERROR_CODE;
}

static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;

	/*
	 * add optional in-page offset from iova to size and align
	 * result to page size
	 */
	size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
	iova &= PAGE_MASK;

	iommu_unmap(mapping->domain, iova, size);
	__free_iova(mapping, iova, size);
	return 0;
}

static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
{
	pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
	struct page **pages;
	void *addr = NULL;

	*handle = DMA_ERROR_CODE;
	size = PAGE_ALIGN(size);

	pages = __iommu_alloc_buffer(dev, size, gfp);
	if (!pages)
		return NULL;

	*handle = __iommu_create_mapping(dev, pages, size);
	if (*handle == DMA_ERROR_CODE)
		goto err_buffer;

	addr = __iommu_alloc_remap(pages, size, gfp, prot);
	if (!addr)
		goto err_mapping;

	return addr;

err_mapping:
	__iommu_remove_mapping(dev, *handle, size);
err_buffer:
	__iommu_free_buffer(dev, pages, size);
	return NULL;
}

static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
		    struct dma_attrs *attrs)
{
	struct arm_vmregion *c;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);

	if (c) {
		struct page **pages = c->priv;

		unsigned long uaddr = vma->vm_start;
		unsigned long usize = vma->vm_end - vma->vm_start;
		int i = 0;

		do {
			int ret;

			ret = vm_insert_page(vma, uaddr, pages[i++]);
			if (ret) {
				pr_err("Remapping memory, error: %d\n", ret);
				return ret;
			}

			uaddr += PAGE_SIZE;
			usize -= PAGE_SIZE;
		} while (usize > 0);
	}
	return 0;
}

/*
 * free a page as defined by the above mapping.
 * Must not be called with IRQs disabled.
 */
void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			  dma_addr_t handle, struct dma_attrs *attrs)
{
	struct arm_vmregion *c;
	size = PAGE_ALIGN(size);

	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
	if (c) {
		struct page **pages = c->priv;
		__dma_free_remap(cpu_addr, size);
		__iommu_remove_mapping(dev, handle, size);
		__iommu_free_buffer(dev, pages, size);
	}
}

/*
 * Map a part of the scatter-gather list into contiguous io address space
 */
static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
			  size_t size, dma_addr_t *handle,
			  enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	dma_addr_t iova, iova_base;
	int ret = 0;
	unsigned int count;
	struct scatterlist *s;

	size = PAGE_ALIGN(size);
	*handle = DMA_ERROR_CODE;

	iova_base = iova = __alloc_iova(mapping, size);
	if (iova == DMA_ERROR_CODE)
		return -ENOMEM;

	for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
		phys_addr_t phys = page_to_phys(sg_page(s));
		unsigned int len = PAGE_ALIGN(s->offset + s->length);

		if (!arch_is_coherent())
			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);

		ret = iommu_map(mapping->domain, iova, phys, len, 0);
		if (ret < 0)
			goto fail;
		count += len >> PAGE_SHIFT;
		iova += len;
	}
	*handle = iova_base;

	return 0;
fail:
	iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
	__free_iova(mapping, iova_base, size);
	return ret;
}

/**
 * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to map
 * @dir: DMA transfer direction
 *
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * The scatter gather list elements are merged together (if possible) and
 * tagged with the appropriate dma address and length. They are obtained via
 * sg_dma_{address,length}.
 */
int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		     enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *s = sg, *dma = sg, *start = sg;
	int i, count = 0;
	unsigned int offset = s->offset;
	unsigned int size = s->offset + s->length;
	unsigned int max = dma_get_max_seg_size(dev);

	for (i = 1; i < nents; i++) {
		s = sg_next(s);

		s->dma_address = DMA_ERROR_CODE;
		s->dma_length = 0;

		if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
			if (__map_sg_chunk(dev, start, size, &dma->dma_address,
			    dir) < 0)
				goto bad_mapping;

			dma->dma_address += offset;
			dma->dma_length = size - offset;

			size = offset = s->offset;
			start = s;
			dma = sg_next(dma);
			count += 1;
		}
		size += s->length;
	}
	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
		goto bad_mapping;

	dma->dma_address += offset;
	dma->dma_length = size - offset;

	return count+1;

bad_mapping:
	for_each_sg(sg, s, count, i)
		__iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
	return 0;
}

/**
 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 *
 * Unmap a set of streaming mode DMA translations.  Again, CPU access
 * rules concerning calls here are the same as for dma_unmap_single().
 */
void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
			enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (sg_dma_len(s))
			__iommu_remove_mapping(dev, sg_dma_address(s),
					       sg_dma_len(s));
		if (!arch_is_coherent())
			__dma_page_dev_to_cpu(sg_page(s), s->offset,
					      s->length, dir);
	}
}

/**
 * arm_iommu_sync_sg_for_cpu
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		if (!arch_is_coherent())
			__dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
}

/**
 * arm_iommu_sync_sg_for_device
 * @dev: valid struct device pointer
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		if (!arch_is_coherent())
			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
}

/**
 * arm_iommu_map_page
 * @dev: valid struct device pointer
 * @page: page that buffer resides in
 * @offset: offset into page for start of buffer
 * @size: size of buffer to map
 * @dir: DMA transfer direction
 *
 * IOMMU aware version of arm_dma_map_page()
 */
static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
	     unsigned long offset, size_t size, enum dma_data_direction dir,
	     struct dma_attrs *attrs)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	dma_addr_t dma_addr;
	int ret, len = PAGE_ALIGN(size + offset);

	if (!arch_is_coherent())
		__dma_page_cpu_to_dev(page, offset, size, dir);

	dma_addr = __alloc_iova(mapping, len);
	if (dma_addr == DMA_ERROR_CODE)
		return dma_addr;

	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
	if (ret < 0)
		goto fail;

	return dma_addr + offset;
fail:
	__free_iova(mapping, dma_addr, len);
	return DMA_ERROR_CODE;
}

/**
 * arm_iommu_unmap_page
 * @dev: valid struct device pointer
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 *
 * IOMMU aware version of arm_dma_unmap_page()
 */
static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir,
		struct dma_attrs *attrs)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	int offset = handle & ~PAGE_MASK;
	int len = PAGE_ALIGN(size + offset);

	if (!iova)
		return;

	if (!arch_is_coherent())
		__dma_page_dev_to_cpu(page, offset,
				      size, dir);

	iommu_unmap(mapping->domain, iova, len);
	__free_iova(mapping, iova, len);
}

static void arm_iommu_sync_single_for_cpu(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	unsigned int offset = handle & ~PAGE_MASK;

	if (!iova)
		return;

	if (!arch_is_coherent())
		__dma_page_dev_to_cpu(page, offset, size, dir);
}

static void arm_iommu_sync_single_for_device(struct device *dev,
		dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	dma_addr_t iova = handle & PAGE_MASK;
	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
	unsigned int offset = handle & ~PAGE_MASK;

	if (!iova)
		return;

	__dma_page_cpu_to_dev(page, offset, size, dir);
}

struct dma_map_ops iommu_ops = {
	.alloc		= arm_iommu_alloc_attrs,
	.free		= arm_iommu_free_attrs,
	.mmap		= arm_iommu_mmap_attrs,

	.map_page		= arm_iommu_map_page,
	.unmap_page		= arm_iommu_unmap_page,
	.sync_single_for_cpu	= arm_iommu_sync_single_for_cpu,
	.sync_single_for_device	= arm_iommu_sync_single_for_device,

	.map_sg			= arm_iommu_map_sg,
	.unmap_sg		= arm_iommu_unmap_sg,
	.sync_sg_for_cpu	= arm_iommu_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_iommu_sync_sg_for_device,
};
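
/*
 * Illustrative platform-code sketch for wiring a device to the IOMMU ops
 * above (the base address, size and order below are placeholder values;
 * a real caller also checks the IS_ERR() and error returns):
 *
 *	struct dma_iommu_mapping *mapping;
 *
 *	mapping = arm_iommu_create_mapping(&platform_bus_type,
 *					   0x80000000, SZ_128M, 0);
 *	if (!IS_ERR(mapping))
 *		arm_iommu_attach_device(dev, mapping);
 */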

/**
 * arm_iommu_create_mapping
 * @bus: pointer to the bus holding the client device (for IOMMU calls)
 * @base: start address of the valid IO address space
 * @size: size of the valid IO address space
 * @order: accuracy of the IO addresses allocations
 *
 * Creates a mapping structure which holds information about used/unused
 * IO address ranges, which is required to perform memory allocation and
 * mapping with IOMMU aware functions.
 *
 * The client device needs to be attached to the mapping with the
 * arm_iommu_attach_device() function.
 */
struct dma_iommu_mapping *
arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
			 int order)
{
	unsigned int count = size >> (PAGE_SHIFT + order);
	unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
	struct dma_iommu_mapping *mapping;
	int err = -ENOMEM;

	if (!count)
		return ERR_PTR(-EINVAL);

	mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
	if (!mapping)
		goto err;

	mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
	if (!mapping->bitmap)
		goto err2;

	mapping->base = base;
	mapping->bits = BITS_PER_BYTE * bitmap_size;
	mapping->order = order;
	spin_lock_init(&mapping->lock);

	mapping->domain = iommu_domain_alloc(bus);
	if (!mapping->domain)
		goto err3;

	kref_init(&mapping->kref);
	return mapping;
err3:
	kfree(mapping->bitmap);
err2:
	kfree(mapping);
err:
	return ERR_PTR(err);
}

static void release_iommu_mapping(struct kref *kref)
{
	struct dma_iommu_mapping *mapping =
		container_of(kref, struct dma_iommu_mapping, kref);

	iommu_domain_free(mapping->domain);
	kfree(mapping->bitmap);
	kfree(mapping);
}

void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
{
	if (mapping)
		kref_put(&mapping->kref, release_iommu_mapping);
}

/**
 * arm_iommu_attach_device
 * @dev: valid struct device pointer
 * @mapping: io address space mapping structure (returned from
 *	arm_iommu_create_mapping)
 *
 * Attaches specified io address space mapping to the provided device,
 * this replaces the dma operations (dma_map_ops pointer) with the
 * IOMMU aware version. More than one client might be attached to
 * the same io address space mapping.
 */
int arm_iommu_attach_device(struct device *dev,
			    struct dma_iommu_mapping *mapping)
{
	int err;

	err = iommu_attach_device(mapping->domain, dev);
	if (err)
		return err;

	kref_get(&mapping->kref);
	dev->archdata.mapping = mapping;
	set_dma_ops(dev, &iommu_ops);

	pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev));
	return 0;
}

#endif