/*
 *  linux/arch/arm/mm/dma-mapping.c
 *
 *  Copyright (C) 2000-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  DMA uncached mapping support.
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/highmem.h>

#include <asm/memory.h>
#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/sizes.h>

#include "mm.h"

static u64 get_coherent_dma_mask(struct device *dev)
{
	u64 mask = (u64)arm_dma_limit;

	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (mask == 0) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			return 0;
		}

		if ((~mask) & (u64)arm_dma_limit) {
			dev_warn(dev, "coherent DMA mask %#llx is smaller "
				 "than system GFP_DMA mask %#llx\n",
				 mask, (u64)arm_dma_limit);
			return 0;
		}
	}

	return mask;
}

/*
 * Allocate a DMA buffer for 'dev' of size 'size' using the
 * specified gfp mask.  Note that 'size' must be page aligned.
 */
static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
{
	unsigned long order = get_order(size);
	struct page *page, *p, *e;
	void *ptr;
	u64 mask = get_coherent_dma_mask(dev);

#ifdef CONFIG_DMA_API_DEBUG
	u64 limit = (mask + 1) & ~mask;
	if (limit && size >= limit) {
		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
			size, mask);
		return NULL;
	}
#endif

	if (!mask)
		return NULL;

	if (mask < 0xffffffffULL)
		gfp |= GFP_DMA;

	page = alloc_pages(gfp, order);
	if (!page)
		return NULL;

	/*
	 * Now split the huge page and free the excess pages
	 */
	split_page(page, order);
	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
		__free_page(p);

	/*
	 * Ensure that the allocated pages are zeroed, and that any data
	 * lurking in the kernel direct-mapped region is invalidated.
	 */
	ptr = page_address(page);
	memset(ptr, 0, size);
	dmac_flush_range(ptr, ptr + size);
	outer_flush_range(__pa(ptr), __pa(ptr) + size);

	return page;
}

/*
 * Free a DMA buffer.  'size' must be page aligned.
 */
108 */ 109 static void __dma_free_buffer(struct page *page, size_t size) 110 { 111 struct page *e = page + (size >> PAGE_SHIFT); 112 113 while (page < e) { 114 __free_page(page); 115 page++; 116 } 117 } 118 119 #ifdef CONFIG_MMU 120 /* Sanity check size */ 121 #if (CONSISTENT_DMA_SIZE % SZ_2M) 122 #error "CONSISTENT_DMA_SIZE must be multiple of 2MiB" 123 #endif 124 125 #define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) 126 #define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT) 127 #define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT) 128 129 /* 130 * These are the page tables (2MB each) covering uncached, DMA consistent allocations 131 */ 132 static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; 133 134 #include "vmregion.h" 135 136 static struct arm_vmregion_head consistent_head = { 137 .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), 138 .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), 139 .vm_start = CONSISTENT_BASE, 140 .vm_end = CONSISTENT_END, 141 }; 142 143 #ifdef CONFIG_HUGETLB_PAGE 144 #error ARM Coherent DMA allocator does not (yet) support huge TLB 145 #endif 146 147 /* 148 * Initialise the consistent memory allocation. 149 */ 150 static int __init consistent_init(void) 151 { 152 int ret = 0; 153 pgd_t *pgd; 154 pud_t *pud; 155 pmd_t *pmd; 156 pte_t *pte; 157 int i = 0; 158 u32 base = CONSISTENT_BASE; 159 160 do { 161 pgd = pgd_offset(&init_mm, base); 162 163 pud = pud_alloc(&init_mm, pgd, base); 164 if (!pud) { 165 printk(KERN_ERR "%s: no pud tables\n", __func__); 166 ret = -ENOMEM; 167 break; 168 } 169 170 pmd = pmd_alloc(&init_mm, pud, base); 171 if (!pmd) { 172 printk(KERN_ERR "%s: no pmd tables\n", __func__); 173 ret = -ENOMEM; 174 break; 175 } 176 WARN_ON(!pmd_none(*pmd)); 177 178 pte = pte_alloc_kernel(pmd, base); 179 if (!pte) { 180 printk(KERN_ERR "%s: no pte tables\n", __func__); 181 ret = -ENOMEM; 182 break; 183 } 184 185 consistent_pte[i++] = pte; 186 base += (1 << PGDIR_SHIFT); 187 } while (base < CONSISTENT_END); 188 189 return ret; 190 } 191 192 core_initcall(consistent_init); 193 194 static void * 195 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot) 196 { 197 struct arm_vmregion *c; 198 size_t align; 199 int bit; 200 201 if (!consistent_pte[0]) { 202 printk(KERN_ERR "%s: not initialised\n", __func__); 203 dump_stack(); 204 return NULL; 205 } 206 207 /* 208 * Align the virtual region allocation - maximum alignment is 209 * a section size, minimum is a page size. This helps reduce 210 * fragmentation of the DMA space, and also prevents allocations 211 * smaller than a section from crossing a section boundary. 212 */ 213 bit = fls(size - 1); 214 if (bit > SECTION_SHIFT) 215 bit = SECTION_SHIFT; 216 align = 1 << bit; 217 218 /* 219 * Allocate a virtual address in the consistent mapping region. 
220 */ 221 c = arm_vmregion_alloc(&consistent_head, align, size, 222 gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); 223 if (c) { 224 pte_t *pte; 225 int idx = CONSISTENT_PTE_INDEX(c->vm_start); 226 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); 227 228 pte = consistent_pte[idx] + off; 229 c->vm_pages = page; 230 231 do { 232 BUG_ON(!pte_none(*pte)); 233 234 set_pte_ext(pte, mk_pte(page, prot), 0); 235 page++; 236 pte++; 237 off++; 238 if (off >= PTRS_PER_PTE) { 239 off = 0; 240 pte = consistent_pte[++idx]; 241 } 242 } while (size -= PAGE_SIZE); 243 244 dsb(); 245 246 return (void *)c->vm_start; 247 } 248 return NULL; 249 } 250 251 static void __dma_free_remap(void *cpu_addr, size_t size) 252 { 253 struct arm_vmregion *c; 254 unsigned long addr; 255 pte_t *ptep; 256 int idx; 257 u32 off; 258 259 c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); 260 if (!c) { 261 printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", 262 __func__, cpu_addr); 263 dump_stack(); 264 return; 265 } 266 267 if ((c->vm_end - c->vm_start) != size) { 268 printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", 269 __func__, c->vm_end - c->vm_start, size); 270 dump_stack(); 271 size = c->vm_end - c->vm_start; 272 } 273 274 idx = CONSISTENT_PTE_INDEX(c->vm_start); 275 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); 276 ptep = consistent_pte[idx] + off; 277 addr = c->vm_start; 278 do { 279 pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); 280 281 ptep++; 282 addr += PAGE_SIZE; 283 off++; 284 if (off >= PTRS_PER_PTE) { 285 off = 0; 286 ptep = consistent_pte[++idx]; 287 } 288 289 if (pte_none(pte) || !pte_present(pte)) 290 printk(KERN_CRIT "%s: bad page in kernel page table\n", 291 __func__); 292 } while (size -= PAGE_SIZE); 293 294 flush_tlb_kernel_range(c->vm_start, c->vm_end); 295 296 arm_vmregion_free(&consistent_head, c); 297 } 298 299 #else /* !CONFIG_MMU */ 300 301 #define __dma_alloc_remap(page, size, gfp, prot) page_address(page) 302 #define __dma_free_remap(addr, size) do { } while (0) 303 304 #endif /* CONFIG_MMU */ 305 306 static void * 307 __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, 308 pgprot_t prot) 309 { 310 struct page *page; 311 void *addr; 312 313 *handle = ~0; 314 size = PAGE_ALIGN(size); 315 316 page = __dma_alloc_buffer(dev, size, gfp); 317 if (!page) 318 return NULL; 319 320 if (!arch_is_coherent()) 321 addr = __dma_alloc_remap(page, size, gfp, prot); 322 else 323 addr = page_address(page); 324 325 if (addr) 326 *handle = pfn_to_dma(dev, page_to_pfn(page)); 327 328 return addr; 329 } 330 331 /* 332 * Allocate DMA-coherent memory space and return both the kernel remapped 333 * virtual and bus address for that space. 334 */ 335 void * 336 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) 337 { 338 void *memory; 339 340 if (dma_alloc_from_coherent(dev, size, handle, &memory)) 341 return memory; 342 343 return __dma_alloc(dev, size, handle, gfp, 344 pgprot_dmacoherent(pgprot_kernel)); 345 } 346 EXPORT_SYMBOL(dma_alloc_coherent); 347 348 /* 349 * Allocate a writecombining region, in much the same way as 350 * dma_alloc_coherent above. 
351 */ 352 void * 353 dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) 354 { 355 return __dma_alloc(dev, size, handle, gfp, 356 pgprot_writecombine(pgprot_kernel)); 357 } 358 EXPORT_SYMBOL(dma_alloc_writecombine); 359 360 static int dma_mmap(struct device *dev, struct vm_area_struct *vma, 361 void *cpu_addr, dma_addr_t dma_addr, size_t size) 362 { 363 int ret = -ENXIO; 364 #ifdef CONFIG_MMU 365 unsigned long user_size, kern_size; 366 struct arm_vmregion *c; 367 368 user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 369 370 c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); 371 if (c) { 372 unsigned long off = vma->vm_pgoff; 373 374 kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; 375 376 if (off < kern_size && 377 user_size <= (kern_size - off)) { 378 ret = remap_pfn_range(vma, vma->vm_start, 379 page_to_pfn(c->vm_pages) + off, 380 user_size << PAGE_SHIFT, 381 vma->vm_page_prot); 382 } 383 } 384 #endif /* CONFIG_MMU */ 385 386 return ret; 387 } 388 389 int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, 390 void *cpu_addr, dma_addr_t dma_addr, size_t size) 391 { 392 vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); 393 return dma_mmap(dev, vma, cpu_addr, dma_addr, size); 394 } 395 EXPORT_SYMBOL(dma_mmap_coherent); 396 397 int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, 398 void *cpu_addr, dma_addr_t dma_addr, size_t size) 399 { 400 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 401 return dma_mmap(dev, vma, cpu_addr, dma_addr, size); 402 } 403 EXPORT_SYMBOL(dma_mmap_writecombine); 404 405 /* 406 * free a page as defined by the above mapping. 407 * Must not be called with IRQs disabled. 408 */ 409 void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) 410 { 411 WARN_ON(irqs_disabled()); 412 413 if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) 414 return; 415 416 size = PAGE_ALIGN(size); 417 418 if (!arch_is_coherent()) 419 __dma_free_remap(cpu_addr, size); 420 421 __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); 422 } 423 EXPORT_SYMBOL(dma_free_coherent); 424 425 /* 426 * Make an area consistent for devices. 427 * Note: Drivers should NOT use this function directly, as it will break 428 * platforms with CONFIG_DMABOUNCE. 429 * Use the driver DMA support - see dma-mapping.h (dma_sync_*) 430 */ 431 void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, 432 enum dma_data_direction dir) 433 { 434 unsigned long paddr; 435 436 BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); 437 438 dmac_map_area(kaddr, size, dir); 439 440 paddr = __pa(kaddr); 441 if (dir == DMA_FROM_DEVICE) { 442 outer_inv_range(paddr, paddr + size); 443 } else { 444 outer_clean_range(paddr, paddr + size); 445 } 446 /* FIXME: non-speculating: flush on bidirectional mappings? 
/*
 * Make an area consistent for devices.
 * Note: Drivers should NOT use this function directly, as it will break
 * platforms with CONFIG_DMABOUNCE.
 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 */
void ___dma_single_cpu_to_dev(const void *kaddr, size_t size,
	enum dma_data_direction dir)
{
	unsigned long paddr;

	BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));

	dmac_map_area(kaddr, size, dir);

	paddr = __pa(kaddr);
	if (dir == DMA_FROM_DEVICE) {
		outer_inv_range(paddr, paddr + size);
	} else {
		outer_clean_range(paddr, paddr + size);
	}
	/* FIXME: non-speculating: flush on bidirectional mappings? */
}
EXPORT_SYMBOL(___dma_single_cpu_to_dev);

void ___dma_single_dev_to_cpu(const void *kaddr, size_t size,
	enum dma_data_direction dir)
{
	BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));

	/* FIXME: non-speculating: not required */
	/* don't bother invalidating if DMA to device */
	if (dir != DMA_TO_DEVICE) {
		unsigned long paddr = __pa(kaddr);
		outer_inv_range(paddr, paddr + size);
	}

	dmac_unmap_area(kaddr, size, dir);
}
EXPORT_SYMBOL(___dma_single_dev_to_cpu);

static void dma_cache_maint_page(struct page *page, unsigned long offset,
	size_t size, enum dma_data_direction dir,
	void (*op)(const void *, size_t, int))
{
	/*
	 * A single sg entry may refer to multiple physically contiguous
	 * pages.  But we still need to process highmem pages individually.
	 * If highmem is not configured then the bulk of this loop gets
	 * optimized out.
	 */
	size_t left = size;
	do {
		size_t len = left;
		void *vaddr;

		if (PageHighMem(page)) {
			if (len + offset > PAGE_SIZE) {
				if (offset >= PAGE_SIZE) {
					page += offset / PAGE_SIZE;
					offset %= PAGE_SIZE;
				}
				len = PAGE_SIZE - offset;
			}
			vaddr = kmap_high_get(page);
			if (vaddr) {
				vaddr += offset;
				op(vaddr, len, dir);
				kunmap_high(page);
			} else if (cache_is_vipt()) {
				/* unmapped pages might still be cached */
				vaddr = kmap_atomic(page);
				op(vaddr + offset, len, dir);
				kunmap_atomic(vaddr);
			}
		} else {
			vaddr = page_address(page) + offset;
			op(vaddr, len, dir);
		}
		offset = 0;
		page++;
		left -= len;
	} while (left);
}

void ___dma_page_cpu_to_dev(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	unsigned long paddr;

	dma_cache_maint_page(page, off, size, dir, dmac_map_area);

	paddr = page_to_phys(page) + off;
	if (dir == DMA_FROM_DEVICE) {
		outer_inv_range(paddr, paddr + size);
	} else {
		outer_clean_range(paddr, paddr + size);
	}
	/* FIXME: non-speculating: flush on bidirectional mappings? */
}
EXPORT_SYMBOL(___dma_page_cpu_to_dev);

void ___dma_page_dev_to_cpu(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	unsigned long paddr = page_to_phys(page) + off;

	/* FIXME: non-speculating: not required */
	/* don't bother invalidating if DMA to device */
	if (dir != DMA_TO_DEVICE)
		outer_inv_range(paddr, paddr + size);

	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);

	/*
	 * Mark the D-cache clean for this page to avoid extra flushing.
	 */
	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
		set_bit(PG_dcache_clean, &page->flags);
}
EXPORT_SYMBOL(___dma_page_dev_to_cpu);
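/*
 * Illustrative sketch only (not part of the original file): the helpers
 * above are the backends for the dma_sync_single_for_{cpu,device}() calls
 * a driver would use around CPU access to a streaming mapping.  The
 * function and buffer names below are hypothetical.
 */
#if 0
static void example_streaming_rx(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, dma))
		return;

	/* ... device writes into the buffer ... */

	/* give the buffer back to the CPU before reading it */
	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
	/* ... CPU reads buf ... */
	/* hand ownership back to the device for the next transfer */
	dma_sync_single_for_device(dev, dma, len, DMA_FROM_DEVICE);

	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);
}
#endif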
562 */ 563 int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 564 enum dma_data_direction dir) 565 { 566 struct scatterlist *s; 567 int i, j; 568 569 BUG_ON(!valid_dma_direction(dir)); 570 571 for_each_sg(sg, s, nents, i) { 572 s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, 573 s->length, dir); 574 if (dma_mapping_error(dev, s->dma_address)) 575 goto bad_mapping; 576 } 577 debug_dma_map_sg(dev, sg, nents, nents, dir); 578 return nents; 579 580 bad_mapping: 581 for_each_sg(sg, s, i, j) 582 __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); 583 return 0; 584 } 585 EXPORT_SYMBOL(dma_map_sg); 586 587 /** 588 * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 589 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 590 * @sg: list of buffers 591 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 592 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 593 * 594 * Unmap a set of streaming mode DMA translations. Again, CPU access 595 * rules concerning calls here are the same as for dma_unmap_single(). 596 */ 597 void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 598 enum dma_data_direction dir) 599 { 600 struct scatterlist *s; 601 int i; 602 603 debug_dma_unmap_sg(dev, sg, nents, dir); 604 605 for_each_sg(sg, s, nents, i) 606 __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); 607 } 608 EXPORT_SYMBOL(dma_unmap_sg); 609 610 /** 611 * dma_sync_sg_for_cpu 612 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 613 * @sg: list of buffers 614 * @nents: number of buffers to map (returned from dma_map_sg) 615 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 616 */ 617 void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 618 int nents, enum dma_data_direction dir) 619 { 620 struct scatterlist *s; 621 int i; 622 623 for_each_sg(sg, s, nents, i) { 624 if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, 625 sg_dma_len(s), dir)) 626 continue; 627 628 __dma_page_dev_to_cpu(sg_page(s), s->offset, 629 s->length, dir); 630 } 631 632 debug_dma_sync_sg_for_cpu(dev, sg, nents, dir); 633 } 634 EXPORT_SYMBOL(dma_sync_sg_for_cpu); 635 636 /** 637 * dma_sync_sg_for_device 638 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 639 * @sg: list of buffers 640 * @nents: number of buffers to map (returned from dma_map_sg) 641 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 642 */ 643 void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 644 int nents, enum dma_data_direction dir) 645 { 646 struct scatterlist *s; 647 int i; 648 649 for_each_sg(sg, s, nents, i) { 650 if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, 651 sg_dma_len(s), dir)) 652 continue; 653 654 __dma_page_cpu_to_dev(sg_page(s), s->offset, 655 s->length, dir); 656 } 657 658 debug_dma_sync_sg_for_device(dev, sg, nents, dir); 659 } 660 EXPORT_SYMBOL(dma_sync_sg_for_device); 661 662 /* 663 * Return whether the given device DMA address mask can be supported 664 * properly. For example, if your device can only drive the low 24-bits 665 * during bus mastering, then you would pass 0x00ffffff as the mask 666 * to this function. 
667 */ 668 int dma_supported(struct device *dev, u64 mask) 669 { 670 if (mask < (u64)arm_dma_limit) 671 return 0; 672 return 1; 673 } 674 EXPORT_SYMBOL(dma_supported); 675 676 int dma_set_mask(struct device *dev, u64 dma_mask) 677 { 678 if (!dev->dma_mask || !dma_supported(dev, dma_mask)) 679 return -EIO; 680 681 #ifndef CONFIG_DMABOUNCE 682 *dev->dma_mask = dma_mask; 683 #endif 684 685 return 0; 686 } 687 EXPORT_SYMBOL(dma_set_mask); 688 689 #define PREALLOC_DMA_DEBUG_ENTRIES 4096 690 691 static int __init dma_debug_do_init(void) 692 { 693 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); 694 return 0; 695 } 696 fs_initcall(dma_debug_do_init); 697