/*
 *  linux/arch/arm/mm/dma-mapping.c
 *
 *  Copyright (C) 2000-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  DMA uncached mapping support.
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/highmem.h>

#include <asm/memory.h>
#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/sizes.h>

static u64 get_coherent_dma_mask(struct device *dev)
{
	u64 mask = ISA_DMA_THRESHOLD;

	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (mask == 0) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			return 0;
		}

		if ((~mask) & ISA_DMA_THRESHOLD) {
			dev_warn(dev, "coherent DMA mask %#llx is smaller "
				 "than system GFP_DMA mask %#llx\n",
				 mask, (unsigned long long)ISA_DMA_THRESHOLD);
			return 0;
		}
	}

	return mask;
}

/*
 * Allocate a DMA buffer for 'dev' of size 'size' using the
 * specified gfp mask.  Note that 'size' must be page aligned.
 */
static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
{
	unsigned long order = get_order(size);
	struct page *page, *p, *e;
	void *ptr;
	u64 mask = get_coherent_dma_mask(dev);

#ifdef CONFIG_DMA_API_DEBUG
	u64 limit = (mask + 1) & ~mask;
	if (limit && size >= limit) {
		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
			size, mask);
		return NULL;
	}
#endif

	if (!mask)
		return NULL;

	if (mask < 0xffffffffULL)
		gfp |= GFP_DMA;

	page = alloc_pages(gfp, order);
	if (!page)
		return NULL;

	/*
	 * Now split the huge page and free the excess pages
	 */
	split_page(page, order);
	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
		__free_page(p);

	/*
	 * Ensure that the allocated pages are zeroed, and that any data
	 * lurking in the kernel direct-mapped region is invalidated.
	 */
	ptr = page_address(page);
	memset(ptr, 0, size);
	dmac_flush_range(ptr, ptr + size);
	outer_flush_range(__pa(ptr), __pa(ptr) + size);

	return page;
}
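
/*
 * Illustrative sketch: get_coherent_dma_mask() above warns and fails the
 * allocation when a device has no coherent DMA mask set, so a driver's
 * probe path is expected to set the mask first, roughly as below.  The
 * foo_* name is hypothetical and not part of this file.
 */
static int foo_setup_dma_mask(struct device *dev)
{
	/* Declare that the device can address the full 32-bit bus range. */
	return dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
}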
/*
 * Free a DMA buffer.  'size' must be page aligned.
 */
static void __dma_free_buffer(struct page *page, size_t size)
{
	struct page *e = page + (size >> PAGE_SHIFT);

	while (page < e) {
		__free_page(page);
		page++;
	}
}

#ifdef CONFIG_MMU
/* Sanity check size */
#if (CONSISTENT_DMA_SIZE % SZ_2M)
#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB"
#endif

#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT)
#define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT)

/*
 * These are the page tables (2MB each) covering uncached, DMA consistent allocations
 */
static pte_t *consistent_pte[NUM_CONSISTENT_PTES];

#include "vmregion.h"

static struct arm_vmregion_head consistent_head = {
	.vm_lock	= __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start	= CONSISTENT_BASE,
	.vm_end		= CONSISTENT_END,
};

#ifdef CONFIG_HUGETLB_PAGE
#error ARM Coherent DMA allocator does not (yet) support huge TLB
#endif

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
	int ret = 0;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int i = 0;
	u32 base = CONSISTENT_BASE;

	do {
		pgd = pgd_offset(&init_mm, base);
		pmd = pmd_alloc(&init_mm, pgd, base);
		if (!pmd) {
			printk(KERN_ERR "%s: no pmd tables\n", __func__);
			ret = -ENOMEM;
			break;
		}
		WARN_ON(!pmd_none(*pmd));

		pte = pte_alloc_kernel(pmd, base);
		if (!pte) {
			printk(KERN_ERR "%s: no pte tables\n", __func__);
			ret = -ENOMEM;
			break;
		}

		consistent_pte[i++] = pte;
		base += (1 << PGDIR_SHIFT);
	} while (base < CONSISTENT_END);

	return ret;
}

core_initcall(consistent_init);
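
/*
 * Worked example (assuming 4KiB pages, so each 2MB PTE table above holds
 * 512 entries, i.e. PTRS_PER_PTE): for a virtual address 5MB + 12KB above
 * CONSISTENT_BASE, CONSISTENT_PTE_INDEX() selects table 2 (0x503000 >> 21)
 * and CONSISTENT_OFFSET() & (PTRS_PER_PTE - 1) selects slot
 * (0x503000 >> 12) & 511 = 1283 & 511 = 259 within it.  This is exactly the
 * lookup performed by __dma_alloc_remap() and __dma_free_remap() below.
 */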
static void *
__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
{
	struct arm_vmregion *c;
	size_t align;
	int bit;

	if (!consistent_pte[0]) {
		printk(KERN_ERR "%s: not initialised\n", __func__);
		dump_stack();
		return NULL;
	}

	/*
	 * Align the virtual region allocation - maximum alignment is
	 * a section size, minimum is a page size.  This helps reduce
	 * fragmentation of the DMA space, and also prevents allocations
	 * smaller than a section from crossing a section boundary.
	 */
	bit = fls(size - 1);
	if (bit > SECTION_SHIFT)
		bit = SECTION_SHIFT;
	align = 1 << bit;

	/*
	 * Allocate a virtual address in the consistent mapping region.
	 */
	c = arm_vmregion_alloc(&consistent_head, align, size,
			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
	if (c) {
		pte_t *pte;
		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);

		pte = consistent_pte[idx] + off;
		c->vm_pages = page;

		do {
			BUG_ON(!pte_none(*pte));

			set_pte_ext(pte, mk_pte(page, prot), 0);
			page++;
			pte++;
			off++;
			if (off >= PTRS_PER_PTE) {
				off = 0;
				pte = consistent_pte[++idx];
			}
		} while (size -= PAGE_SIZE);

		dsb();

		return (void *)c->vm_start;
	}
	return NULL;
}

static void __dma_free_remap(void *cpu_addr, size_t size)
{
	struct arm_vmregion *c;
	unsigned long addr;
	pte_t *ptep;
	int idx;
	u32 off;

	c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
	if (!c) {
		printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
		       __func__, cpu_addr);
		dump_stack();
		return;
	}

	if ((c->vm_end - c->vm_start) != size) {
		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
		       __func__, c->vm_end - c->vm_start, size);
		dump_stack();
		size = c->vm_end - c->vm_start;
	}

	idx = CONSISTENT_PTE_INDEX(c->vm_start);
	off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
	ptep = consistent_pte[idx] + off;
	addr = c->vm_start;
	do {
		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);

		ptep++;
		addr += PAGE_SIZE;
		off++;
		if (off >= PTRS_PER_PTE) {
			off = 0;
			ptep = consistent_pte[++idx];
		}

		if (pte_none(pte) || !pte_present(pte))
			printk(KERN_CRIT "%s: bad page in kernel page table\n",
			       __func__);
	} while (size -= PAGE_SIZE);

	flush_tlb_kernel_range(c->vm_start, c->vm_end);

	arm_vmregion_free(&consistent_head, c);
}

#else	/* !CONFIG_MMU */

#define __dma_alloc_remap(page, size, gfp, prot)	page_address(page)
#define __dma_free_remap(addr, size)			do { } while (0)

#endif	/* CONFIG_MMU */

static void *
__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
	    pgprot_t prot)
{
	struct page *page;
	void *addr;

	*handle = ~0;
	size = PAGE_ALIGN(size);

	page = __dma_alloc_buffer(dev, size, gfp);
	if (!page)
		return NULL;

	if (!arch_is_coherent())
		addr = __dma_alloc_remap(page, size, gfp, prot);
	else
		addr = page_address(page);

	if (addr)
		*handle = pfn_to_dma(dev, page_to_pfn(page));

	return addr;
}

/*
 * Allocate DMA-coherent memory space and return both the kernel remapped
 * virtual and bus address for that space.
 */
void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
	void *memory;

	if (dma_alloc_from_coherent(dev, size, handle, &memory))
		return memory;

	return __dma_alloc(dev, size, handle, gfp,
			   pgprot_dmacoherent(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_coherent);
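
/*
 * Illustrative sketch: typical driver usage of the coherent API exported
 * above.  A descriptor ring is allocated once, handed to the hardware via
 * the returned bus address, and released on teardown.  The foo_* names are
 * hypothetical.
 */
static void *foo_alloc_ring(struct device *dev, size_t ring_size,
			    dma_addr_t *ring_dma)
{
	/* Returns a kernel virtual address; *ring_dma holds the bus address. */
	return dma_alloc_coherent(dev, ring_size, ring_dma, GFP_KERNEL);
}

static void foo_free_ring(struct device *dev, size_t ring_size,
			  void *ring, dma_addr_t ring_dma)
{
	/* Must not be called with IRQs disabled (see dma_free_coherent). */
	dma_free_coherent(dev, ring_size, ring, ring_dma);
}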
/*
 * Allocate a writecombining region, in much the same way as
 * dma_alloc_coherent above.
 */
void *
dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
	return __dma_alloc(dev, size, handle, gfp,
			   pgprot_writecombine(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_writecombine);

static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int ret = -ENXIO;
#ifdef CONFIG_MMU
	unsigned long user_size, kern_size;
	struct arm_vmregion *c;

	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
	if (c) {
		unsigned long off = vma->vm_pgoff;

		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;

		if (off < kern_size &&
		    user_size <= (kern_size - off)) {
			ret = remap_pfn_range(vma, vma->vm_start,
					      page_to_pfn(c->vm_pages) + off,
					      user_size << PAGE_SHIFT,
					      vma->vm_page_prot);
		}
	}
#endif	/* CONFIG_MMU */

	return ret;
}

int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot);
	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_coherent);

int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_writecombine);

/*
 * free a page as defined by the above mapping.
 * Must not be called with IRQs disabled.
 */
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
{
	WARN_ON(irqs_disabled());

	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
		return;

	size = PAGE_ALIGN(size);

	if (!arch_is_coherent())
		__dma_free_remap(cpu_addr, size);

	__dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
}
EXPORT_SYMBOL(dma_free_coherent);
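
/*
 * Illustrative sketch: a hypothetical frame-buffer style driver exposing a
 * writecombined buffer (allocated with dma_alloc_writecombine) to userspace
 * from its mmap file operation, using dma_mmap_writecombine() above.
 * struct foo_dev, its fields and foo_mmap are made-up names.
 */
struct foo_dev {
	struct device	*dev;
	void		*buf_cpu;	/* from dma_alloc_writecombine() */
	dma_addr_t	buf_dma;
	size_t		buf_size;
};

static int foo_mmap(struct foo_dev *fd, struct vm_area_struct *vma)
{
	/* dma_mmap_writecombine() applies the WC page protection itself. */
	return dma_mmap_writecombine(fd->dev, vma, fd->buf_cpu,
				     fd->buf_dma, fd->buf_size);
}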
/*
 * Make an area consistent for devices.
 * Note: Drivers should NOT use this function directly, as it will break
 * platforms with CONFIG_DMABOUNCE.
 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 */
void ___dma_single_cpu_to_dev(const void *kaddr, size_t size,
	enum dma_data_direction dir)
{
	unsigned long paddr;

	BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));

	dmac_map_area(kaddr, size, dir);

	paddr = __pa(kaddr);
	if (dir == DMA_FROM_DEVICE) {
		outer_inv_range(paddr, paddr + size);
	} else {
		outer_clean_range(paddr, paddr + size);
	}
	/* FIXME: non-speculating: flush on bidirectional mappings? */
}
EXPORT_SYMBOL(___dma_single_cpu_to_dev);

void ___dma_single_dev_to_cpu(const void *kaddr, size_t size,
	enum dma_data_direction dir)
{
	BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));

	/* FIXME: non-speculating: not required */
	/* don't bother invalidating if DMA to device */
	if (dir != DMA_TO_DEVICE) {
		unsigned long paddr = __pa(kaddr);
		outer_inv_range(paddr, paddr + size);
	}

	dmac_unmap_area(kaddr, size, dir);
}
EXPORT_SYMBOL(___dma_single_dev_to_cpu);

static void dma_cache_maint_page(struct page *page, unsigned long offset,
	size_t size, enum dma_data_direction dir,
	void (*op)(const void *, size_t, int))
{
	/*
	 * A single sg entry may refer to multiple physically contiguous
	 * pages.  But we still need to process highmem pages individually.
	 * If highmem is not configured then the bulk of this loop gets
	 * optimized out.
	 */
	size_t left = size;
	do {
		size_t len = left;
		void *vaddr;

		if (PageHighMem(page)) {
			if (len + offset > PAGE_SIZE) {
				if (offset >= PAGE_SIZE) {
					page += offset / PAGE_SIZE;
					offset %= PAGE_SIZE;
				}
				len = PAGE_SIZE - offset;
			}
			vaddr = kmap_high_get(page);
			if (vaddr) {
				vaddr += offset;
				op(vaddr, len, dir);
				kunmap_high(page);
			} else if (cache_is_vipt()) {
				/* unmapped pages might still be cached */
				vaddr = kmap_atomic(page);
				op(vaddr + offset, len, dir);
				kunmap_atomic(vaddr);
			}
		} else {
			vaddr = page_address(page) + offset;
			op(vaddr, len, dir);
		}
		offset = 0;
		page++;
		left -= len;
	} while (left);
}

void ___dma_page_cpu_to_dev(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	unsigned long paddr;

	dma_cache_maint_page(page, off, size, dir, dmac_map_area);

	paddr = page_to_phys(page) + off;
	if (dir == DMA_FROM_DEVICE) {
		outer_inv_range(paddr, paddr + size);
	} else {
		outer_clean_range(paddr, paddr + size);
	}
	/* FIXME: non-speculating: flush on bidirectional mappings? */
}
EXPORT_SYMBOL(___dma_page_cpu_to_dev);

void ___dma_page_dev_to_cpu(struct page *page, unsigned long off,
	size_t size, enum dma_data_direction dir)
{
	unsigned long paddr = page_to_phys(page) + off;

	/* FIXME: non-speculating: not required */
	/* don't bother invalidating if DMA to device */
	if (dir != DMA_TO_DEVICE)
		outer_inv_range(paddr, paddr + size);

	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);

	/*
	 * Mark the D-cache clean for this page to avoid extra flushing.
	 */
	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
		set_bit(PG_dcache_clean, &page->flags);
}
EXPORT_SYMBOL(___dma_page_dev_to_cpu);
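
/*
 * Illustrative sketch: the comment above ___dma_single_cpu_to_dev() tells
 * drivers to use the dma_sync_* interfaces rather than calling these cache
 * maintenance helpers directly.  A hypothetical driver that reuses one
 * streaming mapping across transfers might look roughly like this; the
 * foo_* name is made up.
 */
static void foo_reuse_rx_buffer(struct device *dev, dma_addr_t buf_dma,
				size_t len)
{
	/* Give the buffer back to the CPU so it may inspect received data. */
	dma_sync_single_for_cpu(dev, buf_dma, len, DMA_FROM_DEVICE);

	/* ... CPU reads the buffer contents here ... */

	/* Hand ownership back to the device before the next DMA transfer. */
	dma_sync_single_for_device(dev, buf_dma, len, DMA_FROM_DEVICE);
}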
/**
 * dma_map_sg - map a set of SG buffers for streaming mode DMA
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map
 * @dir: DMA transfer direction
 *
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the dma_map_single interface.
 * Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}.
 *
 * Device ownership issues as mentioned for dma_map_single are the same
 * here.
 */
int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i, j;

	BUG_ON(!valid_dma_direction(dir));

	for_each_sg(sg, s, nents, i) {
		s->dma_address = __dma_map_page(dev, sg_page(s), s->offset,
						s->length, dir);
		if (dma_mapping_error(dev, s->dma_address))
			goto bad_mapping;
	}
	debug_dma_map_sg(dev, sg, nents, nents, dir);
	return nents;

 bad_mapping:
	for_each_sg(sg, s, i, j)
		__dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
	return 0;
}
EXPORT_SYMBOL(dma_map_sg);

/**
 * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 *
 * Unmap a set of streaming mode DMA translations.  Again, CPU access
 * rules concerning calls here are the same as for dma_unmap_single().
 */
void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	debug_dma_unmap_sg(dev, sg, nents, dir);

	for_each_sg(sg, s, nents, i)
		__dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
}
EXPORT_SYMBOL(dma_unmap_sg);

/**
 * dma_sync_sg_for_cpu
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
					    sg_dma_len(s), dir))
			continue;

		__dma_page_dev_to_cpu(sg_page(s), s->offset,
				      s->length, dir);
	}

	debug_dma_sync_sg_for_cpu(dev, sg, nents, dir);
}
EXPORT_SYMBOL(dma_sync_sg_for_cpu);

/**
 * dma_sync_sg_for_device
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
					sg_dma_len(s), dir))
			continue;

		__dma_page_cpu_to_dev(sg_page(s), s->offset,
				      s->length, dir);
	}

	debug_dma_sync_sg_for_device(dev, sg, nents, dir);
}
EXPORT_SYMBOL(dma_sync_sg_for_device);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);
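
/*
 * Illustrative sketch: how a hypothetical driver would feed a scatterlist
 * through dma_map_sg()/dma_unmap_sg() above and read the per-element bus
 * addresses back with sg_dma_address()/sg_dma_len().  The foo_* name is
 * made up; a real driver would program its hardware inside the loop.
 */
static int foo_map_and_program(struct device *dev, struct scatterlist *sgl,
			       int nents)
{
	struct scatterlist *s;
	unsigned int total = 0;
	int i, mapped;

	mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
	if (!mapped)
		return -ENOMEM;		/* dma_map_sg() returns 0 on failure */

	for_each_sg(sgl, s, mapped, i)
		total += sg_dma_len(s);	/* pass sg_dma_address(s) to the hw */

	dev_dbg(dev, "mapped %d entries, %u bytes\n", mapped, total);

	/* Unmap with the original nents, not the returned count. */
	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
	return 0;
}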