/*
 *  linux/arch/arm/mm/dma-mapping.c
 *
 *  Copyright (C) 2000-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  DMA uncached mapping support.
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>

#include <asm/memory.h>
#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/sizes.h>

/* Sanity check size */
#if (CONSISTENT_DMA_SIZE % SZ_2M)
#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB"
#endif

#define CONSISTENT_END		(0xffe00000)
#define CONSISTENT_BASE		(CONSISTENT_END - CONSISTENT_DMA_SIZE)

#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT)
#define NUM_CONSISTENT_PTES	(CONSISTENT_DMA_SIZE >> PGDIR_SHIFT)

static u64 get_coherent_dma_mask(struct device *dev)
{
        u64 mask = ISA_DMA_THRESHOLD;

        if (dev) {
                mask = dev->coherent_dma_mask;

                /*
                 * Sanity check the DMA mask - it must be non-zero, and
                 * must be able to be satisfied by a DMA allocation.
                 */
                if (mask == 0) {
                        dev_warn(dev, "coherent DMA mask is unset\n");
                        return 0;
                }

                if ((~mask) & ISA_DMA_THRESHOLD) {
                        dev_warn(dev, "coherent DMA mask %#llx is smaller "
                                 "than system GFP_DMA mask %#llx\n",
                                 mask, (unsigned long long)ISA_DMA_THRESHOLD);
                        return 0;
                }
        }

        return mask;
}

#ifdef CONFIG_MMU
/*
 * These are the page tables (2MB each) covering uncached, DMA consistent allocations
 */
static pte_t *consistent_pte[NUM_CONSISTENT_PTES];
static DEFINE_SPINLOCK(consistent_lock);
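/*
 * Worked example of the indexing above (illustrative only; assumes the
 * usual ARM values PAGE_SHIFT = 12 and PGDIR_SHIFT = 21, and a
 * CONSISTENT_DMA_SIZE of SZ_2M, the smallest the sanity check allows):
 * CONSISTENT_BASE is then 0xffc00000 and consistent_pte[] has a single
 * entry covering the whole 2MiB window.  For a mapping at 0xffc01000,
 * CONSISTENT_OFFSET() yields 1 (the second page of the window) and
 * CONSISTENT_PTE_INDEX() yields 0, so the pte is consistent_pte[0] + 1.
 */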
/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct vm_region	region;
 *    unsigned long	flags;
 *    struct page	**pages;
 *    unsigned int	nr_pages;
 *    unsigned long	phys_addr;
 *  };
 *
 * get_vm_area() would then call vm_region_alloc with an appropriate
 * struct vm_region head (eg):
 *
 *  struct vm_region vmalloc_head = {
 *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
 *	.vm_start	= VMALLOC_START,
 *	.vm_end		= VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
struct arm_vm_region {
        struct list_head	vm_list;
        unsigned long		vm_start;
        unsigned long		vm_end;
        struct page		*vm_pages;
        int			vm_active;
};

static struct arm_vm_region consistent_head = {
        .vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
        .vm_start	= CONSISTENT_BASE,
        .vm_end		= CONSISTENT_END,
};

static struct arm_vm_region *
arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp)
{
        unsigned long addr = head->vm_start, end = head->vm_end - size;
        unsigned long flags;
        struct arm_vm_region *c, *new;

        new = kmalloc(sizeof(struct arm_vm_region), gfp);
        if (!new)
                goto out;

        spin_lock_irqsave(&consistent_lock, flags);

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if ((addr + size) < addr)
                        goto nospc;
                if ((addr + size) <= c->vm_start)
                        goto found;
                addr = c->vm_end;
                if (addr > end)
                        goto nospc;
        }

 found:
        /*
         * Insert this entry _before_ the one we found.
         */
        list_add_tail(&new->vm_list, &c->vm_list);
        new->vm_start = addr;
        new->vm_end = addr + size;
        new->vm_active = 1;

        spin_unlock_irqrestore(&consistent_lock, flags);
        return new;

 nospc:
        spin_unlock_irqrestore(&consistent_lock, flags);
        kfree(new);
 out:
        return NULL;
}

static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr)
{
        struct arm_vm_region *c;

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if (c->vm_active && c->vm_start == addr)
                        goto out;
        }
        c = NULL;
 out:
        return c;
}

#ifdef CONFIG_HUGETLB_PAGE
#error ARM Coherent DMA allocator does not (yet) support huge TLB
#endif
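/*
 * Allocator core (CONFIG_MMU): grab pages from the page allocator, flush
 * any stale cache lines covering them out of the kernel direct mapping,
 * carve a window out of the consistent region with arm_vm_region_alloc()
 * and remap the pages there with the caller-supplied pgprot (uncached or
 * writecombining).  Pages left over from the power-of-two allocation are
 * given back once the mapping is set up.
 */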
static void *
__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
            pgprot_t prot)
{
        struct page *page;
        struct arm_vm_region *c;
        unsigned long order;
        u64 mask = get_coherent_dma_mask(dev);
        u64 limit;

        if (!consistent_pte[0]) {
                printk(KERN_ERR "%s: not initialised\n", __func__);
                dump_stack();
                return NULL;
        }

        if (!mask)
                goto no_page;

        /*
         * Sanity check the allocation size.
         */
        size = PAGE_ALIGN(size);
        limit = (mask + 1) & ~mask;
        if ((limit && size >= limit) ||
            size >= (CONSISTENT_END - CONSISTENT_BASE)) {
                printk(KERN_WARNING "coherent allocation too big "
                       "(requested %#x mask %#llx)\n", size, mask);
                goto no_page;
        }

        order = get_order(size);

        if (mask != 0xffffffff)
                gfp |= GFP_DMA;

        page = alloc_pages(gfp, order);
        if (!page)
                goto no_page;

        /*
         * Invalidate any data that might be lurking in the
         * kernel direct-mapped region for device DMA.
         */
        {
                void *ptr = page_address(page);
                memset(ptr, 0, size);
                dmac_flush_range(ptr, ptr + size);
                outer_flush_range(__pa(ptr), __pa(ptr) + size);
        }

        /*
         * Allocate a virtual address in the consistent mapping region.
         */
        c = arm_vm_region_alloc(&consistent_head, size,
                                gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
        if (c) {
                pte_t *pte;
                struct page *end = page + (1 << order);
                int idx = CONSISTENT_PTE_INDEX(c->vm_start);
                u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);

                pte = consistent_pte[idx] + off;
                c->vm_pages = page;

                split_page(page, order);

                /*
                 * Set the "dma handle"
                 */
                *handle = page_to_dma(dev, page);

                do {
                        BUG_ON(!pte_none(*pte));

                        /*
                         * x86 does not mark the pages reserved...
                         */
                        SetPageReserved(page);
                        set_pte_ext(pte, mk_pte(page, prot), 0);
                        page++;
                        pte++;
                        off++;
                        if (off >= PTRS_PER_PTE) {
                                off = 0;
                                pte = consistent_pte[++idx];
                        }
                } while (size -= PAGE_SIZE);

                /*
                 * Free the otherwise unused pages.
                 */
                while (page < end) {
                        __free_page(page);
                        page++;
                }

                return (void *)c->vm_start;
        }

        if (page)
                __free_pages(page, order);
 no_page:
        *handle = ~0;
        return NULL;
}
#else	/* !CONFIG_MMU */
static void *
__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
            pgprot_t prot)
{
        void *virt;
        u64 mask = get_coherent_dma_mask(dev);

        if (!mask)
                goto error;

        if (mask != 0xffffffff)
                gfp |= GFP_DMA;
        virt = kmalloc(size, gfp);
        if (!virt)
                goto error;

        *handle = virt_to_dma(dev, virt);
        return virt;

error:
        *handle = ~0;
        return NULL;
}
#endif	/* CONFIG_MMU */

/*
 * Allocate DMA-coherent memory space and return both the kernel remapped
 * virtual and bus address for that space.
 */
void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
        void *memory;

        if (dma_alloc_from_coherent(dev, size, handle, &memory))
                return memory;

        if (arch_is_coherent()) {
                void *virt;

                virt = kmalloc(size, gfp);
                if (!virt)
                        return NULL;
                *handle = virt_to_dma(dev, virt);

                return virt;
        }

        return __dma_alloc(dev, size, handle, gfp,
                           pgprot_noncached(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_coherent);
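/*
 * Minimal usage sketch (hypothetical driver code; the "foo" device and the
 * 4K descriptor ring are illustrative only):
 *
 *	dma_addr_t ring_dma;
 *	void *ring;
 *
 *	foo->dev->coherent_dma_mask = DMA_BIT_MASK(32);
 *	ring = dma_alloc_coherent(foo->dev, SZ_4K, &ring_dma, GFP_KERNEL);
 *	if (!ring)
 *		return -ENOMEM;
 *	...	program ring_dma into the device, access "ring" from the CPU
 *	dma_free_coherent(foo->dev, SZ_4K, ring, ring_dma);
 */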
/*
 * Allocate a writecombining region, in much the same way as
 * dma_alloc_coherent above.
 */
void *
dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
        return __dma_alloc(dev, size, handle, gfp,
                           pgprot_writecombine(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_writecombine);

static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
                    void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
        int ret = -ENXIO;
#ifdef CONFIG_MMU
        unsigned long flags, user_size, kern_size;
        struct arm_vm_region *c;

        user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

        spin_lock_irqsave(&consistent_lock, flags);
        c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
        spin_unlock_irqrestore(&consistent_lock, flags);

        if (c) {
                unsigned long off = vma->vm_pgoff;

                kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;

                if (off < kern_size &&
                    user_size <= (kern_size - off)) {
                        ret = remap_pfn_range(vma, vma->vm_start,
                                              page_to_pfn(c->vm_pages) + off,
                                              user_size << PAGE_SHIFT,
                                              vma->vm_page_prot);
                }
        }
#endif	/* CONFIG_MMU */

        return ret;
}

int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
                      void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_coherent);

int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
                          void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
        vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_writecombine);
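/*
 * Typical use of the mmap helpers (illustrative sketch; "foo" is a
 * hypothetical driver that earlier obtained foo->buf and foo->buf_dma
 * from dma_alloc_writecombine()):
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct foo *foo = file->private_data;
 *
 *		return dma_mmap_writecombine(foo->dev, vma, foo->buf,
 *					     foo->buf_dma, foo->buf_size);
 *	}
 */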
/*
 * free a page as defined by the above mapping.
 * Must not be called with IRQs disabled.
 */
#ifdef CONFIG_MMU
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
{
        struct arm_vm_region *c;
        unsigned long flags, addr;
        pte_t *ptep;
        int idx;
        u32 off;

        WARN_ON(irqs_disabled());

        if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
                return;

        if (arch_is_coherent()) {
                kfree(cpu_addr);
                return;
        }

        size = PAGE_ALIGN(size);

        spin_lock_irqsave(&consistent_lock, flags);
        c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
        if (!c)
                goto no_area;

        c->vm_active = 0;
        spin_unlock_irqrestore(&consistent_lock, flags);

        if ((c->vm_end - c->vm_start) != size) {
                printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
                       __func__, c->vm_end - c->vm_start, size);
                dump_stack();
                size = c->vm_end - c->vm_start;
        }

        idx = CONSISTENT_PTE_INDEX(c->vm_start);
        off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
        ptep = consistent_pte[idx] + off;
        addr = c->vm_start;
        do {
                pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
                unsigned long pfn;

                ptep++;
                addr += PAGE_SIZE;
                off++;
                if (off >= PTRS_PER_PTE) {
                        off = 0;
                        ptep = consistent_pte[++idx];
                }

                if (!pte_none(pte) && pte_present(pte)) {
                        pfn = pte_pfn(pte);

                        if (pfn_valid(pfn)) {
                                struct page *page = pfn_to_page(pfn);

                                /*
                                 * x86 does not mark the pages reserved...
                                 */
                                ClearPageReserved(page);

                                __free_page(page);
                                continue;
                        }
                }

                printk(KERN_CRIT "%s: bad page in kernel page table\n",
                       __func__);
        } while (size -= PAGE_SIZE);

        flush_tlb_kernel_range(c->vm_start, c->vm_end);

        spin_lock_irqsave(&consistent_lock, flags);
        list_del(&c->vm_list);
        spin_unlock_irqrestore(&consistent_lock, flags);

        kfree(c);
        return;

 no_area:
        spin_unlock_irqrestore(&consistent_lock, flags);
        printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
               __func__, cpu_addr);
        dump_stack();
}
#else	/* !CONFIG_MMU */
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
{
        if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
                return;
        kfree(cpu_addr);
}
#endif	/* CONFIG_MMU */
EXPORT_SYMBOL(dma_free_coherent);

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
        int ret = 0;
#ifdef CONFIG_MMU
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int i = 0;
        u32 base = CONSISTENT_BASE;

        do {
                pgd = pgd_offset(&init_mm, base);
                pmd = pmd_alloc(&init_mm, pgd, base);
                if (!pmd) {
                        printk(KERN_ERR "%s: no pmd tables\n", __func__);
                        ret = -ENOMEM;
                        break;
                }
                WARN_ON(!pmd_none(*pmd));

                pte = pte_alloc_kernel(pmd, base);
                if (!pte) {
                        printk(KERN_ERR "%s: no pte tables\n", __func__);
                        ret = -ENOMEM;
                        break;
                }

                consistent_pte[i++] = pte;
                base += (1 << PGDIR_SHIFT);
        } while (base < CONSISTENT_END);
#endif	/* !CONFIG_MMU */

        return ret;
}

core_initcall(consistent_init);
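/*
 * Being a core_initcall(), consistent_init() runs before device drivers
 * are probed, so the pre-allocated consistent_pte[] tables are normally in
 * place by the time dma_alloc_coherent() is first called; the
 * "not initialised" check in __dma_alloc() catches anything earlier.
 */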
/*
 * Make an area consistent for devices.
 * Note: Drivers should NOT use this function directly, as it will break
 * platforms with CONFIG_DMABOUNCE.
 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 */
void dma_cache_maint(const void *start, size_t size, int direction)
{
        void (*inner_op)(const void *, const void *);
        void (*outer_op)(unsigned long, unsigned long);

        BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1));

        switch (direction) {
        case DMA_FROM_DEVICE:		/* invalidate only */
                inner_op = dmac_inv_range;
                outer_op = outer_inv_range;
                break;
        case DMA_TO_DEVICE:		/* writeback only */
                inner_op = dmac_clean_range;
                outer_op = outer_clean_range;
                break;
        case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
                inner_op = dmac_flush_range;
                outer_op = outer_flush_range;
                break;
        default:
                BUG();
        }

        inner_op(start, start + size);
        outer_op(__pa(start), __pa(start) + size);
}
EXPORT_SYMBOL(dma_cache_maint);

static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
                                       size_t size, int direction)
{
        void *vaddr;
        unsigned long paddr;
        void (*inner_op)(const void *, const void *);
        void (*outer_op)(unsigned long, unsigned long);

        switch (direction) {
        case DMA_FROM_DEVICE:		/* invalidate only */
                inner_op = dmac_inv_range;
                outer_op = outer_inv_range;
                break;
        case DMA_TO_DEVICE:		/* writeback only */
                inner_op = dmac_clean_range;
                outer_op = outer_clean_range;
                break;
        case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
                inner_op = dmac_flush_range;
                outer_op = outer_flush_range;
                break;
        default:
                BUG();
        }

        if (!PageHighMem(page)) {
                vaddr = page_address(page) + offset;
                inner_op(vaddr, vaddr + size);
        } else {
                vaddr = kmap_high_get(page);
                if (vaddr) {
                        vaddr += offset;
                        inner_op(vaddr, vaddr + size);
                        kunmap_high(page);
                }
        }

        paddr = page_to_phys(page) + offset;
        outer_op(paddr, paddr + size);
}

void dma_cache_maint_page(struct page *page, unsigned long offset,
                          size_t size, int dir)
{
        /*
         * A single sg entry may refer to multiple physically contiguous
         * pages.  But we still need to process highmem pages individually.
         * If highmem is not configured then the bulk of this loop gets
         * optimized out.
         */
        size_t left = size;
        do {
                size_t len = left;
                if (PageHighMem(page) && len + offset > PAGE_SIZE) {
                        if (offset >= PAGE_SIZE) {
                                page += offset / PAGE_SIZE;
                                offset %= PAGE_SIZE;
                        }
                        len = PAGE_SIZE - offset;
                }
                dma_cache_maint_contiguous(page, offset, len, dir);
                offset = 0;
                page++;
                left -= len;
        } while (left);
}
EXPORT_SYMBOL(dma_cache_maint_page);
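/*
 * Worked example for the highmem path above (illustrative, assuming 4K
 * pages): an sg entry of 8K starting at offset 2K in a highmem page is
 * handled as three contiguous chunks - 2K at offset 2K of the first page,
 * the whole 4K of the second page, then the first 2K of the third page -
 * each passed to dma_cache_maint_contiguous() separately.
 */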
/**
 * dma_map_sg - map a set of SG buffers for streaming mode DMA
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map
 * @dir: DMA transfer direction
 *
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the dma_map_single interface.
 * Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}.
 *
 * Device ownership issues as mentioned for dma_map_single are the same
 * here.
 */
int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
               enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i, j;

        for_each_sg(sg, s, nents, i) {
                s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
                                              s->length, dir);
                if (dma_mapping_error(dev, s->dma_address))
                        goto bad_mapping;
        }
        return nents;

 bad_mapping:
        for_each_sg(sg, s, i, j)
                dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
        return 0;
}
EXPORT_SYMBOL(dma_map_sg);

/**
 * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to unmap (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 *
 * Unmap a set of streaming mode DMA translations.  Again, CPU access
 * rules concerning calls here are the same as for dma_unmap_single().
 */
void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                  enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i)
                dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
}
EXPORT_SYMBOL(dma_unmap_sg);

/**
 * dma_sync_sg_for_cpu
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                         int nents, enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i) {
                dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
                                       sg_dma_len(s), dir);
        }
}
EXPORT_SYMBOL(dma_sync_sg_for_cpu);

/**
 * dma_sync_sg_for_device
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                            int nents, enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i) {
                if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
                                               sg_dma_len(s), dir))
                        continue;

                if (!arch_is_coherent())
                        dma_cache_maint_page(sg_page(s), s->offset,
                                             s->length, dir);
        }
}
EXPORT_SYMBOL(dma_sync_sg_for_device);
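/*
 * Typical streaming-DMA sequence using the scatter-gather API (illustrative
 * sketch; "foo" and its scatterlist are hypothetical driver state):
 *
 *	int nents;
 *
 *	nents = dma_map_sg(foo->dev, foo->sgl, foo->sg_count, DMA_FROM_DEVICE);
 *	if (!nents)
 *		return -ENOMEM;
 *	...	start the transfer using sg_dma_address()/sg_dma_len()
 *	dma_sync_sg_for_cpu(foo->dev, foo->sgl, nents, DMA_FROM_DEVICE);
 *	...	the CPU may now look at the received data
 *	dma_sync_sg_for_device(foo->dev, foo->sgl, nents, DMA_FROM_DEVICE);
 *	...	hand the buffers back to the device for the next transfer
 *	dma_unmap_sg(foo->dev, foo->sgl, nents, DMA_FROM_DEVICE);
 */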