/*
 *  linux/arch/arm/mm/dma-mapping.c
 *
 *  Copyright (C) 2000-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  DMA uncached mapping support.
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>

#include <asm/memory.h>
#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/sizes.h>

/* Sanity check size */
#if (CONSISTENT_DMA_SIZE % SZ_2M)
#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB"
#endif

#define CONSISTENT_END          (0xffe00000)
#define CONSISTENT_BASE         (CONSISTENT_END - CONSISTENT_DMA_SIZE)

#define CONSISTENT_OFFSET(x)    (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT)
#define NUM_CONSISTENT_PTES     (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT)

/*
 * These are the page tables (2MB each) covering uncached, DMA consistent
 * allocations.
 */
static pte_t *consistent_pte[NUM_CONSISTENT_PTES];
static DEFINE_SPINLOCK(consistent_lock);

/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct vm_region  region;
 *    unsigned long     flags;
 *    struct page       **pages;
 *    unsigned int      nr_pages;
 *    unsigned long     phys_addr;
 *  };
 *
 * get_vm_area() would then call vm_region_alloc with an appropriate
 * struct vm_region head (eg):
 *
 *  struct vm_region vmalloc_head = {
 *    .vm_list  = LIST_HEAD_INIT(vmalloc_head.vm_list),
 *    .vm_start = VMALLOC_START,
 *    .vm_end   = VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
struct arm_vm_region {
        struct list_head        vm_list;
        unsigned long           vm_start;
        unsigned long           vm_end;
        struct page             *vm_pages;
        int                     vm_active;
};

static struct arm_vm_region consistent_head = {
        .vm_list        = LIST_HEAD_INIT(consistent_head.vm_list),
        .vm_start       = CONSISTENT_BASE,
        .vm_end         = CONSISTENT_END,
};

static struct arm_vm_region *
arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp)
{
        unsigned long addr = head->vm_start, end = head->vm_end - size;
        unsigned long flags;
        struct arm_vm_region *c, *new;

        new = kmalloc(sizeof(struct arm_vm_region), gfp);
        if (!new)
                goto out;

        spin_lock_irqsave(&consistent_lock, flags);

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if ((addr + size) < addr)
                        goto nospc;
                if ((addr + size) <= c->vm_start)
                        goto found;
                addr = c->vm_end;
                if (addr > end)
                        goto nospc;
        }

 found:
        /*
         * Insert this entry _before_ the one we found.
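         * The region list is kept sorted by address, so the first-fit
         * walk above either finds a gap large enough for this allocation
         * or runs off the end of the consistent area.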
         */
        list_add_tail(&new->vm_list, &c->vm_list);
        new->vm_start = addr;
        new->vm_end = addr + size;
        new->vm_active = 1;

        spin_unlock_irqrestore(&consistent_lock, flags);
        return new;

 nospc:
        spin_unlock_irqrestore(&consistent_lock, flags);
        kfree(new);
 out:
        return NULL;
}

static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr)
{
        struct arm_vm_region *c;

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if (c->vm_active && c->vm_start == addr)
                        goto out;
        }
        c = NULL;
 out:
        return c;
}

#ifdef CONFIG_HUGETLB_PAGE
#error ARM Coherent DMA allocator does not (yet) support huge TLB
#endif

static void *
__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
            pgprot_t prot)
{
        struct page *page;
        struct arm_vm_region *c;
        unsigned long order;
        u64 mask = ISA_DMA_THRESHOLD, limit;

        if (!consistent_pte[0]) {
                printk(KERN_ERR "%s: not initialised\n", __func__);
                dump_stack();
                return NULL;
        }

        if (dev) {
                mask = dev->coherent_dma_mask;

                /*
                 * Sanity check the DMA mask - it must be non-zero, and
                 * must be able to be satisfied by a DMA allocation.
                 */
                if (mask == 0) {
                        dev_warn(dev, "coherent DMA mask is unset\n");
                        goto no_page;
                }

                if ((~mask) & ISA_DMA_THRESHOLD) {
                        dev_warn(dev, "coherent DMA mask %#llx is smaller "
                                 "than system GFP_DMA mask %#llx\n",
                                 mask, (unsigned long long)ISA_DMA_THRESHOLD);
                        goto no_page;
                }
        }

        /*
         * Sanity check the allocation size.
         */
        size = PAGE_ALIGN(size);
        limit = (mask + 1) & ~mask;
        if ((limit && size >= limit) ||
            size >= (CONSISTENT_END - CONSISTENT_BASE)) {
                printk(KERN_WARNING "coherent allocation too big "
                       "(requested %#x mask %#llx)\n", size, mask);
                goto no_page;
        }

        order = get_order(size);

        if (mask != 0xffffffff)
                gfp |= GFP_DMA;

        page = alloc_pages(gfp, order);
        if (!page)
                goto no_page;

        /*
         * Invalidate any data that might be lurking in the
         * kernel direct-mapped region for device DMA.
         */
        {
                void *ptr = page_address(page);
                memset(ptr, 0, size);
                dmac_flush_range(ptr, ptr + size);
                outer_flush_range(__pa(ptr), __pa(ptr) + size);
        }

        /*
         * Allocate a virtual address in the consistent mapping region.
         */
        c = arm_vm_region_alloc(&consistent_head, size,
                                gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
        if (c) {
                pte_t *pte;
                struct page *end = page + (1 << order);
                int idx = CONSISTENT_PTE_INDEX(c->vm_start);
                u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);

                pte = consistent_pte[idx] + off;
                c->vm_pages = page;

                split_page(page, order);

                /*
                 * Set the "dma handle"
                 */
                *handle = page_to_dma(dev, page);

                do {
                        BUG_ON(!pte_none(*pte));

                        /*
                         * x86 does not mark the pages reserved...
                         */
                        SetPageReserved(page);
                        set_pte_ext(pte, mk_pte(page, prot), 0);
                        page++;
                        pte++;
                        off++;
                        if (off >= PTRS_PER_PTE) {
                                off = 0;
                                pte = consistent_pte[++idx];
                        }
                } while (size -= PAGE_SIZE);

                /*
                 * Free the otherwise unused pages.
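                 * alloc_pages() rounded the request up to a full
                 * power-of-two order, and split_page() above made each
                 * page in that block independent, so the tail pages
                 * beyond 'size' can simply be handed back now.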
                 */
                while (page < end) {
                        __free_page(page);
                        page++;
                }

                return (void *)c->vm_start;
        }

        if (page)
                __free_pages(page, order);
 no_page:
        *handle = ~0;
        return NULL;
}

/*
 * Allocate DMA-coherent memory space and return both the kernel remapped
 * virtual and bus address for that space.
 */
void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
        void *memory;

        if (dma_alloc_from_coherent(dev, size, handle, &memory))
                return memory;

        if (arch_is_coherent()) {
                void *virt;

                virt = kmalloc(size, gfp);
                if (!virt)
                        return NULL;
                *handle = virt_to_dma(dev, virt);

                return virt;
        }

        return __dma_alloc(dev, size, handle, gfp,
                           pgprot_noncached(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_coherent);

/*
 * Allocate a writecombining region, in much the same way as
 * dma_alloc_coherent above.
 */
void *
dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
        return __dma_alloc(dev, size, handle, gfp,
                           pgprot_writecombine(pgprot_kernel));
}
EXPORT_SYMBOL(dma_alloc_writecombine);

static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
                    void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
        unsigned long flags, user_size, kern_size;
        struct arm_vm_region *c;
        int ret = -ENXIO;

        user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

        spin_lock_irqsave(&consistent_lock, flags);
        c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
        spin_unlock_irqrestore(&consistent_lock, flags);

        if (c) {
                unsigned long off = vma->vm_pgoff;

                kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;

                if (off < kern_size &&
                    user_size <= (kern_size - off)) {
                        ret = remap_pfn_range(vma, vma->vm_start,
                                              page_to_pfn(c->vm_pages) + off,
                                              user_size << PAGE_SHIFT,
                                              vma->vm_page_prot);
                }
        }

        return ret;
}

int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
                      void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_coherent);

int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
                          void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
        vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_writecombine);

/*
 * free a page as defined by the above mapping.
 * Must not be called with IRQs disabled.
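 *
 * This walks the consistent page table entries backing cpu_addr,
 * clears each PTE, frees the underlying pages and finally removes
 * the arm_vm_region bookkeeping entry.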
 */
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
{
        struct arm_vm_region *c;
        unsigned long flags, addr;
        pte_t *ptep;
        int idx;
        u32 off;

        WARN_ON(irqs_disabled());

        if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
                return;

        if (arch_is_coherent()) {
                kfree(cpu_addr);
                return;
        }

        size = PAGE_ALIGN(size);

        spin_lock_irqsave(&consistent_lock, flags);
        c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
        if (!c)
                goto no_area;

        c->vm_active = 0;
        spin_unlock_irqrestore(&consistent_lock, flags);

        if ((c->vm_end - c->vm_start) != size) {
                printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
                       __func__, c->vm_end - c->vm_start, size);
                dump_stack();
                size = c->vm_end - c->vm_start;
        }

        idx = CONSISTENT_PTE_INDEX(c->vm_start);
        off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
        ptep = consistent_pte[idx] + off;
        addr = c->vm_start;
        do {
                pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
                unsigned long pfn;

                ptep++;
                addr += PAGE_SIZE;
                off++;
                if (off >= PTRS_PER_PTE) {
                        off = 0;
                        ptep = consistent_pte[++idx];
                }

                if (!pte_none(pte) && pte_present(pte)) {
                        pfn = pte_pfn(pte);

                        if (pfn_valid(pfn)) {
                                struct page *page = pfn_to_page(pfn);

                                /*
                                 * x86 does not mark the pages reserved...
                                 */
                                ClearPageReserved(page);

                                __free_page(page);
                                continue;
                        }
                }

                printk(KERN_CRIT "%s: bad page in kernel page table\n",
                       __func__);
        } while (size -= PAGE_SIZE);

        flush_tlb_kernel_range(c->vm_start, c->vm_end);

        spin_lock_irqsave(&consistent_lock, flags);
        list_del(&c->vm_list);
        spin_unlock_irqrestore(&consistent_lock, flags);

        kfree(c);
        return;

 no_area:
        spin_unlock_irqrestore(&consistent_lock, flags);
        printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
               __func__, cpu_addr);
        dump_stack();
}
EXPORT_SYMBOL(dma_free_coherent);

/*
 * Initialise the consistent memory allocation.
 */
static int __init consistent_init(void)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int ret = 0, i = 0;
        u32 base = CONSISTENT_BASE;

        do {
                pgd = pgd_offset(&init_mm, base);
                pmd = pmd_alloc(&init_mm, pgd, base);
                if (!pmd) {
                        printk(KERN_ERR "%s: no pmd tables\n", __func__);
                        ret = -ENOMEM;
                        break;
                }
                WARN_ON(!pmd_none(*pmd));

                pte = pte_alloc_kernel(pmd, base);
                if (!pte) {
                        printk(KERN_ERR "%s: no pte tables\n", __func__);
                        ret = -ENOMEM;
                        break;
                }

                consistent_pte[i++] = pte;
                base += (1 << PGDIR_SHIFT);
        } while (base < CONSISTENT_END);

        return ret;
}

core_initcall(consistent_init);

/*
 * Make an area consistent for devices.
 * Note: Drivers should NOT use this function directly, as it will break
 * platforms with CONFIG_DMABOUNCE.
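 * On such platforms the dma_sync_* hooks may route the transfer through
 * a bounce buffer rather than the plain cache maintenance done here.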
 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 */
void dma_cache_maint(const void *start, size_t size, int direction)
{
        void (*inner_op)(const void *, const void *);
        void (*outer_op)(unsigned long, unsigned long);

        BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1));

        switch (direction) {
        case DMA_FROM_DEVICE:           /* invalidate only */
                inner_op = dmac_inv_range;
                outer_op = outer_inv_range;
                break;
        case DMA_TO_DEVICE:             /* writeback only */
                inner_op = dmac_clean_range;
                outer_op = outer_clean_range;
                break;
        case DMA_BIDIRECTIONAL:         /* writeback and invalidate */
                inner_op = dmac_flush_range;
                outer_op = outer_flush_range;
                break;
        default:
                BUG();
        }

        inner_op(start, start + size);
        outer_op(__pa(start), __pa(start) + size);
}
EXPORT_SYMBOL(dma_cache_maint);

static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
                                       size_t size, int direction)
{
        void *vaddr;
        unsigned long paddr;
        void (*inner_op)(const void *, const void *);
        void (*outer_op)(unsigned long, unsigned long);

        switch (direction) {
        case DMA_FROM_DEVICE:           /* invalidate only */
                inner_op = dmac_inv_range;
                outer_op = outer_inv_range;
                break;
        case DMA_TO_DEVICE:             /* writeback only */
                inner_op = dmac_clean_range;
                outer_op = outer_clean_range;
                break;
        case DMA_BIDIRECTIONAL:         /* writeback and invalidate */
                inner_op = dmac_flush_range;
                outer_op = outer_flush_range;
                break;
        default:
                BUG();
        }

        if (!PageHighMem(page)) {
                vaddr = page_address(page) + offset;
                inner_op(vaddr, vaddr + size);
        } else {
                vaddr = kmap_high_get(page);
                if (vaddr) {
                        vaddr += offset;
                        inner_op(vaddr, vaddr + size);
                        kunmap_high(page);
                }
        }

        paddr = page_to_phys(page) + offset;
        outer_op(paddr, paddr + size);
}

void dma_cache_maint_page(struct page *page, unsigned long offset,
                          size_t size, int dir)
{
        /*
         * A single sg entry may refer to multiple physically contiguous
         * pages.  But we still need to process highmem pages individually.
         * If highmem is not configured then the bulk of this loop gets
         * optimized out.
         */
        size_t left = size;
        do {
                size_t len = left;
                if (PageHighMem(page) && len + offset > PAGE_SIZE) {
                        if (offset >= PAGE_SIZE) {
                                page += offset / PAGE_SIZE;
                                offset %= PAGE_SIZE;
                        }
                        len = PAGE_SIZE - offset;
                }
                dma_cache_maint_contiguous(page, offset, len, dir);
                offset = 0;
                page++;
                left -= len;
        } while (left);
}
EXPORT_SYMBOL(dma_cache_maint_page);

/**
 * dma_map_sg - map a set of SG buffers for streaming mode DMA
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map
 * @dir: DMA transfer direction
 *
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the dma_map_single interface.
 * Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}.
 *
 * Device ownership issues as mentioned for dma_map_single are the same
 * here.
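 *
 * On failure 0 is returned and any entries that were already mapped
 * are unmapped again before returning.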
 */
int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i, j;

        for_each_sg(sg, s, nents, i) {
                s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
                                              s->length, dir);
                if (dma_mapping_error(dev, s->dma_address))
                        goto bad_mapping;
        }
        return nents;

 bad_mapping:
        for_each_sg(sg, s, i, j)
                dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
        return 0;
}
EXPORT_SYMBOL(dma_map_sg);

/**
 * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to unmap (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 *
 * Unmap a set of streaming mode DMA translations.  Again, CPU access
 * rules concerning calls here are the same as for dma_unmap_single().
 */
void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i)
                dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
}
EXPORT_SYMBOL(dma_unmap_sg);

/**
 * dma_sync_sg_for_cpu
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                        int nents, enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i) {
                dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
                                       sg_dma_len(s), dir);
        }
}
EXPORT_SYMBOL(dma_sync_sg_for_cpu);

/**
 * dma_sync_sg_for_device
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @sg: list of buffers
 * @nents: number of buffers to map (returned from dma_map_sg)
 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 */
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                        int nents, enum dma_data_direction dir)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i) {
                if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
                                               sg_dma_len(s), dir))
                        continue;

                if (!arch_is_coherent())
                        dma_cache_maint_page(sg_page(s), s->offset,
                                             s->length, dir);
        }
}
EXPORT_SYMBOL(dma_sync_sg_for_device);
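
/*
 * Typical driver usage of the streaming SG interface above (an
 * illustrative sketch only; 'dev', 'buf' and 'len' are placeholders,
 * not symbols defined in this file):
 *
 *      struct scatterlist sgl[1];
 *      int count;
 *
 *      sg_init_table(sgl, ARRAY_SIZE(sgl));
 *      sg_set_buf(&sgl[0], buf, len);
 *
 *      count = dma_map_sg(dev, sgl, ARRAY_SIZE(sgl), DMA_TO_DEVICE);
 *      if (count == 0)
 *              return -ENOMEM;
 *
 *      ... hand sg_dma_address(&sgl[0]) and sg_dma_len(&sgl[0]) to the
 *      device, start the transfer and wait for it to complete ...
 *
 *      dma_unmap_sg(dev, sgl, ARRAY_SIZE(sgl), DMA_TO_DEVICE);
 */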