xref: /openbmc/linux/arch/arm/mm/dma-mapping.c (revision a09d2831)
1 /*
2  *  linux/arch/arm/mm/dma-mapping.c
3  *
4  *  Copyright (C) 2000-2004 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  *  DMA uncached mapping support.
11  */
12 #include <linux/module.h>
13 #include <linux/mm.h>
14 #include <linux/slab.h>
15 #include <linux/errno.h>
16 #include <linux/list.h>
17 #include <linux/init.h>
18 #include <linux/device.h>
19 #include <linux/dma-mapping.h>
20 
21 #include <asm/memory.h>
22 #include <asm/highmem.h>
23 #include <asm/cacheflush.h>
24 #include <asm/tlbflush.h>
25 #include <asm/sizes.h>
26 
/*
 * Sanity check size: the consistent DMA region must be a whole number of
 * 2MiB units, otherwise the build is rejected.
 */
#if (CONSISTENT_DMA_SIZE % SZ_2M)
#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB"
#endif

/*
 * The consistent region ends at CONSISTENT_END and extends downwards by
 * CONSISTENT_DMA_SIZE bytes to CONSISTENT_BASE.
 */
#define CONSISTENT_END	(0xffe00000)
#define CONSISTENT_BASE	(CONSISTENT_END - CONSISTENT_DMA_SIZE)

/* Page number of address x, counted from CONSISTENT_BASE. */
#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
/* Index of address x in PGDIR_SHIFT-sized units, counted from CONSISTENT_BASE. */
#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT)
/* Number of PGDIR_SHIFT-sized units in the whole consistent region. */
#define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT)
38 
39 static u64 get_coherent_dma_mask(struct device *dev)
40 {
41 	u64 mask = ISA_DMA_THRESHOLD;
42 
43 	if (dev) {
44 		mask = dev->coherent_dma_mask;
45 
46 		/*
47 		 * Sanity check the DMA mask - it must be non-zero, and
48 		 * must be able to be satisfied by a DMA allocation.
49 		 */
50 		if (mask == 0) {
51 			dev_warn(dev, "coherent DMA mask is unset\n");
52 			return 0;
53 		}
54 
55 		if ((~mask) & ISA_DMA_THRESHOLD) {
56 			dev_warn(dev, "coherent DMA mask %#llx is smaller "
57 				 "than system GFP_DMA mask %#llx\n",
58 				 mask, (unsigned long long)ISA_DMA_THRESHOLD);
59 			return 0;
60 		}
61 	}
62 
63 	return mask;
64 }
65 
66 /*
67  * Allocate a DMA buffer for 'dev' of size 'size' using the
68  * specified gfp mask.  Note that 'size' must be page aligned.
69  */
70 static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
71 {
72 	unsigned long order = get_order(size);
73 	struct page *page, *p, *e;
74 	void *ptr;
75 	u64 mask = get_coherent_dma_mask(dev);
76 
77 #ifdef CONFIG_DMA_API_DEBUG
78 	u64 limit = (mask + 1) & ~mask;
79 	if (limit && size >= limit) {
80 		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
81 			size, mask);
82 		return NULL;
83 	}
84 #endif
85 
86 	if (!mask)
87 		return NULL;
88 
89 	if (mask < 0xffffffffULL)
90 		gfp |= GFP_DMA;
91 
92 	page = alloc_pages(gfp, order);
93 	if (!page)
94 		return NULL;
95 
96 	/*
97 	 * Now split the huge page and free the excess pages
98 	 */
99 	split_page(page, order);
100 	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
101 		__free_page(p);
102 
103 	/*
104 	 * Ensure that the allocated pages are zeroed, and that any data
105 	 * lurking in the kernel direct-mapped region is invalidated.
106 	 */
107 	ptr = page_address(page);
108 	memset(ptr, 0, size);
109 	dmac_flush_range(ptr, ptr + size);
110 	outer_flush_range(__pa(ptr), __pa(ptr) + size);
111 
112 	return page;
113 }
114 
115 /*
116  * Free a DMA buffer.  'size' must be page aligned.
117  */
118 static void __dma_free_buffer(struct page *page, size_t size)
119 {
120 	struct page *e = page + (size >> PAGE_SHIFT);
121 
122 	while (page < e) {
123 		__free_page(page);
124 		page++;
125 	}
126 }
127 
128 #ifdef CONFIG_MMU
/*
 * These are the page tables (2MB each) covering uncached, DMA consistent allocations.
 * The array is filled in by consistent_init() and indexed with
 * CONSISTENT_PTE_INDEX().
 */
static pte_t *consistent_pte[NUM_CONSISTENT_PTES];

#include "vmregion.h"

/*
 * Region head for the consistent area, spanning CONSISTENT_BASE to
 * CONSISTENT_END.  It is passed to the arm_vmregion_*() helpers below to
 * allocate, look up and free virtual address ranges.
 */
static struct arm_vmregion_head consistent_head = {
	.vm_lock	= __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start	= CONSISTENT_BASE,
	.vm_end		= CONSISTENT_END,
};

/* Refuse to build with huge TLB support enabled. */
#ifdef CONFIG_HUGETLB_PAGE
#error ARM Coherent DMA allocator does not (yet) support huge TLB
#endif
146 
147 /*
148  * Initialise the consistent memory allocation.
149  */
150 static int __init consistent_init(void)
151 {
152 	int ret = 0;
153 	pgd_t *pgd;
154 	pmd_t *pmd;
155 	pte_t *pte;
156 	int i = 0;
157 	u32 base = CONSISTENT_BASE;
158 
159 	do {
160 		pgd = pgd_offset(&init_mm, base);
161 		pmd = pmd_alloc(&init_mm, pgd, base);
162 		if (!pmd) {
163 			printk(KERN_ERR "%s: no pmd tables\n", __func__);
164 			ret = -ENOMEM;
165 			break;
166 		}
167 		WARN_ON(!pmd_none(*pmd));
168 
169 		pte = pte_alloc_kernel(pmd, base);
170 		if (!pte) {
171 			printk(KERN_ERR "%s: no pte tables\n", __func__);
172 			ret = -ENOMEM;
173 			break;
174 		}
175 
176 		consistent_pte[i++] = pte;
177 		base += (1 << PGDIR_SHIFT);
178 	} while (base < CONSISTENT_END);
179 
180 	return ret;
181 }
182 
183 core_initcall(consistent_init);
184 
185 static void *
186 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
187 {
188 	struct arm_vmregion *c;
189 
190 	if (!consistent_pte[0]) {
191 		printk(KERN_ERR "%s: not initialised\n", __func__);
192 		dump_stack();
193 		return NULL;
194 	}
195 
196 	/*
197 	 * Allocate a virtual address in the consistent mapping region.
198 	 */
199 	c = arm_vmregion_alloc(&consistent_head, size,
200 			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
201 	if (c) {
202 		pte_t *pte;
203 		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
204 		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
205 
206 		pte = consistent_pte[idx] + off;
207 		c->vm_pages = page;
208 
209 		do {
210 			BUG_ON(!pte_none(*pte));
211 
212 			set_pte_ext(pte, mk_pte(page, prot), 0);
213 			page++;
214 			pte++;
215 			off++;
216 			if (off >= PTRS_PER_PTE) {
217 				off = 0;
218 				pte = consistent_pte[++idx];
219 			}
220 		} while (size -= PAGE_SIZE);
221 
222 		return (void *)c->vm_start;
223 	}
224 	return NULL;
225 }
226 
227 static void __dma_free_remap(void *cpu_addr, size_t size)
228 {
229 	struct arm_vmregion *c;
230 	unsigned long addr;
231 	pte_t *ptep;
232 	int idx;
233 	u32 off;
234 
235 	c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
236 	if (!c) {
237 		printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
238 		       __func__, cpu_addr);
239 		dump_stack();
240 		return;
241 	}
242 
243 	if ((c->vm_end - c->vm_start) != size) {
244 		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
245 		       __func__, c->vm_end - c->vm_start, size);
246 		dump_stack();
247 		size = c->vm_end - c->vm_start;
248 	}
249 
250 	idx = CONSISTENT_PTE_INDEX(c->vm_start);
251 	off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
252 	ptep = consistent_pte[idx] + off;
253 	addr = c->vm_start;
254 	do {
255 		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
256 
257 		ptep++;
258 		addr += PAGE_SIZE;
259 		off++;
260 		if (off >= PTRS_PER_PTE) {
261 			off = 0;
262 			ptep = consistent_pte[++idx];
263 		}
264 
265 		if (pte_none(pte) || !pte_present(pte))
266 			printk(KERN_CRIT "%s: bad page in kernel page table\n",
267 			       __func__);
268 	} while (size -= PAGE_SIZE);
269 
270 	flush_tlb_kernel_range(c->vm_start, c->vm_end);
271 
272 	arm_vmregion_free(&consistent_head, c);
273 }
274 
275 #else	/* !CONFIG_MMU */
276 
/*
 * Without an MMU nothing is remapped: "allocating" a mapping just yields
 * page_address(page), and freeing one is a no-op.
 */
#define __dma_alloc_remap(page, size, gfp, prot)	page_address(page)
#define __dma_free_remap(addr, size)			do { } while (0)
279 
280 #endif	/* CONFIG_MMU */
281 
282 static void *
283 __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
284 	    pgprot_t prot)
285 {
286 	struct page *page;
287 	void *addr;
288 
289 	*handle = ~0;
290 	size = PAGE_ALIGN(size);
291 
292 	page = __dma_alloc_buffer(dev, size, gfp);
293 	if (!page)
294 		return NULL;
295 
296 	if (!arch_is_coherent())
297 		addr = __dma_alloc_remap(page, size, gfp, prot);
298 	else
299 		addr = page_address(page);
300 
301 	if (addr)
302 		*handle = page_to_dma(dev, page);
303 
304 	return addr;
305 }
306 
307 /*
308  * Allocate DMA-coherent memory space and return both the kernel remapped
309  * virtual and bus address for that space.
310  */
311 void *
312 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
313 {
314 	void *memory;
315 
316 	if (dma_alloc_from_coherent(dev, size, handle, &memory))
317 		return memory;
318 
319 	return __dma_alloc(dev, size, handle, gfp,
320 			   pgprot_dmacoherent(pgprot_kernel));
321 }
322 EXPORT_SYMBOL(dma_alloc_coherent);
323 
324 /*
325  * Allocate a writecombining region, in much the same way as
326  * dma_alloc_coherent above.
327  */
328 void *
329 dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
330 {
331 	return __dma_alloc(dev, size, handle, gfp,
332 			   pgprot_writecombine(pgprot_kernel));
333 }
334 EXPORT_SYMBOL(dma_alloc_writecombine);
335 
336 static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
337 		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
338 {
339 	int ret = -ENXIO;
340 #ifdef CONFIG_MMU
341 	unsigned long user_size, kern_size;
342 	struct arm_vmregion *c;
343 
344 	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
345 
346 	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
347 	if (c) {
348 		unsigned long off = vma->vm_pgoff;
349 
350 		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
351 
352 		if (off < kern_size &&
353 		    user_size <= (kern_size - off)) {
354 			ret = remap_pfn_range(vma, vma->vm_start,
355 					      page_to_pfn(c->vm_pages) + off,
356 					      user_size << PAGE_SHIFT,
357 					      vma->vm_page_prot);
358 		}
359 	}
360 #endif	/* CONFIG_MMU */
361 
362 	return ret;
363 }
364 
365 int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
366 		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
367 {
368 	vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot);
369 	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
370 }
371 EXPORT_SYMBOL(dma_mmap_coherent);
372 
373 int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
374 			  void *cpu_addr, dma_addr_t dma_addr, size_t size)
375 {
376 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
377 	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
378 }
379 EXPORT_SYMBOL(dma_mmap_writecombine);
380 
381 /*
382  * free a page as defined by the above mapping.
383  * Must not be called with IRQs disabled.
384  */
385 void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
386 {
387 	WARN_ON(irqs_disabled());
388 
389 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
390 		return;
391 
392 	size = PAGE_ALIGN(size);
393 
394 	if (!arch_is_coherent())
395 		__dma_free_remap(cpu_addr, size);
396 
397 	__dma_free_buffer(dma_to_page(dev, handle), size);
398 }
399 EXPORT_SYMBOL(dma_free_coherent);
400 
401 /*
402  * Make an area consistent for devices.
403  * Note: Drivers should NOT use this function directly, as it will break
404  * platforms with CONFIG_DMABOUNCE.
405  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
406  */
407 void dma_cache_maint(const void *start, size_t size, int direction)
408 {
409 	void (*inner_op)(const void *, const void *);
410 	void (*outer_op)(unsigned long, unsigned long);
411 
412 	BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1));
413 
414 	switch (direction) {
415 	case DMA_FROM_DEVICE:		/* invalidate only */
416 		inner_op = dmac_inv_range;
417 		outer_op = outer_inv_range;
418 		break;
419 	case DMA_TO_DEVICE:		/* writeback only */
420 		inner_op = dmac_clean_range;
421 		outer_op = outer_clean_range;
422 		break;
423 	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
424 		inner_op = dmac_flush_range;
425 		outer_op = outer_flush_range;
426 		break;
427 	default:
428 		BUG();
429 	}
430 
431 	inner_op(start, start + size);
432 	outer_op(__pa(start), __pa(start) + size);
433 }
434 EXPORT_SYMBOL(dma_cache_maint);
435 
436 static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
437 				       size_t size, int direction)
438 {
439 	void *vaddr;
440 	unsigned long paddr;
441 	void (*inner_op)(const void *, const void *);
442 	void (*outer_op)(unsigned long, unsigned long);
443 
444 	switch (direction) {
445 	case DMA_FROM_DEVICE:		/* invalidate only */
446 		inner_op = dmac_inv_range;
447 		outer_op = outer_inv_range;
448 		break;
449 	case DMA_TO_DEVICE:		/* writeback only */
450 		inner_op = dmac_clean_range;
451 		outer_op = outer_clean_range;
452 		break;
453 	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
454 		inner_op = dmac_flush_range;
455 		outer_op = outer_flush_range;
456 		break;
457 	default:
458 		BUG();
459 	}
460 
461 	if (!PageHighMem(page)) {
462 		vaddr = page_address(page) + offset;
463 		inner_op(vaddr, vaddr + size);
464 	} else {
465 		vaddr = kmap_high_get(page);
466 		if (vaddr) {
467 			vaddr += offset;
468 			inner_op(vaddr, vaddr + size);
469 			kunmap_high(page);
470 		}
471 	}
472 
473 	paddr = page_to_phys(page) + offset;
474 	outer_op(paddr, paddr + size);
475 }
476 
477 void dma_cache_maint_page(struct page *page, unsigned long offset,
478 			  size_t size, int dir)
479 {
480 	/*
481 	 * A single sg entry may refer to multiple physically contiguous
482 	 * pages.  But we still need to process highmem pages individually.
483 	 * If highmem is not configured then the bulk of this loop gets
484 	 * optimized out.
485 	 */
486 	size_t left = size;
487 	do {
488 		size_t len = left;
489 		if (PageHighMem(page) && len + offset > PAGE_SIZE) {
490 			if (offset >= PAGE_SIZE) {
491 				page += offset / PAGE_SIZE;
492 				offset %= PAGE_SIZE;
493 			}
494 			len = PAGE_SIZE - offset;
495 		}
496 		dma_cache_maint_contiguous(page, offset, len, dir);
497 		offset = 0;
498 		page++;
499 		left -= len;
500 	} while (left);
501 }
502 EXPORT_SYMBOL(dma_cache_maint_page);
503 
504 /**
505  * dma_map_sg - map a set of SG buffers for streaming mode DMA
506  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
507  * @sg: list of buffers
508  * @nents: number of buffers to map
509  * @dir: DMA transfer direction
510  *
511  * Map a set of buffers described by scatterlist in streaming mode for DMA.
512  * This is the scatter-gather version of the dma_map_single interface.
513  * Here the scatter gather list elements are each tagged with the
514  * appropriate dma address and length.  They are obtained via
515  * sg_dma_{address,length}.
516  *
517  * Device ownership issues as mentioned for dma_map_single are the same
518  * here.
519  */
520 int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
521 		enum dma_data_direction dir)
522 {
523 	struct scatterlist *s;
524 	int i, j;
525 
526 	for_each_sg(sg, s, nents, i) {
527 		s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
528 						s->length, dir);
529 		if (dma_mapping_error(dev, s->dma_address))
530 			goto bad_mapping;
531 	}
532 	return nents;
533 
534  bad_mapping:
535 	for_each_sg(sg, s, i, j)
536 		dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
537 	return 0;
538 }
539 EXPORT_SYMBOL(dma_map_sg);
540 
541 /**
542  * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
543  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
544  * @sg: list of buffers
545  * @nents: number of buffers to unmap (returned from dma_map_sg)
546  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
547  *
548  * Unmap a set of streaming mode DMA translations.  Again, CPU access
549  * rules concerning calls here are the same as for dma_unmap_single().
550  */
551 void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
552 		enum dma_data_direction dir)
553 {
554 	struct scatterlist *s;
555 	int i;
556 
557 	for_each_sg(sg, s, nents, i)
558 		dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
559 }
560 EXPORT_SYMBOL(dma_unmap_sg);
561 
562 /**
563  * dma_sync_sg_for_cpu
564  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
565  * @sg: list of buffers
566  * @nents: number of buffers to map (returned from dma_map_sg)
567  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
568  */
569 void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
570 			int nents, enum dma_data_direction dir)
571 {
572 	struct scatterlist *s;
573 	int i;
574 
575 	for_each_sg(sg, s, nents, i) {
576 		dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
577 					sg_dma_len(s), dir);
578 	}
579 }
580 EXPORT_SYMBOL(dma_sync_sg_for_cpu);
581 
582 /**
583  * dma_sync_sg_for_device
584  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
585  * @sg: list of buffers
586  * @nents: number of buffers to map (returned from dma_map_sg)
587  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
588  */
589 void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
590 			int nents, enum dma_data_direction dir)
591 {
592 	struct scatterlist *s;
593 	int i;
594 
595 	for_each_sg(sg, s, nents, i) {
596 		if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
597 					sg_dma_len(s), dir))
598 			continue;
599 
600 		if (!arch_is_coherent())
601 			dma_cache_maint_page(sg_page(s), s->offset,
602 					     s->length, dir);
603 	}
604 }
605 EXPORT_SYMBOL(dma_sync_sg_for_device);
606