xref: /openbmc/linux/kernel/dma/swiotlb.c (revision 587ed79f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Dynamic DMA mapping support.
4  *
5  * This implementation is a fallback for platforms that do not support
6  * I/O TLBs (aka DMA address translation hardware).
7  * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
8  * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
9  * Copyright (C) 2000, 2003 Hewlett-Packard Co
10  *	David Mosberger-Tang <davidm@hpl.hp.com>
11  *
12  * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
13  * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
14  *			unnecessary i-cache flushing.
15  * 04/07/.. ak		Better overflow handling. Assorted fixes.
16  * 05/09/10 linville	Add support for syncing ranges, support syncing for
17  *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
18  * 08/12/11 beckyb	Add highmem support
19  */
20 
21 #define pr_fmt(fmt) "software IO TLB: " fmt
22 
23 #include <linux/cache.h>
24 #include <linux/cc_platform.h>
25 #include <linux/ctype.h>
26 #include <linux/debugfs.h>
27 #include <linux/dma-direct.h>
28 #include <linux/dma-map-ops.h>
29 #include <linux/export.h>
30 #include <linux/gfp.h>
31 #include <linux/highmem.h>
32 #include <linux/io.h>
33 #include <linux/iommu-helper.h>
34 #include <linux/init.h>
35 #include <linux/memblock.h>
36 #include <linux/mm.h>
37 #include <linux/pfn.h>
38 #include <linux/scatterlist.h>
39 #include <linux/set_memory.h>
40 #include <linux/spinlock.h>
41 #include <linux/string.h>
42 #include <linux/swiotlb.h>
43 #include <linux/types.h>
44 #ifdef CONFIG_DMA_RESTRICTED_POOL
45 #include <linux/of.h>
46 #include <linux/of_fdt.h>
47 #include <linux/of_reserved_mem.h>
48 #include <linux/slab.h>
49 #endif
50 
51 #define CREATE_TRACE_POINTS
52 #include <trace/events/swiotlb.h>
53 
54 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
55 
56 /*
57  * Minimum IO TLB size to bother booting with.  Systems with mainly
58  * 64-bit capable devices will only lightly use the swiotlb.  If we can't
59  * allocate a contiguous 1 MB, we're probably in trouble anyway.
60  */
61 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
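/*
 * Worked example (assuming the usual IO_TLB_SHIFT of 11, i.e. 2 KiB slots,
 * and 4 KiB pages): SLABS_PER_PAGE is 2 and IO_TLB_MIN_SLABS is
 * 1 MiB / 2 KiB = 512 slots.
 */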
62 
63 #define INVALID_PHYS_ADDR (~(phys_addr_t)0)
64 
65 struct io_tlb_slot {
66 	phys_addr_t orig_addr;
67 	size_t alloc_size;
68 	unsigned int list;
69 };
70 
71 static bool swiotlb_force_bounce;
72 static bool swiotlb_force_disable;
73 
74 struct io_tlb_mem io_tlb_default_mem;
75 
76 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
77 static unsigned long default_nareas;
78 
79 /**
80  * struct io_tlb_area - IO TLB memory area descriptor
81  *
82  * This is a single area with a single lock.
83  *
84  * @used:	The number of used IO TLB blocks.
85  * @index:	The slot index to start searching in this area for next round.
86  * @lock:	The lock to protect the above data structures in the map and
87  *		unmap calls.
88  */
89 struct io_tlb_area {
90 	unsigned long used;
91 	unsigned int index;
92 	spinlock_t lock;
93 };
94 
95 /*
96  * Round up the number of slabs to the next power of 2. The last area is
97  * going to be smaller than the rest if default_nslabs is not a power of two.
98  * The number of slots in an area should be a multiple of IO_TLB_SEGSIZE;
99  * otherwise a segment may span two or more areas, which conflicts with
100  * free contiguous slot tracking: free slots are treated as contiguous
101  * regardless of whether they cross an area boundary.
102  *
103  * Return true if default_nslabs is rounded up.
104  */
105 static bool round_up_default_nslabs(void)
106 {
107 	if (!default_nareas)
108 		return false;
109 
110 	if (default_nslabs < IO_TLB_SEGSIZE * default_nareas)
111 		default_nslabs = IO_TLB_SEGSIZE * default_nareas;
112 	else if (is_power_of_2(default_nslabs))
113 		return false;
114 	default_nslabs = roundup_pow_of_two(default_nslabs);
115 	return true;
116 }
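/*
 * Example: with default_nareas == 8 and default_nslabs == 3000 (and the
 * usual IO_TLB_SEGSIZE of 128), the first branch is skipped because
 * 3000 >= 8 * 128, 3000 is not a power of two, so default_nslabs becomes
 * 4096 and the function returns true.
 */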
117 
118 /**
119  * swiotlb_adjust_nareas() - adjust the number of areas and slots
120  * @nareas:	Desired number of areas. Zero is treated as 1.
121  *
122  * Adjust the default number of areas in a memory pool.
123  * The default size of the memory pool may also change to meet minimum area
124  * size requirements.
125  */
126 static void swiotlb_adjust_nareas(unsigned int nareas)
127 {
128 	if (!nareas)
129 		nareas = 1;
130 	else if (!is_power_of_2(nareas))
131 		nareas = roundup_pow_of_two(nareas);
132 
133 	default_nareas = nareas;
134 
135 	pr_info("area num %d.\n", nareas);
136 	if (round_up_default_nslabs())
137 		pr_info("SWIOTLB bounce buffer size roundup to %luMB",
138 			(default_nslabs << IO_TLB_SHIFT) >> 20);
139 }
140 
141 /**
142  * limit_nareas() - get the maximum number of areas for a given memory pool size
143  * @nareas:	Desired number of areas.
144  * @nslots:	Total number of slots in the memory pool.
145  *
146  * Limit the number of areas to the maximum possible number of areas in
147  * a memory pool of the given size.
148  *
149  * Return: Maximum possible number of areas.
150  */
151 static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots)
152 {
153 	if (nslots < nareas * IO_TLB_SEGSIZE)
154 		return nslots / IO_TLB_SEGSIZE;
155 	return nareas;
156 }
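/*
 * Example: limit_nareas(8, 512) returns 4, because a 512-slot pool can hold
 * at most 512 / IO_TLB_SEGSIZE (128) = 4 areas of a full segment each.
 */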
157 
158 static int __init
159 setup_io_tlb_npages(char *str)
160 {
161 	if (isdigit(*str)) {
162 		/* avoid tail segment of size < IO_TLB_SEGSIZE */
163 		default_nslabs =
164 			ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE);
165 	}
166 	if (*str == ',')
167 		++str;
168 	if (isdigit(*str))
169 		swiotlb_adjust_nareas(simple_strtoul(str, &str, 0));
170 	if (*str == ',')
171 		++str;
172 	if (!strcmp(str, "force"))
173 		swiotlb_force_bounce = true;
174 	else if (!strcmp(str, "noforce"))
175 		swiotlb_force_disable = true;
176 
177 	return 0;
178 }
179 early_param("swiotlb", setup_io_tlb_npages);
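/*
 * Example command lines accepted by the parser above (values illustrative):
 * "swiotlb=65536,4,force" reserves 65536 slabs (128 MiB with 2 KiB slots),
 * splits the pool into 4 areas and forces bouncing even for devices that
 * could DMA directly; "swiotlb=noforce" disables the bounce buffer.
 */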
180 
181 unsigned long swiotlb_size_or_default(void)
182 {
183 	return default_nslabs << IO_TLB_SHIFT;
184 }
185 
186 void __init swiotlb_adjust_size(unsigned long size)
187 {
188 	/*
189 	 * If the swiotlb parameter has not been specified, give architectures
190 	 * such as those supporting memory encryption a chance to adjust or
191 	 * expand the SWIOTLB size for their use.
192 	 */
193 	if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT)
194 		return;
195 
196 	size = ALIGN(size, IO_TLB_SIZE);
197 	default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
198 	if (round_up_default_nslabs())
199 		size = default_nslabs << IO_TLB_SHIFT;
200 	pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
201 }
202 
203 void swiotlb_print_info(void)
204 {
205 	struct io_tlb_mem *mem = &io_tlb_default_mem;
206 
207 	if (!mem->nslabs) {
208 		pr_warn("No low mem\n");
209 		return;
210 	}
211 
212 	pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end,
213 	       (mem->nslabs << IO_TLB_SHIFT) >> 20);
214 }
215 
216 static inline unsigned long io_tlb_offset(unsigned long val)
217 {
218 	return val & (IO_TLB_SEGSIZE - 1);
219 }
220 
221 static inline unsigned long nr_slots(u64 val)
222 {
223 	return DIV_ROUND_UP(val, IO_TLB_SIZE);
224 }
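/*
 * Example: with 2 KiB IO_TLB_SIZE, nr_slots(4097) == 3; io_tlb_offset()
 * returns a slot's position within its IO_TLB_SEGSIZE-slot segment.
 */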
225 
226 /*
227  * The early SWIOTLB allocation may happen before an architecture is able
228  * to perform the desired operations (such as marking the buffer
229  * decrypted).  This function lets it apply them once they are possible.
230  * It needs to be called before the SWIOTLB memory is used.
231  */
232 void __init swiotlb_update_mem_attributes(void)
233 {
234 	struct io_tlb_mem *mem = &io_tlb_default_mem;
235 	unsigned long bytes;
236 
237 	if (!mem->nslabs || mem->late_alloc)
238 		return;
239 	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
240 	set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
241 }
242 
243 static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
244 		unsigned long nslabs, unsigned int flags,
245 		bool late_alloc, unsigned int nareas)
246 {
247 	void *vaddr = phys_to_virt(start);
248 	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
249 
250 	mem->nslabs = nslabs;
251 	mem->start = start;
252 	mem->end = mem->start + bytes;
253 	mem->late_alloc = late_alloc;
254 	mem->nareas = nareas;
255 	mem->area_nslabs = nslabs / mem->nareas;
256 
257 	mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
258 
259 	for (i = 0; i < mem->nareas; i++) {
260 		spin_lock_init(&mem->areas[i].lock);
261 		mem->areas[i].index = 0;
262 		mem->areas[i].used = 0;
263 	}
264 
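	/*
	 * Seed the per-slot free list: within each IO_TLB_SEGSIZE segment,
	 * slot i records how many free slots remain from i to the end of
	 * its segment, so the allocator can tell from a single entry whether
	 * enough contiguous slots start there.
	 */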
265 	for (i = 0; i < mem->nslabs; i++) {
266 		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
267 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
268 		mem->slots[i].alloc_size = 0;
269 	}
270 
271 	memset(vaddr, 0, bytes);
272 	mem->vaddr = vaddr;
273 	return;
274 }
275 
276 static void __init *swiotlb_memblock_alloc(unsigned long nslabs,
277 		unsigned int flags,
278 		int (*remap)(void *tlb, unsigned long nslabs))
279 {
280 	size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
281 	void *tlb;
282 
283 	/*
284 	 * By default allocate the bounce buffer memory from low memory, but
285 	 * allow picking a location anywhere for hypervisors with guest
286 	 * memory encryption.
287 	 */
288 	if (flags & SWIOTLB_ANY)
289 		tlb = memblock_alloc(bytes, PAGE_SIZE);
290 	else
291 		tlb = memblock_alloc_low(bytes, PAGE_SIZE);
292 
293 	if (!tlb) {
294 		pr_warn("%s: Failed to allocate %zu bytes tlb structure\n",
295 			__func__, bytes);
296 		return NULL;
297 	}
298 
299 	if (remap && remap(tlb, nslabs) < 0) {
300 		memblock_free(tlb, PAGE_ALIGN(bytes));
301 		pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
302 		return NULL;
303 	}
304 
305 	return tlb;
306 }
307 
308 /*
309  * Statically reserve bounce buffer space and initialize bounce buffer data
310  * structures for the software IO TLB used to implement the DMA API.
311  */
312 void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
313 		int (*remap)(void *tlb, unsigned long nslabs))
314 {
315 	struct io_tlb_mem *mem = &io_tlb_default_mem;
316 	unsigned long nslabs;
317 	unsigned int nareas;
318 	size_t alloc_size;
319 	void *tlb;
320 
321 	if (!addressing_limit && !swiotlb_force_bounce)
322 		return;
323 	if (swiotlb_force_disable)
324 		return;
325 
326 	if (!default_nareas)
327 		swiotlb_adjust_nareas(num_possible_cpus());
328 
329 	nslabs = default_nslabs;
330 	nareas = limit_nareas(default_nareas, nslabs);
331 	while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
332 		if (nslabs <= IO_TLB_MIN_SLABS)
333 			return;
334 		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
335 		nareas = limit_nareas(nareas, nslabs);
336 	}
337 
338 	if (default_nslabs != nslabs) {
339 		pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs",
340 			default_nslabs, nslabs);
341 		default_nslabs = nslabs;
342 	}
343 
344 	alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
345 	mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
346 	if (!mem->slots) {
347 		pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n",
348 			__func__, alloc_size, PAGE_SIZE);
349 		return;
350 	}
351 
352 	mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area),
353 		default_nareas), SMP_CACHE_BYTES);
354 	if (!mem->areas) {
355 		pr_warn("%s: Failed to allocate mem->areas.\n", __func__);
356 		return;
357 	}
358 
359 	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false,
360 				default_nareas);
361 
362 	if (flags & SWIOTLB_VERBOSE)
363 		swiotlb_print_info();
364 }
365 
366 void __init swiotlb_init(bool addressing_limit, unsigned int flags)
367 {
368 	swiotlb_init_remap(addressing_limit, flags, NULL);
369 }
370 
371 /*
372  * Systems with larger DMA zones (those that don't support ISA) can
373  * initialize the swiotlb later using the page allocator if needed.
374  * This works just like the early initialization above, but with error handling.
375  */
376 int swiotlb_init_late(size_t size, gfp_t gfp_mask,
377 		int (*remap)(void *tlb, unsigned long nslabs))
378 {
379 	struct io_tlb_mem *mem = &io_tlb_default_mem;
380 	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
381 	unsigned int nareas;
382 	unsigned char *vstart = NULL;
383 	unsigned int order, area_order;
384 	bool retried = false;
385 	int rc = 0;
386 
387 	if (swiotlb_force_disable)
388 		return 0;
389 
390 	if (!default_nareas)
391 		swiotlb_adjust_nareas(num_possible_cpus());
392 
393 retry:
394 	order = get_order(nslabs << IO_TLB_SHIFT);
395 	nslabs = SLABS_PER_PAGE << order;
396 
397 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
398 		vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
399 						  order);
400 		if (vstart)
401 			break;
402 		order--;
403 		nslabs = SLABS_PER_PAGE << order;
404 		retried = true;
405 	}
406 
407 	if (!vstart)
408 		return -ENOMEM;
409 
410 	if (remap)
411 		rc = remap(vstart, nslabs);
412 	if (rc) {
413 		free_pages((unsigned long)vstart, order);
414 
415 		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
416 		if (nslabs < IO_TLB_MIN_SLABS)
417 			return rc;
418 		retried = true;
419 		goto retry;
420 	}
421 
422 	if (retried) {
423 		pr_warn("only able to allocate %ld MB\n",
424 			(PAGE_SIZE << order) >> 20);
425 	}
426 
427 	nareas = limit_nareas(default_nareas, nslabs);
428 	area_order = get_order(array_size(sizeof(*mem->areas), nareas));
429 	mem->areas = (struct io_tlb_area *)
430 		__get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
431 	if (!mem->areas)
432 		goto error_area;
433 
434 	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
435 		get_order(array_size(sizeof(*mem->slots), nslabs)));
436 	if (!mem->slots)
437 		goto error_slots;
438 
439 	set_memory_decrypted((unsigned long)vstart,
440 			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
441 	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
442 				nareas);
443 
444 	swiotlb_print_info();
445 	return 0;
446 
447 error_slots:
448 	free_pages((unsigned long)mem->areas, area_order);
449 error_area:
450 	free_pages((unsigned long)vstart, order);
451 	return -ENOMEM;
452 }
453 
454 void __init swiotlb_exit(void)
455 {
456 	struct io_tlb_mem *mem = &io_tlb_default_mem;
457 	unsigned long tbl_vaddr;
458 	size_t tbl_size, slots_size;
459 	unsigned int area_order;
460 
461 	if (swiotlb_force_bounce)
462 		return;
463 
464 	if (!mem->nslabs)
465 		return;
466 
467 	pr_info("tearing down default memory pool\n");
468 	tbl_vaddr = (unsigned long)phys_to_virt(mem->start);
469 	tbl_size = PAGE_ALIGN(mem->end - mem->start);
470 	slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
471 
472 	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
473 	if (mem->late_alloc) {
474 		area_order = get_order(array_size(sizeof(*mem->areas),
475 			mem->nareas));
476 		free_pages((unsigned long)mem->areas, area_order);
477 		free_pages(tbl_vaddr, get_order(tbl_size));
478 		free_pages((unsigned long)mem->slots, get_order(slots_size));
479 	} else {
480 		memblock_free_late(__pa(mem->areas),
481 			array_size(sizeof(*mem->areas), mem->nareas));
482 		memblock_free_late(mem->start, tbl_size);
483 		memblock_free_late(__pa(mem->slots), slots_size);
484 	}
485 
486 	memset(mem, 0, sizeof(*mem));
487 }
488 
489 /*
490  * Return the offset into an IO TLB slot required to keep the device happy.
491  */
492 static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
493 {
494 	return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
495 }
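/*
 * Example (hypothetical device): with a min_align_mask of 0xfff (4 KiB - 1,
 * as NVMe-style devices use) and an orig_addr ending in 0x1234, the returned
 * offset is 0x234, i.e. the low address bits the device relies on are
 * preserved within an IO_TLB_SIZE slot.
 */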
496 
497 /*
498  * Bounce: copy the swiotlb buffer from or back to the original DMA location.
499  */
500 static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
501 			   enum dma_data_direction dir)
502 {
503 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
504 	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
505 	phys_addr_t orig_addr = mem->slots[index].orig_addr;
506 	size_t alloc_size = mem->slots[index].alloc_size;
507 	unsigned long pfn = PFN_DOWN(orig_addr);
508 	unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
509 	unsigned int tlb_offset, orig_addr_offset;
510 
511 	if (orig_addr == INVALID_PHYS_ADDR)
512 		return;
513 
514 	tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
515 	orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
516 	if (tlb_offset < orig_addr_offset) {
517 		dev_WARN_ONCE(dev, 1,
518 			"Access before mapping start detected. orig offset %u, requested offset %u.\n",
519 			orig_addr_offset, tlb_offset);
520 		return;
521 	}
522 
523 	tlb_offset -= orig_addr_offset;
524 	if (tlb_offset > alloc_size) {
525 		dev_WARN_ONCE(dev, 1,
526 			"Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
527 			alloc_size, size, tlb_offset);
528 		return;
529 	}
530 
531 	orig_addr += tlb_offset;
532 	alloc_size -= tlb_offset;
533 
534 	if (size > alloc_size) {
535 		dev_WARN_ONCE(dev, 1,
536 			"Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n",
537 			alloc_size, size);
538 		size = alloc_size;
539 	}
540 
541 	if (PageHighMem(pfn_to_page(pfn))) {
542 		unsigned int offset = orig_addr & ~PAGE_MASK;
543 		struct page *page;
544 		unsigned int sz = 0;
545 		unsigned long flags;
546 
547 		while (size) {
548 			sz = min_t(size_t, PAGE_SIZE - offset, size);
549 
550 			local_irq_save(flags);
551 			page = pfn_to_page(pfn);
552 			if (dir == DMA_TO_DEVICE)
553 				memcpy_from_page(vaddr, page, offset, sz);
554 			else
555 				memcpy_to_page(page, offset, vaddr, sz);
556 			local_irq_restore(flags);
557 
558 			size -= sz;
559 			pfn++;
560 			vaddr += sz;
561 			offset = 0;
562 		}
563 	} else if (dir == DMA_TO_DEVICE) {
564 		memcpy(vaddr, phys_to_virt(orig_addr), size);
565 	} else {
566 		memcpy(phys_to_virt(orig_addr), vaddr, size);
567 	}
568 }
569 
570 static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
571 {
572 	return start + (idx << IO_TLB_SHIFT);
573 }
574 
575 /*
576  * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
577  */
578 static inline unsigned long get_max_slots(unsigned long boundary_mask)
579 {
580 	if (boundary_mask == ~0UL)
581 		return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
582 	return nr_slots(boundary_mask + 1);
583 }
584 
585 static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
586 {
587 	if (index >= mem->area_nslabs)
588 		return 0;
589 	return index;
590 }
591 
592 /*
593  * Track the total used slots with a global atomic value in order to have
594  * correct information to determine the high water mark. The mem_used()
595  * function gives imprecise results because there's no locking across
596  * multiple areas.
597  */
598 #ifdef CONFIG_DEBUG_FS
599 static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
600 {
601 	unsigned long old_hiwater, new_used;
602 
603 	new_used = atomic_long_add_return(nslots, &mem->total_used);
604 	old_hiwater = atomic_long_read(&mem->used_hiwater);
605 	do {
606 		if (new_used <= old_hiwater)
607 			break;
608 	} while (!atomic_long_try_cmpxchg(&mem->used_hiwater,
609 					  &old_hiwater, new_used));
610 }
611 
612 static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
613 {
614 	atomic_long_sub(nslots, &mem->total_used);
615 }
616 
617 #else /* !CONFIG_DEBUG_FS */
618 static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
619 {
620 }
621 static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
622 {
623 }
624 #endif /* CONFIG_DEBUG_FS */
625 
626 /*
627  * Find a suitable number of contiguous IO TLB slots that will fit this
628  * request and allocate a buffer from that IO TLB pool.
629  */
630 static int swiotlb_do_find_slots(struct device *dev, int area_index,
631 		phys_addr_t orig_addr, size_t alloc_size,
632 		unsigned int alloc_align_mask)
633 {
634 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
635 	struct io_tlb_area *area = mem->areas + area_index;
636 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
637 	dma_addr_t tbl_dma_addr =
638 		phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
639 	unsigned long max_slots = get_max_slots(boundary_mask);
640 	unsigned int iotlb_align_mask =
641 		dma_get_min_align_mask(dev) | alloc_align_mask;
642 	unsigned int nslots = nr_slots(alloc_size), stride;
643 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
644 	unsigned int index, slots_checked, count = 0, i;
645 	unsigned long flags;
646 	unsigned int slot_base;
647 	unsigned int slot_index;
648 
649 	BUG_ON(!nslots);
650 	BUG_ON(area_index >= mem->nareas);
651 
652 	/*
653 	 * For allocations of PAGE_SIZE or larger only look for page aligned
654 	 * allocations.
655 	 */
656 	if (alloc_size >= PAGE_SIZE)
657 		iotlb_align_mask |= ~PAGE_MASK;
658 	iotlb_align_mask &= ~(IO_TLB_SIZE - 1);
659 
660 	/*
661 	 * For mappings with an alignment requirement don't bother looping to
662 	 * unaligned slots once we have found an aligned one.
663 	 */
664 	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
665 
666 	spin_lock_irqsave(&area->lock, flags);
667 	if (unlikely(nslots > mem->area_nslabs - area->used))
668 		goto not_found;
669 
670 	slot_base = area_index * mem->area_nslabs;
671 	index = area->index;
672 
673 	for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
674 		slot_index = slot_base + index;
675 
676 		if (orig_addr &&
677 		    (slot_addr(tbl_dma_addr, slot_index) &
678 		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
679 			index = wrap_area_index(mem, index + 1);
680 			slots_checked++;
681 			continue;
682 		}
683 
684 		/*
685 		 * If we find a slot that indicates we have 'nslots' contiguous
686 		 * free slots, we allocate the buffer from that slot onwards and
687 		 * mark the entries as '0', indicating they are unavailable.
688 		 */
689 		if (!iommu_is_span_boundary(slot_index, nslots,
690 					    nr_slots(tbl_dma_addr),
691 					    max_slots)) {
692 			if (mem->slots[slot_index].list >= nslots)
693 				goto found;
694 		}
695 		index = wrap_area_index(mem, index + stride);
696 		slots_checked += stride;
697 	}
698 
699 not_found:
700 	spin_unlock_irqrestore(&area->lock, flags);
701 	return -1;
702 
703 found:
704 	for (i = slot_index; i < slot_index + nslots; i++) {
705 		mem->slots[i].list = 0;
706 		mem->slots[i].alloc_size = alloc_size - (offset +
707 				((i - slot_index) << IO_TLB_SHIFT));
708 	}
709 	for (i = slot_index - 1;
710 	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
711 	     mem->slots[i].list; i--)
712 		mem->slots[i].list = ++count;
713 
714 	/*
715 	 * Update the area's index so the next search starts after these slots.
716 	 */
717 	area->index = wrap_area_index(mem, index + nslots);
718 	area->used += nslots;
719 	spin_unlock_irqrestore(&area->lock, flags);
720 
721 	inc_used_and_hiwater(mem, nslots);
722 	return slot_index;
723 }
724 
725 static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
726 		size_t alloc_size, unsigned int alloc_align_mask)
727 {
728 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
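	/*
	 * mem->nareas is rounded to a power of two during setup, so masking
	 * the CPU id yields a valid starting area and spreads concurrent
	 * mappings across the per-area locks.
	 */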
729 	int start = raw_smp_processor_id() & (mem->nareas - 1);
730 	int i = start, index;
731 
732 	do {
733 		index = swiotlb_do_find_slots(dev, i, orig_addr, alloc_size,
734 					      alloc_align_mask);
735 		if (index >= 0)
736 			return index;
737 		if (++i >= mem->nareas)
738 			i = 0;
739 	} while (i != start);
740 
741 	return -1;
742 }
743 
744 #ifdef CONFIG_DEBUG_FS
745 
746 static unsigned long mem_used(struct io_tlb_mem *mem)
747 {
748 	return atomic_long_read(&mem->total_used);
749 }
750 
751 #else /* !CONFIG_DEBUG_FS */
752 
753 static unsigned long mem_used(struct io_tlb_mem *mem)
754 {
755 	int i;
756 	unsigned long used = 0;
757 
758 	for (i = 0; i < mem->nareas; i++)
759 		used += mem->areas[i].used;
760 	return used;
761 }
762 
763 #endif /* CONFIG_DEBUG_FS */
764 
765 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
766 		size_t mapping_size, size_t alloc_size,
767 		unsigned int alloc_align_mask, enum dma_data_direction dir,
768 		unsigned long attrs)
769 {
770 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
771 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
772 	unsigned int i;
773 	int index;
774 	phys_addr_t tlb_addr;
775 
776 	if (!mem || !mem->nslabs) {
777 		dev_warn_ratelimited(dev,
778 			"Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
779 		return (phys_addr_t)DMA_MAPPING_ERROR;
780 	}
781 
782 	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
783 		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
784 
785 	if (mapping_size > alloc_size) {
786 		dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
787 			      mapping_size, alloc_size);
788 		return (phys_addr_t)DMA_MAPPING_ERROR;
789 	}
790 
791 	index = swiotlb_find_slots(dev, orig_addr,
792 				   alloc_size + offset, alloc_align_mask);
793 	if (index == -1) {
794 		if (!(attrs & DMA_ATTR_NO_WARN))
795 			dev_warn_ratelimited(dev,
796 	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
797 				 alloc_size, mem->nslabs, mem_used(mem));
798 		return (phys_addr_t)DMA_MAPPING_ERROR;
799 	}
800 
801 	/*
802 	 * Save away the mapping from the original address to the DMA address.
803 	 * This is needed when we sync the memory.  Then we sync the buffer if
804 	 * needed.
805 	 */
806 	for (i = 0; i < nr_slots(alloc_size + offset); i++)
807 		mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
808 	tlb_addr = slot_addr(mem->start, index) + offset;
809 	/*
810 	 * When dir == DMA_FROM_DEVICE we could omit the copy from the original
811 	 * buffer to the tlb buffer, if we knew for sure the device would
812 	 * overwrite the entire current content. But we don't. Thus the
813 	 * unconditional bounce may prevent leaking swiotlb content (i.e.
814 	 * kernel memory) to user-space.
815 	 */
816 	swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
817 	return tlb_addr;
818 }
819 
820 static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
821 {
822 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
823 	unsigned long flags;
824 	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
825 	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
826 	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
827 	int aindex = index / mem->area_nslabs;
828 	struct io_tlb_area *area = &mem->areas[aindex];
829 	int count, i;
830 
831 	/*
832 	 * Return the buffer to the free list by setting the corresponding
833 	 * entries to indicate the number of contiguous entries available.
834 	 * While returning the entries to the free list, we merge them with
835 	 * the free slots below and above the range being returned.
836 	 */
837 	BUG_ON(aindex >= mem->nareas);
838 
839 	spin_lock_irqsave(&area->lock, flags);
840 	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
841 		count = mem->slots[index + nslots].list;
842 	else
843 		count = 0;
844 
845 	/*
846 	 * Step 1: return the slots to the free list, merging the slots with
847 	 * Step 1: return the slots to the free list, merging them with the
848 	 * succeeding free slots
849 	for (i = index + nslots - 1; i >= index; i--) {
850 		mem->slots[i].list = ++count;
851 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
852 		mem->slots[i].alloc_size = 0;
853 	}
854 
855 	/*
856 	 * Step 2: merge the returned slots with the preceding slots, if
857 	 * available (non-zero)
858 	 */
859 	for (i = index - 1;
860 	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
861 	     i--)
862 		mem->slots[i].list = ++count;
863 	area->used -= nslots;
864 	spin_unlock_irqrestore(&area->lock, flags);
865 
866 	dec_used(mem, nslots);
867 }
868 
869 /*
870  * tlb_addr is the physical address of the bounce buffer to unmap.
871  */
872 void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
873 			      size_t mapping_size, enum dma_data_direction dir,
874 			      unsigned long attrs)
875 {
876 	/*
877 	 * First, sync the memory before unmapping the entry
878 	 */
879 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
880 	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
881 		swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
882 
883 	swiotlb_release_slots(dev, tlb_addr);
884 }
885 
886 void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
887 		size_t size, enum dma_data_direction dir)
888 {
889 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
890 		swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
891 	else
892 		BUG_ON(dir != DMA_FROM_DEVICE);
893 }
894 
895 void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
896 		size_t size, enum dma_data_direction dir)
897 {
898 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
899 		swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE);
900 	else
901 		BUG_ON(dir != DMA_TO_DEVICE);
902 }
903 
904 /*
905  * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing
906  * to the device, copy the data into the bounce buffer as well.
907  */
908 dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
909 		enum dma_data_direction dir, unsigned long attrs)
910 {
911 	phys_addr_t swiotlb_addr;
912 	dma_addr_t dma_addr;
913 
914 	trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
915 
916 	swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
917 			attrs);
918 	if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
919 		return DMA_MAPPING_ERROR;
920 
921 	/* Ensure that the address returned is DMA'ble */
922 	dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
923 	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
924 		swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
925 			attrs | DMA_ATTR_SKIP_CPU_SYNC);
926 		dev_WARN_ONCE(dev, 1,
927 			"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
928 			&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
929 		return DMA_MAPPING_ERROR;
930 	}
931 
932 	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
933 		arch_sync_dma_for_device(swiotlb_addr, size, dir);
934 	return dma_addr;
935 }
936 
937 size_t swiotlb_max_mapping_size(struct device *dev)
938 {
939 	int min_align_mask = dma_get_min_align_mask(dev);
940 	int min_align = 0;
941 
942 	/*
943 	 * swiotlb_find_slots() skips slots according to the min align
944 	 * mask, which reduces the maximum mapping size.
945 	 * Take that into account here.
946 	 */
947 	if (min_align_mask)
948 		min_align = roundup(min_align_mask, IO_TLB_SIZE);
949 
950 	return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
951 }
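/*
 * Example: with 2 KiB IO_TLB_SIZE and an IO_TLB_SEGSIZE of 128 the maximum
 * mapping is 256 KiB; a device with a 4 KiB - 1 min_align_mask loses the
 * mask rounded up to slot size, i.e. 256 KiB - 4 KiB = 252 KiB.
 */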
952 
953 bool is_swiotlb_active(struct device *dev)
954 {
955 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
956 
957 	return mem && mem->nslabs;
958 }
959 EXPORT_SYMBOL_GPL(is_swiotlb_active);
960 
961 #ifdef CONFIG_DEBUG_FS
962 
963 static int io_tlb_used_get(void *data, u64 *val)
964 {
965 	struct io_tlb_mem *mem = data;
966 
967 	*val = mem_used(mem);
968 	return 0;
969 }
970 
971 static int io_tlb_hiwater_get(void *data, u64 *val)
972 {
973 	struct io_tlb_mem *mem = data;
974 
975 	*val = atomic_long_read(&mem->used_hiwater);
976 	return 0;
977 }
978 
979 static int io_tlb_hiwater_set(void *data, u64 val)
980 {
981 	struct io_tlb_mem *mem = data;
982 
983 	/* Only allow setting to zero */
984 	if (val != 0)
985 		return -EINVAL;
986 
987 	atomic_long_set(&mem->used_hiwater, val);
988 	return 0;
989 }
990 
991 DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
992 DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
993 				io_tlb_hiwater_set, "%llu\n");
994 
995 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
996 					 const char *dirname)
997 {
998 	atomic_long_set(&mem->total_used, 0);
999 	atomic_long_set(&mem->used_hiwater, 0);
1000 
1001 	mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
1002 	if (!mem->nslabs)
1003 		return;
1004 
1005 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
1006 	debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem,
1007 			&fops_io_tlb_used);
1008 	debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
1009 			&fops_io_tlb_hiwater);
1010 }
1011 
1012 static int __init swiotlb_create_default_debugfs(void)
1013 {
1014 	swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb");
1015 	return 0;
1016 }
1017 
1018 late_initcall(swiotlb_create_default_debugfs);
1019 
1020 #else  /* !CONFIG_DEBUG_FS */
1021 
1022 static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
1023 						const char *dirname)
1024 {
1025 }
1026 
1027 #endif	/* CONFIG_DEBUG_FS */
1028 
1029 #ifdef CONFIG_DMA_RESTRICTED_POOL
1030 
1031 struct page *swiotlb_alloc(struct device *dev, size_t size)
1032 {
1033 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
1034 	phys_addr_t tlb_addr;
1035 	int index;
1036 
1037 	if (!mem)
1038 		return NULL;
1039 
1040 	index = swiotlb_find_slots(dev, 0, size, 0);
1041 	if (index == -1)
1042 		return NULL;
1043 
1044 	tlb_addr = slot_addr(mem->start, index);
1045 
1046 	return pfn_to_page(PFN_DOWN(tlb_addr));
1047 }
1048 
1049 bool swiotlb_free(struct device *dev, struct page *page, size_t size)
1050 {
1051 	phys_addr_t tlb_addr = page_to_phys(page);
1052 
1053 	if (!is_swiotlb_buffer(dev, tlb_addr))
1054 		return false;
1055 
1056 	swiotlb_release_slots(dev, tlb_addr);
1057 
1058 	return true;
1059 }
1060 
1061 static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
1062 				    struct device *dev)
1063 {
1064 	struct io_tlb_mem *mem = rmem->priv;
1065 	unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
1066 
1067 	/* Set the per-device IO TLB area count to one. */
1068 	unsigned int nareas = 1;
1069 
1070 	if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
1071 		dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping.");
1072 		return -EINVAL;
1073 	}
1074 
1075 	/*
1076 	 * Since multiple devices can share the same pool, the private data
1077 	 * (the io_tlb_mem struct) is initialized by the first device attached
1078 	 * to it.
1079 	 */
1080 	if (!mem) {
1081 		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
1082 		if (!mem)
1083 			return -ENOMEM;
1084 
1085 		mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
1086 		if (!mem->slots) {
1087 			kfree(mem);
1088 			return -ENOMEM;
1089 		}
1090 
1091 		mem->areas = kcalloc(nareas, sizeof(*mem->areas),
1092 				GFP_KERNEL);
1093 		if (!mem->areas) {
1094 			kfree(mem->slots);
1095 			kfree(mem);
1096 			return -ENOMEM;
1097 		}
1098 
1099 		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
1100 				     rmem->size >> PAGE_SHIFT);
1101 		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
1102 					false, nareas);
1103 		mem->for_alloc = true;
1104 
1105 		rmem->priv = mem;
1106 
1107 		swiotlb_create_debugfs_files(mem, rmem->name);
1108 	}
1109 
1110 	dev->dma_io_tlb_mem = mem;
1111 
1112 	return 0;
1113 }
1114 
1115 static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
1116 					struct device *dev)
1117 {
1118 	dev->dma_io_tlb_mem = &io_tlb_default_mem;
1119 }
1120 
1121 static const struct reserved_mem_ops rmem_swiotlb_ops = {
1122 	.device_init = rmem_swiotlb_device_init,
1123 	.device_release = rmem_swiotlb_device_release,
1124 };
1125 
1126 static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
1127 {
1128 	unsigned long node = rmem->fdt_node;
1129 
1130 	if (of_get_flat_dt_prop(node, "reusable", NULL) ||
1131 	    of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
1132 	    of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
1133 	    of_get_flat_dt_prop(node, "no-map", NULL))
1134 		return -EINVAL;
1135 
1136 	rmem->ops = &rmem_swiotlb_ops;
1137 	pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
1138 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
1139 	return 0;
1140 }
1141 
1142 RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
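/*
 * Example device tree usage (a sketch; node names, addresses and sizes are
 * illustrative only):
 *
 *	reserved-memory {
 *		#address-cells = <2>;
 *		#size-cells = <2>;
 *		ranges;
 *
 *		restricted_dma: restricted-dma-pool@50000000 {
 *			compatible = "restricted-dma-pool";
 *			reg = <0x0 0x50000000 0x0 0x400000>;
 *		};
 *	};
 *
 * A device referencing the pool via "memory-region = <&restricted_dma>;"
 * gets it attached as dev->dma_io_tlb_mem by rmem_swiotlb_device_init().
 */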
1143 #endif /* CONFIG_DMA_RESTRICTED_POOL */
1144