xref: /openbmc/linux/drivers/iommu/dma-iommu.c (revision 48cc39c3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * A fairly generic DMA-API to IOMMU-API glue layer.
4  *
5  * Copyright (C) 2014-2015 ARM Ltd.
6  *
7  * based in part on arch/arm/mm/dma-mapping.c:
8  * Copyright (C) 2000-2004 Russell King
9  */
10 
11 #include <linux/acpi_iort.h>
12 #include <linux/device.h>
13 #include <linux/dma-map-ops.h>
14 #include <linux/dma-iommu.h>
15 #include <linux/gfp.h>
16 #include <linux/huge_mm.h>
17 #include <linux/iommu.h>
18 #include <linux/iova.h>
19 #include <linux/irq.h>
20 #include <linux/mm.h>
21 #include <linux/mutex.h>
22 #include <linux/pci.h>
23 #include <linux/swiotlb.h>
24 #include <linux/scatterlist.h>
25 #include <linux/vmalloc.h>
26 #include <linux/crash_dump.h>
27 #include <linux/dma-direct.h>
28 
29 struct iommu_dma_msi_page {
30 	struct list_head	list;
31 	dma_addr_t		iova;
32 	phys_addr_t		phys;
33 };
34 
35 enum iommu_dma_cookie_type {
36 	IOMMU_DMA_IOVA_COOKIE,
37 	IOMMU_DMA_MSI_COOKIE,
38 };
39 
40 struct iommu_dma_cookie {
41 	enum iommu_dma_cookie_type	type;
42 	union {
43 		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
44 		struct iova_domain	iovad;
45 		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
46 		dma_addr_t		msi_iova;
47 	};
48 	struct list_head		msi_page_list;
49 
50 	/* Domain for flush queue callback; NULL if flush queue not in use */
51 	struct iommu_domain		*fq_domain;
52 };
53 
54 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
55 bool iommu_dma_forcedac __read_mostly;
56 
57 static int __init iommu_dma_forcedac_setup(char *str)
58 {
59 	int ret = kstrtobool(str, &iommu_dma_forcedac);
60 
61 	if (!ret && iommu_dma_forcedac)
62 		pr_info("Forcing DAC for PCI devices\n");
63 	return ret;
64 }
65 early_param("iommu.forcedac", iommu_dma_forcedac_setup);
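
/*
 * Editor's note (illustrative, not in the original file): booting with
 * "iommu.forcedac=1" on the kernel command line sets iommu_dma_forcedac and
 * skips the 32-bit SAC allocation attempt in iommu_dma_alloc_iova() below.
 */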
66 
67 static void iommu_dma_entry_dtor(unsigned long data)
68 {
69 	struct page *freelist = (struct page *)data;
70 
71 	while (freelist) {
72 		unsigned long p = (unsigned long)page_address(freelist);
73 
74 		freelist = freelist->freelist;
75 		free_page(p);
76 	}
77 }
78 
79 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
80 {
81 	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
82 		return cookie->iovad.granule;
83 	return PAGE_SIZE;
84 }
85 
86 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
87 {
88 	struct iommu_dma_cookie *cookie;
89 
90 	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
91 	if (cookie) {
92 		INIT_LIST_HEAD(&cookie->msi_page_list);
93 		cookie->type = type;
94 	}
95 	return cookie;
96 }
97 
98 /**
99  * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
100  * @domain: IOMMU domain to prepare for DMA-API usage
101  *
102  * IOMMU drivers should normally call this from their domain_alloc
103  * callback when domain->type == IOMMU_DOMAIN_DMA.
104  */
105 int iommu_get_dma_cookie(struct iommu_domain *domain)
106 {
107 	if (domain->iova_cookie)
108 		return -EEXIST;
109 
110 	domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
111 	if (!domain->iova_cookie)
112 		return -ENOMEM;
113 
114 	return 0;
115 }
116 EXPORT_SYMBOL(iommu_get_dma_cookie);
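
/*
 * Example (editor's illustrative sketch, not part of this file): a minimal
 * domain_alloc callback for a hypothetical driver, following the kernel-doc
 * above. "struct my_iommu_domain" and "my_iommu_domain_alloc" are made-up
 * names; the embedded struct iommu_domain is assumed to be a member of the
 * driver's own domain structure.
 */
#if 0	/* illustrative only */
static struct iommu_domain *my_iommu_domain_alloc(unsigned int type)
{
	struct my_iommu_domain *my_dom;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;

	my_dom = kzalloc(sizeof(*my_dom), GFP_KERNEL);
	if (!my_dom)
		return NULL;

	/* Only DMA domains need the DMA-API cookie */
	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&my_dom->domain)) {
		kfree(my_dom);
		return NULL;
	}

	return &my_dom->domain;
}
#endif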
117 
118 /**
119  * iommu_get_msi_cookie - Acquire just MSI remapping resources
120  * @domain: IOMMU domain to prepare
121  * @base: Start address of IOVA region for MSI mappings
122  *
123  * Users who manage their own IOVA allocation and do not want DMA API support,
124  * but would still like to take advantage of automatic MSI remapping, can use
125  * this to initialise their own domain appropriately. Users should reserve a
126  * contiguous IOVA region, starting at @base, large enough to accommodate the
127  * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
128  * used by the devices attached to @domain.
129  */
130 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
131 {
132 	struct iommu_dma_cookie *cookie;
133 
134 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
135 		return -EINVAL;
136 
137 	if (domain->iova_cookie)
138 		return -EEXIST;
139 
140 	cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
141 	if (!cookie)
142 		return -ENOMEM;
143 
144 	cookie->msi_iova = base;
145 	domain->iova_cookie = cookie;
146 	return 0;
147 }
148 EXPORT_SYMBOL(iommu_get_msi_cookie);
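
/*
 * Example (editor's sketch, assuming a VFIO-style caller that owns the IOVA
 * space of an IOMMU_DOMAIN_UNMANAGED domain): the caller reserves a window
 * for software MSI remapping and hands its base address to the helper above.
 * "my_enable_sw_msi" is a made-up name.
 */
#if 0	/* illustrative only */
static int my_enable_sw_msi(struct iommu_domain *domain, dma_addr_t msi_base)
{
	/*
	 * The caller guarantees that the IOVA window starting at msi_base is
	 * never handed out for DMA and is large enough for every doorbell
	 * used by the devices attached to the domain.
	 */
	return iommu_get_msi_cookie(domain, msi_base);
}
#endif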
149 
150 /**
151  * iommu_put_dma_cookie - Release a domain's DMA mapping resources
152  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
153  *          iommu_get_msi_cookie()
154  *
155  * IOMMU drivers should normally call this from their domain_free callback.
156  */
157 void iommu_put_dma_cookie(struct iommu_domain *domain)
158 {
159 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
160 	struct iommu_dma_msi_page *msi, *tmp;
161 
162 	if (!cookie)
163 		return;
164 
165 	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
166 		put_iova_domain(&cookie->iovad);
167 
168 	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
169 		list_del(&msi->list);
170 		kfree(msi);
171 	}
172 	kfree(cookie);
173 	domain->iova_cookie = NULL;
174 }
175 EXPORT_SYMBOL(iommu_put_dma_cookie);
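
/*
 * Example (editor's sketch): the matching domain_free callback for the
 * hypothetical driver sketched after iommu_get_dma_cookie() above. Releasing
 * the cookie is safe for both DMA and MSI cookies, and for domains that
 * never acquired one.
 */
#if 0	/* illustrative only */
static void my_iommu_domain_free(struct iommu_domain *domain)
{
	struct my_iommu_domain *my_dom =
		container_of(domain, struct my_iommu_domain, domain);

	iommu_put_dma_cookie(domain);
	kfree(my_dom);
}
#endif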
176 
177 /**
178  * iommu_dma_get_resv_regions - Reserved region driver helper
179  * @dev: Device from iommu_get_resv_regions()
180  * @list: Reserved region list from iommu_get_resv_regions()
181  *
182  * IOMMU drivers can use this to implement their .get_resv_regions callback
183  * for general non-IOMMU-specific reservations. Currently, this covers GICv3
184  * ITS region reservation on ACPI-based ARM platforms that may require HW MSI
185  * reservation.
186  */
187 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
188 {
189 
190 	if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
191 		iort_iommu_msi_get_resv_regions(dev, list);
192 
193 }
194 EXPORT_SYMBOL(iommu_dma_get_resv_regions);
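
/*
 * Example (editor's sketch): a driver's .get_resv_regions callback typically
 * adds its own software MSI window and then chains to the generic helper
 * above, in the style of the Arm SMMU drivers. "MY_SW_MSI_START" and
 * "MY_SW_MSI_SIZE" are made-up placeholders.
 */
#if 0	/* illustrative only */
#define MY_SW_MSI_START	0x08000000UL
#define MY_SW_MSI_SIZE	0x00100000UL

static void my_iommu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
	struct iommu_resv_region *region;

	region = iommu_alloc_resv_region(MY_SW_MSI_START, MY_SW_MSI_SIZE,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;
	list_add_tail(&region->list, head);

	/* Pick up generic reservations, e.g. ACPI IORT HW MSI regions */
	iommu_dma_get_resv_regions(dev, head);
}
#endif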
195 
196 static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
197 		phys_addr_t start, phys_addr_t end)
198 {
199 	struct iova_domain *iovad = &cookie->iovad;
200 	struct iommu_dma_msi_page *msi_page;
201 	int i, num_pages;
202 
203 	start -= iova_offset(iovad, start);
204 	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
205 
206 	for (i = 0; i < num_pages; i++) {
207 		msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
208 		if (!msi_page)
209 			return -ENOMEM;
210 
211 		msi_page->phys = start;
212 		msi_page->iova = start;
213 		INIT_LIST_HEAD(&msi_page->list);
214 		list_add(&msi_page->list, &cookie->msi_page_list);
215 		start += iovad->granule;
216 	}
217 
218 	return 0;
219 }
220 
221 static int iova_reserve_pci_windows(struct pci_dev *dev,
222 		struct iova_domain *iovad)
223 {
224 	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
225 	struct resource_entry *window;
226 	unsigned long lo, hi;
227 	phys_addr_t start = 0, end;
228 
229 	resource_list_for_each_entry(window, &bridge->windows) {
230 		if (resource_type(window->res) != IORESOURCE_MEM)
231 			continue;
232 
233 		lo = iova_pfn(iovad, window->res->start - window->offset);
234 		hi = iova_pfn(iovad, window->res->end - window->offset);
235 		reserve_iova(iovad, lo, hi);
236 	}
237 
238 	/* Get reserved DMA windows from host bridge */
239 	resource_list_for_each_entry(window, &bridge->dma_ranges) {
240 		end = window->res->start - window->offset;
241 resv_iova:
242 		if (end > start) {
243 			lo = iova_pfn(iovad, start);
244 			hi = iova_pfn(iovad, end);
245 			reserve_iova(iovad, lo, hi);
246 		} else if (end < start) {
247 			/* dma_ranges list should be sorted */
248 			dev_err(&dev->dev,
249 				"Failed to reserve IOVA [%pa-%pa]\n",
250 				&start, &end);
251 			return -EINVAL;
252 		}
253 
254 		start = window->res->end - window->offset + 1;
255 		/* If window is last entry */
256 		if (window->node.next == &bridge->dma_ranges &&
257 		    end != ~(phys_addr_t)0) {
258 			end = ~(phys_addr_t)0;
259 			goto resv_iova;
260 		}
261 	}
262 
263 	return 0;
264 }
265 
266 static int iova_reserve_iommu_regions(struct device *dev,
267 		struct iommu_domain *domain)
268 {
269 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
270 	struct iova_domain *iovad = &cookie->iovad;
271 	struct iommu_resv_region *region;
272 	LIST_HEAD(resv_regions);
273 	int ret = 0;
274 
275 	if (dev_is_pci(dev)) {
276 		ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
277 		if (ret)
278 			return ret;
279 	}
280 
281 	iommu_get_resv_regions(dev, &resv_regions);
282 	list_for_each_entry(region, &resv_regions, list) {
283 		unsigned long lo, hi;
284 
285 		/* We ARE the software that manages these! */
286 		if (region->type == IOMMU_RESV_SW_MSI)
287 			continue;
288 
289 		lo = iova_pfn(iovad, region->start);
290 		hi = iova_pfn(iovad, region->start + region->length - 1);
291 		reserve_iova(iovad, lo, hi);
292 
293 		if (region->type == IOMMU_RESV_MSI)
294 			ret = cookie_init_hw_msi_region(cookie, region->start,
295 					region->start + region->length);
296 		if (ret)
297 			break;
298 	}
299 	iommu_put_resv_regions(dev, &resv_regions);
300 
301 	return ret;
302 }
303 
304 static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
305 {
306 	struct iommu_dma_cookie *cookie;
307 	struct iommu_domain *domain;
308 
309 	cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
310 	domain = cookie->fq_domain;
311 
312 	domain->ops->flush_iotlb_all(domain);
313 }
314 
315 static bool dev_is_untrusted(struct device *dev)
316 {
317 	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
318 }
319 
320 /* sysfs updates are serialised by the mutex of the group owning @domain */
321 int iommu_dma_init_fq(struct iommu_domain *domain)
322 {
323 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
324 	int ret;
325 
326 	if (cookie->fq_domain)
327 		return 0;
328 
329 	ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all,
330 				    iommu_dma_entry_dtor);
331 	if (ret) {
332 		pr_warn("iova flush queue initialization failed\n");
333 		return ret;
334 	}
335 	/*
336 	 * Prevent an incomplete iovad->fq from being observable. Pairs with the path from
337 	 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
338 	 */
339 	smp_wmb();
340 	WRITE_ONCE(cookie->fq_domain, domain);
341 	return 0;
342 }
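
/*
 * Editor's note (illustrative): besides the boot-time path in
 * iommu_dma_init_domain() below, this is reached when a group's default
 * domain is switched to lazy invalidation at runtime, e.g.
 *
 *	# echo DMA-FQ > /sys/kernel/iommu_groups/<N>/type
 *
 * which is why the comment above can rely on the group mutex for
 * serialisation.
 */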
343 
344 /**
345  * iommu_dma_init_domain - Initialise a DMA mapping domain
346  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
347  * @base: IOVA at which the mappable address space starts
348  * @limit: Last address of the IOVA space
349  * @dev: Device the domain is being initialised for
350  *
351  * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
352  * avoid rounding surprises. If necessary, we reserve the page at address 0
353  * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
354  * any change which could make prior IOVAs invalid will fail.
355  */
356 static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
357 				 dma_addr_t limit, struct device *dev)
358 {
359 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
360 	unsigned long order, base_pfn;
361 	struct iova_domain *iovad;
362 
363 	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
364 		return -EINVAL;
365 
366 	iovad = &cookie->iovad;
367 
368 	/* Use the smallest supported page size for IOVA granularity */
369 	order = __ffs(domain->pgsize_bitmap);
370 	base_pfn = max_t(unsigned long, 1, base >> order);
371 
372 	/* Check the domain allows at least some access to the device... */
373 	if (domain->geometry.force_aperture) {
374 		if (base > domain->geometry.aperture_end ||
375 		    limit < domain->geometry.aperture_start) {
376 			pr_warn("specified DMA range outside IOMMU capability\n");
377 			return -EFAULT;
378 		}
379 		/* ...then finally give it a kicking to make sure it fits */
380 		base_pfn = max_t(unsigned long, base_pfn,
381 				domain->geometry.aperture_start >> order);
382 	}
383 
384 	/* start_pfn is always nonzero for an already-initialised domain */
385 	if (iovad->start_pfn) {
386 		if (1UL << order != iovad->granule ||
387 		    base_pfn != iovad->start_pfn) {
388 			pr_warn("Incompatible range for DMA domain\n");
389 			return -EFAULT;
390 		}
391 
392 		return 0;
393 	}
394 
395 	init_iova_domain(iovad, 1UL << order, base_pfn);
396 
397 	/* If the FQ fails we can simply fall back to strict mode */
398 	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
399 		domain->type = IOMMU_DOMAIN_DMA;
400 
401 	return iova_reserve_iommu_regions(dev, domain);
402 }
403 
404 /**
405  * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
406  *                    page flags.
407  * @dir: Direction of DMA transfer
408  * @coherent: Is the DMA master cache-coherent?
409  * @attrs: DMA attributes for the mapping
410  *
411  * Return: corresponding IOMMU API page protection flags
412  */
413 static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
414 		     unsigned long attrs)
415 {
416 	int prot = coherent ? IOMMU_CACHE : 0;
417 
418 	if (attrs & DMA_ATTR_PRIVILEGED)
419 		prot |= IOMMU_PRIV;
420 
421 	switch (dir) {
422 	case DMA_BIDIRECTIONAL:
423 		return prot | IOMMU_READ | IOMMU_WRITE;
424 	case DMA_TO_DEVICE:
425 		return prot | IOMMU_READ;
426 	case DMA_FROM_DEVICE:
427 		return prot | IOMMU_WRITE;
428 	default:
429 		return 0;
430 	}
431 }
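
/*
 * Worked example (editor's sketch, not part of this file): two sample
 * translations through dma_info_to_prot(), wrapped in a throwaway function.
 */
#if 0	/* illustrative only */
static void my_prot_examples(void)
{
	/* Coherent device, device writes into memory: */
	int prot = dma_info_to_prot(DMA_FROM_DEVICE, true, 0);
	/* prot == (IOMMU_WRITE | IOMMU_CACHE) */

	/* Non-coherent device, bidirectional, privileged mapping: */
	prot = dma_info_to_prot(DMA_BIDIRECTIONAL, false, DMA_ATTR_PRIVILEGED);
	/* prot == (IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV) */
}
#endif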
432 
433 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
434 		size_t size, u64 dma_limit, struct device *dev)
435 {
436 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
437 	struct iova_domain *iovad = &cookie->iovad;
438 	unsigned long shift, iova_len, iova = 0;
439 
440 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
441 		cookie->msi_iova += size;
442 		return cookie->msi_iova - size;
443 	}
444 
445 	shift = iova_shift(iovad);
446 	iova_len = size >> shift;
447 	/*
448 	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
449 	 * will come back to bite us badly, so we have to waste a bit of space
450 	 * rounding up anything cacheable to make sure that can't happen. The
451 	 * order of the unadjusted size will still match upon freeing.
452 	 */
453 	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
454 		iova_len = roundup_pow_of_two(iova_len);
455 
456 	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
457 
458 	if (domain->geometry.force_aperture)
459 		dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
460 
461 	/* Try to get PCI devices a SAC address */
462 	if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
463 		iova = alloc_iova_fast(iovad, iova_len,
464 				       DMA_BIT_MASK(32) >> shift, false);
465 
466 	if (!iova)
467 		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
468 				       true);
469 
470 	return (dma_addr_t)iova << shift;
471 }
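
/*
 * Worked example (editor's note, assuming a 4K IOVA granule and the current
 * IOVA_RANGE_CACHE_MAX_SIZE of 6): a 20K mapping gives iova_len = 5, which
 * is below the 32-granule cache limit and is rounded up to 8 granules (32K
 * of IOVA), whereas a 256K mapping (64 granules) is above the limit and is
 * allocated exactly.
 */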
472 
473 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
474 		dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
475 {
476 	struct iova_domain *iovad = &cookie->iovad;
477 
478 	/* The MSI case is only ever cleaning up its most recent allocation */
479 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
480 		cookie->msi_iova -= size;
481 	else if (gather && gather->queued)
482 		queue_iova(iovad, iova_pfn(iovad, iova),
483 				size >> iova_shift(iovad),
484 				(unsigned long)gather->freelist);
485 	else
486 		free_iova_fast(iovad, iova_pfn(iovad, iova),
487 				size >> iova_shift(iovad));
488 }
489 
490 static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
491 		size_t size)
492 {
493 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
494 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
495 	struct iova_domain *iovad = &cookie->iovad;
496 	size_t iova_off = iova_offset(iovad, dma_addr);
497 	struct iommu_iotlb_gather iotlb_gather;
498 	size_t unmapped;
499 
500 	dma_addr -= iova_off;
501 	size = iova_align(iovad, size + iova_off);
502 	iommu_iotlb_gather_init(&iotlb_gather);
503 	iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
504 
505 	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
506 	WARN_ON(unmapped != size);
507 
508 	if (!iotlb_gather.queued)
509 		iommu_iotlb_sync(domain, &iotlb_gather);
510 	iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
511 }
512 
513 static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
514 		size_t size, enum dma_data_direction dir,
515 		unsigned long attrs)
516 {
517 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
518 	phys_addr_t phys;
519 
520 	phys = iommu_iova_to_phys(domain, dma_addr);
521 	if (WARN_ON(!phys))
522 		return;
523 
524 	__iommu_dma_unmap(dev, dma_addr, size);
525 
526 	if (unlikely(is_swiotlb_buffer(dev, phys)))
527 		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
528 }
529 
530 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
531 		size_t size, int prot, u64 dma_mask)
532 {
533 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
534 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
535 	struct iova_domain *iovad = &cookie->iovad;
536 	size_t iova_off = iova_offset(iovad, phys);
537 	dma_addr_t iova;
538 
539 	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
540 	    iommu_deferred_attach(dev, domain))
541 		return DMA_MAPPING_ERROR;
542 
543 	size = iova_align(iovad, size + iova_off);
544 
545 	iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
546 	if (!iova)
547 		return DMA_MAPPING_ERROR;
548 
549 	if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
550 		iommu_dma_free_iova(cookie, iova, size, NULL);
551 		return DMA_MAPPING_ERROR;
552 	}
553 	return iova + iova_off;
554 }
555 
556 static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
557 		size_t org_size, dma_addr_t dma_mask, bool coherent,
558 		enum dma_data_direction dir, unsigned long attrs)
559 {
560 	int prot = dma_info_to_prot(dir, coherent, attrs);
561 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
562 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
563 	struct iova_domain *iovad = &cookie->iovad;
564 	size_t aligned_size = org_size;
565 	void *padding_start;
566 	size_t padding_size;
567 	dma_addr_t iova;
568 
569 	/*
570 	 * If both the physical buffer start address and size are
571 	 * page aligned, we don't need to use a bounce page.
572 	 */
573 	if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
574 	    iova_offset(iovad, phys | org_size)) {
575 		aligned_size = iova_align(iovad, org_size);
576 		phys = swiotlb_tbl_map_single(dev, phys, org_size,
577 					      aligned_size, dir, attrs);
578 
579 		if (phys == DMA_MAPPING_ERROR)
580 			return DMA_MAPPING_ERROR;
581 
582 		/* Cleanup the padding area. */
583 		padding_start = phys_to_virt(phys);
584 		padding_size = aligned_size;
585 
586 		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
587 		    (dir == DMA_TO_DEVICE ||
588 		     dir == DMA_BIDIRECTIONAL)) {
589 			padding_start += org_size;
590 			padding_size -= org_size;
591 		}
592 
593 		memset(padding_start, 0, padding_size);
594 	}
595 
596 	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
597 	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
598 		swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
599 	return iova;
600 }
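
/*
 * Worked example (editor's note, 4K granule, untrusted device): phys =
 * 0x80001200 with org_size = 0x600 gives (phys | org_size) = 0x80001600,
 * whose low 12 bits are non-zero, so the buffer is bounced through swiotlb;
 * phys = 0x80002000 with org_size = 0x2000 ORs to a granule-aligned value
 * and is mapped in place.
 */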
601 
602 static void __iommu_dma_free_pages(struct page **pages, int count)
603 {
604 	while (count--)
605 		__free_page(pages[count]);
606 	kvfree(pages);
607 }
608 
609 static struct page **__iommu_dma_alloc_pages(struct device *dev,
610 		unsigned int count, unsigned long order_mask, gfp_t gfp)
611 {
612 	struct page **pages;
613 	unsigned int i = 0, nid = dev_to_node(dev);
614 
615 	order_mask &= (2U << MAX_ORDER) - 1;
616 	if (!order_mask)
617 		return NULL;
618 
619 	pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
620 	if (!pages)
621 		return NULL;
622 
623 	/* IOMMU can map any pages, so highmem can also be used here */
624 	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
625 
626 	/* It makes no sense to muck about with huge pages */
627 	gfp &= ~__GFP_COMP;
628 
629 	while (count) {
630 		struct page *page = NULL;
631 		unsigned int order_size;
632 
633 		/*
634 		 * Higher-order allocations are a convenience rather
635 		 * than a necessity, hence using __GFP_NORETRY until
636 		 * falling back to minimum-order allocations.
637 		 */
638 		for (order_mask &= (2U << __fls(count)) - 1;
639 		     order_mask; order_mask &= ~order_size) {
640 			unsigned int order = __fls(order_mask);
641 			gfp_t alloc_flags = gfp;
642 
643 			order_size = 1U << order;
644 			if (order_mask > order_size)
645 				alloc_flags |= __GFP_NORETRY;
646 			page = alloc_pages_node(nid, alloc_flags, order);
647 			if (!page)
648 				continue;
649 			if (order)
650 				split_page(page, order);
651 			break;
652 		}
653 		if (!page) {
654 			__iommu_dma_free_pages(pages, i);
655 			return NULL;
656 		}
657 		count -= order_size;
658 		while (order_size--)
659 			pages[i++] = page++;
660 	}
661 	return pages;
662 }
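
/*
 * Worked example (editor's note): for a 40-page request on an IOMMU whose
 * pgsize_bitmap allows 4K and 64K mappings, order_mask has bits 0 and 4 set,
 * so the loop above attempts two order-4 (64K) allocations with
 * __GFP_NORETRY; assuming they succeed, 32 pages are covered and the
 * remaining 8 are satisfied with order-0 allocations once the residual count
 * rules out the higher order.
 */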
663 
664 /*
665  * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
666  * but an IOMMU which supports smaller pages might not map the whole thing.
667  */
668 static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
669 		size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
670 		unsigned long attrs)
671 {
672 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
673 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
674 	struct iova_domain *iovad = &cookie->iovad;
675 	bool coherent = dev_is_dma_coherent(dev);
676 	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
677 	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
678 	struct page **pages;
679 	dma_addr_t iova;
680 
681 	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
682 	    iommu_deferred_attach(dev, domain))
683 		return NULL;
684 
685 	min_size = alloc_sizes & -alloc_sizes;
686 	if (min_size < PAGE_SIZE) {
687 		min_size = PAGE_SIZE;
688 		alloc_sizes |= PAGE_SIZE;
689 	} else {
690 		size = ALIGN(size, min_size);
691 	}
692 	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
693 		alloc_sizes = min_size;
694 
695 	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
696 	pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
697 					gfp);
698 	if (!pages)
699 		return NULL;
700 
701 	size = iova_align(iovad, size);
702 	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
703 	if (!iova)
704 		goto out_free_pages;
705 
706 	if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
707 		goto out_free_iova;
708 
709 	if (!(ioprot & IOMMU_CACHE)) {
710 		struct scatterlist *sg;
711 		int i;
712 
713 		for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
714 			arch_dma_prep_coherent(sg_page(sg), sg->length);
715 	}
716 
717 	if (iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot)
718 			< size)
719 		goto out_free_sg;
720 
721 	sgt->sgl->dma_address = iova;
722 	sgt->sgl->dma_length = size;
723 	return pages;
724 
725 out_free_sg:
726 	sg_free_table(sgt);
727 out_free_iova:
728 	iommu_dma_free_iova(cookie, iova, size, NULL);
729 out_free_pages:
730 	__iommu_dma_free_pages(pages, count);
731 	return NULL;
732 }
733 
734 static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
735 		dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
736 		unsigned long attrs)
737 {
738 	struct page **pages;
739 	struct sg_table sgt;
740 	void *vaddr;
741 
742 	pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
743 						attrs);
744 	if (!pages)
745 		return NULL;
746 	*dma_handle = sgt.sgl->dma_address;
747 	sg_free_table(&sgt);
748 	vaddr = dma_common_pages_remap(pages, size, prot,
749 			__builtin_return_address(0));
750 	if (!vaddr)
751 		goto out_unmap;
752 	return vaddr;
753 
754 out_unmap:
755 	__iommu_dma_unmap(dev, *dma_handle, size);
756 	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
757 	return NULL;
758 }
759 
760 #ifdef CONFIG_DMA_REMAP
761 static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
762 		size_t size, enum dma_data_direction dir, gfp_t gfp,
763 		unsigned long attrs)
764 {
765 	struct dma_sgt_handle *sh;
766 
767 	sh = kmalloc(sizeof(*sh), gfp);
768 	if (!sh)
769 		return NULL;
770 
771 	sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
772 						    PAGE_KERNEL, attrs);
773 	if (!sh->pages) {
774 		kfree(sh);
775 		return NULL;
776 	}
777 	return &sh->sgt;
778 }
779 
780 static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
781 		struct sg_table *sgt, enum dma_data_direction dir)
782 {
783 	struct dma_sgt_handle *sh = sgt_handle(sgt);
784 
785 	__iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
786 	__iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
787 	sg_free_table(&sh->sgt);
788 	kfree(sh);
789 }
790 #endif /* CONFIG_DMA_REMAP */
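
/*
 * Example (editor's sketch): the two callbacks above back the generic
 * dma_alloc_noncontiguous() interface; a consumer driver might use it
 * roughly as follows ("my_dev" and "my_size" are placeholders).
 */
#if 0	/* illustrative only */
static int my_noncontig_example(struct device *my_dev, size_t my_size)
{
	struct sg_table *sgt;
	void *vaddr;

	sgt = dma_alloc_noncontiguous(my_dev, my_size, DMA_BIDIRECTIONAL,
				      GFP_KERNEL, 0);
	if (!sgt)
		return -ENOMEM;

	/* Optional contiguous CPU view of the discontiguous pages */
	vaddr = dma_vmap_noncontiguous(my_dev, my_size, sgt);
	if (!vaddr) {
		dma_free_noncontiguous(my_dev, my_size, sgt,
				       DMA_BIDIRECTIONAL);
		return -ENOMEM;
	}

	/* ... device and CPU use the buffer here ... */

	dma_vunmap_noncontiguous(my_dev, vaddr);
	dma_free_noncontiguous(my_dev, my_size, sgt, DMA_BIDIRECTIONAL);
	return 0;
}
#endif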
791 
792 static void iommu_dma_sync_single_for_cpu(struct device *dev,
793 		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
794 {
795 	phys_addr_t phys;
796 
797 	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
798 		return;
799 
800 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
801 	if (!dev_is_dma_coherent(dev))
802 		arch_sync_dma_for_cpu(phys, size, dir);
803 
804 	if (is_swiotlb_buffer(dev, phys))
805 		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
806 }
807 
808 static void iommu_dma_sync_single_for_device(struct device *dev,
809 		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
810 {
811 	phys_addr_t phys;
812 
813 	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
814 		return;
815 
816 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
817 	if (is_swiotlb_buffer(dev, phys))
818 		swiotlb_sync_single_for_device(dev, phys, size, dir);
819 
820 	if (!dev_is_dma_coherent(dev))
821 		arch_sync_dma_for_device(phys, size, dir);
822 }
823 
824 static void iommu_dma_sync_sg_for_cpu(struct device *dev,
825 		struct scatterlist *sgl, int nelems,
826 		enum dma_data_direction dir)
827 {
828 	struct scatterlist *sg;
829 	int i;
830 
831 	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
832 		return;
833 
834 	for_each_sg(sgl, sg, nelems, i) {
835 		if (!dev_is_dma_coherent(dev))
836 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
837 
838 		if (is_swiotlb_buffer(dev, sg_phys(sg)))
839 			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
840 						    sg->length, dir);
841 	}
842 }
843 
844 static void iommu_dma_sync_sg_for_device(struct device *dev,
845 		struct scatterlist *sgl, int nelems,
846 		enum dma_data_direction dir)
847 {
848 	struct scatterlist *sg;
849 	int i;
850 
851 	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
852 		return;
853 
854 	for_each_sg(sgl, sg, nelems, i) {
855 		if (is_swiotlb_buffer(dev, sg_phys(sg)))
856 			swiotlb_sync_single_for_device(dev, sg_phys(sg),
857 						       sg->length, dir);
858 
859 		if (!dev_is_dma_coherent(dev))
860 			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
861 	}
862 }
863 
864 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
865 		unsigned long offset, size_t size, enum dma_data_direction dir,
866 		unsigned long attrs)
867 {
868 	phys_addr_t phys = page_to_phys(page) + offset;
869 	bool coherent = dev_is_dma_coherent(dev);
870 	dma_addr_t dma_handle;
871 
872 	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
873 			coherent, dir, attrs);
874 	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
875 	    dma_handle != DMA_MAPPING_ERROR)
876 		arch_sync_dma_for_device(phys, size, dir);
877 	return dma_handle;
878 }
879 
880 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
881 		size_t size, enum dma_data_direction dir, unsigned long attrs)
882 {
883 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
884 		iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
885 	__iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
886 }
887 
888 /*
889  * Prepare a successfully-mapped scatterlist to give back to the caller.
890  *
891  * At this point the segments are already laid out by iommu_dma_map_sg() to
892  * avoid individually crossing any boundaries, so we merely need to check a
893  * segment's start address to avoid concatenating across one.
894  */
895 static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
896 		dma_addr_t dma_addr)
897 {
898 	struct scatterlist *s, *cur = sg;
899 	unsigned long seg_mask = dma_get_seg_boundary(dev);
900 	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
901 	int i, count = 0;
902 
903 	for_each_sg(sg, s, nents, i) {
904 		/* Restore this segment's original unaligned fields first */
905 		unsigned int s_iova_off = sg_dma_address(s);
906 		unsigned int s_length = sg_dma_len(s);
907 		unsigned int s_iova_len = s->length;
908 
909 		s->offset += s_iova_off;
910 		s->length = s_length;
911 		sg_dma_address(s) = DMA_MAPPING_ERROR;
912 		sg_dma_len(s) = 0;
913 
914 		/*
915 		 * Now fill in the real DMA data. If...
916 		 * - there is a valid output segment to append to
917 		 * - and this segment starts on an IOVA page boundary
918 		 * - but doesn't fall at a segment boundary
919 		 * - and wouldn't make the resulting output segment too long
920 		 */
921 		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
922 		    (max_len - cur_len >= s_length)) {
923 			/* ...then concatenate it with the previous one */
924 			cur_len += s_length;
925 		} else {
926 			/* Otherwise start the next output segment */
927 			if (i > 0)
928 				cur = sg_next(cur);
929 			cur_len = s_length;
930 			count++;
931 
932 			sg_dma_address(cur) = dma_addr + s_iova_off;
933 		}
934 
935 		sg_dma_len(cur) = cur_len;
936 		dma_addr += s_iova_len;
937 
938 		if (s_length + s_iova_off < s_iova_len)
939 			cur_len = 0;
940 	}
941 	return count;
942 }
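
/*
 * Worked example (editor's note, 4K granule): two originally page-aligned 4K
 * segments that were laid out back-to-back in the single IOVA allocation
 * come back to the caller as one 8K DMA segment, assuming the device's
 * maximum segment size and boundary mask allow the merge; a segment whose
 * original buffer started at a non-zero offset within its IOVA page instead
 * opens a new DMA segment at dma_addr + s_iova_off, so the caller still
 * sees its original byte offset.
 */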
943 
944 /*
945  * If mapping failed, then just restore the original list,
946  * while making sure the DMA fields are invalidated.
947  */
948 static void __invalidate_sg(struct scatterlist *sg, int nents)
949 {
950 	struct scatterlist *s;
951 	int i;
952 
953 	for_each_sg(sg, s, nents, i) {
954 		if (sg_dma_address(s) != DMA_MAPPING_ERROR)
955 			s->offset += sg_dma_address(s);
956 		if (sg_dma_len(s))
957 			s->length = sg_dma_len(s);
958 		sg_dma_address(s) = DMA_MAPPING_ERROR;
959 		sg_dma_len(s) = 0;
960 	}
961 }
962 
963 static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
964 		int nents, enum dma_data_direction dir, unsigned long attrs)
965 {
966 	struct scatterlist *s;
967 	int i;
968 
969 	for_each_sg(sg, s, nents, i)
970 		__iommu_dma_unmap_swiotlb(dev, sg_dma_address(s),
971 				sg_dma_len(s), dir, attrs);
972 }
973 
974 static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
975 		int nents, enum dma_data_direction dir, unsigned long attrs)
976 {
977 	struct scatterlist *s;
978 	int i;
979 
980 	for_each_sg(sg, s, nents, i) {
981 		sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
982 				s->length, dma_get_mask(dev),
983 				dev_is_dma_coherent(dev), dir, attrs);
984 		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
985 			goto out_unmap;
986 		sg_dma_len(s) = s->length;
987 	}
988 
989 	return nents;
990 
991 out_unmap:
992 	iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
993 	return -EIO;
994 }
995 
996 /*
997  * The DMA API client is passing in a scatterlist which could describe
998  * any old buffer layout, but the IOMMU API requires everything to be
999  * aligned to IOMMU pages. Hence the need for this complicated bit of
1000  * impedance-matching, to be able to hand off a suitably-aligned list,
1001  * but still preserve the original offsets and sizes for the caller.
1002  */
1003 static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
1004 		int nents, enum dma_data_direction dir, unsigned long attrs)
1005 {
1006 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
1007 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
1008 	struct iova_domain *iovad = &cookie->iovad;
1009 	struct scatterlist *s, *prev = NULL;
1010 	int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
1011 	dma_addr_t iova;
1012 	size_t iova_len = 0;
1013 	unsigned long mask = dma_get_seg_boundary(dev);
1014 	ssize_t ret;
1015 	int i;
1016 
1017 	if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
1018 		ret = iommu_deferred_attach(dev, domain);
1019 		goto out;
1020 	}
1021 
1022 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1023 		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
1024 
1025 	if (dev_is_untrusted(dev))
1026 		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
1027 
1028 	/*
1029 	 * Work out how much IOVA space we need, and align the segments to
1030 	 * IOVA granules for the IOMMU driver to handle. With some clever
1031 	 * trickery we can modify the list in-place, but reversibly, by
1032 	 * stashing the unaligned parts in the as-yet-unused DMA fields.
1033 	 */
1034 	for_each_sg(sg, s, nents, i) {
1035 		size_t s_iova_off = iova_offset(iovad, s->offset);
1036 		size_t s_length = s->length;
1037 		size_t pad_len = (mask - iova_len + 1) & mask;
1038 
1039 		sg_dma_address(s) = s_iova_off;
1040 		sg_dma_len(s) = s_length;
1041 		s->offset -= s_iova_off;
1042 		s_length = iova_align(iovad, s_length + s_iova_off);
1043 		s->length = s_length;
1044 
1045 		/*
1046 		 * Due to the alignment of our single IOVA allocation, we can
1047 		 * depend on these assumptions about the segment boundary mask:
1048 		 * - If mask size >= IOVA size, then the IOVA range cannot
1049 		 *   possibly fall across a boundary, so we don't care.
1050 		 * - If mask size < IOVA size, then the IOVA range must start
1051 		 *   exactly on a boundary, therefore we can lay things out
1052 		 *   based purely on segment lengths without needing to know
1053 		 *   the actual addresses beforehand.
1054 		 * - The mask must be a power of 2, so pad_len == 0 if
1055 		 *   iova_len == 0, thus we cannot dereference prev the first
1056 		 *   time through here (i.e. before it has a meaningful value).
1057 		 */
1058 		if (pad_len && pad_len < s_length - 1) {
1059 			prev->length += pad_len;
1060 			iova_len += pad_len;
1061 		}
1062 
1063 		iova_len += s_length;
1064 		prev = s;
1065 	}
1066 
1067 	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
1068 	if (!iova) {
1069 		ret = -ENOMEM;
1070 		goto out_restore_sg;
1071 	}
1072 
1073 	/*
1074 	 * We'll leave any physical concatenation to the IOMMU driver's
1075 	 * implementation - it knows better than we do.
1076 	 */
1077 	ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
1078 	if (ret < iova_len)
1079 		goto out_free_iova;
1080 
1081 	return __finalise_sg(dev, sg, nents, iova);
1082 
1083 out_free_iova:
1084 	iommu_dma_free_iova(cookie, iova, iova_len, NULL);
1085 out_restore_sg:
1086 	__invalidate_sg(sg, nents);
1087 out:
1088 	if (ret != -ENOMEM)
1089 		return -EINVAL;
1090 	return ret;
1091 }
1092 
1093 static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
1094 		int nents, enum dma_data_direction dir, unsigned long attrs)
1095 {
1096 	dma_addr_t start, end;
1097 	struct scatterlist *tmp;
1098 	int i;
1099 
1100 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1101 		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
1102 
1103 	if (dev_is_untrusted(dev)) {
1104 		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
1105 		return;
1106 	}
1107 
1108 	/*
1109 	 * The scatterlist segments are mapped into a single
1110 	 * contiguous IOVA allocation, so this is incredibly easy.
1111 	 */
1112 	start = sg_dma_address(sg);
1113 	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
1114 		if (sg_dma_len(tmp) == 0)
1115 			break;
1116 		sg = tmp;
1117 	}
1118 	end = sg_dma_address(sg) + sg_dma_len(sg);
1119 	__iommu_dma_unmap(dev, start, end - start);
1120 }
1121 
1122 static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
1123 		size_t size, enum dma_data_direction dir, unsigned long attrs)
1124 {
1125 	return __iommu_dma_map(dev, phys, size,
1126 			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
1127 			dma_get_mask(dev));
1128 }
1129 
1130 static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
1131 		size_t size, enum dma_data_direction dir, unsigned long attrs)
1132 {
1133 	__iommu_dma_unmap(dev, handle, size);
1134 }
1135 
1136 static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
1137 {
1138 	size_t alloc_size = PAGE_ALIGN(size);
1139 	int count = alloc_size >> PAGE_SHIFT;
1140 	struct page *page = NULL, **pages = NULL;
1141 
1142 	/* Non-coherent atomic allocation? Easy */
1143 	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1144 	    dma_free_from_pool(dev, cpu_addr, alloc_size))
1145 		return;
1146 
1147 	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1148 		/*
1149 		 * If the address is remapped, then it's either non-coherent
1150 		 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
1151 		 */
1152 		pages = dma_common_find_pages(cpu_addr);
1153 		if (!pages)
1154 			page = vmalloc_to_page(cpu_addr);
1155 		dma_common_free_remap(cpu_addr, alloc_size);
1156 	} else {
1157 		/* Lowmem means a coherent atomic or CMA allocation */
1158 		page = virt_to_page(cpu_addr);
1159 	}
1160 
1161 	if (pages)
1162 		__iommu_dma_free_pages(pages, count);
1163 	if (page)
1164 		dma_free_contiguous(dev, page, alloc_size);
1165 }
1166 
1167 static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
1168 		dma_addr_t handle, unsigned long attrs)
1169 {
1170 	__iommu_dma_unmap(dev, handle, size);
1171 	__iommu_dma_free(dev, size, cpu_addr);
1172 }
1173 
1174 static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
1175 		struct page **pagep, gfp_t gfp, unsigned long attrs)
1176 {
1177 	bool coherent = dev_is_dma_coherent(dev);
1178 	size_t alloc_size = PAGE_ALIGN(size);
1179 	int node = dev_to_node(dev);
1180 	struct page *page = NULL;
1181 	void *cpu_addr;
1182 
1183 	page = dma_alloc_contiguous(dev, alloc_size, gfp);
1184 	if (!page)
1185 		page = alloc_pages_node(node, gfp, get_order(alloc_size));
1186 	if (!page)
1187 		return NULL;
1188 
1189 	if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) {
1190 		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
1191 
1192 		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
1193 				prot, __builtin_return_address(0));
1194 		if (!cpu_addr)
1195 			goto out_free_pages;
1196 
1197 		if (!coherent)
1198 			arch_dma_prep_coherent(page, size);
1199 	} else {
1200 		cpu_addr = page_address(page);
1201 	}
1202 
1203 	*pagep = page;
1204 	memset(cpu_addr, 0, alloc_size);
1205 	return cpu_addr;
1206 out_free_pages:
1207 	dma_free_contiguous(dev, page, alloc_size);
1208 	return NULL;
1209 }
1210 
1211 static void *iommu_dma_alloc(struct device *dev, size_t size,
1212 		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
1213 {
1214 	bool coherent = dev_is_dma_coherent(dev);
1215 	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
1216 	struct page *page = NULL;
1217 	void *cpu_addr;
1218 
1219 	gfp |= __GFP_ZERO;
1220 
1221 	if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) &&
1222 	    !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
1223 		return iommu_dma_alloc_remap(dev, size, handle, gfp,
1224 				dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
1225 	}
1226 
1227 	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1228 	    !gfpflags_allow_blocking(gfp) && !coherent)
1229 		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
1230 					       gfp, NULL);
1231 	else
1232 		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
1233 	if (!cpu_addr)
1234 		return NULL;
1235 
1236 	*handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
1237 			dev->coherent_dma_mask);
1238 	if (*handle == DMA_MAPPING_ERROR) {
1239 		__iommu_dma_free(dev, size, cpu_addr);
1240 		return NULL;
1241 	}
1242 
1243 	return cpu_addr;
1244 }
1245 
1246 static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
1247 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
1248 		unsigned long attrs)
1249 {
1250 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
1251 	unsigned long pfn, off = vma->vm_pgoff;
1252 	int ret;
1253 
1254 	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
1255 
1256 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
1257 		return ret;
1258 
1259 	if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
1260 		return -ENXIO;
1261 
1262 	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1263 		struct page **pages = dma_common_find_pages(cpu_addr);
1264 
1265 		if (pages)
1266 			return vm_map_pages(vma, pages, nr_pages);
1267 		pfn = vmalloc_to_pfn(cpu_addr);
1268 	} else {
1269 		pfn = page_to_pfn(virt_to_page(cpu_addr));
1270 	}
1271 
1272 	return remap_pfn_range(vma, vma->vm_start, pfn + off,
1273 			       vma->vm_end - vma->vm_start,
1274 			       vma->vm_page_prot);
1275 }
1276 
1277 static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
1278 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
1279 		unsigned long attrs)
1280 {
1281 	struct page *page;
1282 	int ret;
1283 
1284 	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1285 		struct page **pages = dma_common_find_pages(cpu_addr);
1286 
1287 		if (pages) {
1288 			return sg_alloc_table_from_pages(sgt, pages,
1289 					PAGE_ALIGN(size) >> PAGE_SHIFT,
1290 					0, size, GFP_KERNEL);
1291 		}
1292 
1293 		page = vmalloc_to_page(cpu_addr);
1294 	} else {
1295 		page = virt_to_page(cpu_addr);
1296 	}
1297 
1298 	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
1299 	if (!ret)
1300 		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
1301 	return ret;
1302 }
1303 
1304 static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
1305 {
1306 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
1307 
1308 	return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
1309 }
1310 
1311 static const struct dma_map_ops iommu_dma_ops = {
1312 	.alloc			= iommu_dma_alloc,
1313 	.free			= iommu_dma_free,
1314 	.alloc_pages		= dma_common_alloc_pages,
1315 	.free_pages		= dma_common_free_pages,
1316 #ifdef CONFIG_DMA_REMAP
1317 	.alloc_noncontiguous	= iommu_dma_alloc_noncontiguous,
1318 	.free_noncontiguous	= iommu_dma_free_noncontiguous,
1319 #endif
1320 	.mmap			= iommu_dma_mmap,
1321 	.get_sgtable		= iommu_dma_get_sgtable,
1322 	.map_page		= iommu_dma_map_page,
1323 	.unmap_page		= iommu_dma_unmap_page,
1324 	.map_sg			= iommu_dma_map_sg,
1325 	.unmap_sg		= iommu_dma_unmap_sg,
1326 	.sync_single_for_cpu	= iommu_dma_sync_single_for_cpu,
1327 	.sync_single_for_device	= iommu_dma_sync_single_for_device,
1328 	.sync_sg_for_cpu	= iommu_dma_sync_sg_for_cpu,
1329 	.sync_sg_for_device	= iommu_dma_sync_sg_for_device,
1330 	.map_resource		= iommu_dma_map_resource,
1331 	.unmap_resource		= iommu_dma_unmap_resource,
1332 	.get_merge_boundary	= iommu_dma_get_merge_boundary,
1333 };
1334 
1335 /*
1336  * The IOMMU core code allocates the default DMA domain, which the underlying
1337  * IOMMU driver needs to support via the dma-iommu layer.
1338  */
1339 void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
1340 {
1341 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1342 
1343 	if (!domain)
1344 		goto out_err;
1345 
1350 	if (iommu_is_dma_domain(domain)) {
1351 		if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
1352 			goto out_err;
1353 		dev->dma_ops = &iommu_dma_ops;
1354 	}
1355 
1356 	return;
1357 out_err:
1358 	 pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
1359 		 dev_name(dev));
1360 }
1361 EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
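
/*
 * Editor's note (illustrative sketch, not the exact arch code): the expected
 * caller is architecture glue; on arm64, arch_setup_dma_ops() ends up doing
 * roughly the following for a device that sits behind an IOMMU.
 */
#if 0	/* illustrative only */
	if (iommu)
		iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
#endif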
1362 
1363 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
1364 		phys_addr_t msi_addr, struct iommu_domain *domain)
1365 {
1366 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
1367 	struct iommu_dma_msi_page *msi_page;
1368 	dma_addr_t iova;
1369 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1370 	size_t size = cookie_msi_granule(cookie);
1371 
1372 	msi_addr &= ~(phys_addr_t)(size - 1);
1373 	list_for_each_entry(msi_page, &cookie->msi_page_list, list)
1374 		if (msi_page->phys == msi_addr)
1375 			return msi_page;
1376 
1377 	msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
1378 	if (!msi_page)
1379 		return NULL;
1380 
1381 	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
1382 	if (!iova)
1383 		goto out_free_page;
1384 
1385 	if (iommu_map(domain, iova, msi_addr, size, prot))
1386 		goto out_free_iova;
1387 
1388 	INIT_LIST_HEAD(&msi_page->list);
1389 	msi_page->phys = msi_addr;
1390 	msi_page->iova = iova;
1391 	list_add(&msi_page->list, &cookie->msi_page_list);
1392 	return msi_page;
1393 
1394 out_free_iova:
1395 	iommu_dma_free_iova(cookie, iova, size, NULL);
1396 out_free_page:
1397 	kfree(msi_page);
1398 	return NULL;
1399 }
1400 
1401 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
1402 {
1403 	struct device *dev = msi_desc_to_dev(desc);
1404 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1405 	struct iommu_dma_msi_page *msi_page;
1406 	static DEFINE_MUTEX(msi_prepare_lock); /* see below */
1407 
1408 	if (!domain || !domain->iova_cookie) {
1409 		desc->iommu_cookie = NULL;
1410 		return 0;
1411 	}
1412 
1413 	/*
1414 	 * In fact the whole prepare operation should already be serialised by
1415 	 * irq_domain_mutex further up the callchain, but that's pretty subtle
1416 	 * on its own, so consider this locking as failsafe documentation...
1417 	 */
1418 	mutex_lock(&msi_prepare_lock);
1419 	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
1420 	mutex_unlock(&msi_prepare_lock);
1421 
1422 	msi_desc_set_iommu_cookie(desc, msi_page);
1423 
1424 	if (!msi_page)
1425 		return -ENOMEM;
1426 	return 0;
1427 }
1428 
1429 void iommu_dma_compose_msi_msg(struct msi_desc *desc,
1430 			       struct msi_msg *msg)
1431 {
1432 	struct device *dev = msi_desc_to_dev(desc);
1433 	const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1434 	const struct iommu_dma_msi_page *msi_page;
1435 
1436 	msi_page = msi_desc_get_iommu_cookie(desc);
1437 
1438 	if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
1439 		return;
1440 
1441 	msg->address_hi = upper_32_bits(msi_page->iova);
1442 	msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
1443 	msg->address_lo += lower_32_bits(msi_page->iova);
1444 }
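
/*
 * Example (editor's sketch of how an MSI irqchip pairs the two helpers
 * above, modelled loosely on the GICv3 ITS driver; "doorbell_phys" and the
 * surrounding variables are placeholders): the doorbell physical address is
 * pre-mapped when the interrupt is allocated, and the message is rewritten
 * with the IOVA at compose time.
 */
#if 0	/* illustrative only */
	/* In the irq_domain .alloc path: */
	ret = iommu_dma_prepare_msi(desc, doorbell_phys);
	if (ret)
		return ret;

	/* In the .irq_compose_msi_msg callback: */
	msg->address_lo = lower_32_bits(doorbell_phys);
	msg->address_hi = upper_32_bits(doorbell_phys);
	iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg);
#endif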
1445 
1446 static int iommu_dma_init(void)
1447 {
1448 	if (is_kdump_kernel())
1449 		static_branch_enable(&iommu_deferred_attach_enabled);
1450 
1451 	return iova_cache_get();
1452 }
1453 arch_initcall(iommu_dma_init);
1454