xref: /openbmc/linux/drivers/iommu/dma-iommu.c (revision fca3aa16)
1 /*
2  * A fairly generic DMA-API to IOMMU-API glue layer.
3  *
4  * Copyright (C) 2014-2015 ARM Ltd.
5  *
6  * based in part on arch/arm/mm/dma-mapping.c:
7  * Copyright (C) 2000-2004 Russell King
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License version 2 as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  */
21 
22 #include <linux/acpi_iort.h>
23 #include <linux/device.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/gfp.h>
26 #include <linux/huge_mm.h>
27 #include <linux/iommu.h>
28 #include <linux/iova.h>
29 #include <linux/irq.h>
30 #include <linux/mm.h>
31 #include <linux/pci.h>
32 #include <linux/scatterlist.h>
33 #include <linux/vmalloc.h>
34 
35 #define IOMMU_MAPPING_ERROR	0
36 
37 struct iommu_dma_msi_page {
38 	struct list_head	list;
39 	dma_addr_t		iova;
40 	phys_addr_t		phys;
41 };
42 
43 enum iommu_dma_cookie_type {
44 	IOMMU_DMA_IOVA_COOKIE,
45 	IOMMU_DMA_MSI_COOKIE,
46 };
47 
48 struct iommu_dma_cookie {
49 	enum iommu_dma_cookie_type	type;
50 	union {
51 		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
52 		struct iova_domain	iovad;
53 		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
54 		dma_addr_t		msi_iova;
55 	};
56 	struct list_head		msi_page_list;
57 	spinlock_t			msi_lock;
58 };
59 
60 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
61 {
62 	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
63 		return cookie->iovad.granule;
64 	return PAGE_SIZE;
65 }
66 
67 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
68 {
69 	struct iommu_dma_cookie *cookie;
70 
71 	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
72 	if (cookie) {
73 		spin_lock_init(&cookie->msi_lock);
74 		INIT_LIST_HEAD(&cookie->msi_page_list);
75 		cookie->type = type;
76 	}
77 	return cookie;
78 }
79 
80 int iommu_dma_init(void)
81 {
82 	return iova_cache_get();
83 }
84 
85 /**
86  * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
87  * @domain: IOMMU domain to prepare for DMA-API usage
88  *
89  * IOMMU drivers should normally call this from their domain_alloc
90  * callback when domain->type == IOMMU_DOMAIN_DMA.
91  */
92 int iommu_get_dma_cookie(struct iommu_domain *domain)
93 {
94 	if (domain->iova_cookie)
95 		return -EEXIST;
96 
97 	domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
98 	if (!domain->iova_cookie)
99 		return -ENOMEM;
100 
101 	return 0;
102 }
103 EXPORT_SYMBOL(iommu_get_dma_cookie);
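
/*
 * Example (hypothetical, not part of this file): an IOMMU driver's
 * domain_alloc() callback might attach the DMA cookie roughly like this,
 * where struct my_smmu_domain embeds a struct iommu_domain and everything
 * prefixed "my_" is a made-up name.
 *
 *	static struct iommu_domain *my_smmu_domain_alloc(unsigned type)
 *	{
 *		struct my_smmu_domain *smmu_domain;
 *
 *		if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
 *			return NULL;
 *
 *		smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
 *		if (!smmu_domain)
 *			return NULL;
 *
 *		if (type == IOMMU_DOMAIN_DMA &&
 *		    iommu_get_dma_cookie(&smmu_domain->domain)) {
 *			kfree(smmu_domain);
 *			return NULL;
 *		}
 *
 *		return &smmu_domain->domain;
 *	}
 */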
104 
105 /**
106  * iommu_get_msi_cookie - Acquire just MSI remapping resources
107  * @domain: IOMMU domain to prepare
108  * @base: Start address of IOVA region for MSI mappings
109  *
110  * Users who manage their own IOVA allocation and do not want DMA API support,
111  * but would still like to take advantage of automatic MSI remapping, can use
112  * this to initialise their own domain appropriately. Users should reserve a
113  * contiguous IOVA region, starting at @base, large enough to accommodate the
114  * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
115  * used by the devices attached to @domain.
116  */
117 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
118 {
119 	struct iommu_dma_cookie *cookie;
120 
121 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
122 		return -EINVAL;
123 
124 	if (domain->iova_cookie)
125 		return -EEXIST;
126 
127 	cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
128 	if (!cookie)
129 		return -ENOMEM;
130 
131 	cookie->msi_iova = base;
132 	domain->iova_cookie = cookie;
133 	return 0;
134 }
135 EXPORT_SYMBOL(iommu_get_msi_cookie);
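
/*
 * Example (hypothetical sketch): a user that manages its own unmanaged
 * domain - a VFIO-style framework, say - might carve out part of its IOVA
 * space for MSI doorbells as below. MSI_IOVA_BASE and MSI_IOVA_LENGTH are
 * assumed to be constants chosen by that caller, not by this file.
 *
 *	domain = iommu_domain_alloc(bus);
 *	if (!domain)
 *		return -ENOMEM;
 *
 *	ret = iommu_get_msi_cookie(domain, MSI_IOVA_BASE);
 *	if (ret) {
 *		iommu_domain_free(domain);
 *		return ret;
 *	}
 */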
136 
137 /**
138  * iommu_put_dma_cookie - Release a domain's DMA mapping resources
139  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
140  *          iommu_get_msi_cookie()
141  *
142  * IOMMU drivers should normally call this from their domain_free callback.
143  */
144 void iommu_put_dma_cookie(struct iommu_domain *domain)
145 {
146 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
147 	struct iommu_dma_msi_page *msi, *tmp;
148 
149 	if (!cookie)
150 		return;
151 
152 	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
153 		put_iova_domain(&cookie->iovad);
154 
155 	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
156 		list_del(&msi->list);
157 		kfree(msi);
158 	}
159 	kfree(cookie);
160 	domain->iova_cookie = NULL;
161 }
162 EXPORT_SYMBOL(iommu_put_dma_cookie);
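
/*
 * Example (hypothetical; the "my_" names are made up): the matching teardown
 * is a one-liner in the driver's domain_free() callback, before it releases
 * its own state:
 *
 *	static void my_smmu_domain_free(struct iommu_domain *domain)
 *	{
 *		iommu_put_dma_cookie(domain);
 *		kfree(to_my_smmu_domain(domain));
 *	}
 */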
163 
164 /**
165  * iommu_dma_get_resv_regions - Reserved region driver helper
166  * @dev: Device from iommu_get_resv_regions()
167  * @list: Reserved region list from iommu_get_resv_regions()
168  *
169  * IOMMU drivers can use this to implement their .get_resv_regions callback
170  * for general non-IOMMU-specific reservations. Currently, this covers host
171  * bridge windows for PCI devices, and the GICv3 ITS region on ACPI-based
172  * ARM platforms that may require HW MSI reservation.
173  */
174 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
175 {
176 	struct pci_host_bridge *bridge;
177 	struct resource_entry *window;
178 
179 	if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) &&
180 		iort_iommu_msi_get_resv_regions(dev, list) < 0)
181 		return;
182 
183 	if (!dev_is_pci(dev))
184 		return;
185 
186 	bridge = pci_find_host_bridge(to_pci_dev(dev)->bus);
187 	resource_list_for_each_entry(window, &bridge->windows) {
188 		struct iommu_resv_region *region;
189 		phys_addr_t start;
190 		size_t length;
191 
192 		if (resource_type(window->res) != IORESOURCE_MEM)
193 			continue;
194 
195 		start = window->res->start - window->offset;
196 		length = window->res->end - window->res->start + 1;
197 		region = iommu_alloc_resv_region(start, length, 0,
198 				IOMMU_RESV_RESERVED);
199 		if (!region)
200 			return;
201 
202 		list_add_tail(&region->list, list);
203 	}
204 }
205 EXPORT_SYMBOL(iommu_dma_get_resv_regions);
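
/*
 * Example (hypothetical): an IOMMU driver's own .get_resv_regions callback
 * would typically register any driver-specific regions - e.g. a software
 * MSI window - and then chain to this helper for the generic ones.
 * MY_MSI_BASE and MY_MSI_SIZE stand in for whatever the driver reserves.
 *
 *	static void my_smmu_get_resv_regions(struct device *dev,
 *					     struct list_head *head)
 *	{
 *		int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 *		struct iommu_resv_region *region;
 *
 *		region = iommu_alloc_resv_region(MY_MSI_BASE, MY_MSI_SIZE,
 *						 prot, IOMMU_RESV_SW_MSI);
 *		if (!region)
 *			return;
 *		list_add_tail(&region->list, head);
 *
 *		iommu_dma_get_resv_regions(dev, head);
 *	}
 */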
206 
207 static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
208 		phys_addr_t start, phys_addr_t end)
209 {
210 	struct iova_domain *iovad = &cookie->iovad;
211 	struct iommu_dma_msi_page *msi_page;
212 	int i, num_pages;
213 
214 	start -= iova_offset(iovad, start);
215 	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
216 
217 	msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
218 	if (!msi_page)
219 		return -ENOMEM;
220 
221 	for (i = 0; i < num_pages; i++) {
222 		msi_page[i].phys = start;
223 		msi_page[i].iova = start;
224 		INIT_LIST_HEAD(&msi_page[i].list);
225 		list_add(&msi_page[i].list, &cookie->msi_page_list);
226 		start += iovad->granule;
227 	}
228 
229 	return 0;
230 }
231 
232 static int iova_reserve_iommu_regions(struct device *dev,
233 		struct iommu_domain *domain)
234 {
235 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
236 	struct iova_domain *iovad = &cookie->iovad;
237 	struct iommu_resv_region *region;
238 	LIST_HEAD(resv_regions);
239 	int ret = 0;
240 
241 	iommu_get_resv_regions(dev, &resv_regions);
242 	list_for_each_entry(region, &resv_regions, list) {
243 		unsigned long lo, hi;
244 
245 		/* We ARE the software that manages these! */
246 		if (region->type == IOMMU_RESV_SW_MSI)
247 			continue;
248 
249 		lo = iova_pfn(iovad, region->start);
250 		hi = iova_pfn(iovad, region->start + region->length - 1);
251 		reserve_iova(iovad, lo, hi);
252 
253 		if (region->type == IOMMU_RESV_MSI)
254 			ret = cookie_init_hw_msi_region(cookie, region->start,
255 					region->start + region->length);
256 		if (ret)
257 			break;
258 	}
259 	iommu_put_resv_regions(dev, &resv_regions);
260 
261 	return ret;
262 }
263 
264 /**
265  * iommu_dma_init_domain - Initialise a DMA mapping domain
266  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
267  * @base: IOVA at which the mappable address space starts
268  * @size: Size of IOVA space
269  * @dev: Device the domain is being initialised for
270  *
271  * @base and @size should be exact multiples of IOMMU page granularity to
272  * avoid rounding surprises. If necessary, we reserve the page at address 0
273  * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
274  * any change which could make prior IOVAs invalid will fail.
275  */
276 int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
277 		u64 size, struct device *dev)
278 {
279 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
280 	struct iova_domain *iovad = &cookie->iovad;
281 	unsigned long order, base_pfn, end_pfn;
282 
283 	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
284 		return -EINVAL;
285 
286 	/* Use the smallest supported page size for IOVA granularity */
287 	order = __ffs(domain->pgsize_bitmap);
288 	base_pfn = max_t(unsigned long, 1, base >> order);
289 	end_pfn = (base + size - 1) >> order;
290 
291 	/* Check the domain allows at least some access to the device... */
292 	if (domain->geometry.force_aperture) {
293 		if (base > domain->geometry.aperture_end ||
294 		    base + size <= domain->geometry.aperture_start) {
295 			pr_warn("specified DMA range outside IOMMU capability\n");
296 			return -EFAULT;
297 		}
298 		/* ...then finally give it a kicking to make sure it fits */
299 		base_pfn = max_t(unsigned long, base_pfn,
300 				domain->geometry.aperture_start >> order);
301 	}
302 
303 	/* start_pfn is always nonzero for an already-initialised domain */
304 	if (iovad->start_pfn) {
305 		if (1UL << order != iovad->granule ||
306 		    base_pfn != iovad->start_pfn) {
307 			pr_warn("Incompatible range for DMA domain\n");
308 			return -EFAULT;
309 		}
310 
311 		return 0;
312 	}
313 
314 	init_iova_domain(iovad, 1UL << order, base_pfn);
315 	if (!dev)
316 		return 0;
317 
318 	return iova_reserve_iommu_regions(dev, domain);
319 }
320 EXPORT_SYMBOL(iommu_dma_init_domain);
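
/*
 * Example (hypothetical sketch of an arch's DMA setup): once a device ends
 * up attached to a DMA-API domain, arch code would size the IOVA space to
 * the device's DMA window and only then install IOMMU-backed dma_map_ops.
 * dma_base/size are whatever the arch derived from dma-ranges or the DMA
 * mask, and my_iommu_dma_ops is a made-up ops structure.
 *
 *	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 *
 *	if (domain && domain->type == IOMMU_DOMAIN_DMA &&
 *	    !iommu_dma_init_domain(domain, dma_base, size, dev))
 *		set_dma_ops(dev, &my_iommu_dma_ops);
 */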
321 
322 /**
323  * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
324  *                    page flags.
325  * @dir: Direction of DMA transfer
326  * @coherent: Is the DMA master cache-coherent?
327  * @attrs: DMA attributes for the mapping
328  *
329  * Return: corresponding IOMMU API page protection flags
330  */
331 int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
332 		     unsigned long attrs)
333 {
334 	int prot = coherent ? IOMMU_CACHE : 0;
335 
336 	if (attrs & DMA_ATTR_PRIVILEGED)
337 		prot |= IOMMU_PRIV;
338 
339 	switch (dir) {
340 	case DMA_BIDIRECTIONAL:
341 		return prot | IOMMU_READ | IOMMU_WRITE;
342 	case DMA_TO_DEVICE:
343 		return prot | IOMMU_READ;
344 	case DMA_FROM_DEVICE:
345 		return prot | IOMMU_WRITE;
346 	default:
347 		return 0;
348 	}
349 }
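
/*
 * Worked example: dma_info_to_prot(DMA_FROM_DEVICE, true, 0) returns
 * IOMMU_CACHE | IOMMU_WRITE (the device writes memory the CPU will read),
 * while dma_info_to_prot(DMA_TO_DEVICE, false, DMA_ATTR_PRIVILEGED) returns
 * IOMMU_PRIV | IOMMU_READ.
 */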
350 
351 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
352 		size_t size, dma_addr_t dma_limit, struct device *dev)
353 {
354 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
355 	struct iova_domain *iovad = &cookie->iovad;
356 	unsigned long shift, iova_len, iova = 0;
357 
358 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
359 		cookie->msi_iova += size;
360 		return cookie->msi_iova - size;
361 	}
362 
363 	shift = iova_shift(iovad);
364 	iova_len = size >> shift;
365 	/*
366 	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
367 	 * will come back to bite us badly, so we have to waste a bit of space
368 	 * rounding up anything cacheable to make sure that can't happen. The
369 	 * order of the unadjusted size will still match upon freeing.
370 	 */
371 	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
372 		iova_len = roundup_pow_of_two(iova_len);
373 
374 	if (domain->geometry.force_aperture)
375 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
376 
377 	/* Try to get PCI devices a SAC address */
378 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
379 		iova = alloc_iova_fast(iovad, iova_len,
380 				       DMA_BIT_MASK(32) >> shift, false);
381 
382 	if (!iova)
383 		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
384 				       true);
385 
386 	return (dma_addr_t)iova << shift;
387 }
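
/*
 * Worked example of the rounding above: with a 4KB IOVA granule, a 20KB
 * request gives iova_len = 5, which is rounded up to 8 granules, so 32KB of
 * IOVA space is allocated even though only 20KB will be mapped. Requests at
 * or above the rcache size limit are left unrounded, as they are never
 * cached in the first place.
 */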
388 
389 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
390 		dma_addr_t iova, size_t size)
391 {
392 	struct iova_domain *iovad = &cookie->iovad;
393 
394 	/* The MSI case is only ever cleaning up its most recent allocation */
395 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
396 		cookie->msi_iova -= size;
397 	else
398 		free_iova_fast(iovad, iova_pfn(iovad, iova),
399 				size >> iova_shift(iovad));
400 }
401 
402 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
403 		size_t size)
404 {
405 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
406 	struct iova_domain *iovad = &cookie->iovad;
407 	size_t iova_off = iova_offset(iovad, dma_addr);
408 
409 	dma_addr -= iova_off;
410 	size = iova_align(iovad, size + iova_off);
411 
412 	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
413 	iommu_dma_free_iova(cookie, dma_addr, size);
414 }
415 
416 static void __iommu_dma_free_pages(struct page **pages, int count)
417 {
418 	while (count--)
419 		__free_page(pages[count]);
420 	kvfree(pages);
421 }
422 
423 static struct page **__iommu_dma_alloc_pages(unsigned int count,
424 		unsigned long order_mask, gfp_t gfp)
425 {
426 	struct page **pages;
427 	unsigned int i = 0, array_size = count * sizeof(*pages);
428 
429 	order_mask &= (2U << MAX_ORDER) - 1;
430 	if (!order_mask)
431 		return NULL;
432 
433 	if (array_size <= PAGE_SIZE)
434 		pages = kzalloc(array_size, GFP_KERNEL);
435 	else
436 		pages = vzalloc(array_size);
437 	if (!pages)
438 		return NULL;
439 
440 	/* The IOMMU can map any page, so highmem can also be used here */
441 	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
442 
443 	while (count) {
444 		struct page *page = NULL;
445 		unsigned int order_size;
446 
447 		/*
448 		 * Higher-order allocations are a convenience rather
449 		 * than a necessity, hence using __GFP_NORETRY until
450 		 * falling back to minimum-order allocations.
451 		 */
452 		for (order_mask &= (2U << __fls(count)) - 1;
453 		     order_mask; order_mask &= ~order_size) {
454 			unsigned int order = __fls(order_mask);
455 
456 			order_size = 1U << order;
457 			page = alloc_pages((order_mask - order_size) ?
458 					   gfp | __GFP_NORETRY : gfp, order);
459 			if (!page)
460 				continue;
461 			if (!order)
462 				break;
463 			if (!PageCompound(page)) {
464 				split_page(page, order);
465 				break;
466 			} else if (!split_huge_page(page)) {
467 				break;
468 			}
469 			__free_pages(page, order);
470 		}
471 		if (!page) {
472 			__iommu_dma_free_pages(pages, i);
473 			return NULL;
474 		}
475 		count -= order_size;
476 		while (order_size--)
477 			pages[i++] = page++;
478 	}
479 	return pages;
480 }
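
/*
 * Worked example of the loop above: a request for 130 pages with an
 * order_mask permitting only orders 9 and 0 (2MB and 4KB chunks with 4KB
 * pages) has the mask clamped to __fls(count), so order 9 is dropped and
 * the whole buffer comes from single pages; a 1030-page request would
 * instead peel off two order-9 blocks (each split into singles) before
 * finishing with order-0 pages.
 */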
481 
482 /**
483  * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc()
484  * @dev: Device which owns this buffer
485  * @pages: Array of buffer pages as returned by iommu_dma_alloc()
486  * @size: Size of buffer in bytes
487  * @handle: DMA address of buffer
488  *
489  * Frees both the pages associated with the buffer and the array
490  * describing them.
491  */
492 void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
493 		dma_addr_t *handle)
494 {
495 	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
496 	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
497 	*handle = IOMMU_MAPPING_ERROR;
498 }
499 
500 /**
501  * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
502  * @dev: Device to allocate memory for. Must be a real device
503  *	 attached to an iommu_dma_domain
504  * @size: Size of buffer in bytes
505  * @gfp: Allocation flags
506  * @attrs: DMA attributes for this allocation
507  * @prot: IOMMU mapping flags
508  * @handle: Out argument for allocated DMA handle
509  * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
510  *		given VA/PA are visible to the given non-coherent device.
511  *
512  * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
513  * but an IOMMU which supports smaller pages might not map the whole thing.
514  *
515  * Return: Array of struct page pointers describing the buffer,
516  *	   or NULL on failure.
517  */
518 struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
519 		unsigned long attrs, int prot, dma_addr_t *handle,
520 		void (*flush_page)(struct device *, const void *, phys_addr_t))
521 {
522 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
523 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
524 	struct iova_domain *iovad = &cookie->iovad;
525 	struct page **pages;
526 	struct sg_table sgt;
527 	dma_addr_t iova;
528 	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
529 
530 	*handle = IOMMU_MAPPING_ERROR;
531 
532 	min_size = alloc_sizes & -alloc_sizes;
533 	if (min_size < PAGE_SIZE) {
534 		min_size = PAGE_SIZE;
535 		alloc_sizes |= PAGE_SIZE;
536 	} else {
537 		size = ALIGN(size, min_size);
538 	}
539 	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
540 		alloc_sizes = min_size;
541 
542 	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
543 	pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp);
544 	if (!pages)
545 		return NULL;
546 
547 	size = iova_align(iovad, size);
548 	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
549 	if (!iova)
550 		goto out_free_pages;
551 
552 	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
553 		goto out_free_iova;
554 
555 	if (!(prot & IOMMU_CACHE)) {
556 		struct sg_mapping_iter miter;
557 		/*
558 		 * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
559 		 * sufficient here, so skip it by using the "wrong" direction.
560 		 */
561 		sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG);
562 		while (sg_miter_next(&miter))
563 			flush_page(dev, miter.addr, page_to_phys(miter.page));
564 		sg_miter_stop(&miter);
565 	}
566 
567 	if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot)
568 			< size)
569 		goto out_free_sg;
570 
571 	*handle = iova;
572 	sg_free_table(&sgt);
573 	return pages;
574 
575 out_free_sg:
576 	sg_free_table(&sgt);
577 out_free_iova:
578 	iommu_dma_free_iova(cookie, iova, size);
579 out_free_pages:
580 	__iommu_dma_free_pages(pages, count);
581 	return NULL;
582 }
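
/*
 * Example (hypothetical, loosely modelled on how an arch's dma_map_ops
 * ->alloc path for a non-coherent device might use this): the returned
 * pages are typically stitched into a kernel mapping with vmap(), and the
 * arch supplies a flush_page callback that makes each page visible to the
 * device. __my_flush_page() and __my_arch_flush_dcache_area() are made-up
 * names for that arch hook.
 *
 *	static void __my_flush_page(struct device *dev, const void *virt,
 *				    phys_addr_t phys)
 *	{
 *		__my_arch_flush_dcache_area(virt, PAGE_SIZE);
 *	}
 *
 *	...
 *	int prot = dma_info_to_prot(DMA_BIDIRECTIONAL, false, attrs);
 *	struct page **pages;
 *	void *addr;
 *
 *	pages = iommu_dma_alloc(dev, size, gfp, attrs, prot, handle,
 *				__my_flush_page);
 *	if (!pages)
 *		return NULL;
 *
 *	addr = vmap(pages, PAGE_ALIGN(size) >> PAGE_SHIFT, VM_MAP,
 *		    pgprot_writecombine(PAGE_KERNEL));
 *	if (!addr)
 *		iommu_dma_free(dev, pages, size, handle);
 *	return addr;
 */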
583 
584 /**
585  * iommu_dma_mmap - Map a buffer into provided user VMA
586  * @pages: Array representing buffer from iommu_dma_alloc()
587  * @size: Size of buffer in bytes
588  * @vma: VMA describing requested userspace mapping
589  *
590  * Maps the pages of the buffer in @pages into @vma. The caller is responsible
591  * for verifying the correct size and protection of @vma beforehand.
592  */
594 int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
595 {
596 	unsigned long uaddr = vma->vm_start;
597 	unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT;
598 	int ret = -ENXIO;
599 
600 	for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) {
601 		ret = vm_insert_page(vma, uaddr, pages[i]);
602 		if (ret)
603 			break;
604 		uaddr += PAGE_SIZE;
605 	}
606 	return ret;
607 }
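
/*
 * Example (hypothetical ->mmap wiring): after checking the VMA against the
 * buffer it was allocated for, an arch's dma_map_ops ->mmap callback can
 * hand straight off to this helper:
 *
 *	if (vma->vm_pgoff >= (PAGE_ALIGN(size) >> PAGE_SHIFT))
 *		return -ENXIO;
 *	return iommu_dma_mmap(pages, size, vma);
 */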
608 
609 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
610 		size_t size, int prot)
611 {
612 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
613 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
614 	size_t iova_off = 0;
615 	dma_addr_t iova;
616 
617 	if (cookie->type == IOMMU_DMA_IOVA_COOKIE) {
618 		iova_off = iova_offset(&cookie->iovad, phys);
619 		size = iova_align(&cookie->iovad, size + iova_off);
620 	}
621 
622 	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
623 	if (!iova)
624 		return IOMMU_MAPPING_ERROR;
625 
626 	if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
627 		iommu_dma_free_iova(cookie, iova, size);
628 		return IOMMU_MAPPING_ERROR;
629 	}
630 	return iova + iova_off;
631 }
632 
633 dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
634 		unsigned long offset, size_t size, int prot)
635 {
636 	return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot);
637 }
638 
639 void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
640 		enum dma_data_direction dir, unsigned long attrs)
641 {
642 	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
643 }
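
/*
 * Example (hypothetical streaming-DMA wiring): an arch's ->map_page callback
 * would translate the direction and attributes with dma_info_to_prot(),
 * defer to iommu_dma_map_page(), and add whatever cache maintenance a
 * non-coherent device needs. my_device_is_coherent() and __my_dma_clean()
 * are stand-ins for those arch details.
 *
 *	static dma_addr_t my_map_page(struct device *dev, struct page *page,
 *				      unsigned long offset, size_t size,
 *				      enum dma_data_direction dir,
 *				      unsigned long attrs)
 *	{
 *		bool coherent = my_device_is_coherent(dev);
 *		int prot = dma_info_to_prot(dir, coherent, attrs);
 *		dma_addr_t handle;
 *
 *		handle = iommu_dma_map_page(dev, page, offset, size, prot);
 *		if (!coherent && !iommu_dma_mapping_error(dev, handle) &&
 *		    !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 *			__my_dma_clean(page_address(page) + offset, size, dir);
 *
 *		return handle;
 *	}
 */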
644 
645 /*
646  * Prepare a successfully-mapped scatterlist to give back to the caller.
647  *
648  * At this point the segments are already laid out by iommu_dma_map_sg() to
649  * avoid individually crossing any boundaries, so we merely need to check a
650  * segment's start address to avoid concatenating across one.
651  */
652 static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
653 		dma_addr_t dma_addr)
654 {
655 	struct scatterlist *s, *cur = sg;
656 	unsigned long seg_mask = dma_get_seg_boundary(dev);
657 	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
658 	int i, count = 0;
659 
660 	for_each_sg(sg, s, nents, i) {
661 		/* Restore this segment's original unaligned fields first */
662 		unsigned int s_iova_off = sg_dma_address(s);
663 		unsigned int s_length = sg_dma_len(s);
664 		unsigned int s_iova_len = s->length;
665 
666 		s->offset += s_iova_off;
667 		s->length = s_length;
668 		sg_dma_address(s) = IOMMU_MAPPING_ERROR;
669 		sg_dma_len(s) = 0;
670 
671 		/*
672 		 * Now fill in the real DMA data. If...
673 		 * - there is a valid output segment to append to
674 		 * - and this segment starts on an IOVA page boundary
675 		 * - but doesn't fall at a segment boundary
676 		 * - and wouldn't make the resulting output segment too long
677 		 */
678 		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
679 		    (cur_len + s_length <= max_len)) {
680 			/* ...then concatenate it with the previous one */
681 			cur_len += s_length;
682 		} else {
683 			/* Otherwise start the next output segment */
684 			if (i > 0)
685 				cur = sg_next(cur);
686 			cur_len = s_length;
687 			count++;
688 
689 			sg_dma_address(cur) = dma_addr + s_iova_off;
690 		}
691 
692 		sg_dma_len(cur) = cur_len;
693 		dma_addr += s_iova_len;
694 
695 		if (s_length + s_iova_off < s_iova_len)
696 			cur_len = 0;
697 	}
698 	return count;
699 }
700 
701 /*
702  * If mapping failed, then just restore the original list,
703  * while making sure the DMA fields are invalidated.
704  */
705 static void __invalidate_sg(struct scatterlist *sg, int nents)
706 {
707 	struct scatterlist *s;
708 	int i;
709 
710 	for_each_sg(sg, s, nents, i) {
711 		if (sg_dma_address(s) != IOMMU_MAPPING_ERROR)
712 			s->offset += sg_dma_address(s);
713 		if (sg_dma_len(s))
714 			s->length = sg_dma_len(s);
715 		sg_dma_address(s) = IOMMU_MAPPING_ERROR;
716 		sg_dma_len(s) = 0;
717 	}
718 }
719 
720 /*
721  * The DMA API client is passing in a scatterlist which could describe
722  * any old buffer layout, but the IOMMU API requires everything to be
723  * aligned to IOMMU pages. Hence the need for this complicated bit of
724  * impedance-matching, to be able to hand off a suitably-aligned list,
725  * but still preserve the original offsets and sizes for the caller.
726  */
727 int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
728 		int nents, int prot)
729 {
730 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
731 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
732 	struct iova_domain *iovad = &cookie->iovad;
733 	struct scatterlist *s, *prev = NULL;
734 	dma_addr_t iova;
735 	size_t iova_len = 0;
736 	unsigned long mask = dma_get_seg_boundary(dev);
737 	int i;
738 
739 	/*
740 	 * Work out how much IOVA space we need, and align the segments to
741 	 * IOVA granules for the IOMMU driver to handle. With some clever
742 	 * trickery we can modify the list in-place, but reversibly, by
743 	 * stashing the unaligned parts in the as-yet-unused DMA fields.
744 	 */
745 	for_each_sg(sg, s, nents, i) {
746 		size_t s_iova_off = iova_offset(iovad, s->offset);
747 		size_t s_length = s->length;
748 		size_t pad_len = (mask - iova_len + 1) & mask;
749 
750 		sg_dma_address(s) = s_iova_off;
751 		sg_dma_len(s) = s_length;
752 		s->offset -= s_iova_off;
753 		s_length = iova_align(iovad, s_length + s_iova_off);
754 		s->length = s_length;
755 
756 		/*
757 		 * Due to the alignment of our single IOVA allocation, we can
758 		 * depend on these assumptions about the segment boundary mask:
759 		 * - If mask size >= IOVA size, then the IOVA range cannot
760 		 *   possibly fall across a boundary, so we don't care.
761 		 * - If mask size < IOVA size, then the IOVA range must start
762 		 *   exactly on a boundary, therefore we can lay things out
763 		 *   based purely on segment lengths without needing to know
764 		 *   the actual addresses beforehand.
765 		 * - The mask must be a power of 2, so pad_len == 0 if
766 		 *   iova_len == 0, thus we cannot dereference prev the first
767 		 *   time through here (i.e. before it has a meaningful value).
768 		 */
769 		if (pad_len && pad_len < s_length - 1) {
770 			prev->length += pad_len;
771 			iova_len += pad_len;
772 		}
773 
774 		iova_len += s_length;
775 		prev = s;
776 	}
777 
778 	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
779 	if (!iova)
780 		goto out_restore_sg;
781 
782 	/*
783 	 * We'll leave any physical concatenation to the IOMMU driver's
784 	 * implementation - it knows better than we do.
785 	 */
786 	if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len)
787 		goto out_free_iova;
788 
789 	return __finalise_sg(dev, sg, nents, iova);
790 
791 out_free_iova:
792 	iommu_dma_free_iova(cookie, iova, iova_len);
793 out_restore_sg:
794 	__invalidate_sg(sg, nents);
795 	return 0;
796 }
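
/*
 * Example (hypothetical ->map_sg wiring): as with single pages, the arch
 * feeds in prot flags from dma_info_to_prot() and wraps the call with its
 * own cache maintenance for non-coherent devices. A return value of 0
 * signals failure to the DMA API layer.
 *
 *	int count = iommu_dma_map_sg(dev, sgl, nents,
 *				     dma_info_to_prot(dir, coherent, attrs));
 */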
797 
798 void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
799 		enum dma_data_direction dir, unsigned long attrs)
800 {
801 	dma_addr_t start, end;
802 	struct scatterlist *tmp;
803 	int i;
804 	/*
805 	 * The scatterlist segments are mapped into a single
806 	 * contiguous IOVA allocation, so this is incredibly easy.
807 	 */
808 	start = sg_dma_address(sg);
809 	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
810 		if (sg_dma_len(tmp) == 0)
811 			break;
812 		sg = tmp;
813 	}
814 	end = sg_dma_address(sg) + sg_dma_len(sg);
815 	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
816 }
817 
818 dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
819 		size_t size, enum dma_data_direction dir, unsigned long attrs)
820 {
821 	return __iommu_dma_map(dev, phys, size,
822 			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO);
823 }
824 
825 void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
826 		size_t size, enum dma_data_direction dir, unsigned long attrs)
827 {
828 	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
829 }
830 
831 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
832 {
833 	return dma_addr == IOMMU_MAPPING_ERROR;
834 }
835 
836 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
837 		phys_addr_t msi_addr, struct iommu_domain *domain)
838 {
839 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
840 	struct iommu_dma_msi_page *msi_page;
841 	dma_addr_t iova;
842 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
843 	size_t size = cookie_msi_granule(cookie);
844 
845 	msi_addr &= ~(phys_addr_t)(size - 1);
846 	list_for_each_entry(msi_page, &cookie->msi_page_list, list)
847 		if (msi_page->phys == msi_addr)
848 			return msi_page;
849 
850 	msi_page = kzalloc(sizeof(*msi_page), GFP_ATOMIC);
851 	if (!msi_page)
852 		return NULL;
853 
854 	iova = __iommu_dma_map(dev, msi_addr, size, prot);
855 	if (iommu_dma_mapping_error(dev, iova))
856 		goto out_free_page;
857 
858 	INIT_LIST_HEAD(&msi_page->list);
859 	msi_page->phys = msi_addr;
860 	msi_page->iova = iova;
861 	list_add(&msi_page->list, &cookie->msi_page_list);
862 	return msi_page;
863 
864 out_free_page:
865 	kfree(msi_page);
866 	return NULL;
867 }
868 
869 void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
870 {
871 	struct device *dev = msi_desc_to_dev(irq_get_msi_desc(irq));
872 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
873 	struct iommu_dma_cookie *cookie;
874 	struct iommu_dma_msi_page *msi_page;
875 	phys_addr_t msi_addr = (u64)msg->address_hi << 32 | msg->address_lo;
876 	unsigned long flags;
877 
878 	if (!domain || !domain->iova_cookie)
879 		return;
880 
881 	cookie = domain->iova_cookie;
882 
883 	/*
884 	 * We disable IRQs to rule out a possible inversion against
885 	 * irq_desc_lock if, say, someone tries to retarget the affinity
886 	 * of an MSI from within an IPI handler.
887 	 */
888 	spin_lock_irqsave(&cookie->msi_lock, flags);
889 	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
890 	spin_unlock_irqrestore(&cookie->msi_lock, flags);
891 
892 	if (WARN_ON(!msi_page)) {
893 		/*
894 		 * We're called from a void callback, so the best we can do is
895 		 * 'fail' by filling the message with obviously bogus values.
896 		 * Since we got this far due to an IOMMU being present, it's
897 		 * not like the existing address would have worked anyway...
898 		 */
899 		msg->address_hi = ~0U;
900 		msg->address_lo = ~0U;
901 		msg->data = ~0U;
902 	} else {
903 		msg->address_hi = upper_32_bits(msi_page->iova);
904 		msg->address_lo &= cookie_msi_granule(cookie) - 1;
905 		msg->address_lo += lower_32_bits(msi_page->iova);
906 	}
907 }
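
/*
 * Example (hypothetical MSI irqchip): the expected caller is an MSI
 * controller driver's irq_compose_msi_msg() callback, which composes the
 * physical doorbell address and then lets this helper rewrite it with the
 * mapped IOVA when an IOMMU sits in between. The my_msi_*() helpers are
 * made-up names.
 *
 *	static void my_msi_compose_msg(struct irq_data *data,
 *				       struct msi_msg *msg)
 *	{
 *		phys_addr_t doorbell = my_msi_doorbell_address(data);
 *
 *		msg->address_hi = upper_32_bits(doorbell);
 *		msg->address_lo = lower_32_bits(doorbell);
 *		msg->data = my_msi_hwirq(data);
 *
 *		iommu_dma_map_msi_msg(data->irq, msg);
 *	}
 */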
908