xref: /openbmc/linux/drivers/iommu/iommu.c (revision fcbb0a4d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  */
6 
7 #define pr_fmt(fmt)    "iommu: " fmt
8 
9 #include <linux/amba/bus.h>
10 #include <linux/device.h>
11 #include <linux/kernel.h>
12 #include <linux/bits.h>
13 #include <linux/bug.h>
14 #include <linux/types.h>
15 #include <linux/init.h>
16 #include <linux/export.h>
17 #include <linux/slab.h>
18 #include <linux/errno.h>
19 #include <linux/host1x_context_bus.h>
20 #include <linux/iommu.h>
21 #include <linux/idr.h>
22 #include <linux/err.h>
23 #include <linux/pci.h>
24 #include <linux/pci-ats.h>
25 #include <linux/bitops.h>
26 #include <linux/platform_device.h>
27 #include <linux/property.h>
28 #include <linux/fsl/mc.h>
29 #include <linux/module.h>
30 #include <linux/cc_platform.h>
31 #include <linux/cdx/cdx_bus.h>
32 #include <trace/events/iommu.h>
33 #include <linux/sched/mm.h>
34 #include <linux/msi.h>
35 
36 #include "dma-iommu.h"
37 
38 #include "iommu-sva.h"
39 
40 static struct kset *iommu_group_kset;
41 static DEFINE_IDA(iommu_group_ida);
42 
43 static unsigned int iommu_def_domain_type __read_mostly;
44 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
45 static u32 iommu_cmd_line __read_mostly;
46 
47 struct iommu_group {
48 	struct kobject kobj;
49 	struct kobject *devices_kobj;
50 	struct list_head devices;
51 	struct xarray pasid_array;
52 	struct mutex mutex;
53 	void *iommu_data;
54 	void (*iommu_data_release)(void *iommu_data);
55 	char *name;
56 	int id;
57 	struct iommu_domain *default_domain;
58 	struct iommu_domain *blocking_domain;
59 	struct iommu_domain *domain;
60 	struct list_head entry;
61 	unsigned int owner_cnt;
62 	void *owner;
63 };
64 
65 struct group_device {
66 	struct list_head list;
67 	struct device *dev;
68 	char *name;
69 };
70 
71 /* Iterate over each struct group_device in a struct iommu_group */
72 #define for_each_group_device(group, pos) \
73 	list_for_each_entry(pos, &(group)->devices, list)
74 
75 struct iommu_group_attribute {
76 	struct attribute attr;
77 	ssize_t (*show)(struct iommu_group *group, char *buf);
78 	ssize_t (*store)(struct iommu_group *group,
79 			 const char *buf, size_t count);
80 };
81 
82 static const char * const iommu_group_resv_type_string[] = {
83 	[IOMMU_RESV_DIRECT]			= "direct",
84 	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
85 	[IOMMU_RESV_RESERVED]			= "reserved",
86 	[IOMMU_RESV_MSI]			= "msi",
87 	[IOMMU_RESV_SW_MSI]			= "msi",
88 };
89 
90 #define IOMMU_CMD_LINE_DMA_API		BIT(0)
91 #define IOMMU_CMD_LINE_STRICT		BIT(1)
92 
93 static int iommu_bus_notifier(struct notifier_block *nb,
94 			      unsigned long action, void *data);
95 static void iommu_release_device(struct device *dev);
96 static struct iommu_domain *
97 iommu_group_alloc_default_domain(struct iommu_group *group, int req_type);
98 static int iommu_get_def_domain_type(struct device *dev);
99 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
100 						 unsigned type);
101 static int __iommu_attach_device(struct iommu_domain *domain,
102 				 struct device *dev);
103 static int __iommu_attach_group(struct iommu_domain *domain,
104 				struct iommu_group *group);
105 
106 enum {
107 	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
108 };
109 
110 static int __iommu_device_set_domain(struct iommu_group *group,
111 				     struct device *dev,
112 				     struct iommu_domain *new_domain,
113 				     unsigned int flags);
114 static int __iommu_group_set_domain_internal(struct iommu_group *group,
115 					     struct iommu_domain *new_domain,
116 					     unsigned int flags);
117 static int __iommu_group_set_domain(struct iommu_group *group,
118 				    struct iommu_domain *new_domain)
119 {
120 	return __iommu_group_set_domain_internal(group, new_domain, 0);
121 }
122 static void __iommu_group_set_domain_nofail(struct iommu_group *group,
123 					    struct iommu_domain *new_domain)
124 {
125 	WARN_ON(__iommu_group_set_domain_internal(
126 		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
127 }
128 
129 static int iommu_create_device_direct_mappings(struct iommu_group *group,
130 					       struct device *dev);
131 static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
132 static ssize_t iommu_group_store_type(struct iommu_group *group,
133 				      const char *buf, size_t count);
134 
135 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
136 struct iommu_group_attribute iommu_group_attr_##_name =		\
137 	__ATTR(_name, _mode, _show, _store)
138 
139 #define to_iommu_group_attr(_attr)	\
140 	container_of(_attr, struct iommu_group_attribute, attr)
141 #define to_iommu_group(_kobj)		\
142 	container_of(_kobj, struct iommu_group, kobj)
143 
144 static LIST_HEAD(iommu_device_list);
145 static DEFINE_SPINLOCK(iommu_device_lock);
146 
147 static struct bus_type * const iommu_buses[] = {
148 	&platform_bus_type,
149 #ifdef CONFIG_PCI
150 	&pci_bus_type,
151 #endif
152 #ifdef CONFIG_ARM_AMBA
153 	&amba_bustype,
154 #endif
155 #ifdef CONFIG_FSL_MC_BUS
156 	&fsl_mc_bus_type,
157 #endif
158 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
159 	&host1x_context_device_bus_type,
160 #endif
161 #ifdef CONFIG_CDX_BUS
162 	&cdx_bus_type,
163 #endif
164 };
165 
166 /*
167  * Use a function instead of an array here because the domain-type is a
168  * bit-field, so an array would waste memory.
169  */
170 static const char *iommu_domain_type_str(unsigned int t)
171 {
172 	switch (t) {
173 	case IOMMU_DOMAIN_BLOCKED:
174 		return "Blocked";
175 	case IOMMU_DOMAIN_IDENTITY:
176 		return "Passthrough";
177 	case IOMMU_DOMAIN_UNMANAGED:
178 		return "Unmanaged";
179 	case IOMMU_DOMAIN_DMA:
180 	case IOMMU_DOMAIN_DMA_FQ:
181 		return "Translated";
182 	default:
183 		return "Unknown";
184 	}
185 }
186 
187 static int __init iommu_subsys_init(void)
188 {
189 	struct notifier_block *nb;
190 
191 	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
192 		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
193 			iommu_set_default_passthrough(false);
194 		else
195 			iommu_set_default_translated(false);
196 
197 		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
198 			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
199 			iommu_set_default_translated(false);
200 		}
201 	}
202 
203 	if (!iommu_default_passthrough() && !iommu_dma_strict)
204 		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
205 
206 	pr_info("Default domain type: %s%s\n",
207 		iommu_domain_type_str(iommu_def_domain_type),
208 		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
209 			" (set via kernel command line)" : "");
210 
211 	if (!iommu_default_passthrough())
212 		pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
213 			iommu_dma_strict ? "strict" : "lazy",
214 			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
215 				" (set via kernel command line)" : "");
216 
217 	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
218 	if (!nb)
219 		return -ENOMEM;
220 
221 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
222 		nb[i].notifier_call = iommu_bus_notifier;
223 		bus_register_notifier(iommu_buses[i], &nb[i]);
224 	}
225 
226 	return 0;
227 }
228 subsys_initcall(iommu_subsys_init);
229 
230 static int remove_iommu_group(struct device *dev, void *data)
231 {
232 	if (dev->iommu && dev->iommu->iommu_dev == data)
233 		iommu_release_device(dev);
234 
235 	return 0;
236 }
237 
238 /**
239  * iommu_device_register() - Register an IOMMU hardware instance
240  * @iommu: IOMMU handle for the instance
241  * @ops:   IOMMU ops to associate with the instance
242  * @hwdev: (optional) actual instance device, used for fwnode lookup
243  *
244  * Return: 0 on success, or an error.
245  */
246 int iommu_device_register(struct iommu_device *iommu,
247 			  const struct iommu_ops *ops, struct device *hwdev)
248 {
249 	int err = 0;
250 
251 	/* We need to be able to take module references appropriately */
252 	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
253 		return -EINVAL;
254 	/*
255 	 * Temporarily enforce global restriction to a single driver. This was
256 	 * already the de-facto behaviour, since any possible combination of
257 	 * existing drivers would compete for at least the PCI or platform bus.
258 	 */
259 	if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops)
260 		return -EBUSY;
261 
262 	iommu->ops = ops;
263 	if (hwdev)
264 		iommu->fwnode = dev_fwnode(hwdev);
265 
266 	spin_lock(&iommu_device_lock);
267 	list_add_tail(&iommu->list, &iommu_device_list);
268 	spin_unlock(&iommu_device_lock);
269 
270 	for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
271 		iommu_buses[i]->iommu_ops = ops;
272 		err = bus_iommu_probe(iommu_buses[i]);
273 	}
274 	if (err)
275 		iommu_device_unregister(iommu);
276 	return err;
277 }
278 EXPORT_SYMBOL_GPL(iommu_device_register);
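
/*
 * Illustrative sketch (not part of this file): how an IOMMU driver's probe
 * path might register its hardware instance with the core. The "my_iommu"
 * structure, my_iommu_ops and my_iommu_probe() below are hypothetical.
 *
 *	static int my_iommu_probe(struct platform_device *pdev)
 *	{
 *		struct my_iommu *smmu;
 *		int ret;
 *
 *		smmu = devm_kzalloc(&pdev->dev, sizeof(*smmu), GFP_KERNEL);
 *		if (!smmu)
 *			return -ENOMEM;
 *
 *		ret = iommu_device_register(&smmu->iommu, &my_iommu_ops,
 *					    &pdev->dev);
 *		if (ret)
 *			return ret;
 *		return 0;
 *	}
 *
 * Passing &pdev->dev as @hwdev lets the core derive the fwnode used to match
 * client devices described in firmware; on driver removal the instance is
 * torn down with iommu_device_unregister(&smmu->iommu).
 */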
279 
280 void iommu_device_unregister(struct iommu_device *iommu)
281 {
282 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
283 		bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);
284 
285 	spin_lock(&iommu_device_lock);
286 	list_del(&iommu->list);
287 	spin_unlock(&iommu_device_lock);
288 }
289 EXPORT_SYMBOL_GPL(iommu_device_unregister);
290 
291 static struct dev_iommu *dev_iommu_get(struct device *dev)
292 {
293 	struct dev_iommu *param = dev->iommu;
294 
295 	if (param)
296 		return param;
297 
298 	param = kzalloc(sizeof(*param), GFP_KERNEL);
299 	if (!param)
300 		return NULL;
301 
302 	mutex_init(&param->lock);
303 	dev->iommu = param;
304 	return param;
305 }
306 
307 static void dev_iommu_free(struct device *dev)
308 {
309 	struct dev_iommu *param = dev->iommu;
310 
311 	dev->iommu = NULL;
312 	if (param->fwspec) {
313 		fwnode_handle_put(param->fwspec->iommu_fwnode);
314 		kfree(param->fwspec);
315 	}
316 	kfree(param);
317 }
318 
319 static u32 dev_iommu_get_max_pasids(struct device *dev)
320 {
321 	u32 max_pasids = 0, bits = 0;
322 	int ret;
323 
324 	if (dev_is_pci(dev)) {
325 		ret = pci_max_pasids(to_pci_dev(dev));
326 		if (ret > 0)
327 			max_pasids = ret;
328 	} else {
329 		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
330 		if (!ret)
331 			max_pasids = 1UL << bits;
332 	}
333 
334 	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
335 }
336 
337 static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
338 {
339 	const struct iommu_ops *ops = dev->bus->iommu_ops;
340 	struct iommu_device *iommu_dev;
341 	struct iommu_group *group;
342 	static DEFINE_MUTEX(iommu_probe_device_lock);
343 	int ret;
344 
345 	if (!ops)
346 		return -ENODEV;
347 	/*
348 	 * Serialise to avoid races between IOMMU drivers registering in
349 	 * parallel and/or the "replay" calls from ACPI/OF code via client
350 	 * driver probe. Once the latter have been cleaned up we should
351 	 * probably be able to use device_lock() here to minimise the scope,
352 	 * but for now enforcing a simple global ordering is fine.
353 	 */
354 	mutex_lock(&iommu_probe_device_lock);
355 	if (!dev_iommu_get(dev)) {
356 		ret = -ENOMEM;
357 		goto err_unlock;
358 	}
359 
360 	if (!try_module_get(ops->owner)) {
361 		ret = -EINVAL;
362 		goto err_free;
363 	}
364 
365 	iommu_dev = ops->probe_device(dev);
366 	if (IS_ERR(iommu_dev)) {
367 		ret = PTR_ERR(iommu_dev);
368 		goto out_module_put;
369 	}
370 
371 	dev->iommu->iommu_dev = iommu_dev;
372 	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
373 	if (ops->is_attach_deferred)
374 		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
375 
376 	group = iommu_group_get_for_dev(dev);
377 	if (IS_ERR(group)) {
378 		ret = PTR_ERR(group);
379 		goto out_release;
380 	}
381 
382 	mutex_lock(&group->mutex);
383 	if (group_list && !group->default_domain && list_empty(&group->entry))
384 		list_add_tail(&group->entry, group_list);
385 	mutex_unlock(&group->mutex);
386 	iommu_group_put(group);
387 
388 	mutex_unlock(&iommu_probe_device_lock);
389 	iommu_device_link(iommu_dev, dev);
390 
391 	return 0;
392 
393 out_release:
394 	if (ops->release_device)
395 		ops->release_device(dev);
396 
397 out_module_put:
398 	module_put(ops->owner);
399 
400 err_free:
401 	dev_iommu_free(dev);
402 
403 err_unlock:
404 	mutex_unlock(&iommu_probe_device_lock);
405 
406 	return ret;
407 }
408 
409 int iommu_probe_device(struct device *dev)
410 {
411 	const struct iommu_ops *ops;
412 	struct iommu_group *group;
413 	int ret;
414 
415 	ret = __iommu_probe_device(dev, NULL);
416 	if (ret)
417 		goto err_out;
418 
419 	group = iommu_group_get(dev);
420 	if (!group) {
421 		ret = -ENODEV;
422 		goto err_release;
423 	}
424 
425 	mutex_lock(&group->mutex);
426 
427 	iommu_create_device_direct_mappings(group, dev);
428 
429 	if (group->domain) {
430 		ret = __iommu_device_set_domain(group, dev, group->domain, 0);
431 	} else if (!group->default_domain) {
432 		/*
433 		 * Try to allocate a default domain - needs support from the
434 		 * IOMMU driver. There are still some drivers which don't
435 		 * support default domains, so the return value is not yet
436 		 * checked.
437 		 */
438 		group->default_domain = iommu_group_alloc_default_domain(
439 			group, iommu_get_def_domain_type(dev));
440 		if (group->default_domain) {
441 			iommu_create_device_direct_mappings(group, dev);
442 			ret = __iommu_group_set_domain(group,
443 						       group->default_domain);
444 		}
445 
446 		/*
447 		 * We assume that the iommu driver starts up the device in
448 		 * 'set_platform_dma_ops' mode if it does not support default
449 		 * domains.
450 		 */
451 	}
452 	if (ret)
453 		goto err_unlock;
454 
455 	mutex_unlock(&group->mutex);
456 	iommu_group_put(group);
457 
458 	ops = dev_iommu_ops(dev);
459 	if (ops->probe_finalize)
460 		ops->probe_finalize(dev);
461 
462 	return 0;
463 
464 err_unlock:
465 	mutex_unlock(&group->mutex);
466 	iommu_group_put(group);
467 err_release:
468 	iommu_release_device(dev);
469 
470 err_out:
471 	return ret;
472 
473 }
474 
475 /*
476  * Remove a device from a group's device list and return the group device
477  * if successful.
478  */
479 static struct group_device *
480 __iommu_group_remove_device(struct iommu_group *group, struct device *dev)
481 {
482 	struct group_device *device;
483 
484 	lockdep_assert_held(&group->mutex);
485 	for_each_group_device(group, device) {
486 		if (device->dev == dev) {
487 			list_del(&device->list);
488 			return device;
489 		}
490 	}
491 
492 	return NULL;
493 }
494 
495 /*
496  * Release a device from its group and decrement the iommu group reference
497  * count.
498  */
499 static void __iommu_group_release_device(struct iommu_group *group,
500 					 struct group_device *grp_dev)
501 {
502 	struct device *dev = grp_dev->dev;
503 
504 	sysfs_remove_link(group->devices_kobj, grp_dev->name);
505 	sysfs_remove_link(&dev->kobj, "iommu_group");
506 
507 	trace_remove_device_from_group(group->id, dev);
508 
509 	kfree(grp_dev->name);
510 	kfree(grp_dev);
511 	dev->iommu_group = NULL;
512 	kobject_put(group->devices_kobj);
513 }
514 
515 static void iommu_release_device(struct device *dev)
516 {
517 	struct iommu_group *group = dev->iommu_group;
518 	struct group_device *device;
519 	const struct iommu_ops *ops;
520 
521 	if (!dev->iommu || !group)
522 		return;
523 
524 	iommu_device_unlink(dev->iommu->iommu_dev, dev);
525 
526 	mutex_lock(&group->mutex);
527 	device = __iommu_group_remove_device(group, dev);
528 
529 	/*
530 	 * If the group has become empty then ownership must have been released,
531 	 * and the current domain must be set back to NULL or the default
532 	 * domain.
533 	 */
534 	if (list_empty(&group->devices))
535 		WARN_ON(group->owner_cnt ||
536 			group->domain != group->default_domain);
537 
538 	/*
539 	 * release_device() must stop using any attached domain on the device.
540 	 * If there are still other devices in the group they are not affected
541 	 * by this callback.
542 	 *
543 	 * The IOMMU driver must set the device to either an identity or
544 	 * blocking translation and stop using any domain pointer, as it is
545 	 * going to be freed.
546 	 */
547 	ops = dev_iommu_ops(dev);
548 	if (ops->release_device)
549 		ops->release_device(dev);
550 	mutex_unlock(&group->mutex);
551 
552 	if (device)
553 		__iommu_group_release_device(group, device);
554 
555 	module_put(ops->owner);
556 	dev_iommu_free(dev);
557 }
558 
559 static int __init iommu_set_def_domain_type(char *str)
560 {
561 	bool pt;
562 	int ret;
563 
564 	ret = kstrtobool(str, &pt);
565 	if (ret)
566 		return ret;
567 
568 	if (pt)
569 		iommu_set_default_passthrough(true);
570 	else
571 		iommu_set_default_translated(true);
572 
573 	return 0;
574 }
575 early_param("iommu.passthrough", iommu_set_def_domain_type);
576 
577 static int __init iommu_dma_setup(char *str)
578 {
579 	int ret = kstrtobool(str, &iommu_dma_strict);
580 
581 	if (!ret)
582 		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
583 	return ret;
584 }
585 early_param("iommu.strict", iommu_dma_setup);
586 
587 void iommu_set_dma_strict(void)
588 {
589 	iommu_dma_strict = true;
590 	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
591 		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
592 }
593 
594 static ssize_t iommu_group_attr_show(struct kobject *kobj,
595 				     struct attribute *__attr, char *buf)
596 {
597 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
598 	struct iommu_group *group = to_iommu_group(kobj);
599 	ssize_t ret = -EIO;
600 
601 	if (attr->show)
602 		ret = attr->show(group, buf);
603 	return ret;
604 }
605 
606 static ssize_t iommu_group_attr_store(struct kobject *kobj,
607 				      struct attribute *__attr,
608 				      const char *buf, size_t count)
609 {
610 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
611 	struct iommu_group *group = to_iommu_group(kobj);
612 	ssize_t ret = -EIO;
613 
614 	if (attr->store)
615 		ret = attr->store(group, buf, count);
616 	return ret;
617 }
618 
619 static const struct sysfs_ops iommu_group_sysfs_ops = {
620 	.show = iommu_group_attr_show,
621 	.store = iommu_group_attr_store,
622 };
623 
624 static int iommu_group_create_file(struct iommu_group *group,
625 				   struct iommu_group_attribute *attr)
626 {
627 	return sysfs_create_file(&group->kobj, &attr->attr);
628 }
629 
630 static void iommu_group_remove_file(struct iommu_group *group,
631 				    struct iommu_group_attribute *attr)
632 {
633 	sysfs_remove_file(&group->kobj, &attr->attr);
634 }
635 
636 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
637 {
638 	return sysfs_emit(buf, "%s\n", group->name);
639 }
640 
641 /**
642  * iommu_insert_resv_region - Insert a new region in the
643  * list of reserved regions.
644  * @new: new region to insert
645  * @regions: list of regions
646  *
647  * Elements are sorted by start address and overlapping segments
648  * of the same type are merged.
649  */
650 static int iommu_insert_resv_region(struct iommu_resv_region *new,
651 				    struct list_head *regions)
652 {
653 	struct iommu_resv_region *iter, *tmp, *nr, *top;
654 	LIST_HEAD(stack);
655 
656 	nr = iommu_alloc_resv_region(new->start, new->length,
657 				     new->prot, new->type, GFP_KERNEL);
658 	if (!nr)
659 		return -ENOMEM;
660 
661 	/* First add the new element based on start address sorting */
662 	list_for_each_entry(iter, regions, list) {
663 		if (nr->start < iter->start ||
664 		    (nr->start == iter->start && nr->type <= iter->type))
665 			break;
666 	}
667 	list_add_tail(&nr->list, &iter->list);
668 
669 	/* Merge overlapping segments of type nr->type in @regions, if any */
670 	list_for_each_entry_safe(iter, tmp, regions, list) {
671 		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;
672 
673 		/* no merge needed on elements of different types than @new */
674 		/* no merge needed on elements of a different type than @new */
675 			list_move_tail(&iter->list, &stack);
676 			continue;
677 		}
678 
679 		/* look for the last stack element of same type as @iter */
680 		list_for_each_entry_reverse(top, &stack, list)
681 			if (top->type == iter->type)
682 				goto check_overlap;
683 
684 		list_move_tail(&iter->list, &stack);
685 		continue;
686 
687 check_overlap:
688 		top_end = top->start + top->length - 1;
689 
690 		if (iter->start > top_end + 1) {
691 			list_move_tail(&iter->list, &stack);
692 		} else {
693 			top->length = max(top_end, iter_end) - top->start + 1;
694 			list_del(&iter->list);
695 			kfree(iter);
696 		}
697 	}
698 	list_splice(&stack, regions);
699 	return 0;
700 }
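
/*
 * Worked example (illustrative): inserting two IOMMU_RESV_DIRECT regions
 * [0x1000, 0x1fff] (length 0x1000) and [0x1800, 0x2fff] (length 0x1800)
 * first sorts them by start address and then merges them, since
 * 0x1800 <= top_end + 1 (0x2000). The result is a single direct region
 * starting at 0x1000 with length max(0x1fff, 0x2fff) - 0x1000 + 1 = 0x2000.
 * Regions of different types are never merged, even when they overlap.
 */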
701 
702 static int
703 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
704 				 struct list_head *group_resv_regions)
705 {
706 	struct iommu_resv_region *entry;
707 	int ret = 0;
708 
709 	list_for_each_entry(entry, dev_resv_regions, list) {
710 		ret = iommu_insert_resv_region(entry, group_resv_regions);
711 		if (ret)
712 			break;
713 	}
714 	return ret;
715 }
716 
717 int iommu_get_group_resv_regions(struct iommu_group *group,
718 				 struct list_head *head)
719 {
720 	struct group_device *device;
721 	int ret = 0;
722 
723 	mutex_lock(&group->mutex);
724 	for_each_group_device(group, device) {
725 		struct list_head dev_resv_regions;
726 
727 		/*
728 		 * Non-API groups still expose reserved_regions in sysfs,
729 		 * so filter out calls that get here that way.
730 		 */
731 		if (!device->dev->iommu)
732 			break;
733 
734 		INIT_LIST_HEAD(&dev_resv_regions);
735 		iommu_get_resv_regions(device->dev, &dev_resv_regions);
736 		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
737 		iommu_put_resv_regions(device->dev, &dev_resv_regions);
738 		if (ret)
739 			break;
740 	}
741 	mutex_unlock(&group->mutex);
742 	return ret;
743 }
744 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);
745 
746 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
747 					     char *buf)
748 {
749 	struct iommu_resv_region *region, *next;
750 	struct list_head group_resv_regions;
751 	int offset = 0;
752 
753 	INIT_LIST_HEAD(&group_resv_regions);
754 	iommu_get_group_resv_regions(group, &group_resv_regions);
755 
756 	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
757 		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
758 					(long long)region->start,
759 					(long long)(region->start +
760 						    region->length - 1),
761 					iommu_group_resv_type_string[region->type]);
762 		kfree(region);
763 	}
764 
765 	return offset;
766 }
767 
768 static ssize_t iommu_group_show_type(struct iommu_group *group,
769 				     char *buf)
770 {
771 	char *type = "unknown";
772 
773 	mutex_lock(&group->mutex);
774 	if (group->default_domain) {
775 		switch (group->default_domain->type) {
776 		case IOMMU_DOMAIN_BLOCKED:
777 			type = "blocked";
778 			break;
779 		case IOMMU_DOMAIN_IDENTITY:
780 			type = "identity";
781 			break;
782 		case IOMMU_DOMAIN_UNMANAGED:
783 			type = "unmanaged";
784 			break;
785 		case IOMMU_DOMAIN_DMA:
786 			type = "DMA";
787 			break;
788 		case IOMMU_DOMAIN_DMA_FQ:
789 			type = "DMA-FQ";
790 			break;
791 		}
792 	}
793 	mutex_unlock(&group->mutex);
794 
795 	return sysfs_emit(buf, "%s\n", type);
796 }
797 
798 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
799 
800 static IOMMU_GROUP_ATTR(reserved_regions, 0444,
801 			iommu_group_show_resv_regions, NULL);
802 
803 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
804 			iommu_group_store_type);
805 
806 static void iommu_group_release(struct kobject *kobj)
807 {
808 	struct iommu_group *group = to_iommu_group(kobj);
809 
810 	pr_debug("Releasing group %d\n", group->id);
811 
812 	if (group->iommu_data_release)
813 		group->iommu_data_release(group->iommu_data);
814 
815 	ida_free(&iommu_group_ida, group->id);
816 
817 	if (group->default_domain)
818 		iommu_domain_free(group->default_domain);
819 	if (group->blocking_domain)
820 		iommu_domain_free(group->blocking_domain);
821 
822 	kfree(group->name);
823 	kfree(group);
824 }
825 
826 static const struct kobj_type iommu_group_ktype = {
827 	.sysfs_ops = &iommu_group_sysfs_ops,
828 	.release = iommu_group_release,
829 };
830 
831 /**
832  * iommu_group_alloc - Allocate a new group
833  *
834  * This function is called by an iommu driver to allocate a new iommu
835  * group.  The iommu group represents the minimum granularity of the iommu.
836  * Upon successful return, the caller holds a reference to the supplied
837  * group in order to hold the group until devices are added.  Use
838  * iommu_group_put() to release this extra reference count, allowing the
839  * group to be automatically reclaimed once it has no devices or external
840  * references.
841  */
842 struct iommu_group *iommu_group_alloc(void)
843 {
844 	struct iommu_group *group;
845 	int ret;
846 
847 	group = kzalloc(sizeof(*group), GFP_KERNEL);
848 	if (!group)
849 		return ERR_PTR(-ENOMEM);
850 
851 	group->kobj.kset = iommu_group_kset;
852 	mutex_init(&group->mutex);
853 	INIT_LIST_HEAD(&group->devices);
854 	INIT_LIST_HEAD(&group->entry);
855 	xa_init(&group->pasid_array);
856 
857 	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
858 	if (ret < 0) {
859 		kfree(group);
860 		return ERR_PTR(ret);
861 	}
862 	group->id = ret;
863 
864 	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
865 				   NULL, "%d", group->id);
866 	if (ret) {
867 		kobject_put(&group->kobj);
868 		return ERR_PTR(ret);
869 	}
870 
871 	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
872 	if (!group->devices_kobj) {
873 		kobject_put(&group->kobj); /* triggers .release & free */
874 		return ERR_PTR(-ENOMEM);
875 	}
876 
877 	/*
878 	 * The devices_kobj holds a reference on the group kobject, so
879 	 * as long as that exists so will the group.  We can therefore
880 	 * use the devices_kobj for reference counting.
881 	 */
882 	kobject_put(&group->kobj);
883 
884 	ret = iommu_group_create_file(group,
885 				      &iommu_group_attr_reserved_regions);
886 	if (ret) {
887 		kobject_put(group->devices_kobj);
888 		return ERR_PTR(ret);
889 	}
890 
891 	ret = iommu_group_create_file(group, &iommu_group_attr_type);
892 	if (ret) {
893 		kobject_put(group->devices_kobj);
894 		return ERR_PTR(ret);
895 	}
896 
897 	pr_debug("Allocated group %d\n", group->id);
898 
899 	return group;
900 }
901 EXPORT_SYMBOL_GPL(iommu_group_alloc);
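
/*
 * Illustrative sketch (not part of this file): a driver's ->device_group()
 * callback typically either shares a previously allocated group, when the
 * hardware cannot isolate two masters, or allocates a fresh one here. The
 * "my_iommu_master" structure and its fields are hypothetical.
 *
 *	static struct iommu_group *my_iommu_device_group(struct device *dev)
 *	{
 *		struct my_iommu_master *master = dev_iommu_priv_get(dev);
 *
 *		if (master->smmu->shared_group)
 *			return iommu_group_ref_get(master->smmu->shared_group);
 *
 *		return iommu_group_alloc();
 *	}
 */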
902 
903 /**
904  * iommu_group_get_iommudata - retrieve iommu_data registered for a group
905  * @group: the group
906  *
907  * iommu drivers can store data in the group for use when doing iommu
908  * operations.  This function provides a way to retrieve it.  Caller
909  * should hold a group reference.
910  */
911 void *iommu_group_get_iommudata(struct iommu_group *group)
912 {
913 	return group->iommu_data;
914 }
915 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
916 
917 /**
918  * iommu_group_set_iommudata - set iommu_data for a group
919  * @group: the group
920  * @iommu_data: new data
921  * @release: release function for iommu_data
922  *
923  * iommu drivers can store data in the group for use when doing iommu
924  * operations.  This function provides a way to set the data after
925  * the group has been allocated.  Caller should hold a group reference.
926  */
927 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
928 			       void (*release)(void *iommu_data))
929 {
930 	group->iommu_data = iommu_data;
931 	group->iommu_data_release = release;
932 }
933 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
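
/*
 * Illustrative sketch (not part of this file): hanging private data off a
 * group. The "my_group_data" structure and my_group_data_release() are
 * hypothetical.
 *
 *	data = kzalloc(sizeof(*data), GFP_KERNEL);
 *	if (!data)
 *		return -ENOMEM;
 *	iommu_group_set_iommudata(group, data, my_group_data_release);
 *
 * iommu_group_get_iommudata(group) later returns the same pointer, and the
 * release callback runs from iommu_group_release() once the last group
 * reference is dropped.
 */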
934 
935 /**
936  * iommu_group_set_name - set name for a group
937  * @group: the group
938  * @name: name
939  *
940  * Allow iommu driver to set a name for a group.  When set it will
941  * appear in a name attribute file under the group in sysfs.
942  */
943 int iommu_group_set_name(struct iommu_group *group, const char *name)
944 {
945 	int ret;
946 
947 	if (group->name) {
948 		iommu_group_remove_file(group, &iommu_group_attr_name);
949 		kfree(group->name);
950 		group->name = NULL;
951 		if (!name)
952 			return 0;
953 	}
954 
955 	group->name = kstrdup(name, GFP_KERNEL);
956 	if (!group->name)
957 		return -ENOMEM;
958 
959 	ret = iommu_group_create_file(group, &iommu_group_attr_name);
960 	if (ret) {
961 		kfree(group->name);
962 		group->name = NULL;
963 		return ret;
964 	}
965 
966 	return 0;
967 }
968 EXPORT_SYMBOL_GPL(iommu_group_set_name);
969 
970 static int iommu_create_device_direct_mappings(struct iommu_group *group,
971 					       struct device *dev)
972 {
973 	struct iommu_domain *domain = group->default_domain;
974 	struct iommu_resv_region *entry;
975 	struct list_head mappings;
976 	unsigned long pg_size;
977 	int ret = 0;
978 
979 	if (!domain || !iommu_is_dma_domain(domain))
980 		return 0;
981 
982 	BUG_ON(!domain->pgsize_bitmap);
983 
984 	pg_size = 1UL << __ffs(domain->pgsize_bitmap);
985 	INIT_LIST_HEAD(&mappings);
986 
987 	iommu_get_resv_regions(dev, &mappings);
988 
989 	/* We need to consider overlapping regions for different devices */
990 	list_for_each_entry(entry, &mappings, list) {
991 		dma_addr_t start, end, addr;
992 		size_t map_size = 0;
993 
994 		start = ALIGN(entry->start, pg_size);
995 		end   = ALIGN(entry->start + entry->length, pg_size);
996 
997 		if (entry->type != IOMMU_RESV_DIRECT &&
998 		    entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
999 			continue;
1000 
1001 		for (addr = start; addr <= end; addr += pg_size) {
1002 			phys_addr_t phys_addr;
1003 
1004 			if (addr == end)
1005 				goto map_end;
1006 
1007 			phys_addr = iommu_iova_to_phys(domain, addr);
1008 			if (!phys_addr) {
1009 				map_size += pg_size;
1010 				continue;
1011 			}
1012 
1013 map_end:
1014 			if (map_size) {
1015 				ret = iommu_map(domain, addr - map_size,
1016 						addr - map_size, map_size,
1017 						entry->prot, GFP_KERNEL);
1018 				if (ret)
1019 					goto out;
1020 				map_size = 0;
1021 			}
1022 		}
1023 
1024 	}
1025 
1026 	iommu_flush_iotlb_all(domain);
1027 
1028 out:
1029 	iommu_put_resv_regions(dev, &mappings);
1030 
1031 	return ret;
1032 }
1033 
1034 /**
1035  * iommu_group_add_device - add a device to an iommu group
1036  * @group: the group into which to add the device (reference should be held)
1037  * @dev: the device
1038  *
1039  * This function is called by an iommu driver to add a device into a
1040  * group.  Adding a device increments the group reference count.
1041  */
1042 int iommu_group_add_device(struct iommu_group *group, struct device *dev)
1043 {
1044 	int ret, i = 0;
1045 	struct group_device *device;
1046 
1047 	device = kzalloc(sizeof(*device), GFP_KERNEL);
1048 	if (!device)
1049 		return -ENOMEM;
1050 
1051 	device->dev = dev;
1052 
1053 	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
1054 	if (ret)
1055 		goto err_free_device;
1056 
1057 	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
1058 rename:
1059 	if (!device->name) {
1060 		ret = -ENOMEM;
1061 		goto err_remove_link;
1062 	}
1063 
1064 	ret = sysfs_create_link_nowarn(group->devices_kobj,
1065 				       &dev->kobj, device->name);
1066 	if (ret) {
1067 		if (ret == -EEXIST && i >= 0) {
1068 			/*
1069 			 * Account for the slim chance of collision
1070 			 * and append an instance to the name.
1071 			 */
1072 			kfree(device->name);
1073 			device->name = kasprintf(GFP_KERNEL, "%s.%d",
1074 						 kobject_name(&dev->kobj), i++);
1075 			goto rename;
1076 		}
1077 		goto err_free_name;
1078 	}
1079 
1080 	kobject_get(group->devices_kobj);
1081 
1082 	dev->iommu_group = group;
1083 
1084 	mutex_lock(&group->mutex);
1085 	list_add_tail(&device->list, &group->devices);
1086 	mutex_unlock(&group->mutex);
1087 	trace_add_device_to_group(group->id, dev);
1088 
1089 	dev_info(dev, "Adding to iommu group %d\n", group->id);
1090 
1091 	return 0;
1092 
1093 err_free_name:
1094 	kfree(device->name);
1095 err_remove_link:
1096 	sysfs_remove_link(&dev->kobj, "iommu_group");
1097 err_free_device:
1098 	kfree(device);
1099 	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
1100 	return ret;
1101 }
1102 EXPORT_SYMBOL_GPL(iommu_group_add_device);
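
/*
 * Illustrative sketch (not part of this file): the classic pattern used by
 * drivers that manage groups themselves - allocate (or look up) a group,
 * add the device, then drop the local reference taken by the allocation.
 *
 *	group = iommu_group_alloc();
 *	if (IS_ERR(group))
 *		return PTR_ERR(group);
 *
 *	ret = iommu_group_add_device(group, dev);
 *	iommu_group_put(group);
 *	if (ret)
 *		return ret;
 *
 * The device itself keeps the group alive via the reference taken in
 * iommu_group_add_device() until iommu_group_remove_device() is called.
 */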
1103 
1104 /**
1105  * iommu_group_remove_device - remove a device from its current group
1106  * @dev: device to be removed
1107  *
1108  * This function is called by an iommu driver to remove the device from
1109  * it's current group.  This decrements the iommu group reference count.
1110  * its current group.  This decrements the iommu group reference count.
1111 void iommu_group_remove_device(struct device *dev)
1112 {
1113 	struct iommu_group *group = dev->iommu_group;
1114 	struct group_device *device;
1115 
1116 	if (!group)
1117 		return;
1118 
1119 	dev_info(dev, "Removing from iommu group %d\n", group->id);
1120 
1121 	mutex_lock(&group->mutex);
1122 	device = __iommu_group_remove_device(group, dev);
1123 	mutex_unlock(&group->mutex);
1124 
1125 	if (device)
1126 		__iommu_group_release_device(group, device);
1127 }
1128 EXPORT_SYMBOL_GPL(iommu_group_remove_device);
1129 
1130 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
1131 				      int (*fn)(struct device *, void *))
1132 {
1133 	struct group_device *device;
1134 	int ret = 0;
1135 
1136 	for_each_group_device(group, device) {
1137 		ret = fn(device->dev, data);
1138 		if (ret)
1139 			break;
1140 	}
1141 	return ret;
1142 }
1143 
1144 /**
1145  * iommu_group_for_each_dev - iterate over each device in the group
1146  * @group: the group
1147  * @data: caller opaque data to be passed to callback function
1148  * @fn: caller supplied callback function
1149  *
1150  * This function is called by group users to iterate over group devices.
1151  * Callers should hold a reference count to the group during callback.
1152  * The group->mutex is held across callbacks, which will block calls to
1153  * iommu_group_add/remove_device.
1154  */
1155 int iommu_group_for_each_dev(struct iommu_group *group, void *data,
1156 			     int (*fn)(struct device *, void *))
1157 {
1158 	int ret;
1159 
1160 	mutex_lock(&group->mutex);
1161 	ret = __iommu_group_for_each_dev(group, data, fn);
1162 	mutex_unlock(&group->mutex);
1163 
1164 	return ret;
1165 }
1166 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
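
/*
 * Illustrative sketch (not part of this file): a simple callback counting
 * the devices in a group. my_count_device() is hypothetical; a non-zero
 * return value stops the iteration and is propagated to the caller.
 *
 *	static int my_count_device(struct device *dev, void *data)
 *	{
 *		int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *
 *	iommu_group_for_each_dev(group, &count, my_count_device);
 */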
1167 
1168 /**
1169  * iommu_group_get - Return the group for a device and increment reference
1170  * @dev: get the group that this device belongs to
1171  *
1172  * This function is called by iommu drivers and users to get the group
1173  * for the specified device.  If found, the group is returned and the group
1174  * reference is incremented, else NULL.
1175  */
1176 struct iommu_group *iommu_group_get(struct device *dev)
1177 {
1178 	struct iommu_group *group = dev->iommu_group;
1179 
1180 	if (group)
1181 		kobject_get(group->devices_kobj);
1182 
1183 	return group;
1184 }
1185 EXPORT_SYMBOL_GPL(iommu_group_get);
1186 
1187 /**
1188  * iommu_group_ref_get - Increment reference on a group
1189  * @group: the group to use, must not be NULL
1190  *
1191  * This function is called by iommu drivers to take additional references on an
1192  * existing group.  Returns the given group for convenience.
1193  */
1194 struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
1195 {
1196 	kobject_get(group->devices_kobj);
1197 	return group;
1198 }
1199 EXPORT_SYMBOL_GPL(iommu_group_ref_get);
1200 
1201 /**
1202  * iommu_group_put - Decrement group reference
1203  * @group: the group to use
1204  *
1205  * This function is called by iommu drivers and users to release the
1206  * iommu group.  Once the reference count is zero, the group is released.
1207  */
1208 void iommu_group_put(struct iommu_group *group)
1209 {
1210 	if (group)
1211 		kobject_put(group->devices_kobj);
1212 }
1213 EXPORT_SYMBOL_GPL(iommu_group_put);
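
/*
 * Usage note (illustrative): the usual lookup/release pairing around a
 * short-lived use of the group.
 *
 *	group = iommu_group_get(dev);
 *	if (!group)
 *		return -ENODEV;
 *
 *	(use the group, e.g. iommu_group_id(group))
 *
 *	iommu_group_put(group);
 */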
1214 
1215 /**
1216  * iommu_register_device_fault_handler() - Register a device fault handler
1217  * @dev: the device
1218  * @handler: the fault handler
1219  * @data: private data passed as argument to the handler
1220  *
1221  * When an IOMMU fault event is received, this handler gets called with the
1222  * fault event and data as argument. The handler should return 0 on success. If
1223  * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
1224  * complete the fault by calling iommu_page_response() with one of the following
1225  * response code:
1226  * response codes:
1227  * - IOMMU_PAGE_RESP_INVALID: terminate the fault
1228  * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
1229  *   page faults if possible.
1230  *
1231  * Return 0 if the fault handler was installed successfully, or an error.
1232  */
1233 int iommu_register_device_fault_handler(struct device *dev,
1234 					iommu_dev_fault_handler_t handler,
1235 					void *data)
1236 {
1237 	struct dev_iommu *param = dev->iommu;
1238 	int ret = 0;
1239 
1240 	if (!param)
1241 		return -EINVAL;
1242 
1243 	mutex_lock(&param->lock);
1244 	/* Only allow one fault handler registered for each device */
1245 	if (param->fault_param) {
1246 		ret = -EBUSY;
1247 		goto done_unlock;
1248 	}
1249 
1250 	get_device(dev);
1251 	param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
1252 	if (!param->fault_param) {
1253 		put_device(dev);
1254 		ret = -ENOMEM;
1255 		goto done_unlock;
1256 	}
1257 	param->fault_param->handler = handler;
1258 	param->fault_param->data = data;
1259 	mutex_init(&param->fault_param->lock);
1260 	INIT_LIST_HEAD(&param->fault_param->faults);
1261 
1262 done_unlock:
1263 	mutex_unlock(&param->lock);
1264 
1265 	return ret;
1266 }
1267 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
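
/*
 * Illustrative sketch (not part of this file): registering a fault handler
 * that defers recoverable page requests to a workqueue. The "my_ctx"
 * structure, its fields and my_iopf_handler() are hypothetical.
 *
 *	static int my_iopf_handler(struct iommu_fault *fault, void *data)
 *	{
 *		struct my_ctx *ctx = data;
 *
 *		if (fault->type != IOMMU_FAULT_PAGE_REQ)
 *			return -EOPNOTSUPP;
 *
 *		queue_work(ctx->wq, &ctx->iopf_work);
 *		return 0;
 *	}
 *
 *	ret = iommu_register_device_fault_handler(dev, my_iopf_handler, ctx);
 *
 * Each recoverable fault accepted this way must eventually be completed by
 * calling iommu_page_response() with IOMMU_PAGE_RESP_SUCCESS or
 * IOMMU_PAGE_RESP_INVALID, as described above.
 */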
1268 
1269 /**
1270  * iommu_unregister_device_fault_handler() - Unregister the device fault handler
1271  * @dev: the device
1272  *
1273  * Remove the device fault handler installed with
1274  * iommu_register_device_fault_handler().
1275  *
1276  * Return 0 on success, or an error.
1277  */
1278 int iommu_unregister_device_fault_handler(struct device *dev)
1279 {
1280 	struct dev_iommu *param = dev->iommu;
1281 	int ret = 0;
1282 
1283 	if (!param)
1284 		return -EINVAL;
1285 
1286 	mutex_lock(&param->lock);
1287 
1288 	if (!param->fault_param)
1289 		goto unlock;
1290 
1291 	/* we cannot unregister handler if there are pending faults */
1292 	if (!list_empty(&param->fault_param->faults)) {
1293 		ret = -EBUSY;
1294 		goto unlock;
1295 	}
1296 
1297 	kfree(param->fault_param);
1298 	param->fault_param = NULL;
1299 	put_device(dev);
1300 unlock:
1301 	mutex_unlock(&param->lock);
1302 
1303 	return ret;
1304 }
1305 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
1306 
1307 /**
1308  * iommu_report_device_fault() - Report fault event to device driver
1309  * @dev: the device
1310  * @evt: fault event data
1311  *
1312  * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
1313  * handler. When this function fails and the fault is recoverable, it is the
1314  * caller's responsibility to complete the fault.
1315  *
1316  * Return 0 on success, or an error.
1317  */
1318 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
1319 {
1320 	struct dev_iommu *param = dev->iommu;
1321 	struct iommu_fault_event *evt_pending = NULL;
1322 	struct iommu_fault_param *fparam;
1323 	int ret = 0;
1324 
1325 	if (!param || !evt)
1326 		return -EINVAL;
1327 
1328 	/* we only report device fault if there is a handler registered */
1329 	mutex_lock(&param->lock);
1330 	fparam = param->fault_param;
1331 	if (!fparam || !fparam->handler) {
1332 		ret = -EINVAL;
1333 		goto done_unlock;
1334 	}
1335 
1336 	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
1337 	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
1338 		evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
1339 				      GFP_KERNEL);
1340 		if (!evt_pending) {
1341 			ret = -ENOMEM;
1342 			goto done_unlock;
1343 		}
1344 		mutex_lock(&fparam->lock);
1345 		list_add_tail(&evt_pending->list, &fparam->faults);
1346 		mutex_unlock(&fparam->lock);
1347 	}
1348 
1349 	ret = fparam->handler(&evt->fault, fparam->data);
1350 	if (ret && evt_pending) {
1351 		mutex_lock(&fparam->lock);
1352 		list_del(&evt_pending->list);
1353 		mutex_unlock(&fparam->lock);
1354 		kfree(evt_pending);
1355 	}
1356 done_unlock:
1357 	mutex_unlock(&param->lock);
1358 	return ret;
1359 }
1360 EXPORT_SYMBOL_GPL(iommu_report_device_fault);
1361 
1362 int iommu_page_response(struct device *dev,
1363 			struct iommu_page_response *msg)
1364 {
1365 	bool needs_pasid;
1366 	int ret = -EINVAL;
1367 	struct iommu_fault_event *evt;
1368 	struct iommu_fault_page_request *prm;
1369 	struct dev_iommu *param = dev->iommu;
1370 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1371 	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
1372 
1373 	if (!ops->page_response)
1374 		return -ENODEV;
1375 
1376 	if (!param || !param->fault_param)
1377 		return -EINVAL;
1378 
1379 	if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
1380 	    msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
1381 		return -EINVAL;
1382 
1383 	/* Only send response if there is a fault report pending */
1384 	mutex_lock(&param->fault_param->lock);
1385 	if (list_empty(&param->fault_param->faults)) {
1386 		dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
1387 		goto done_unlock;
1388 	}
1389 	/*
1390 	 * Check if we have a matching page request pending to respond,
1391 	 * otherwise return -EINVAL
1392 	 */
1393 	list_for_each_entry(evt, &param->fault_param->faults, list) {
1394 		prm = &evt->fault.prm;
1395 		if (prm->grpid != msg->grpid)
1396 			continue;
1397 
1398 		/*
1399 		 * If the PASID is required, the corresponding request is
1400 		 * matched using the group ID, the PASID valid bit and the PASID
1401  * value. Otherwise only the group ID is used to match the request and response.
1402 		 * response.
1403 		 */
1404 		needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
1405 		if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
1406 			continue;
1407 
1408 		if (!needs_pasid && has_pasid) {
1409 			/* No big deal, just clear it. */
1410 			msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
1411 			msg->pasid = 0;
1412 		}
1413 
1414 		ret = ops->page_response(dev, evt, msg);
1415 		list_del(&evt->list);
1416 		kfree(evt);
1417 		break;
1418 	}
1419 
1420 done_unlock:
1421 	mutex_unlock(&param->fault_param->lock);
1422 	return ret;
1423 }
1424 EXPORT_SYMBOL_GPL(iommu_page_response);
1425 
1426 /**
1427  * iommu_group_id - Return ID for a group
1428  * @group: the group to ID
1429  *
1430  * Return the unique ID for the group matching the sysfs group number.
1431  */
1432 int iommu_group_id(struct iommu_group *group)
1433 {
1434 	return group->id;
1435 }
1436 EXPORT_SYMBOL_GPL(iommu_group_id);
1437 
1438 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
1439 					       unsigned long *devfns);
1440 
1441 /*
1442  * To consider a PCI device isolated, we require ACS to support Source
1443  * Validation, Request Redirection, Completer Redirection, and Upstream
1444  * Forwarding.  This effectively means that devices cannot spoof their
1445  * requester ID, requests and completions cannot be redirected, and all
1446  * transactions are forwarded upstream, even as they pass through a
1447  * bridge where the target device is downstream.
1448  */
1449 #define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
1450 
1451 /*
1452  * For multifunction devices which are not isolated from each other, find
1453  * all the other non-isolated functions and look for existing groups.  For
1454  * each function, we also need to look for aliases to or from other devices
1455  * that may already have a group.
1456  */
1457 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
1458 							unsigned long *devfns)
1459 {
1460 	struct pci_dev *tmp = NULL;
1461 	struct iommu_group *group;
1462 
1463 	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
1464 		return NULL;
1465 
1466 	for_each_pci_dev(tmp) {
1467 		if (tmp == pdev || tmp->bus != pdev->bus ||
1468 		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
1469 		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
1470 			continue;
1471 
1472 		group = get_pci_alias_group(tmp, devfns);
1473 		if (group) {
1474 			pci_dev_put(tmp);
1475 			return group;
1476 		}
1477 	}
1478 
1479 	return NULL;
1480 }
1481 
1482 /*
1483  * Look for aliases to or from the given device for existing groups. DMA
1484  * aliases are only supported on the same bus, therefore the search
1485  * space is quite small (especially since we're really only looking at PCIe
1486  * devices, and therefore only expect multiple slots on the root complex or
1487  * downstream switch ports).  It's conceivable though that a pair of
1488  * multifunction devices could have aliases between them that would cause a
1489  * loop.  To prevent this, we use a bitmap to track where we've been.
1490  */
1491 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
1492 					       unsigned long *devfns)
1493 {
1494 	struct pci_dev *tmp = NULL;
1495 	struct iommu_group *group;
1496 
1497 	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
1498 		return NULL;
1499 
1500 	group = iommu_group_get(&pdev->dev);
1501 	if (group)
1502 		return group;
1503 
1504 	for_each_pci_dev(tmp) {
1505 		if (tmp == pdev || tmp->bus != pdev->bus)
1506 			continue;
1507 
1508 		/* We alias them or they alias us */
1509 		if (pci_devs_are_dma_aliases(pdev, tmp)) {
1510 			group = get_pci_alias_group(tmp, devfns);
1511 			if (group) {
1512 				pci_dev_put(tmp);
1513 				return group;
1514 			}
1515 
1516 			group = get_pci_function_alias_group(tmp, devfns);
1517 			if (group) {
1518 				pci_dev_put(tmp);
1519 				return group;
1520 			}
1521 		}
1522 	}
1523 
1524 	return NULL;
1525 }
1526 
1527 struct group_for_pci_data {
1528 	struct pci_dev *pdev;
1529 	struct iommu_group *group;
1530 };
1531 
1532 /*
1533  * DMA alias iterator callback, return the last seen device.  Stop and return
1534  * the IOMMU group if we find one along the way.
1535  */
1536 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
1537 {
1538 	struct group_for_pci_data *data = opaque;
1539 
1540 	data->pdev = pdev;
1541 	data->group = iommu_group_get(&pdev->dev);
1542 
1543 	return data->group != NULL;
1544 }
1545 
1546 /*
1547  * Generic device_group call-back function. It just allocates one
1548  * iommu-group per device.
1549  */
1550 struct iommu_group *generic_device_group(struct device *dev)
1551 {
1552 	return iommu_group_alloc();
1553 }
1554 EXPORT_SYMBOL_GPL(generic_device_group);
1555 
1556 /*
1557  * Use standard PCI bus topology, isolation features, and DMA alias quirks
1558  * to find or create an IOMMU group for a device.
1559  */
1560 struct iommu_group *pci_device_group(struct device *dev)
1561 {
1562 	struct pci_dev *pdev = to_pci_dev(dev);
1563 	struct group_for_pci_data data;
1564 	struct pci_bus *bus;
1565 	struct iommu_group *group = NULL;
1566 	u64 devfns[4] = { 0 };
1567 
1568 	if (WARN_ON(!dev_is_pci(dev)))
1569 		return ERR_PTR(-EINVAL);
1570 
1571 	/*
1572 	 * Find the upstream DMA alias for the device.  A device must not
1573 	 * be aliased due to topology in order to have its own IOMMU group.
1574 	 * If we find an alias along the way that already belongs to a
1575 	 * group, use it.
1576 	 */
1577 	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
1578 		return data.group;
1579 
1580 	pdev = data.pdev;
1581 
1582 	/*
1583 	 * Continue upstream from the point of minimum IOMMU granularity
1584 	 * due to aliases to the point where devices are protected from
1585 	 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
1586 	 * group, use it.
1587 	 */
1588 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
1589 		if (!bus->self)
1590 			continue;
1591 
1592 		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
1593 			break;
1594 
1595 		pdev = bus->self;
1596 
1597 		group = iommu_group_get(&pdev->dev);
1598 		if (group)
1599 			return group;
1600 	}
1601 
1602 	/*
1603 	 * Look for existing groups on device aliases.  If we alias another
1604 	 * device or another device aliases us, use the same group.
1605 	 */
1606 	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
1607 	if (group)
1608 		return group;
1609 
1610 	/*
1611 	 * Look for existing groups on non-isolated functions on the same
1612  * slot and aliases of those functions, if any.  No need to clear
1613 	 * the search bitmap, the tested devfns are still valid.
1614 	 */
1615 	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
1616 	if (group)
1617 		return group;
1618 
1619 	/* No shared group found, allocate new */
1620 	return iommu_group_alloc();
1621 }
1622 EXPORT_SYMBOL_GPL(pci_device_group);
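
/*
 * Illustrative sketch (not part of this file): drivers that handle both PCI
 * and platform masters commonly dispatch between the generic helpers in
 * their ->device_group() callback; my_device_group() is hypothetical.
 *
 *	static struct iommu_group *my_device_group(struct device *dev)
 *	{
 *		if (dev_is_pci(dev))
 *			return pci_device_group(dev);
 *
 *		return generic_device_group(dev);
 *	}
 */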
1623 
1624 /* Get the IOMMU group for device on fsl-mc bus */
1625 struct iommu_group *fsl_mc_device_group(struct device *dev)
1626 {
1627 	struct device *cont_dev = fsl_mc_cont_dev(dev);
1628 	struct iommu_group *group;
1629 
1630 	group = iommu_group_get(cont_dev);
1631 	if (!group)
1632 		group = iommu_group_alloc();
1633 	return group;
1634 }
1635 EXPORT_SYMBOL_GPL(fsl_mc_device_group);
1636 
1637 static int iommu_get_def_domain_type(struct device *dev)
1638 {
1639 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1640 
1641 	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
1642 		return IOMMU_DOMAIN_DMA;
1643 
1644 	if (ops->def_domain_type)
1645 		return ops->def_domain_type(dev);
1646 
1647 	return 0;
1648 }
1649 
1650 /*
1651  * A req_type of 0 means "auto": select a domain type based on
1652  * iommu_def_domain_type or what the driver actually supports.
1653  */
1654 static struct iommu_domain *
1655 iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
1656 {
1657 	const struct bus_type *bus =
1658 		list_first_entry(&group->devices, struct group_device, list)
1659 			->dev->bus;
1660 	struct iommu_domain *dom;
1661 
1662 	lockdep_assert_held(&group->mutex);
1663 
1664 	if (req_type)
1665 		return __iommu_domain_alloc(bus, req_type);
1666 
1667 	/* The driver gave no guidance on what type to use, try the default */
1668 	dom = __iommu_domain_alloc(bus, iommu_def_domain_type);
1669 	if (dom)
1670 		return dom;
1671 
1672 	/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
1673 	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
1674 		return NULL;
1675 	dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
1676 	if (!dom)
1677 		return NULL;
1678 
1679 	pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
1680 		iommu_def_domain_type, group->name);
1681 	return dom;
1682 }
1683 
1684 /**
1685  * iommu_group_get_for_dev - Find or create the IOMMU group for a device
1686  * @dev: target device
1687  *
1688  * This function is intended to be called by IOMMU drivers and extended to
1689  * support common, bus-defined algorithms when determining or creating the
1690  * IOMMU group for a device.  On success, the caller will hold a reference
1691  * to the returned IOMMU group, which will already include the provided
1692  * device.  The reference should be released with iommu_group_put().
1693  */
1694 static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
1695 {
1696 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1697 	struct iommu_group *group;
1698 	int ret;
1699 
1700 	group = iommu_group_get(dev);
1701 	if (group)
1702 		return group;
1703 
1704 	group = ops->device_group(dev);
1705 	if (WARN_ON_ONCE(group == NULL))
1706 		return ERR_PTR(-EINVAL);
1707 
1708 	if (IS_ERR(group))
1709 		return group;
1710 
1711 	ret = iommu_group_add_device(group, dev);
1712 	if (ret)
1713 		goto out_put_group;
1714 
1715 	return group;
1716 
1717 out_put_group:
1718 	iommu_group_put(group);
1719 
1720 	return ERR_PTR(ret);
1721 }
1722 
1723 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
1724 {
1725 	return group->default_domain;
1726 }
1727 
1728 static int probe_iommu_group(struct device *dev, void *data)
1729 {
1730 	struct list_head *group_list = data;
1731 	struct iommu_group *group;
1732 	int ret;
1733 
1734 	/* Device is probed already if in a group */
1735 	group = iommu_group_get(dev);
1736 	if (group) {
1737 		iommu_group_put(group);
1738 		return 0;
1739 	}
1740 
1741 	ret = __iommu_probe_device(dev, group_list);
1742 	if (ret == -ENODEV)
1743 		ret = 0;
1744 
1745 	return ret;
1746 }
1747 
1748 static int iommu_bus_notifier(struct notifier_block *nb,
1749 			      unsigned long action, void *data)
1750 {
1751 	struct device *dev = data;
1752 
1753 	if (action == BUS_NOTIFY_ADD_DEVICE) {
1754 		int ret;
1755 
1756 		ret = iommu_probe_device(dev);
1757 		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
1758 	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
1759 		iommu_release_device(dev);
1760 		return NOTIFY_OK;
1761 	}
1762 
1763 	return 0;
1764 }
1765 
1766 /* A target_type of 0 will select the best domain type and cannot fail */
1767 static int iommu_get_default_domain_type(struct iommu_group *group,
1768 					 int target_type)
1769 {
1770 	int best_type = target_type;
1771 	struct group_device *gdev;
1772 	struct device *last_dev;
1773 
1774 	lockdep_assert_held(&group->mutex);
1775 
1776 	for_each_group_device(group, gdev) {
1777 		unsigned int type = iommu_get_def_domain_type(gdev->dev);
1778 
1779 		if (best_type && type && best_type != type) {
1780 			if (target_type) {
1781 				dev_err_ratelimited(
1782 					gdev->dev,
1783 					"Device cannot be in %s domain\n",
1784 					iommu_domain_type_str(target_type));
1785 				return -1;
1786 			}
1787 
1788 			dev_warn(
1789 				gdev->dev,
1790 				"Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
1791 				iommu_domain_type_str(type), dev_name(last_dev),
1792 				iommu_domain_type_str(best_type));
1793 			return 0;
1794 		}
1795 		if (!best_type)
1796 			best_type = type;
1797 		last_dev = gdev->dev;
1798 	}
1799 	return best_type;
1800 }
1801 
1802 static int iommu_group_do_probe_finalize(struct device *dev, void *data)
1803 {
1804 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1805 
1806 	if (ops->probe_finalize)
1807 		ops->probe_finalize(dev);
1808 
1809 	return 0;
1810 }
1811 
1812 static void __iommu_group_dma_finalize(struct iommu_group *group)
1813 {
1814 	__iommu_group_for_each_dev(group, group->default_domain,
1815 				   iommu_group_do_probe_finalize);
1816 }
1817 
1818 static int iommu_do_create_direct_mappings(struct device *dev, void *data)
1819 {
1820 	struct iommu_group *group = data;
1821 
1822 	iommu_create_device_direct_mappings(group, dev);
1823 
1824 	return 0;
1825 }
1826 
1827 static int iommu_group_create_direct_mappings(struct iommu_group *group)
1828 {
1829 	return __iommu_group_for_each_dev(group, group,
1830 					  iommu_do_create_direct_mappings);
1831 }
1832 
1833 int bus_iommu_probe(const struct bus_type *bus)
1834 {
1835 	struct iommu_group *group, *next;
1836 	LIST_HEAD(group_list);
1837 	int ret;
1838 
1839 	/*
1840 	 * This code-path does not allocate the default domain when
1841 	 * creating the iommu group, so do it after the groups are
1842 	 * created.
1843 	 */
1844 	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
1845 	if (ret)
1846 		return ret;
1847 
1848 	list_for_each_entry_safe(group, next, &group_list, entry) {
1849 		mutex_lock(&group->mutex);
1850 
1851 		/* Remove item from the list */
1852 		list_del_init(&group->entry);
1853 
1854 		/* Try to allocate default domain */
1855 		group->default_domain = iommu_group_alloc_default_domain(
1856 			group, iommu_get_default_domain_type(group, 0));
1857 		if (!group->default_domain) {
1858 			mutex_unlock(&group->mutex);
1859 			continue;
1860 		}
1861 
1862 		iommu_group_create_direct_mappings(group);
1863 
1864 		ret = __iommu_group_set_domain(group, group->default_domain);
1865 
1866 		mutex_unlock(&group->mutex);
1867 
1868 		if (ret)
1869 			break;
1870 
1871 		__iommu_group_dma_finalize(group);
1872 	}
1873 
1874 	return ret;
1875 }
1876 
1877 bool iommu_present(const struct bus_type *bus)
1878 {
1879 	return bus->iommu_ops != NULL;
1880 }
1881 EXPORT_SYMBOL_GPL(iommu_present);
1882 
1883 /**
1884  * device_iommu_capable() - check for a general IOMMU capability
1885  * @dev: device to which the capability would be relevant, if available
1886  * @cap: IOMMU capability
1887  *
1888  * Return: true if an IOMMU is present and supports the given capability
1889  * for the given device, otherwise false.
1890  */
1891 bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
1892 {
1893 	const struct iommu_ops *ops;
1894 
1895 	if (!dev->iommu || !dev->iommu->iommu_dev)
1896 		return false;
1897 
1898 	ops = dev_iommu_ops(dev);
1899 	if (!ops->capable)
1900 		return false;
1901 
1902 	return ops->capable(dev, cap);
1903 }
1904 EXPORT_SYMBOL_GPL(device_iommu_capable);
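
/*
 * Example (illustrative sketch): a caller that needs cache-coherent DMA
 * translation can gate that feature on this helper. "my_dev" is a
 * placeholder for the caller's struct device.
 *
 *	if (!device_iommu_capable(my_dev, IOMMU_CAP_CACHE_COHERENCY))
 *		return -EINVAL;
 */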
1905 
1906 /**
1907  * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
1908  *       for a group
1909  * @group: Group to query
1910  *
1911  * IOMMU groups should not have differing values of
1912  * msi_device_has_isolated_msi() for devices in a group. However nothing
1913  * directly prevents this, so ensure mistakes don't result in isolation failures
1914  * by checking that every device in the group reports isolated MSI.
1915  */
1916 bool iommu_group_has_isolated_msi(struct iommu_group *group)
1917 {
1918 	struct group_device *group_dev;
1919 	bool ret = true;
1920 
1921 	mutex_lock(&group->mutex);
1922 	for_each_group_device(group, group_dev)
1923 		ret &= msi_device_has_isolated_msi(group_dev->dev);
1924 	mutex_unlock(&group->mutex);
1925 	return ret;
1926 }
1927 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
1928 
1929 /**
1930  * iommu_set_fault_handler() - set a fault handler for an iommu domain
1931  * @domain: iommu domain
1932  * @handler: fault handler
1933  * @token: user data, will be passed back to the fault handler
1934  *
1935  * This function should be used by IOMMU users which want to be notified
1936  * whenever an IOMMU fault happens.
1937  *
1938  * The fault handler itself should return 0 on success, and an appropriate
1939  * error code otherwise.
1940  */
1941 void iommu_set_fault_handler(struct iommu_domain *domain,
1942 					iommu_fault_handler_t handler,
1943 					void *token)
1944 {
1945 	BUG_ON(!domain);
1946 
1947 	domain->handler = handler;
1948 	domain->handler_token = token;
1949 }
1950 EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
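
/*
 * Example (illustrative sketch): an unmanaged-domain user can log faults and
 * fall back to the IOMMU driver's default behaviour by returning -ENOSYS, as
 * described at report_iommu_fault(). "my_fault_handler" and "my_token" are
 * placeholders.
 *
 *	static int my_fault_handler(struct iommu_domain *domain,
 *				    struct device *dev, unsigned long iova,
 *				    int flags, void *token)
 *	{
 *		dev_err(dev, "iommu fault at iova %#lx, flags %#x\n",
 *			iova, flags);
 *		return -ENOSYS;
 *	}
 *
 *	iommu_set_fault_handler(domain, my_fault_handler, my_token);
 */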
1951 
1952 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
1953 						 unsigned type)
1954 {
1955 	struct iommu_domain *domain;
1956 	unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
1957 
1958 	if (bus == NULL || bus->iommu_ops == NULL)
1959 		return NULL;
1960 
1961 	domain = bus->iommu_ops->domain_alloc(alloc_type);
1962 	if (!domain)
1963 		return NULL;
1964 
1965 	domain->type = type;
1966 	/*
1967 	 * If not already set, assume all sizes by default; the driver
1968 	 * may override this later
1969 	 */
1970 	if (!domain->pgsize_bitmap)
1971 		domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
1972 
1973 	if (!domain->ops)
1974 		domain->ops = bus->iommu_ops->default_domain_ops;
1975 
1976 	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
1977 		iommu_domain_free(domain);
1978 		domain = NULL;
1979 	}
1980 	return domain;
1981 }
1982 
1983 struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
1984 {
1985 	return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
1986 }
1987 EXPORT_SYMBOL_GPL(iommu_domain_alloc);
1988 
1989 void iommu_domain_free(struct iommu_domain *domain)
1990 {
1991 	if (domain->type == IOMMU_DOMAIN_SVA)
1992 		mmdrop(domain->mm);
1993 	iommu_put_dma_cookie(domain);
1994 	domain->ops->free(domain);
1995 }
1996 EXPORT_SYMBOL_GPL(iommu_domain_free);
1997 
1998 /*
1999  * Put the group's domain back to the appropriate core-owned domain - either the
2000  * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
2001  */
2002 static void __iommu_group_set_core_domain(struct iommu_group *group)
2003 {
2004 	struct iommu_domain *new_domain;
2005 
2006 	if (group->owner)
2007 		new_domain = group->blocking_domain;
2008 	else
2009 		new_domain = group->default_domain;
2010 
2011 	__iommu_group_set_domain_nofail(group, new_domain);
2012 }
2013 
2014 static int __iommu_attach_device(struct iommu_domain *domain,
2015 				 struct device *dev)
2016 {
2017 	int ret;
2018 
2019 	if (unlikely(domain->ops->attach_dev == NULL))
2020 		return -ENODEV;
2021 
2022 	ret = domain->ops->attach_dev(domain, dev);
2023 	if (ret)
2024 		return ret;
2025 	dev->iommu->attach_deferred = 0;
2026 	trace_attach_device_to_domain(dev);
2027 	return 0;
2028 }
2029 
2030 /**
2031  * iommu_attach_device - Attach an IOMMU domain to a device
2032  * @domain: IOMMU domain to attach
2033  * @dev: Device that will be attached
2034  *
2035  * Returns 0 on success and an error code on failure
2036  *
2037  * Note that EINVAL can be treated as a soft failure, indicating
2038  * that the domain's configuration is incompatible with
2039  * the device. In this case attaching a different domain to the
2040  * device may succeed.
2041  */
2042 int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
2043 {
2044 	struct iommu_group *group;
2045 	int ret;
2046 
2047 	group = iommu_group_get(dev);
2048 	if (!group)
2049 		return -ENODEV;
2050 
2051 	/*
2052 	 * Lock the group to make sure the device-count doesn't
2053 	 * change while we are attaching
2054 	 */
2055 	mutex_lock(&group->mutex);
2056 	ret = -EINVAL;
2057 	if (list_count_nodes(&group->devices) != 1)
2058 		goto out_unlock;
2059 
2060 	ret = __iommu_attach_group(domain, group);
2061 
2062 out_unlock:
2063 	mutex_unlock(&group->mutex);
2064 	iommu_group_put(group);
2065 
2066 	return ret;
2067 }
2068 EXPORT_SYMBOL_GPL(iommu_attach_device);
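
/*
 * Example (illustrative sketch): the usual flow is to allocate an unmanaged
 * domain, attach it to a device that sits alone in its group, and tear it
 * down again when done. "my_dev" is a placeholder.
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(my_dev->bus);
 *
 *	if (!domain)
 *		return -ENODEV;
 *	ret = iommu_attach_device(domain, my_dev);
 *	if (ret) {
 *		iommu_domain_free(domain);
 *		return ret;
 *	}
 *	... use iommu_map()/iommu_unmap() on the domain ...
 *	iommu_detach_device(domain, my_dev);
 *	iommu_domain_free(domain);
 */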
2069 
2070 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
2071 {
2072 	if (dev->iommu && dev->iommu->attach_deferred)
2073 		return __iommu_attach_device(domain, dev);
2074 
2075 	return 0;
2076 }
2077 
2078 void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
2079 {
2080 	struct iommu_group *group;
2081 
2082 	group = iommu_group_get(dev);
2083 	if (!group)
2084 		return;
2085 
2086 	mutex_lock(&group->mutex);
2087 	if (WARN_ON(domain != group->domain) ||
2088 	    WARN_ON(list_count_nodes(&group->devices) != 1))
2089 		goto out_unlock;
2090 	__iommu_group_set_core_domain(group);
2091 
2092 out_unlock:
2093 	mutex_unlock(&group->mutex);
2094 	iommu_group_put(group);
2095 }
2096 EXPORT_SYMBOL_GPL(iommu_detach_device);
2097 
2098 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
2099 {
2100 	struct iommu_domain *domain;
2101 	struct iommu_group *group;
2102 
2103 	group = iommu_group_get(dev);
2104 	if (!group)
2105 		return NULL;
2106 
2107 	domain = group->domain;
2108 
2109 	iommu_group_put(group);
2110 
2111 	return domain;
2112 }
2113 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
2114 
2115 /*
2116  * For use by IOMMU_DOMAIN_DMA implementations which already provide their own
2117  * guarantees that the group and its default domain are valid and correct.
2118  */
2119 struct iommu_domain *iommu_get_dma_domain(struct device *dev)
2120 {
2121 	return dev->iommu_group->default_domain;
2122 }
2123 
2124 static int __iommu_attach_group(struct iommu_domain *domain,
2125 				struct iommu_group *group)
2126 {
2127 	if (group->domain && group->domain != group->default_domain &&
2128 	    group->domain != group->blocking_domain)
2129 		return -EBUSY;
2130 
2131 	return __iommu_group_set_domain(group, domain);
2132 }
2133 
2134 /**
2135  * iommu_attach_group - Attach an IOMMU domain to an IOMMU group
2136  * @domain: IOMMU domain to attach
2137  * @group: IOMMU group that will be attached
2138  *
2139  * Returns 0 on success and an error code on failure
2140  *
2141  * Note that EINVAL can be treated as a soft failure, indicating
2142  * that the domain's configuration is incompatible with
2143  * the group. In this case attaching a different domain to the
2144  * group may succeed.
2145  */
2146 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
2147 {
2148 	int ret;
2149 
2150 	mutex_lock(&group->mutex);
2151 	ret = __iommu_attach_group(domain, group);
2152 	mutex_unlock(&group->mutex);
2153 
2154 	return ret;
2155 }
2156 EXPORT_SYMBOL_GPL(iommu_attach_group);
2157 
2158 static int __iommu_device_set_domain(struct iommu_group *group,
2159 				     struct device *dev,
2160 				     struct iommu_domain *new_domain,
2161 				     unsigned int flags)
2162 {
2163 	int ret;
2164 
2165 	if (dev->iommu->attach_deferred) {
2166 		if (new_domain == group->default_domain)
2167 			return 0;
2168 		dev->iommu->attach_deferred = 0;
2169 	}
2170 
2171 	ret = __iommu_attach_device(new_domain, dev);
2172 	if (ret) {
2173 		/*
2174 		 * If we have a blocking domain then try to attach that in hopes
2175 		 * of avoiding a UAF. Modern drivers should implement blocking
2176 		 * domains as global statics that cannot fail.
2177 		 */
2178 		if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
2179 		    group->blocking_domain &&
2180 		    group->blocking_domain != new_domain)
2181 			__iommu_attach_device(group->blocking_domain, dev);
2182 		return ret;
2183 	}
2184 	return 0;
2185 }
2186 
2187 /*
2188  * If 0 is returned the group's domain is new_domain. If an error is returned
2189  * then the group's domain will be set back to the existing domain unless
2190  * IOMMU_SET_DOMAIN_MUST_SUCCEED is set, in which case an error is returned and
2191  * the group's domain is left inconsistent. It is a driver bug to fail attach
2192  * with a previously good domain. We try to avoid a kernel UAF because of this.
2193  *
2194  * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
2195  * API works on domains and devices.  Bridge that gap by iterating over the
2196  * devices in a group.  Ideally we'd have a single device which represents the
2197  * requestor ID of the group, but we also allow IOMMU drivers to create policy
2198  * defined minimum sets, where the physical hardware may be able to distinguish
2199  * members, but we wish to group them at a higher level (e.g. untrusted
2200  * multi-function PCI devices).  Thus we attach each device.
2201  */
2202 static int __iommu_group_set_domain_internal(struct iommu_group *group,
2203 					     struct iommu_domain *new_domain,
2204 					     unsigned int flags)
2205 {
2206 	struct group_device *last_gdev;
2207 	struct group_device *gdev;
2208 	int result;
2209 	int ret;
2210 
2211 	lockdep_assert_held(&group->mutex);
2212 
2213 	if (group->domain == new_domain)
2214 		return 0;
2215 
2216 	/*
2217 	 * New drivers should support default domains, so the set_platform_dma_ops()
2218 	 * op will never be called. Otherwise the NULL domain represents some
2219 	 * platform specific behavior.
2220 	 */
2221 	if (!new_domain) {
2222 		for_each_group_device(group, gdev) {
2223 			const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
2224 
2225 			if (!WARN_ON(!ops->set_platform_dma_ops))
2226 				ops->set_platform_dma_ops(gdev->dev);
2227 		}
2228 		group->domain = NULL;
2229 		return 0;
2230 	}
2231 
2232 	/*
2233 	 * Changing the domain is done by calling attach_dev() on the new
2234 	 * domain. This switch does not have to be atomic and DMA can be
2235 	 * discarded during the transition. DMA must only be able to access
2236 	 * either new_domain or group->domain, never something else.
2237 	 */
2238 	result = 0;
2239 	for_each_group_device(group, gdev) {
2240 		ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
2241 						flags);
2242 		if (ret) {
2243 			result = ret;
2244 			/*
2245 			 * Keep trying the other devices in the group. If a
2246 			 * driver fails to attach to an otherwise good domain, and
2247 			 * does not support blocking domains, it should at least
2248 			 * drop its reference on the current domain so we don't
2249 			 * UAF.
2250 			 */
2251 			if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
2252 				continue;
2253 			goto err_revert;
2254 		}
2255 	}
2256 	group->domain = new_domain;
2257 	return result;
2258 
2259 err_revert:
2260 	/*
2261 	 * This is called in error unwind paths. A well behaved driver should
2262 	 * always allow us to attach to a domain that was already attached.
2263 	 */
2264 	last_gdev = gdev;
2265 	for_each_group_device(group, gdev) {
2266 		const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
2267 
2268 		/*
2269 		 * If set_platform_dma_ops is not present a NULL domain can
2270 		 * happen only for the first probe, in which case we leave
2271 		 * group->domain as NULL and let release clean everything up.
2272 		 */
2273 		if (group->domain)
2274 			WARN_ON(__iommu_device_set_domain(
2275 				group, gdev->dev, group->domain,
2276 				IOMMU_SET_DOMAIN_MUST_SUCCEED));
2277 		else if (ops->set_platform_dma_ops)
2278 			ops->set_platform_dma_ops(gdev->dev);
2279 		if (gdev == last_gdev)
2280 			break;
2281 	}
2282 	return ret;
2283 }
2284 
2285 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
2286 {
2287 	mutex_lock(&group->mutex);
2288 	__iommu_group_set_core_domain(group);
2289 	mutex_unlock(&group->mutex);
2290 }
2291 EXPORT_SYMBOL_GPL(iommu_detach_group);
2292 
2293 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2294 {
2295 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2296 		return iova;
2297 
2298 	if (domain->type == IOMMU_DOMAIN_BLOCKED)
2299 		return 0;
2300 
2301 	return domain->ops->iova_to_phys(domain, iova);
2302 }
2303 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
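
/*
 * Example (illustrative sketch): translate an IOVA that was previously mapped
 * into the domain back to its physical address, e.g. for debugging.
 *
 *	phys_addr_t phys = iommu_iova_to_phys(domain, iova);
 *
 *	pr_debug("iova %pad -> phys %pa\n", &iova, &phys);
 */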
2304 
2305 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
2306 			   phys_addr_t paddr, size_t size, size_t *count)
2307 {
2308 	unsigned int pgsize_idx, pgsize_idx_next;
2309 	unsigned long pgsizes;
2310 	size_t offset, pgsize, pgsize_next;
2311 	unsigned long addr_merge = paddr | iova;
2312 
2313 	/* Page sizes supported by the hardware and small enough for @size */
2314 	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
2315 
2316 	/* Constrain the page sizes further based on the maximum alignment */
2317 	if (likely(addr_merge))
2318 		pgsizes &= GENMASK(__ffs(addr_merge), 0);
2319 
2320 	/* Make sure we have at least one suitable page size */
2321 	BUG_ON(!pgsizes);
2322 
2323 	/* Pick the biggest page size remaining */
2324 	pgsize_idx = __fls(pgsizes);
2325 	pgsize = BIT(pgsize_idx);
2326 	if (!count)
2327 		return pgsize;
2328 
2329 	/* Find the next biggest supported page size, if it exists */
2330 	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
2331 	if (!pgsizes)
2332 		goto out_set_count;
2333 
2334 	pgsize_idx_next = __ffs(pgsizes);
2335 	pgsize_next = BIT(pgsize_idx_next);
2336 
2337 	/*
2338 	 * There's no point trying a bigger page size unless the virtual
2339 	 * and physical addresses are similarly offset within the larger page.
2340 	 */
2341 	if ((iova ^ paddr) & (pgsize_next - 1))
2342 		goto out_set_count;
2343 
2344 	/* Calculate the offset to the next page size alignment boundary */
2345 	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
2346 
2347 	/*
2348 	 * If size is big enough to accommodate the larger page, reduce
2349 	 * the number of smaller pages.
2350 	 */
2351 	if (offset + pgsize_next <= size)
2352 		size = offset;
2353 
2354 out_set_count:
2355 	*count = size >> pgsize_idx;
2356 	return pgsize;
2357 }
2358 
2359 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
2360 			     phys_addr_t paddr, size_t size, int prot,
2361 			     gfp_t gfp, size_t *mapped)
2362 {
2363 	const struct iommu_domain_ops *ops = domain->ops;
2364 	size_t pgsize, count;
2365 	int ret;
2366 
2367 	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
2368 
2369 	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
2370 		 iova, &paddr, pgsize, count);
2371 
2372 	if (ops->map_pages) {
2373 		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
2374 				     gfp, mapped);
2375 	} else {
2376 		ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
2377 		*mapped = ret ? 0 : pgsize;
2378 	}
2379 
2380 	return ret;
2381 }
2382 
2383 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
2384 		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2385 {
2386 	const struct iommu_domain_ops *ops = domain->ops;
2387 	unsigned long orig_iova = iova;
2388 	unsigned int min_pagesz;
2389 	size_t orig_size = size;
2390 	phys_addr_t orig_paddr = paddr;
2391 	int ret = 0;
2392 
2393 	if (unlikely(!(ops->map || ops->map_pages) ||
2394 		     domain->pgsize_bitmap == 0UL))
2395 		return -ENODEV;
2396 
2397 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2398 		return -EINVAL;
2399 
2400 	/* find out the minimum page size supported */
2401 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2402 
2403 	/*
2404 	 * both the virtual address and the physical one, as well as
2405 	 * the size of the mapping, must be aligned (at least) to the
2406 	 * size of the smallest page supported by the hardware
2407 	 */
2408 	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
2409 		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
2410 		       iova, &paddr, size, min_pagesz);
2411 		return -EINVAL;
2412 	}
2413 
2414 	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
2415 
2416 	while (size) {
2417 		size_t mapped = 0;
2418 
2419 		ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
2420 					&mapped);
2421 		/*
2422 		 * Some pages may have been mapped, even if an error occurred,
2423 		 * so we should account for those so they can be unmapped.
2424 		 */
2425 		size -= mapped;
2426 
2427 		if (ret)
2428 			break;
2429 
2430 		iova += mapped;
2431 		paddr += mapped;
2432 	}
2433 
2434 	/* unroll mapping in case something went wrong */
2435 	if (ret)
2436 		iommu_unmap(domain, orig_iova, orig_size - size);
2437 	else
2438 		trace_map(orig_iova, orig_paddr, orig_size);
2439 
2440 	return ret;
2441 }
2442 
2443 int iommu_map(struct iommu_domain *domain, unsigned long iova,
2444 	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2445 {
2446 	const struct iommu_domain_ops *ops = domain->ops;
2447 	int ret;
2448 
2449 	might_sleep_if(gfpflags_allow_blocking(gfp));
2450 
2451 	/* Discourage passing strange GFP flags */
2452 	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
2453 				__GFP_HIGHMEM)))
2454 		return -EINVAL;
2455 
2456 	ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
2457 	if (ret == 0 && ops->iotlb_sync_map)
2458 		ops->iotlb_sync_map(domain, iova, size);
2459 
2460 	return ret;
2461 }
2462 EXPORT_SYMBOL_GPL(iommu_map);
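
/*
 * Example (illustrative sketch): map one page for read/write DMA and unmap it
 * again. "iova" and "page" are placeholders chosen by the caller; the IOVA
 * must be aligned to a page size supported by the domain.
 *
 *	ret = iommu_map(domain, iova, page_to_phys(page), PAGE_SIZE,
 *			IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	...
 *	iommu_unmap(domain, iova, PAGE_SIZE);
 */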
2463 
2464 static size_t __iommu_unmap_pages(struct iommu_domain *domain,
2465 				  unsigned long iova, size_t size,
2466 				  struct iommu_iotlb_gather *iotlb_gather)
2467 {
2468 	const struct iommu_domain_ops *ops = domain->ops;
2469 	size_t pgsize, count;
2470 
2471 	pgsize = iommu_pgsize(domain, iova, iova, size, &count);
2472 	return ops->unmap_pages ?
2473 	       ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
2474 	       ops->unmap(domain, iova, pgsize, iotlb_gather);
2475 }
2476 
2477 static size_t __iommu_unmap(struct iommu_domain *domain,
2478 			    unsigned long iova, size_t size,
2479 			    struct iommu_iotlb_gather *iotlb_gather)
2480 {
2481 	const struct iommu_domain_ops *ops = domain->ops;
2482 	size_t unmapped_page, unmapped = 0;
2483 	unsigned long orig_iova = iova;
2484 	unsigned int min_pagesz;
2485 
2486 	if (unlikely(!(ops->unmap || ops->unmap_pages) ||
2487 		     domain->pgsize_bitmap == 0UL))
2488 		return 0;
2489 
2490 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2491 		return 0;
2492 
2493 	/* find out the minimum page size supported */
2494 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2495 
2496 	/*
2497 	 * The virtual address, as well as the size of the mapping, must be
2498 	 * aligned (at least) to the size of the smallest page supported
2499 	 * by the hardware
2500 	 */
2501 	if (!IS_ALIGNED(iova | size, min_pagesz)) {
2502 		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
2503 		       iova, size, min_pagesz);
2504 		return 0;
2505 	}
2506 
2507 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
2508 
2509 	/*
2510 	 * Keep iterating until we either unmap 'size' bytes (or more)
2511 	 * or we hit an area that isn't mapped.
2512 	 */
2513 	while (unmapped < size) {
2514 		unmapped_page = __iommu_unmap_pages(domain, iova,
2515 						    size - unmapped,
2516 						    iotlb_gather);
2517 		if (!unmapped_page)
2518 			break;
2519 
2520 		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
2521 			 iova, unmapped_page);
2522 
2523 		iova += unmapped_page;
2524 		unmapped += unmapped_page;
2525 	}
2526 
2527 	trace_unmap(orig_iova, size, unmapped);
2528 	return unmapped;
2529 }
2530 
2531 size_t iommu_unmap(struct iommu_domain *domain,
2532 		   unsigned long iova, size_t size)
2533 {
2534 	struct iommu_iotlb_gather iotlb_gather;
2535 	size_t ret;
2536 
2537 	iommu_iotlb_gather_init(&iotlb_gather);
2538 	ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
2539 	iommu_iotlb_sync(domain, &iotlb_gather);
2540 
2541 	return ret;
2542 }
2543 EXPORT_SYMBOL_GPL(iommu_unmap);
2544 
2545 size_t iommu_unmap_fast(struct iommu_domain *domain,
2546 			unsigned long iova, size_t size,
2547 			struct iommu_iotlb_gather *iotlb_gather)
2548 {
2549 	return __iommu_unmap(domain, iova, size, iotlb_gather);
2550 }
2551 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
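
/*
 * Example (illustrative sketch): callers batching several unmaps can reuse
 * one gather structure and issue a single IOTLB sync at the end, mirroring
 * what iommu_unmap() does internally.
 *
 *	struct iommu_iotlb_gather gather;
 *
 *	iommu_iotlb_gather_init(&gather);
 *	iommu_unmap_fast(domain, iova_a, size_a, &gather);
 *	iommu_unmap_fast(domain, iova_b, size_b, &gather);
 *	iommu_iotlb_sync(domain, &gather);
 */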
2552 
2553 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
2554 		     struct scatterlist *sg, unsigned int nents, int prot,
2555 		     gfp_t gfp)
2556 {
2557 	const struct iommu_domain_ops *ops = domain->ops;
2558 	size_t len = 0, mapped = 0;
2559 	phys_addr_t start;
2560 	unsigned int i = 0;
2561 	int ret;
2562 
2563 	might_sleep_if(gfpflags_allow_blocking(gfp));
2564 
2565 	/* Discourage passing strange GFP flags */
2566 	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
2567 				__GFP_HIGHMEM)))
2568 		return -EINVAL;
2569 
2570 	while (i <= nents) {
2571 		phys_addr_t s_phys = sg_phys(sg);
2572 
2573 		if (len && s_phys != start + len) {
2574 			ret = __iommu_map(domain, iova + mapped, start,
2575 					len, prot, gfp);
2576 
2577 			if (ret)
2578 				goto out_err;
2579 
2580 			mapped += len;
2581 			len = 0;
2582 		}
2583 
2584 		if (sg_is_dma_bus_address(sg))
2585 			goto next;
2586 
2587 		if (len) {
2588 			len += sg->length;
2589 		} else {
2590 			len = sg->length;
2591 			start = s_phys;
2592 		}
2593 
2594 next:
2595 		if (++i < nents)
2596 			sg = sg_next(sg);
2597 	}
2598 
2599 	if (ops->iotlb_sync_map)
2600 		ops->iotlb_sync_map(domain, iova, mapped);
2601 	return mapped;
2602 
2603 out_err:
2604 	/* undo mappings already done */
2605 	iommu_unmap(domain, iova, mapped);
2606 
2607 	return ret;
2608 }
2609 EXPORT_SYMBOL_GPL(iommu_map_sg);
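
/*
 * Example (illustrative sketch): map a whole sg_table contiguously at "iova".
 * On success the total number of bytes mapped is returned; "sgt" and "iova"
 * are placeholders.
 *
 *	ssize_t mapped = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents,
 *				      IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 *	if (mapped < 0)
 *		return mapped;
 */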
2610 
2611 /**
2612  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
2613  * @domain: the iommu domain where the fault has happened
2614  * @dev: the device where the fault has happened
2615  * @iova: the faulting address
2616  * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
2617  *
2618  * This function should be called by the low-level IOMMU implementations
2619  * whenever IOMMU faults happen, to allow high-level users, that are
2620  * whenever IOMMU faults happen, to allow high-level users that are
2621  * interested in such events to know about them.
2622  * This event may be useful for several possible use cases:
2623  * - mere logging of the event
2624  * - dynamic TLB/PTE loading
2625  * - restarting the faulting device, if required
2626  *
2627  * Returns 0 on success and an appropriate error code otherwise (if dynamic
2628  * PTE/TLB loading will one day be supported, implementations will be able
2629  * to tell whether it succeeded or not according to this return value).
2630  *
2631  * Specifically, -ENOSYS is returned if a fault handler isn't installed
2632  * (though fault handlers can also return -ENOSYS, in case they want to
2633  * elicit the default behavior of the IOMMU drivers).
2634  */
2635 int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
2636 		       unsigned long iova, int flags)
2637 {
2638 	int ret = -ENOSYS;
2639 
2640 	/*
2641 	 * if upper layers showed interest and installed a fault handler,
2642 	 * invoke it.
2643 	 */
2644 	if (domain->handler)
2645 		ret = domain->handler(domain, dev, iova, flags,
2646 						domain->handler_token);
2647 
2648 	trace_io_page_fault(dev, iova, flags);
2649 	return ret;
2650 }
2651 EXPORT_SYMBOL_GPL(report_iommu_fault);
2652 
2653 static int __init iommu_init(void)
2654 {
2655 	iommu_group_kset = kset_create_and_add("iommu_groups",
2656 					       NULL, kernel_kobj);
2657 	BUG_ON(!iommu_group_kset);
2658 
2659 	iommu_debugfs_setup();
2660 
2661 	return 0;
2662 }
2663 core_initcall(iommu_init);
2664 
2665 int iommu_enable_nesting(struct iommu_domain *domain)
2666 {
2667 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2668 		return -EINVAL;
2669 	if (!domain->ops->enable_nesting)
2670 		return -EINVAL;
2671 	return domain->ops->enable_nesting(domain);
2672 }
2673 EXPORT_SYMBOL_GPL(iommu_enable_nesting);
2674 
2675 int iommu_set_pgtable_quirks(struct iommu_domain *domain,
2676 		unsigned long quirk)
2677 {
2678 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2679 		return -EINVAL;
2680 	if (!domain->ops->set_pgtable_quirks)
2681 		return -EINVAL;
2682 	return domain->ops->set_pgtable_quirks(domain, quirk);
2683 }
2684 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
2685 
2686 void iommu_get_resv_regions(struct device *dev, struct list_head *list)
2687 {
2688 	const struct iommu_ops *ops = dev_iommu_ops(dev);
2689 
2690 	if (ops->get_resv_regions)
2691 		ops->get_resv_regions(dev, list);
2692 }
2693 
2694 /**
2695  * iommu_put_resv_regions - release reserved regions
2696  * @dev: device for which to free reserved regions
2697  * @list: reserved region list for device
2698  *
2699  * This releases a reserved region list acquired by iommu_get_resv_regions().
2700  */
2701 void iommu_put_resv_regions(struct device *dev, struct list_head *list)
2702 {
2703 	struct iommu_resv_region *entry, *next;
2704 
2705 	list_for_each_entry_safe(entry, next, list, list) {
2706 		if (entry->free)
2707 			entry->free(dev, entry);
2708 		else
2709 			kfree(entry);
2710 	}
2711 }
2712 EXPORT_SYMBOL(iommu_put_resv_regions);
2713 
2714 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
2715 						  size_t length, int prot,
2716 						  enum iommu_resv_type type,
2717 						  gfp_t gfp)
2718 {
2719 	struct iommu_resv_region *region;
2720 
2721 	region = kzalloc(sizeof(*region), gfp);
2722 	if (!region)
2723 		return NULL;
2724 
2725 	INIT_LIST_HEAD(&region->list);
2726 	region->start = start;
2727 	region->length = length;
2728 	region->prot = prot;
2729 	region->type = type;
2730 	return region;
2731 }
2732 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
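
/*
 * Example (illustrative sketch): an IOMMU driver's ->get_resv_regions()
 * callback typically allocates its regions with the helper above and adds
 * them to the caller's list. The MSI doorbell address below is made up.
 *
 *	static void my_iommu_get_resv_regions(struct device *dev,
 *					      struct list_head *head)
 *	{
 *		struct iommu_resv_region *region;
 *
 *		region = iommu_alloc_resv_region(0x08000000, SZ_1M,
 *						 IOMMU_WRITE | IOMMU_MMIO,
 *						 IOMMU_RESV_MSI, GFP_KERNEL);
 *		if (!region)
 *			return;
 *		list_add_tail(&region->list, head);
 *	}
 */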
2733 
2734 void iommu_set_default_passthrough(bool cmd_line)
2735 {
2736 	if (cmd_line)
2737 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2738 	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
2739 }
2740 
2741 void iommu_set_default_translated(bool cmd_line)
2742 {
2743 	if (cmd_line)
2744 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2745 	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
2746 }
2747 
2748 bool iommu_default_passthrough(void)
2749 {
2750 	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
2751 }
2752 EXPORT_SYMBOL_GPL(iommu_default_passthrough);
2753 
2754 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
2755 {
2756 	const struct iommu_ops *ops = NULL;
2757 	struct iommu_device *iommu;
2758 
2759 	spin_lock(&iommu_device_lock);
2760 	list_for_each_entry(iommu, &iommu_device_list, list)
2761 		if (iommu->fwnode == fwnode) {
2762 			ops = iommu->ops;
2763 			break;
2764 		}
2765 	spin_unlock(&iommu_device_lock);
2766 	return ops;
2767 }
2768 
2769 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
2770 		      const struct iommu_ops *ops)
2771 {
2772 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2773 
2774 	if (fwspec)
2775 		return ops == fwspec->ops ? 0 : -EINVAL;
2776 
2777 	if (!dev_iommu_get(dev))
2778 		return -ENOMEM;
2779 
2780 	/* Preallocate for the overwhelmingly common case of 1 ID */
2781 	fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL);
2782 	if (!fwspec)
2783 		return -ENOMEM;
2784 
2785 	of_node_get(to_of_node(iommu_fwnode));
2786 	fwspec->iommu_fwnode = iommu_fwnode;
2787 	fwspec->ops = ops;
2788 	dev_iommu_fwspec_set(dev, fwspec);
2789 	return 0;
2790 }
2791 EXPORT_SYMBOL_GPL(iommu_fwspec_init);
2792 
2793 void iommu_fwspec_free(struct device *dev)
2794 {
2795 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2796 
2797 	if (fwspec) {
2798 		fwnode_handle_put(fwspec->iommu_fwnode);
2799 		kfree(fwspec);
2800 		dev_iommu_fwspec_set(dev, NULL);
2801 	}
2802 }
2803 EXPORT_SYMBOL_GPL(iommu_fwspec_free);
2804 
2805 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
2806 {
2807 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2808 	int i, new_num;
2809 
2810 	if (!fwspec)
2811 		return -EINVAL;
2812 
2813 	new_num = fwspec->num_ids + num_ids;
2814 	if (new_num > 1) {
2815 		fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
2816 				  GFP_KERNEL);
2817 		if (!fwspec)
2818 			return -ENOMEM;
2819 
2820 		dev_iommu_fwspec_set(dev, fwspec);
2821 	}
2822 
2823 	for (i = 0; i < num_ids; i++)
2824 		fwspec->ids[fwspec->num_ids + i] = ids[i];
2825 
2826 	fwspec->num_ids = new_num;
2827 	return 0;
2828 }
2829 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
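
/*
 * Example (illustrative sketch): firmware parsing code pairs the two helpers
 * above, first initialising the fwspec against the IOMMU's fwnode and then
 * recording the device's IDs (e.g. stream IDs). "iommu_fwnode", "ops" and
 * "sid" are placeholders.
 *
 *	ret = iommu_fwspec_init(dev, iommu_fwnode, ops);
 *	if (ret)
 *		return ret;
 *	ret = iommu_fwspec_add_ids(dev, &sid, 1);
 */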
2830 
2831 /*
2832  * Per device IOMMU features.
2833  */
2834 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
2835 {
2836 	if (dev->iommu && dev->iommu->iommu_dev) {
2837 		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
2838 
2839 		if (ops->dev_enable_feat)
2840 			return ops->dev_enable_feat(dev, feat);
2841 	}
2842 
2843 	return -ENODEV;
2844 }
2845 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
2846 
2847 /*
2848  * Device drivers should do the necessary cleanups before calling this.
2849  */
2850 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
2851 {
2852 	if (dev->iommu && dev->iommu->iommu_dev) {
2853 		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
2854 
2855 		if (ops->dev_disable_feat)
2856 			return ops->dev_disable_feat(dev, feat);
2857 	}
2858 
2859 	return -EBUSY;
2860 }
2861 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
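
/*
 * Example (illustrative sketch): a device driver that wants to use SVA would
 * bracket that usage with the two helpers above, provided the IOMMU driver
 * implements IOMMU_DEV_FEAT_SVA for the device.
 *
 *	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
 *	if (ret)
 *		return ret;
 *	... bind to an SVA handle, run, then unbind ...
 *	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
 */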
2862 
2863 /*
2864  * Changes the default domain of an iommu group
2865  *
2866  * @group: The group for which the default domain should be changed
2867  * @dev: The first device in the group
2868  * @type: The type of the new default domain that gets associated with the group
2869  *
2870  * Returns 0 on success and an error code on failure
2871  *
2872  * Note:
2873  * 1. Presently, this function is called only when a user requests to change the
2874  *    group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type.
2875  *    Please take a closer look before using it for other purposes.
2876  */
2877 static int iommu_change_dev_def_domain(struct iommu_group *group,
2878 				       struct device *dev, int type)
2879 {
2880 	struct iommu_domain *prev_dom;
2881 	int ret;
2882 
2883 	lockdep_assert_held(&group->mutex);
2884 
2885 	prev_dom = group->default_domain;
2886 	type = iommu_get_default_domain_type(group, type);
2887 	if (type < 0)
2888 		return -EINVAL;
2889 
2890 	/*
2891 	 * Switch to a new domain only if the requested domain type is different
2892 	 * from the existing default domain type
2893 	 */
2894 	if (prev_dom->type == type)
2895 		return 0;
2896 
2897 	group->default_domain = NULL;
2898 	group->domain = NULL;
2899 
2900 	/* Sets group->default_domain to the newly allocated domain */
2901 	group->default_domain = iommu_group_alloc_default_domain(group, type);
2902 	if (!group->default_domain) {
2903 		ret = -EINVAL;
2904 		goto restore_old_domain;
2905 	}
2906 
2907 	group->domain = prev_dom;
2908 	ret = iommu_create_device_direct_mappings(group, dev);
2909 	if (ret)
2910 		goto free_new_domain;
2911 
2912 	ret = __iommu_group_set_domain(group, group->default_domain);
2913 	if (ret)
2914 		goto free_new_domain;
2915 
2916 	iommu_domain_free(prev_dom);
2917 
2918 	return 0;
2919 
2920 free_new_domain:
2921 	iommu_domain_free(group->default_domain);
2922 restore_old_domain:
2923 	group->default_domain = prev_dom;
2924 
2925 	return ret;
2926 }
2927 
2928 /*
2929  * Changing the default domain through sysfs requires users to unbind the
2930  * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
2931  * transition. Return failure if this requirement is not met.
2932  *
2933  * We need to consider the race between this and the device release path.
2934  * group->mutex is used here to guarantee that the device release path
2935  * will not be entered at the same time.
2936  */
2937 static ssize_t iommu_group_store_type(struct iommu_group *group,
2938 				      const char *buf, size_t count)
2939 {
2940 	struct group_device *grp_dev;
2941 	struct device *dev;
2942 	int ret, req_type;
2943 
2944 	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
2945 		return -EACCES;
2946 
2947 	if (WARN_ON(!group) || !group->default_domain)
2948 		return -EINVAL;
2949 
2950 	if (sysfs_streq(buf, "identity"))
2951 		req_type = IOMMU_DOMAIN_IDENTITY;
2952 	else if (sysfs_streq(buf, "DMA"))
2953 		req_type = IOMMU_DOMAIN_DMA;
2954 	else if (sysfs_streq(buf, "DMA-FQ"))
2955 		req_type = IOMMU_DOMAIN_DMA_FQ;
2956 	else if (sysfs_streq(buf, "auto"))
2957 		req_type = 0;
2958 	else
2959 		return -EINVAL;
2960 
2961 	mutex_lock(&group->mutex);
2962 	/* We can bring up a flush queue without tearing down the domain. */
2963 	if (req_type == IOMMU_DOMAIN_DMA_FQ &&
2964 	    group->default_domain->type == IOMMU_DOMAIN_DMA) {
2965 		ret = iommu_dma_init_fq(group->default_domain);
2966 		if (!ret)
2967 			group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
2968 		mutex_unlock(&group->mutex);
2969 
2970 		return ret ?: count;
2971 	}
2972 
2973 	/* Otherwise, ensure that device exists and no driver is bound. */
2974 	if (list_empty(&group->devices) || group->owner_cnt) {
2975 		mutex_unlock(&group->mutex);
2976 		return -EPERM;
2977 	}
2978 
2979 	grp_dev = list_first_entry(&group->devices, struct group_device, list);
2980 	dev = grp_dev->dev;
2981 
2982 	ret = iommu_change_dev_def_domain(group, dev, req_type);
2983 
2984 	/*
2985 	 * Release the mutex here because the ops->probe_finalize() callback of
2986 	 * some vendor IOMMU drivers calls arm_iommu_attach_device() which
2987 	 * in turn might call back into IOMMU core code, where it tries to take
2988 	 * group->mutex, resulting in a deadlock.
2989 	 */
2990 	mutex_unlock(&group->mutex);
2991 
2992 	/* Make sure dma_ops is appropriately set */
2993 	if (!ret)
2994 		__iommu_group_dma_finalize(group);
2995 
2996 	return ret ?: count;
2997 }
2998 
2999 static bool iommu_is_default_domain(struct iommu_group *group)
3000 {
3001 	if (group->domain == group->default_domain)
3002 		return true;
3003 
3004 	/*
3005 	 * If the default domain was set to identity and it is still an identity
3006 	 * domain then we consider this a pass. This happens because of
3007 	 * amd_iommu_init_device() replacing the default identity domain with an
3008 	 * identity domain that has a different configuration for AMDGPU.
3009 	 */
3010 	if (group->default_domain &&
3011 	    group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
3012 	    group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
3013 		return true;
3014 	return false;
3015 }
3016 
3017 /**
3018  * iommu_device_use_default_domain() - Device driver wants to handle device
3019  *                                     DMA through the kernel DMA API.
3020  * @dev: The device.
3021  *
3022  * The device driver about to bind @dev wants to do DMA through the kernel
3023  * DMA API. Return 0 if it is allowed, otherwise an error.
3024  */
3025 int iommu_device_use_default_domain(struct device *dev)
3026 {
3027 	struct iommu_group *group = iommu_group_get(dev);
3028 	int ret = 0;
3029 
3030 	if (!group)
3031 		return 0;
3032 
3033 	mutex_lock(&group->mutex);
3034 	if (group->owner_cnt) {
3035 		if (group->owner || !iommu_is_default_domain(group) ||
3036 		    !xa_empty(&group->pasid_array)) {
3037 			ret = -EBUSY;
3038 			goto unlock_out;
3039 		}
3040 	}
3041 
3042 	group->owner_cnt++;
3043 
3044 unlock_out:
3045 	mutex_unlock(&group->mutex);
3046 	iommu_group_put(group);
3047 
3048 	return ret;
3049 }
3050 
3051 /**
3052  * iommu_device_unuse_default_domain() - Device driver stops handling device
3053  *                                       DMA through the kernel DMA API.
3054  * @dev: The device.
3055  *
3056  * The device driver doesn't want to do DMA through the kernel DMA API anymore.
3057  * It must be called after iommu_device_use_default_domain().
3058  */
3059 void iommu_device_unuse_default_domain(struct device *dev)
3060 {
3061 	struct iommu_group *group = iommu_group_get(dev);
3062 
3063 	if (!group)
3064 		return;
3065 
3066 	mutex_lock(&group->mutex);
3067 	if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array)))
3068 		group->owner_cnt--;
3069 
3070 	mutex_unlock(&group->mutex);
3071 	iommu_group_put(group);
3072 }
3073 
3074 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
3075 {
3076 	struct group_device *dev =
3077 		list_first_entry(&group->devices, struct group_device, list);
3078 
3079 	if (group->blocking_domain)
3080 		return 0;
3081 
3082 	group->blocking_domain =
3083 		__iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED);
3084 	if (!group->blocking_domain) {
3085 		/*
3086 		 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED,
3087 		 * create an empty domain instead.
3088 		 */
3089 		group->blocking_domain = __iommu_domain_alloc(
3090 			dev->dev->bus, IOMMU_DOMAIN_UNMANAGED);
3091 		if (!group->blocking_domain)
3092 			return -EINVAL;
3093 	}
3094 	return 0;
3095 }
3096 
3097 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner)
3098 {
3099 	int ret;
3100 
3101 	if ((group->domain && group->domain != group->default_domain) ||
3102 	    !xa_empty(&group->pasid_array))
3103 		return -EBUSY;
3104 
3105 	ret = __iommu_group_alloc_blocking_domain(group);
3106 	if (ret)
3107 		return ret;
3108 	ret = __iommu_group_set_domain(group, group->blocking_domain);
3109 	if (ret)
3110 		return ret;
3111 
3112 	group->owner = owner;
3113 	group->owner_cnt++;
3114 	return 0;
3115 }
3116 
3117 /**
3118  * iommu_group_claim_dma_owner() - Set DMA ownership of a group
3119  * @group: The group.
3120  * @owner: Caller specified pointer. Used for exclusive ownership.
3121  *
3122  * This is to support backward compatibility for vfio, which manages DMA
3123  * ownership at the iommu_group level. New callers should not use this
3124  * interface. Only a single owner may exist for a group.
3125  */
3126 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
3127 {
3128 	int ret = 0;
3129 
3130 	if (WARN_ON(!owner))
3131 		return -EINVAL;
3132 
3133 	mutex_lock(&group->mutex);
3134 	if (group->owner_cnt) {
3135 		ret = -EPERM;
3136 		goto unlock_out;
3137 	}
3138 
3139 	ret = __iommu_take_dma_ownership(group, owner);
3140 unlock_out:
3141 	mutex_unlock(&group->mutex);
3142 
3143 	return ret;
3144 }
3145 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
3146 
3147 /**
3148  * iommu_device_claim_dma_owner() - Set DMA ownership of a device
3149  * @dev: The device.
3150  * @owner: Caller specified pointer. Used for exclusive ownership.
3151  *
3152  * Claim the DMA ownership of a device. Multiple devices in the same group may
3153  * concurrently claim ownership if they present the same owner value. Returns 0
3154  * on success and an error code on failure.
3155  */
3156 int iommu_device_claim_dma_owner(struct device *dev, void *owner)
3157 {
3158 	struct iommu_group *group;
3159 	int ret = 0;
3160 
3161 	if (WARN_ON(!owner))
3162 		return -EINVAL;
3163 
3164 	group = iommu_group_get(dev);
3165 	if (!group)
3166 		return -ENODEV;
3167 
3168 	mutex_lock(&group->mutex);
3169 	if (group->owner_cnt) {
3170 		if (group->owner != owner) {
3171 			ret = -EPERM;
3172 			goto unlock_out;
3173 		}
3174 		group->owner_cnt++;
3175 		goto unlock_out;
3176 	}
3177 
3178 	ret = __iommu_take_dma_ownership(group, owner);
3179 unlock_out:
3180 	mutex_unlock(&group->mutex);
3181 	iommu_group_put(group);
3182 
3183 	return ret;
3184 }
3185 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
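
/*
 * Example (illustrative sketch): a user-space DMA framework claims ownership
 * before attaching its own domain and releases it when done. The "owner"
 * cookie only needs to be a stable pointer shared by co-operating callers;
 * "my_owner_cookie" is a placeholder.
 *
 *	ret = iommu_device_claim_dma_owner(dev, my_owner_cookie);
 *	if (ret)
 *		return ret;
 *	... attach and use a caller-owned domain ...
 *	iommu_device_release_dma_owner(dev);
 */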
3186 
3187 static void __iommu_release_dma_ownership(struct iommu_group *group)
3188 {
3189 	if (WARN_ON(!group->owner_cnt || !group->owner ||
3190 		    !xa_empty(&group->pasid_array)))
3191 		return;
3192 
3193 	group->owner_cnt = 0;
3194 	group->owner = NULL;
3195 	__iommu_group_set_domain_nofail(group, group->default_domain);
3196 }
3197 
3198 /**
3199  * iommu_group_release_dma_owner() - Release DMA ownership of a group
3200  * @group: The group.
3201  *
3202  * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
3203  */
3204 void iommu_group_release_dma_owner(struct iommu_group *group)
3205 {
3206 	mutex_lock(&group->mutex);
3207 	__iommu_release_dma_ownership(group);
3208 	mutex_unlock(&group->mutex);
3209 }
3210 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
3211 
3212 /**
3213  * iommu_device_release_dma_owner() - Release DMA ownership of a device
3214  * @dev: The device.
3215  *
3216  * Release the DMA ownership claimed by iommu_device_claim_dma_owner().
3217  */
3218 void iommu_device_release_dma_owner(struct device *dev)
3219 {
3220 	struct iommu_group *group = iommu_group_get(dev);
3221 
3222 	mutex_lock(&group->mutex);
3223 	if (group->owner_cnt > 1)
3224 		group->owner_cnt--;
3225 	else
3226 		__iommu_release_dma_ownership(group);
3227 	mutex_unlock(&group->mutex);
3228 	iommu_group_put(group);
3229 }
3230 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
3231 
3232 /**
3233  * iommu_group_dma_owner_claimed() - Query group dma ownership status
3234  * @group: The group.
3235  *
3236  * This provides status query on a given group. It is racy and only for
3237  * non-binding status reporting.
3238  */
3239 bool iommu_group_dma_owner_claimed(struct iommu_group *group)
3240 {
3241 	unsigned int user;
3242 
3243 	mutex_lock(&group->mutex);
3244 	user = group->owner_cnt;
3245 	mutex_unlock(&group->mutex);
3246 
3247 	return user;
3248 }
3249 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
3250 
3251 static int __iommu_set_group_pasid(struct iommu_domain *domain,
3252 				   struct iommu_group *group, ioasid_t pasid)
3253 {
3254 	struct group_device *device;
3255 	int ret = 0;
3256 
3257 	for_each_group_device(group, device) {
3258 		ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
3259 		if (ret)
3260 			break;
3261 	}
3262 
3263 	return ret;
3264 }
3265 
3266 static void __iommu_remove_group_pasid(struct iommu_group *group,
3267 				       ioasid_t pasid)
3268 {
3269 	struct group_device *device;
3270 	const struct iommu_ops *ops;
3271 
3272 	for_each_group_device(group, device) {
3273 		ops = dev_iommu_ops(device->dev);
3274 		ops->remove_dev_pasid(device->dev, pasid);
3275 	}
3276 }
3277 
3278 /*
3279  * iommu_attach_device_pasid() - Attach a domain to the pasid of a device
3280  * @domain: the iommu domain.
3281  * @dev: the attached device.
3282  * @pasid: the pasid of the device.
3283  *
3284  * Return: 0 on success, or an error.
3285  */
3286 int iommu_attach_device_pasid(struct iommu_domain *domain,
3287 			      struct device *dev, ioasid_t pasid)
3288 {
3289 	struct iommu_group *group;
3290 	void *curr;
3291 	int ret;
3292 
3293 	if (!domain->ops->set_dev_pasid)
3294 		return -EOPNOTSUPP;
3295 
3296 	group = iommu_group_get(dev);
3297 	if (!group)
3298 		return -ENODEV;
3299 
3300 	mutex_lock(&group->mutex);
3301 	curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
3302 	if (curr) {
3303 		ret = xa_err(curr) ? : -EBUSY;
3304 		goto out_unlock;
3305 	}
3306 
3307 	ret = __iommu_set_group_pasid(domain, group, pasid);
3308 	if (ret) {
3309 		__iommu_remove_group_pasid(group, pasid);
3310 		xa_erase(&group->pasid_array, pasid);
3311 	}
3312 out_unlock:
3313 	mutex_unlock(&group->mutex);
3314 	iommu_group_put(group);
3315 
3316 	return ret;
3317 }
3318 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
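
/*
 * Example (illustrative sketch): attach a domain to one PASID of a device and
 * detach it again; "pasid" is assumed to have been allocated by the caller
 * and the IOMMU driver must implement set_dev_pasid for the domain.
 *
 *	ret = iommu_attach_device_pasid(domain, dev, pasid);
 *	if (ret)
 *		return ret;
 *	...
 *	iommu_detach_device_pasid(domain, dev, pasid);
 */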
3319 
3320 /*
3321  * iommu_detach_device_pasid() - Detach the domain from the pasid of a device
3322  * @domain: the iommu domain.
3323  * @dev: the attached device.
3324  * @pasid: the pasid of the device.
3325  *
3326  * The @domain must have been attached to @pasid of the @dev with
3327  * iommu_attach_device_pasid().
3328  */
3329 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
3330 			       ioasid_t pasid)
3331 {
3332 	struct iommu_group *group = iommu_group_get(dev);
3333 
3334 	mutex_lock(&group->mutex);
3335 	__iommu_remove_group_pasid(group, pasid);
3336 	WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
3337 	mutex_unlock(&group->mutex);
3338 
3339 	iommu_group_put(group);
3340 }
3341 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
3342 
3343 /*
3344  * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
3345  * @dev: the queried device
3346  * @pasid: the pasid of the device
3347  * @type: matched domain type, 0 for any match
3348  *
3349  * This is a variant of iommu_get_domain_for_dev(). It returns the existing
3350  * domain attached to the pasid of a device. Callers must hold a lock around
3351  * this function and both iommu_attach/detach_device_pasid() whenever a domain
3352  * of the given type is being manipulated. This API does not internally resolve
3353  * races with attach/detach.
3354  *
3355  * Return: attached domain on success, NULL otherwise.
3356  */
3357 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
3358 						    ioasid_t pasid,
3359 						    unsigned int type)
3360 {
3361 	struct iommu_domain *domain;
3362 	struct iommu_group *group;
3363 
3364 	group = iommu_group_get(dev);
3365 	if (!group)
3366 		return NULL;
3367 
3368 	xa_lock(&group->pasid_array);
3369 	domain = xa_load(&group->pasid_array, pasid);
3370 	if (type && domain && domain->type != type)
3371 		domain = ERR_PTR(-EBUSY);
3372 	xa_unlock(&group->pasid_array);
3373 	iommu_group_put(group);
3374 
3375 	return domain;
3376 }
3377 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
3378 
3379 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
3380 					    struct mm_struct *mm)
3381 {
3382 	const struct iommu_ops *ops = dev_iommu_ops(dev);
3383 	struct iommu_domain *domain;
3384 
3385 	domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
3386 	if (!domain)
3387 		return NULL;
3388 
3389 	domain->type = IOMMU_DOMAIN_SVA;
3390 	mmgrab(mm);
3391 	domain->mm = mm;
3392 	domain->iopf_handler = iommu_sva_handle_iopf;
3393 	domain->fault_data = mm;
3394 
3395 	return domain;
3396 }
3397