xref: /openbmc/linux/drivers/iommu/iommu.c (revision ab6cacf8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  */
6 
7 #define pr_fmt(fmt)    "iommu: " fmt
8 
9 #include <linux/amba/bus.h>
10 #include <linux/device.h>
11 #include <linux/kernel.h>
12 #include <linux/bits.h>
13 #include <linux/bug.h>
14 #include <linux/types.h>
15 #include <linux/init.h>
16 #include <linux/export.h>
17 #include <linux/slab.h>
18 #include <linux/errno.h>
19 #include <linux/host1x_context_bus.h>
20 #include <linux/iommu.h>
21 #include <linux/idr.h>
22 #include <linux/err.h>
23 #include <linux/pci.h>
24 #include <linux/pci-ats.h>
25 #include <linux/bitops.h>
26 #include <linux/platform_device.h>
27 #include <linux/property.h>
28 #include <linux/fsl/mc.h>
29 #include <linux/module.h>
30 #include <linux/cc_platform.h>
31 #include <linux/cdx/cdx_bus.h>
32 #include <trace/events/iommu.h>
33 #include <linux/sched/mm.h>
34 #include <linux/msi.h>
35 
36 #include "dma-iommu.h"
37 
38 #include "iommu-sva.h"
39 
/* kset that every group kobject is attached to in iommu_group_alloc(). */
static struct kset *iommu_group_kset;
/* Allocator for the numeric id of each group. */
static DEFINE_IDA(iommu_group_ida);

/* Default domain type; switched between DMA and DMA-FQ further down. */
static unsigned int iommu_def_domain_type __read_mostly;
/* Strict DMA TLB invalidation; Kconfig default, overridable by "iommu.strict". */
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
/* Bitmask of IOMMU_CMD_LINE_* flags recording command-line overrides. */
static u32 iommu_cmd_line __read_mostly;
46 
/*
 * An IOMMU group: a kobject-backed collection of devices, created by
 * iommu_group_alloc() and populated through iommu_group_add_device().
 */
struct iommu_group {
	/* The group's own kobject; its release callback frees the group. */
	struct kobject kobj;
	/* "devices" child kobject, also used for group reference counting. */
	struct kobject *devices_kobj;
	/* List of struct group_device, one per member device. */
	struct list_head devices;
	/* NOTE(review): presumably per-PASID state for the group - confirm. */
	struct xarray pasid_array;
	/* Held while the device list is walked or modified. */
	struct mutex mutex;
	/* Opaque driver data, see iommu_group_set_iommudata(). */
	void *iommu_data;
	/* Called with @iommu_data when the group is released, if set. */
	void (*iommu_data_release)(void *iommu_data);
	/* Optional name exposed in sysfs, see iommu_group_set_name(). */
	char *name;
	/* Numeric id allocated from iommu_group_ida. */
	int id;
	/* Freed on group release when set. */
	struct iommu_domain *default_domain;
	/* Freed on group release when set. */
	struct iommu_domain *blocking_domain;
	/* Domain that newly added devices are attached to, if any. */
	struct iommu_domain *domain;
	/* Links the group into a caller-provided list during device probe. */
	struct list_head entry;
	/* Expected to be zero by the time the group becomes empty. */
	unsigned int owner_cnt;
	/* When non-NULL, iommu_probe_device() skips the default-domain attach. */
	void *owner;
};
64 
/* One member of an iommu_group's device list. */
struct group_device {
	/* Entry in iommu_group.devices. */
	struct list_head list;
	/* The member device. */
	struct device *dev;
	/* Name of this device's link under the group's "devices" directory. */
	char *name;
};
70 
/*
 * A sysfs attribute of an iommu_group. The generic sysfs show/store
 * entry points in this file dispatch to @show and @store, returning -EIO
 * when the respective callback is not provided.
 */
struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};
77 
/*
 * Strings printed in the "reserved_regions" group attribute, indexed by
 * reserved region type. Both MSI types are reported as "msi".
 */
static const char * const iommu_group_resv_type_string[] = {
	[IOMMU_RESV_DIRECT]			= "direct",
	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
	[IOMMU_RESV_RESERVED]			= "reserved",
	[IOMMU_RESV_MSI]			= "msi",
	[IOMMU_RESV_SW_MSI]			= "msi",
};
85 
/* Flags kept in iommu_cmd_line. */
/* The default domain type was chosen on the kernel command line. */
#define IOMMU_CMD_LINE_DMA_API		BIT(0)
/* The strict/lazy invalidation policy was chosen via "iommu.strict". */
#define IOMMU_CMD_LINE_STRICT		BIT(1)
88 
/* Forward declarations for static helpers referenced before their definitions. */
static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static int iommu_alloc_default_domain(struct iommu_group *group,
				      struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
						 unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
				struct iommu_group *group);
static int __iommu_group_set_domain(struct iommu_group *group,
				    struct iommu_domain *new_domain);
static int iommu_create_device_direct_mappings(struct iommu_group *group,
					       struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count);
107 
/* Define a struct iommu_group_attribute named iommu_group_attr_<_name>. */
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =		\
	__ATTR(_name, _mode, _show, _store)

/* Map an embedded struct attribute back to its iommu_group_attribute. */
#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
/* Map an embedded kobject back to its iommu_group. */
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)
116 
/* Registered IOMMU instances; additions and removals take iommu_device_lock. */
static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);
119 
/*
 * Bus types handled by the IOMMU core. A bus notifier is registered on each
 * of them at init time, and each is probed whenever an IOMMU instance is
 * registered. Entry 0 (the platform bus) is always present and is used by
 * iommu_device_register() to detect a conflicting driver.
 */
static struct bus_type * const iommu_buses[] = {
	&platform_bus_type,
#ifdef CONFIG_PCI
	&pci_bus_type,
#endif
#ifdef CONFIG_ARM_AMBA
	&amba_bustype,
#endif
#ifdef CONFIG_FSL_MC_BUS
	&fsl_mc_bus_type,
#endif
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
	&host1x_context_device_bus_type,
#endif
#ifdef CONFIG_CDX_BUS
	&cdx_bus_type,
#endif
};
138 
139 /*
140  * Use a function instead of an array here because the domain-type is a
141  * bit-field, so an array would waste memory.
142  */
143 static const char *iommu_domain_type_str(unsigned int t)
144 {
145 	switch (t) {
146 	case IOMMU_DOMAIN_BLOCKED:
147 		return "Blocked";
148 	case IOMMU_DOMAIN_IDENTITY:
149 		return "Passthrough";
150 	case IOMMU_DOMAIN_UNMANAGED:
151 		return "Unmanaged";
152 	case IOMMU_DOMAIN_DMA:
153 	case IOMMU_DOMAIN_DMA_FQ:
154 		return "Translated";
155 	default:
156 		return "Unknown";
157 	}
158 }
159 
160 static int __init iommu_subsys_init(void)
161 {
162 	struct notifier_block *nb;
163 
164 	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
165 		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
166 			iommu_set_default_passthrough(false);
167 		else
168 			iommu_set_default_translated(false);
169 
170 		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
171 			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
172 			iommu_set_default_translated(false);
173 		}
174 	}
175 
176 	if (!iommu_default_passthrough() && !iommu_dma_strict)
177 		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
178 
179 	pr_info("Default domain type: %s %s\n",
180 		iommu_domain_type_str(iommu_def_domain_type),
181 		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
182 			"(set via kernel command line)" : "");
183 
184 	if (!iommu_default_passthrough())
185 		pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
186 			iommu_dma_strict ? "strict" : "lazy",
187 			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
188 				"(set via kernel command line)" : "");
189 
190 	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
191 	if (!nb)
192 		return -ENOMEM;
193 
194 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
195 		nb[i].notifier_call = iommu_bus_notifier;
196 		bus_register_notifier(iommu_buses[i], &nb[i]);
197 	}
198 
199 	return 0;
200 }
201 subsys_initcall(iommu_subsys_init);
202 
203 static int remove_iommu_group(struct device *dev, void *data)
204 {
205 	if (dev->iommu && dev->iommu->iommu_dev == data)
206 		iommu_release_device(dev);
207 
208 	return 0;
209 }
210 
211 /**
212  * iommu_device_register() - Register an IOMMU hardware instance
213  * @iommu: IOMMU handle for the instance
214  * @ops:   IOMMU ops to associate with the instance
215  * @hwdev: (optional) actual instance device, used for fwnode lookup
216  *
217  * Return: 0 on success, or an error.
218  */
219 int iommu_device_register(struct iommu_device *iommu,
220 			  const struct iommu_ops *ops, struct device *hwdev)
221 {
222 	int err = 0;
223 
224 	/* We need to be able to take module references appropriately */
225 	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
226 		return -EINVAL;
227 	/*
228 	 * Temporarily enforce global restriction to a single driver. This was
229 	 * already the de-facto behaviour, since any possible combination of
230 	 * existing drivers would compete for at least the PCI or platform bus.
231 	 */
232 	if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops)
233 		return -EBUSY;
234 
235 	iommu->ops = ops;
236 	if (hwdev)
237 		iommu->fwnode = dev_fwnode(hwdev);
238 
239 	spin_lock(&iommu_device_lock);
240 	list_add_tail(&iommu->list, &iommu_device_list);
241 	spin_unlock(&iommu_device_lock);
242 
243 	for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
244 		iommu_buses[i]->iommu_ops = ops;
245 		err = bus_iommu_probe(iommu_buses[i]);
246 	}
247 	if (err)
248 		iommu_device_unregister(iommu);
249 	return err;
250 }
251 EXPORT_SYMBOL_GPL(iommu_device_register);
252 
253 void iommu_device_unregister(struct iommu_device *iommu)
254 {
255 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
256 		bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);
257 
258 	spin_lock(&iommu_device_lock);
259 	list_del(&iommu->list);
260 	spin_unlock(&iommu_device_lock);
261 }
262 EXPORT_SYMBOL_GPL(iommu_device_unregister);
263 
264 static struct dev_iommu *dev_iommu_get(struct device *dev)
265 {
266 	struct dev_iommu *param = dev->iommu;
267 
268 	if (param)
269 		return param;
270 
271 	param = kzalloc(sizeof(*param), GFP_KERNEL);
272 	if (!param)
273 		return NULL;
274 
275 	mutex_init(&param->lock);
276 	dev->iommu = param;
277 	return param;
278 }
279 
280 static void dev_iommu_free(struct device *dev)
281 {
282 	struct dev_iommu *param = dev->iommu;
283 
284 	dev->iommu = NULL;
285 	if (param->fwspec) {
286 		fwnode_handle_put(param->fwspec->iommu_fwnode);
287 		kfree(param->fwspec);
288 	}
289 	kfree(param);
290 }
291 
292 static u32 dev_iommu_get_max_pasids(struct device *dev)
293 {
294 	u32 max_pasids = 0, bits = 0;
295 	int ret;
296 
297 	if (dev_is_pci(dev)) {
298 		ret = pci_max_pasids(to_pci_dev(dev));
299 		if (ret > 0)
300 			max_pasids = ret;
301 	} else {
302 		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
303 		if (!ret)
304 			max_pasids = 1UL << bits;
305 	}
306 
307 	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
308 }
309 
310 static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
311 {
312 	const struct iommu_ops *ops = dev->bus->iommu_ops;
313 	struct iommu_device *iommu_dev;
314 	struct iommu_group *group;
315 	static DEFINE_MUTEX(iommu_probe_device_lock);
316 	int ret;
317 
318 	if (!ops)
319 		return -ENODEV;
320 	/*
321 	 * Serialise to avoid races between IOMMU drivers registering in
322 	 * parallel and/or the "replay" calls from ACPI/OF code via client
323 	 * driver probe. Once the latter have been cleaned up we should
324 	 * probably be able to use device_lock() here to minimise the scope,
325 	 * but for now enforcing a simple global ordering is fine.
326 	 */
327 	mutex_lock(&iommu_probe_device_lock);
328 	if (!dev_iommu_get(dev)) {
329 		ret = -ENOMEM;
330 		goto err_unlock;
331 	}
332 
333 	if (!try_module_get(ops->owner)) {
334 		ret = -EINVAL;
335 		goto err_free;
336 	}
337 
338 	iommu_dev = ops->probe_device(dev);
339 	if (IS_ERR(iommu_dev)) {
340 		ret = PTR_ERR(iommu_dev);
341 		goto out_module_put;
342 	}
343 
344 	dev->iommu->iommu_dev = iommu_dev;
345 	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
346 
347 	group = iommu_group_get_for_dev(dev);
348 	if (IS_ERR(group)) {
349 		ret = PTR_ERR(group);
350 		goto out_release;
351 	}
352 
353 	mutex_lock(&group->mutex);
354 	if (group_list && !group->default_domain && list_empty(&group->entry))
355 		list_add_tail(&group->entry, group_list);
356 	mutex_unlock(&group->mutex);
357 	iommu_group_put(group);
358 
359 	mutex_unlock(&iommu_probe_device_lock);
360 	iommu_device_link(iommu_dev, dev);
361 
362 	return 0;
363 
364 out_release:
365 	if (ops->release_device)
366 		ops->release_device(dev);
367 
368 out_module_put:
369 	module_put(ops->owner);
370 
371 err_free:
372 	dev_iommu_free(dev);
373 
374 err_unlock:
375 	mutex_unlock(&iommu_probe_device_lock);
376 
377 	return ret;
378 }
379 
380 static bool iommu_is_attach_deferred(struct device *dev)
381 {
382 	const struct iommu_ops *ops = dev_iommu_ops(dev);
383 
384 	if (ops->is_attach_deferred)
385 		return ops->is_attach_deferred(dev);
386 
387 	return false;
388 }
389 
390 static int iommu_group_do_dma_first_attach(struct device *dev, void *data)
391 {
392 	struct iommu_domain *domain = data;
393 
394 	lockdep_assert_held(&dev->iommu_group->mutex);
395 
396 	if (iommu_is_attach_deferred(dev)) {
397 		dev->iommu->attach_deferred = 1;
398 		return 0;
399 	}
400 
401 	return __iommu_attach_device(domain, dev);
402 }
403 
404 int iommu_probe_device(struct device *dev)
405 {
406 	const struct iommu_ops *ops;
407 	struct iommu_group *group;
408 	int ret;
409 
410 	ret = __iommu_probe_device(dev, NULL);
411 	if (ret)
412 		goto err_out;
413 
414 	group = iommu_group_get(dev);
415 	if (!group) {
416 		ret = -ENODEV;
417 		goto err_release;
418 	}
419 
420 	/*
421 	 * Try to allocate a default domain - needs support from the
422 	 * IOMMU driver. There are still some drivers which don't
423 	 * support default domains, so the return value is not yet
424 	 * checked.
425 	 */
426 	mutex_lock(&group->mutex);
427 	iommu_alloc_default_domain(group, dev);
428 
429 	/*
430 	 * If device joined an existing group which has been claimed, don't
431 	 * attach the default domain.
432 	 */
433 	if (group->default_domain && !group->owner) {
434 		ret = iommu_group_do_dma_first_attach(dev, group->default_domain);
435 		if (ret) {
436 			mutex_unlock(&group->mutex);
437 			iommu_group_put(group);
438 			goto err_release;
439 		}
440 	}
441 
442 	iommu_create_device_direct_mappings(group, dev);
443 
444 	mutex_unlock(&group->mutex);
445 	iommu_group_put(group);
446 
447 	ops = dev_iommu_ops(dev);
448 	if (ops->probe_finalize)
449 		ops->probe_finalize(dev);
450 
451 	return 0;
452 
453 err_release:
454 	iommu_release_device(dev);
455 
456 err_out:
457 	return ret;
458 
459 }
460 
461 /*
462  * Remove a device from a group's device list and return the group device
463  * if successful.
464  */
465 static struct group_device *
466 __iommu_group_remove_device(struct iommu_group *group, struct device *dev)
467 {
468 	struct group_device *device;
469 
470 	lockdep_assert_held(&group->mutex);
471 	list_for_each_entry(device, &group->devices, list) {
472 		if (device->dev == dev) {
473 			list_del(&device->list);
474 			return device;
475 		}
476 	}
477 
478 	return NULL;
479 }
480 
481 /*
482  * Release a device from its group and decrements the iommu group reference
483  * count.
484  */
485 static void __iommu_group_release_device(struct iommu_group *group,
486 					 struct group_device *grp_dev)
487 {
488 	struct device *dev = grp_dev->dev;
489 
490 	sysfs_remove_link(group->devices_kobj, grp_dev->name);
491 	sysfs_remove_link(&dev->kobj, "iommu_group");
492 
493 	trace_remove_device_from_group(group->id, dev);
494 
495 	kfree(grp_dev->name);
496 	kfree(grp_dev);
497 	dev->iommu_group = NULL;
498 	kobject_put(group->devices_kobj);
499 }
500 
501 static void iommu_release_device(struct device *dev)
502 {
503 	struct iommu_group *group = dev->iommu_group;
504 	struct group_device *device;
505 	const struct iommu_ops *ops;
506 
507 	if (!dev->iommu || !group)
508 		return;
509 
510 	iommu_device_unlink(dev->iommu->iommu_dev, dev);
511 
512 	mutex_lock(&group->mutex);
513 	device = __iommu_group_remove_device(group, dev);
514 
515 	/*
516 	 * If the group has become empty then ownership must have been released,
517 	 * and the current domain must be set back to NULL or the default
518 	 * domain.
519 	 */
520 	if (list_empty(&group->devices))
521 		WARN_ON(group->owner_cnt ||
522 			group->domain != group->default_domain);
523 
524 	/*
525 	 * release_device() must stop using any attached domain on the device.
526 	 * If there are still other devices in the group they are not effected
527 	 * by this callback.
528 	 *
529 	 * The IOMMU driver must set the device to either an identity or
530 	 * blocking translation and stop using any domain pointer, as it is
531 	 * going to be freed.
532 	 */
533 	ops = dev_iommu_ops(dev);
534 	if (ops->release_device)
535 		ops->release_device(dev);
536 	mutex_unlock(&group->mutex);
537 
538 	if (device)
539 		__iommu_group_release_device(group, device);
540 
541 	module_put(ops->owner);
542 	dev_iommu_free(dev);
543 }
544 
545 static int __init iommu_set_def_domain_type(char *str)
546 {
547 	bool pt;
548 	int ret;
549 
550 	ret = kstrtobool(str, &pt);
551 	if (ret)
552 		return ret;
553 
554 	if (pt)
555 		iommu_set_default_passthrough(true);
556 	else
557 		iommu_set_default_translated(true);
558 
559 	return 0;
560 }
561 early_param("iommu.passthrough", iommu_set_def_domain_type);
562 
563 static int __init iommu_dma_setup(char *str)
564 {
565 	int ret = kstrtobool(str, &iommu_dma_strict);
566 
567 	if (!ret)
568 		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
569 	return ret;
570 }
571 early_param("iommu.strict", iommu_dma_setup);
572 
573 void iommu_set_dma_strict(void)
574 {
575 	iommu_dma_strict = true;
576 	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
577 		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
578 }
579 
580 static ssize_t iommu_group_attr_show(struct kobject *kobj,
581 				     struct attribute *__attr, char *buf)
582 {
583 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
584 	struct iommu_group *group = to_iommu_group(kobj);
585 	ssize_t ret = -EIO;
586 
587 	if (attr->show)
588 		ret = attr->show(group, buf);
589 	return ret;
590 }
591 
592 static ssize_t iommu_group_attr_store(struct kobject *kobj,
593 				      struct attribute *__attr,
594 				      const char *buf, size_t count)
595 {
596 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
597 	struct iommu_group *group = to_iommu_group(kobj);
598 	ssize_t ret = -EIO;
599 
600 	if (attr->store)
601 		ret = attr->store(group, buf, count);
602 	return ret;
603 }
604 
/* sysfs operations shared by all group attributes; see the two helpers above. */
static const struct sysfs_ops iommu_group_sysfs_ops = {
	.show = iommu_group_attr_show,
	.store = iommu_group_attr_store,
};
609 
610 static int iommu_group_create_file(struct iommu_group *group,
611 				   struct iommu_group_attribute *attr)
612 {
613 	return sysfs_create_file(&group->kobj, &attr->attr);
614 }
615 
616 static void iommu_group_remove_file(struct iommu_group *group,
617 				    struct iommu_group_attribute *attr)
618 {
619 	sysfs_remove_file(&group->kobj, &attr->attr);
620 }
621 
622 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
623 {
624 	return sysfs_emit(buf, "%s\n", group->name);
625 }
626 
/**
 * iommu_insert_resv_region - Insert a new region in the
 * list of reserved regions.
 * @new: new region to insert
 * @regions: list of regions
 *
 * Elements are sorted by start address and overlapping segments
 * of the same type are merged.
 *
 * @new itself is not linked into @regions; a freshly allocated copy is.
 *
 * Return: 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int iommu_insert_resv_region(struct iommu_resv_region *new,
				    struct list_head *regions)
{
	struct iommu_resv_region *iter, *tmp, *nr, *top;
	LIST_HEAD(stack);

	nr = iommu_alloc_resv_region(new->start, new->length,
				     new->prot, new->type, GFP_KERNEL);
	if (!nr)
		return -ENOMEM;

	/* First add the new element based on start address sorting */
	list_for_each_entry(iter, regions, list) {
		if (nr->start < iter->start ||
		    (nr->start == iter->start && nr->type <= iter->type))
			break;
	}
	/*
	 * Insert before @iter. If the loop ran to completion, @iter's list
	 * member is the list head itself, so this appends at the tail.
	 */
	list_add_tail(&nr->list, &iter->list);

	/*
	 * Merge overlapping segments of type nr->type in @regions, if any.
	 * Elements are moved one by one onto @stack, which is spliced back
	 * into @regions at the end.
	 */
	list_for_each_entry_safe(iter, tmp, regions, list) {
		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;

		/* no merge needed on elements of different types than @new */
		if (iter->type != new->type) {
			list_move_tail(&iter->list, &stack);
			continue;
		}

		/* look for the last stack element of same type as @iter */
		list_for_each_entry_reverse(top, &stack, list)
			if (top->type == iter->type)
				goto check_overlap;

		/* No earlier element of this type: nothing to merge with. */
		list_move_tail(&iter->list, &stack);
		continue;

check_overlap:
		top_end = top->start + top->length - 1;

		if (iter->start > top_end + 1) {
			/* Gap between @top and @iter: keep them separate. */
			list_move_tail(&iter->list, &stack);
		} else {
			/* Overlapping or adjacent: grow @top and drop @iter. */
			top->length = max(top_end, iter_end) - top->start + 1;
			list_del(&iter->list);
			kfree(iter);
		}
	}
	list_splice(&stack, regions);
	return 0;
}
687 
688 static int
689 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
690 				 struct list_head *group_resv_regions)
691 {
692 	struct iommu_resv_region *entry;
693 	int ret = 0;
694 
695 	list_for_each_entry(entry, dev_resv_regions, list) {
696 		ret = iommu_insert_resv_region(entry, group_resv_regions);
697 		if (ret)
698 			break;
699 	}
700 	return ret;
701 }
702 
703 int iommu_get_group_resv_regions(struct iommu_group *group,
704 				 struct list_head *head)
705 {
706 	struct group_device *device;
707 	int ret = 0;
708 
709 	mutex_lock(&group->mutex);
710 	list_for_each_entry(device, &group->devices, list) {
711 		struct list_head dev_resv_regions;
712 
713 		/*
714 		 * Non-API groups still expose reserved_regions in sysfs,
715 		 * so filter out calls that get here that way.
716 		 */
717 		if (!device->dev->iommu)
718 			break;
719 
720 		INIT_LIST_HEAD(&dev_resv_regions);
721 		iommu_get_resv_regions(device->dev, &dev_resv_regions);
722 		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
723 		iommu_put_resv_regions(device->dev, &dev_resv_regions);
724 		if (ret)
725 			break;
726 	}
727 	mutex_unlock(&group->mutex);
728 	return ret;
729 }
730 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);
731 
732 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
733 					     char *buf)
734 {
735 	struct iommu_resv_region *region, *next;
736 	struct list_head group_resv_regions;
737 	int offset = 0;
738 
739 	INIT_LIST_HEAD(&group_resv_regions);
740 	iommu_get_group_resv_regions(group, &group_resv_regions);
741 
742 	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
743 		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
744 					(long long)region->start,
745 					(long long)(region->start +
746 						    region->length - 1),
747 					iommu_group_resv_type_string[region->type]);
748 		kfree(region);
749 	}
750 
751 	return offset;
752 }
753 
754 static ssize_t iommu_group_show_type(struct iommu_group *group,
755 				     char *buf)
756 {
757 	char *type = "unknown";
758 
759 	mutex_lock(&group->mutex);
760 	if (group->default_domain) {
761 		switch (group->default_domain->type) {
762 		case IOMMU_DOMAIN_BLOCKED:
763 			type = "blocked";
764 			break;
765 		case IOMMU_DOMAIN_IDENTITY:
766 			type = "identity";
767 			break;
768 		case IOMMU_DOMAIN_UNMANAGED:
769 			type = "unmanaged";
770 			break;
771 		case IOMMU_DOMAIN_DMA:
772 			type = "DMA";
773 			break;
774 		case IOMMU_DOMAIN_DMA_FQ:
775 			type = "DMA-FQ";
776 			break;
777 		}
778 	}
779 	mutex_unlock(&group->mutex);
780 
781 	return sysfs_emit(buf, "%s\n", type);
782 }
783 
/* Read-only "name" file; created only once a name has been set. */
static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

/* Read-only "reserved_regions" file; created for every group. */
static IOMMU_GROUP_ATTR(reserved_regions, 0444,
			iommu_group_show_resv_regions, NULL);

/* Read-write "type" file; created for every group. */
static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
			iommu_group_store_type);
791 
792 static void iommu_group_release(struct kobject *kobj)
793 {
794 	struct iommu_group *group = to_iommu_group(kobj);
795 
796 	pr_debug("Releasing group %d\n", group->id);
797 
798 	if (group->iommu_data_release)
799 		group->iommu_data_release(group->iommu_data);
800 
801 	ida_free(&iommu_group_ida, group->id);
802 
803 	if (group->default_domain)
804 		iommu_domain_free(group->default_domain);
805 	if (group->blocking_domain)
806 		iommu_domain_free(group->blocking_domain);
807 
808 	kfree(group->name);
809 	kfree(group);
810 }
811 
/* kobject type of a group: group sysfs ops plus the release callback above. */
static const struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};
816 
817 /**
818  * iommu_group_alloc - Allocate a new group
819  *
820  * This function is called by an iommu driver to allocate a new iommu
821  * group.  The iommu group represents the minimum granularity of the iommu.
822  * Upon successful return, the caller holds a reference to the supplied
823  * group in order to hold the group until devices are added.  Use
824  * iommu_group_put() to release this extra reference count, allowing the
825  * group to be automatically reclaimed once it has no devices or external
826  * references.
827  */
828 struct iommu_group *iommu_group_alloc(void)
829 {
830 	struct iommu_group *group;
831 	int ret;
832 
833 	group = kzalloc(sizeof(*group), GFP_KERNEL);
834 	if (!group)
835 		return ERR_PTR(-ENOMEM);
836 
837 	group->kobj.kset = iommu_group_kset;
838 	mutex_init(&group->mutex);
839 	INIT_LIST_HEAD(&group->devices);
840 	INIT_LIST_HEAD(&group->entry);
841 	xa_init(&group->pasid_array);
842 
843 	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
844 	if (ret < 0) {
845 		kfree(group);
846 		return ERR_PTR(ret);
847 	}
848 	group->id = ret;
849 
850 	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
851 				   NULL, "%d", group->id);
852 	if (ret) {
853 		kobject_put(&group->kobj);
854 		return ERR_PTR(ret);
855 	}
856 
857 	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
858 	if (!group->devices_kobj) {
859 		kobject_put(&group->kobj); /* triggers .release & free */
860 		return ERR_PTR(-ENOMEM);
861 	}
862 
863 	/*
864 	 * The devices_kobj holds a reference on the group kobject, so
865 	 * as long as that exists so will the group.  We can therefore
866 	 * use the devices_kobj for reference counting.
867 	 */
868 	kobject_put(&group->kobj);
869 
870 	ret = iommu_group_create_file(group,
871 				      &iommu_group_attr_reserved_regions);
872 	if (ret) {
873 		kobject_put(group->devices_kobj);
874 		return ERR_PTR(ret);
875 	}
876 
877 	ret = iommu_group_create_file(group, &iommu_group_attr_type);
878 	if (ret) {
879 		kobject_put(group->devices_kobj);
880 		return ERR_PTR(ret);
881 	}
882 
883 	pr_debug("Allocated group %d\n", group->id);
884 
885 	return group;
886 }
887 EXPORT_SYMBOL_GPL(iommu_group_alloc);
888 
889 /**
890  * iommu_group_get_iommudata - retrieve iommu_data registered for a group
891  * @group: the group
892  *
893  * iommu drivers can store data in the group for use when doing iommu
894  * operations.  This function provides a way to retrieve it.  Caller
895  * should hold a group reference.
896  */
897 void *iommu_group_get_iommudata(struct iommu_group *group)
898 {
899 	return group->iommu_data;
900 }
901 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
902 
903 /**
904  * iommu_group_set_iommudata - set iommu_data for a group
905  * @group: the group
906  * @iommu_data: new data
907  * @release: release function for iommu_data
908  *
909  * iommu drivers can store data in the group for use when doing iommu
910  * operations.  This function provides a way to set the data after
911  * the group has been allocated.  Caller should hold a group reference.
912  */
913 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
914 			       void (*release)(void *iommu_data))
915 {
916 	group->iommu_data = iommu_data;
917 	group->iommu_data_release = release;
918 }
919 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
920 
921 /**
922  * iommu_group_set_name - set name for a group
923  * @group: the group
924  * @name: name
925  *
926  * Allow iommu driver to set a name for a group.  When set it will
927  * appear in a name attribute file under the group in sysfs.
928  */
929 int iommu_group_set_name(struct iommu_group *group, const char *name)
930 {
931 	int ret;
932 
933 	if (group->name) {
934 		iommu_group_remove_file(group, &iommu_group_attr_name);
935 		kfree(group->name);
936 		group->name = NULL;
937 		if (!name)
938 			return 0;
939 	}
940 
941 	group->name = kstrdup(name, GFP_KERNEL);
942 	if (!group->name)
943 		return -ENOMEM;
944 
945 	ret = iommu_group_create_file(group, &iommu_group_attr_name);
946 	if (ret) {
947 		kfree(group->name);
948 		group->name = NULL;
949 		return ret;
950 	}
951 
952 	return 0;
953 }
954 EXPORT_SYMBOL_GPL(iommu_group_set_name);
955 
/*
 * Identity-map the direct reserved regions of @dev into the default domain
 * of @group.
 *
 * Does nothing (and returns 0) when the group has no default domain or the
 * default domain is not a DMA domain. Otherwise every page of a
 * IOMMU_RESV_DIRECT / IOMMU_RESV_DIRECT_RELAXABLE region for which
 * iommu_iova_to_phys() returns 0 is mapped with IOVA == physical address.
 *
 * Return: 0 on success or the error returned by iommu_map().
 */
static int iommu_create_device_direct_mappings(struct iommu_group *group,
					       struct device *dev)
{
	struct iommu_domain *domain = group->default_domain;
	struct iommu_resv_region *entry;
	struct list_head mappings;
	unsigned long pg_size;
	int ret = 0;

	if (!domain || !iommu_is_dma_domain(domain))
		return 0;

	BUG_ON(!domain->pgsize_bitmap);

	/* Work in units of the lowest set bit of the page-size bitmap. */
	pg_size = 1UL << __ffs(domain->pgsize_bitmap);
	INIT_LIST_HEAD(&mappings);

	iommu_get_resv_regions(dev, &mappings);

	/* We need to consider overlapping regions for different devices */
	list_for_each_entry(entry, &mappings, list) {
		dma_addr_t start, end, addr;
		size_t map_size = 0;

		start = ALIGN(entry->start, pg_size);
		end   = ALIGN(entry->start + entry->length, pg_size);

		if (entry->type != IOMMU_RESV_DIRECT &&
		    entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		/*
		 * Walk the region page by page, accumulating runs of pages
		 * that have no translation yet in @map_size. A run is mapped
		 * when a page with an existing translation is met or when
		 * @end is reached (hence the inclusive loop bound).
		 */
		for (addr = start; addr <= end; addr += pg_size) {
			phys_addr_t phys_addr;

			if (addr == end)
				goto map_end;

			phys_addr = iommu_iova_to_phys(domain, addr);
			if (!phys_addr) {
				map_size += pg_size;
				continue;
			}

map_end:
			if (map_size) {
				/* The run ends just below @addr; map it 1:1. */
				ret = iommu_map(domain, addr - map_size,
						addr - map_size, map_size,
						entry->prot, GFP_KERNEL);
				if (ret)
					goto out;
				map_size = 0;
			}
		}

	}

	iommu_flush_iotlb_all(domain);

out:
	/* Reached on success and on mapping failure (which skips the flush). */
	iommu_put_resv_regions(dev, &mappings);

	return ret;
}
1019 
/**
 * iommu_group_add_device - add a device to an iommu group
 * @group: the group into which to add the device (reference should be held)
 * @dev: the device
 *
 * This function is called by an iommu driver to add a device into a
 * group.  Adding a device increments the group reference count.
 *
 * Two sysfs links are created: "iommu_group" in the device's directory,
 * pointing at the group, and a link named after the device in the group's
 * "devices" directory. If the group already has a current domain, the
 * device goes through the first-attach path for that domain.
 *
 * Return: 0 on success or a negative errno, in which case everything done
 * so far has been undone.
 */
int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
	int ret, i = 0;
	struct group_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return -ENOMEM;

	device->dev = dev;

	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
	if (ret)
		goto err_free_device;

	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
rename:
	/* Also re-entered from below with a new candidate name. */
	if (!device->name) {
		ret = -ENOMEM;
		goto err_remove_link;
	}

	ret = sysfs_create_link_nowarn(group->devices_kobj,
				       &dev->kobj, device->name);
	if (ret) {
		if (ret == -EEXIST && i >= 0) {
			/*
			 * Account for the slim chance of collision
			 * and append an instance to the name.
			 */
			kfree(device->name);
			device->name = kasprintf(GFP_KERNEL, "%s.%d",
						 kobject_name(&dev->kobj), i++);
			goto rename;
		}
		goto err_free_name;
	}

	/* The member device holds a reference on the group via devices_kobj. */
	kobject_get(group->devices_kobj);

	dev->iommu_group = group;

	mutex_lock(&group->mutex);
	list_add_tail(&device->list, &group->devices);
	/* ret is 0 here, so it stays 0 when there is no current domain. */
	if (group->domain)
		ret = iommu_group_do_dma_first_attach(dev, group->domain);
	mutex_unlock(&group->mutex);
	if (ret)
		goto err_put_group;

	trace_add_device_to_group(group->id, dev);

	dev_info(dev, "Adding to iommu group %d\n", group->id);

	return 0;

	/* Unwind in reverse order of the setup steps above. */
err_put_group:
	mutex_lock(&group->mutex);
	list_del(&device->list);
	mutex_unlock(&group->mutex);
	dev->iommu_group = NULL;
	kobject_put(group->devices_kobj);
	sysfs_remove_link(group->devices_kobj, device->name);
err_free_name:
	kfree(device->name);
err_remove_link:
	sysfs_remove_link(&dev->kobj, "iommu_group");
err_free_device:
	kfree(device);
	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);
1101 
1102 /**
1103  * iommu_group_remove_device - remove a device from it's current group
1104  * @dev: device to be removed
1105  *
1106  * This function is called by an iommu driver to remove the device from
1107  * it's current group.  This decrements the iommu group reference count.
1108  */
1109 void iommu_group_remove_device(struct device *dev)
1110 {
1111 	struct iommu_group *group = dev->iommu_group;
1112 	struct group_device *device;
1113 
1114 	if (!group)
1115 		return;
1116 
1117 	dev_info(dev, "Removing from iommu group %d\n", group->id);
1118 
1119 	mutex_lock(&group->mutex);
1120 	device = __iommu_group_remove_device(group, dev);
1121 	mutex_unlock(&group->mutex);
1122 
1123 	if (device)
1124 		__iommu_group_release_device(group, device);
1125 }
1126 EXPORT_SYMBOL_GPL(iommu_group_remove_device);
1127 
1128 static int iommu_group_device_count(struct iommu_group *group)
1129 {
1130 	struct group_device *entry;
1131 	int ret = 0;
1132 
1133 	list_for_each_entry(entry, &group->devices, list)
1134 		ret++;
1135 
1136 	return ret;
1137 }
1138 
1139 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
1140 				      int (*fn)(struct device *, void *))
1141 {
1142 	struct group_device *device;
1143 	int ret = 0;
1144 
1145 	list_for_each_entry(device, &group->devices, list) {
1146 		ret = fn(device->dev, data);
1147 		if (ret)
1148 			break;
1149 	}
1150 	return ret;
1151 }
1152 
1153 /**
1154  * iommu_group_for_each_dev - iterate over each device in the group
1155  * @group: the group
1156  * @data: caller opaque data to be passed to callback function
1157  * @fn: caller supplied callback function
1158  *
1159  * This function is called by group users to iterate over group devices.
1160  * Callers should hold a reference count to the group during callback.
1161  * The group->mutex is held across callbacks, which will block calls to
1162  * iommu_group_add/remove_device.
1163  */
1164 int iommu_group_for_each_dev(struct iommu_group *group, void *data,
1165 			     int (*fn)(struct device *, void *))
1166 {
1167 	int ret;
1168 
1169 	mutex_lock(&group->mutex);
1170 	ret = __iommu_group_for_each_dev(group, data, fn);
1171 	mutex_unlock(&group->mutex);
1172 
1173 	return ret;
1174 }
1175 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
1176 
1177 /**
1178  * iommu_group_get - Return the group for a device and increment reference
1179  * @dev: get the group that this device belongs to
1180  *
1181  * This function is called by iommu drivers and users to get the group
1182  * for the specified device.  If found, the group is returned and the group
1183  * reference in incremented, else NULL.
1184  */
1185 struct iommu_group *iommu_group_get(struct device *dev)
1186 {
1187 	struct iommu_group *group = dev->iommu_group;
1188 
1189 	if (group)
1190 		kobject_get(group->devices_kobj);
1191 
1192 	return group;
1193 }
1194 EXPORT_SYMBOL_GPL(iommu_group_get);
1195 
1196 /**
1197  * iommu_group_ref_get - Increment reference on a group
1198  * @group: the group to use, must not be NULL
1199  *
1200  * This function is called by iommu drivers to take additional references on an
1201  * existing group.  Returns the given group for convenience.
1202  */
1203 struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
1204 {
1205 	kobject_get(group->devices_kobj);
1206 	return group;
1207 }
1208 EXPORT_SYMBOL_GPL(iommu_group_ref_get);
1209 
1210 /**
1211  * iommu_group_put - Decrement group reference
1212  * @group: the group to use
1213  *
1214  * This function is called by iommu drivers and users to release the
1215  * iommu group.  Once the reference count is zero, the group is released.
1216  */
1217 void iommu_group_put(struct iommu_group *group)
1218 {
1219 	if (group)
1220 		kobject_put(group->devices_kobj);
1221 }
1222 EXPORT_SYMBOL_GPL(iommu_group_put);
1223 
1224 /**
1225  * iommu_register_device_fault_handler() - Register a device fault handler
1226  * @dev: the device
1227  * @handler: the fault handler
1228  * @data: private data passed as argument to the handler
1229  *
1230  * When an IOMMU fault event is received, this handler gets called with the
1231  * fault event and data as argument. The handler should return 0 on success. If
1232  * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
1233  * complete the fault by calling iommu_page_response() with one of the following
1234  * response code:
1235  * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
1236  * - IOMMU_PAGE_RESP_INVALID: terminate the fault
1237  * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
1238  *   page faults if possible.
1239  *
1240  * Return 0 if the fault handler was installed successfully, or an error.
1241  */
1242 int iommu_register_device_fault_handler(struct device *dev,
1243 					iommu_dev_fault_handler_t handler,
1244 					void *data)
1245 {
1246 	struct dev_iommu *param = dev->iommu;
1247 	int ret = 0;
1248 
1249 	if (!param)
1250 		return -EINVAL;
1251 
1252 	mutex_lock(&param->lock);
1253 	/* Only allow one fault handler registered for each device */
1254 	if (param->fault_param) {
1255 		ret = -EBUSY;
1256 		goto done_unlock;
1257 	}
1258 
1259 	get_device(dev);
1260 	param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
1261 	if (!param->fault_param) {
1262 		put_device(dev);
1263 		ret = -ENOMEM;
1264 		goto done_unlock;
1265 	}
1266 	param->fault_param->handler = handler;
1267 	param->fault_param->data = data;
1268 	mutex_init(&param->fault_param->lock);
1269 	INIT_LIST_HEAD(&param->fault_param->faults);
1270 
1271 done_unlock:
1272 	mutex_unlock(&param->lock);
1273 
1274 	return ret;
1275 }
1276 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
1277 
1278 /**
1279  * iommu_unregister_device_fault_handler() - Unregister the device fault handler
1280  * @dev: the device
1281  *
1282  * Remove the device fault handler installed with
1283  * iommu_register_device_fault_handler().
1284  *
1285  * Return 0 on success, or an error.
1286  */
1287 int iommu_unregister_device_fault_handler(struct device *dev)
1288 {
1289 	struct dev_iommu *param = dev->iommu;
1290 	int ret = 0;
1291 
1292 	if (!param)
1293 		return -EINVAL;
1294 
1295 	mutex_lock(&param->lock);
1296 
1297 	if (!param->fault_param)
1298 		goto unlock;
1299 
1300 	/* we cannot unregister handler if there are pending faults */
1301 	if (!list_empty(&param->fault_param->faults)) {
1302 		ret = -EBUSY;
1303 		goto unlock;
1304 	}
1305 
1306 	kfree(param->fault_param);
1307 	param->fault_param = NULL;
1308 	put_device(dev);
1309 unlock:
1310 	mutex_unlock(&param->lock);
1311 
1312 	return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
1315 
1316 /**
1317  * iommu_report_device_fault() - Report fault event to device driver
1318  * @dev: the device
1319  * @evt: fault event data
1320  *
1321  * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
1322  * handler. When this function fails and the fault is recoverable, it is the
1323  * caller's responsibility to complete the fault.
1324  *
1325  * Return 0 on success, or an error.
1326  */
1327 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
1328 {
1329 	struct dev_iommu *param = dev->iommu;
1330 	struct iommu_fault_event *evt_pending = NULL;
1331 	struct iommu_fault_param *fparam;
1332 	int ret = 0;
1333 
1334 	if (!param || !evt)
1335 		return -EINVAL;
1336 
1337 	/* we only report device fault if there is a handler registered */
1338 	mutex_lock(&param->lock);
1339 	fparam = param->fault_param;
1340 	if (!fparam || !fparam->handler) {
1341 		ret = -EINVAL;
1342 		goto done_unlock;
1343 	}
1344 
1345 	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
1346 	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
1347 		evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
1348 				      GFP_KERNEL);
1349 		if (!evt_pending) {
1350 			ret = -ENOMEM;
1351 			goto done_unlock;
1352 		}
1353 		mutex_lock(&fparam->lock);
1354 		list_add_tail(&evt_pending->list, &fparam->faults);
1355 		mutex_unlock(&fparam->lock);
1356 	}
1357 
1358 	ret = fparam->handler(&evt->fault, fparam->data);
1359 	if (ret && evt_pending) {
1360 		mutex_lock(&fparam->lock);
1361 		list_del(&evt_pending->list);
1362 		mutex_unlock(&fparam->lock);
1363 		kfree(evt_pending);
1364 	}
1365 done_unlock:
1366 	mutex_unlock(&param->lock);
1367 	return ret;
1368 }
1369 EXPORT_SYMBOL_GPL(iommu_report_device_fault);
1370 
1371 int iommu_page_response(struct device *dev,
1372 			struct iommu_page_response *msg)
1373 {
1374 	bool needs_pasid;
1375 	int ret = -EINVAL;
1376 	struct iommu_fault_event *evt;
1377 	struct iommu_fault_page_request *prm;
1378 	struct dev_iommu *param = dev->iommu;
1379 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1380 	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
1381 
1382 	if (!ops->page_response)
1383 		return -ENODEV;
1384 
1385 	if (!param || !param->fault_param)
1386 		return -EINVAL;
1387 
1388 	if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
1389 	    msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
1390 		return -EINVAL;
1391 
1392 	/* Only send response if there is a fault report pending */
1393 	mutex_lock(&param->fault_param->lock);
1394 	if (list_empty(&param->fault_param->faults)) {
1395 		dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
1396 		goto done_unlock;
1397 	}
1398 	/*
1399 	 * Check if we have a matching page request pending to respond,
1400 	 * otherwise return -EINVAL
1401 	 */
1402 	list_for_each_entry(evt, &param->fault_param->faults, list) {
1403 		prm = &evt->fault.prm;
1404 		if (prm->grpid != msg->grpid)
1405 			continue;
1406 
1407 		/*
1408 		 * If the PASID is required, the corresponding request is
1409 		 * matched using the group ID, the PASID valid bit and the PASID
1410 		 * value. Otherwise only the group ID matches request and
1411 		 * response.
1412 		 */
1413 		needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
1414 		if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
1415 			continue;
1416 
1417 		if (!needs_pasid && has_pasid) {
1418 			/* No big deal, just clear it. */
1419 			msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
1420 			msg->pasid = 0;
1421 		}
1422 
1423 		ret = ops->page_response(dev, evt, msg);
1424 		list_del(&evt->list);
1425 		kfree(evt);
1426 		break;
1427 	}
1428 
1429 done_unlock:
1430 	mutex_unlock(&param->fault_param->lock);
1431 	return ret;
1432 }
1433 EXPORT_SYMBOL_GPL(iommu_page_response);
1434 
1435 /**
1436  * iommu_group_id - Return ID for a group
1437  * @group: the group to ID
1438  *
1439  * Return the unique ID for the group matching the sysfs group number.
1440  */
1441 int iommu_group_id(struct iommu_group *group)
1442 {
1443 	return group->id;
1444 }
1445 EXPORT_SYMBOL_GPL(iommu_group_id);
1446 
/*
 * Forward declaration: get_pci_function_alias_group() below calls
 * get_pci_alias_group(), which in turn calls back into it.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);
1449 
/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding.  This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as it passes through a
 * bridge where the target device is downstream.
 *
 * The mask is passed to pci_acs_enabled() and pci_acs_path_enabled()
 * by the PCI grouping helpers in this file.
 */
#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
1459 
1460 /*
1461  * For multifunction devices which are not isolated from each other, find
1462  * all the other non-isolated functions and look for existing groups.  For
1463  * each function, we also need to look for aliases to or from other devices
1464  * that may already have a group.
1465  */
1466 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
1467 							unsigned long *devfns)
1468 {
1469 	struct pci_dev *tmp = NULL;
1470 	struct iommu_group *group;
1471 
1472 	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
1473 		return NULL;
1474 
1475 	for_each_pci_dev(tmp) {
1476 		if (tmp == pdev || tmp->bus != pdev->bus ||
1477 		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
1478 		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
1479 			continue;
1480 
1481 		group = get_pci_alias_group(tmp, devfns);
1482 		if (group) {
1483 			pci_dev_put(tmp);
1484 			return group;
1485 		}
1486 	}
1487 
1488 	return NULL;
1489 }
1490 
1491 /*
1492  * Look for aliases to or from the given device for existing groups. DMA
1493  * aliases are only supported on the same bus, therefore the search
1494  * space is quite small (especially since we're really only looking at pcie
1495  * device, and therefore only expect multiple slots on the root complex or
1496  * downstream switch ports).  It's conceivable though that a pair of
1497  * multifunction devices could have aliases between them that would cause a
1498  * loop.  To prevent this, we use a bitmap to track where we've been.
1499  */
1500 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
1501 					       unsigned long *devfns)
1502 {
1503 	struct pci_dev *tmp = NULL;
1504 	struct iommu_group *group;
1505 
1506 	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
1507 		return NULL;
1508 
1509 	group = iommu_group_get(&pdev->dev);
1510 	if (group)
1511 		return group;
1512 
1513 	for_each_pci_dev(tmp) {
1514 		if (tmp == pdev || tmp->bus != pdev->bus)
1515 			continue;
1516 
1517 		/* We alias them or they alias us */
1518 		if (pci_devs_are_dma_aliases(pdev, tmp)) {
1519 			group = get_pci_alias_group(tmp, devfns);
1520 			if (group) {
1521 				pci_dev_put(tmp);
1522 				return group;
1523 			}
1524 
1525 			group = get_pci_function_alias_group(tmp, devfns);
1526 			if (group) {
1527 				pci_dev_put(tmp);
1528 				return group;
1529 			}
1530 		}
1531 	}
1532 
1533 	return NULL;
1534 }
1535 
/* State filled in by get_pci_alias_or_group() during a DMA alias walk. */
struct group_for_pci_data {
	struct pci_dev *pdev;		/* device most recently passed to the callback */
	struct iommu_group *group;	/* iommu_group_get() result for that device, or NULL */
};
1540 
1541 /*
1542  * DMA alias iterator callback, return the last seen device.  Stop and return
1543  * the IOMMU group if we find one along the way.
1544  */
1545 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
1546 {
1547 	struct group_for_pci_data *data = opaque;
1548 
1549 	data->pdev = pdev;
1550 	data->group = iommu_group_get(&pdev->dev);
1551 
1552 	return data->group != NULL;
1553 }
1554 
/*
 * Generic device_group callback: every device gets a freshly allocated
 * iommu group of its own.  @dev is not consulted.
 */
struct iommu_group *generic_device_group(struct device *dev)
{
	struct iommu_group *fresh = iommu_group_alloc();

	return fresh;
}
EXPORT_SYMBOL_GPL(generic_device_group);
1564 
1565 /*
1566  * Use standard PCI bus topology, isolation features, and DMA alias quirks
1567  * to find or create an IOMMU group for a device.
1568  */
1569 struct iommu_group *pci_device_group(struct device *dev)
1570 {
1571 	struct pci_dev *pdev = to_pci_dev(dev);
1572 	struct group_for_pci_data data;
1573 	struct pci_bus *bus;
1574 	struct iommu_group *group = NULL;
1575 	u64 devfns[4] = { 0 };
1576 
1577 	if (WARN_ON(!dev_is_pci(dev)))
1578 		return ERR_PTR(-EINVAL);
1579 
1580 	/*
1581 	 * Find the upstream DMA alias for the device.  A device must not
1582 	 * be aliased due to topology in order to have its own IOMMU group.
1583 	 * If we find an alias along the way that already belongs to a
1584 	 * group, use it.
1585 	 */
1586 	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
1587 		return data.group;
1588 
1589 	pdev = data.pdev;
1590 
1591 	/*
1592 	 * Continue upstream from the point of minimum IOMMU granularity
1593 	 * due to aliases to the point where devices are protected from
1594 	 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
1595 	 * group, use it.
1596 	 */
1597 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
1598 		if (!bus->self)
1599 			continue;
1600 
1601 		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
1602 			break;
1603 
1604 		pdev = bus->self;
1605 
1606 		group = iommu_group_get(&pdev->dev);
1607 		if (group)
1608 			return group;
1609 	}
1610 
1611 	/*
1612 	 * Look for existing groups on device aliases.  If we alias another
1613 	 * device or another device aliases us, use the same group.
1614 	 */
1615 	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
1616 	if (group)
1617 		return group;
1618 
1619 	/*
1620 	 * Look for existing groups on non-isolated functions on the same
1621 	 * slot and aliases of those funcions, if any.  No need to clear
1622 	 * the search bitmap, the tested devfns are still valid.
1623 	 */
1624 	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
1625 	if (group)
1626 		return group;
1627 
1628 	/* No shared group found, allocate new */
1629 	return iommu_group_alloc();
1630 }
1631 EXPORT_SYMBOL_GPL(pci_device_group);
1632 
/*
 * Get the IOMMU group for a device on the fsl-mc bus: reuse the group of
 * the device returned by fsl_mc_cont_dev() when it has one, otherwise
 * allocate a new group.
 */
struct iommu_group *fsl_mc_device_group(struct device *dev)
{
	struct iommu_group *group = iommu_group_get(fsl_mc_cont_dev(dev));

	return group ? group : iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);
1645 
1646 static int iommu_get_def_domain_type(struct device *dev)
1647 {
1648 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1649 
1650 	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
1651 		return IOMMU_DOMAIN_DMA;
1652 
1653 	if (ops->def_domain_type)
1654 		return ops->def_domain_type(dev);
1655 
1656 	return 0;
1657 }
1658 
1659 static int iommu_group_alloc_default_domain(const struct bus_type *bus,
1660 					    struct iommu_group *group,
1661 					    unsigned int type)
1662 {
1663 	struct iommu_domain *dom;
1664 
1665 	dom = __iommu_domain_alloc(bus, type);
1666 	if (!dom && type != IOMMU_DOMAIN_DMA) {
1667 		dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
1668 		if (dom)
1669 			pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
1670 				type, group->name);
1671 	}
1672 
1673 	if (!dom)
1674 		return -ENOMEM;
1675 
1676 	group->default_domain = dom;
1677 	if (!group->domain)
1678 		group->domain = dom;
1679 	return 0;
1680 }
1681 
1682 static int iommu_alloc_default_domain(struct iommu_group *group,
1683 				      struct device *dev)
1684 {
1685 	unsigned int type;
1686 
1687 	if (group->default_domain)
1688 		return 0;
1689 
1690 	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
1691 
1692 	return iommu_group_alloc_default_domain(dev->bus, group, type);
1693 }
1694 
1695 /**
1696  * iommu_group_get_for_dev - Find or create the IOMMU group for a device
1697  * @dev: target device
1698  *
1699  * This function is intended to be called by IOMMU drivers and extended to
1700  * support common, bus-defined algorithms when determining or creating the
1701  * IOMMU group for a device.  On success, the caller will hold a reference
1702  * to the returned IOMMU group, which will already include the provided
1703  * device.  The reference should be released with iommu_group_put().
1704  */
1705 static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
1706 {
1707 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1708 	struct iommu_group *group;
1709 	int ret;
1710 
1711 	group = iommu_group_get(dev);
1712 	if (group)
1713 		return group;
1714 
1715 	group = ops->device_group(dev);
1716 	if (WARN_ON_ONCE(group == NULL))
1717 		return ERR_PTR(-EINVAL);
1718 
1719 	if (IS_ERR(group))
1720 		return group;
1721 
1722 	ret = iommu_group_add_device(group, dev);
1723 	if (ret)
1724 		goto out_put_group;
1725 
1726 	return group;
1727 
1728 out_put_group:
1729 	iommu_group_put(group);
1730 
1731 	return ERR_PTR(ret);
1732 }
1733 
1734 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
1735 {
1736 	return group->default_domain;
1737 }
1738 
1739 static int probe_iommu_group(struct device *dev, void *data)
1740 {
1741 	struct list_head *group_list = data;
1742 	struct iommu_group *group;
1743 	int ret;
1744 
1745 	/* Device is probed already if in a group */
1746 	group = iommu_group_get(dev);
1747 	if (group) {
1748 		iommu_group_put(group);
1749 		return 0;
1750 	}
1751 
1752 	ret = __iommu_probe_device(dev, group_list);
1753 	if (ret == -ENODEV)
1754 		ret = 0;
1755 
1756 	return ret;
1757 }
1758 
1759 static int iommu_bus_notifier(struct notifier_block *nb,
1760 			      unsigned long action, void *data)
1761 {
1762 	struct device *dev = data;
1763 
1764 	if (action == BUS_NOTIFY_ADD_DEVICE) {
1765 		int ret;
1766 
1767 		ret = iommu_probe_device(dev);
1768 		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
1769 	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
1770 		iommu_release_device(dev);
1771 		return NOTIFY_OK;
1772 	}
1773 
1774 	return 0;
1775 }
1776 
/* Accumulator used by probe_get_default_domain_type() across a group's devices. */
struct __group_domain_type {
	struct device *dev;	/* first device that reported a non-zero type */
	unsigned int type;	/* that device's type; reset to 0 on a conflict */
};
1781 
1782 static int probe_get_default_domain_type(struct device *dev, void *data)
1783 {
1784 	struct __group_domain_type *gtype = data;
1785 	unsigned int type = iommu_get_def_domain_type(dev);
1786 
1787 	if (type) {
1788 		if (gtype->type && gtype->type != type) {
1789 			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
1790 				 iommu_domain_type_str(type),
1791 				 dev_name(gtype->dev),
1792 				 iommu_domain_type_str(gtype->type));
1793 			gtype->type = 0;
1794 		}
1795 
1796 		if (!gtype->dev) {
1797 			gtype->dev  = dev;
1798 			gtype->type = type;
1799 		}
1800 	}
1801 
1802 	return 0;
1803 }
1804 
1805 static void probe_alloc_default_domain(const struct bus_type *bus,
1806 				       struct iommu_group *group)
1807 {
1808 	struct __group_domain_type gtype;
1809 
1810 	memset(&gtype, 0, sizeof(gtype));
1811 
1812 	/* Ask for default domain requirements of all devices in the group */
1813 	__iommu_group_for_each_dev(group, &gtype,
1814 				   probe_get_default_domain_type);
1815 
1816 	if (!gtype.type)
1817 		gtype.type = iommu_def_domain_type;
1818 
1819 	iommu_group_alloc_default_domain(bus, group, gtype.type);
1820 
1821 }
1822 
1823 static int __iommu_group_dma_first_attach(struct iommu_group *group)
1824 {
1825 	return __iommu_group_for_each_dev(group, group->default_domain,
1826 					  iommu_group_do_dma_first_attach);
1827 }
1828 
1829 static int iommu_group_do_probe_finalize(struct device *dev, void *data)
1830 {
1831 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1832 
1833 	if (ops->probe_finalize)
1834 		ops->probe_finalize(dev);
1835 
1836 	return 0;
1837 }
1838 
1839 static void __iommu_group_dma_finalize(struct iommu_group *group)
1840 {
1841 	__iommu_group_for_each_dev(group, group->default_domain,
1842 				   iommu_group_do_probe_finalize);
1843 }
1844 
/*
 * Per-device callback: create the direct mappings for @dev in the group
 * passed through @data.  The helper's result is ignored; always
 * returns 0.
 */
static int iommu_do_create_direct_mappings(struct device *dev, void *data)
{
	struct iommu_group *grp = data;

	iommu_create_device_direct_mappings(grp, dev);

	return 0;
}
1853 
1854 static int iommu_group_create_direct_mappings(struct iommu_group *group)
1855 {
1856 	return __iommu_group_for_each_dev(group, group,
1857 					  iommu_do_create_direct_mappings);
1858 }
1859 
1860 int bus_iommu_probe(const struct bus_type *bus)
1861 {
1862 	struct iommu_group *group, *next;
1863 	LIST_HEAD(group_list);
1864 	int ret;
1865 
1866 	/*
1867 	 * This code-path does not allocate the default domain when
1868 	 * creating the iommu group, so do it after the groups are
1869 	 * created.
1870 	 */
1871 	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
1872 	if (ret)
1873 		return ret;
1874 
1875 	list_for_each_entry_safe(group, next, &group_list, entry) {
1876 		mutex_lock(&group->mutex);
1877 
1878 		/* Remove item from the list */
1879 		list_del_init(&group->entry);
1880 
1881 		/* Try to allocate default domain */
1882 		probe_alloc_default_domain(bus, group);
1883 
1884 		if (!group->default_domain) {
1885 			mutex_unlock(&group->mutex);
1886 			continue;
1887 		}
1888 
1889 		iommu_group_create_direct_mappings(group);
1890 
1891 		ret = __iommu_group_dma_first_attach(group);
1892 
1893 		mutex_unlock(&group->mutex);
1894 
1895 		if (ret)
1896 			break;
1897 
1898 		__iommu_group_dma_finalize(group);
1899 	}
1900 
1901 	return ret;
1902 }
1903 
1904 bool iommu_present(const struct bus_type *bus)
1905 {
1906 	return bus->iommu_ops != NULL;
1907 }
1908 EXPORT_SYMBOL_GPL(iommu_present);
1909 
1910 /**
1911  * device_iommu_capable() - check for a general IOMMU capability
1912  * @dev: device to which the capability would be relevant, if available
1913  * @cap: IOMMU capability
1914  *
1915  * Return: true if an IOMMU is present and supports the given capability
1916  * for the given device, otherwise false.
1917  */
1918 bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
1919 {
1920 	const struct iommu_ops *ops;
1921 
1922 	if (!dev->iommu || !dev->iommu->iommu_dev)
1923 		return false;
1924 
1925 	ops = dev_iommu_ops(dev);
1926 	if (!ops->capable)
1927 		return false;
1928 
1929 	return ops->capable(dev, cap);
1930 }
1931 EXPORT_SYMBOL_GPL(device_iommu_capable);
1932 
1933 /**
1934  * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
1935  *       for a group
1936  * @group: Group to query
1937  *
1938  * IOMMU groups should not have differing values of
1939  * msi_device_has_isolated_msi() for devices in a group. However nothing
1940  * directly prevents this, so ensure mistakes don't result in isolation failures
1941  * by checking that all the devices are the same.
1942  */
1943 bool iommu_group_has_isolated_msi(struct iommu_group *group)
1944 {
1945 	struct group_device *group_dev;
1946 	bool ret = true;
1947 
1948 	mutex_lock(&group->mutex);
1949 	list_for_each_entry(group_dev, &group->devices, list)
1950 		ret &= msi_device_has_isolated_msi(group_dev->dev);
1951 	mutex_unlock(&group->mutex);
1952 	return ret;
1953 }
1954 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
1955 
1956 /**
1957  * iommu_set_fault_handler() - set a fault handler for an iommu domain
1958  * @domain: iommu domain
1959  * @handler: fault handler
1960  * @token: user data, will be passed back to the fault handler
1961  *
1962  * This function should be used by IOMMU users which want to be notified
1963  * whenever an IOMMU fault happens.
1964  *
1965  * The fault handler itself should return 0 on success, and an appropriate
1966  * error code otherwise.
1967  */
1968 void iommu_set_fault_handler(struct iommu_domain *domain,
1969 					iommu_fault_handler_t handler,
1970 					void *token)
1971 {
1972 	BUG_ON(!domain);
1973 
1974 	domain->handler = handler;
1975 	domain->handler_token = token;
1976 }
1977 EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
1978 
1979 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
1980 						 unsigned type)
1981 {
1982 	struct iommu_domain *domain;
1983 
1984 	if (bus == NULL || bus->iommu_ops == NULL)
1985 		return NULL;
1986 
1987 	domain = bus->iommu_ops->domain_alloc(type);
1988 	if (!domain)
1989 		return NULL;
1990 
1991 	domain->type = type;
1992 	/*
1993 	 * If not already set, assume all sizes by default; the driver
1994 	 * may override this later
1995 	 */
1996 	if (!domain->pgsize_bitmap)
1997 		domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
1998 
1999 	if (!domain->ops)
2000 		domain->ops = bus->iommu_ops->default_domain_ops;
2001 
2002 	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
2003 		iommu_domain_free(domain);
2004 		domain = NULL;
2005 	}
2006 	return domain;
2007 }
2008 
2009 struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
2010 {
2011 	return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
2012 }
2013 EXPORT_SYMBOL_GPL(iommu_domain_alloc);
2014 
2015 void iommu_domain_free(struct iommu_domain *domain)
2016 {
2017 	if (domain->type == IOMMU_DOMAIN_SVA)
2018 		mmdrop(domain->mm);
2019 	iommu_put_dma_cookie(domain);
2020 	domain->ops->free(domain);
2021 }
2022 EXPORT_SYMBOL_GPL(iommu_domain_free);
2023 
2024 /*
2025  * Put the group's domain back to the appropriate core-owned domain - either the
2026  * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
2027  */
2028 static void __iommu_group_set_core_domain(struct iommu_group *group)
2029 {
2030 	struct iommu_domain *new_domain;
2031 	int ret;
2032 
2033 	if (group->owner)
2034 		new_domain = group->blocking_domain;
2035 	else
2036 		new_domain = group->default_domain;
2037 
2038 	ret = __iommu_group_set_domain(group, new_domain);
2039 	WARN(ret, "iommu driver failed to attach the default/blocking domain");
2040 }
2041 
2042 static int __iommu_attach_device(struct iommu_domain *domain,
2043 				 struct device *dev)
2044 {
2045 	int ret;
2046 
2047 	if (unlikely(domain->ops->attach_dev == NULL))
2048 		return -ENODEV;
2049 
2050 	ret = domain->ops->attach_dev(domain, dev);
2051 	if (ret)
2052 		return ret;
2053 	dev->iommu->attach_deferred = 0;
2054 	trace_attach_device_to_domain(dev);
2055 	return 0;
2056 }
2057 
2058 /**
2059  * iommu_attach_device - Attach an IOMMU domain to a device
2060  * @domain: IOMMU domain to attach
2061  * @dev: Device that will be attached
2062  *
2063  * Returns 0 on success and error code on failure
2064  *
2065  * Note that EINVAL can be treated as a soft failure, indicating
2066  * that certain configuration of the domain is incompatible with
2067  * the device. In this case attaching a different domain to the
2068  * device may succeed.
2069  */
2070 int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
2071 {
2072 	struct iommu_group *group;
2073 	int ret;
2074 
2075 	group = iommu_group_get(dev);
2076 	if (!group)
2077 		return -ENODEV;
2078 
2079 	/*
2080 	 * Lock the group to make sure the device-count doesn't
2081 	 * change while we are attaching
2082 	 */
2083 	mutex_lock(&group->mutex);
2084 	ret = -EINVAL;
2085 	if (iommu_group_device_count(group) != 1)
2086 		goto out_unlock;
2087 
2088 	ret = __iommu_attach_group(domain, group);
2089 
2090 out_unlock:
2091 	mutex_unlock(&group->mutex);
2092 	iommu_group_put(group);
2093 
2094 	return ret;
2095 }
2096 EXPORT_SYMBOL_GPL(iommu_attach_device);
2097 
2098 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
2099 {
2100 	if (dev->iommu && dev->iommu->attach_deferred)
2101 		return __iommu_attach_device(domain, dev);
2102 
2103 	return 0;
2104 }
2105 
2106 void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
2107 {
2108 	struct iommu_group *group;
2109 
2110 	group = iommu_group_get(dev);
2111 	if (!group)
2112 		return;
2113 
2114 	mutex_lock(&group->mutex);
2115 	if (WARN_ON(domain != group->domain) ||
2116 	    WARN_ON(iommu_group_device_count(group) != 1))
2117 		goto out_unlock;
2118 	__iommu_group_set_core_domain(group);
2119 
2120 out_unlock:
2121 	mutex_unlock(&group->mutex);
2122 	iommu_group_put(group);
2123 }
2124 EXPORT_SYMBOL_GPL(iommu_detach_device);
2125 
2126 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
2127 {
2128 	struct iommu_domain *domain;
2129 	struct iommu_group *group;
2130 
2131 	group = iommu_group_get(dev);
2132 	if (!group)
2133 		return NULL;
2134 
2135 	domain = group->domain;
2136 
2137 	iommu_group_put(group);
2138 
2139 	return domain;
2140 }
2141 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
2142 
2143 /*
2144  * For IOMMU_DOMAIN_DMA implementations which already provide their own
2145  * guarantees that the group and its default domain are valid and correct.
2146  */
2147 struct iommu_domain *iommu_get_dma_domain(struct device *dev)
2148 {
2149 	return dev->iommu_group->default_domain;
2150 }
2151 
/*
 * IOMMU groups are really the natural working unit of the IOMMU, but
 * the IOMMU API works on domains and devices.  Bridge that gap by
 * iterating over the devices in a group.  Ideally we'd have a single
 * device which represents the requestor ID of the group, but we also
 * allow IOMMU drivers to create policy defined minimum sets, where
 * the physical hardware may be able to distinguish members, but we
 * wish to group them at a higher level (ex. untrusted multi-function
 * PCI devices).  Thus we attach each device.
 *
 * This is the per-device callback for that iteration; @data is the
 * domain to attach.
 */
static int iommu_group_do_attach_device(struct device *dev, void *data)
{
	return __iommu_attach_device((struct iommu_domain *)data, dev);
}
2168 
2169 static int __iommu_attach_group(struct iommu_domain *domain,
2170 				struct iommu_group *group)
2171 {
2172 	int ret;
2173 
2174 	if (group->domain && group->domain != group->default_domain &&
2175 	    group->domain != group->blocking_domain)
2176 		return -EBUSY;
2177 
2178 	ret = __iommu_group_for_each_dev(group, domain,
2179 					 iommu_group_do_attach_device);
2180 	if (ret == 0) {
2181 		group->domain = domain;
2182 	} else {
2183 		/*
2184 		 * To recover from the case when certain device within the
2185 		 * group fails to attach to the new domain, we need force
2186 		 * attaching all devices back to the old domain. The old
2187 		 * domain is compatible for all devices in the group,
2188 		 * hence the iommu driver should always return success.
2189 		 */
2190 		struct iommu_domain *old_domain = group->domain;
2191 
2192 		group->domain = NULL;
2193 		WARN(__iommu_group_set_domain(group, old_domain),
2194 		     "iommu driver failed to attach a compatible domain");
2195 	}
2196 
2197 	return ret;
2198 }
2199 
2200 /**
2201  * iommu_attach_group - Attach an IOMMU domain to an IOMMU group
2202  * @domain: IOMMU domain to attach
2203  * @group: IOMMU group that will be attached
2204  *
2205  * Returns 0 on success and error code on failure
2206  *
2207  * Note that EINVAL can be treated as a soft failure, indicating
2208  * that certain configuration of the domain is incompatible with
2209  * the group. In this case attaching a different domain to the
2210  * group may succeed.
2211  */
2212 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
2213 {
2214 	int ret;
2215 
2216 	mutex_lock(&group->mutex);
2217 	ret = __iommu_attach_group(domain, group);
2218 	mutex_unlock(&group->mutex);
2219 
2220 	return ret;
2221 }
2222 EXPORT_SYMBOL_GPL(iommu_attach_group);
2223 
2224 static int iommu_group_do_set_platform_dma(struct device *dev, void *data)
2225 {
2226 	const struct iommu_ops *ops = dev_iommu_ops(dev);
2227 
2228 	if (!WARN_ON(!ops->set_platform_dma_ops))
2229 		ops->set_platform_dma_ops(dev);
2230 
2231 	return 0;
2232 }
2233 
2234 static int __iommu_group_set_domain(struct iommu_group *group,
2235 				    struct iommu_domain *new_domain)
2236 {
2237 	int ret;
2238 
2239 	if (group->domain == new_domain)
2240 		return 0;
2241 
2242 	/*
2243 	 * New drivers should support default domains, so set_platform_dma()
2244 	 * op will never be called. Otherwise the NULL domain represents some
2245 	 * platform specific behavior.
2246 	 */
2247 	if (!new_domain) {
2248 		__iommu_group_for_each_dev(group, NULL,
2249 					   iommu_group_do_set_platform_dma);
2250 		group->domain = NULL;
2251 		return 0;
2252 	}
2253 
2254 	/*
2255 	 * Changing the domain is done by calling attach_dev() on the new
2256 	 * domain. This switch does not have to be atomic and DMA can be
2257 	 * discarded during the transition. DMA must only be able to access
2258 	 * either new_domain or group->domain, never something else.
2259 	 *
2260 	 * Note that this is called in error unwind paths, attaching to a
2261 	 * domain that has already been attached cannot fail.
2262 	 */
2263 	ret = __iommu_group_for_each_dev(group, new_domain,
2264 					 iommu_group_do_attach_device);
2265 	if (ret)
2266 		return ret;
2267 	group->domain = new_domain;
2268 	return 0;
2269 }
2270 
/*
 * Detach @group from its current domain by calling
 * __iommu_group_set_core_domain() with group->mutex held.
 *
 * The @domain argument is not examined by this function.
 */
void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
{
	mutex_lock(&group->mutex);
	__iommu_group_set_core_domain(group);
	mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_detach_group);
2278 
2279 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2280 {
2281 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2282 		return iova;
2283 
2284 	if (domain->type == IOMMU_DOMAIN_BLOCKED)
2285 		return 0;
2286 
2287 	return domain->ops->iova_to_phys(domain, iova);
2288 }
2289 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
2290 
/*
 * Choose the page size to use for the next map/unmap step.
 *
 * Returns the largest page size present in @domain->pgsize_bitmap that does
 * not exceed @size and that the combined alignment of @paddr and @iova
 * permits.  BUG()s when no supported page size qualifies.
 *
 * When @count is non-NULL it receives the number of pages of the returned
 * size to process in this step.  If the next larger supported page size
 * could be used once its alignment boundary is reached (and @size is large
 * enough to hold such a page past that boundary), the count is trimmed so
 * that this step stops at the boundary.
 */
static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
			   phys_addr_t paddr, size_t size, size_t *count)
{
	unsigned int pgsize_idx, pgsize_idx_next;
	unsigned long pgsizes;
	size_t offset, pgsize, pgsize_next;
	/* Lowest set bit of iova|paddr bounds the usable alignment. */
	unsigned long addr_merge = paddr | iova;

	/* Page sizes supported by the hardware and small enough for @size */
	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);

	/* Constrain the page sizes further based on the maximum alignment */
	if (likely(addr_merge))
		pgsizes &= GENMASK(__ffs(addr_merge), 0);

	/* Make sure we have at least one suitable page size */
	BUG_ON(!pgsizes);

	/* Pick the biggest page size remaining */
	pgsize_idx = __fls(pgsizes);
	pgsize = BIT(pgsize_idx);
	/* Caller only wants the page size, not a page count. */
	if (!count)
		return pgsize;

	/* Find the next biggest supported page size, if it exists */
	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
	if (!pgsizes)
		goto out_set_count;

	pgsize_idx_next = __ffs(pgsizes);
	pgsize_next = BIT(pgsize_idx_next);

	/*
	 * There's no point trying a bigger page size unless the virtual
	 * and physical addresses are similarly offset within the larger page.
	 */
	if ((iova ^ paddr) & (pgsize_next - 1))
		goto out_set_count;

	/* Calculate the offset to the next page size alignment boundary */
	offset = pgsize_next - (addr_merge & (pgsize_next - 1));

	/*
	 * If size is big enough to accommodate the larger page, reduce
	 * the number of smaller pages.
	 */
	if (offset + pgsize_next <= size)
		size = offset;

out_set_count:
	/* Number of pgsize-sized pages covering the (possibly trimmed) size. */
	*count = size >> pgsize_idx;
	return pgsize;
}
2344 
2345 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
2346 			     phys_addr_t paddr, size_t size, int prot,
2347 			     gfp_t gfp, size_t *mapped)
2348 {
2349 	const struct iommu_domain_ops *ops = domain->ops;
2350 	size_t pgsize, count;
2351 	int ret;
2352 
2353 	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
2354 
2355 	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
2356 		 iova, &paddr, pgsize, count);
2357 
2358 	if (ops->map_pages) {
2359 		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
2360 				     gfp, mapped);
2361 	} else {
2362 		ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
2363 		*mapped = ret ? 0 : pgsize;
2364 	}
2365 
2366 	return ret;
2367 }
2368 
2369 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
2370 		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2371 {
2372 	const struct iommu_domain_ops *ops = domain->ops;
2373 	unsigned long orig_iova = iova;
2374 	unsigned int min_pagesz;
2375 	size_t orig_size = size;
2376 	phys_addr_t orig_paddr = paddr;
2377 	int ret = 0;
2378 
2379 	if (unlikely(!(ops->map || ops->map_pages) ||
2380 		     domain->pgsize_bitmap == 0UL))
2381 		return -ENODEV;
2382 
2383 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2384 		return -EINVAL;
2385 
2386 	/* find out the minimum page size supported */
2387 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2388 
2389 	/*
2390 	 * both the virtual address and the physical one, as well as
2391 	 * the size of the mapping, must be aligned (at least) to the
2392 	 * size of the smallest page supported by the hardware
2393 	 */
2394 	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
2395 		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
2396 		       iova, &paddr, size, min_pagesz);
2397 		return -EINVAL;
2398 	}
2399 
2400 	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
2401 
2402 	while (size) {
2403 		size_t mapped = 0;
2404 
2405 		ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
2406 					&mapped);
2407 		/*
2408 		 * Some pages may have been mapped, even if an error occurred,
2409 		 * so we should account for those so they can be unmapped.
2410 		 */
2411 		size -= mapped;
2412 
2413 		if (ret)
2414 			break;
2415 
2416 		iova += mapped;
2417 		paddr += mapped;
2418 	}
2419 
2420 	/* unroll mapping in case something went wrong */
2421 	if (ret)
2422 		iommu_unmap(domain, orig_iova, orig_size - size);
2423 	else
2424 		trace_map(orig_iova, orig_paddr, orig_size);
2425 
2426 	return ret;
2427 }
2428 
2429 int iommu_map(struct iommu_domain *domain, unsigned long iova,
2430 	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2431 {
2432 	const struct iommu_domain_ops *ops = domain->ops;
2433 	int ret;
2434 
2435 	might_sleep_if(gfpflags_allow_blocking(gfp));
2436 
2437 	/* Discourage passing strange GFP flags */
2438 	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
2439 				__GFP_HIGHMEM)))
2440 		return -EINVAL;
2441 
2442 	ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
2443 	if (ret == 0 && ops->iotlb_sync_map)
2444 		ops->iotlb_sync_map(domain, iova, size);
2445 
2446 	return ret;
2447 }
2448 EXPORT_SYMBOL_GPL(iommu_map);
2449 
2450 static size_t __iommu_unmap_pages(struct iommu_domain *domain,
2451 				  unsigned long iova, size_t size,
2452 				  struct iommu_iotlb_gather *iotlb_gather)
2453 {
2454 	const struct iommu_domain_ops *ops = domain->ops;
2455 	size_t pgsize, count;
2456 
2457 	pgsize = iommu_pgsize(domain, iova, iova, size, &count);
2458 	return ops->unmap_pages ?
2459 	       ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
2460 	       ops->unmap(domain, iova, pgsize, iotlb_gather);
2461 }
2462 
2463 static size_t __iommu_unmap(struct iommu_domain *domain,
2464 			    unsigned long iova, size_t size,
2465 			    struct iommu_iotlb_gather *iotlb_gather)
2466 {
2467 	const struct iommu_domain_ops *ops = domain->ops;
2468 	size_t unmapped_page, unmapped = 0;
2469 	unsigned long orig_iova = iova;
2470 	unsigned int min_pagesz;
2471 
2472 	if (unlikely(!(ops->unmap || ops->unmap_pages) ||
2473 		     domain->pgsize_bitmap == 0UL))
2474 		return 0;
2475 
2476 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2477 		return 0;
2478 
2479 	/* find out the minimum page size supported */
2480 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2481 
2482 	/*
2483 	 * The virtual address, as well as the size of the mapping, must be
2484 	 * aligned (at least) to the size of the smallest page supported
2485 	 * by the hardware
2486 	 */
2487 	if (!IS_ALIGNED(iova | size, min_pagesz)) {
2488 		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
2489 		       iova, size, min_pagesz);
2490 		return 0;
2491 	}
2492 
2493 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
2494 
2495 	/*
2496 	 * Keep iterating until we either unmap 'size' bytes (or more)
2497 	 * or we hit an area that isn't mapped.
2498 	 */
2499 	while (unmapped < size) {
2500 		unmapped_page = __iommu_unmap_pages(domain, iova,
2501 						    size - unmapped,
2502 						    iotlb_gather);
2503 		if (!unmapped_page)
2504 			break;
2505 
2506 		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
2507 			 iova, unmapped_page);
2508 
2509 		iova += unmapped_page;
2510 		unmapped += unmapped_page;
2511 	}
2512 
2513 	trace_unmap(orig_iova, size, unmapped);
2514 	return unmapped;
2515 }
2516 
2517 size_t iommu_unmap(struct iommu_domain *domain,
2518 		   unsigned long iova, size_t size)
2519 {
2520 	struct iommu_iotlb_gather iotlb_gather;
2521 	size_t ret;
2522 
2523 	iommu_iotlb_gather_init(&iotlb_gather);
2524 	ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
2525 	iommu_iotlb_sync(domain, &iotlb_gather);
2526 
2527 	return ret;
2528 }
2529 EXPORT_SYMBOL_GPL(iommu_unmap);
2530 
/*
 * Unmap @size bytes at @iova from @domain without synchronising the IOTLB:
 * invalidations are only gathered into the caller-supplied @iotlb_gather
 * and no iommu_iotlb_sync() is issued here (contrast with iommu_unmap()).
 * Returns the number of bytes unmapped as reported by __iommu_unmap().
 */
size_t iommu_unmap_fast(struct iommu_domain *domain,
			unsigned long iova, size_t size,
			struct iommu_iotlb_gather *iotlb_gather)
{
	return __iommu_unmap(domain, iova, size, iotlb_gather);
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
2538 
/*
 * Map the scatterlist @sg (@nents entries) into @domain as one contiguous
 * IOVA range starting at @iova.
 *
 * Physically adjacent entries are merged into a single run that is mapped
 * with one __iommu_map() call.  Entries flagged by sg_is_dma_bus_address()
 * are not added to a run.
 *
 * Returns the number of bytes mapped on success.  On failure everything
 * mapped by this call is unmapped again and a negative error code is
 * returned (-EINVAL for disallowed GFP flags, otherwise the __iommu_map()
 * error).
 */
ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
		     struct scatterlist *sg, unsigned int nents, int prot,
		     gfp_t gfp)
{
	const struct iommu_domain_ops *ops = domain->ops;
	/* len: bytes in the pending run; mapped: bytes mapped so far */
	size_t len = 0, mapped = 0;
	/* physical start of the pending run; only meaningful while len != 0 */
	phys_addr_t start;
	unsigned int i = 0;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Discourage passing strange GFP flags */
	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
				__GFP_HIGHMEM)))
		return -EINVAL;

	/*
	 * Note the "<=": the loop body runs nents + 1 times.  On the extra
	 * pass @sg has not been advanced (see "next:" below).
	 * NOTE(review): this extra pass appears to exist so that the final
	 * pending run gets flushed by the block below - confirm.
	 */
	while (i <= nents) {
		phys_addr_t s_phys = sg_phys(sg);

		/* Pending run is not continued by this entry: map it now. */
		if (len && s_phys != start + len) {
			ret = __iommu_map(domain, iova + mapped, start,
					len, prot, gfp);

			if (ret)
				goto out_err;

			mapped += len;
			len = 0;
		}

		if (sg_is_dma_bus_address(sg))
			goto next;

		/* Extend the pending run, or start a new one at this entry. */
		if (len) {
			len += sg->length;
		} else {
			len = sg->length;
			start = s_phys;
		}

next:
		/* Only advance while another real entry remains. */
		if (++i < nents)
			sg = sg_next(sg);
	}

	if (ops->iotlb_sync_map)
		ops->iotlb_sync_map(domain, iova, mapped);
	return mapped;

out_err:
	/* undo mappings already done */
	iommu_unmap(domain, iova, mapped);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map_sg);
2596 
2597 /**
2598  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
2599  * @domain: the iommu domain where the fault has happened
2600  * @dev: the device where the fault has happened
2601  * @iova: the faulting address
2602  * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
2603  *
2604  * This function should be called by the low-level IOMMU implementations
2605  * whenever IOMMU faults happen, to allow high-level users, that are
2606  * interested in such events, to know about them.
2607  *
2608  * This event may be useful for several possible use cases:
2609  * - mere logging of the event
2610  * - dynamic TLB/PTE loading
2611  * - if restarting of the faulting device is required
2612  *
2613  * Returns 0 on success and an appropriate error code otherwise (if dynamic
2614  * PTE/TLB loading will one day be supported, implementations will be able
2615  * to tell whether it succeeded or not according to this return value).
2616  *
2617  * Specifically, -ENOSYS is returned if a fault handler isn't installed
2618  * (though fault handlers can also return -ENOSYS, in case they want to
2619  * elicit the default behavior of the IOMMU drivers).
2620  */
2621 int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
2622 		       unsigned long iova, int flags)
2623 {
2624 	int ret = -ENOSYS;
2625 
2626 	/*
2627 	 * if upper layers showed interest and installed a fault handler,
2628 	 * invoke it.
2629 	 */
2630 	if (domain->handler)
2631 		ret = domain->handler(domain, dev, iova, flags,
2632 						domain->handler_token);
2633 
2634 	trace_io_page_fault(dev, iova, flags);
2635 	return ret;
2636 }
2637 EXPORT_SYMBOL_GPL(report_iommu_fault);
2638 
/*
 * Core-initcall setup for the IOMMU subsystem: create the "iommu_groups"
 * kset under kernel_kobj and set up the IOMMU debugfs entries.  Failure to
 * create the kset is fatal (BUG).  Always returns 0.
 */
static int __init iommu_init(void)
{
	iommu_group_kset = kset_create_and_add("iommu_groups",
					       NULL, kernel_kobj);
	BUG_ON(!iommu_group_kset);

	iommu_debugfs_setup();

	return 0;
}
core_initcall(iommu_init);
2650 
2651 int iommu_enable_nesting(struct iommu_domain *domain)
2652 {
2653 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2654 		return -EINVAL;
2655 	if (!domain->ops->enable_nesting)
2656 		return -EINVAL;
2657 	return domain->ops->enable_nesting(domain);
2658 }
2659 EXPORT_SYMBOL_GPL(iommu_enable_nesting);
2660 
2661 int iommu_set_pgtable_quirks(struct iommu_domain *domain,
2662 		unsigned long quirk)
2663 {
2664 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2665 		return -EINVAL;
2666 	if (!domain->ops->set_pgtable_quirks)
2667 		return -EINVAL;
2668 	return domain->ops->set_pgtable_quirks(domain, quirk);
2669 }
2670 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
2671 
2672 void iommu_get_resv_regions(struct device *dev, struct list_head *list)
2673 {
2674 	const struct iommu_ops *ops = dev_iommu_ops(dev);
2675 
2676 	if (ops->get_resv_regions)
2677 		ops->get_resv_regions(dev, list);
2678 }
2679 
2680 /**
2681  * iommu_put_resv_regions - release resered regions
2682  * @dev: device for which to free reserved regions
2683  * @list: reserved region list for device
2684  *
2685  * This releases a reserved region list acquired by iommu_get_resv_regions().
2686  */
2687 void iommu_put_resv_regions(struct device *dev, struct list_head *list)
2688 {
2689 	struct iommu_resv_region *entry, *next;
2690 
2691 	list_for_each_entry_safe(entry, next, list, list) {
2692 		if (entry->free)
2693 			entry->free(dev, entry);
2694 		else
2695 			kfree(entry);
2696 	}
2697 }
2698 EXPORT_SYMBOL(iommu_put_resv_regions);
2699 
2700 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
2701 						  size_t length, int prot,
2702 						  enum iommu_resv_type type,
2703 						  gfp_t gfp)
2704 {
2705 	struct iommu_resv_region *region;
2706 
2707 	region = kzalloc(sizeof(*region), gfp);
2708 	if (!region)
2709 		return NULL;
2710 
2711 	INIT_LIST_HEAD(&region->list);
2712 	region->start = start;
2713 	region->length = length;
2714 	region->prot = prot;
2715 	region->type = type;
2716 	return region;
2717 }
2718 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
2719 
2720 void iommu_set_default_passthrough(bool cmd_line)
2721 {
2722 	if (cmd_line)
2723 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2724 	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
2725 }
2726 
2727 void iommu_set_default_translated(bool cmd_line)
2728 {
2729 	if (cmd_line)
2730 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2731 	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
2732 }
2733 
/*
 * Report whether the currently selected default domain type is
 * IOMMU_DOMAIN_IDENTITY (i.e. passthrough).
 */
bool iommu_default_passthrough(void)
{
	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
}
EXPORT_SYMBOL_GPL(iommu_default_passthrough);
2739 
2740 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
2741 {
2742 	const struct iommu_ops *ops = NULL;
2743 	struct iommu_device *iommu;
2744 
2745 	spin_lock(&iommu_device_lock);
2746 	list_for_each_entry(iommu, &iommu_device_list, list)
2747 		if (iommu->fwnode == fwnode) {
2748 			ops = iommu->ops;
2749 			break;
2750 		}
2751 	spin_unlock(&iommu_device_lock);
2752 	return ops;
2753 }
2754 
2755 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
2756 		      const struct iommu_ops *ops)
2757 {
2758 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2759 
2760 	if (fwspec)
2761 		return ops == fwspec->ops ? 0 : -EINVAL;
2762 
2763 	if (!dev_iommu_get(dev))
2764 		return -ENOMEM;
2765 
2766 	/* Preallocate for the overwhelmingly common case of 1 ID */
2767 	fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL);
2768 	if (!fwspec)
2769 		return -ENOMEM;
2770 
2771 	of_node_get(to_of_node(iommu_fwnode));
2772 	fwspec->iommu_fwnode = iommu_fwnode;
2773 	fwspec->ops = ops;
2774 	dev_iommu_fwspec_set(dev, fwspec);
2775 	return 0;
2776 }
2777 EXPORT_SYMBOL_GPL(iommu_fwspec_init);
2778 
2779 void iommu_fwspec_free(struct device *dev)
2780 {
2781 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2782 
2783 	if (fwspec) {
2784 		fwnode_handle_put(fwspec->iommu_fwnode);
2785 		kfree(fwspec);
2786 		dev_iommu_fwspec_set(dev, NULL);
2787 	}
2788 }
2789 EXPORT_SYMBOL_GPL(iommu_fwspec_free);
2790 
2791 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
2792 {
2793 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2794 	int i, new_num;
2795 
2796 	if (!fwspec)
2797 		return -EINVAL;
2798 
2799 	new_num = fwspec->num_ids + num_ids;
2800 	if (new_num > 1) {
2801 		fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
2802 				  GFP_KERNEL);
2803 		if (!fwspec)
2804 			return -ENOMEM;
2805 
2806 		dev_iommu_fwspec_set(dev, fwspec);
2807 	}
2808 
2809 	for (i = 0; i < num_ids; i++)
2810 		fwspec->ids[fwspec->num_ids + i] = ids[i];
2811 
2812 	fwspec->num_ids = new_num;
2813 	return 0;
2814 }
2815 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
2816 
2817 /*
2818  * Per device IOMMU features.
2819  */
2820 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
2821 {
2822 	if (dev->iommu && dev->iommu->iommu_dev) {
2823 		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
2824 
2825 		if (ops->dev_enable_feat)
2826 			return ops->dev_enable_feat(dev, feat);
2827 	}
2828 
2829 	return -ENODEV;
2830 }
2831 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
2832 
2833 /*
2834  * The device drivers should do the necessary cleanups before calling this.
2835  */
2836 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
2837 {
2838 	if (dev->iommu && dev->iommu->iommu_dev) {
2839 		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
2840 
2841 		if (ops->dev_disable_feat)
2842 			return ops->dev_disable_feat(dev, feat);
2843 	}
2844 
2845 	return -EBUSY;
2846 }
2847 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
2848 
/*
 * Changes the default domain of an iommu group
 *
 * @group: The group for which the default domain should be changed
 * @dev: The first device in the group
 * @type: The type of the new default domain that gets associated with the
 *        group; 0 means "no specific type requested"
 *
 * Must be called with group->mutex held.
 *
 * Returns 0 on success (including when the group already has a default
 * domain of the resulting type) and error code on failure.  On failure the
 * previous default domain is put back into group->default_domain and
 * group->domain.
 *
 * Note:
 * 1. Presently, this function is called only when user requests to change the
 *    group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type
 *    Please take a closer look if intended to use for other purposes.
 */
static int iommu_change_dev_def_domain(struct iommu_group *group,
				       struct device *dev, int type)
{
	struct __group_domain_type gtype = {NULL, 0};
	struct iommu_domain *prev_dom;
	int ret;

	lockdep_assert_held(&group->mutex);

	prev_dom = group->default_domain;
	/* Collect the domain type the devices in the group ask for, if any. */
	__iommu_group_for_each_dev(group, &gtype,
				   probe_get_default_domain_type);
	if (!type) {
		/*
		 * If the user hasn't requested any specific type of domain and
		 * if the device supports both the domains, then default to the
		 * domain the device was booted with
		 */
		type = gtype.type ? : iommu_def_domain_type;
	} else if (gtype.type && type != gtype.type) {
		/* The requested type conflicts with what the devices require. */
		dev_err_ratelimited(dev, "Device cannot be in %s domain\n",
				    iommu_domain_type_str(type));
		return -EINVAL;
	}

	/*
	 * Switch to a new domain only if the requested domain type is different
	 * from the existing default domain type
	 */
	if (prev_dom->type == type)
		return 0;

	/* prev_dom keeps the old domain so the error paths can restore it. */
	group->default_domain = NULL;
	group->domain = NULL;

	/* Sets group->default_domain to the newly allocated domain */
	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
	if (ret)
		goto restore_old_domain;

	ret = iommu_group_create_direct_mappings(group);
	if (ret)
		goto free_new_domain;

	ret = __iommu_attach_group(group->default_domain, group);
	if (ret)
		goto free_new_domain;

	/* The new default domain is attached; the old one can go. */
	iommu_domain_free(prev_dom);

	return 0;

free_new_domain:
	iommu_domain_free(group->default_domain);
restore_old_domain:
	/*
	 * NOTE(review): only the group's pointers are restored here; this
	 * function does not itself re-attach the devices to prev_dom.
	 */
	group->default_domain = prev_dom;
	group->domain = prev_dom;

	return ret;
}
2923 
/*
 * sysfs store handler that changes the default domain type of @group.
 * @buf must be one of "identity", "DMA", "DMA-FQ" or "auto".
 *
 * Changing the default domain through sysfs requires the users to unbind the
 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
 * transition. Return failure if this isn't met.
 *
 * We need to consider the race between this and the device release path.
 * group->mutex is used here to guarantee that the device release path
 * will not be entered at the same time.
 *
 * Returns @count on success.  Errors: -EACCES without both CAP_SYS_ADMIN and
 * CAP_SYS_RAWIO, -EINVAL for a missing group/default domain or an unknown
 * string, -EPERM when the group is empty or has an owner count, or the error
 * from the underlying domain change.
 */
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count)
{
	struct group_device *grp_dev;
	struct device *dev;
	int ret, req_type;

	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
		return -EACCES;

	if (WARN_ON(!group) || !group->default_domain)
		return -EINVAL;

	/* Translate the user string; "auto" maps to req_type 0. */
	if (sysfs_streq(buf, "identity"))
		req_type = IOMMU_DOMAIN_IDENTITY;
	else if (sysfs_streq(buf, "DMA"))
		req_type = IOMMU_DOMAIN_DMA;
	else if (sysfs_streq(buf, "DMA-FQ"))
		req_type = IOMMU_DOMAIN_DMA_FQ;
	else if (sysfs_streq(buf, "auto"))
		req_type = 0;
	else
		return -EINVAL;

	mutex_lock(&group->mutex);
	/* We can bring up a flush queue without tearing down the domain. */
	if (req_type == IOMMU_DOMAIN_DMA_FQ &&
	    group->default_domain->type == IOMMU_DOMAIN_DMA) {
		ret = iommu_dma_init_fq(group->default_domain);
		if (!ret)
			group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
		mutex_unlock(&group->mutex);

		return ret ?: count;
	}

	/* Otherwise, ensure that device exists and no driver is bound. */
	if (list_empty(&group->devices) || group->owner_cnt) {
		mutex_unlock(&group->mutex);
		return -EPERM;
	}

	/* The first device of the group is handed to the domain change. */
	grp_dev = list_first_entry(&group->devices, struct group_device, list);
	dev = grp_dev->dev;

	ret = iommu_change_dev_def_domain(group, dev, req_type);

	/*
	 * Release the mutex here because ops->probe_finalize() call-back of
	 * some vendor IOMMU drivers calls arm_iommu_attach_device() which
	 * in-turn might call back into IOMMU core code, where it tries to take
	 * group->mutex, resulting in a deadlock.
	 */
	mutex_unlock(&group->mutex);

	/* Make sure dma_ops is appropriately set */
	if (!ret)
		__iommu_group_dma_finalize(group);

	return ret ?: count;
}
2994 
2995 static bool iommu_is_default_domain(struct iommu_group *group)
2996 {
2997 	if (group->domain == group->default_domain)
2998 		return true;
2999 
3000 	/*
3001 	 * If the default domain was set to identity and it is still an identity
3002 	 * domain then we consider this a pass. This happens because of
3003 	 * amd_iommu_init_device() replacing the default idenytity domain with an
3004 	 * identity domain that has a different configuration for AMDGPU.
3005 	 */
3006 	if (group->default_domain &&
3007 	    group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
3008 	    group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
3009 		return true;
3010 	return false;
3011 }
3012 
3013 /**
3014  * iommu_device_use_default_domain() - Device driver wants to handle device
3015  *                                     DMA through the kernel DMA API.
3016  * @dev: The device.
3017  *
3018  * The device driver about to bind @dev wants to do DMA through the kernel
3019  * DMA API. Return 0 if it is allowed, otherwise an error.
3020  */
3021 int iommu_device_use_default_domain(struct device *dev)
3022 {
3023 	struct iommu_group *group = iommu_group_get(dev);
3024 	int ret = 0;
3025 
3026 	if (!group)
3027 		return 0;
3028 
3029 	mutex_lock(&group->mutex);
3030 	if (group->owner_cnt) {
3031 		if (group->owner || !iommu_is_default_domain(group) ||
3032 		    !xa_empty(&group->pasid_array)) {
3033 			ret = -EBUSY;
3034 			goto unlock_out;
3035 		}
3036 	}
3037 
3038 	group->owner_cnt++;
3039 
3040 unlock_out:
3041 	mutex_unlock(&group->mutex);
3042 	iommu_group_put(group);
3043 
3044 	return ret;
3045 }
3046 
3047 /**
3048  * iommu_device_unuse_default_domain() - Device driver stops handling device
3049  *                                       DMA through the kernel DMA API.
3050  * @dev: The device.
3051  *
3052  * The device driver doesn't want to do DMA through kernel DMA API anymore.
3053  * It must be called after iommu_device_use_default_domain().
3054  */
3055 void iommu_device_unuse_default_domain(struct device *dev)
3056 {
3057 	struct iommu_group *group = iommu_group_get(dev);
3058 
3059 	if (!group)
3060 		return;
3061 
3062 	mutex_lock(&group->mutex);
3063 	if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array)))
3064 		group->owner_cnt--;
3065 
3066 	mutex_unlock(&group->mutex);
3067 	iommu_group_put(group);
3068 }
3069 
3070 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
3071 {
3072 	struct group_device *dev =
3073 		list_first_entry(&group->devices, struct group_device, list);
3074 
3075 	if (group->blocking_domain)
3076 		return 0;
3077 
3078 	group->blocking_domain =
3079 		__iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED);
3080 	if (!group->blocking_domain) {
3081 		/*
3082 		 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
3083 		 * create an empty domain instead.
3084 		 */
3085 		group->blocking_domain = __iommu_domain_alloc(
3086 			dev->dev->bus, IOMMU_DOMAIN_UNMANAGED);
3087 		if (!group->blocking_domain)
3088 			return -EINVAL;
3089 	}
3090 	return 0;
3091 }
3092 
3093 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner)
3094 {
3095 	int ret;
3096 
3097 	if ((group->domain && group->domain != group->default_domain) ||
3098 	    !xa_empty(&group->pasid_array))
3099 		return -EBUSY;
3100 
3101 	ret = __iommu_group_alloc_blocking_domain(group);
3102 	if (ret)
3103 		return ret;
3104 	ret = __iommu_group_set_domain(group, group->blocking_domain);
3105 	if (ret)
3106 		return ret;
3107 
3108 	group->owner = owner;
3109 	group->owner_cnt++;
3110 	return 0;
3111 }
3112 
3113 /**
3114  * iommu_group_claim_dma_owner() - Set DMA ownership of a group
3115  * @group: The group.
3116  * @owner: Caller specified pointer. Used for exclusive ownership.
3117  *
3118  * This is to support backward compatibility for vfio which manages the dma
3119  * ownership in iommu_group level. New invocations on this interface should be
3120  * prohibited. Only a single owner may exist for a group.
3121  */
3122 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
3123 {
3124 	int ret = 0;
3125 
3126 	if (WARN_ON(!owner))
3127 		return -EINVAL;
3128 
3129 	mutex_lock(&group->mutex);
3130 	if (group->owner_cnt) {
3131 		ret = -EPERM;
3132 		goto unlock_out;
3133 	}
3134 
3135 	ret = __iommu_take_dma_ownership(group, owner);
3136 unlock_out:
3137 	mutex_unlock(&group->mutex);
3138 
3139 	return ret;
3140 }
3141 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
3142 
3143 /**
3144  * iommu_device_claim_dma_owner() - Set DMA ownership of a device
3145  * @dev: The device.
3146  * @owner: Caller specified pointer. Used for exclusive ownership.
3147  *
3148  * Claim the DMA ownership of a device. Multiple devices in the same group may
3149  * concurrently claim ownership if they present the same owner value. Returns 0
3150  * on success and error code on failure
3151  */
3152 int iommu_device_claim_dma_owner(struct device *dev, void *owner)
3153 {
3154 	struct iommu_group *group;
3155 	int ret = 0;
3156 
3157 	if (WARN_ON(!owner))
3158 		return -EINVAL;
3159 
3160 	group = iommu_group_get(dev);
3161 	if (!group)
3162 		return -ENODEV;
3163 
3164 	mutex_lock(&group->mutex);
3165 	if (group->owner_cnt) {
3166 		if (group->owner != owner) {
3167 			ret = -EPERM;
3168 			goto unlock_out;
3169 		}
3170 		group->owner_cnt++;
3171 		goto unlock_out;
3172 	}
3173 
3174 	ret = __iommu_take_dma_ownership(group, owner);
3175 unlock_out:
3176 	mutex_unlock(&group->mutex);
3177 	iommu_group_put(group);
3178 
3179 	return ret;
3180 }
3181 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
3182 
3183 static void __iommu_release_dma_ownership(struct iommu_group *group)
3184 {
3185 	int ret;
3186 
3187 	if (WARN_ON(!group->owner_cnt || !group->owner ||
3188 		    !xa_empty(&group->pasid_array)))
3189 		return;
3190 
3191 	group->owner_cnt = 0;
3192 	group->owner = NULL;
3193 	ret = __iommu_group_set_domain(group, group->default_domain);
3194 	WARN(ret, "iommu driver failed to attach the default domain");
3195 }
3196 
3197 /**
3198  * iommu_group_release_dma_owner() - Release DMA ownership of a group
3199  * @dev: The device
3200  *
3201  * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
3202  */
3203 void iommu_group_release_dma_owner(struct iommu_group *group)
3204 {
3205 	mutex_lock(&group->mutex);
3206 	__iommu_release_dma_ownership(group);
3207 	mutex_unlock(&group->mutex);
3208 }
3209 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
3210 
3211 /**
3212  * iommu_device_release_dma_owner() - Release DMA ownership of a device
3213  * @group: The device.
3214  *
3215  * Release the DMA ownership claimed by iommu_device_claim_dma_owner().
3216  */
3217 void iommu_device_release_dma_owner(struct device *dev)
3218 {
3219 	struct iommu_group *group = iommu_group_get(dev);
3220 
3221 	mutex_lock(&group->mutex);
3222 	if (group->owner_cnt > 1)
3223 		group->owner_cnt--;
3224 	else
3225 		__iommu_release_dma_ownership(group);
3226 	mutex_unlock(&group->mutex);
3227 	iommu_group_put(group);
3228 }
3229 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
3230 
3231 /**
3232  * iommu_group_dma_owner_claimed() - Query group dma ownership status
3233  * @group: The group.
3234  *
3235  * This provides status query on a given group. It is racy and only for
3236  * non-binding status reporting.
3237  */
3238 bool iommu_group_dma_owner_claimed(struct iommu_group *group)
3239 {
3240 	unsigned int user;
3241 
3242 	mutex_lock(&group->mutex);
3243 	user = group->owner_cnt;
3244 	mutex_unlock(&group->mutex);
3245 
3246 	return user;
3247 }
3248 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
3249 
3250 static int __iommu_set_group_pasid(struct iommu_domain *domain,
3251 				   struct iommu_group *group, ioasid_t pasid)
3252 {
3253 	struct group_device *device;
3254 	int ret = 0;
3255 
3256 	list_for_each_entry(device, &group->devices, list) {
3257 		ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
3258 		if (ret)
3259 			break;
3260 	}
3261 
3262 	return ret;
3263 }
3264 
3265 static void __iommu_remove_group_pasid(struct iommu_group *group,
3266 				       ioasid_t pasid)
3267 {
3268 	struct group_device *device;
3269 	const struct iommu_ops *ops;
3270 
3271 	list_for_each_entry(device, &group->devices, list) {
3272 		ops = dev_iommu_ops(device->dev);
3273 		ops->remove_dev_pasid(device->dev, pasid);
3274 	}
3275 }
3276 
3277 /*
3278  * iommu_attach_device_pasid() - Attach a domain to pasid of device
3279  * @domain: the iommu domain.
3280  * @dev: the attached device.
3281  * @pasid: the pasid of the device.
3282  *
3283  * Return: 0 on success, or an error.
3284  */
3285 int iommu_attach_device_pasid(struct iommu_domain *domain,
3286 			      struct device *dev, ioasid_t pasid)
3287 {
3288 	struct iommu_group *group;
3289 	void *curr;
3290 	int ret;
3291 
3292 	if (!domain->ops->set_dev_pasid)
3293 		return -EOPNOTSUPP;
3294 
3295 	group = iommu_group_get(dev);
3296 	if (!group)
3297 		return -ENODEV;
3298 
3299 	mutex_lock(&group->mutex);
3300 	curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
3301 	if (curr) {
3302 		ret = xa_err(curr) ? : -EBUSY;
3303 		goto out_unlock;
3304 	}
3305 
3306 	ret = __iommu_set_group_pasid(domain, group, pasid);
3307 	if (ret) {
3308 		__iommu_remove_group_pasid(group, pasid);
3309 		xa_erase(&group->pasid_array, pasid);
3310 	}
3311 out_unlock:
3312 	mutex_unlock(&group->mutex);
3313 	iommu_group_put(group);
3314 
3315 	return ret;
3316 }
3317 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
3318 
3319 /*
3320  * iommu_detach_device_pasid() - Detach the domain from pasid of device
3321  * @domain: the iommu domain.
3322  * @dev: the attached device.
3323  * @pasid: the pasid of the device.
3324  *
3325  * The @domain must have been attached to @pasid of the @dev with
3326  * iommu_attach_device_pasid().
3327  */
3328 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
3329 			       ioasid_t pasid)
3330 {
3331 	struct iommu_group *group = iommu_group_get(dev);
3332 
3333 	mutex_lock(&group->mutex);
3334 	__iommu_remove_group_pasid(group, pasid);
3335 	WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
3336 	mutex_unlock(&group->mutex);
3337 
3338 	iommu_group_put(group);
3339 }
3340 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
3341 
3342 /*
3343  * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
3344  * @dev: the queried device
3345  * @pasid: the pasid of the device
3346  * @type: matched domain type, 0 for any match
3347  *
3348  * This is a variant of iommu_get_domain_for_dev(). It returns the existing
3349  * domain attached to pasid of a device. Callers must hold a lock around this
3350  * function, and both iommu_attach/detach_dev_pasid() whenever a domain of
3351  * type is being manipulated. This API does not internally resolve races with
3352  * attach/detach.
3353  *
3354  * Return: attached domain on success, NULL otherwise.
3355  */
3356 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
3357 						    ioasid_t pasid,
3358 						    unsigned int type)
3359 {
3360 	struct iommu_domain *domain;
3361 	struct iommu_group *group;
3362 
3363 	group = iommu_group_get(dev);
3364 	if (!group)
3365 		return NULL;
3366 
3367 	xa_lock(&group->pasid_array);
3368 	domain = xa_load(&group->pasid_array, pasid);
3369 	if (type && domain && domain->type != type)
3370 		domain = ERR_PTR(-EBUSY);
3371 	xa_unlock(&group->pasid_array);
3372 	iommu_group_put(group);
3373 
3374 	return domain;
3375 }
3376 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
3377 
3378 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
3379 					    struct mm_struct *mm)
3380 {
3381 	const struct iommu_ops *ops = dev_iommu_ops(dev);
3382 	struct iommu_domain *domain;
3383 
3384 	domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
3385 	if (!domain)
3386 		return NULL;
3387 
3388 	domain->type = IOMMU_DOMAIN_SVA;
3389 	mmgrab(mm);
3390 	domain->mm = mm;
3391 	domain->iopf_handler = iommu_sva_handle_iopf;
3392 	domain->fault_data = mm;
3393 
3394 	return domain;
3395 }
3396