xref: /openbmc/linux/drivers/iommu/iommu.c (revision 84e85359)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  */
6 
7 #define pr_fmt(fmt)    "iommu: " fmt
8 
9 #include <linux/amba/bus.h>
10 #include <linux/device.h>
11 #include <linux/kernel.h>
12 #include <linux/bits.h>
13 #include <linux/bug.h>
14 #include <linux/types.h>
15 #include <linux/init.h>
16 #include <linux/export.h>
17 #include <linux/slab.h>
18 #include <linux/errno.h>
19 #include <linux/host1x_context_bus.h>
20 #include <linux/iommu.h>
21 #include <linux/idr.h>
22 #include <linux/err.h>
23 #include <linux/pci.h>
24 #include <linux/pci-ats.h>
25 #include <linux/bitops.h>
26 #include <linux/platform_device.h>
27 #include <linux/property.h>
28 #include <linux/fsl/mc.h>
29 #include <linux/module.h>
30 #include <linux/cc_platform.h>
31 #include <trace/events/iommu.h>
32 #include <linux/sched/mm.h>
33 
34 #include "dma-iommu.h"
35 
36 #include "iommu-sva.h"
37 
38 static struct kset *iommu_group_kset;
39 static DEFINE_IDA(iommu_group_ida);
40 
41 static unsigned int iommu_def_domain_type __read_mostly;
42 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
43 static u32 iommu_cmd_line __read_mostly;
44 
45 struct iommu_group {
46 	struct kobject kobj;
47 	struct kobject *devices_kobj;
48 	struct list_head devices;
49 	struct xarray pasid_array;
50 	struct mutex mutex;
51 	void *iommu_data;
52 	void (*iommu_data_release)(void *iommu_data);
53 	char *name;
54 	int id;
55 	struct iommu_domain *default_domain;
56 	struct iommu_domain *blocking_domain;
57 	struct iommu_domain *domain;
58 	struct list_head entry;
59 	unsigned int owner_cnt;
60 	void *owner;
61 };
62 
63 struct group_device {
64 	struct list_head list;
65 	struct device *dev;
66 	char *name;
67 };
68 
69 struct iommu_group_attribute {
70 	struct attribute attr;
71 	ssize_t (*show)(struct iommu_group *group, char *buf);
72 	ssize_t (*store)(struct iommu_group *group,
73 			 const char *buf, size_t count);
74 };
75 
76 static const char * const iommu_group_resv_type_string[] = {
77 	[IOMMU_RESV_DIRECT]			= "direct",
78 	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
79 	[IOMMU_RESV_RESERVED]			= "reserved",
80 	[IOMMU_RESV_MSI]			= "msi",
81 	[IOMMU_RESV_SW_MSI]			= "msi",
82 };
83 
84 #define IOMMU_CMD_LINE_DMA_API		BIT(0)
85 #define IOMMU_CMD_LINE_STRICT		BIT(1)
86 
87 static int iommu_bus_notifier(struct notifier_block *nb,
88 			      unsigned long action, void *data);
89 static int iommu_alloc_default_domain(struct iommu_group *group,
90 				      struct device *dev);
91 static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
92 						 unsigned type);
93 static int __iommu_attach_device(struct iommu_domain *domain,
94 				 struct device *dev);
95 static int __iommu_attach_group(struct iommu_domain *domain,
96 				struct iommu_group *group);
97 static int __iommu_group_set_domain(struct iommu_group *group,
98 				    struct iommu_domain *new_domain);
99 static int iommu_create_device_direct_mappings(struct iommu_group *group,
100 					       struct device *dev);
101 static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
102 static ssize_t iommu_group_store_type(struct iommu_group *group,
103 				      const char *buf, size_t count);
104 
105 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
106 struct iommu_group_attribute iommu_group_attr_##_name =		\
107 	__ATTR(_name, _mode, _show, _store)
108 
109 #define to_iommu_group_attr(_attr)	\
110 	container_of(_attr, struct iommu_group_attribute, attr)
111 #define to_iommu_group(_kobj)		\
112 	container_of(_kobj, struct iommu_group, kobj)
113 
114 static LIST_HEAD(iommu_device_list);
115 static DEFINE_SPINLOCK(iommu_device_lock);
116 
117 static struct bus_type * const iommu_buses[] = {
118 	&platform_bus_type,
119 #ifdef CONFIG_PCI
120 	&pci_bus_type,
121 #endif
122 #ifdef CONFIG_ARM_AMBA
123 	&amba_bustype,
124 #endif
125 #ifdef CONFIG_FSL_MC_BUS
126 	&fsl_mc_bus_type,
127 #endif
128 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
129 	&host1x_context_device_bus_type,
130 #endif
131 };
132 
133 /*
134  * Use a function instead of an array here because the domain-type is a
135  * bit-field, so an array would waste memory.
136  */
137 static const char *iommu_domain_type_str(unsigned int t)
138 {
139 	switch (t) {
140 	case IOMMU_DOMAIN_BLOCKED:
141 		return "Blocked";
142 	case IOMMU_DOMAIN_IDENTITY:
143 		return "Passthrough";
144 	case IOMMU_DOMAIN_UNMANAGED:
145 		return "Unmanaged";
146 	case IOMMU_DOMAIN_DMA:
147 	case IOMMU_DOMAIN_DMA_FQ:
148 		return "Translated";
149 	default:
150 		return "Unknown";
151 	}
152 }
153 
154 static int __init iommu_subsys_init(void)
155 {
156 	struct notifier_block *nb;
157 
158 	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
159 		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
160 			iommu_set_default_passthrough(false);
161 		else
162 			iommu_set_default_translated(false);
163 
164 		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
165 			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
166 			iommu_set_default_translated(false);
167 		}
168 	}
169 
170 	if (!iommu_default_passthrough() && !iommu_dma_strict)
171 		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
172 
173 	pr_info("Default domain type: %s %s\n",
174 		iommu_domain_type_str(iommu_def_domain_type),
175 		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
176 			"(set via kernel command line)" : "");
177 
178 	if (!iommu_default_passthrough())
179 		pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
180 			iommu_dma_strict ? "strict" : "lazy",
181 			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
182 				"(set via kernel command line)" : "");
183 
184 	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
185 	if (!nb)
186 		return -ENOMEM;
187 
188 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
189 		nb[i].notifier_call = iommu_bus_notifier;
190 		bus_register_notifier(iommu_buses[i], &nb[i]);
191 	}
192 
193 	return 0;
194 }
195 subsys_initcall(iommu_subsys_init);
196 
197 static int remove_iommu_group(struct device *dev, void *data)
198 {
199 	if (dev->iommu && dev->iommu->iommu_dev == data)
200 		iommu_release_device(dev);
201 
202 	return 0;
203 }
204 
205 /**
206  * iommu_device_register() - Register an IOMMU hardware instance
207  * @iommu: IOMMU handle for the instance
208  * @ops:   IOMMU ops to associate with the instance
209  * @hwdev: (optional) actual instance device, used for fwnode lookup
210  *
211  * Return: 0 on success, or an error.
212  */
213 int iommu_device_register(struct iommu_device *iommu,
214 			  const struct iommu_ops *ops, struct device *hwdev)
215 {
216 	int err = 0;
217 
218 	/* We need to be able to take module references appropriately */
219 	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
220 		return -EINVAL;
221 	/*
222 	 * Temporarily enforce global restriction to a single driver. This was
223 	 * already the de-facto behaviour, since any possible combination of
224 	 * existing drivers would compete for at least the PCI or platform bus.
225 	 */
226 	if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops)
227 		return -EBUSY;
228 
229 	iommu->ops = ops;
230 	if (hwdev)
231 		iommu->fwnode = dev_fwnode(hwdev);
232 
233 	spin_lock(&iommu_device_lock);
234 	list_add_tail(&iommu->list, &iommu_device_list);
235 	spin_unlock(&iommu_device_lock);
236 
237 	for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
238 		iommu_buses[i]->iommu_ops = ops;
239 		err = bus_iommu_probe(iommu_buses[i]);
240 	}
241 	if (err)
242 		iommu_device_unregister(iommu);
243 	return err;
244 }
245 EXPORT_SYMBOL_GPL(iommu_device_register);
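
/*
 * Example (illustrative sketch, not taken from any driver): a hypothetical
 * "mydrv" platform driver registering its IOMMU instance at probe time.
 * Everything except the iommu_device_register() call and the iommu_ops
 * member names is an assumption for illustration only.
 *
 *	static const struct iommu_ops mydrv_iommu_ops = {
 *		.probe_device	= mydrv_probe_device,
 *		.device_group	= generic_device_group,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int mydrv_probe(struct platform_device *pdev)
 *	{
 *		struct mydrv *m = devm_kzalloc(&pdev->dev, sizeof(*m), GFP_KERNEL);
 *
 *		if (!m)
 *			return -ENOMEM;
 *		return iommu_device_register(&m->iommu, &mydrv_iommu_ops, &pdev->dev);
 *	}
 */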
246 
247 void iommu_device_unregister(struct iommu_device *iommu)
248 {
249 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
250 		bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);
251 
252 	spin_lock(&iommu_device_lock);
253 	list_del(&iommu->list);
254 	spin_unlock(&iommu_device_lock);
255 }
256 EXPORT_SYMBOL_GPL(iommu_device_unregister);
257 
258 static struct dev_iommu *dev_iommu_get(struct device *dev)
259 {
260 	struct dev_iommu *param = dev->iommu;
261 
262 	if (param)
263 		return param;
264 
265 	param = kzalloc(sizeof(*param), GFP_KERNEL);
266 	if (!param)
267 		return NULL;
268 
269 	mutex_init(&param->lock);
270 	dev->iommu = param;
271 	return param;
272 }
273 
274 static void dev_iommu_free(struct device *dev)
275 {
276 	struct dev_iommu *param = dev->iommu;
277 
278 	dev->iommu = NULL;
279 	if (param->fwspec) {
280 		fwnode_handle_put(param->fwspec->iommu_fwnode);
281 		kfree(param->fwspec);
282 	}
283 	kfree(param);
284 }
285 
286 static u32 dev_iommu_get_max_pasids(struct device *dev)
287 {
288 	u32 max_pasids = 0, bits = 0;
289 	int ret;
290 
291 	if (dev_is_pci(dev)) {
292 		ret = pci_max_pasids(to_pci_dev(dev));
293 		if (ret > 0)
294 			max_pasids = ret;
295 	} else {
296 		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
297 		if (!ret)
298 			max_pasids = 1UL << bits;
299 	}
300 
301 	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
302 }
303 
304 static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
305 {
306 	const struct iommu_ops *ops = dev->bus->iommu_ops;
307 	struct iommu_device *iommu_dev;
308 	struct iommu_group *group;
309 	int ret;
310 
311 	if (!ops)
312 		return -ENODEV;
313 
314 	if (!dev_iommu_get(dev))
315 		return -ENOMEM;
316 
317 	if (!try_module_get(ops->owner)) {
318 		ret = -EINVAL;
319 		goto err_free;
320 	}
321 
322 	iommu_dev = ops->probe_device(dev);
323 	if (IS_ERR(iommu_dev)) {
324 		ret = PTR_ERR(iommu_dev);
325 		goto out_module_put;
326 	}
327 
328 	dev->iommu->iommu_dev = iommu_dev;
329 	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
330 
331 	group = iommu_group_get_for_dev(dev);
332 	if (IS_ERR(group)) {
333 		ret = PTR_ERR(group);
334 		goto out_release;
335 	}
336 	iommu_group_put(group);
337 
338 	if (group_list && !group->default_domain && list_empty(&group->entry))
339 		list_add_tail(&group->entry, group_list);
340 
341 	iommu_device_link(iommu_dev, dev);
342 
343 	return 0;
344 
345 out_release:
346 	if (ops->release_device)
347 		ops->release_device(dev);
348 
349 out_module_put:
350 	module_put(ops->owner);
351 
352 err_free:
353 	dev_iommu_free(dev);
354 
355 	return ret;
356 }
357 
358 int iommu_probe_device(struct device *dev)
359 {
360 	const struct iommu_ops *ops;
361 	struct iommu_group *group;
362 	int ret;
363 
364 	ret = __iommu_probe_device(dev, NULL);
365 	if (ret)
366 		goto err_out;
367 
368 	group = iommu_group_get(dev);
369 	if (!group) {
370 		ret = -ENODEV;
371 		goto err_release;
372 	}
373 
374 	/*
375 	 * Try to allocate a default domain - needs support from the
376 	 * IOMMU driver. There are still some drivers which don't
377 	 * support default domains, so the return value is not yet
378 	 * checked.
379 	 */
380 	mutex_lock(&group->mutex);
381 	iommu_alloc_default_domain(group, dev);
382 
383 	/*
384 	 * If device joined an existing group which has been claimed, don't
385 	 * attach the default domain.
386 	 */
387 	if (group->default_domain && !group->owner) {
388 		ret = __iommu_attach_device(group->default_domain, dev);
389 		if (ret) {
390 			mutex_unlock(&group->mutex);
391 			iommu_group_put(group);
392 			goto err_release;
393 		}
394 	}
395 
396 	iommu_create_device_direct_mappings(group, dev);
397 
398 	mutex_unlock(&group->mutex);
399 	iommu_group_put(group);
400 
401 	ops = dev_iommu_ops(dev);
402 	if (ops->probe_finalize)
403 		ops->probe_finalize(dev);
404 
405 	return 0;
406 
407 err_release:
408 	iommu_release_device(dev);
409 
410 err_out:
411 	return ret;
412 
413 }
414 
415 void iommu_release_device(struct device *dev)
416 {
417 	const struct iommu_ops *ops;
418 
419 	if (!dev->iommu)
420 		return;
421 
422 	iommu_device_unlink(dev->iommu->iommu_dev, dev);
423 
424 	ops = dev_iommu_ops(dev);
425 	if (ops->release_device)
426 		ops->release_device(dev);
427 
428 	iommu_group_remove_device(dev);
429 	module_put(ops->owner);
430 	dev_iommu_free(dev);
431 }
432 
433 static int __init iommu_set_def_domain_type(char *str)
434 {
435 	bool pt;
436 	int ret;
437 
438 	ret = kstrtobool(str, &pt);
439 	if (ret)
440 		return ret;
441 
442 	if (pt)
443 		iommu_set_default_passthrough(true);
444 	else
445 		iommu_set_default_translated(true);
446 
447 	return 0;
448 }
449 early_param("iommu.passthrough", iommu_set_def_domain_type);
450 
451 static int __init iommu_dma_setup(char *str)
452 {
453 	int ret = kstrtobool(str, &iommu_dma_strict);
454 
455 	if (!ret)
456 		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
457 	return ret;
458 }
459 early_param("iommu.strict", iommu_dma_setup);
460 
461 void iommu_set_dma_strict(void)
462 {
463 	iommu_dma_strict = true;
464 	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
465 		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
466 }
467 
468 static ssize_t iommu_group_attr_show(struct kobject *kobj,
469 				     struct attribute *__attr, char *buf)
470 {
471 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
472 	struct iommu_group *group = to_iommu_group(kobj);
473 	ssize_t ret = -EIO;
474 
475 	if (attr->show)
476 		ret = attr->show(group, buf);
477 	return ret;
478 }
479 
480 static ssize_t iommu_group_attr_store(struct kobject *kobj,
481 				      struct attribute *__attr,
482 				      const char *buf, size_t count)
483 {
484 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
485 	struct iommu_group *group = to_iommu_group(kobj);
486 	ssize_t ret = -EIO;
487 
488 	if (attr->store)
489 		ret = attr->store(group, buf, count);
490 	return ret;
491 }
492 
493 static const struct sysfs_ops iommu_group_sysfs_ops = {
494 	.show = iommu_group_attr_show,
495 	.store = iommu_group_attr_store,
496 };
497 
498 static int iommu_group_create_file(struct iommu_group *group,
499 				   struct iommu_group_attribute *attr)
500 {
501 	return sysfs_create_file(&group->kobj, &attr->attr);
502 }
503 
504 static void iommu_group_remove_file(struct iommu_group *group,
505 				    struct iommu_group_attribute *attr)
506 {
507 	sysfs_remove_file(&group->kobj, &attr->attr);
508 }
509 
510 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
511 {
512 	return sprintf(buf, "%s\n", group->name);
513 }
514 
515 /**
516  * iommu_insert_resv_region - Insert a new region in the
517  * list of reserved regions.
518  * @new: new region to insert
519  * @regions: list of regions
520  *
521  * Elements are sorted by start address and overlapping segments
522  * of the same type are merged.
523  */
524 static int iommu_insert_resv_region(struct iommu_resv_region *new,
525 				    struct list_head *regions)
526 {
527 	struct iommu_resv_region *iter, *tmp, *nr, *top;
528 	LIST_HEAD(stack);
529 
530 	nr = iommu_alloc_resv_region(new->start, new->length,
531 				     new->prot, new->type, GFP_KERNEL);
532 	if (!nr)
533 		return -ENOMEM;
534 
535 	/* First add the new element based on start address sorting */
536 	list_for_each_entry(iter, regions, list) {
537 		if (nr->start < iter->start ||
538 		    (nr->start == iter->start && nr->type <= iter->type))
539 			break;
540 	}
541 	list_add_tail(&nr->list, &iter->list);
542 
543 	/* Merge overlapping segments of type nr->type in @regions, if any */
544 	list_for_each_entry_safe(iter, tmp, regions, list) {
545 		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;
546 
547 		/* no merge needed on elements of different types than @new */
548 		if (iter->type != new->type) {
549 			list_move_tail(&iter->list, &stack);
550 			continue;
551 		}
552 
553 		/* look for the last stack element of same type as @iter */
554 		list_for_each_entry_reverse(top, &stack, list)
555 			if (top->type == iter->type)
556 				goto check_overlap;
557 
558 		list_move_tail(&iter->list, &stack);
559 		continue;
560 
561 check_overlap:
562 		top_end = top->start + top->length - 1;
563 
564 		if (iter->start > top_end + 1) {
565 			list_move_tail(&iter->list, &stack);
566 		} else {
567 			top->length = max(top_end, iter_end) - top->start + 1;
568 			list_del(&iter->list);
569 			kfree(iter);
570 		}
571 	}
572 	list_splice(&stack, regions);
573 	return 0;
574 }
575 
576 static int
577 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
578 				 struct list_head *group_resv_regions)
579 {
580 	struct iommu_resv_region *entry;
581 	int ret = 0;
582 
583 	list_for_each_entry(entry, dev_resv_regions, list) {
584 		ret = iommu_insert_resv_region(entry, group_resv_regions);
585 		if (ret)
586 			break;
587 	}
588 	return ret;
589 }
590 
591 int iommu_get_group_resv_regions(struct iommu_group *group,
592 				 struct list_head *head)
593 {
594 	struct group_device *device;
595 	int ret = 0;
596 
597 	mutex_lock(&group->mutex);
598 	list_for_each_entry(device, &group->devices, list) {
599 		struct list_head dev_resv_regions;
600 
601 		/*
602 		 * Non-API groups still expose reserved_regions in sysfs,
603 		 * so filter out calls that get here that way.
604 		 */
605 		if (!device->dev->iommu)
606 			break;
607 
608 		INIT_LIST_HEAD(&dev_resv_regions);
609 		iommu_get_resv_regions(device->dev, &dev_resv_regions);
610 		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
611 		iommu_put_resv_regions(device->dev, &dev_resv_regions);
612 		if (ret)
613 			break;
614 	}
615 	mutex_unlock(&group->mutex);
616 	return ret;
617 }
618 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);
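
/*
 * Example (illustrative sketch): walking a group's reserved regions. As in
 * iommu_group_show_resv_regions() below, the caller owns the entries on the
 * returned list and must free them.
 *
 *	LIST_HEAD(resv);
 *	struct iommu_resv_region *region, *next;
 *
 *	iommu_get_group_resv_regions(group, &resv);
 *	list_for_each_entry_safe(region, next, &resv, list) {
 *		pr_info("resv: %pa + %zu (%d)\n",
 *			&region->start, region->length, region->type);
 *		kfree(region);
 *	}
 */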
619 
620 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
621 					     char *buf)
622 {
623 	struct iommu_resv_region *region, *next;
624 	struct list_head group_resv_regions;
625 	char *str = buf;
626 
627 	INIT_LIST_HEAD(&group_resv_regions);
628 	iommu_get_group_resv_regions(group, &group_resv_regions);
629 
630 	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
631 		str += sprintf(str, "0x%016llx 0x%016llx %s\n",
632 			       (long long int)region->start,
633 			       (long long int)(region->start +
634 						region->length - 1),
635 			       iommu_group_resv_type_string[region->type]);
636 		kfree(region);
637 	}
638 
639 	return (str - buf);
640 }
641 
642 static ssize_t iommu_group_show_type(struct iommu_group *group,
643 				     char *buf)
644 {
645 	char *type = "unknown\n";
646 
647 	mutex_lock(&group->mutex);
648 	if (group->default_domain) {
649 		switch (group->default_domain->type) {
650 		case IOMMU_DOMAIN_BLOCKED:
651 			type = "blocked\n";
652 			break;
653 		case IOMMU_DOMAIN_IDENTITY:
654 			type = "identity\n";
655 			break;
656 		case IOMMU_DOMAIN_UNMANAGED:
657 			type = "unmanaged\n";
658 			break;
659 		case IOMMU_DOMAIN_DMA:
660 			type = "DMA\n";
661 			break;
662 		case IOMMU_DOMAIN_DMA_FQ:
663 			type = "DMA-FQ\n";
664 			break;
665 		}
666 	}
667 	mutex_unlock(&group->mutex);
668 	strcpy(buf, type);
669 
670 	return strlen(type);
671 }
672 
673 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
674 
675 static IOMMU_GROUP_ATTR(reserved_regions, 0444,
676 			iommu_group_show_resv_regions, NULL);
677 
678 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
679 			iommu_group_store_type);
680 
681 static void iommu_group_release(struct kobject *kobj)
682 {
683 	struct iommu_group *group = to_iommu_group(kobj);
684 
685 	pr_debug("Releasing group %d\n", group->id);
686 
687 	if (group->iommu_data_release)
688 		group->iommu_data_release(group->iommu_data);
689 
690 	ida_free(&iommu_group_ida, group->id);
691 
692 	if (group->default_domain)
693 		iommu_domain_free(group->default_domain);
694 	if (group->blocking_domain)
695 		iommu_domain_free(group->blocking_domain);
696 
697 	kfree(group->name);
698 	kfree(group);
699 }
700 
701 static struct kobj_type iommu_group_ktype = {
702 	.sysfs_ops = &iommu_group_sysfs_ops,
703 	.release = iommu_group_release,
704 };
705 
706 /**
707  * iommu_group_alloc - Allocate a new group
708  *
709  * This function is called by an iommu driver to allocate a new iommu
710  * group.  The iommu group represents the minimum granularity of the iommu.
711  * Upon successful return, the caller holds a reference to the allocated

712  * group in order to hold the group until devices are added.  Use
713  * iommu_group_put() to release this extra reference count, allowing the
714  * group to be automatically reclaimed once it has no devices or external
715  * references.
716  */
717 struct iommu_group *iommu_group_alloc(void)
718 {
719 	struct iommu_group *group;
720 	int ret;
721 
722 	group = kzalloc(sizeof(*group), GFP_KERNEL);
723 	if (!group)
724 		return ERR_PTR(-ENOMEM);
725 
726 	group->kobj.kset = iommu_group_kset;
727 	mutex_init(&group->mutex);
728 	INIT_LIST_HEAD(&group->devices);
729 	INIT_LIST_HEAD(&group->entry);
730 	xa_init(&group->pasid_array);
731 
732 	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
733 	if (ret < 0) {
734 		kfree(group);
735 		return ERR_PTR(ret);
736 	}
737 	group->id = ret;
738 
739 	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
740 				   NULL, "%d", group->id);
741 	if (ret) {
742 		kobject_put(&group->kobj);
743 		return ERR_PTR(ret);
744 	}
745 
746 	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
747 	if (!group->devices_kobj) {
748 		kobject_put(&group->kobj); /* triggers .release & free */
749 		return ERR_PTR(-ENOMEM);
750 	}
751 
752 	/*
753 	 * The devices_kobj holds a reference on the group kobject, so
754 	 * as long as that exists so will the group.  We can therefore
755 	 * use the devices_kobj for reference counting.
756 	 */
757 	kobject_put(&group->kobj);
758 
759 	ret = iommu_group_create_file(group,
760 				      &iommu_group_attr_reserved_regions);
761 	if (ret)
762 		return ERR_PTR(ret);
763 
764 	ret = iommu_group_create_file(group, &iommu_group_attr_type);
765 	if (ret)
766 		return ERR_PTR(ret);
767 
768 	pr_debug("Allocated group %d\n", group->id);
769 
770 	return group;
771 }
772 EXPORT_SYMBOL_GPL(iommu_group_alloc);
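
/*
 * Example (illustrative sketch): a driver ->device_group() callback that
 * shares one group between all devices behind the same hypothetical
 * "stream" object. Only the iommu_group_alloc()/iommu_group_ref_get()
 * calls are real API; the mydrv_stream lookup is an assumption.
 *
 *	static struct iommu_group *mydrv_device_group(struct device *dev)
 *	{
 *		struct mydrv_stream *stream = mydrv_stream_for_dev(dev);
 *
 *		if (stream->group)
 *			return iommu_group_ref_get(stream->group);
 *
 *		stream->group = iommu_group_alloc();
 *		return stream->group;
 *	}
 */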
773 
774 struct iommu_group *iommu_group_get_by_id(int id)
775 {
776 	struct kobject *group_kobj;
777 	struct iommu_group *group;
778 	const char *name;
779 
780 	if (!iommu_group_kset)
781 		return NULL;
782 
783 	name = kasprintf(GFP_KERNEL, "%d", id);
784 	if (!name)
785 		return NULL;
786 
787 	group_kobj = kset_find_obj(iommu_group_kset, name);
788 	kfree(name);
789 
790 	if (!group_kobj)
791 		return NULL;
792 
793 	group = container_of(group_kobj, struct iommu_group, kobj);
794 	BUG_ON(group->id != id);
795 
796 	kobject_get(group->devices_kobj);
797 	kobject_put(&group->kobj);
798 
799 	return group;
800 }
801 EXPORT_SYMBOL_GPL(iommu_group_get_by_id);
802 
803 /**
804  * iommu_group_get_iommudata - retrieve iommu_data registered for a group
805  * @group: the group
806  *
807  * iommu drivers can store data in the group for use when doing iommu
808  * operations.  This function provides a way to retrieve it.  Caller
809  * should hold a group reference.
810  */
811 void *iommu_group_get_iommudata(struct iommu_group *group)
812 {
813 	return group->iommu_data;
814 }
815 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
816 
817 /**
818  * iommu_group_set_iommudata - set iommu_data for a group
819  * @group: the group
820  * @iommu_data: new data
821  * @release: release function for iommu_data
822  *
823  * iommu drivers can store data in the group for use when doing iommu
824  * operations.  This function provides a way to set the data after
825  * the group has been allocated.  Caller should hold a group reference.
826  */
827 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
828 			       void (*release)(void *iommu_data))
829 {
830 	group->iommu_data = iommu_data;
831 	group->iommu_data_release = release;
832 }
833 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
834 
835 /**
836  * iommu_group_set_name - set name for a group
837  * @group: the group
838  * @name: name
839  *
840  * Allow iommu driver to set a name for a group.  When set it will
841  * appear in a name attribute file under the group in sysfs.
842  */
843 int iommu_group_set_name(struct iommu_group *group, const char *name)
844 {
845 	int ret;
846 
847 	if (group->name) {
848 		iommu_group_remove_file(group, &iommu_group_attr_name);
849 		kfree(group->name);
850 		group->name = NULL;
851 		if (!name)
852 			return 0;
853 	}
854 
855 	group->name = kstrdup(name, GFP_KERNEL);
856 	if (!group->name)
857 		return -ENOMEM;
858 
859 	ret = iommu_group_create_file(group, &iommu_group_attr_name);
860 	if (ret) {
861 		kfree(group->name);
862 		group->name = NULL;
863 		return ret;
864 	}
865 
866 	return 0;
867 }
868 EXPORT_SYMBOL_GPL(iommu_group_set_name);
869 
870 static int iommu_create_device_direct_mappings(struct iommu_group *group,
871 					       struct device *dev)
872 {
873 	struct iommu_domain *domain = group->default_domain;
874 	struct iommu_resv_region *entry;
875 	struct list_head mappings;
876 	unsigned long pg_size;
877 	int ret = 0;
878 
879 	if (!domain || !iommu_is_dma_domain(domain))
880 		return 0;
881 
882 	BUG_ON(!domain->pgsize_bitmap);
883 
884 	pg_size = 1UL << __ffs(domain->pgsize_bitmap);
885 	INIT_LIST_HEAD(&mappings);
886 
887 	iommu_get_resv_regions(dev, &mappings);
888 
889 	/* We need to consider overlapping regions for different devices */
890 	list_for_each_entry(entry, &mappings, list) {
891 		dma_addr_t start, end, addr;
892 		size_t map_size = 0;
893 
894 		start = ALIGN(entry->start, pg_size);
895 		end   = ALIGN(entry->start + entry->length, pg_size);
896 
897 		if (entry->type != IOMMU_RESV_DIRECT &&
898 		    entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
899 			continue;
900 
901 		for (addr = start; addr <= end; addr += pg_size) {
902 			phys_addr_t phys_addr;
903 
904 			if (addr == end)
905 				goto map_end;
906 
907 			phys_addr = iommu_iova_to_phys(domain, addr);
908 			if (!phys_addr) {
909 				map_size += pg_size;
910 				continue;
911 			}
912 
913 map_end:
914 			if (map_size) {
915 				ret = iommu_map(domain, addr - map_size,
916 						addr - map_size, map_size,
917 						entry->prot);
918 				if (ret)
919 					goto out;
920 				map_size = 0;
921 			}
922 		}
923 
924 	}
925 
926 	iommu_flush_iotlb_all(domain);
927 
928 out:
929 	iommu_put_resv_regions(dev, &mappings);
930 
931 	return ret;
932 }
933 
934 static bool iommu_is_attach_deferred(struct device *dev)
935 {
936 	const struct iommu_ops *ops = dev_iommu_ops(dev);
937 
938 	if (ops->is_attach_deferred)
939 		return ops->is_attach_deferred(dev);
940 
941 	return false;
942 }
943 
944 /**
945  * iommu_group_add_device - add a device to an iommu group
946  * @group: the group into which to add the device (reference should be held)
947  * @dev: the device
948  *
949  * This function is called by an iommu driver to add a device into a
950  * group.  Adding a device increments the group reference count.
951  */
952 int iommu_group_add_device(struct iommu_group *group, struct device *dev)
953 {
954 	int ret, i = 0;
955 	struct group_device *device;
956 
957 	device = kzalloc(sizeof(*device), GFP_KERNEL);
958 	if (!device)
959 		return -ENOMEM;
960 
961 	device->dev = dev;
962 
963 	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
964 	if (ret)
965 		goto err_free_device;
966 
967 	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
968 rename:
969 	if (!device->name) {
970 		ret = -ENOMEM;
971 		goto err_remove_link;
972 	}
973 
974 	ret = sysfs_create_link_nowarn(group->devices_kobj,
975 				       &dev->kobj, device->name);
976 	if (ret) {
977 		if (ret == -EEXIST && i >= 0) {
978 			/*
979 			 * Account for the slim chance of collision
980 			 * and append an instance to the name.
981 			 */
982 			kfree(device->name);
983 			device->name = kasprintf(GFP_KERNEL, "%s.%d",
984 						 kobject_name(&dev->kobj), i++);
985 			goto rename;
986 		}
987 		goto err_free_name;
988 	}
989 
990 	kobject_get(group->devices_kobj);
991 
992 	dev->iommu_group = group;
993 
994 	mutex_lock(&group->mutex);
995 	list_add_tail(&device->list, &group->devices);
996 	if (group->domain && !iommu_is_attach_deferred(dev))
997 		ret = __iommu_attach_device(group->domain, dev);
998 	mutex_unlock(&group->mutex);
999 	if (ret)
1000 		goto err_put_group;
1001 
1002 	trace_add_device_to_group(group->id, dev);
1003 
1004 	dev_info(dev, "Adding to iommu group %d\n", group->id);
1005 
1006 	return 0;
1007 
1008 err_put_group:
1009 	mutex_lock(&group->mutex);
1010 	list_del(&device->list);
1011 	mutex_unlock(&group->mutex);
1012 	dev->iommu_group = NULL;
1013 	kobject_put(group->devices_kobj);
1014 	sysfs_remove_link(group->devices_kobj, device->name);
1015 err_free_name:
1016 	kfree(device->name);
1017 err_remove_link:
1018 	sysfs_remove_link(&dev->kobj, "iommu_group");
1019 err_free_device:
1020 	kfree(device);
1021 	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
1022 	return ret;
1023 }
1024 EXPORT_SYMBOL_GPL(iommu_group_add_device);
1025 
1026 /**
1027  * iommu_group_remove_device - remove a device from its current group
1028  * @dev: device to be removed
1029  *
1030  * This function is called by an iommu driver to remove the device from
1031  * its current group.  This decrements the iommu group reference count.
1032  */
1033 void iommu_group_remove_device(struct device *dev)
1034 {
1035 	struct iommu_group *group = dev->iommu_group;
1036 	struct group_device *tmp_device, *device = NULL;
1037 
1038 	if (!group)
1039 		return;
1040 
1041 	dev_info(dev, "Removing from iommu group %d\n", group->id);
1042 
1043 	mutex_lock(&group->mutex);
1044 	list_for_each_entry(tmp_device, &group->devices, list) {
1045 		if (tmp_device->dev == dev) {
1046 			device = tmp_device;
1047 			list_del(&device->list);
1048 			break;
1049 		}
1050 	}
1051 	mutex_unlock(&group->mutex);
1052 
1053 	if (!device)
1054 		return;
1055 
1056 	sysfs_remove_link(group->devices_kobj, device->name);
1057 	sysfs_remove_link(&dev->kobj, "iommu_group");
1058 
1059 	trace_remove_device_from_group(group->id, dev);
1060 
1061 	kfree(device->name);
1062 	kfree(device);
1063 	dev->iommu_group = NULL;
1064 	kobject_put(group->devices_kobj);
1065 }
1066 EXPORT_SYMBOL_GPL(iommu_group_remove_device);
1067 
1068 static int iommu_group_device_count(struct iommu_group *group)
1069 {
1070 	struct group_device *entry;
1071 	int ret = 0;
1072 
1073 	list_for_each_entry(entry, &group->devices, list)
1074 		ret++;
1075 
1076 	return ret;
1077 }
1078 
1079 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
1080 				      int (*fn)(struct device *, void *))
1081 {
1082 	struct group_device *device;
1083 	int ret = 0;
1084 
1085 	list_for_each_entry(device, &group->devices, list) {
1086 		ret = fn(device->dev, data);
1087 		if (ret)
1088 			break;
1089 	}
1090 	return ret;
1091 }
1092 
1093 /**
1094  * iommu_group_for_each_dev - iterate over each device in the group
1095  * @group: the group
1096  * @data: caller opaque data to be passed to callback function
1097  * @fn: caller supplied callback function
1098  *
1099  * This function is called by group users to iterate over group devices.
1100  * Callers should hold a reference count to the group during callback.
1101  * The group->mutex is held across callbacks, which will block calls to
1102  * iommu_group_add/remove_device.
1103  */
1104 int iommu_group_for_each_dev(struct iommu_group *group, void *data,
1105 			     int (*fn)(struct device *, void *))
1106 {
1107 	int ret;
1108 
1109 	mutex_lock(&group->mutex);
1110 	ret = __iommu_group_for_each_dev(group, data, fn);
1111 	mutex_unlock(&group->mutex);
1112 
1113 	return ret;
1114 }
1115 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
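
/*
 * Example (illustrative sketch): counting the devices in a group through
 * the callback iteration above.
 *
 *	static int mydrv_count_one(struct device *dev, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *
 *	iommu_group_for_each_dev(group, &count, mydrv_count_one);
 */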
1116 
1117 /**
1118  * iommu_group_get - Return the group for a device and increment reference
1119  * @dev: get the group that this device belongs to
1120  *
1121  * This function is called by iommu drivers and users to get the group
1122  * for the specified device.  If found, the group is returned and the group
1123  * reference is incremented; otherwise NULL is returned.
1124  */
1125 struct iommu_group *iommu_group_get(struct device *dev)
1126 {
1127 	struct iommu_group *group = dev->iommu_group;
1128 
1129 	if (group)
1130 		kobject_get(group->devices_kobj);
1131 
1132 	return group;
1133 }
1134 EXPORT_SYMBOL_GPL(iommu_group_get);
1135 
1136 /**
1137  * iommu_group_ref_get - Increment reference on a group
1138  * @group: the group to use, must not be NULL
1139  *
1140  * This function is called by iommu drivers to take additional references on an
1141  * existing group.  Returns the given group for convenience.
1142  */
1143 struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
1144 {
1145 	kobject_get(group->devices_kobj);
1146 	return group;
1147 }
1148 EXPORT_SYMBOL_GPL(iommu_group_ref_get);
1149 
1150 /**
1151  * iommu_group_put - Decrement group reference
1152  * @group: the group to use
1153  *
1154  * This function is called by iommu drivers and users to release the
1155  * iommu group.  Once the reference count is zero, the group is released.
1156  */
1157 void iommu_group_put(struct iommu_group *group)
1158 {
1159 	if (group)
1160 		kobject_put(group->devices_kobj);
1161 }
1162 EXPORT_SYMBOL_GPL(iommu_group_put);
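
/*
 * Example (illustrative sketch): the usual reference discipline for a
 * short-lived group lookup.
 *
 *	struct iommu_group *group = iommu_group_get(dev);
 *
 *	if (group) {
 *		dev_info(dev, "device is in iommu group %d\n", iommu_group_id(group));
 *		iommu_group_put(group);
 *	}
 */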
1163 
1164 /**
1165  * iommu_register_device_fault_handler() - Register a device fault handler
1166  * @dev: the device
1167  * @handler: the fault handler
1168  * @data: private data passed as argument to the handler
1169  *
1170  * When an IOMMU fault event is received, this handler gets called with the
1171  * fault event and data as arguments. The handler should return 0 on success. If
1172  * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
1173  * complete the fault by calling iommu_page_response() with one of the following
1174  * response codes:
1175  * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
1176  * - IOMMU_PAGE_RESP_INVALID: terminate the fault
1177  * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
1178  *   page faults if possible.
1179  *
1180  * Return 0 if the fault handler was installed successfully, or an error.
1181  */
1182 int iommu_register_device_fault_handler(struct device *dev,
1183 					iommu_dev_fault_handler_t handler,
1184 					void *data)
1185 {
1186 	struct dev_iommu *param = dev->iommu;
1187 	int ret = 0;
1188 
1189 	if (!param)
1190 		return -EINVAL;
1191 
1192 	mutex_lock(&param->lock);
1193 	/* Only allow one fault handler registered for each device */
1194 	if (param->fault_param) {
1195 		ret = -EBUSY;
1196 		goto done_unlock;
1197 	}
1198 
1199 	get_device(dev);
1200 	param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
1201 	if (!param->fault_param) {
1202 		put_device(dev);
1203 		ret = -ENOMEM;
1204 		goto done_unlock;
1205 	}
1206 	param->fault_param->handler = handler;
1207 	param->fault_param->data = data;
1208 	mutex_init(&param->fault_param->lock);
1209 	INIT_LIST_HEAD(&param->fault_param->faults);
1210 
1211 done_unlock:
1212 	mutex_unlock(&param->lock);
1213 
1214 	return ret;
1215 }
1216 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
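
/*
 * Example (illustrative sketch): registering a per-device fault handler.
 * The "mydrv" names are assumptions; a real consumer would queue
 * recoverable page requests and later complete them with
 * iommu_page_response().
 *
 *	static int mydrv_iommu_fault(struct iommu_fault *fault, void *data)
 *	{
 *		struct mydrv *m = data;
 *
 *		if (fault->type == IOMMU_FAULT_PAGE_REQ)
 *			return mydrv_queue_page_request(m, &fault->prm);
 *		return -EOPNOTSUPP;
 *	}
 *
 *	ret = iommu_register_device_fault_handler(dev, mydrv_iommu_fault, m);
 */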
1217 
1218 /**
1219  * iommu_unregister_device_fault_handler() - Unregister the device fault handler
1220  * @dev: the device
1221  *
1222  * Remove the device fault handler installed with
1223  * iommu_register_device_fault_handler().
1224  *
1225  * Return 0 on success, or an error.
1226  */
1227 int iommu_unregister_device_fault_handler(struct device *dev)
1228 {
1229 	struct dev_iommu *param = dev->iommu;
1230 	int ret = 0;
1231 
1232 	if (!param)
1233 		return -EINVAL;
1234 
1235 	mutex_lock(&param->lock);
1236 
1237 	if (!param->fault_param)
1238 		goto unlock;
1239 
1240 	/* we cannot unregister handler if there are pending faults */
1241 	if (!list_empty(&param->fault_param->faults)) {
1242 		ret = -EBUSY;
1243 		goto unlock;
1244 	}
1245 
1246 	kfree(param->fault_param);
1247 	param->fault_param = NULL;
1248 	put_device(dev);
1249 unlock:
1250 	mutex_unlock(&param->lock);
1251 
1252 	return ret;
1253 }
1254 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
1255 
1256 /**
1257  * iommu_report_device_fault() - Report fault event to device driver
1258  * @dev: the device
1259  * @evt: fault event data
1260  *
1261  * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
1262  * handler. When this function fails and the fault is recoverable, it is the
1263  * caller's responsibility to complete the fault.
1264  *
1265  * Return 0 on success, or an error.
1266  */
1267 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
1268 {
1269 	struct dev_iommu *param = dev->iommu;
1270 	struct iommu_fault_event *evt_pending = NULL;
1271 	struct iommu_fault_param *fparam;
1272 	int ret = 0;
1273 
1274 	if (!param || !evt)
1275 		return -EINVAL;
1276 
1277 	/* we only report device fault if there is a handler registered */
1278 	mutex_lock(&param->lock);
1279 	fparam = param->fault_param;
1280 	if (!fparam || !fparam->handler) {
1281 		ret = -EINVAL;
1282 		goto done_unlock;
1283 	}
1284 
1285 	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
1286 	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
1287 		evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
1288 				      GFP_KERNEL);
1289 		if (!evt_pending) {
1290 			ret = -ENOMEM;
1291 			goto done_unlock;
1292 		}
1293 		mutex_lock(&fparam->lock);
1294 		list_add_tail(&evt_pending->list, &fparam->faults);
1295 		mutex_unlock(&fparam->lock);
1296 	}
1297 
1298 	ret = fparam->handler(&evt->fault, fparam->data);
1299 	if (ret && evt_pending) {
1300 		mutex_lock(&fparam->lock);
1301 		list_del(&evt_pending->list);
1302 		mutex_unlock(&fparam->lock);
1303 		kfree(evt_pending);
1304 	}
1305 done_unlock:
1306 	mutex_unlock(&param->lock);
1307 	return ret;
1308 }
1309 EXPORT_SYMBOL_GPL(iommu_report_device_fault);
1310 
1311 int iommu_page_response(struct device *dev,
1312 			struct iommu_page_response *msg)
1313 {
1314 	bool needs_pasid;
1315 	int ret = -EINVAL;
1316 	struct iommu_fault_event *evt;
1317 	struct iommu_fault_page_request *prm;
1318 	struct dev_iommu *param = dev->iommu;
1319 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1320 	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
1321 
1322 	if (!ops->page_response)
1323 		return -ENODEV;
1324 
1325 	if (!param || !param->fault_param)
1326 		return -EINVAL;
1327 
1328 	if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
1329 	    msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
1330 		return -EINVAL;
1331 
1332 	/* Only send response if there is a fault report pending */
1333 	mutex_lock(&param->fault_param->lock);
1334 	if (list_empty(&param->fault_param->faults)) {
1335 		dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
1336 		goto done_unlock;
1337 	}
1338 	/*
1339 	 * Check if we have a matching page request pending to respond,
1340 	 * otherwise return -EINVAL
1341 	 */
1342 	list_for_each_entry(evt, &param->fault_param->faults, list) {
1343 		prm = &evt->fault.prm;
1344 		if (prm->grpid != msg->grpid)
1345 			continue;
1346 
1347 		/*
1348 		 * If the PASID is required, the corresponding request is
1349 		 * matched using the group ID, the PASID valid bit and the PASID
1350 		 * value. Otherwise only the group ID matches request and
1351 		 * response.
1352 		 */
1353 		needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
1354 		if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
1355 			continue;
1356 
1357 		if (!needs_pasid && has_pasid) {
1358 			/* No big deal, just clear it. */
1359 			msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
1360 			msg->pasid = 0;
1361 		}
1362 
1363 		ret = ops->page_response(dev, evt, msg);
1364 		list_del(&evt->list);
1365 		kfree(evt);
1366 		break;
1367 	}
1368 
1369 done_unlock:
1370 	mutex_unlock(&param->fault_param->lock);
1371 	return ret;
1372 }
1373 EXPORT_SYMBOL_GPL(iommu_page_response);
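
/*
 * Example (illustrative sketch): completing a page request that was
 * previously reported through the registered fault handler. "prm" is
 * assumed to point at the recorded struct iommu_fault_page_request.
 *
 *	struct iommu_page_response resp = {
 *		.version	= IOMMU_PAGE_RESP_VERSION_1,
 *		.grpid		= prm->grpid,
 *		.code		= IOMMU_PAGE_RESP_SUCCESS,
 *	};
 *
 *	if (prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
 *		resp.flags	= IOMMU_PAGE_RESP_PASID_VALID;
 *		resp.pasid	= prm->pasid;
 *	}
 *	ret = iommu_page_response(dev, &resp);
 */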
1374 
1375 /**
1376  * iommu_group_id - Return ID for a group
1377  * @group: the group to ID
1378  *
1379  * Return the unique ID for the group matching the sysfs group number.
1380  */
1381 int iommu_group_id(struct iommu_group *group)
1382 {
1383 	return group->id;
1384 }
1385 EXPORT_SYMBOL_GPL(iommu_group_id);
1386 
1387 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
1388 					       unsigned long *devfns);
1389 
1390 /*
1391  * To consider a PCI device isolated, we require ACS to support Source
1392  * Validation, Request Redirection, Completer Redirection, and Upstream
1393  * Forwarding.  This effectively means that devices cannot spoof their
1394  * requester ID, requests and completions cannot be redirected, and all
1395  * transactions are forwarded upstream, even as they pass through a
1396  * bridge where the target device is downstream.
1397  */
1398 #define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
1399 
1400 /*
1401  * For multifunction devices which are not isolated from each other, find
1402  * all the other non-isolated functions and look for existing groups.  For
1403  * each function, we also need to look for aliases to or from other devices
1404  * that may already have a group.
1405  */
1406 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
1407 							unsigned long *devfns)
1408 {
1409 	struct pci_dev *tmp = NULL;
1410 	struct iommu_group *group;
1411 
1412 	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
1413 		return NULL;
1414 
1415 	for_each_pci_dev(tmp) {
1416 		if (tmp == pdev || tmp->bus != pdev->bus ||
1417 		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
1418 		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
1419 			continue;
1420 
1421 		group = get_pci_alias_group(tmp, devfns);
1422 		if (group) {
1423 			pci_dev_put(tmp);
1424 			return group;
1425 		}
1426 	}
1427 
1428 	return NULL;
1429 }
1430 
1431 /*
1432  * Look for aliases to or from the given device for existing groups. DMA
1433  * aliases are only supported on the same bus, therefore the search
1434  * space is quite small (especially since we're really only looking at PCIe
1435  * devices, and therefore only expect multiple slots on the root complex or
1436  * downstream switch ports).  It's conceivable though that a pair of
1437  * multifunction devices could have aliases between them that would cause a
1438  * loop.  To prevent this, we use a bitmap to track where we've been.
1439  */
1440 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
1441 					       unsigned long *devfns)
1442 {
1443 	struct pci_dev *tmp = NULL;
1444 	struct iommu_group *group;
1445 
1446 	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
1447 		return NULL;
1448 
1449 	group = iommu_group_get(&pdev->dev);
1450 	if (group)
1451 		return group;
1452 
1453 	for_each_pci_dev(tmp) {
1454 		if (tmp == pdev || tmp->bus != pdev->bus)
1455 			continue;
1456 
1457 		/* We alias them or they alias us */
1458 		if (pci_devs_are_dma_aliases(pdev, tmp)) {
1459 			group = get_pci_alias_group(tmp, devfns);
1460 			if (group) {
1461 				pci_dev_put(tmp);
1462 				return group;
1463 			}
1464 
1465 			group = get_pci_function_alias_group(tmp, devfns);
1466 			if (group) {
1467 				pci_dev_put(tmp);
1468 				return group;
1469 			}
1470 		}
1471 	}
1472 
1473 	return NULL;
1474 }
1475 
1476 struct group_for_pci_data {
1477 	struct pci_dev *pdev;
1478 	struct iommu_group *group;
1479 };
1480 
1481 /*
1482  * DMA alias iterator callback, return the last seen device.  Stop and return
1483  * the IOMMU group if we find one along the way.
1484  */
1485 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
1486 {
1487 	struct group_for_pci_data *data = opaque;
1488 
1489 	data->pdev = pdev;
1490 	data->group = iommu_group_get(&pdev->dev);
1491 
1492 	return data->group != NULL;
1493 }
1494 
1495 /*
1496  * Generic device_group call-back function. It just allocates one
1497  * iommu-group per device.
1498  */
1499 struct iommu_group *generic_device_group(struct device *dev)
1500 {
1501 	return iommu_group_alloc();
1502 }
1503 EXPORT_SYMBOL_GPL(generic_device_group);
1504 
1505 /*
1506  * Use standard PCI bus topology, isolation features, and DMA alias quirks
1507  * to find or create an IOMMU group for a device.
1508  */
1509 struct iommu_group *pci_device_group(struct device *dev)
1510 {
1511 	struct pci_dev *pdev = to_pci_dev(dev);
1512 	struct group_for_pci_data data;
1513 	struct pci_bus *bus;
1514 	struct iommu_group *group = NULL;
1515 	u64 devfns[4] = { 0 };
1516 
1517 	if (WARN_ON(!dev_is_pci(dev)))
1518 		return ERR_PTR(-EINVAL);
1519 
1520 	/*
1521 	 * Find the upstream DMA alias for the device.  A device must not
1522 	 * be aliased due to topology in order to have its own IOMMU group.
1523 	 * If we find an alias along the way that already belongs to a
1524 	 * group, use it.
1525 	 */
1526 	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
1527 		return data.group;
1528 
1529 	pdev = data.pdev;
1530 
1531 	/*
1532 	 * Continue upstream from the point of minimum IOMMU granularity
1533 	 * due to aliases to the point where devices are protected from
1534 	 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
1535 	 * group, use it.
1536 	 */
1537 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
1538 		if (!bus->self)
1539 			continue;
1540 
1541 		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
1542 			break;
1543 
1544 		pdev = bus->self;
1545 
1546 		group = iommu_group_get(&pdev->dev);
1547 		if (group)
1548 			return group;
1549 	}
1550 
1551 	/*
1552 	 * Look for existing groups on device aliases.  If we alias another
1553 	 * device or another device aliases us, use the same group.
1554 	 */
1555 	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
1556 	if (group)
1557 		return group;
1558 
1559 	/*
1560 	 * Look for existing groups on non-isolated functions on the same
1561  * slot and aliases of those functions, if any.  No need to clear
1562 	 * the search bitmap, the tested devfns are still valid.
1563 	 */
1564 	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
1565 	if (group)
1566 		return group;
1567 
1568 	/* No shared group found, allocate new */
1569 	return iommu_group_alloc();
1570 }
1571 EXPORT_SYMBOL_GPL(pci_device_group);
1572 
1573 /* Get the IOMMU group for device on fsl-mc bus */
1574 struct iommu_group *fsl_mc_device_group(struct device *dev)
1575 {
1576 	struct device *cont_dev = fsl_mc_cont_dev(dev);
1577 	struct iommu_group *group;
1578 
1579 	group = iommu_group_get(cont_dev);
1580 	if (!group)
1581 		group = iommu_group_alloc();
1582 	return group;
1583 }
1584 EXPORT_SYMBOL_GPL(fsl_mc_device_group);
1585 
1586 static int iommu_get_def_domain_type(struct device *dev)
1587 {
1588 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1589 
1590 	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
1591 		return IOMMU_DOMAIN_DMA;
1592 
1593 	if (ops->def_domain_type)
1594 		return ops->def_domain_type(dev);
1595 
1596 	return 0;
1597 }
1598 
1599 static int iommu_group_alloc_default_domain(struct bus_type *bus,
1600 					    struct iommu_group *group,
1601 					    unsigned int type)
1602 {
1603 	struct iommu_domain *dom;
1604 
1605 	dom = __iommu_domain_alloc(bus, type);
1606 	if (!dom && type != IOMMU_DOMAIN_DMA) {
1607 		dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
1608 		if (dom)
1609 			pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA\n",
1610 				type, group->name);
1611 	}
1612 
1613 	if (!dom)
1614 		return -ENOMEM;
1615 
1616 	group->default_domain = dom;
1617 	if (!group->domain)
1618 		group->domain = dom;
1619 	return 0;
1620 }
1621 
1622 static int iommu_alloc_default_domain(struct iommu_group *group,
1623 				      struct device *dev)
1624 {
1625 	unsigned int type;
1626 
1627 	if (group->default_domain)
1628 		return 0;
1629 
1630 	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
1631 
1632 	return iommu_group_alloc_default_domain(dev->bus, group, type);
1633 }
1634 
1635 /**
1636  * iommu_group_get_for_dev - Find or create the IOMMU group for a device
1637  * @dev: target device
1638  *
1639  * This function is intended to be called by IOMMU drivers and extended to
1640  * support common, bus-defined algorithms when determining or creating the
1641  * IOMMU group for a device.  On success, the caller will hold a reference
1642  * to the returned IOMMU group, which will already include the provided
1643  * device.  The reference should be released with iommu_group_put().
1644  */
1645 static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
1646 {
1647 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1648 	struct iommu_group *group;
1649 	int ret;
1650 
1651 	group = iommu_group_get(dev);
1652 	if (group)
1653 		return group;
1654 
1655 	group = ops->device_group(dev);
1656 	if (WARN_ON_ONCE(group == NULL))
1657 		return ERR_PTR(-EINVAL);
1658 
1659 	if (IS_ERR(group))
1660 		return group;
1661 
1662 	ret = iommu_group_add_device(group, dev);
1663 	if (ret)
1664 		goto out_put_group;
1665 
1666 	return group;
1667 
1668 out_put_group:
1669 	iommu_group_put(group);
1670 
1671 	return ERR_PTR(ret);
1672 }
1673 
1674 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
1675 {
1676 	return group->default_domain;
1677 }
1678 
1679 static int probe_iommu_group(struct device *dev, void *data)
1680 {
1681 	struct list_head *group_list = data;
1682 	struct iommu_group *group;
1683 	int ret;
1684 
1685 	/* Device is probed already if in a group */
1686 	group = iommu_group_get(dev);
1687 	if (group) {
1688 		iommu_group_put(group);
1689 		return 0;
1690 	}
1691 
1692 	ret = __iommu_probe_device(dev, group_list);
1693 	if (ret == -ENODEV)
1694 		ret = 0;
1695 
1696 	return ret;
1697 }
1698 
1699 static int iommu_bus_notifier(struct notifier_block *nb,
1700 			      unsigned long action, void *data)
1701 {
1702 	struct device *dev = data;
1703 
1704 	if (action == BUS_NOTIFY_ADD_DEVICE) {
1705 		int ret;
1706 
1707 		ret = iommu_probe_device(dev);
1708 		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
1709 	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
1710 		iommu_release_device(dev);
1711 		return NOTIFY_OK;
1712 	}
1713 
1714 	return 0;
1715 }
1716 
1717 struct __group_domain_type {
1718 	struct device *dev;
1719 	unsigned int type;
1720 };
1721 
1722 static int probe_get_default_domain_type(struct device *dev, void *data)
1723 {
1724 	struct __group_domain_type *gtype = data;
1725 	unsigned int type = iommu_get_def_domain_type(dev);
1726 
1727 	if (type) {
1728 		if (gtype->type && gtype->type != type) {
1729 			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
1730 				 iommu_domain_type_str(type),
1731 				 dev_name(gtype->dev),
1732 				 iommu_domain_type_str(gtype->type));
1733 			gtype->type = 0;
1734 		}
1735 
1736 		if (!gtype->dev) {
1737 			gtype->dev  = dev;
1738 			gtype->type = type;
1739 		}
1740 	}
1741 
1742 	return 0;
1743 }
1744 
1745 static void probe_alloc_default_domain(struct bus_type *bus,
1746 				       struct iommu_group *group)
1747 {
1748 	struct __group_domain_type gtype;
1749 
1750 	memset(&gtype, 0, sizeof(gtype));
1751 
1752 	/* Ask for default domain requirements of all devices in the group */
1753 	__iommu_group_for_each_dev(group, &gtype,
1754 				   probe_get_default_domain_type);
1755 
1756 	if (!gtype.type)
1757 		gtype.type = iommu_def_domain_type;
1758 
1759 	iommu_group_alloc_default_domain(bus, group, gtype.type);
1760 
1761 }
1762 
1763 static int iommu_group_do_dma_attach(struct device *dev, void *data)
1764 {
1765 	struct iommu_domain *domain = data;
1766 	int ret = 0;
1767 
1768 	if (!iommu_is_attach_deferred(dev))
1769 		ret = __iommu_attach_device(domain, dev);
1770 
1771 	return ret;
1772 }
1773 
1774 static int __iommu_group_dma_attach(struct iommu_group *group)
1775 {
1776 	return __iommu_group_for_each_dev(group, group->default_domain,
1777 					  iommu_group_do_dma_attach);
1778 }
1779 
1780 static int iommu_group_do_probe_finalize(struct device *dev, void *data)
1781 {
1782 	const struct iommu_ops *ops = dev_iommu_ops(dev);
1783 
1784 	if (ops->probe_finalize)
1785 		ops->probe_finalize(dev);
1786 
1787 	return 0;
1788 }
1789 
1790 static void __iommu_group_dma_finalize(struct iommu_group *group)
1791 {
1792 	__iommu_group_for_each_dev(group, group->default_domain,
1793 				   iommu_group_do_probe_finalize);
1794 }
1795 
1796 static int iommu_do_create_direct_mappings(struct device *dev, void *data)
1797 {
1798 	struct iommu_group *group = data;
1799 
1800 	iommu_create_device_direct_mappings(group, dev);
1801 
1802 	return 0;
1803 }
1804 
1805 static int iommu_group_create_direct_mappings(struct iommu_group *group)
1806 {
1807 	return __iommu_group_for_each_dev(group, group,
1808 					  iommu_do_create_direct_mappings);
1809 }
1810 
1811 int bus_iommu_probe(struct bus_type *bus)
1812 {
1813 	struct iommu_group *group, *next;
1814 	LIST_HEAD(group_list);
1815 	int ret;
1816 
1817 	/*
1818 	 * This code-path does not allocate the default domain when
1819 	 * creating the iommu group, so do it after the groups are
1820 	 * created.
1821 	 */
1822 	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
1823 	if (ret)
1824 		return ret;
1825 
1826 	list_for_each_entry_safe(group, next, &group_list, entry) {
1827 		/* Remove item from the list */
1828 		list_del_init(&group->entry);
1829 
1830 		mutex_lock(&group->mutex);
1831 
1832 		/* Try to allocate default domain */
1833 		probe_alloc_default_domain(bus, group);
1834 
1835 		if (!group->default_domain) {
1836 			mutex_unlock(&group->mutex);
1837 			continue;
1838 		}
1839 
1840 		iommu_group_create_direct_mappings(group);
1841 
1842 		ret = __iommu_group_dma_attach(group);
1843 
1844 		mutex_unlock(&group->mutex);
1845 
1846 		if (ret)
1847 			break;
1848 
1849 		__iommu_group_dma_finalize(group);
1850 	}
1851 
1852 	return ret;
1853 }
1854 
1855 bool iommu_present(struct bus_type *bus)
1856 {
1857 	return bus->iommu_ops != NULL;
1858 }
1859 EXPORT_SYMBOL_GPL(iommu_present);
1860 
1861 /**
1862  * device_iommu_capable() - check for a general IOMMU capability
1863  * @dev: device to which the capability would be relevant, if available
1864  * @cap: IOMMU capability
1865  *
1866  * Return: true if an IOMMU is present and supports the given capability
1867  * for the given device, otherwise false.
1868  */
1869 bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
1870 {
1871 	const struct iommu_ops *ops;
1872 
1873 	if (!dev->iommu || !dev->iommu->iommu_dev)
1874 		return false;
1875 
1876 	ops = dev_iommu_ops(dev);
1877 	if (!ops->capable)
1878 		return false;
1879 
1880 	return ops->capable(dev, cap);
1881 }
1882 EXPORT_SYMBOL_GPL(device_iommu_capable);
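
/*
 * Example (illustrative sketch): gating a feature on an IOMMU capability.
 *
 *	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
 *		return -EPERM;
 */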
1883 
1884 /**
1885  * iommu_set_fault_handler() - set a fault handler for an iommu domain
1886  * @domain: iommu domain
1887  * @handler: fault handler
1888  * @token: user data, will be passed back to the fault handler
1889  *
1890  * This function should be used by IOMMU users which want to be notified
1891  * whenever an IOMMU fault happens.
1892  *
1893  * The fault handler itself should return 0 on success, and an appropriate
1894  * error code otherwise.
1895  */
1896 void iommu_set_fault_handler(struct iommu_domain *domain,
1897 					iommu_fault_handler_t handler,
1898 					void *token)
1899 {
1900 	BUG_ON(!domain);
1901 
1902 	domain->handler = handler;
1903 	domain->handler_token = token;
1904 }
1905 EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
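
/*
 * Example (illustrative sketch): a report-only fault handler for an
 * unmanaged domain; the signature matches iommu_fault_handler_t.
 *
 *	static int mydrv_domain_fault(struct iommu_domain *domain,
 *				      struct device *dev, unsigned long iova,
 *				      int flags, void *token)
 *	{
 *		dev_err(dev, "unhandled IOMMU fault at %#lx (flags %#x)\n",
 *			iova, flags);
 *		return 0;
 *	}
 *
 *	iommu_set_fault_handler(domain, mydrv_domain_fault, NULL);
 */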
1906 
1907 static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
1908 						 unsigned type)
1909 {
1910 	struct iommu_domain *domain;
1911 
1912 	if (bus == NULL || bus->iommu_ops == NULL)
1913 		return NULL;
1914 
1915 	domain = bus->iommu_ops->domain_alloc(type);
1916 	if (!domain)
1917 		return NULL;
1918 
1919 	domain->type = type;
1920 	/* Assume all sizes by default; the driver may override this later */
1921 	domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
1922 	if (!domain->ops)
1923 		domain->ops = bus->iommu_ops->default_domain_ops;
1924 
1925 	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
1926 		iommu_domain_free(domain);
1927 		domain = NULL;
1928 	}
1929 	return domain;
1930 }
1931 
1932 struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
1933 {
1934 	return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
1935 }
1936 EXPORT_SYMBOL_GPL(iommu_domain_alloc);
1937 
1938 void iommu_domain_free(struct iommu_domain *domain)
1939 {
1940 	if (domain->type == IOMMU_DOMAIN_SVA)
1941 		mmdrop(domain->mm);
1942 	iommu_put_dma_cookie(domain);
1943 	domain->ops->free(domain);
1944 }
1945 EXPORT_SYMBOL_GPL(iommu_domain_free);
1946 
1947 /*
1948  * Put the group's domain back to the appropriate core-owned domain - either the
1949  * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
1950  */
1951 static void __iommu_group_set_core_domain(struct iommu_group *group)
1952 {
1953 	struct iommu_domain *new_domain;
1954 	int ret;
1955 
1956 	if (group->owner)
1957 		new_domain = group->blocking_domain;
1958 	else
1959 		new_domain = group->default_domain;
1960 
1961 	ret = __iommu_group_set_domain(group, new_domain);
1962 	WARN(ret, "iommu driver failed to attach the default/blocking domain");
1963 }
1964 
1965 static int __iommu_attach_device(struct iommu_domain *domain,
1966 				 struct device *dev)
1967 {
1968 	int ret;
1969 
1970 	if (unlikely(domain->ops->attach_dev == NULL))
1971 		return -ENODEV;
1972 
1973 	ret = domain->ops->attach_dev(domain, dev);
1974 	if (!ret)
1975 		trace_attach_device_to_domain(dev);
1976 	return ret;
1977 }
1978 
1979 /**
1980  * iommu_attach_device - Attach an IOMMU domain to a device
1981  * @domain: IOMMU domain to attach
1982  * @dev: Device that will be attached
1983  *
1984  * Returns 0 on success and error code on failure
1985  *
1986  * Note that EINVAL can be treated as a soft failure, indicating
1987  * that a particular configuration of the domain is incompatible with
1988  * the device. In this case attaching a different domain to the
1989  * device may succeed.
1990  */
1991 int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
1992 {
1993 	struct iommu_group *group;
1994 	int ret;
1995 
1996 	group = iommu_group_get(dev);
1997 	if (!group)
1998 		return -ENODEV;
1999 
2000 	/*
2001 	 * Lock the group to make sure the device-count doesn't
2002 	 * change while we are attaching
2003 	 */
2004 	mutex_lock(&group->mutex);
2005 	ret = -EINVAL;
2006 	if (iommu_group_device_count(group) != 1)
2007 		goto out_unlock;
2008 
2009 	ret = __iommu_attach_group(domain, group);
2010 
2011 out_unlock:
2012 	mutex_unlock(&group->mutex);
2013 	iommu_group_put(group);
2014 
2015 	return ret;
2016 }
2017 EXPORT_SYMBOL_GPL(iommu_attach_device);
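
/*
 * A minimal end-to-end sketch for a driver that owns a single-device group;
 * the IOVA, the 'page' being mapped and the abbreviated error handling are
 * illustrative assumptions, not taken from this file.
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(dev->bus);
 *
 *	if (!domain)
 *		return -ENOMEM;
 *	if (iommu_attach_device(domain, dev))
 *		goto err_free;
 *	iommu_map(domain, 0x100000, page_to_phys(page), PAGE_SIZE,
 *		  IOMMU_READ | IOMMU_WRITE);
 *	// ... device DMA to IOVA 0x100000 now hits 'page' ...
 *	iommu_unmap(domain, 0x100000, PAGE_SIZE);
 *	iommu_detach_device(domain, dev);
 * err_free:
 *	iommu_domain_free(domain);
 */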
2018 
2019 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
2020 {
2021 	if (iommu_is_attach_deferred(dev))
2022 		return __iommu_attach_device(domain, dev);
2023 
2024 	return 0;
2025 }
2026 
2027 static void __iommu_detach_device(struct iommu_domain *domain,
2028 				  struct device *dev)
2029 {
2030 	if (iommu_is_attach_deferred(dev))
2031 		return;
2032 
2033 	domain->ops->detach_dev(domain, dev);
2034 	trace_detach_device_from_domain(dev);
2035 }
2036 
2037 void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
2038 {
2039 	struct iommu_group *group;
2040 
2041 	group = iommu_group_get(dev);
2042 	if (!group)
2043 		return;
2044 
2045 	mutex_lock(&group->mutex);
2046 	if (WARN_ON(domain != group->domain) ||
2047 	    WARN_ON(iommu_group_device_count(group) != 1))
2048 		goto out_unlock;
2049 	__iommu_group_set_core_domain(group);
2050 
2051 out_unlock:
2052 	mutex_unlock(&group->mutex);
2053 	iommu_group_put(group);
2054 }
2055 EXPORT_SYMBOL_GPL(iommu_detach_device);
2056 
2057 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
2058 {
2059 	struct iommu_domain *domain;
2060 	struct iommu_group *group;
2061 
2062 	group = iommu_group_get(dev);
2063 	if (!group)
2064 		return NULL;
2065 
2066 	domain = group->domain;
2067 
2068 	iommu_group_put(group);
2069 
2070 	return domain;
2071 }
2072 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
2073 
2074 /*
2075  * For use only by IOMMU_DOMAIN_DMA implementations, which already provide
2076  * their own guarantees that the group and its default domain are valid and correct.
2077  */
2078 struct iommu_domain *iommu_get_dma_domain(struct device *dev)
2079 {
2080 	return dev->iommu_group->default_domain;
2081 }
2082 
2083 /*
2084  * IOMMU groups are really the natural working unit of the IOMMU, but
2085  * the IOMMU API works on domains and devices.  Bridge that gap by
2086  * iterating over the devices in a group.  Ideally we'd have a single
2087  * device which represents the requestor ID of the group, but we also
2088  * allow IOMMU drivers to create policy-defined minimum sets, where
2089  * the physical hardware may be able to distinguish members, but we
2090  * wish to group them at a higher level (e.g. untrusted multi-function
2091  * PCI devices).  Thus we attach each device.
2092  */
2093 static int iommu_group_do_attach_device(struct device *dev, void *data)
2094 {
2095 	struct iommu_domain *domain = data;
2096 
2097 	return __iommu_attach_device(domain, dev);
2098 }
2099 
2100 static int __iommu_attach_group(struct iommu_domain *domain,
2101 				struct iommu_group *group)
2102 {
2103 	int ret;
2104 
2105 	if (group->domain && group->domain != group->default_domain &&
2106 	    group->domain != group->blocking_domain)
2107 		return -EBUSY;
2108 
2109 	ret = __iommu_group_for_each_dev(group, domain,
2110 					 iommu_group_do_attach_device);
2111 	if (ret == 0)
2112 		group->domain = domain;
2113 
2114 	return ret;
2115 }
2116 
2117 /**
2118  * iommu_attach_group - Attach an IOMMU domain to an IOMMU group
2119  * @domain: IOMMU domain to attach
2120  * @group: IOMMU group that will be attached
2121  *
2122  * Returns 0 on success and error code on failure
2123  *
2124  * Note that EINVAL can be treated as a soft failure, indicating
2125  * that certain configuration of the domain is incompatible with
2126  * that a particular configuration of the domain is incompatible with
2127  * group may succeed.
2128  */
2129 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
2130 {
2131 	int ret;
2132 
2133 	mutex_lock(&group->mutex);
2134 	ret = __iommu_attach_group(domain, group);
2135 	mutex_unlock(&group->mutex);
2136 
2137 	return ret;
2138 }
2139 EXPORT_SYMBOL_GPL(iommu_attach_group);
2140 
2141 static int iommu_group_do_detach_device(struct device *dev, void *data)
2142 {
2143 	struct iommu_domain *domain = data;
2144 
2145 	__iommu_detach_device(domain, dev);
2146 
2147 	return 0;
2148 }
2149 
2150 static int __iommu_group_set_domain(struct iommu_group *group,
2151 				    struct iommu_domain *new_domain)
2152 {
2153 	int ret;
2154 
2155 	if (group->domain == new_domain)
2156 		return 0;
2157 
2158 	/*
2159 	 * New drivers should support default domains and so the detach_dev() op
2160 	 * will never be called. Otherwise the NULL domain represents some
2161 	 * platform specific behavior.
2162 	 */
2163 	if (!new_domain) {
2164 		if (WARN_ON(!group->domain->ops->detach_dev))
2165 			return -EINVAL;
2166 		__iommu_group_for_each_dev(group, group->domain,
2167 					   iommu_group_do_detach_device);
2168 		group->domain = NULL;
2169 		return 0;
2170 	}
2171 
2172 	/*
2173 	 * Changing the domain is done by calling attach_dev() on the new
2174 	 * domain. This switch does not have to be atomic and DMA can be
2175 	 * discarded during the transition. DMA must only be able to access
2176 	 * either new_domain or group->domain, never something else.
2177 	 *
2178 	 * Note that this is called in error unwind paths, so attaching to a
2179 	 * domain that has already been attached cannot fail.
2180 	 */
2181 	ret = __iommu_group_for_each_dev(group, new_domain,
2182 					 iommu_group_do_attach_device);
2183 	if (ret)
2184 		return ret;
2185 	group->domain = new_domain;
2186 	return 0;
2187 }
2188 
2189 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
2190 {
2191 	mutex_lock(&group->mutex);
2192 	__iommu_group_set_core_domain(group);
2193 	mutex_unlock(&group->mutex);
2194 }
2195 EXPORT_SYMBOL_GPL(iommu_detach_group);
2196 
2197 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2198 {
2199 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2200 		return iova;
2201 
2202 	if (domain->type == IOMMU_DOMAIN_BLOCKED)
2203 		return 0;
2204 
2205 	return domain->ops->iova_to_phys(domain, iova);
2206 }
2207 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
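
/*
 * A small sketch of a reverse lookup on a paging domain; the IOVA value is an
 * illustrative assumption.
 *
 *	phys_addr_t phys = iommu_iova_to_phys(domain, 0x100000);
 *
 *	if (!phys)
 *		return -EFAULT;	// nothing mapped there (or the domain is blocked)
 */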
2208 
2209 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
2210 			   phys_addr_t paddr, size_t size, size_t *count)
2211 {
2212 	unsigned int pgsize_idx, pgsize_idx_next;
2213 	unsigned long pgsizes;
2214 	size_t offset, pgsize, pgsize_next;
2215 	unsigned long addr_merge = paddr | iova;
2216 
2217 	/* Page sizes supported by the hardware and small enough for @size */
2218 	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
2219 
2220 	/* Constrain the page sizes further based on the maximum alignment */
2221 	if (likely(addr_merge))
2222 		pgsizes &= GENMASK(__ffs(addr_merge), 0);
2223 
2224 	/* Make sure we have at least one suitable page size */
2225 	BUG_ON(!pgsizes);
2226 
2227 	/* Pick the biggest page size remaining */
2228 	pgsize_idx = __fls(pgsizes);
2229 	pgsize = BIT(pgsize_idx);
2230 	if (!count)
2231 		return pgsize;
2232 
2233 	/* Find the next biggest supported page size, if it exists */
2234 	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
2235 	if (!pgsizes)
2236 		goto out_set_count;
2237 
2238 	pgsize_idx_next = __ffs(pgsizes);
2239 	pgsize_next = BIT(pgsize_idx_next);
2240 
2241 	/*
2242 	 * There's no point trying a bigger page size unless the virtual
2243 	 * and physical addresses are similarly offset within the larger page.
2244 	 */
2245 	if ((iova ^ paddr) & (pgsize_next - 1))
2246 		goto out_set_count;
2247 
2248 	/* Calculate the offset to the next page size alignment boundary */
2249 	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
2250 
2251 	/*
2252 	 * If size is big enough to accommodate the larger page, reduce
2253 	 * the number of smaller pages.
2254 	 */
2255 	if (offset + pgsize_next <= size)
2256 		size = offset;
2257 
2258 out_set_count:
2259 	*count = size >> pgsize_idx;
2260 	return pgsize;
2261 }
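
/*
 * A worked example of the selection above, assuming a hypothetical domain
 * with pgsize_bitmap = SZ_4K | SZ_2M and the call
 * iommu_pgsize(domain, 0x1ff000, 0x3ff000, SZ_4M, &count):
 *
 *	addr_merge = 0x3ff000, so the alignment constraint leaves only SZ_4K
 *	and pgsize = SZ_4K. The next supported size is SZ_2M, and iova/paddr
 *	share the same offset within a 2M page, so 'size' is clamped to the
 *	0x1000 bytes up to the next 2M boundary and *count = 1. Once the
 *	caller maps that one 4K page and advances, both addresses are
 *	2M-aligned and the next call returns SZ_2M.
 */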
2262 
2263 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
2264 			     phys_addr_t paddr, size_t size, int prot,
2265 			     gfp_t gfp, size_t *mapped)
2266 {
2267 	const struct iommu_domain_ops *ops = domain->ops;
2268 	size_t pgsize, count;
2269 	int ret;
2270 
2271 	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
2272 
2273 	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
2274 		 iova, &paddr, pgsize, count);
2275 
2276 	if (ops->map_pages) {
2277 		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
2278 				     gfp, mapped);
2279 	} else {
2280 		ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
2281 		*mapped = ret ? 0 : pgsize;
2282 	}
2283 
2284 	return ret;
2285 }
2286 
2287 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
2288 		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2289 {
2290 	const struct iommu_domain_ops *ops = domain->ops;
2291 	unsigned long orig_iova = iova;
2292 	unsigned int min_pagesz;
2293 	size_t orig_size = size;
2294 	phys_addr_t orig_paddr = paddr;
2295 	int ret = 0;
2296 
2297 	if (unlikely(!(ops->map || ops->map_pages) ||
2298 		     domain->pgsize_bitmap == 0UL))
2299 		return -ENODEV;
2300 
2301 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2302 		return -EINVAL;
2303 
2304 	/* find out the minimum page size supported */
2305 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2306 
2307 	/*
2308 	 * both the virtual address and the physical one, as well as
2309 	 * the size of the mapping, must be aligned (at least) to the
2310 	 * size of the smallest page supported by the hardware
2311 	 */
2312 	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
2313 		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
2314 		       iova, &paddr, size, min_pagesz);
2315 		return -EINVAL;
2316 	}
2317 
2318 	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
2319 
2320 	while (size) {
2321 		size_t mapped = 0;
2322 
2323 		ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
2324 					&mapped);
2325 		/*
2326 		 * Some pages may have been mapped, even if an error occurred,
2327 		 * so we should account for those so they can be unmapped.
2328 		 */
2329 		size -= mapped;
2330 
2331 		if (ret)
2332 			break;
2333 
2334 		iova += mapped;
2335 		paddr += mapped;
2336 	}
2337 
2338 	/* unroll mapping in case something went wrong */
2339 	if (ret)
2340 		iommu_unmap(domain, orig_iova, orig_size - size);
2341 	else
2342 		trace_map(orig_iova, orig_paddr, orig_size);
2343 
2344 	return ret;
2345 }
2346 
2347 static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
2348 		      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2349 {
2350 	const struct iommu_domain_ops *ops = domain->ops;
2351 	int ret;
2352 
2353 	ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
2354 	if (ret == 0 && ops->iotlb_sync_map)
2355 		ops->iotlb_sync_map(domain, iova, size);
2356 
2357 	return ret;
2358 }
2359 
2360 int iommu_map(struct iommu_domain *domain, unsigned long iova,
2361 	      phys_addr_t paddr, size_t size, int prot)
2362 {
2363 	might_sleep();
2364 	return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
2365 }
2366 EXPORT_SYMBOL_GPL(iommu_map);
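
/*
 * A minimal iommu_map() sketch for one page-aligned, physically contiguous
 * buffer; 'buf', the IOVA and the size are illustrative assumptions.
 *
 *	unsigned long iova = 0x200000;
 *	int ret;
 *
 *	ret = iommu_map(domain, iova, virt_to_phys(buf), SZ_1M,
 *			IOMMU_READ | IOMMU_WRITE);
 *	if (ret)
 *		return ret;	// on failure nothing is left mapped
 */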
2367 
2368 int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
2369 	      phys_addr_t paddr, size_t size, int prot)
2370 {
2371 	return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
2372 }
2373 EXPORT_SYMBOL_GPL(iommu_map_atomic);
2374 
2375 static size_t __iommu_unmap_pages(struct iommu_domain *domain,
2376 				  unsigned long iova, size_t size,
2377 				  struct iommu_iotlb_gather *iotlb_gather)
2378 {
2379 	const struct iommu_domain_ops *ops = domain->ops;
2380 	size_t pgsize, count;
2381 
2382 	pgsize = iommu_pgsize(domain, iova, iova, size, &count);
2383 	return ops->unmap_pages ?
2384 	       ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
2385 	       ops->unmap(domain, iova, pgsize, iotlb_gather);
2386 }
2387 
2388 static size_t __iommu_unmap(struct iommu_domain *domain,
2389 			    unsigned long iova, size_t size,
2390 			    struct iommu_iotlb_gather *iotlb_gather)
2391 {
2392 	const struct iommu_domain_ops *ops = domain->ops;
2393 	size_t unmapped_page, unmapped = 0;
2394 	unsigned long orig_iova = iova;
2395 	unsigned int min_pagesz;
2396 
2397 	if (unlikely(!(ops->unmap || ops->unmap_pages) ||
2398 		     domain->pgsize_bitmap == 0UL))
2399 		return 0;
2400 
2401 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2402 		return 0;
2403 
2404 	/* find out the minimum page size supported */
2405 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2406 
2407 	/*
2408 	 * The virtual address, as well as the size of the mapping, must be
2409 	 * aligned (at least) to the size of the smallest page supported
2410 	 * by the hardware
2411 	 */
2412 	if (!IS_ALIGNED(iova | size, min_pagesz)) {
2413 		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
2414 		       iova, size, min_pagesz);
2415 		return 0;
2416 	}
2417 
2418 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
2419 
2420 	/*
2421 	 * Keep iterating until we either unmap 'size' bytes (or more)
2422 	 * or we hit an area that isn't mapped.
2423 	 */
2424 	while (unmapped < size) {
2425 		unmapped_page = __iommu_unmap_pages(domain, iova,
2426 						    size - unmapped,
2427 						    iotlb_gather);
2428 		if (!unmapped_page)
2429 			break;
2430 
2431 		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
2432 			 iova, unmapped_page);
2433 
2434 		iova += unmapped_page;
2435 		unmapped += unmapped_page;
2436 	}
2437 
2438 	trace_unmap(orig_iova, size, unmapped);
2439 	return unmapped;
2440 }
2441 
2442 size_t iommu_unmap(struct iommu_domain *domain,
2443 		   unsigned long iova, size_t size)
2444 {
2445 	struct iommu_iotlb_gather iotlb_gather;
2446 	size_t ret;
2447 
2448 	iommu_iotlb_gather_init(&iotlb_gather);
2449 	ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
2450 	iommu_iotlb_sync(domain, &iotlb_gather);
2451 
2452 	return ret;
2453 }
2454 EXPORT_SYMBOL_GPL(iommu_unmap);
2455 
2456 size_t iommu_unmap_fast(struct iommu_domain *domain,
2457 			unsigned long iova, size_t size,
2458 			struct iommu_iotlb_gather *iotlb_gather)
2459 {
2460 	return __iommu_unmap(domain, iova, size, iotlb_gather);
2461 }
2462 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
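
/*
 * A sketch of the deferred-invalidation pattern built on iommu_unmap_fast():
 * queue up several unmaps and flush the IOTLB once. The region list walked
 * here is a hypothetical caller-side structure.
 *
 *	struct iommu_iotlb_gather gather;
 *
 *	iommu_iotlb_gather_init(&gather);
 *	list_for_each_entry(r, &regions, node)	// hypothetical list of ranges
 *		iommu_unmap_fast(domain, r->iova, r->size, &gather);
 *	iommu_iotlb_sync(domain, &gather);	// single flush for all ranges
 */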
2463 
2464 static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
2465 		struct scatterlist *sg, unsigned int nents, int prot,
2466 		gfp_t gfp)
2467 {
2468 	const struct iommu_domain_ops *ops = domain->ops;
2469 	size_t len = 0, mapped = 0;
2470 	phys_addr_t start;
2471 	unsigned int i = 0;
2472 	int ret;
2473 
2474 	while (i <= nents) {
2475 		phys_addr_t s_phys = sg_phys(sg);
2476 
2477 		if (len && s_phys != start + len) {
2478 			ret = __iommu_map(domain, iova + mapped, start,
2479 					len, prot, gfp);
2480 
2481 			if (ret)
2482 				goto out_err;
2483 
2484 			mapped += len;
2485 			len = 0;
2486 		}
2487 
2488 		if (sg_is_dma_bus_address(sg))
2489 			goto next;
2490 
2491 		if (len) {
2492 			len += sg->length;
2493 		} else {
2494 			len = sg->length;
2495 			start = s_phys;
2496 		}
2497 
2498 next:
2499 		if (++i < nents)
2500 			sg = sg_next(sg);
2501 	}
2502 
2503 	if (ops->iotlb_sync_map)
2504 		ops->iotlb_sync_map(domain, iova, mapped);
2505 	return mapped;
2506 
2507 out_err:
2508 	/* undo mappings already done */
2509 	iommu_unmap(domain, iova, mapped);
2510 
2511 	return ret;
2512 }
2513 
2514 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
2515 		     struct scatterlist *sg, unsigned int nents, int prot)
2516 {
2517 	might_sleep();
2518 	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
2519 }
2520 EXPORT_SYMBOL_GPL(iommu_map_sg);
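
/*
 * A minimal iommu_map_sg() sketch, assuming the caller already built a valid
 * scatterlist (for instance with sg_alloc_table_from_pages()); 'sgt' and the
 * base IOVA are illustrative assumptions.
 *
 *	ssize_t mapped;
 *
 *	mapped = iommu_map_sg(domain, base_iova, sgt->sgl, sgt->orig_nents,
 *			      IOMMU_READ | IOMMU_WRITE);
 *	if (mapped < 0)
 *		return mapped;	// nothing is left mapped on error
 *	// 'mapped' bytes are now IOVA-contiguous starting at base_iova
 */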
2521 
2522 ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
2523 		    struct scatterlist *sg, unsigned int nents, int prot)
2524 {
2525 	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
2526 }
2527 
2528 /**
2529  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
2530  * @domain: the iommu domain where the fault has happened
2531  * @dev: the device where the fault has happened
2532  * @iova: the faulting address
2533  * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
2534  *
2535  * This function should be called by the low-level IOMMU implementations
2536  * whenever IOMMU faults happen, to allow high-level users, that are
2537  * interested in such events, to know about them.
2538  *
2539  * This event may be useful for several possible use cases:
2540  * - mere logging of the event
2541  * - dynamic TLB/PTE loading
2542  * - restarting the faulting device, if required
2543  *
2544  * Returns 0 on success and an appropriate error code otherwise (if dynamic
2545  * PTE/TLB loading will one day be supported, implementations will be able
2546  * to tell whether it succeeded or not according to this return value).
2547  *
2548  * Specifically, -ENOSYS is returned if a fault handler isn't installed
2549  * (though fault handlers can also return -ENOSYS, in case they want to
2550  * elicit the default behavior of the IOMMU drivers).
2551  */
2552 int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
2553 		       unsigned long iova, int flags)
2554 {
2555 	int ret = -ENOSYS;
2556 
2557 	/*
2558 	 * if upper layers showed interest and installed a fault handler,
2559 	 * invoke it.
2560 	 */
2561 	if (domain->handler)
2562 		ret = domain->handler(domain, dev, iova, flags,
2563 						domain->handler_token);
2564 
2565 	trace_io_page_fault(dev, iova, flags);
2566 	return ret;
2567 }
2568 EXPORT_SYMBOL_GPL(report_iommu_fault);
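
/*
 * A sketch of how a low-level IOMMU driver might report a fault from its
 * interrupt handler; the register accessors and the faulting-device lookup
 * are hypothetical.
 *
 *	static irqreturn_t my_iommu_irq(int irq, void *data)
 *	{
 *		struct my_iommu *iommu = data;	// hypothetical driver state
 *		unsigned long iova = my_read_fault_addr(iommu);
 *		int flags = my_fault_is_write(iommu) ? IOMMU_FAULT_WRITE
 *						     : IOMMU_FAULT_READ;
 *
 *		if (!report_iommu_fault(iommu->domain, iommu->faulting_dev,
 *					iova, flags))
 *			return IRQ_HANDLED;	// a handler resolved the fault
 *		// otherwise apply the driver's default policy (abort/stall)
 *		return IRQ_HANDLED;
 *	}
 */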
2569 
2570 static int __init iommu_init(void)
2571 {
2572 	iommu_group_kset = kset_create_and_add("iommu_groups",
2573 					       NULL, kernel_kobj);
2574 	BUG_ON(!iommu_group_kset);
2575 
2576 	iommu_debugfs_setup();
2577 
2578 	return 0;
2579 }
2580 core_initcall(iommu_init);
2581 
2582 int iommu_enable_nesting(struct iommu_domain *domain)
2583 {
2584 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2585 		return -EINVAL;
2586 	if (!domain->ops->enable_nesting)
2587 		return -EINVAL;
2588 	return domain->ops->enable_nesting(domain);
2589 }
2590 EXPORT_SYMBOL_GPL(iommu_enable_nesting);
2591 
2592 int iommu_set_pgtable_quirks(struct iommu_domain *domain,
2593 		unsigned long quirk)
2594 {
2595 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2596 		return -EINVAL;
2597 	if (!domain->ops->set_pgtable_quirks)
2598 		return -EINVAL;
2599 	return domain->ops->set_pgtable_quirks(domain, quirk);
2600 }
2601 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
2602 
2603 void iommu_get_resv_regions(struct device *dev, struct list_head *list)
2604 {
2605 	const struct iommu_ops *ops = dev_iommu_ops(dev);
2606 
2607 	if (ops->get_resv_regions)
2608 		ops->get_resv_regions(dev, list);
2609 }
2610 
2611 /**
2612  * iommu_put_resv_regions - release reserved regions
2613  * @dev: device for which to free reserved regions
2614  * @list: reserved region list for device
2615  *
2616  * This releases a reserved region list acquired by iommu_get_resv_regions().
2617  */
2618 void iommu_put_resv_regions(struct device *dev, struct list_head *list)
2619 {
2620 	struct iommu_resv_region *entry, *next;
2621 
2622 	list_for_each_entry_safe(entry, next, list, list) {
2623 		if (entry->free)
2624 			entry->free(dev, entry);
2625 		else
2626 			kfree(entry);
2627 	}
2628 }
2629 EXPORT_SYMBOL(iommu_put_resv_regions);
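
/*
 * A sketch of the usual consumer pattern around iommu_get_resv_regions() and
 * iommu_put_resv_regions(); 'dev' is assumed to have IOMMU ops and the loop
 * body is illustrative.
 *
 *	LIST_HEAD(resv_regions);
 *	struct iommu_resv_region *region;
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		pr_info("resv [%pa, +0x%zx] type %d\n",
 *			&region->start, region->length, region->type);
 *	iommu_put_resv_regions(dev, &resv_regions);
 */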
2630 
2631 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
2632 						  size_t length, int prot,
2633 						  enum iommu_resv_type type,
2634 						  gfp_t gfp)
2635 {
2636 	struct iommu_resv_region *region;
2637 
2638 	region = kzalloc(sizeof(*region), gfp);
2639 	if (!region)
2640 		return NULL;
2641 
2642 	INIT_LIST_HEAD(&region->list);
2643 	region->start = start;
2644 	region->length = length;
2645 	region->prot = prot;
2646 	region->type = type;
2647 	return region;
2648 }
2649 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
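
/*
 * A sketch of a driver-side ->get_resv_regions() callback that uses
 * iommu_alloc_resv_region(); the callback name, the doorbell range and the
 * protection flags are illustrative assumptions.
 *
 *	static void my_iommu_get_resv_regions(struct device *dev,
 *					      struct list_head *head)
 *	{
 *		struct iommu_resv_region *region;
 *
 *		region = iommu_alloc_resv_region(0x8000000, SZ_1M,
 *						 IOMMU_WRITE | IOMMU_MMIO,
 *						 IOMMU_RESV_MSI, GFP_KERNEL);
 *		if (region)
 *			list_add_tail(&region->list, head);
 *	}
 */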
2650 
2651 void iommu_set_default_passthrough(bool cmd_line)
2652 {
2653 	if (cmd_line)
2654 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2655 	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
2656 }
2657 
2658 void iommu_set_default_translated(bool cmd_line)
2659 {
2660 	if (cmd_line)
2661 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2662 	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
2663 }
2664 
2665 bool iommu_default_passthrough(void)
2666 {
2667 	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
2668 }
2669 EXPORT_SYMBOL_GPL(iommu_default_passthrough);
2670 
2671 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
2672 {
2673 	const struct iommu_ops *ops = NULL;
2674 	struct iommu_device *iommu;
2675 
2676 	spin_lock(&iommu_device_lock);
2677 	list_for_each_entry(iommu, &iommu_device_list, list)
2678 		if (iommu->fwnode == fwnode) {
2679 			ops = iommu->ops;
2680 			break;
2681 		}
2682 	spin_unlock(&iommu_device_lock);
2683 	return ops;
2684 }
2685 
2686 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
2687 		      const struct iommu_ops *ops)
2688 {
2689 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2690 
2691 	if (fwspec)
2692 		return ops == fwspec->ops ? 0 : -EINVAL;
2693 
2694 	if (!dev_iommu_get(dev))
2695 		return -ENOMEM;
2696 
2697 	/* Preallocate for the overwhelmingly common case of 1 ID */
2698 	fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL);
2699 	if (!fwspec)
2700 		return -ENOMEM;
2701 
2702 	of_node_get(to_of_node(iommu_fwnode));
2703 	fwspec->iommu_fwnode = iommu_fwnode;
2704 	fwspec->ops = ops;
2705 	dev_iommu_fwspec_set(dev, fwspec);
2706 	return 0;
2707 }
2708 EXPORT_SYMBOL_GPL(iommu_fwspec_init);
2709 
2710 void iommu_fwspec_free(struct device *dev)
2711 {
2712 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2713 
2714 	if (fwspec) {
2715 		fwnode_handle_put(fwspec->iommu_fwnode);
2716 		kfree(fwspec);
2717 		dev_iommu_fwspec_set(dev, NULL);
2718 	}
2719 }
2720 EXPORT_SYMBOL_GPL(iommu_fwspec_free);
2721 
2722 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
2723 {
2724 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2725 	int i, new_num;
2726 
2727 	if (!fwspec)
2728 		return -EINVAL;
2729 
2730 	new_num = fwspec->num_ids + num_ids;
2731 	if (new_num > 1) {
2732 		fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
2733 				  GFP_KERNEL);
2734 		if (!fwspec)
2735 			return -ENOMEM;
2736 
2737 		dev_iommu_fwspec_set(dev, fwspec);
2738 	}
2739 
2740 	for (i = 0; i < num_ids; i++)
2741 		fwspec->ids[fwspec->num_ids + i] = ids[i];
2742 
2743 	fwspec->num_ids = new_num;
2744 	return 0;
2745 }
2746 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
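
/*
 * A sketch of how firmware glue code typically initializes a device's fwspec
 * and records its IDs; 'iommu_fwnode', 'my_iommu_ops' and the ID value are
 * hypothetical.
 *
 *	u32 sid = 0x42;
 *	int ret;
 *
 *	ret = iommu_fwspec_init(dev, iommu_fwnode, my_iommu_ops);
 *	if (ret)
 *		return ret;
 *	ret = iommu_fwspec_add_ids(dev, &sid, 1);
 *	if (ret)
 *		iommu_fwspec_free(dev);
 */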
2747 
2748 /*
2749  * Per device IOMMU features.
2750  */
2751 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
2752 {
2753 	if (dev->iommu && dev->iommu->iommu_dev) {
2754 		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
2755 
2756 		if (ops->dev_enable_feat)
2757 			return ops->dev_enable_feat(dev, feat);
2758 	}
2759 
2760 	return -ENODEV;
2761 }
2762 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
2763 
2764 /*
2765  * The device drivers should do the necessary cleanups before calling this.
2766  */
2767 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
2768 {
2769 	if (dev->iommu && dev->iommu->iommu_dev) {
2770 		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
2771 
2772 		if (ops->dev_disable_feat)
2773 			return ops->dev_disable_feat(dev, feat);
2774 	}
2775 
2776 	return -EBUSY;
2777 }
2778 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
2779 
2780 /*
2781  * Changes the default domain of an iommu group that has *only* one device
2782  *
2783  * @group: The group for which the default domain should be changed
2784  * @prev_dev: The device in the group (this is used to make sure that the device
2785  *	 hasn't changed after the caller has called this function)
2786  * @type: The type of the new default domain that gets associated with the group
2787  *
2788  * Returns 0 on success and error code on failure
2789  *
2790  * Note:
2791  * 1. Presently, this function is called only when the user requests to change
2792  *    the group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type.
2793  *    Take a closer look before reusing it for other purposes.
2794  */
2795 static int iommu_change_dev_def_domain(struct iommu_group *group,
2796 				       struct device *prev_dev, int type)
2797 {
2798 	struct iommu_domain *prev_dom;
2799 	struct group_device *grp_dev;
2800 	int ret, dev_def_dom;
2801 	struct device *dev;
2802 
2803 	mutex_lock(&group->mutex);
2804 
2805 	if (group->default_domain != group->domain) {
2806 		dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n");
2807 		ret = -EBUSY;
2808 		goto out;
2809 	}
2810 
2811 	/*
2812 	 * The iommu group wasn't locked while the device lock was acquired in
2813 	 * iommu_group_store_type(), so make sure that the device count hasn't
2814 	 * changed in the meantime.
2815 	 *
2816 	 * Changing the default domain of an iommu group with two or more devices
2817 	 * isn't supported because of a potential deadlock. Consider the
2818 	 * following scenario: T1 is trying to acquire the device locks of all
2819 	 * the devices in the group and, before it can acquire all of them,
2820 	 * another thread T2 (from a different sub-system and use case) may
2821 	 * already hold some of the device locks and might be waiting for T1 to
2822 	 * release the other device locks.
2823 	 */
2824 	if (iommu_group_device_count(group) != 1) {
2825 		dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n");
2826 		ret = -EINVAL;
2827 		goto out;
2828 	}
2829 
2830 	/* Since group has only one device */
2831 	grp_dev = list_first_entry(&group->devices, struct group_device, list);
2832 	dev = grp_dev->dev;
2833 
2834 	if (prev_dev != dev) {
2835 		dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n");
2836 		ret = -EBUSY;
2837 		goto out;
2838 	}
2839 
2840 	prev_dom = group->default_domain;
2841 	if (!prev_dom) {
2842 		ret = -EINVAL;
2843 		goto out;
2844 	}
2845 
2846 	dev_def_dom = iommu_get_def_domain_type(dev);
2847 	if (!type) {
2848 		/*
2849 		 * If the user hasn't requested any specific type of domain and
2850 		 * if the device supports both domain types, then default to the
2851 		 * domain the device was booted with.
2852 		 */
2853 		type = dev_def_dom ? : iommu_def_domain_type;
2854 	} else if (dev_def_dom && type != dev_def_dom) {
2855 		dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n",
2856 				    iommu_domain_type_str(type));
2857 		ret = -EINVAL;
2858 		goto out;
2859 	}
2860 
2861 	/*
2862 	 * Switch to a new domain only if the requested domain type is different
2863 	 * from the existing default domain type
2864 	 */
2865 	if (prev_dom->type == type) {
2866 		ret = 0;
2867 		goto out;
2868 	}
2869 
2870 	/* We can bring up a flush queue without tearing down the domain */
2871 	if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
2872 		ret = iommu_dma_init_fq(prev_dom);
2873 		if (!ret)
2874 			prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
2875 		goto out;
2876 	}
2877 
2878 	/* Sets group->default_domain to the newly allocated domain */
2879 	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
2880 	if (ret)
2881 		goto out;
2882 
2883 	ret = iommu_create_device_direct_mappings(group, dev);
2884 	if (ret)
2885 		goto free_new_domain;
2886 
2887 	ret = __iommu_attach_device(group->default_domain, dev);
2888 	if (ret)
2889 		goto free_new_domain;
2890 
2891 	group->domain = group->default_domain;
2892 
2893 	/*
2894 	 * Release the mutex here because ops->probe_finalize() call-back of
2895 	 * some vendor IOMMU drivers calls arm_iommu_attach_device() which
2896 	 * in turn might call back into IOMMU core code, where it tries to take
2897 	 * group->mutex, resulting in a deadlock.
2898 	 */
2899 	mutex_unlock(&group->mutex);
2900 
2901 	/* Make sure dma_ops is appropriately set */
2902 	iommu_group_do_probe_finalize(dev, group->default_domain);
2903 	iommu_domain_free(prev_dom);
2904 	return 0;
2905 
2906 free_new_domain:
2907 	iommu_domain_free(group->default_domain);
2908 	group->default_domain = prev_dom;
2909 	group->domain = prev_dom;
2910 
2911 out:
2912 	mutex_unlock(&group->mutex);
2913 
2914 	return ret;
2915 }
2916 
2917 /*
2918  * Changing the default domain through sysfs requires the users to unbind the
2919  * Changing the default domain through sysfs requires the user to unbind the
2920  * transition. Return failure if this isn't met.
2921  *
2922  * We need to consider the race between this and the device release path.
2923  * device_lock(dev) is used here to guarantee that the device release path
2924  * will not be entered at the same time.
2925  */
2926 static ssize_t iommu_group_store_type(struct iommu_group *group,
2927 				      const char *buf, size_t count)
2928 {
2929 	struct group_device *grp_dev;
2930 	struct device *dev;
2931 	int ret, req_type;
2932 
2933 	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
2934 		return -EACCES;
2935 
2936 	if (WARN_ON(!group) || !group->default_domain)
2937 		return -EINVAL;
2938 
2939 	if (sysfs_streq(buf, "identity"))
2940 		req_type = IOMMU_DOMAIN_IDENTITY;
2941 	else if (sysfs_streq(buf, "DMA"))
2942 		req_type = IOMMU_DOMAIN_DMA;
2943 	else if (sysfs_streq(buf, "DMA-FQ"))
2944 		req_type = IOMMU_DOMAIN_DMA_FQ;
2945 	else if (sysfs_streq(buf, "auto"))
2946 		req_type = 0;
2947 	else
2948 		return -EINVAL;
2949 
2950 	/*
2951 	 * Lock/Unlock the group mutex here before device lock to
2952 	 * 1. Make sure that the iommu group has only one device (this is a
2953 	 *    prerequisite for step 2)
2954 	 * 2. Get the struct device pointer, which is needed to lock the device
2955 	 */
2956 	mutex_lock(&group->mutex);
2957 	if (iommu_group_device_count(group) != 1) {
2958 		mutex_unlock(&group->mutex);
2959 		pr_err_ratelimited("Cannot change default domain: Group has more than one device\n");
2960 		return -EINVAL;
2961 	}
2962 
2963 	/* Since group has only one device */
2964 	grp_dev = list_first_entry(&group->devices, struct group_device, list);
2965 	dev = grp_dev->dev;
2966 	get_device(dev);
2967 
2968 	/*
2969 	 * Don't hold the group mutex because taking group mutex first and then
2970 	 * the device lock could potentially cause a deadlock as below. Assume
2971 	 * two threads T1 and T2. T1 is trying to change default domain of an
2972 	 * iommu group and T2 is trying to hot unplug a device or release [1] VF
2973 	 * of a PCIe device which is in the same iommu group. T1 takes group
2974 	 * mutex and before it could take device lock assume T2 has taken device
2975 	 * lock and is yet to take group mutex. Now, both threads would be
2976 	 * waiting for the other to release its lock. The lock order below
2977 	 * avoids this deadlock:
2978 	 * device_lock(dev);
2979 	 *	mutex_lock(&group->mutex);
2980 	 *		iommu_change_dev_def_domain();
2981 	 *	mutex_unlock(&group->mutex);
2982 	 * device_unlock(dev);
2983 	 *
2984 	 * [1] Typical device release path
2985 	 * device_lock() from device/driver core code
2986 	 *  -> bus_notifier()
2987 	 *   -> iommu_bus_notifier()
2988 	 *    -> iommu_release_device()
2989 	 *     -> ops->release_device() vendor driver calls back iommu core code
2990 	 *      -> mutex_lock() from iommu core code
2991 	 */
2992 	mutex_unlock(&group->mutex);
2993 
2994 	/* Check if the device in the group still has a driver bound to it */
2995 	device_lock(dev);
2996 	if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
2997 	    group->default_domain->type == IOMMU_DOMAIN_DMA)) {
2998 		pr_err_ratelimited("Device is still bound to driver\n");
2999 		ret = -EBUSY;
3000 		goto out;
3001 	}
3002 
3003 	ret = iommu_change_dev_def_domain(group, dev, req_type);
3004 	ret = ret ?: count;
3005 
3006 out:
3007 	device_unlock(dev);
3008 	put_device(dev);
3009 
3010 	return ret;
3011 }
3012 
3013 static bool iommu_is_default_domain(struct iommu_group *group)
3014 {
3015 	if (group->domain == group->default_domain)
3016 		return true;
3017 
3018 	/*
3019 	 * If the default domain was set to identity and it is still an identity
3020 	 * domain, then we consider this a pass. This happens because of
3021 	 * amd_iommu_init_device() replacing the default identity domain with an
3022 	 * identity domain that has a different configuration for AMDGPU.
3023 	 */
3024 	if (group->default_domain &&
3025 	    group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
3026 	    group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
3027 		return true;
3028 	return false;
3029 }
3030 
3031 /**
3032  * iommu_device_use_default_domain() - Device driver wants to handle device
3033  *                                     DMA through the kernel DMA API.
3034  * @dev: The device.
3035  *
3036  * The device driver about to bind @dev wants to do DMA through the kernel
3037  * DMA API. Return 0 if it is allowed, otherwise an error.
3038  */
3039 int iommu_device_use_default_domain(struct device *dev)
3040 {
3041 	struct iommu_group *group = iommu_group_get(dev);
3042 	int ret = 0;
3043 
3044 	if (!group)
3045 		return 0;
3046 
3047 	mutex_lock(&group->mutex);
3048 	if (group->owner_cnt) {
3049 		if (group->owner || !iommu_is_default_domain(group) ||
3050 		    !xa_empty(&group->pasid_array)) {
3051 			ret = -EBUSY;
3052 			goto unlock_out;
3053 		}
3054 	}
3055 
3056 	group->owner_cnt++;
3057 
3058 unlock_out:
3059 	mutex_unlock(&group->mutex);
3060 	iommu_group_put(group);
3061 
3062 	return ret;
3063 }
3064 
3065 /**
3066  * iommu_device_unuse_default_domain() - Device driver stops handling device
3067  *                                       DMA through the kernel DMA API.
3068  * @dev: The device.
3069  *
3070  * The device driver doesn't want to do DMA through kernel DMA API anymore.
3071  * It must be called after iommu_device_use_default_domain().
3072  */
3073 void iommu_device_unuse_default_domain(struct device *dev)
3074 {
3075 	struct iommu_group *group = iommu_group_get(dev);
3076 
3077 	if (!group)
3078 		return;
3079 
3080 	mutex_lock(&group->mutex);
3081 	if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array)))
3082 		group->owner_cnt--;
3083 
3084 	mutex_unlock(&group->mutex);
3085 	iommu_group_put(group);
3086 }
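
/*
 * A sketch of how the two helpers above pair up around driver binding; the
 * bind/unbind hooks shown are hypothetical stand-ins for the driver-core
 * call sites.
 *
 *	static int my_bind(struct device *dev)
 *	{
 *		int ret = iommu_device_use_default_domain(dev);
 *
 *		if (ret)
 *			return ret;	// the group is owned for user DMA
 *		// ... the kernel DMA API may now be used for 'dev' ...
 *		return 0;
 *	}
 *
 *	static void my_unbind(struct device *dev)
 *	{
 *		iommu_device_unuse_default_domain(dev);
 *	}
 */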
3087 
3088 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
3089 {
3090 	struct group_device *dev =
3091 		list_first_entry(&group->devices, struct group_device, list);
3092 
3093 	if (group->blocking_domain)
3094 		return 0;
3095 
3096 	group->blocking_domain =
3097 		__iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED);
3098 	if (!group->blocking_domain) {
3099 		/*
3100 		 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED,
3101 		 * create an empty domain instead.
3102 		 */
3103 		group->blocking_domain = __iommu_domain_alloc(
3104 			dev->dev->bus, IOMMU_DOMAIN_UNMANAGED);
3105 		if (!group->blocking_domain)
3106 			return -EINVAL;
3107 	}
3108 	return 0;
3109 }
3110 
3111 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner)
3112 {
3113 	int ret;
3114 
3115 	if ((group->domain && group->domain != group->default_domain) ||
3116 	    !xa_empty(&group->pasid_array))
3117 		return -EBUSY;
3118 
3119 	ret = __iommu_group_alloc_blocking_domain(group);
3120 	if (ret)
3121 		return ret;
3122 	ret = __iommu_group_set_domain(group, group->blocking_domain);
3123 	if (ret)
3124 		return ret;
3125 
3126 	group->owner = owner;
3127 	group->owner_cnt++;
3128 	return 0;
3129 }
3130 
3131 /**
3132  * iommu_group_claim_dma_owner() - Set DMA ownership of a group
3133  * @group: The group.
3134  * @owner: Caller specified pointer. Used for exclusive ownership.
3135  *
3136  * This exists to support backward compatibility for vfio, which manages DMA
3137  * ownership at the iommu_group level. New callers should not use this
3138  * interface. Only a single owner may exist for a group.
3139  */
3140 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
3141 {
3142 	int ret = 0;
3143 
3144 	if (WARN_ON(!owner))
3145 		return -EINVAL;
3146 
3147 	mutex_lock(&group->mutex);
3148 	if (group->owner_cnt) {
3149 		ret = -EPERM;
3150 		goto unlock_out;
3151 	}
3152 
3153 	ret = __iommu_take_dma_ownership(group, owner);
3154 unlock_out:
3155 	mutex_unlock(&group->mutex);
3156 
3157 	return ret;
3158 }
3159 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
3160 
3161 /**
3162  * iommu_device_claim_dma_owner() - Set DMA ownership of a device
3163  * @dev: The device.
3164  * @owner: Caller specified pointer. Used for exclusive ownership.
3165  *
3166  * Claim the DMA ownership of a device. Multiple devices in the same group may
3167  * concurrently claim ownership if they present the same owner value. Returns 0
3168  * on success and error code on failure
3169  */
3170 int iommu_device_claim_dma_owner(struct device *dev, void *owner)
3171 {
3172 	struct iommu_group *group = iommu_group_get(dev);
3173 	int ret = 0;
3174 
3175 	if (!group)
3176 		return -ENODEV;
3177 	if (WARN_ON(!owner))
3178 		return -EINVAL;
3179 
3180 	mutex_lock(&group->mutex);
3181 	if (group->owner_cnt) {
3182 		if (group->owner != owner) {
3183 			ret = -EPERM;
3184 			goto unlock_out;
3185 		}
3186 		group->owner_cnt++;
3187 		goto unlock_out;
3188 	}
3189 
3190 	ret = __iommu_take_dma_ownership(group, owner);
3191 unlock_out:
3192 	mutex_unlock(&group->mutex);
3193 	iommu_group_put(group);
3194 
3195 	return ret;
3196 }
3197 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
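
/*
 * A sketch of the ownership pattern for user-controlled DMA built on the
 * helpers above; 'my_ctx' is any stable cookie private to the caller and the
 * error handling is abbreviated.
 *
 *	ret = iommu_device_claim_dma_owner(dev, my_ctx);
 *	if (ret)
 *		return ret;	// group already claimed with a different cookie
 *	domain = iommu_domain_alloc(dev->bus);
 *	if (domain) {
 *		if (!iommu_attach_device(domain, dev)) {
 *			// ... user-controlled DMA translates through 'domain' ...
 *			iommu_detach_device(domain, dev);
 *		}
 *		iommu_domain_free(domain);
 *	}
 *	iommu_device_release_dma_owner(dev);
 */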
3198 
3199 static void __iommu_release_dma_ownership(struct iommu_group *group)
3200 {
3201 	int ret;
3202 
3203 	if (WARN_ON(!group->owner_cnt || !group->owner ||
3204 		    !xa_empty(&group->pasid_array)))
3205 		return;
3206 
3207 	group->owner_cnt = 0;
3208 	group->owner = NULL;
3209 	ret = __iommu_group_set_domain(group, group->default_domain);
3210 	WARN(ret, "iommu driver failed to attach the default domain");
3211 }
3212 
3213 /**
3214  * iommu_group_release_dma_owner() - Release DMA ownership of a group
3215  * @group: The group.
3216  *
3217  * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
3218  */
3219 void iommu_group_release_dma_owner(struct iommu_group *group)
3220 {
3221 	mutex_lock(&group->mutex);
3222 	__iommu_release_dma_ownership(group);
3223 	mutex_unlock(&group->mutex);
3224 }
3225 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
3226 
3227 /**
3228  * iommu_device_release_dma_owner() - Release DMA ownership of a device
3229  * @dev: The device.
3230  *
3231  * Release the DMA ownership claimed by iommu_device_claim_dma_owner().
3232  */
3233 void iommu_device_release_dma_owner(struct device *dev)
3234 {
3235 	struct iommu_group *group = iommu_group_get(dev);
3236 
3237 	mutex_lock(&group->mutex);
3238 	if (group->owner_cnt > 1)
3239 		group->owner_cnt--;
3240 	else
3241 		__iommu_release_dma_ownership(group);
3242 	mutex_unlock(&group->mutex);
3243 	iommu_group_put(group);
3244 }
3245 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
3246 
3247 /**
3248  * iommu_group_dma_owner_claimed() - Query group dma ownership status
3249  * @group: The group.
3250  *
3251  * This provides a status query on a given group. It is racy and only for
3252  * non-binding status reporting.
3253  */
3254 bool iommu_group_dma_owner_claimed(struct iommu_group *group)
3255 {
3256 	unsigned int user;
3257 
3258 	mutex_lock(&group->mutex);
3259 	user = group->owner_cnt;
3260 	mutex_unlock(&group->mutex);
3261 
3262 	return user;
3263 }
3264 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
3265 
3266 static int __iommu_set_group_pasid(struct iommu_domain *domain,
3267 				   struct iommu_group *group, ioasid_t pasid)
3268 {
3269 	struct group_device *device;
3270 	int ret = 0;
3271 
3272 	list_for_each_entry(device, &group->devices, list) {
3273 		ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
3274 		if (ret)
3275 			break;
3276 	}
3277 
3278 	return ret;
3279 }
3280 
3281 static void __iommu_remove_group_pasid(struct iommu_group *group,
3282 				       ioasid_t pasid)
3283 {
3284 	struct group_device *device;
3285 	const struct iommu_ops *ops;
3286 
3287 	list_for_each_entry(device, &group->devices, list) {
3288 		ops = dev_iommu_ops(device->dev);
3289 		ops->remove_dev_pasid(device->dev, pasid);
3290 	}
3291 }
3292 
3293 /*
3294  * iommu_attach_device_pasid() - Attach a domain to pasid of device
3295  * @domain: the iommu domain.
3296  * @dev: the attached device.
3297  * @pasid: the pasid of the device.
3298  *
3299  * Return: 0 on success, or an error.
3300  */
3301 int iommu_attach_device_pasid(struct iommu_domain *domain,
3302 			      struct device *dev, ioasid_t pasid)
3303 {
3304 	struct iommu_group *group;
3305 	void *curr;
3306 	int ret;
3307 
3308 	if (!domain->ops->set_dev_pasid)
3309 		return -EOPNOTSUPP;
3310 
3311 	group = iommu_group_get(dev);
3312 	if (!group)
3313 		return -ENODEV;
3314 
3315 	mutex_lock(&group->mutex);
3316 	curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
3317 	if (curr) {
3318 		ret = xa_err(curr) ? : -EBUSY;
3319 		goto out_unlock;
3320 	}
3321 
3322 	ret = __iommu_set_group_pasid(domain, group, pasid);
3323 	if (ret) {
3324 		__iommu_remove_group_pasid(group, pasid);
3325 		xa_erase(&group->pasid_array, pasid);
3326 	}
3327 out_unlock:
3328 	mutex_unlock(&group->mutex);
3329 	iommu_group_put(group);
3330 
3331 	return ret;
3332 }
3333 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
3334 
3335 /*
3336  * iommu_detach_device_pasid() - Detach the domain from pasid of device
3337  * @domain: the iommu domain.
3338  * @dev: the attached device.
3339  * @pasid: the pasid of the device.
3340  *
3341  * The @domain must have been attached to @pasid of the @dev with
3342  * iommu_attach_device_pasid().
3343  */
3344 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
3345 			       ioasid_t pasid)
3346 {
3347 	struct iommu_group *group = iommu_group_get(dev);
3348 
3349 	mutex_lock(&group->mutex);
3350 	__iommu_remove_group_pasid(group, pasid);
3351 	WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
3352 	mutex_unlock(&group->mutex);
3353 
3354 	iommu_group_put(group);
3355 }
3356 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
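
/*
 * A sketch of per-PASID attach/detach, assuming the device and its IOMMU
 * driver support PASIDs and that 'pasid' was allocated by the caller
 * beforehand.
 *
 *	ret = iommu_attach_device_pasid(domain, dev, pasid);
 *	if (ret)
 *		return ret;	// e.g. -EOPNOTSUPP, or -EBUSY if pasid is in use
 *	// ... DMA tagged with 'pasid' now translates through 'domain' ...
 *	iommu_detach_device_pasid(domain, dev, pasid);
 */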
3357 
3358 /*
3359  * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
3360  * @dev: the queried device
3361  * @pasid: the pasid of the device
3362  * @type: matched domain type, 0 for any match
3363  *
3364  * This is a variant of iommu_get_domain_for_dev(). It returns the existing
3365  * domain attached to pasid of a device. Callers must hold a lock around this
3366  * function, and both iommu_attach/detach_dev_pasid() whenever a domain of
3367  * this type is being manipulated. This API does not internally resolve races
3368  * with attach/detach.
3369  *
3370  * Return: attached domain on success, NULL otherwise.
3371  */
3372 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
3373 						    ioasid_t pasid,
3374 						    unsigned int type)
3375 {
3376 	struct iommu_domain *domain;
3377 	struct iommu_group *group;
3378 
3379 	group = iommu_group_get(dev);
3380 	if (!group)
3381 		return NULL;
3382 
3383 	xa_lock(&group->pasid_array);
3384 	domain = xa_load(&group->pasid_array, pasid);
3385 	if (type && domain && domain->type != type)
3386 		domain = ERR_PTR(-EBUSY);
3387 	xa_unlock(&group->pasid_array);
3388 	iommu_group_put(group);
3389 
3390 	return domain;
3391 }
3392 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
3393 
3394 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
3395 					    struct mm_struct *mm)
3396 {
3397 	const struct iommu_ops *ops = dev_iommu_ops(dev);
3398 	struct iommu_domain *domain;
3399 
3400 	domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
3401 	if (!domain)
3402 		return NULL;
3403 
3404 	domain->type = IOMMU_DOMAIN_SVA;
3405 	mmgrab(mm);
3406 	domain->mm = mm;
3407 	domain->iopf_handler = iommu_sva_handle_iopf;
3408 	domain->fault_data = mm;
3409 
3410 	return domain;
3411 }
3412
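
/*
 * A sketch of pairing iommu_sva_domain_alloc() with the PASID attach API so
 * that DMA tagged with 'pasid' shares the CPU page tables of current->mm;
 * 'pasid' is assumed to have been allocated elsewhere and the error handling
 * is abbreviated. iommu_domain_free() drops the mm reference taken above.
 *
 *	struct iommu_domain *sva = iommu_sva_domain_alloc(dev, current->mm);
 *
 *	if (!sva)
 *		return -ENOMEM;
 *	ret = iommu_attach_device_pasid(sva, dev, pasid);
 *	if (ret) {
 *		iommu_domain_free(sva);
 *		return ret;
 *	}
 */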