xref: /openbmc/linux/drivers/iommu/iommu.c (revision e6c81cce)
1 /*
2  * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3  * Author: Joerg Roedel <jroedel@suse.de>
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published
7  * by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17  */
18 
19 #define pr_fmt(fmt)    "%s: " fmt, __func__
20 
21 #include <linux/device.h>
22 #include <linux/kernel.h>
23 #include <linux/bug.h>
24 #include <linux/types.h>
25 #include <linux/module.h>
26 #include <linux/slab.h>
27 #include <linux/errno.h>
28 #include <linux/iommu.h>
29 #include <linux/idr.h>
30 #include <linux/notifier.h>
31 #include <linux/err.h>
32 #include <linux/pci.h>
33 #include <linux/bitops.h>
34 #include <trace/events/iommu.h>
35 
/*
 * kset assigned to every group kobject in iommu_group_alloc() and searched
 * by iommu_group_get_by_id().  NOTE(review): it is initialised outside the
 * part of the file visible here.
 */
static struct kset *iommu_group_kset;
/* Allocator for the numeric group IDs (also used as the sysfs directory name) */
static struct ida iommu_group_ida;
/* Held around every ida_pre_get()/ida_get_new()/ida_remove() on iommu_group_ida */
static struct mutex iommu_group_mutex;
39 
/*
 * Cookie handed to add_iommu_group() through bus_for_each_dev() so the
 * callback can reach the iommu_ops being installed on the bus.
 */
struct iommu_callback_data {
	const struct iommu_ops *ops;
};
43 
/*
 * An IOMMU group.  The structure is embedded around @kobj and is freed by
 * iommu_group_release() once that kobject is released.
 */
struct iommu_group {
	/* embedded kobject; to_iommu_group() maps it back to the group */
	struct kobject kobj;
	/* "devices" child kobject; its refcount is what iommu_group_get/put use */
	struct kobject *devices_kobj;
	/* list of struct iommu_device entries, linked through ->list */
	struct list_head devices;
	/* protects the @devices list */
	struct mutex mutex;
	/* chain notified with IOMMU_GROUP_NOTIFY_* events */
	struct blocking_notifier_head notifier;
	/* driver private data, see iommu_group_set_iommudata() */
	void *iommu_data;
	/* optional destructor for @iommu_data, called from iommu_group_release() */
	void (*iommu_data_release)(void *iommu_data);
	/* optional kstrdup()'d name exposed through the "name" sysfs attribute */
	char *name;
	/* ID obtained from iommu_group_ida */
	int id;
};
55 
/* Bookkeeping entry for one device that is a member of an iommu_group. */
struct iommu_device {
	/* link in iommu_group->devices */
	struct list_head list;
	/* the member device */
	struct device *dev;
	/* name of the symlink created for this device under the group's devices_kobj */
	char *name;
};
61 
/*
 * A sysfs attribute of an iommu group.  @show and @store receive the group
 * instead of the raw kobject; either may be NULL, in which case the generic
 * wrappers return -EIO.
 */
struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};
68 
/* Define a struct iommu_group_attribute named iommu_group_attr_<_name> */
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =		\
	__ATTR(_name, _mode, _show, _store)

/* Map an embedded struct attribute back to its iommu_group_attribute */
#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
/* Map an embedded struct kobject back to its iommu_group */
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)
77 
78 static ssize_t iommu_group_attr_show(struct kobject *kobj,
79 				     struct attribute *__attr, char *buf)
80 {
81 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
82 	struct iommu_group *group = to_iommu_group(kobj);
83 	ssize_t ret = -EIO;
84 
85 	if (attr->show)
86 		ret = attr->show(group, buf);
87 	return ret;
88 }
89 
90 static ssize_t iommu_group_attr_store(struct kobject *kobj,
91 				      struct attribute *__attr,
92 				      const char *buf, size_t count)
93 {
94 	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
95 	struct iommu_group *group = to_iommu_group(kobj);
96 	ssize_t ret = -EIO;
97 
98 	if (attr->store)
99 		ret = attr->store(group, buf, count);
100 	return ret;
101 }
102 
/*
 * sysfs_ops installed through iommu_group_ktype; routes reads and writes to
 * iommu_group_attr_show() and iommu_group_attr_store().
 */
static const struct sysfs_ops iommu_group_sysfs_ops = {
	.show = iommu_group_attr_show,
	.store = iommu_group_attr_store,
};
107 
108 static int iommu_group_create_file(struct iommu_group *group,
109 				   struct iommu_group_attribute *attr)
110 {
111 	return sysfs_create_file(&group->kobj, &attr->attr);
112 }
113 
114 static void iommu_group_remove_file(struct iommu_group *group,
115 				    struct iommu_group_attribute *attr)
116 {
117 	sysfs_remove_file(&group->kobj, &attr->attr);
118 }
119 
120 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
121 {
122 	return sprintf(buf, "%s\n", group->name);
123 }
124 
/* Read-only "name" attribute served by iommu_group_show_name(); it has no store handler */
static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
126 
127 static void iommu_group_release(struct kobject *kobj)
128 {
129 	struct iommu_group *group = to_iommu_group(kobj);
130 
131 	if (group->iommu_data_release)
132 		group->iommu_data_release(group->iommu_data);
133 
134 	mutex_lock(&iommu_group_mutex);
135 	ida_remove(&iommu_group_ida, group->id);
136 	mutex_unlock(&iommu_group_mutex);
137 
138 	kfree(group->name);
139 	kfree(group);
140 }
141 
/*
 * kobj_type of every group kobject: attribute I/O goes through
 * iommu_group_sysfs_ops and the final put ends up in iommu_group_release().
 */
static struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};
146 
147 /**
148  * iommu_group_alloc - Allocate a new group
149  * @name: Optional name to associate with group, visible in sysfs
150  *
151  * This function is called by an iommu driver to allocate a new iommu
152  * group.  The iommu group represents the minimum granularity of the iommu.
153  * Upon successful return, the caller holds a reference to the supplied
154  * group in order to hold the group until devices are added.  Use
155  * iommu_group_put() to release this extra reference count, allowing the
156  * group to be automatically reclaimed once it has no devices or external
157  * references.
158  */
159 struct iommu_group *iommu_group_alloc(void)
160 {
161 	struct iommu_group *group;
162 	int ret;
163 
164 	group = kzalloc(sizeof(*group), GFP_KERNEL);
165 	if (!group)
166 		return ERR_PTR(-ENOMEM);
167 
168 	group->kobj.kset = iommu_group_kset;
169 	mutex_init(&group->mutex);
170 	INIT_LIST_HEAD(&group->devices);
171 	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
172 
173 	mutex_lock(&iommu_group_mutex);
174 
175 again:
176 	if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) {
177 		kfree(group);
178 		mutex_unlock(&iommu_group_mutex);
179 		return ERR_PTR(-ENOMEM);
180 	}
181 
182 	if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id))
183 		goto again;
184 
185 	mutex_unlock(&iommu_group_mutex);
186 
187 	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
188 				   NULL, "%d", group->id);
189 	if (ret) {
190 		mutex_lock(&iommu_group_mutex);
191 		ida_remove(&iommu_group_ida, group->id);
192 		mutex_unlock(&iommu_group_mutex);
193 		kfree(group);
194 		return ERR_PTR(ret);
195 	}
196 
197 	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
198 	if (!group->devices_kobj) {
199 		kobject_put(&group->kobj); /* triggers .release & free */
200 		return ERR_PTR(-ENOMEM);
201 	}
202 
203 	/*
204 	 * The devices_kobj holds a reference on the group kobject, so
205 	 * as long as that exists so will the group.  We can therefore
206 	 * use the devices_kobj for reference counting.
207 	 */
208 	kobject_put(&group->kobj);
209 
210 	return group;
211 }
212 EXPORT_SYMBOL_GPL(iommu_group_alloc);
213 
214 struct iommu_group *iommu_group_get_by_id(int id)
215 {
216 	struct kobject *group_kobj;
217 	struct iommu_group *group;
218 	const char *name;
219 
220 	if (!iommu_group_kset)
221 		return NULL;
222 
223 	name = kasprintf(GFP_KERNEL, "%d", id);
224 	if (!name)
225 		return NULL;
226 
227 	group_kobj = kset_find_obj(iommu_group_kset, name);
228 	kfree(name);
229 
230 	if (!group_kobj)
231 		return NULL;
232 
233 	group = container_of(group_kobj, struct iommu_group, kobj);
234 	BUG_ON(group->id != id);
235 
236 	kobject_get(group->devices_kobj);
237 	kobject_put(&group->kobj);
238 
239 	return group;
240 }
241 EXPORT_SYMBOL_GPL(iommu_group_get_by_id);
242 
243 /**
244  * iommu_group_get_iommudata - retrieve iommu_data registered for a group
245  * @group: the group
246  *
247  * iommu drivers can store data in the group for use when doing iommu
248  * operations.  This function provides a way to retrieve it.  Caller
249  * should hold a group reference.
250  */
251 void *iommu_group_get_iommudata(struct iommu_group *group)
252 {
253 	return group->iommu_data;
254 }
255 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
256 
257 /**
258  * iommu_group_set_iommudata - set iommu_data for a group
259  * @group: the group
260  * @iommu_data: new data
261  * @release: release function for iommu_data
262  *
263  * iommu drivers can store data in the group for use when doing iommu
264  * operations.  This function provides a way to set the data after
265  * the group has been allocated.  Caller should hold a group reference.
266  */
267 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
268 			       void (*release)(void *iommu_data))
269 {
270 	group->iommu_data = iommu_data;
271 	group->iommu_data_release = release;
272 }
273 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
274 
275 /**
276  * iommu_group_set_name - set name for a group
277  * @group: the group
278  * @name: name
279  *
280  * Allow iommu driver to set a name for a group.  When set it will
281  * appear in a name attribute file under the group in sysfs.
282  */
283 int iommu_group_set_name(struct iommu_group *group, const char *name)
284 {
285 	int ret;
286 
287 	if (group->name) {
288 		iommu_group_remove_file(group, &iommu_group_attr_name);
289 		kfree(group->name);
290 		group->name = NULL;
291 		if (!name)
292 			return 0;
293 	}
294 
295 	group->name = kstrdup(name, GFP_KERNEL);
296 	if (!group->name)
297 		return -ENOMEM;
298 
299 	ret = iommu_group_create_file(group, &iommu_group_attr_name);
300 	if (ret) {
301 		kfree(group->name);
302 		group->name = NULL;
303 		return ret;
304 	}
305 
306 	return 0;
307 }
308 EXPORT_SYMBOL_GPL(iommu_group_set_name);
309 
310 /**
311  * iommu_group_add_device - add a device to an iommu group
312  * @group: the group into which to add the device (reference should be held)
313  * @dev: the device
314  *
315  * This function is called by an iommu driver to add a device into a
316  * group.  Adding a device increments the group reference count.
317  */
318 int iommu_group_add_device(struct iommu_group *group, struct device *dev)
319 {
320 	int ret, i = 0;
321 	struct iommu_device *device;
322 
323 	device = kzalloc(sizeof(*device), GFP_KERNEL);
324 	if (!device)
325 		return -ENOMEM;
326 
327 	device->dev = dev;
328 
329 	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
330 	if (ret) {
331 		kfree(device);
332 		return ret;
333 	}
334 
335 	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
336 rename:
337 	if (!device->name) {
338 		sysfs_remove_link(&dev->kobj, "iommu_group");
339 		kfree(device);
340 		return -ENOMEM;
341 	}
342 
343 	ret = sysfs_create_link_nowarn(group->devices_kobj,
344 				       &dev->kobj, device->name);
345 	if (ret) {
346 		kfree(device->name);
347 		if (ret == -EEXIST && i >= 0) {
348 			/*
349 			 * Account for the slim chance of collision
350 			 * and append an instance to the name.
351 			 */
352 			device->name = kasprintf(GFP_KERNEL, "%s.%d",
353 						 kobject_name(&dev->kobj), i++);
354 			goto rename;
355 		}
356 
357 		sysfs_remove_link(&dev->kobj, "iommu_group");
358 		kfree(device);
359 		return ret;
360 	}
361 
362 	kobject_get(group->devices_kobj);
363 
364 	dev->iommu_group = group;
365 
366 	mutex_lock(&group->mutex);
367 	list_add_tail(&device->list, &group->devices);
368 	mutex_unlock(&group->mutex);
369 
370 	/* Notify any listeners about change to group. */
371 	blocking_notifier_call_chain(&group->notifier,
372 				     IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);
373 
374 	trace_add_device_to_group(group->id, dev);
375 	return 0;
376 }
377 EXPORT_SYMBOL_GPL(iommu_group_add_device);
378 
379 /**
380  * iommu_group_remove_device - remove a device from it's current group
381  * @dev: device to be removed
382  *
383  * This function is called by an iommu driver to remove the device from
384  * it's current group.  This decrements the iommu group reference count.
385  */
386 void iommu_group_remove_device(struct device *dev)
387 {
388 	struct iommu_group *group = dev->iommu_group;
389 	struct iommu_device *tmp_device, *device = NULL;
390 
391 	/* Pre-notify listeners that a device is being removed. */
392 	blocking_notifier_call_chain(&group->notifier,
393 				     IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);
394 
395 	mutex_lock(&group->mutex);
396 	list_for_each_entry(tmp_device, &group->devices, list) {
397 		if (tmp_device->dev == dev) {
398 			device = tmp_device;
399 			list_del(&device->list);
400 			break;
401 		}
402 	}
403 	mutex_unlock(&group->mutex);
404 
405 	if (!device)
406 		return;
407 
408 	sysfs_remove_link(group->devices_kobj, device->name);
409 	sysfs_remove_link(&dev->kobj, "iommu_group");
410 
411 	trace_remove_device_from_group(group->id, dev);
412 
413 	kfree(device->name);
414 	kfree(device);
415 	dev->iommu_group = NULL;
416 	kobject_put(group->devices_kobj);
417 }
418 EXPORT_SYMBOL_GPL(iommu_group_remove_device);
419 
420 /**
421  * iommu_group_for_each_dev - iterate over each device in the group
422  * @group: the group
423  * @data: caller opaque data to be passed to callback function
424  * @fn: caller supplied callback function
425  *
426  * This function is called by group users to iterate over group devices.
427  * Callers should hold a reference count to the group during callback.
428  * The group->mutex is held across callbacks, which will block calls to
429  * iommu_group_add/remove_device.
430  */
431 int iommu_group_for_each_dev(struct iommu_group *group, void *data,
432 			     int (*fn)(struct device *, void *))
433 {
434 	struct iommu_device *device;
435 	int ret = 0;
436 
437 	mutex_lock(&group->mutex);
438 	list_for_each_entry(device, &group->devices, list) {
439 		ret = fn(device->dev, data);
440 		if (ret)
441 			break;
442 	}
443 	mutex_unlock(&group->mutex);
444 	return ret;
445 }
446 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
447 
448 /**
449  * iommu_group_get - Return the group for a device and increment reference
450  * @dev: get the group that this device belongs to
451  *
452  * This function is called by iommu drivers and users to get the group
453  * for the specified device.  If found, the group is returned and the group
454  * reference in incremented, else NULL.
455  */
456 struct iommu_group *iommu_group_get(struct device *dev)
457 {
458 	struct iommu_group *group = dev->iommu_group;
459 
460 	if (group)
461 		kobject_get(group->devices_kobj);
462 
463 	return group;
464 }
465 EXPORT_SYMBOL_GPL(iommu_group_get);
466 
467 /**
468  * iommu_group_put - Decrement group reference
469  * @group: the group to use
470  *
471  * This function is called by iommu drivers and users to release the
472  * iommu group.  Once the reference count is zero, the group is released.
473  */
474 void iommu_group_put(struct iommu_group *group)
475 {
476 	if (group)
477 		kobject_put(group->devices_kobj);
478 }
479 EXPORT_SYMBOL_GPL(iommu_group_put);
480 
481 /**
482  * iommu_group_register_notifier - Register a notifier for group changes
483  * @group: the group to watch
484  * @nb: notifier block to signal
485  *
486  * This function allows iommu group users to track changes in a group.
487  * See include/linux/iommu.h for actions sent via this notifier.  Caller
488  * should hold a reference to the group throughout notifier registration.
489  */
490 int iommu_group_register_notifier(struct iommu_group *group,
491 				  struct notifier_block *nb)
492 {
493 	return blocking_notifier_chain_register(&group->notifier, nb);
494 }
495 EXPORT_SYMBOL_GPL(iommu_group_register_notifier);
496 
497 /**
498  * iommu_group_unregister_notifier - Unregister a notifier
499  * @group: the group to watch
500  * @nb: notifier block to signal
501  *
502  * Unregister a previously registered group notifier block.
503  */
504 int iommu_group_unregister_notifier(struct iommu_group *group,
505 				    struct notifier_block *nb)
506 {
507 	return blocking_notifier_chain_unregister(&group->notifier, nb);
508 }
509 EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
510 
511 /**
512  * iommu_group_id - Return ID for a group
513  * @group: the group to ID
514  *
515  * Return the unique ID for the group matching the sysfs group number.
516  */
517 int iommu_group_id(struct iommu_group *group)
518 {
519 	return group->id;
520 }
521 EXPORT_SYMBOL_GPL(iommu_group_id);
522 
/*
 * Forward declaration: get_pci_function_alias_group() and
 * get_pci_alias_group() call each other.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding.  This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as it passes through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
535 
536 /*
537  * For multifunction devices which are not isolated from each other, find
538  * all the other non-isolated functions and look for existing groups.  For
539  * each function, we also need to look for aliases to or from other devices
540  * that may already have a group.
541  */
542 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
543 							unsigned long *devfns)
544 {
545 	struct pci_dev *tmp = NULL;
546 	struct iommu_group *group;
547 
548 	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
549 		return NULL;
550 
551 	for_each_pci_dev(tmp) {
552 		if (tmp == pdev || tmp->bus != pdev->bus ||
553 		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
554 		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
555 			continue;
556 
557 		group = get_pci_alias_group(tmp, devfns);
558 		if (group) {
559 			pci_dev_put(tmp);
560 			return group;
561 		}
562 	}
563 
564 	return NULL;
565 }
566 
567 /*
568  * Look for aliases to or from the given device for exisiting groups.  The
569  * dma_alias_devfn only supports aliases on the same bus, therefore the search
570  * space is quite small (especially since we're really only looking at pcie
571  * device, and therefore only expect multiple slots on the root complex or
572  * downstream switch ports).  It's conceivable though that a pair of
573  * multifunction devices could have aliases between them that would cause a
574  * loop.  To prevent this, we use a bitmap to track where we've been.
575  */
576 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
577 					       unsigned long *devfns)
578 {
579 	struct pci_dev *tmp = NULL;
580 	struct iommu_group *group;
581 
582 	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
583 		return NULL;
584 
585 	group = iommu_group_get(&pdev->dev);
586 	if (group)
587 		return group;
588 
589 	for_each_pci_dev(tmp) {
590 		if (tmp == pdev || tmp->bus != pdev->bus)
591 			continue;
592 
593 		/* We alias them or they alias us */
594 		if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
595 		     pdev->dma_alias_devfn == tmp->devfn) ||
596 		    ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
597 		     tmp->dma_alias_devfn == pdev->devfn)) {
598 
599 			group = get_pci_alias_group(tmp, devfns);
600 			if (group) {
601 				pci_dev_put(tmp);
602 				return group;
603 			}
604 
605 			group = get_pci_function_alias_group(tmp, devfns);
606 			if (group) {
607 				pci_dev_put(tmp);
608 				return group;
609 			}
610 		}
611 	}
612 
613 	return NULL;
614 }
615 
/* State filled in by get_pci_alias_or_group() during the DMA alias walk */
struct group_for_pci_data {
	/* device passed to the most recent callback invocation */
	struct pci_dev *pdev;
	/* group of that device as returned by iommu_group_get(), NULL if none */
	struct iommu_group *group;
};
620 
621 /*
622  * DMA alias iterator callback, return the last seen device.  Stop and return
623  * the IOMMU group if we find one along the way.
624  */
625 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
626 {
627 	struct group_for_pci_data *data = opaque;
628 
629 	data->pdev = pdev;
630 	data->group = iommu_group_get(&pdev->dev);
631 
632 	return data->group != NULL;
633 }
634 
635 /*
636  * Use standard PCI bus topology, isolation features, and DMA alias quirks
637  * to find or create an IOMMU group for a device.
638  */
639 static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
640 {
641 	struct group_for_pci_data data;
642 	struct pci_bus *bus;
643 	struct iommu_group *group = NULL;
644 	u64 devfns[4] = { 0 };
645 
646 	/*
647 	 * Find the upstream DMA alias for the device.  A device must not
648 	 * be aliased due to topology in order to have its own IOMMU group.
649 	 * If we find an alias along the way that already belongs to a
650 	 * group, use it.
651 	 */
652 	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
653 		return data.group;
654 
655 	pdev = data.pdev;
656 
657 	/*
658 	 * Continue upstream from the point of minimum IOMMU granularity
659 	 * due to aliases to the point where devices are protected from
660 	 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
661 	 * group, use it.
662 	 */
663 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
664 		if (!bus->self)
665 			continue;
666 
667 		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
668 			break;
669 
670 		pdev = bus->self;
671 
672 		group = iommu_group_get(&pdev->dev);
673 		if (group)
674 			return group;
675 	}
676 
677 	/*
678 	 * Look for existing groups on device aliases.  If we alias another
679 	 * device or another device aliases us, use the same group.
680 	 */
681 	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
682 	if (group)
683 		return group;
684 
685 	/*
686 	 * Look for existing groups on non-isolated functions on the same
687 	 * slot and aliases of those funcions, if any.  No need to clear
688 	 * the search bitmap, the tested devfns are still valid.
689 	 */
690 	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
691 	if (group)
692 		return group;
693 
694 	/* No shared group found, allocate new */
695 	return iommu_group_alloc();
696 }
697 
698 /**
699  * iommu_group_get_for_dev - Find or create the IOMMU group for a device
700  * @dev: target device
701  *
702  * This function is intended to be called by IOMMU drivers and extended to
703  * support common, bus-defined algorithms when determining or creating the
704  * IOMMU group for a device.  On success, the caller will hold a reference
705  * to the returned IOMMU group, which will already include the provided
706  * device.  The reference should be released with iommu_group_put().
707  */
708 struct iommu_group *iommu_group_get_for_dev(struct device *dev)
709 {
710 	struct iommu_group *group;
711 	int ret;
712 
713 	group = iommu_group_get(dev);
714 	if (group)
715 		return group;
716 
717 	if (!dev_is_pci(dev))
718 		return ERR_PTR(-EINVAL);
719 
720 	group = iommu_group_get_for_pci_dev(to_pci_dev(dev));
721 
722 	if (IS_ERR(group))
723 		return group;
724 
725 	ret = iommu_group_add_device(group, dev);
726 	if (ret) {
727 		iommu_group_put(group);
728 		return ERR_PTR(ret);
729 	}
730 
731 	return group;
732 }
733 
734 static int add_iommu_group(struct device *dev, void *data)
735 {
736 	struct iommu_callback_data *cb = data;
737 	const struct iommu_ops *ops = cb->ops;
738 
739 	if (!ops->add_device)
740 		return 0;
741 
742 	WARN_ON(dev->iommu_group);
743 
744 	ops->add_device(dev);
745 
746 	return 0;
747 }
748 
749 static int iommu_bus_notifier(struct notifier_block *nb,
750 			      unsigned long action, void *data)
751 {
752 	struct device *dev = data;
753 	const struct iommu_ops *ops = dev->bus->iommu_ops;
754 	struct iommu_group *group;
755 	unsigned long group_action = 0;
756 
757 	/*
758 	 * ADD/DEL call into iommu driver ops if provided, which may
759 	 * result in ADD/DEL notifiers to group->notifier
760 	 */
761 	if (action == BUS_NOTIFY_ADD_DEVICE) {
762 		if (ops->add_device)
763 			return ops->add_device(dev);
764 	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
765 		if (ops->remove_device && dev->iommu_group) {
766 			ops->remove_device(dev);
767 			return 0;
768 		}
769 	}
770 
771 	/*
772 	 * Remaining BUS_NOTIFYs get filtered and republished to the
773 	 * group, if anyone is listening
774 	 */
775 	group = iommu_group_get(dev);
776 	if (!group)
777 		return 0;
778 
779 	switch (action) {
780 	case BUS_NOTIFY_BIND_DRIVER:
781 		group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER;
782 		break;
783 	case BUS_NOTIFY_BOUND_DRIVER:
784 		group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER;
785 		break;
786 	case BUS_NOTIFY_UNBIND_DRIVER:
787 		group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER;
788 		break;
789 	case BUS_NOTIFY_UNBOUND_DRIVER:
790 		group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER;
791 		break;
792 	}
793 
794 	if (group_action)
795 		blocking_notifier_call_chain(&group->notifier,
796 					     group_action, dev);
797 
798 	iommu_group_put(group);
799 	return 0;
800 }
801 
802 static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
803 {
804 	int err;
805 	struct notifier_block *nb;
806 	struct iommu_callback_data cb = {
807 		.ops = ops,
808 	};
809 
810 	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
811 	if (!nb)
812 		return -ENOMEM;
813 
814 	nb->notifier_call = iommu_bus_notifier;
815 
816 	err = bus_register_notifier(bus, nb);
817 	if (err) {
818 		kfree(nb);
819 		return err;
820 	}
821 
822 	err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
823 	if (err) {
824 		bus_unregister_notifier(bus, nb);
825 		kfree(nb);
826 		return err;
827 	}
828 
829 	return 0;
830 }
831 
832 /**
833  * bus_set_iommu - set iommu-callbacks for the bus
834  * @bus: bus.
835  * @ops: the callbacks provided by the iommu-driver
836  *
837  * This function is called by an iommu driver to set the iommu methods
838  * used for a particular bus. Drivers for devices on that bus can use
839  * the iommu-api after these ops are registered.
840  * This special function is needed because IOMMUs are usually devices on
841  * the bus itself, so the iommu drivers are not initialized when the bus
842  * is set up. With this function the iommu-driver can set the iommu-ops
843  * afterwards.
844  */
845 int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
846 {
847 	int err;
848 
849 	if (bus->iommu_ops != NULL)
850 		return -EBUSY;
851 
852 	bus->iommu_ops = ops;
853 
854 	/* Do IOMMU specific setup for this bus-type */
855 	err = iommu_bus_init(bus, ops);
856 	if (err)
857 		bus->iommu_ops = NULL;
858 
859 	return err;
860 }
861 EXPORT_SYMBOL_GPL(bus_set_iommu);
862 
863 bool iommu_present(struct bus_type *bus)
864 {
865 	return bus->iommu_ops != NULL;
866 }
867 EXPORT_SYMBOL_GPL(iommu_present);
868 
869 bool iommu_capable(struct bus_type *bus, enum iommu_cap cap)
870 {
871 	if (!bus->iommu_ops || !bus->iommu_ops->capable)
872 		return false;
873 
874 	return bus->iommu_ops->capable(cap);
875 }
876 EXPORT_SYMBOL_GPL(iommu_capable);
877 
878 /**
879  * iommu_set_fault_handler() - set a fault handler for an iommu domain
880  * @domain: iommu domain
881  * @handler: fault handler
882  * @token: user data, will be passed back to the fault handler
883  *
884  * This function should be used by IOMMU users which want to be notified
885  * whenever an IOMMU fault happens.
886  *
887  * The fault handler itself should return 0 on success, and an appropriate
888  * error code otherwise.
889  */
890 void iommu_set_fault_handler(struct iommu_domain *domain,
891 					iommu_fault_handler_t handler,
892 					void *token)
893 {
894 	BUG_ON(!domain);
895 
896 	domain->handler = handler;
897 	domain->handler_token = token;
898 }
899 EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
900 
901 struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
902 {
903 	struct iommu_domain *domain;
904 
905 	if (bus == NULL || bus->iommu_ops == NULL)
906 		return NULL;
907 
908 	domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
909 	if (!domain)
910 		return NULL;
911 
912 	domain->ops  = bus->iommu_ops;
913 	domain->type = IOMMU_DOMAIN_UNMANAGED;
914 
915 	return domain;
916 }
917 EXPORT_SYMBOL_GPL(iommu_domain_alloc);
918 
919 void iommu_domain_free(struct iommu_domain *domain)
920 {
921 	domain->ops->domain_free(domain);
922 }
923 EXPORT_SYMBOL_GPL(iommu_domain_free);
924 
925 int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
926 {
927 	int ret;
928 	if (unlikely(domain->ops->attach_dev == NULL))
929 		return -ENODEV;
930 
931 	ret = domain->ops->attach_dev(domain, dev);
932 	if (!ret)
933 		trace_attach_device_to_domain(dev);
934 	return ret;
935 }
936 EXPORT_SYMBOL_GPL(iommu_attach_device);
937 
938 void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
939 {
940 	if (unlikely(domain->ops->detach_dev == NULL))
941 		return;
942 
943 	domain->ops->detach_dev(domain, dev);
944 	trace_detach_device_from_domain(dev);
945 }
946 EXPORT_SYMBOL_GPL(iommu_detach_device);
947 
/*
 * IOMMU groups are really the natural working unit of the IOMMU, but
 * the IOMMU API works on domains and devices.  Bridge that gap by
 * iterating over the devices in a group.  Ideally we'd have a single
 * device which represents the requestor ID of the group, but we also
 * allow IOMMU drivers to create policy defined minimum sets, where
 * the physical hardware may be able to distinguish members, but we
 * wish to group them at a higher level (ex. untrusted multi-function
 * PCI devices).  Thus we attach each device.
 *
 * Per-device callback for iommu_group_for_each_dev(); @data is the domain.
 */
static int iommu_group_do_attach_device(struct device *dev, void *data)
{
	return iommu_attach_device(data, dev);
}
964 
965 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
966 {
967 	return iommu_group_for_each_dev(group, domain,
968 					iommu_group_do_attach_device);
969 }
970 EXPORT_SYMBOL_GPL(iommu_attach_group);
971 
/*
 * Per-device callback for iommu_group_for_each_dev(); @data is the domain.
 * Always returns 0 so that the walk visits every device of the group.
 */
static int iommu_group_do_detach_device(struct device *dev, void *data)
{
	iommu_detach_device(data, dev);

	return 0;
}
980 
981 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
982 {
983 	iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device);
984 }
985 EXPORT_SYMBOL_GPL(iommu_detach_group);
986 
987 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
988 {
989 	if (unlikely(domain->ops->iova_to_phys == NULL))
990 		return 0;
991 
992 	return domain->ops->iova_to_phys(domain, iova);
993 }
994 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
995 
996 static size_t iommu_pgsize(struct iommu_domain *domain,
997 			   unsigned long addr_merge, size_t size)
998 {
999 	unsigned int pgsize_idx;
1000 	size_t pgsize;
1001 
1002 	/* Max page size that still fits into 'size' */
1003 	pgsize_idx = __fls(size);
1004 
1005 	/* need to consider alignment requirements ? */
1006 	if (likely(addr_merge)) {
1007 		/* Max page size allowed by address */
1008 		unsigned int align_pgsize_idx = __ffs(addr_merge);
1009 		pgsize_idx = min(pgsize_idx, align_pgsize_idx);
1010 	}
1011 
1012 	/* build a mask of acceptable page sizes */
1013 	pgsize = (1UL << (pgsize_idx + 1)) - 1;
1014 
1015 	/* throw away page sizes not supported by the hardware */
1016 	pgsize &= domain->ops->pgsize_bitmap;
1017 
1018 	/* make sure we're still sane */
1019 	BUG_ON(!pgsize);
1020 
1021 	/* pick the biggest page */
1022 	pgsize_idx = __fls(pgsize);
1023 	pgsize = 1UL << pgsize_idx;
1024 
1025 	return pgsize;
1026 }
1027 
1028 int iommu_map(struct iommu_domain *domain, unsigned long iova,
1029 	      phys_addr_t paddr, size_t size, int prot)
1030 {
1031 	unsigned long orig_iova = iova;
1032 	unsigned int min_pagesz;
1033 	size_t orig_size = size;
1034 	int ret = 0;
1035 
1036 	if (unlikely(domain->ops->map == NULL ||
1037 		     domain->ops->pgsize_bitmap == 0UL))
1038 		return -ENODEV;
1039 
1040 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
1041 		return -EINVAL;
1042 
1043 	/* find out the minimum page size supported */
1044 	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
1045 
1046 	/*
1047 	 * both the virtual address and the physical one, as well as
1048 	 * the size of the mapping, must be aligned (at least) to the
1049 	 * size of the smallest page supported by the hardware
1050 	 */
1051 	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
1052 		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
1053 		       iova, &paddr, size, min_pagesz);
1054 		return -EINVAL;
1055 	}
1056 
1057 	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
1058 
1059 	while (size) {
1060 		size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
1061 
1062 		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
1063 			 iova, &paddr, pgsize);
1064 
1065 		ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
1066 		if (ret)
1067 			break;
1068 
1069 		iova += pgsize;
1070 		paddr += pgsize;
1071 		size -= pgsize;
1072 	}
1073 
1074 	/* unroll mapping in case something went wrong */
1075 	if (ret)
1076 		iommu_unmap(domain, orig_iova, orig_size - size);
1077 	else
1078 		trace_map(orig_iova, paddr, orig_size);
1079 
1080 	return ret;
1081 }
1082 EXPORT_SYMBOL_GPL(iommu_map);
1083 
1084 size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1085 {
1086 	size_t unmapped_page, unmapped = 0;
1087 	unsigned int min_pagesz;
1088 	unsigned long orig_iova = iova;
1089 
1090 	if (unlikely(domain->ops->unmap == NULL ||
1091 		     domain->ops->pgsize_bitmap == 0UL))
1092 		return -ENODEV;
1093 
1094 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
1095 		return -EINVAL;
1096 
1097 	/* find out the minimum page size supported */
1098 	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
1099 
1100 	/*
1101 	 * The virtual address, as well as the size of the mapping, must be
1102 	 * aligned (at least) to the size of the smallest page supported
1103 	 * by the hardware
1104 	 */
1105 	if (!IS_ALIGNED(iova | size, min_pagesz)) {
1106 		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
1107 		       iova, size, min_pagesz);
1108 		return -EINVAL;
1109 	}
1110 
1111 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
1112 
1113 	/*
1114 	 * Keep iterating until we either unmap 'size' bytes (or more)
1115 	 * or we hit an area that isn't mapped.
1116 	 */
1117 	while (unmapped < size) {
1118 		size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
1119 
1120 		unmapped_page = domain->ops->unmap(domain, iova, pgsize);
1121 		if (!unmapped_page)
1122 			break;
1123 
1124 		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
1125 			 iova, unmapped_page);
1126 
1127 		iova += unmapped_page;
1128 		unmapped += unmapped_page;
1129 	}
1130 
1131 	trace_unmap(orig_iova, size, unmapped);
1132 	return unmapped;
1133 }
1134 EXPORT_SYMBOL_GPL(iommu_unmap);
1135 
1136 size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
1137 			 struct scatterlist *sg, unsigned int nents, int prot)
1138 {
1139 	struct scatterlist *s;
1140 	size_t mapped = 0;
1141 	unsigned int i, min_pagesz;
1142 	int ret;
1143 
1144 	if (unlikely(domain->ops->pgsize_bitmap == 0UL))
1145 		return 0;
1146 
1147 	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
1148 
1149 	for_each_sg(sg, s, nents, i) {
1150 		phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
1151 
1152 		/*
1153 		 * We are mapping on IOMMU page boundaries, so offset within
1154 		 * the page must be 0. However, the IOMMU may support pages
1155 		 * smaller than PAGE_SIZE, so s->offset may still represent
1156 		 * an offset of that boundary within the CPU page.
1157 		 */
1158 		if (!IS_ALIGNED(s->offset, min_pagesz))
1159 			goto out_err;
1160 
1161 		ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
1162 		if (ret)
1163 			goto out_err;
1164 
1165 		mapped += s->length;
1166 	}
1167 
1168 	return mapped;
1169 
1170 out_err:
1171 	/* undo mappings already done */
1172 	iommu_unmap(domain, iova, mapped);
1173 
1174 	return 0;
1175 
1176 }
1177 EXPORT_SYMBOL_GPL(default_iommu_map_sg);
1178 
1179 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
1180 			       phys_addr_t paddr, u64 size, int prot)
1181 {
1182 	if (unlikely(domain->ops->domain_window_enable == NULL))
1183 		return -ENODEV;
1184 
1185 	return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size,
1186 						 prot);
1187 }
1188 EXPORT_SYMBOL_GPL(iommu_domain_window_enable);
1189 
1190 void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr)
1191 {
1192 	if (unlikely(domain->ops->domain_window_disable == NULL))
1193 		return;
1194 
1195 	return domain->ops->domain_window_disable(domain, wnd_nr);
1196 }
1197 EXPORT_SYMBOL_GPL(iommu_domain_window_disable);
1198 
1199 static int __init iommu_init(void)
1200 {
1201 	iommu_group_kset = kset_create_and_add("iommu_groups",
1202 					       NULL, kernel_kobj);
1203 	ida_init(&iommu_group_ida);
1204 	mutex_init(&iommu_group_mutex);
1205 
1206 	BUG_ON(!iommu_group_kset);
1207 
1208 	return 0;
1209 }
1210 arch_initcall(iommu_init);
1211 
1212 int iommu_domain_get_attr(struct iommu_domain *domain,
1213 			  enum iommu_attr attr, void *data)
1214 {
1215 	struct iommu_domain_geometry *geometry;
1216 	bool *paging;
1217 	int ret = 0;
1218 	u32 *count;
1219 
1220 	switch (attr) {
1221 	case DOMAIN_ATTR_GEOMETRY:
1222 		geometry  = data;
1223 		*geometry = domain->geometry;
1224 
1225 		break;
1226 	case DOMAIN_ATTR_PAGING:
1227 		paging  = data;
1228 		*paging = (domain->ops->pgsize_bitmap != 0UL);
1229 		break;
1230 	case DOMAIN_ATTR_WINDOWS:
1231 		count = data;
1232 
1233 		if (domain->ops->domain_get_windows != NULL)
1234 			*count = domain->ops->domain_get_windows(domain);
1235 		else
1236 			ret = -ENODEV;
1237 
1238 		break;
1239 	default:
1240 		if (!domain->ops->domain_get_attr)
1241 			return -EINVAL;
1242 
1243 		ret = domain->ops->domain_get_attr(domain, attr, data);
1244 	}
1245 
1246 	return ret;
1247 }
1248 EXPORT_SYMBOL_GPL(iommu_domain_get_attr);
1249 
1250 int iommu_domain_set_attr(struct iommu_domain *domain,
1251 			  enum iommu_attr attr, void *data)
1252 {
1253 	int ret = 0;
1254 	u32 *count;
1255 
1256 	switch (attr) {
1257 	case DOMAIN_ATTR_WINDOWS:
1258 		count = data;
1259 
1260 		if (domain->ops->domain_set_windows != NULL)
1261 			ret = domain->ops->domain_set_windows(domain, *count);
1262 		else
1263 			ret = -ENODEV;
1264 
1265 		break;
1266 	default:
1267 		if (domain->ops->domain_set_attr == NULL)
1268 			return -EINVAL;
1269 
1270 		ret = domain->ops->domain_set_attr(domain, attr, data);
1271 	}
1272 
1273 	return ret;
1274 }
1275 EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
1276