xref: /openbmc/linux/drivers/vfio/vfio_main.c (revision 67671f15)
10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
20f3e72b5SJason Gunthorpe /*
30f3e72b5SJason Gunthorpe  * VFIO core
40f3e72b5SJason Gunthorpe  *
50f3e72b5SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
60f3e72b5SJason Gunthorpe  *     Author: Alex Williamson <alex.williamson@redhat.com>
70f3e72b5SJason Gunthorpe  *
80f3e72b5SJason Gunthorpe  * Derived from original vfio:
90f3e72b5SJason Gunthorpe  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
100f3e72b5SJason Gunthorpe  * Author: Tom Lyon, pugs@cisco.com
110f3e72b5SJason Gunthorpe  */
120f3e72b5SJason Gunthorpe 
130f3e72b5SJason Gunthorpe #include <linux/cdev.h>
140f3e72b5SJason Gunthorpe #include <linux/compat.h>
150f3e72b5SJason Gunthorpe #include <linux/device.h>
160f3e72b5SJason Gunthorpe #include <linux/file.h>
170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h>
180f3e72b5SJason Gunthorpe #include <linux/fs.h>
190f3e72b5SJason Gunthorpe #include <linux/idr.h>
200f3e72b5SJason Gunthorpe #include <linux/iommu.h>
210f3e72b5SJason Gunthorpe #include <linux/list.h>
220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h>
230f3e72b5SJason Gunthorpe #include <linux/module.h>
240f3e72b5SJason Gunthorpe #include <linux/mutex.h>
250f3e72b5SJason Gunthorpe #include <linux/pci.h>
260f3e72b5SJason Gunthorpe #include <linux/rwsem.h>
270f3e72b5SJason Gunthorpe #include <linux/sched.h>
280f3e72b5SJason Gunthorpe #include <linux/slab.h>
290f3e72b5SJason Gunthorpe #include <linux/stat.h>
300f3e72b5SJason Gunthorpe #include <linux/string.h>
310f3e72b5SJason Gunthorpe #include <linux/uaccess.h>
320f3e72b5SJason Gunthorpe #include <linux/vfio.h>
330f3e72b5SJason Gunthorpe #include <linux/wait.h>
340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h>
350f3e72b5SJason Gunthorpe #include "vfio.h"
360f3e72b5SJason Gunthorpe 
370f3e72b5SJason Gunthorpe #define DRIVER_VERSION	"0.3"
380f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
390f3e72b5SJason Gunthorpe #define DRIVER_DESC	"VFIO - User Level meta-driver"
400f3e72b5SJason Gunthorpe 
/* Module-global VFIO state, shared by all containers and groups. */
static struct vfio {
	struct class			*class;		/* class for group chardevs (group->dev.class) */
	struct list_head		iommu_drivers_list;	/* registered IOMMU backends */
	struct mutex			iommu_drivers_lock;	/* protects iommu_drivers_list */
	struct list_head		group_list;	/* all published vfio_groups */
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;	/* allocates group chardev minors */
	dev_t				group_devt;	/* base devt; MAJOR() used for group devs */
} vfio;
500f3e72b5SJason Gunthorpe 
/* One registered IOMMU backend, linked on vfio.iommu_drivers_list. */
struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;		/* backend callback table */
	struct list_head			vfio_next;	/* entry in vfio.iommu_drivers_list */
};
550f3e72b5SJason Gunthorpe 
/*
 * IOMMU context shared by the groups attached to it.  Created when
 * /dev/vfio/vfio is opened; freed via kref once the last user is done
 * (see vfio_container_get()/vfio_container_put()).
 */
struct vfio_container {
	struct kref			kref;		/* released by vfio_container_release() */
	struct list_head		group_list;	/* groups attached to this container */
	struct rw_semaphore		group_lock;	/* protects group_list */
	struct vfio_iommu_driver	*iommu_driver;	/* backend driving this container */
	void				*iommu_data;	/* backend-private state (ops->open result) */
	bool				noiommu;	/* restricts backend choice, see vfio_iommu_driver_allowed() */
};
640f3e72b5SJason Gunthorpe 
/*
 * One vfio_group per iommu_group, exposed to userspace as a chardev.
 * Lifetime is driven by 'users' (vfio_group_get()/vfio_group_put());
 * the embedded struct device's release frees the memory.
 */
struct vfio_group {
	struct device 			dev;		/* embedded; release = vfio_group_release() */
	struct cdev			cdev;		/* group chardev, ops = vfio_group_fops */
	refcount_t			users;		/* group references, see vfio_group_put() */
	unsigned int			container_users;	/* users pinning the container attachment */
	struct iommu_group		*iommu_group;	/* iommu core group, ref held (vfio_group_alloc) */
	struct vfio_container		*container;	/* attached container, or NULL */
	struct list_head		device_list;	/* vfio_devices registered in this group */
	struct mutex			device_lock;	/* protects device_list and dev_counter */
	struct list_head		vfio_next;	/* entry in vfio.group_list */
	struct list_head		container_next;	/* entry in container->group_list */
	enum vfio_group_type		type;		/* VFIO_IOMMU / VFIO_NO_IOMMU / VFIO_EMULATED_IOMMU */
	unsigned int			dev_counter;	/* number of entries on device_list */
	struct rw_semaphore		group_rwsem;	/* NOTE(review): guards kvm/opened_file — confirm against users outside this chunk */
	struct kvm			*kvm;		/* KVM association; set outside this chunk */
	struct file			*opened_file;	/* file of the open group fd; managed outside this chunk */
	struct blocking_notifier_head	notifier;	/* must be empty at final put, see vfio_group_put() */
};
830f3e72b5SJason Gunthorpe 
#ifdef CONFIG_VFIO_NOIOMMU
/* Runtime opt-in for the unsafe no-IOMMU mode; off by default. */
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

/* Maps set_id -> struct vfio_device_set, see vfio_assign_device_set(). */
static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;
930f3e72b5SJason Gunthorpe 
/**
 * vfio_assign_device_set - Place @device in the dev_set keyed by @set_id
 * @device: device to add
 * @set_id: opaque key identifying the set (used as an xarray index)
 *
 * Devices sharing the same @set_id join the same vfio_device_set,
 * creating it on first use.  Returns 0 on success, -EINVAL for a NULL
 * @set_id, -ENOMEM or an xarray error code on allocation failure.
 */
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	/* Not found: build a candidate outside the lock. */
	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	/* Publish it unless another thread raced us; cmpxchg picks the winner. */
	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	/* Lost the race (or xarray error): discard our candidate. */
	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	/* device_count pins dev_set; dropped in vfio_release_device_set() */
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
1430f3e72b5SJason Gunthorpe 
/*
 * Undo vfio_assign_device_set(): unlink @device and free the dev_set when
 * its device_count drops to zero.  Safe to call when no set was ever
 * assigned (device->dev_set == NULL).
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* device_count is protected by the xarray lock, matching the assign path */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
1640f3e72b5SJason Gunthorpe 
1650f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
1660f3e72b5SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg)
1670f3e72b5SJason Gunthorpe {
1680f3e72b5SJason Gunthorpe 	if (arg != VFIO_NOIOMMU_IOMMU)
1690f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
1700f3e72b5SJason Gunthorpe 	if (!capable(CAP_SYS_RAWIO))
1710f3e72b5SJason Gunthorpe 		return ERR_PTR(-EPERM);
1720f3e72b5SJason Gunthorpe 
1730f3e72b5SJason Gunthorpe 	return NULL;
1740f3e72b5SJason Gunthorpe }
1750f3e72b5SJason Gunthorpe 
/* The no-IOMMU backend keeps no state (iommu_data is NULL); nothing to free. */
static void vfio_noiommu_release(void *iommu_data)
{
}
1790f3e72b5SJason Gunthorpe 
1800f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data,
1810f3e72b5SJason Gunthorpe 			       unsigned int cmd, unsigned long arg)
1820f3e72b5SJason Gunthorpe {
1830f3e72b5SJason Gunthorpe 	if (cmd == VFIO_CHECK_EXTENSION)
1840f3e72b5SJason Gunthorpe 		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
1850f3e72b5SJason Gunthorpe 
1860f3e72b5SJason Gunthorpe 	return -ENOTTY;
1870f3e72b5SJason Gunthorpe }
1880f3e72b5SJason Gunthorpe 
/* No-IOMMU: attaching a group needs no hardware setup; always succeeds. */
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}
1940f3e72b5SJason Gunthorpe 
/* No-IOMMU: nothing was set up on attach, so detach is a no-op. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
1990f3e72b5SJason Gunthorpe 
/* Backend ops for unsafe no-IOMMU mode; gated by vfio_iommu_driver_allowed(). */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
2090f3e72b5SJason Gunthorpe 
/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
#else
/* Without CONFIG_VFIO_NOIOMMU there is no restricted backend to filter out. */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return true;
}
#endif /* CONFIG_VFIO_NOIOMMU */
2260f3e72b5SJason Gunthorpe 
2270f3e72b5SJason Gunthorpe /*
2280f3e72b5SJason Gunthorpe  * IOMMU driver registration
2290f3e72b5SJason Gunthorpe  */
2300f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2310f3e72b5SJason Gunthorpe {
2320f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver, *tmp;
2330f3e72b5SJason Gunthorpe 
2340f3e72b5SJason Gunthorpe 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
2350f3e72b5SJason Gunthorpe 		return -EINVAL;
2360f3e72b5SJason Gunthorpe 
2370f3e72b5SJason Gunthorpe 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
2380f3e72b5SJason Gunthorpe 	if (!driver)
2390f3e72b5SJason Gunthorpe 		return -ENOMEM;
2400f3e72b5SJason Gunthorpe 
2410f3e72b5SJason Gunthorpe 	driver->ops = ops;
2420f3e72b5SJason Gunthorpe 
2430f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2440f3e72b5SJason Gunthorpe 
2450f3e72b5SJason Gunthorpe 	/* Check for duplicates */
2460f3e72b5SJason Gunthorpe 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
2470f3e72b5SJason Gunthorpe 		if (tmp->ops == ops) {
2480f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2490f3e72b5SJason Gunthorpe 			kfree(driver);
2500f3e72b5SJason Gunthorpe 			return -EINVAL;
2510f3e72b5SJason Gunthorpe 		}
2520f3e72b5SJason Gunthorpe 	}
2530f3e72b5SJason Gunthorpe 
2540f3e72b5SJason Gunthorpe 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
2550f3e72b5SJason Gunthorpe 
2560f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2570f3e72b5SJason Gunthorpe 
2580f3e72b5SJason Gunthorpe 	return 0;
2590f3e72b5SJason Gunthorpe }
2600f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
2610f3e72b5SJason Gunthorpe 
2620f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2630f3e72b5SJason Gunthorpe {
2640f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
2650f3e72b5SJason Gunthorpe 
2660f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2670f3e72b5SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
2680f3e72b5SJason Gunthorpe 		if (driver->ops == ops) {
2690f3e72b5SJason Gunthorpe 			list_del(&driver->vfio_next);
2700f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2710f3e72b5SJason Gunthorpe 			kfree(driver);
2720f3e72b5SJason Gunthorpe 			return;
2730f3e72b5SJason Gunthorpe 		}
2740f3e72b5SJason Gunthorpe 	}
2750f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2760f3e72b5SJason Gunthorpe }
2770f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
2780f3e72b5SJason Gunthorpe 
2790f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group);
2800f3e72b5SJason Gunthorpe 
2810f3e72b5SJason Gunthorpe /*
2820f3e72b5SJason Gunthorpe  * Container objects - containers are created when /dev/vfio/vfio is
2830f3e72b5SJason Gunthorpe  * opened, but their lifecycle extends until the last user is done, so
2840f3e72b5SJason Gunthorpe  * it's freed via kref.  Must support container/group/device being
2850f3e72b5SJason Gunthorpe  * closed in any order.
2860f3e72b5SJason Gunthorpe  */
/* Take a reference on @container; paired with vfio_container_put(). */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
2910f3e72b5SJason Gunthorpe 
2920f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref)
2930f3e72b5SJason Gunthorpe {
2940f3e72b5SJason Gunthorpe 	struct vfio_container *container;
2950f3e72b5SJason Gunthorpe 	container = container_of(kref, struct vfio_container, kref);
2960f3e72b5SJason Gunthorpe 
2970f3e72b5SJason Gunthorpe 	kfree(container);
2980f3e72b5SJason Gunthorpe }
2990f3e72b5SJason Gunthorpe 
/* Drop a reference; frees the container via vfio_container_release() at zero. */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
3040f3e72b5SJason Gunthorpe 
3050f3e72b5SJason Gunthorpe /*
3060f3e72b5SJason Gunthorpe  * Group objects - create, release, get, put, search
3070f3e72b5SJason Gunthorpe  */
3080f3e72b5SJason Gunthorpe static struct vfio_group *
3090f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3100f3e72b5SJason Gunthorpe {
3110f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3120f3e72b5SJason Gunthorpe 
3130f3e72b5SJason Gunthorpe 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
3140f3e72b5SJason Gunthorpe 		if (group->iommu_group == iommu_group) {
3150f3e72b5SJason Gunthorpe 			vfio_group_get(group);
3160f3e72b5SJason Gunthorpe 			return group;
3170f3e72b5SJason Gunthorpe 		}
3180f3e72b5SJason Gunthorpe 	}
3190f3e72b5SJason Gunthorpe 	return NULL;
3200f3e72b5SJason Gunthorpe }
3210f3e72b5SJason Gunthorpe 
3220f3e72b5SJason Gunthorpe static struct vfio_group *
3230f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3240f3e72b5SJason Gunthorpe {
3250f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3260f3e72b5SJason Gunthorpe 
3270f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.group_lock);
3280f3e72b5SJason Gunthorpe 	group = __vfio_group_get_from_iommu(iommu_group);
3290f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
3300f3e72b5SJason Gunthorpe 	return group;
3310f3e72b5SJason Gunthorpe }
3320f3e72b5SJason Gunthorpe 
/* struct device release callback: final teardown after the last put_device(). */
static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	/* pairs with iommu_group_ref_get() in vfio_group_alloc() */
	iommu_group_put(group->iommu_group);
	/* return the chardev minor reserved in vfio_group_alloc() */
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}
3420f3e72b5SJason Gunthorpe 
/*
 * Allocate an unpublished vfio_group: embedded struct device initialized,
 * chardev minor reserved, one 'users' reference held for the caller.  The
 * group only becomes visible once vfio_create_group() adds the cdev.
 */
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	/* minor is returned to the ida in vfio_group_release() */
	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->users, 1);
	init_rwsem(&group->group_rwsem);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}
3780f3e72b5SJason Gunthorpe 
/*
 * Allocate and publish a vfio_group for @iommu_group, or return the
 * already-existing group if another thread won the race.  On success the
 * group is on vfio.group_list with a live chardev.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	/* "noiommu-" prefix marks unsafe groups in the device name */
	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		/* return the winner's group; ours is dropped below */
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	/* last put: frees the group via vfio_group_release() */
	put_device(&group->dev);
	return ret;
}
4220f3e72b5SJason Gunthorpe 
/*
 * Drop a 'users' reference.  The final put unpublishes the group (list
 * and cdev removal under vfio.group_lock) and releases the embedded
 * struct device, which frees the group.
 */
static void vfio_group_put(struct vfio_group *group)
{
	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
		return;

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group. Since all
	 * pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);

	list_del(&group->vfio_next);
	cdev_device_del(&group->cdev, &group->dev);
	mutex_unlock(&vfio.group_lock);

	put_device(&group->dev);
}
4440f3e72b5SJason Gunthorpe 
/* Take an extra 'users' reference; caller must already hold one. */
static void vfio_group_get(struct vfio_group *group)
{
	refcount_inc(&group->users);
}
4490f3e72b5SJason Gunthorpe 
4500f3e72b5SJason Gunthorpe /*
4510f3e72b5SJason Gunthorpe  * Device objects - create, release, get, put, search
4520f3e72b5SJason Gunthorpe  */
4530f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */
/* Drop a device reference; signals device->comp when the last one is gone. */
static void vfio_device_put(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}
4590f3e72b5SJason Gunthorpe 
/* Take a device reference unless the count already hit zero. */
static bool vfio_device_try_get(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
4640f3e72b5SJason Gunthorpe 
4650f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
4660f3e72b5SJason Gunthorpe 						 struct device *dev)
4670f3e72b5SJason Gunthorpe {
4680f3e72b5SJason Gunthorpe 	struct vfio_device *device;
4690f3e72b5SJason Gunthorpe 
4700f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
4710f3e72b5SJason Gunthorpe 	list_for_each_entry(device, &group->device_list, group_next) {
4720f3e72b5SJason Gunthorpe 		if (device->dev == dev && vfio_device_try_get(device)) {
4730f3e72b5SJason Gunthorpe 			mutex_unlock(&group->device_lock);
4740f3e72b5SJason Gunthorpe 			return device;
4750f3e72b5SJason Gunthorpe 		}
4760f3e72b5SJason Gunthorpe 	}
4770f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
4780f3e72b5SJason Gunthorpe 	return NULL;
4790f3e72b5SJason Gunthorpe }
4800f3e72b5SJason Gunthorpe 
4810f3e72b5SJason Gunthorpe /*
4820f3e72b5SJason Gunthorpe  * VFIO driver API
4830f3e72b5SJason Gunthorpe  */
/*
 * Initialize the core-owned fields of a vfio_device before registration.
 * Counterpart of vfio_uninit_group_dev().
 */
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
			 const struct vfio_device_ops *ops)
{
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;
}
EXPORT_SYMBOL_GPL(vfio_init_group_dev);
4920f3e72b5SJason Gunthorpe 
/* Release the dev_set membership taken via vfio_assign_device_set(). */
void vfio_uninit_group_dev(struct vfio_device *device)
{
	vfio_release_device_set(device);
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
4980f3e72b5SJason Gunthorpe 
/*
 * Create a fake iommu_group containing only @dev and wrap it in a new
 * vfio_group of @type (callers pass VFIO_NO_IOMMU or VFIO_EMULATED_IOMMU).
 * The vfio_group takes its own iommu_group reference, so ours is dropped
 * before returning.
 */
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}
5310f3e72b5SJason Gunthorpe 
/*
 * Resolve @dev to a vfio_group: reuse the group for its existing
 * iommu_group, create one if needed, or (with noiommu enabled) fabricate
 * an unsafe no-IOMMU group.  Returns the group with a reference held, or
 * an ERR_PTR.
 */
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}
5750f3e72b5SJason Gunthorpe 
5760f3e72b5SJason Gunthorpe static int __vfio_register_dev(struct vfio_device *device,
5770f3e72b5SJason Gunthorpe 		struct vfio_group *group)
5780f3e72b5SJason Gunthorpe {
5790f3e72b5SJason Gunthorpe 	struct vfio_device *existing_device;
5800f3e72b5SJason Gunthorpe 
5810f3e72b5SJason Gunthorpe 	if (IS_ERR(group))
5820f3e72b5SJason Gunthorpe 		return PTR_ERR(group);
5830f3e72b5SJason Gunthorpe 
5840f3e72b5SJason Gunthorpe 	/*
5850f3e72b5SJason Gunthorpe 	 * If the driver doesn't specify a set then the device is added to a
5860f3e72b5SJason Gunthorpe 	 * singleton set just for itself.
5870f3e72b5SJason Gunthorpe 	 */
5880f3e72b5SJason Gunthorpe 	if (!device->dev_set)
5890f3e72b5SJason Gunthorpe 		vfio_assign_device_set(device, device);
5900f3e72b5SJason Gunthorpe 
5910f3e72b5SJason Gunthorpe 	existing_device = vfio_group_get_device(group, device->dev);
5920f3e72b5SJason Gunthorpe 	if (existing_device) {
5930f3e72b5SJason Gunthorpe 		dev_WARN(device->dev, "Device already exists on group %d\n",
5940f3e72b5SJason Gunthorpe 			 iommu_group_id(group->iommu_group));
5950f3e72b5SJason Gunthorpe 		vfio_device_put(existing_device);
5960f3e72b5SJason Gunthorpe 		if (group->type == VFIO_NO_IOMMU ||
5970f3e72b5SJason Gunthorpe 		    group->type == VFIO_EMULATED_IOMMU)
5980f3e72b5SJason Gunthorpe 			iommu_group_remove_device(device->dev);
5990f3e72b5SJason Gunthorpe 		vfio_group_put(group);
6000f3e72b5SJason Gunthorpe 		return -EBUSY;
6010f3e72b5SJason Gunthorpe 	}
6020f3e72b5SJason Gunthorpe 
6030f3e72b5SJason Gunthorpe 	/* Our reference on group is moved to the device */
6040f3e72b5SJason Gunthorpe 	device->group = group;
6050f3e72b5SJason Gunthorpe 
6060f3e72b5SJason Gunthorpe 	/* Refcounting can't start until the driver calls register */
6070f3e72b5SJason Gunthorpe 	refcount_set(&device->refcount, 1);
6080f3e72b5SJason Gunthorpe 
6090f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
6100f3e72b5SJason Gunthorpe 	list_add(&device->group_next, &group->device_list);
6110f3e72b5SJason Gunthorpe 	group->dev_counter++;
6120f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
6130f3e72b5SJason Gunthorpe 
6140f3e72b5SJason Gunthorpe 	return 0;
6150f3e72b5SJason Gunthorpe }
6160f3e72b5SJason Gunthorpe 
6170f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device)
6180f3e72b5SJason Gunthorpe {
6190f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
6200f3e72b5SJason Gunthorpe 		vfio_group_find_or_alloc(device->dev));
6210f3e72b5SJason Gunthorpe }
6220f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev);
6230f3e72b5SJason Gunthorpe 
6240f3e72b5SJason Gunthorpe /*
6250f3e72b5SJason Gunthorpe  * Register a virtual device without IOMMU backing.  The user of this
6260f3e72b5SJason Gunthorpe  * device must not be able to directly trigger unmediated DMA.
6270f3e72b5SJason Gunthorpe  */
6280f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device)
6290f3e72b5SJason Gunthorpe {
6300f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
6310f3e72b5SJason Gunthorpe 		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
6320f3e72b5SJason Gunthorpe }
6330f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
6340f3e72b5SJason Gunthorpe 
6350f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
6360f3e72b5SJason Gunthorpe 						     char *buf)
6370f3e72b5SJason Gunthorpe {
6380f3e72b5SJason Gunthorpe 	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
6390f3e72b5SJason Gunthorpe 
6400f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
6410f3e72b5SJason Gunthorpe 	list_for_each_entry(it, &group->device_list, group_next) {
6420f3e72b5SJason Gunthorpe 		int ret;
6430f3e72b5SJason Gunthorpe 
6440f3e72b5SJason Gunthorpe 		if (it->ops->match) {
6450f3e72b5SJason Gunthorpe 			ret = it->ops->match(it, buf);
6460f3e72b5SJason Gunthorpe 			if (ret < 0) {
6470f3e72b5SJason Gunthorpe 				device = ERR_PTR(ret);
6480f3e72b5SJason Gunthorpe 				break;
6490f3e72b5SJason Gunthorpe 			}
6500f3e72b5SJason Gunthorpe 		} else {
6510f3e72b5SJason Gunthorpe 			ret = !strcmp(dev_name(it->dev), buf);
6520f3e72b5SJason Gunthorpe 		}
6530f3e72b5SJason Gunthorpe 
6540f3e72b5SJason Gunthorpe 		if (ret && vfio_device_try_get(it)) {
6550f3e72b5SJason Gunthorpe 			device = it;
6560f3e72b5SJason Gunthorpe 			break;
6570f3e72b5SJason Gunthorpe 		}
6580f3e72b5SJason Gunthorpe 	}
6590f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
6600f3e72b5SJason Gunthorpe 
6610f3e72b5SJason Gunthorpe 	return device;
6620f3e72b5SJason Gunthorpe }
6630f3e72b5SJason Gunthorpe 
6640f3e72b5SJason Gunthorpe /*
6650f3e72b5SJason Gunthorpe  * Decrement the device reference count and wait for the device to be
6660f3e72b5SJason Gunthorpe  * removed.  Open file descriptors for the device... */
6670f3e72b5SJason Gunthorpe void vfio_unregister_group_dev(struct vfio_device *device)
6680f3e72b5SJason Gunthorpe {
6690f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
6700f3e72b5SJason Gunthorpe 	unsigned int i = 0;
6710f3e72b5SJason Gunthorpe 	bool interrupted = false;
6720f3e72b5SJason Gunthorpe 	long rc;
6730f3e72b5SJason Gunthorpe 
6740f3e72b5SJason Gunthorpe 	vfio_device_put(device);
6750f3e72b5SJason Gunthorpe 	rc = try_wait_for_completion(&device->comp);
6760f3e72b5SJason Gunthorpe 	while (rc <= 0) {
6770f3e72b5SJason Gunthorpe 		if (device->ops->request)
6780f3e72b5SJason Gunthorpe 			device->ops->request(device, i++);
6790f3e72b5SJason Gunthorpe 
6800f3e72b5SJason Gunthorpe 		if (interrupted) {
6810f3e72b5SJason Gunthorpe 			rc = wait_for_completion_timeout(&device->comp,
6820f3e72b5SJason Gunthorpe 							 HZ * 10);
6830f3e72b5SJason Gunthorpe 		} else {
6840f3e72b5SJason Gunthorpe 			rc = wait_for_completion_interruptible_timeout(
6850f3e72b5SJason Gunthorpe 				&device->comp, HZ * 10);
6860f3e72b5SJason Gunthorpe 			if (rc < 0) {
6870f3e72b5SJason Gunthorpe 				interrupted = true;
6880f3e72b5SJason Gunthorpe 				dev_warn(device->dev,
6890f3e72b5SJason Gunthorpe 					 "Device is currently in use, task"
6900f3e72b5SJason Gunthorpe 					 " \"%s\" (%d) "
6910f3e72b5SJason Gunthorpe 					 "blocked until device is released",
6920f3e72b5SJason Gunthorpe 					 current->comm, task_pid_nr(current));
6930f3e72b5SJason Gunthorpe 			}
6940f3e72b5SJason Gunthorpe 		}
6950f3e72b5SJason Gunthorpe 	}
6960f3e72b5SJason Gunthorpe 
6970f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
6980f3e72b5SJason Gunthorpe 	list_del(&device->group_next);
6990f3e72b5SJason Gunthorpe 	group->dev_counter--;
7000f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
7010f3e72b5SJason Gunthorpe 
7020f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
7030f3e72b5SJason Gunthorpe 		iommu_group_remove_device(device->dev);
7040f3e72b5SJason Gunthorpe 
7050f3e72b5SJason Gunthorpe 	/* Matches the get in vfio_register_group_dev() */
7060f3e72b5SJason Gunthorpe 	vfio_group_put(group);
7070f3e72b5SJason Gunthorpe }
7080f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
7090f3e72b5SJason Gunthorpe 
7100f3e72b5SJason Gunthorpe /*
7110f3e72b5SJason Gunthorpe  * VFIO base fd, /dev/vfio/vfio
7120f3e72b5SJason Gunthorpe  */
7130f3e72b5SJason Gunthorpe static long vfio_ioctl_check_extension(struct vfio_container *container,
7140f3e72b5SJason Gunthorpe 				       unsigned long arg)
7150f3e72b5SJason Gunthorpe {
7160f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
7170f3e72b5SJason Gunthorpe 	long ret = 0;
7180f3e72b5SJason Gunthorpe 
7190f3e72b5SJason Gunthorpe 	down_read(&container->group_lock);
7200f3e72b5SJason Gunthorpe 
7210f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
7220f3e72b5SJason Gunthorpe 
7230f3e72b5SJason Gunthorpe 	switch (arg) {
7240f3e72b5SJason Gunthorpe 		/* No base extensions yet */
7250f3e72b5SJason Gunthorpe 	default:
7260f3e72b5SJason Gunthorpe 		/*
7270f3e72b5SJason Gunthorpe 		 * If no driver is set, poll all registered drivers for
7280f3e72b5SJason Gunthorpe 		 * extensions and return the first positive result.  If
7290f3e72b5SJason Gunthorpe 		 * a driver is already set, further queries will be passed
7300f3e72b5SJason Gunthorpe 		 * only to that driver.
7310f3e72b5SJason Gunthorpe 		 */
7320f3e72b5SJason Gunthorpe 		if (!driver) {
7330f3e72b5SJason Gunthorpe 			mutex_lock(&vfio.iommu_drivers_lock);
7340f3e72b5SJason Gunthorpe 			list_for_each_entry(driver, &vfio.iommu_drivers_list,
7350f3e72b5SJason Gunthorpe 					    vfio_next) {
7360f3e72b5SJason Gunthorpe 
7370f3e72b5SJason Gunthorpe 				if (!list_empty(&container->group_list) &&
7380f3e72b5SJason Gunthorpe 				    !vfio_iommu_driver_allowed(container,
7390f3e72b5SJason Gunthorpe 							       driver))
7400f3e72b5SJason Gunthorpe 					continue;
7410f3e72b5SJason Gunthorpe 				if (!try_module_get(driver->ops->owner))
7420f3e72b5SJason Gunthorpe 					continue;
7430f3e72b5SJason Gunthorpe 
7440f3e72b5SJason Gunthorpe 				ret = driver->ops->ioctl(NULL,
7450f3e72b5SJason Gunthorpe 							 VFIO_CHECK_EXTENSION,
7460f3e72b5SJason Gunthorpe 							 arg);
7470f3e72b5SJason Gunthorpe 				module_put(driver->ops->owner);
7480f3e72b5SJason Gunthorpe 				if (ret > 0)
7490f3e72b5SJason Gunthorpe 					break;
7500f3e72b5SJason Gunthorpe 			}
7510f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
7520f3e72b5SJason Gunthorpe 		} else
7530f3e72b5SJason Gunthorpe 			ret = driver->ops->ioctl(container->iommu_data,
7540f3e72b5SJason Gunthorpe 						 VFIO_CHECK_EXTENSION, arg);
7550f3e72b5SJason Gunthorpe 	}
7560f3e72b5SJason Gunthorpe 
7570f3e72b5SJason Gunthorpe 	up_read(&container->group_lock);
7580f3e72b5SJason Gunthorpe 
7590f3e72b5SJason Gunthorpe 	return ret;
7600f3e72b5SJason Gunthorpe }
7610f3e72b5SJason Gunthorpe 
7620f3e72b5SJason Gunthorpe /* hold write lock on container->group_lock */
7630f3e72b5SJason Gunthorpe static int __vfio_container_attach_groups(struct vfio_container *container,
7640f3e72b5SJason Gunthorpe 					  struct vfio_iommu_driver *driver,
7650f3e72b5SJason Gunthorpe 					  void *data)
7660f3e72b5SJason Gunthorpe {
7670f3e72b5SJason Gunthorpe 	struct vfio_group *group;
7680f3e72b5SJason Gunthorpe 	int ret = -ENODEV;
7690f3e72b5SJason Gunthorpe 
7700f3e72b5SJason Gunthorpe 	list_for_each_entry(group, &container->group_list, container_next) {
7710f3e72b5SJason Gunthorpe 		ret = driver->ops->attach_group(data, group->iommu_group,
7720f3e72b5SJason Gunthorpe 						group->type);
7730f3e72b5SJason Gunthorpe 		if (ret)
7740f3e72b5SJason Gunthorpe 			goto unwind;
7750f3e72b5SJason Gunthorpe 	}
7760f3e72b5SJason Gunthorpe 
7770f3e72b5SJason Gunthorpe 	return ret;
7780f3e72b5SJason Gunthorpe 
7790f3e72b5SJason Gunthorpe unwind:
7800f3e72b5SJason Gunthorpe 	list_for_each_entry_continue_reverse(group, &container->group_list,
7810f3e72b5SJason Gunthorpe 					     container_next) {
7820f3e72b5SJason Gunthorpe 		driver->ops->detach_group(data, group->iommu_group);
7830f3e72b5SJason Gunthorpe 	}
7840f3e72b5SJason Gunthorpe 
7850f3e72b5SJason Gunthorpe 	return ret;
7860f3e72b5SJason Gunthorpe }
7870f3e72b5SJason Gunthorpe 
7880f3e72b5SJason Gunthorpe static long vfio_ioctl_set_iommu(struct vfio_container *container,
7890f3e72b5SJason Gunthorpe 				 unsigned long arg)
7900f3e72b5SJason Gunthorpe {
7910f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
7920f3e72b5SJason Gunthorpe 	long ret = -ENODEV;
7930f3e72b5SJason Gunthorpe 
7940f3e72b5SJason Gunthorpe 	down_write(&container->group_lock);
7950f3e72b5SJason Gunthorpe 
7960f3e72b5SJason Gunthorpe 	/*
7970f3e72b5SJason Gunthorpe 	 * The container is designed to be an unprivileged interface while
7980f3e72b5SJason Gunthorpe 	 * the group can be assigned to specific users.  Therefore, only by
7990f3e72b5SJason Gunthorpe 	 * adding a group to a container does the user get the privilege of
8000f3e72b5SJason Gunthorpe 	 * enabling the iommu, which may allocate finite resources.  There
8010f3e72b5SJason Gunthorpe 	 * is no unset_iommu, but by removing all the groups from a container,
8020f3e72b5SJason Gunthorpe 	 * the container is deprivileged and returns to an unset state.
8030f3e72b5SJason Gunthorpe 	 */
8040f3e72b5SJason Gunthorpe 	if (list_empty(&container->group_list) || container->iommu_driver) {
8050f3e72b5SJason Gunthorpe 		up_write(&container->group_lock);
8060f3e72b5SJason Gunthorpe 		return -EINVAL;
8070f3e72b5SJason Gunthorpe 	}
8080f3e72b5SJason Gunthorpe 
8090f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
8100f3e72b5SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
8110f3e72b5SJason Gunthorpe 		void *data;
8120f3e72b5SJason Gunthorpe 
8130f3e72b5SJason Gunthorpe 		if (!vfio_iommu_driver_allowed(container, driver))
8140f3e72b5SJason Gunthorpe 			continue;
8150f3e72b5SJason Gunthorpe 		if (!try_module_get(driver->ops->owner))
8160f3e72b5SJason Gunthorpe 			continue;
8170f3e72b5SJason Gunthorpe 
8180f3e72b5SJason Gunthorpe 		/*
8190f3e72b5SJason Gunthorpe 		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
8200f3e72b5SJason Gunthorpe 		 * so test which iommu driver reported support for this
8210f3e72b5SJason Gunthorpe 		 * extension and call open on them.  We also pass them the
8220f3e72b5SJason Gunthorpe 		 * magic, allowing a single driver to support multiple
8230f3e72b5SJason Gunthorpe 		 * interfaces if they'd like.
8240f3e72b5SJason Gunthorpe 		 */
8250f3e72b5SJason Gunthorpe 		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
8260f3e72b5SJason Gunthorpe 			module_put(driver->ops->owner);
8270f3e72b5SJason Gunthorpe 			continue;
8280f3e72b5SJason Gunthorpe 		}
8290f3e72b5SJason Gunthorpe 
8300f3e72b5SJason Gunthorpe 		data = driver->ops->open(arg);
8310f3e72b5SJason Gunthorpe 		if (IS_ERR(data)) {
8320f3e72b5SJason Gunthorpe 			ret = PTR_ERR(data);
8330f3e72b5SJason Gunthorpe 			module_put(driver->ops->owner);
8340f3e72b5SJason Gunthorpe 			continue;
8350f3e72b5SJason Gunthorpe 		}
8360f3e72b5SJason Gunthorpe 
8370f3e72b5SJason Gunthorpe 		ret = __vfio_container_attach_groups(container, driver, data);
8380f3e72b5SJason Gunthorpe 		if (ret) {
8390f3e72b5SJason Gunthorpe 			driver->ops->release(data);
8400f3e72b5SJason Gunthorpe 			module_put(driver->ops->owner);
8410f3e72b5SJason Gunthorpe 			continue;
8420f3e72b5SJason Gunthorpe 		}
8430f3e72b5SJason Gunthorpe 
8440f3e72b5SJason Gunthorpe 		container->iommu_driver = driver;
8450f3e72b5SJason Gunthorpe 		container->iommu_data = data;
8460f3e72b5SJason Gunthorpe 		break;
8470f3e72b5SJason Gunthorpe 	}
8480f3e72b5SJason Gunthorpe 
8490f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
8500f3e72b5SJason Gunthorpe 	up_write(&container->group_lock);
8510f3e72b5SJason Gunthorpe 
8520f3e72b5SJason Gunthorpe 	return ret;
8530f3e72b5SJason Gunthorpe }
8540f3e72b5SJason Gunthorpe 
8550f3e72b5SJason Gunthorpe static long vfio_fops_unl_ioctl(struct file *filep,
8560f3e72b5SJason Gunthorpe 				unsigned int cmd, unsigned long arg)
8570f3e72b5SJason Gunthorpe {
8580f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
8590f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
8600f3e72b5SJason Gunthorpe 	void *data;
8610f3e72b5SJason Gunthorpe 	long ret = -EINVAL;
8620f3e72b5SJason Gunthorpe 
8630f3e72b5SJason Gunthorpe 	if (!container)
8640f3e72b5SJason Gunthorpe 		return ret;
8650f3e72b5SJason Gunthorpe 
8660f3e72b5SJason Gunthorpe 	switch (cmd) {
8670f3e72b5SJason Gunthorpe 	case VFIO_GET_API_VERSION:
8680f3e72b5SJason Gunthorpe 		ret = VFIO_API_VERSION;
8690f3e72b5SJason Gunthorpe 		break;
8700f3e72b5SJason Gunthorpe 	case VFIO_CHECK_EXTENSION:
8710f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(container, arg);
8720f3e72b5SJason Gunthorpe 		break;
8730f3e72b5SJason Gunthorpe 	case VFIO_SET_IOMMU:
8740f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_set_iommu(container, arg);
8750f3e72b5SJason Gunthorpe 		break;
8760f3e72b5SJason Gunthorpe 	default:
8770f3e72b5SJason Gunthorpe 		driver = container->iommu_driver;
8780f3e72b5SJason Gunthorpe 		data = container->iommu_data;
8790f3e72b5SJason Gunthorpe 
8800f3e72b5SJason Gunthorpe 		if (driver) /* passthrough all unrecognized ioctls */
8810f3e72b5SJason Gunthorpe 			ret = driver->ops->ioctl(data, cmd, arg);
8820f3e72b5SJason Gunthorpe 	}
8830f3e72b5SJason Gunthorpe 
8840f3e72b5SJason Gunthorpe 	return ret;
8850f3e72b5SJason Gunthorpe }
8860f3e72b5SJason Gunthorpe 
8870f3e72b5SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep)
8880f3e72b5SJason Gunthorpe {
8890f3e72b5SJason Gunthorpe 	struct vfio_container *container;
8900f3e72b5SJason Gunthorpe 
8910f3e72b5SJason Gunthorpe 	container = kzalloc(sizeof(*container), GFP_KERNEL);
8920f3e72b5SJason Gunthorpe 	if (!container)
8930f3e72b5SJason Gunthorpe 		return -ENOMEM;
8940f3e72b5SJason Gunthorpe 
8950f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&container->group_list);
8960f3e72b5SJason Gunthorpe 	init_rwsem(&container->group_lock);
8970f3e72b5SJason Gunthorpe 	kref_init(&container->kref);
8980f3e72b5SJason Gunthorpe 
8990f3e72b5SJason Gunthorpe 	filep->private_data = container;
9000f3e72b5SJason Gunthorpe 
9010f3e72b5SJason Gunthorpe 	return 0;
9020f3e72b5SJason Gunthorpe }
9030f3e72b5SJason Gunthorpe 
9040f3e72b5SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep)
9050f3e72b5SJason Gunthorpe {
9060f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
9070f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver = container->iommu_driver;
9080f3e72b5SJason Gunthorpe 
9090f3e72b5SJason Gunthorpe 	if (driver && driver->ops->notify)
9100f3e72b5SJason Gunthorpe 		driver->ops->notify(container->iommu_data,
9110f3e72b5SJason Gunthorpe 				    VFIO_IOMMU_CONTAINER_CLOSE);
9120f3e72b5SJason Gunthorpe 
9130f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
9140f3e72b5SJason Gunthorpe 
9150f3e72b5SJason Gunthorpe 	vfio_container_put(container);
9160f3e72b5SJason Gunthorpe 
9170f3e72b5SJason Gunthorpe 	return 0;
9180f3e72b5SJason Gunthorpe }
9190f3e72b5SJason Gunthorpe 
9200f3e72b5SJason Gunthorpe static const struct file_operations vfio_fops = {
9210f3e72b5SJason Gunthorpe 	.owner		= THIS_MODULE,
9220f3e72b5SJason Gunthorpe 	.open		= vfio_fops_open,
9230f3e72b5SJason Gunthorpe 	.release	= vfio_fops_release,
9240f3e72b5SJason Gunthorpe 	.unlocked_ioctl	= vfio_fops_unl_ioctl,
9250f3e72b5SJason Gunthorpe 	.compat_ioctl	= compat_ptr_ioctl,
9260f3e72b5SJason Gunthorpe };
9270f3e72b5SJason Gunthorpe 
9280f3e72b5SJason Gunthorpe /*
9290f3e72b5SJason Gunthorpe  * VFIO Group fd, /dev/vfio/$GROUP
9300f3e72b5SJason Gunthorpe  */
9310f3e72b5SJason Gunthorpe static void __vfio_group_unset_container(struct vfio_group *group)
9320f3e72b5SJason Gunthorpe {
9330f3e72b5SJason Gunthorpe 	struct vfio_container *container = group->container;
9340f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
9350f3e72b5SJason Gunthorpe 
9360f3e72b5SJason Gunthorpe 	lockdep_assert_held_write(&group->group_rwsem);
9370f3e72b5SJason Gunthorpe 
9380f3e72b5SJason Gunthorpe 	down_write(&container->group_lock);
9390f3e72b5SJason Gunthorpe 
9400f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
9410f3e72b5SJason Gunthorpe 	if (driver)
9420f3e72b5SJason Gunthorpe 		driver->ops->detach_group(container->iommu_data,
9430f3e72b5SJason Gunthorpe 					  group->iommu_group);
9440f3e72b5SJason Gunthorpe 
9450f3e72b5SJason Gunthorpe 	if (group->type == VFIO_IOMMU)
9460f3e72b5SJason Gunthorpe 		iommu_group_release_dma_owner(group->iommu_group);
9470f3e72b5SJason Gunthorpe 
9480f3e72b5SJason Gunthorpe 	group->container = NULL;
9490f3e72b5SJason Gunthorpe 	group->container_users = 0;
9500f3e72b5SJason Gunthorpe 	list_del(&group->container_next);
9510f3e72b5SJason Gunthorpe 
9520f3e72b5SJason Gunthorpe 	/* Detaching the last group deprivileges a container, remove iommu */
9530f3e72b5SJason Gunthorpe 	if (driver && list_empty(&container->group_list)) {
9540f3e72b5SJason Gunthorpe 		driver->ops->release(container->iommu_data);
9550f3e72b5SJason Gunthorpe 		module_put(driver->ops->owner);
9560f3e72b5SJason Gunthorpe 		container->iommu_driver = NULL;
9570f3e72b5SJason Gunthorpe 		container->iommu_data = NULL;
9580f3e72b5SJason Gunthorpe 	}
9590f3e72b5SJason Gunthorpe 
9600f3e72b5SJason Gunthorpe 	up_write(&container->group_lock);
9610f3e72b5SJason Gunthorpe 
9620f3e72b5SJason Gunthorpe 	vfio_container_put(container);
9630f3e72b5SJason Gunthorpe }
9640f3e72b5SJason Gunthorpe 
9650f3e72b5SJason Gunthorpe /*
9660f3e72b5SJason Gunthorpe  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
9670f3e72b5SJason Gunthorpe  * if there was no container to unset.  Since the ioctl is called on
9680f3e72b5SJason Gunthorpe  * the group, we know that still exists, therefore the only valid
9690f3e72b5SJason Gunthorpe  * transition here is 1->0.
9700f3e72b5SJason Gunthorpe  */
9710f3e72b5SJason Gunthorpe static int vfio_group_unset_container(struct vfio_group *group)
9720f3e72b5SJason Gunthorpe {
9730f3e72b5SJason Gunthorpe 	lockdep_assert_held_write(&group->group_rwsem);
9740f3e72b5SJason Gunthorpe 
9750f3e72b5SJason Gunthorpe 	if (!group->container)
9760f3e72b5SJason Gunthorpe 		return -EINVAL;
9770f3e72b5SJason Gunthorpe 	if (group->container_users != 1)
9780f3e72b5SJason Gunthorpe 		return -EBUSY;
9790f3e72b5SJason Gunthorpe 	__vfio_group_unset_container(group);
9800f3e72b5SJason Gunthorpe 	return 0;
9810f3e72b5SJason Gunthorpe }
9820f3e72b5SJason Gunthorpe 
983*67671f15SJason Gunthorpe static int vfio_group_ioctl_set_container(struct vfio_group *group,
984*67671f15SJason Gunthorpe 					  int __user *arg)
9850f3e72b5SJason Gunthorpe {
9860f3e72b5SJason Gunthorpe 	struct fd f;
9870f3e72b5SJason Gunthorpe 	struct vfio_container *container;
9880f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
989*67671f15SJason Gunthorpe 	int container_fd;
9900f3e72b5SJason Gunthorpe 	int ret = 0;
9910f3e72b5SJason Gunthorpe 
9920f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
9930f3e72b5SJason Gunthorpe 		return -EPERM;
9940f3e72b5SJason Gunthorpe 
995*67671f15SJason Gunthorpe 	if (get_user(container_fd, arg))
996*67671f15SJason Gunthorpe 		return -EFAULT;
997*67671f15SJason Gunthorpe 	if (container_fd < 0)
998*67671f15SJason Gunthorpe 		return -EINVAL;
9990f3e72b5SJason Gunthorpe 	f = fdget(container_fd);
10000f3e72b5SJason Gunthorpe 	if (!f.file)
10010f3e72b5SJason Gunthorpe 		return -EBADF;
10020f3e72b5SJason Gunthorpe 
10030f3e72b5SJason Gunthorpe 	/* Sanity check, is this really our fd? */
10040f3e72b5SJason Gunthorpe 	if (f.file->f_op != &vfio_fops) {
1005*67671f15SJason Gunthorpe 		ret = -EINVAL;
1006*67671f15SJason Gunthorpe 		goto out_fdput;
10070f3e72b5SJason Gunthorpe 	}
10080f3e72b5SJason Gunthorpe 	container = f.file->private_data;
10090f3e72b5SJason Gunthorpe 	WARN_ON(!container); /* fget ensures we don't race vfio_release */
10100f3e72b5SJason Gunthorpe 
1011*67671f15SJason Gunthorpe 	down_write(&group->group_rwsem);
1012*67671f15SJason Gunthorpe 
1013*67671f15SJason Gunthorpe 	if (group->container || WARN_ON(group->container_users)) {
1014*67671f15SJason Gunthorpe 		ret = -EINVAL;
1015*67671f15SJason Gunthorpe 		goto out_unlock_group;
1016*67671f15SJason Gunthorpe 	}
1017*67671f15SJason Gunthorpe 
10180f3e72b5SJason Gunthorpe 	down_write(&container->group_lock);
10190f3e72b5SJason Gunthorpe 
10200f3e72b5SJason Gunthorpe 	/* Real groups and fake groups cannot mix */
10210f3e72b5SJason Gunthorpe 	if (!list_empty(&container->group_list) &&
10220f3e72b5SJason Gunthorpe 	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
10230f3e72b5SJason Gunthorpe 		ret = -EPERM;
1024*67671f15SJason Gunthorpe 		goto out_unlock_container;
10250f3e72b5SJason Gunthorpe 	}
10260f3e72b5SJason Gunthorpe 
10270f3e72b5SJason Gunthorpe 	if (group->type == VFIO_IOMMU) {
10280f3e72b5SJason Gunthorpe 		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
10290f3e72b5SJason Gunthorpe 		if (ret)
1030*67671f15SJason Gunthorpe 			goto out_unlock_container;
10310f3e72b5SJason Gunthorpe 	}
10320f3e72b5SJason Gunthorpe 
10330f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
10340f3e72b5SJason Gunthorpe 	if (driver) {
10350f3e72b5SJason Gunthorpe 		ret = driver->ops->attach_group(container->iommu_data,
10360f3e72b5SJason Gunthorpe 						group->iommu_group,
10370f3e72b5SJason Gunthorpe 						group->type);
10380f3e72b5SJason Gunthorpe 		if (ret) {
10390f3e72b5SJason Gunthorpe 			if (group->type == VFIO_IOMMU)
10400f3e72b5SJason Gunthorpe 				iommu_group_release_dma_owner(
10410f3e72b5SJason Gunthorpe 					group->iommu_group);
1042*67671f15SJason Gunthorpe 			goto out_unlock_container;
10430f3e72b5SJason Gunthorpe 		}
10440f3e72b5SJason Gunthorpe 	}
10450f3e72b5SJason Gunthorpe 
10460f3e72b5SJason Gunthorpe 	group->container = container;
10470f3e72b5SJason Gunthorpe 	group->container_users = 1;
10480f3e72b5SJason Gunthorpe 	container->noiommu = (group->type == VFIO_NO_IOMMU);
10490f3e72b5SJason Gunthorpe 	list_add(&group->container_next, &container->group_list);
10500f3e72b5SJason Gunthorpe 
10510f3e72b5SJason Gunthorpe 	/* Get a reference on the container and mark a user within the group */
10520f3e72b5SJason Gunthorpe 	vfio_container_get(container);
10530f3e72b5SJason Gunthorpe 
1054*67671f15SJason Gunthorpe out_unlock_container:
10550f3e72b5SJason Gunthorpe 	up_write(&container->group_lock);
1056*67671f15SJason Gunthorpe out_unlock_group:
1057*67671f15SJason Gunthorpe 	up_write(&group->group_rwsem);
1058*67671f15SJason Gunthorpe out_fdput:
10590f3e72b5SJason Gunthorpe 	fdput(f);
10600f3e72b5SJason Gunthorpe 	return ret;
10610f3e72b5SJason Gunthorpe }
10620f3e72b5SJason Gunthorpe 
10630f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops;
10640f3e72b5SJason Gunthorpe 
10650f3e72b5SJason Gunthorpe /* true if the vfio_device has open_device() called but not close_device() */
10660f3e72b5SJason Gunthorpe static bool vfio_assert_device_open(struct vfio_device *device)
10670f3e72b5SJason Gunthorpe {
10680f3e72b5SJason Gunthorpe 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
10690f3e72b5SJason Gunthorpe }
10700f3e72b5SJason Gunthorpe 
10710f3e72b5SJason Gunthorpe static int vfio_device_assign_container(struct vfio_device *device)
10720f3e72b5SJason Gunthorpe {
10730f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
10740f3e72b5SJason Gunthorpe 
10750f3e72b5SJason Gunthorpe 	lockdep_assert_held_write(&group->group_rwsem);
10760f3e72b5SJason Gunthorpe 
10770f3e72b5SJason Gunthorpe 	if (!group->container || !group->container->iommu_driver ||
10780f3e72b5SJason Gunthorpe 	    WARN_ON(!group->container_users))
10790f3e72b5SJason Gunthorpe 		return -EINVAL;
10800f3e72b5SJason Gunthorpe 
10810f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
10820f3e72b5SJason Gunthorpe 		return -EPERM;
10830f3e72b5SJason Gunthorpe 
10840f3e72b5SJason Gunthorpe 	get_file(group->opened_file);
10850f3e72b5SJason Gunthorpe 	group->container_users++;
10860f3e72b5SJason Gunthorpe 	return 0;
10870f3e72b5SJason Gunthorpe }
10880f3e72b5SJason Gunthorpe 
10890f3e72b5SJason Gunthorpe static void vfio_device_unassign_container(struct vfio_device *device)
10900f3e72b5SJason Gunthorpe {
10910f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
10920f3e72b5SJason Gunthorpe 	WARN_ON(device->group->container_users <= 1);
10930f3e72b5SJason Gunthorpe 	device->group->container_users--;
10940f3e72b5SJason Gunthorpe 	fput(device->group->opened_file);
10950f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
10960f3e72b5SJason Gunthorpe }
10970f3e72b5SJason Gunthorpe 
10980f3e72b5SJason Gunthorpe static struct file *vfio_device_open(struct vfio_device *device)
10990f3e72b5SJason Gunthorpe {
11000f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *iommu_driver;
11010f3e72b5SJason Gunthorpe 	struct file *filep;
11020f3e72b5SJason Gunthorpe 	int ret;
11030f3e72b5SJason Gunthorpe 
11040f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
11050f3e72b5SJason Gunthorpe 	ret = vfio_device_assign_container(device);
11060f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
11070f3e72b5SJason Gunthorpe 	if (ret)
11080f3e72b5SJason Gunthorpe 		return ERR_PTR(ret);
11090f3e72b5SJason Gunthorpe 
11100f3e72b5SJason Gunthorpe 	if (!try_module_get(device->dev->driver->owner)) {
11110f3e72b5SJason Gunthorpe 		ret = -ENODEV;
11120f3e72b5SJason Gunthorpe 		goto err_unassign_container;
11130f3e72b5SJason Gunthorpe 	}
11140f3e72b5SJason Gunthorpe 
11150f3e72b5SJason Gunthorpe 	mutex_lock(&device->dev_set->lock);
11160f3e72b5SJason Gunthorpe 	device->open_count++;
11170f3e72b5SJason Gunthorpe 	if (device->open_count == 1) {
11180f3e72b5SJason Gunthorpe 		/*
11190f3e72b5SJason Gunthorpe 		 * Here we pass the KVM pointer with the group under the read
11200f3e72b5SJason Gunthorpe 		 * lock.  If the device driver will use it, it must obtain a
11210f3e72b5SJason Gunthorpe 		 * reference and release it during close_device.
11220f3e72b5SJason Gunthorpe 		 */
11230f3e72b5SJason Gunthorpe 		down_read(&device->group->group_rwsem);
11240f3e72b5SJason Gunthorpe 		device->kvm = device->group->kvm;
11250f3e72b5SJason Gunthorpe 
11260f3e72b5SJason Gunthorpe 		if (device->ops->open_device) {
11270f3e72b5SJason Gunthorpe 			ret = device->ops->open_device(device);
11280f3e72b5SJason Gunthorpe 			if (ret)
11290f3e72b5SJason Gunthorpe 				goto err_undo_count;
11300f3e72b5SJason Gunthorpe 		}
11310f3e72b5SJason Gunthorpe 
11320f3e72b5SJason Gunthorpe 		iommu_driver = device->group->container->iommu_driver;
11330f3e72b5SJason Gunthorpe 		if (iommu_driver && iommu_driver->ops->register_device)
11340f3e72b5SJason Gunthorpe 			iommu_driver->ops->register_device(
11350f3e72b5SJason Gunthorpe 				device->group->container->iommu_data, device);
11360f3e72b5SJason Gunthorpe 
11370f3e72b5SJason Gunthorpe 		up_read(&device->group->group_rwsem);
11380f3e72b5SJason Gunthorpe 	}
11390f3e72b5SJason Gunthorpe 	mutex_unlock(&device->dev_set->lock);
11400f3e72b5SJason Gunthorpe 
11410f3e72b5SJason Gunthorpe 	/*
11420f3e72b5SJason Gunthorpe 	 * We can't use anon_inode_getfd() because we need to modify
11430f3e72b5SJason Gunthorpe 	 * the f_mode flags directly to allow more than just ioctls
11440f3e72b5SJason Gunthorpe 	 */
11450f3e72b5SJason Gunthorpe 	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
11460f3e72b5SJason Gunthorpe 				   device, O_RDWR);
11470f3e72b5SJason Gunthorpe 	if (IS_ERR(filep)) {
11480f3e72b5SJason Gunthorpe 		ret = PTR_ERR(filep);
11490f3e72b5SJason Gunthorpe 		goto err_close_device;
11500f3e72b5SJason Gunthorpe 	}
11510f3e72b5SJason Gunthorpe 
11520f3e72b5SJason Gunthorpe 	/*
11530f3e72b5SJason Gunthorpe 	 * TODO: add an anon_inode interface to do this.
11540f3e72b5SJason Gunthorpe 	 * Appears to be missing by lack of need rather than
11550f3e72b5SJason Gunthorpe 	 * explicitly prevented.  Now there's need.
11560f3e72b5SJason Gunthorpe 	 */
11570f3e72b5SJason Gunthorpe 	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
11580f3e72b5SJason Gunthorpe 
11590f3e72b5SJason Gunthorpe 	if (device->group->type == VFIO_NO_IOMMU)
11600f3e72b5SJason Gunthorpe 		dev_warn(device->dev, "vfio-noiommu device opened by user "
11610f3e72b5SJason Gunthorpe 			 "(%s:%d)\n", current->comm, task_pid_nr(current));
11620f3e72b5SJason Gunthorpe 	/*
11630f3e72b5SJason Gunthorpe 	 * On success the ref of device is moved to the file and
11640f3e72b5SJason Gunthorpe 	 * put in vfio_device_fops_release()
11650f3e72b5SJason Gunthorpe 	 */
11660f3e72b5SJason Gunthorpe 	return filep;
11670f3e72b5SJason Gunthorpe 
11680f3e72b5SJason Gunthorpe err_close_device:
11690f3e72b5SJason Gunthorpe 	mutex_lock(&device->dev_set->lock);
11700f3e72b5SJason Gunthorpe 	down_read(&device->group->group_rwsem);
11710f3e72b5SJason Gunthorpe 	if (device->open_count == 1 && device->ops->close_device) {
11720f3e72b5SJason Gunthorpe 		device->ops->close_device(device);
11730f3e72b5SJason Gunthorpe 
11740f3e72b5SJason Gunthorpe 		iommu_driver = device->group->container->iommu_driver;
11750f3e72b5SJason Gunthorpe 		if (iommu_driver && iommu_driver->ops->unregister_device)
11760f3e72b5SJason Gunthorpe 			iommu_driver->ops->unregister_device(
11770f3e72b5SJason Gunthorpe 				device->group->container->iommu_data, device);
11780f3e72b5SJason Gunthorpe 	}
11790f3e72b5SJason Gunthorpe err_undo_count:
11800f3e72b5SJason Gunthorpe 	up_read(&device->group->group_rwsem);
11810f3e72b5SJason Gunthorpe 	device->open_count--;
11820f3e72b5SJason Gunthorpe 	if (device->open_count == 0 && device->kvm)
11830f3e72b5SJason Gunthorpe 		device->kvm = NULL;
11840f3e72b5SJason Gunthorpe 	mutex_unlock(&device->dev_set->lock);
11850f3e72b5SJason Gunthorpe 	module_put(device->dev->driver->owner);
11860f3e72b5SJason Gunthorpe err_unassign_container:
11870f3e72b5SJason Gunthorpe 	vfio_device_unassign_container(device);
11880f3e72b5SJason Gunthorpe 	return ERR_PTR(ret);
11890f3e72b5SJason Gunthorpe }
11900f3e72b5SJason Gunthorpe 
1191150ee2f9SJason Gunthorpe static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
1192150ee2f9SJason Gunthorpe 					  char __user *arg)
11930f3e72b5SJason Gunthorpe {
11940f3e72b5SJason Gunthorpe 	struct vfio_device *device;
11950f3e72b5SJason Gunthorpe 	struct file *filep;
1196150ee2f9SJason Gunthorpe 	char *buf;
11970f3e72b5SJason Gunthorpe 	int fdno;
11980f3e72b5SJason Gunthorpe 	int ret;
11990f3e72b5SJason Gunthorpe 
1200150ee2f9SJason Gunthorpe 	buf = strndup_user(arg, PAGE_SIZE);
1201150ee2f9SJason Gunthorpe 	if (IS_ERR(buf))
1202150ee2f9SJason Gunthorpe 		return PTR_ERR(buf);
1203150ee2f9SJason Gunthorpe 
12040f3e72b5SJason Gunthorpe 	device = vfio_device_get_from_name(group, buf);
1205150ee2f9SJason Gunthorpe 	kfree(buf);
12060f3e72b5SJason Gunthorpe 	if (IS_ERR(device))
12070f3e72b5SJason Gunthorpe 		return PTR_ERR(device);
12080f3e72b5SJason Gunthorpe 
12090f3e72b5SJason Gunthorpe 	fdno = get_unused_fd_flags(O_CLOEXEC);
12100f3e72b5SJason Gunthorpe 	if (fdno < 0) {
12110f3e72b5SJason Gunthorpe 		ret = fdno;
12120f3e72b5SJason Gunthorpe 		goto err_put_device;
12130f3e72b5SJason Gunthorpe 	}
12140f3e72b5SJason Gunthorpe 
12150f3e72b5SJason Gunthorpe 	filep = vfio_device_open(device);
12160f3e72b5SJason Gunthorpe 	if (IS_ERR(filep)) {
12170f3e72b5SJason Gunthorpe 		ret = PTR_ERR(filep);
12180f3e72b5SJason Gunthorpe 		goto err_put_fdno;
12190f3e72b5SJason Gunthorpe 	}
12200f3e72b5SJason Gunthorpe 
12210f3e72b5SJason Gunthorpe 	fd_install(fdno, filep);
12220f3e72b5SJason Gunthorpe 	return fdno;
12230f3e72b5SJason Gunthorpe 
12240f3e72b5SJason Gunthorpe err_put_fdno:
12250f3e72b5SJason Gunthorpe 	put_unused_fd(fdno);
12260f3e72b5SJason Gunthorpe err_put_device:
12270f3e72b5SJason Gunthorpe 	vfio_device_put(device);
12280f3e72b5SJason Gunthorpe 	return ret;
12290f3e72b5SJason Gunthorpe }
12300f3e72b5SJason Gunthorpe 
12310f3e72b5SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep,
12320f3e72b5SJason Gunthorpe 				      unsigned int cmd, unsigned long arg)
12330f3e72b5SJason Gunthorpe {
12340f3e72b5SJason Gunthorpe 	struct vfio_group *group = filep->private_data;
1235150ee2f9SJason Gunthorpe 	void __user *uarg = (void __user *)arg;
12360f3e72b5SJason Gunthorpe 	long ret = -ENOTTY;
12370f3e72b5SJason Gunthorpe 
12380f3e72b5SJason Gunthorpe 	switch (cmd) {
1239150ee2f9SJason Gunthorpe 	case VFIO_GROUP_GET_DEVICE_FD:
1240150ee2f9SJason Gunthorpe 		return vfio_group_ioctl_get_device_fd(group, uarg);
12410f3e72b5SJason Gunthorpe 	case VFIO_GROUP_GET_STATUS:
12420f3e72b5SJason Gunthorpe 	{
12430f3e72b5SJason Gunthorpe 		struct vfio_group_status status;
12440f3e72b5SJason Gunthorpe 		unsigned long minsz;
12450f3e72b5SJason Gunthorpe 
12460f3e72b5SJason Gunthorpe 		minsz = offsetofend(struct vfio_group_status, flags);
12470f3e72b5SJason Gunthorpe 
12480f3e72b5SJason Gunthorpe 		if (copy_from_user(&status, (void __user *)arg, minsz))
12490f3e72b5SJason Gunthorpe 			return -EFAULT;
12500f3e72b5SJason Gunthorpe 
12510f3e72b5SJason Gunthorpe 		if (status.argsz < minsz)
12520f3e72b5SJason Gunthorpe 			return -EINVAL;
12530f3e72b5SJason Gunthorpe 
12540f3e72b5SJason Gunthorpe 		status.flags = 0;
12550f3e72b5SJason Gunthorpe 
12560f3e72b5SJason Gunthorpe 		down_read(&group->group_rwsem);
12570f3e72b5SJason Gunthorpe 		if (group->container)
12580f3e72b5SJason Gunthorpe 			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
12590f3e72b5SJason Gunthorpe 					VFIO_GROUP_FLAGS_VIABLE;
12600f3e72b5SJason Gunthorpe 		else if (!iommu_group_dma_owner_claimed(group->iommu_group))
12610f3e72b5SJason Gunthorpe 			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
12620f3e72b5SJason Gunthorpe 		up_read(&group->group_rwsem);
12630f3e72b5SJason Gunthorpe 
12640f3e72b5SJason Gunthorpe 		if (copy_to_user((void __user *)arg, &status, minsz))
12650f3e72b5SJason Gunthorpe 			return -EFAULT;
12660f3e72b5SJason Gunthorpe 
12670f3e72b5SJason Gunthorpe 		ret = 0;
12680f3e72b5SJason Gunthorpe 		break;
12690f3e72b5SJason Gunthorpe 	}
12700f3e72b5SJason Gunthorpe 	case VFIO_GROUP_SET_CONTAINER:
1271*67671f15SJason Gunthorpe 		return vfio_group_ioctl_set_container(group, uarg);
12720f3e72b5SJason Gunthorpe 	case VFIO_GROUP_UNSET_CONTAINER:
12730f3e72b5SJason Gunthorpe 		down_write(&group->group_rwsem);
12740f3e72b5SJason Gunthorpe 		ret = vfio_group_unset_container(group);
12750f3e72b5SJason Gunthorpe 		up_write(&group->group_rwsem);
12760f3e72b5SJason Gunthorpe 		break;
12770f3e72b5SJason Gunthorpe 	}
12780f3e72b5SJason Gunthorpe 
12790f3e72b5SJason Gunthorpe 	return ret;
12800f3e72b5SJason Gunthorpe }
12810f3e72b5SJason Gunthorpe 
12820f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep)
12830f3e72b5SJason Gunthorpe {
12840f3e72b5SJason Gunthorpe 	struct vfio_group *group =
12850f3e72b5SJason Gunthorpe 		container_of(inode->i_cdev, struct vfio_group, cdev);
12860f3e72b5SJason Gunthorpe 	int ret;
12870f3e72b5SJason Gunthorpe 
12880f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
12890f3e72b5SJason Gunthorpe 
12900f3e72b5SJason Gunthorpe 	/* users can be zero if this races with vfio_group_put() */
12910f3e72b5SJason Gunthorpe 	if (!refcount_inc_not_zero(&group->users)) {
12920f3e72b5SJason Gunthorpe 		ret = -ENODEV;
12930f3e72b5SJason Gunthorpe 		goto err_unlock;
12940f3e72b5SJason Gunthorpe 	}
12950f3e72b5SJason Gunthorpe 
12960f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
12970f3e72b5SJason Gunthorpe 		ret = -EPERM;
12980f3e72b5SJason Gunthorpe 		goto err_put;
12990f3e72b5SJason Gunthorpe 	}
13000f3e72b5SJason Gunthorpe 
13010f3e72b5SJason Gunthorpe 	/*
13020f3e72b5SJason Gunthorpe 	 * Do we need multiple instances of the group open?  Seems not.
13030f3e72b5SJason Gunthorpe 	 */
13040f3e72b5SJason Gunthorpe 	if (group->opened_file) {
13050f3e72b5SJason Gunthorpe 		ret = -EBUSY;
13060f3e72b5SJason Gunthorpe 		goto err_put;
13070f3e72b5SJason Gunthorpe 	}
13080f3e72b5SJason Gunthorpe 	group->opened_file = filep;
13090f3e72b5SJason Gunthorpe 	filep->private_data = group;
13100f3e72b5SJason Gunthorpe 
13110f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
13120f3e72b5SJason Gunthorpe 	return 0;
13130f3e72b5SJason Gunthorpe err_put:
13140f3e72b5SJason Gunthorpe 	vfio_group_put(group);
13150f3e72b5SJason Gunthorpe err_unlock:
13160f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
13170f3e72b5SJason Gunthorpe 	return ret;
13180f3e72b5SJason Gunthorpe }
13190f3e72b5SJason Gunthorpe 
/*
 * Release of the group character device: detach any container still attached
 * and drop the reference taken in vfio_group_fops_open().
 */
static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	down_write(&group->group_rwsem);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container) {
		/*
		 * With no open devices the group file itself must be the last
		 * remaining user of the container.
		 */
		WARN_ON(group->container_users != 1);
		__vfio_group_unset_container(group);
	}
	group->opened_file = NULL;
	up_write(&group->group_rwsem);

	/* Pairs with the refcount_inc_not_zero() in vfio_group_fops_open() */
	vfio_group_put(group);

	return 0;
}
13430f3e72b5SJason Gunthorpe 
/* File operations for the VFIO group character device */
static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
13510f3e72b5SJason Gunthorpe 
13520f3e72b5SJason Gunthorpe /*
13530f3e72b5SJason Gunthorpe  * VFIO Device fd
13540f3e72b5SJason Gunthorpe  */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	/* Only the last close of the device invokes the driver's close op */
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	/*
	 * NOTE(review): group->container is dereferenced unconditionally
	 * here — this relies on an open device FD keeping the group attached
	 * to its container; group_rwsem stabilizes the pointer.
	 */
	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* The KVM association only lasts while the device is open */
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	/* Balances the module reference taken when the device was opened */
	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	/* Drop the device ref that vfio_device_open() moved into this file */
	vfio_device_put(device);

	return 0;
}
13840f3e72b5SJason Gunthorpe 
/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @device: The vfio_device whose supported migration flags bound the FSM
 * @cur_fsm: The current state the device is in
 * @new_fsm: The target state to reach
 * @next_fsm: Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno.
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	/* vfio_from_fsm_table[cur][target] = next hop from cur towards target */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/*
	 * Migration feature flags the device must advertise for each state to
	 * be usable; ERROR's ~0U makes it fail the support check below.
	 */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	/* The current state must be valid and supported by this device */
	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	/* As must the requested target state */
	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	   (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
15240f3e72b5SJason Gunthorpe 
/*
 * Convert the driver's struct file into an FD number and return it to
 * userspace.
 */
15280f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
15290f3e72b5SJason Gunthorpe 				   struct vfio_device_feature_mig_state *mig)
15300f3e72b5SJason Gunthorpe {
15310f3e72b5SJason Gunthorpe 	int ret;
15320f3e72b5SJason Gunthorpe 	int fd;
15330f3e72b5SJason Gunthorpe 
15340f3e72b5SJason Gunthorpe 	fd = get_unused_fd_flags(O_CLOEXEC);
15350f3e72b5SJason Gunthorpe 	if (fd < 0) {
15360f3e72b5SJason Gunthorpe 		ret = fd;
15370f3e72b5SJason Gunthorpe 		goto out_fput;
15380f3e72b5SJason Gunthorpe 	}
15390f3e72b5SJason Gunthorpe 
15400f3e72b5SJason Gunthorpe 	mig->data_fd = fd;
15410f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, mig, sizeof(*mig))) {
15420f3e72b5SJason Gunthorpe 		ret = -EFAULT;
15430f3e72b5SJason Gunthorpe 		goto out_put_unused;
15440f3e72b5SJason Gunthorpe 	}
15450f3e72b5SJason Gunthorpe 	fd_install(fd, filp);
15460f3e72b5SJason Gunthorpe 	return 0;
15470f3e72b5SJason Gunthorpe 
15480f3e72b5SJason Gunthorpe out_put_unused:
15490f3e72b5SJason Gunthorpe 	put_unused_fd(fd);
15500f3e72b5SJason Gunthorpe out_fput:
15510f3e72b5SJason Gunthorpe 	fput(filp);
15520f3e72b5SJason Gunthorpe 	return ret;
15530f3e72b5SJason Gunthorpe }
15540f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: read (GET) or change (SET) the
 * device's migration state via the driver's mig_ops.  A SET may hand back a
 * data-transfer FD; otherwise data_fd is reported as -1.
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	/* Only devices with migration support implement this feature */
	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	/*
	 * A NULL filp is a successful transition with no data-transfer FD;
	 * an ERR_PTR still copies mig (with data_fd = -1) back to userspace
	 * below before the error is returned.
	 */
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	/* No FD accompanies this path */
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
16040f3e72b5SJason Gunthorpe 
16050f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
16060f3e72b5SJason Gunthorpe 					       u32 flags, void __user *arg,
16070f3e72b5SJason Gunthorpe 					       size_t argsz)
16080f3e72b5SJason Gunthorpe {
16090f3e72b5SJason Gunthorpe 	struct vfio_device_feature_migration mig = {
16100f3e72b5SJason Gunthorpe 		.flags = device->migration_flags,
16110f3e72b5SJason Gunthorpe 	};
16120f3e72b5SJason Gunthorpe 	int ret;
16130f3e72b5SJason Gunthorpe 
16140f3e72b5SJason Gunthorpe 	if (!device->mig_ops)
16150f3e72b5SJason Gunthorpe 		return -ENOTTY;
16160f3e72b5SJason Gunthorpe 
16170f3e72b5SJason Gunthorpe 	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
16180f3e72b5SJason Gunthorpe 				 sizeof(mig));
16190f3e72b5SJason Gunthorpe 	if (ret != 1)
16200f3e72b5SJason Gunthorpe 		return ret;
16210f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, &mig, sizeof(mig)))
16220f3e72b5SJason Gunthorpe 		return -EFAULT;
16230f3e72b5SJason Gunthorpe 	return 0;
16240f3e72b5SJason Gunthorpe }
16250f3e72b5SJason Gunthorpe 
16260f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature(struct vfio_device *device,
16270f3e72b5SJason Gunthorpe 				     struct vfio_device_feature __user *arg)
16280f3e72b5SJason Gunthorpe {
16290f3e72b5SJason Gunthorpe 	size_t minsz = offsetofend(struct vfio_device_feature, flags);
16300f3e72b5SJason Gunthorpe 	struct vfio_device_feature feature;
16310f3e72b5SJason Gunthorpe 
16320f3e72b5SJason Gunthorpe 	if (copy_from_user(&feature, arg, minsz))
16330f3e72b5SJason Gunthorpe 		return -EFAULT;
16340f3e72b5SJason Gunthorpe 
16350f3e72b5SJason Gunthorpe 	if (feature.argsz < minsz)
16360f3e72b5SJason Gunthorpe 		return -EINVAL;
16370f3e72b5SJason Gunthorpe 
16380f3e72b5SJason Gunthorpe 	/* Check unknown flags */
16390f3e72b5SJason Gunthorpe 	if (feature.flags &
16400f3e72b5SJason Gunthorpe 	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
16410f3e72b5SJason Gunthorpe 	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
16420f3e72b5SJason Gunthorpe 		return -EINVAL;
16430f3e72b5SJason Gunthorpe 
16440f3e72b5SJason Gunthorpe 	/* GET & SET are mutually exclusive except with PROBE */
16450f3e72b5SJason Gunthorpe 	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
16460f3e72b5SJason Gunthorpe 	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
16470f3e72b5SJason Gunthorpe 	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
16480f3e72b5SJason Gunthorpe 		return -EINVAL;
16490f3e72b5SJason Gunthorpe 
16500f3e72b5SJason Gunthorpe 	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
16510f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE_MIGRATION:
16520f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature_migration(
16530f3e72b5SJason Gunthorpe 			device, feature.flags, arg->data,
16540f3e72b5SJason Gunthorpe 			feature.argsz - minsz);
16550f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
16560f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature_mig_device_state(
16570f3e72b5SJason Gunthorpe 			device, feature.flags, arg->data,
16580f3e72b5SJason Gunthorpe 			feature.argsz - minsz);
16590f3e72b5SJason Gunthorpe 	default:
16600f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->device_feature))
16610f3e72b5SJason Gunthorpe 			return -EINVAL;
16620f3e72b5SJason Gunthorpe 		return device->ops->device_feature(device, feature.flags,
16630f3e72b5SJason Gunthorpe 						   arg->data,
16640f3e72b5SJason Gunthorpe 						   feature.argsz - minsz);
16650f3e72b5SJason Gunthorpe 	}
16660f3e72b5SJason Gunthorpe }
16670f3e72b5SJason Gunthorpe 
16680f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep,
16690f3e72b5SJason Gunthorpe 				       unsigned int cmd, unsigned long arg)
16700f3e72b5SJason Gunthorpe {
16710f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
16720f3e72b5SJason Gunthorpe 
16730f3e72b5SJason Gunthorpe 	switch (cmd) {
16740f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE:
16750f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature(device, (void __user *)arg);
16760f3e72b5SJason Gunthorpe 	default:
16770f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->ioctl))
16780f3e72b5SJason Gunthorpe 			return -EINVAL;
16790f3e72b5SJason Gunthorpe 		return device->ops->ioctl(device, cmd, arg);
16800f3e72b5SJason Gunthorpe 	}
16810f3e72b5SJason Gunthorpe }
16820f3e72b5SJason Gunthorpe 
16830f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
16840f3e72b5SJason Gunthorpe 				     size_t count, loff_t *ppos)
16850f3e72b5SJason Gunthorpe {
16860f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
16870f3e72b5SJason Gunthorpe 
16880f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->read))
16890f3e72b5SJason Gunthorpe 		return -EINVAL;
16900f3e72b5SJason Gunthorpe 
16910f3e72b5SJason Gunthorpe 	return device->ops->read(device, buf, count, ppos);
16920f3e72b5SJason Gunthorpe }
16930f3e72b5SJason Gunthorpe 
16940f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep,
16950f3e72b5SJason Gunthorpe 				      const char __user *buf,
16960f3e72b5SJason Gunthorpe 				      size_t count, loff_t *ppos)
16970f3e72b5SJason Gunthorpe {
16980f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
16990f3e72b5SJason Gunthorpe 
17000f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->write))
17010f3e72b5SJason Gunthorpe 		return -EINVAL;
17020f3e72b5SJason Gunthorpe 
17030f3e72b5SJason Gunthorpe 	return device->ops->write(device, buf, count, ppos);
17040f3e72b5SJason Gunthorpe }
17050f3e72b5SJason Gunthorpe 
17060f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
17070f3e72b5SJason Gunthorpe {
17080f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
17090f3e72b5SJason Gunthorpe 
17100f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->mmap))
17110f3e72b5SJason Gunthorpe 		return -EINVAL;
17120f3e72b5SJason Gunthorpe 
17130f3e72b5SJason Gunthorpe 	return device->ops->mmap(device, vma);
17140f3e72b5SJason Gunthorpe }
17150f3e72b5SJason Gunthorpe 
/*
 * File operations for the device FD returned by VFIO_GROUP_GET_DEVICE_FD.
 * There is no .open: the file is created already open by vfio_device_open().
 */
static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
17250f3e72b5SJason Gunthorpe 
17260f3e72b5SJason Gunthorpe /**
17270f3e72b5SJason Gunthorpe  * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
17280f3e72b5SJason Gunthorpe  * @file: VFIO group file
17290f3e72b5SJason Gunthorpe  *
17300f3e72b5SJason Gunthorpe  * The returned iommu_group is valid as long as a ref is held on the file.
17310f3e72b5SJason Gunthorpe  */
17320f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file)
17330f3e72b5SJason Gunthorpe {
17340f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
17350f3e72b5SJason Gunthorpe 
17360f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
17370f3e72b5SJason Gunthorpe 		return NULL;
17380f3e72b5SJason Gunthorpe 	return group->iommu_group;
17390f3e72b5SJason Gunthorpe }
17400f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
17410f3e72b5SJason Gunthorpe 
17420f3e72b5SJason Gunthorpe /**
17430f3e72b5SJason Gunthorpe  * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
17440f3e72b5SJason Gunthorpe  *        is always CPU cache coherent
17450f3e72b5SJason Gunthorpe  * @file: VFIO group file
17460f3e72b5SJason Gunthorpe  *
17470f3e72b5SJason Gunthorpe  * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
17480f3e72b5SJason Gunthorpe  * bit in DMA transactions. A return of false indicates that the user has
17490f3e72b5SJason Gunthorpe  * rights to access additional instructions such as wbinvd on x86.
17500f3e72b5SJason Gunthorpe  */
17510f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file)
17520f3e72b5SJason Gunthorpe {
17530f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
17540f3e72b5SJason Gunthorpe 	bool ret;
17550f3e72b5SJason Gunthorpe 
17560f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
17570f3e72b5SJason Gunthorpe 		return true;
17580f3e72b5SJason Gunthorpe 
17590f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
17600f3e72b5SJason Gunthorpe 	if (group->container) {
17610f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(group->container,
17620f3e72b5SJason Gunthorpe 						 VFIO_DMA_CC_IOMMU);
17630f3e72b5SJason Gunthorpe 	} else {
17640f3e72b5SJason Gunthorpe 		/*
17650f3e72b5SJason Gunthorpe 		 * Since the coherency state is determined only once a container
17660f3e72b5SJason Gunthorpe 		 * is attached the user must do so before they can prove they
17670f3e72b5SJason Gunthorpe 		 * have permission.
17680f3e72b5SJason Gunthorpe 		 */
17690f3e72b5SJason Gunthorpe 		ret = true;
17700f3e72b5SJason Gunthorpe 	}
17710f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
17720f3e72b5SJason Gunthorpe 	return ret;
17730f3e72b5SJason Gunthorpe }
17740f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
17750f3e72b5SJason Gunthorpe 
17760f3e72b5SJason Gunthorpe /**
17770f3e72b5SJason Gunthorpe  * vfio_file_set_kvm - Link a kvm with VFIO drivers
17780f3e72b5SJason Gunthorpe  * @file: VFIO group file
17790f3e72b5SJason Gunthorpe  * @kvm: KVM to link
17800f3e72b5SJason Gunthorpe  *
17810f3e72b5SJason Gunthorpe  * When a VFIO device is first opened the KVM will be available in
17820f3e72b5SJason Gunthorpe  * device->kvm if one was associated with the group.
17830f3e72b5SJason Gunthorpe  */
17840f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
17850f3e72b5SJason Gunthorpe {
17860f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
17870f3e72b5SJason Gunthorpe 
17880f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
17890f3e72b5SJason Gunthorpe 		return;
17900f3e72b5SJason Gunthorpe 
17910f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
17920f3e72b5SJason Gunthorpe 	group->kvm = kvm;
17930f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
17940f3e72b5SJason Gunthorpe }
17950f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
17960f3e72b5SJason Gunthorpe 
17970f3e72b5SJason Gunthorpe /**
17980f3e72b5SJason Gunthorpe  * vfio_file_has_dev - True if the VFIO file is a handle for device
17990f3e72b5SJason Gunthorpe  * @file: VFIO file to check
18000f3e72b5SJason Gunthorpe  * @device: Device that must be part of the file
18010f3e72b5SJason Gunthorpe  *
18020f3e72b5SJason Gunthorpe  * Returns true if given file has permission to manipulate the given device.
18030f3e72b5SJason Gunthorpe  */
18040f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
18050f3e72b5SJason Gunthorpe {
18060f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
18070f3e72b5SJason Gunthorpe 
18080f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
18090f3e72b5SJason Gunthorpe 		return false;
18100f3e72b5SJason Gunthorpe 
18110f3e72b5SJason Gunthorpe 	return group == device->group;
18120f3e72b5SJason Gunthorpe }
18130f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev);
18140f3e72b5SJason Gunthorpe 
18150f3e72b5SJason Gunthorpe /*
18160f3e72b5SJason Gunthorpe  * Sub-module support
18170f3e72b5SJason Gunthorpe  */
18180f3e72b5SJason Gunthorpe /*
18190f3e72b5SJason Gunthorpe  * Helper for managing a buffer of info chain capabilities, allocate or
18200f3e72b5SJason Gunthorpe  * reallocate a buffer with additional @size, filling in @id and @version
18210f3e72b5SJason Gunthorpe  * of the capability.  A pointer to the new capability is returned.
18220f3e72b5SJason Gunthorpe  *
18230f3e72b5SJason Gunthorpe  * NB. The chain is based at the head of the buffer, so new entries are
18240f3e72b5SJason Gunthorpe  * added to the tail, vfio_info_cap_shift() should be called to fixup the
18250f3e72b5SJason Gunthorpe  * next offsets prior to copying to the user buffer.
18260f3e72b5SJason Gunthorpe  */
18270f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
18280f3e72b5SJason Gunthorpe 					       size_t size, u16 id, u16 version)
18290f3e72b5SJason Gunthorpe {
18300f3e72b5SJason Gunthorpe 	void *buf;
18310f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header, *tmp;
18320f3e72b5SJason Gunthorpe 
18330f3e72b5SJason Gunthorpe 	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
18340f3e72b5SJason Gunthorpe 	if (!buf) {
18350f3e72b5SJason Gunthorpe 		kfree(caps->buf);
18360f3e72b5SJason Gunthorpe 		caps->buf = NULL;
18370f3e72b5SJason Gunthorpe 		caps->size = 0;
18380f3e72b5SJason Gunthorpe 		return ERR_PTR(-ENOMEM);
18390f3e72b5SJason Gunthorpe 	}
18400f3e72b5SJason Gunthorpe 
18410f3e72b5SJason Gunthorpe 	caps->buf = buf;
18420f3e72b5SJason Gunthorpe 	header = buf + caps->size;
18430f3e72b5SJason Gunthorpe 
18440f3e72b5SJason Gunthorpe 	/* Eventually copied to user buffer, zero */
18450f3e72b5SJason Gunthorpe 	memset(header, 0, size);
18460f3e72b5SJason Gunthorpe 
18470f3e72b5SJason Gunthorpe 	header->id = id;
18480f3e72b5SJason Gunthorpe 	header->version = version;
18490f3e72b5SJason Gunthorpe 
18500f3e72b5SJason Gunthorpe 	/* Add to the end of the capability chain */
18510f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
18520f3e72b5SJason Gunthorpe 		; /* nothing */
18530f3e72b5SJason Gunthorpe 
18540f3e72b5SJason Gunthorpe 	tmp->next = caps->size;
18550f3e72b5SJason Gunthorpe 	caps->size += size;
18560f3e72b5SJason Gunthorpe 
18570f3e72b5SJason Gunthorpe 	return header;
18580f3e72b5SJason Gunthorpe }
18590f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add);
18600f3e72b5SJason Gunthorpe 
18610f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
18620f3e72b5SJason Gunthorpe {
18630f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *tmp;
18640f3e72b5SJason Gunthorpe 	void *buf = (void *)caps->buf;
18650f3e72b5SJason Gunthorpe 
18660f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
18670f3e72b5SJason Gunthorpe 		tmp->next += offset;
18680f3e72b5SJason Gunthorpe }
18690f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift);
18700f3e72b5SJason Gunthorpe 
18710f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps,
18720f3e72b5SJason Gunthorpe 			     struct vfio_info_cap_header *cap, size_t size)
18730f3e72b5SJason Gunthorpe {
18740f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header;
18750f3e72b5SJason Gunthorpe 
18760f3e72b5SJason Gunthorpe 	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
18770f3e72b5SJason Gunthorpe 	if (IS_ERR(header))
18780f3e72b5SJason Gunthorpe 		return PTR_ERR(header);
18790f3e72b5SJason Gunthorpe 
18800f3e72b5SJason Gunthorpe 	memcpy(header + 1, cap + 1, size - sizeof(*header));
18810f3e72b5SJason Gunthorpe 
18820f3e72b5SJason Gunthorpe 	return 0;
18830f3e72b5SJason Gunthorpe }
18840f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability);
18850f3e72b5SJason Gunthorpe 
/*
 * Validate a VFIO_DEVICE_SET_IRQS header and compute the size of any
 * trailing data payload.
 *
 * @hdr [in]        : user-supplied vfio_irq_set header
 * @num_irqs [in]   : number of IRQs the device exposes for hdr->index
 * @max_irq_type [in]: number of valid IRQ indexes for this device
 * @data_size [out] : set to the payload size in bytes (0 for DATA_NONE);
 *                    may be NULL only if the caller expects no payload
 * Returns 0 on success or -EINVAL.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/*
	 * Reject short headers, out-of-range index, a start/count pair
	 * that would wrap a u32, and flags outside the data/action masks.
	 */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	/* Default to no payload; overwritten below for BOOL/EVENTFD data. */
	if (data_size)
		*data_size = 0;

	/* The requested IRQ range must fit within the device's IRQs. */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	/* Per-IRQ element size implied by the data type flag. */
	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		/* argsz must cover the header plus one element per IRQ. */
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		/* A payload is present, so the caller must accept its size. */
		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
19330f3e72b5SJason Gunthorpe 
19340f3e72b5SJason Gunthorpe /*
19350f3e72b5SJason Gunthorpe  * Pin contiguous user pages and return their associated host pages for local
19360f3e72b5SJason Gunthorpe  * domain only.
19370f3e72b5SJason Gunthorpe  * @device [in]  : device
19380f3e72b5SJason Gunthorpe  * @iova [in]    : starting IOVA of user pages to be pinned.
19390f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be pinned.  This count should not
19400f3e72b5SJason Gunthorpe  *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
19410f3e72b5SJason Gunthorpe  * @prot [in]    : protection flags
19420f3e72b5SJason Gunthorpe  * @pages[out]   : array of host pages
19430f3e72b5SJason Gunthorpe  * Return error or number of pages pinned.
19440f3e72b5SJason Gunthorpe  */
19450f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
19460f3e72b5SJason Gunthorpe 		   int npage, int prot, struct page **pages)
19470f3e72b5SJason Gunthorpe {
19480f3e72b5SJason Gunthorpe 	struct vfio_container *container;
19490f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
19500f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
19510f3e72b5SJason Gunthorpe 	int ret;
19520f3e72b5SJason Gunthorpe 
19530f3e72b5SJason Gunthorpe 	if (!pages || !npage || !vfio_assert_device_open(device))
19540f3e72b5SJason Gunthorpe 		return -EINVAL;
19550f3e72b5SJason Gunthorpe 
19560f3e72b5SJason Gunthorpe 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
19570f3e72b5SJason Gunthorpe 		return -E2BIG;
19580f3e72b5SJason Gunthorpe 
19590f3e72b5SJason Gunthorpe 	if (group->dev_counter > 1)
19600f3e72b5SJason Gunthorpe 		return -EINVAL;
19610f3e72b5SJason Gunthorpe 
19620f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
19630f3e72b5SJason Gunthorpe 	container = group->container;
19640f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
19650f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->pin_pages))
19660f3e72b5SJason Gunthorpe 		ret = driver->ops->pin_pages(container->iommu_data,
19670f3e72b5SJason Gunthorpe 					     group->iommu_group, iova,
19680f3e72b5SJason Gunthorpe 					     npage, prot, pages);
19690f3e72b5SJason Gunthorpe 	else
19700f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
19710f3e72b5SJason Gunthorpe 
19720f3e72b5SJason Gunthorpe 	return ret;
19730f3e72b5SJason Gunthorpe }
19740f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages);
19750f3e72b5SJason Gunthorpe 
19760f3e72b5SJason Gunthorpe /*
19770f3e72b5SJason Gunthorpe  * Unpin contiguous host pages for local domain only.
19780f3e72b5SJason Gunthorpe  * @device [in]  : device
19790f3e72b5SJason Gunthorpe  * @iova [in]    : starting address of user pages to be unpinned.
19800f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be unpinned.  This count should not
19810f3e72b5SJason Gunthorpe  *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
19820f3e72b5SJason Gunthorpe  */
19830f3e72b5SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
19840f3e72b5SJason Gunthorpe {
19850f3e72b5SJason Gunthorpe 	struct vfio_container *container;
19860f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
19870f3e72b5SJason Gunthorpe 
19880f3e72b5SJason Gunthorpe 	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
19890f3e72b5SJason Gunthorpe 		return;
19900f3e72b5SJason Gunthorpe 
19910f3e72b5SJason Gunthorpe 	if (WARN_ON(!vfio_assert_device_open(device)))
19920f3e72b5SJason Gunthorpe 		return;
19930f3e72b5SJason Gunthorpe 
19940f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
19950f3e72b5SJason Gunthorpe 	container = device->group->container;
19960f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
19970f3e72b5SJason Gunthorpe 
19980f3e72b5SJason Gunthorpe 	driver->ops->unpin_pages(container->iommu_data, iova, npage);
19990f3e72b5SJason Gunthorpe }
20000f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages);
20010f3e72b5SJason Gunthorpe 
20020f3e72b5SJason Gunthorpe /*
20030f3e72b5SJason Gunthorpe  * This interface allows the CPUs to perform some sort of virtual DMA on
20040f3e72b5SJason Gunthorpe  * behalf of the device.
20050f3e72b5SJason Gunthorpe  *
20060f3e72b5SJason Gunthorpe  * CPUs read/write from/into a range of IOVAs pointing to user space memory
20070f3e72b5SJason Gunthorpe  * into/from a kernel buffer.
20080f3e72b5SJason Gunthorpe  *
20090f3e72b5SJason Gunthorpe  * As the read/write of user space memory is conducted via the CPUs and is
20100f3e72b5SJason Gunthorpe  * not a real device DMA, it is not necessary to pin the user space memory.
20110f3e72b5SJason Gunthorpe  *
20120f3e72b5SJason Gunthorpe  * @device [in]		: VFIO device
20130f3e72b5SJason Gunthorpe  * @iova [in]		: base IOVA of a user space buffer
20140f3e72b5SJason Gunthorpe  * @data [in]		: pointer to kernel buffer
20150f3e72b5SJason Gunthorpe  * @len [in]		: kernel buffer length
20160f3e72b5SJason Gunthorpe  * @write		: indicate read or write
20170f3e72b5SJason Gunthorpe  * Return error code on failure or 0 on success.
20180f3e72b5SJason Gunthorpe  */
20190f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
20200f3e72b5SJason Gunthorpe 		size_t len, bool write)
20210f3e72b5SJason Gunthorpe {
20220f3e72b5SJason Gunthorpe 	struct vfio_container *container;
20230f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
20240f3e72b5SJason Gunthorpe 	int ret = 0;
20250f3e72b5SJason Gunthorpe 
20260f3e72b5SJason Gunthorpe 	if (!data || len <= 0 || !vfio_assert_device_open(device))
20270f3e72b5SJason Gunthorpe 		return -EINVAL;
20280f3e72b5SJason Gunthorpe 
20290f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
20300f3e72b5SJason Gunthorpe 	container = device->group->container;
20310f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
20320f3e72b5SJason Gunthorpe 
20330f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->dma_rw))
20340f3e72b5SJason Gunthorpe 		ret = driver->ops->dma_rw(container->iommu_data,
20350f3e72b5SJason Gunthorpe 					  iova, data, len, write);
20360f3e72b5SJason Gunthorpe 	else
20370f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
20380f3e72b5SJason Gunthorpe 	return ret;
20390f3e72b5SJason Gunthorpe }
20400f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw);
20410f3e72b5SJason Gunthorpe 
20420f3e72b5SJason Gunthorpe /*
20430f3e72b5SJason Gunthorpe  * Module/class support
20440f3e72b5SJason Gunthorpe  */
/* Class devnode callback: place all vfio device nodes under /dev/vfio/. */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
20490f3e72b5SJason Gunthorpe 
/* The /dev/vfio/vfio container misc device. */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,	/* world-readable/writable node (0666) */
};
20570f3e72b5SJason Gunthorpe 
20580f3e72b5SJason Gunthorpe static int __init vfio_init(void)
20590f3e72b5SJason Gunthorpe {
20600f3e72b5SJason Gunthorpe 	int ret;
20610f3e72b5SJason Gunthorpe 
20620f3e72b5SJason Gunthorpe 	ida_init(&vfio.group_ida);
20630f3e72b5SJason Gunthorpe 	mutex_init(&vfio.group_lock);
20640f3e72b5SJason Gunthorpe 	mutex_init(&vfio.iommu_drivers_lock);
20650f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&vfio.group_list);
20660f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
20670f3e72b5SJason Gunthorpe 
20680f3e72b5SJason Gunthorpe 	ret = misc_register(&vfio_dev);
20690f3e72b5SJason Gunthorpe 	if (ret) {
20700f3e72b5SJason Gunthorpe 		pr_err("vfio: misc device register failed\n");
20710f3e72b5SJason Gunthorpe 		return ret;
20720f3e72b5SJason Gunthorpe 	}
20730f3e72b5SJason Gunthorpe 
20740f3e72b5SJason Gunthorpe 	/* /dev/vfio/$GROUP */
20750f3e72b5SJason Gunthorpe 	vfio.class = class_create(THIS_MODULE, "vfio");
20760f3e72b5SJason Gunthorpe 	if (IS_ERR(vfio.class)) {
20770f3e72b5SJason Gunthorpe 		ret = PTR_ERR(vfio.class);
20780f3e72b5SJason Gunthorpe 		goto err_class;
20790f3e72b5SJason Gunthorpe 	}
20800f3e72b5SJason Gunthorpe 
20810f3e72b5SJason Gunthorpe 	vfio.class->devnode = vfio_devnode;
20820f3e72b5SJason Gunthorpe 
20830f3e72b5SJason Gunthorpe 	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
20840f3e72b5SJason Gunthorpe 	if (ret)
20850f3e72b5SJason Gunthorpe 		goto err_alloc_chrdev;
20860f3e72b5SJason Gunthorpe 
20870f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
20880f3e72b5SJason Gunthorpe 	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
20890f3e72b5SJason Gunthorpe #endif
20900f3e72b5SJason Gunthorpe 	if (ret)
20910f3e72b5SJason Gunthorpe 		goto err_driver_register;
20920f3e72b5SJason Gunthorpe 
20930f3e72b5SJason Gunthorpe 	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
20940f3e72b5SJason Gunthorpe 	return 0;
20950f3e72b5SJason Gunthorpe 
20960f3e72b5SJason Gunthorpe err_driver_register:
20970f3e72b5SJason Gunthorpe 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
20980f3e72b5SJason Gunthorpe err_alloc_chrdev:
20990f3e72b5SJason Gunthorpe 	class_destroy(vfio.class);
21000f3e72b5SJason Gunthorpe 	vfio.class = NULL;
21010f3e72b5SJason Gunthorpe err_class:
21020f3e72b5SJason Gunthorpe 	misc_deregister(&vfio_dev);
21030f3e72b5SJason Gunthorpe 	return ret;
21040f3e72b5SJason Gunthorpe }
21050f3e72b5SJason Gunthorpe 
/* Module exit: tear everything down in the reverse order of vfio_init(). */
static void __exit vfio_cleanup(void)
{
	/* All groups should be gone before the module can be unloaded. */
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	/* vfio_device_set_xa is declared elsewhere; expected empty by now. */
	xa_destroy(&vfio_device_set_xa);
}
21200f3e72b5SJason Gunthorpe 
module_init(vfio_init);
module_exit(vfio_cleanup);

/* Module metadata and aliases for automatic loading via /dev/vfio/vfio. */
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
/* Prefer loading the IOMMU backend modules after this one, if present. */
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
2131