xref: /openbmc/linux/drivers/vfio/vfio_main.c (revision 150ee2f9)
10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
20f3e72b5SJason Gunthorpe /*
30f3e72b5SJason Gunthorpe  * VFIO core
40f3e72b5SJason Gunthorpe  *
50f3e72b5SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
60f3e72b5SJason Gunthorpe  *     Author: Alex Williamson <alex.williamson@redhat.com>
70f3e72b5SJason Gunthorpe  *
80f3e72b5SJason Gunthorpe  * Derived from original vfio:
90f3e72b5SJason Gunthorpe  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
100f3e72b5SJason Gunthorpe  * Author: Tom Lyon, pugs@cisco.com
110f3e72b5SJason Gunthorpe  */
120f3e72b5SJason Gunthorpe 
130f3e72b5SJason Gunthorpe #include <linux/cdev.h>
140f3e72b5SJason Gunthorpe #include <linux/compat.h>
150f3e72b5SJason Gunthorpe #include <linux/device.h>
160f3e72b5SJason Gunthorpe #include <linux/file.h>
170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h>
180f3e72b5SJason Gunthorpe #include <linux/fs.h>
190f3e72b5SJason Gunthorpe #include <linux/idr.h>
200f3e72b5SJason Gunthorpe #include <linux/iommu.h>
210f3e72b5SJason Gunthorpe #include <linux/list.h>
220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h>
230f3e72b5SJason Gunthorpe #include <linux/module.h>
240f3e72b5SJason Gunthorpe #include <linux/mutex.h>
250f3e72b5SJason Gunthorpe #include <linux/pci.h>
260f3e72b5SJason Gunthorpe #include <linux/rwsem.h>
270f3e72b5SJason Gunthorpe #include <linux/sched.h>
280f3e72b5SJason Gunthorpe #include <linux/slab.h>
290f3e72b5SJason Gunthorpe #include <linux/stat.h>
300f3e72b5SJason Gunthorpe #include <linux/string.h>
310f3e72b5SJason Gunthorpe #include <linux/uaccess.h>
320f3e72b5SJason Gunthorpe #include <linux/vfio.h>
330f3e72b5SJason Gunthorpe #include <linux/wait.h>
340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h>
350f3e72b5SJason Gunthorpe #include "vfio.h"
360f3e72b5SJason Gunthorpe 
370f3e72b5SJason Gunthorpe #define DRIVER_VERSION	"0.3"
380f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
390f3e72b5SJason Gunthorpe #define DRIVER_DESC	"VFIO - User Level meta-driver"
400f3e72b5SJason Gunthorpe 
/* Singleton holding all global state for the VFIO meta-driver. */
static struct vfio {
	struct class			*class;		/* device class for /dev/vfio/<group> nodes */
	struct list_head		iommu_drivers_list;	/* registered IOMMU backend drivers */
	struct mutex			iommu_drivers_lock;	/* protects iommu_drivers_list */
	struct list_head		group_list;	/* all live vfio_group objects */
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;	/* allocator for group chardev minors */
	dev_t				group_devt;	/* base dev_t for group chardevs */
} vfio;
500f3e72b5SJason Gunthorpe 
/* One registered IOMMU backend; linked on vfio.iommu_drivers_list. */
struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;	/* backend callbacks */
	struct list_head			vfio_next;	/* link in vfio.iommu_drivers_list */
};
550f3e72b5SJason Gunthorpe 
/*
 * A container is created per open of /dev/vfio/vfio and kref-managed so it
 * survives until the last group/device user drops its reference.
 */
struct vfio_container {
	struct kref			kref;	/* freed from vfio_container_release() */
	struct list_head		group_list;	/* groups attached to this container */
	struct rw_semaphore		group_lock;	/* protects group_list and iommu binding */
	struct vfio_iommu_driver	*iommu_driver;	/* backend selected via SET_IOMMU */
	void				*iommu_data;	/* backend-private state from ops->open() */
	bool				noiommu;	/* container opened through a noiommu group */
};
640f3e72b5SJason Gunthorpe 
/*
 * One vfio_group per iommu_group in use by VFIO; exposed to userspace as a
 * char device (/dev/vfio/<id>). Lifetime is managed by 'users' plus the
 * embedded struct device refcount (see vfio_group_release()).
 */
struct vfio_group {
	struct device 			dev;	/* embedded device; release frees the group */
	struct cdev			cdev;	/* group chardev, fops = vfio_group_fops */
	refcount_t			users;	/* live references; 0 -> removed from group_list */
	unsigned int			container_users;	/* users pinning the container attachment */
	struct iommu_group		*iommu_group;	/* backing iommu core group (referenced) */
	struct vfio_container		*container;	/* container this group is attached to, or NULL */
	struct list_head		device_list;	/* vfio_devices registered in this group */
	struct mutex			device_lock;	/* protects device_list/dev_counter */
	struct list_head		vfio_next;	/* link in vfio.group_list */
	struct list_head		container_next;	/* link in container->group_list */
	enum vfio_group_type		type;	/* VFIO_IOMMU, VFIO_NO_IOMMU or VFIO_EMULATED_IOMMU */
	unsigned int			dev_counter;	/* number of devices in device_list */
	struct rw_semaphore		group_rwsem;	/* protects kvm/opened_file/container state */
	struct kvm			*kvm;	/* associated KVM, set by external users */
	struct file			*opened_file;	/* the single open file on this group */
	struct blocking_notifier_head	notifier;	/* e.g. KVM set/unset notifications */
};
830f3e72b5SJason Gunthorpe 
#ifdef CONFIG_VFIO_NOIOMMU
/* Opt-in module parameter enabling the unsafe no-IOMMU mode; default off. */
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

/* Maps set_id -> struct vfio_device_set; see vfio_assign_device_set(). */
static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;
930f3e72b5SJason Gunthorpe 
/*
 * vfio_assign_device_set - place @device in the device set keyed by @set_id
 *
 * Devices sharing a set must be reset together. The set is looked up in (or
 * inserted into) vfio_device_set_xa keyed by the set_id pointer value; the
 * first device to use a set_id allocates the set, later ones take a count
 * reference. Returns 0 on success or a negative errno.
 */
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	/* Not present: allocate outside the lock, then race to insert. */
	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		/* We won the race; our allocation is now the set. */
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	/* Lost the race (or xarray error): discard our allocation. */
	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	/* device_count is protected by the xa_lock. */
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
1430f3e72b5SJason Gunthorpe 
/*
 * Drop @device's membership in its device set; the last member tears the
 * set down and erases it from the xarray. Safe to call when no set was
 * ever assigned.
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* device_count and the xarray entry are both guarded by xa_lock. */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
1640f3e72b5SJason Gunthorpe 
1650f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
/*
 * ->open callback for the noiommu backend. Validates the requested IOMMU
 * type and gates on CAP_SYS_RAWIO since this mode allows unmediated DMA.
 * Returns NULL (no per-container state) on success, ERR_PTR on failure.
 */
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}
1750f3e72b5SJason Gunthorpe 
/* ->release callback: noiommu keeps no per-container state, nothing to free. */
static void vfio_noiommu_release(void *iommu_data)
{
}
1790f3e72b5SJason Gunthorpe 
1800f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data,
1810f3e72b5SJason Gunthorpe 			       unsigned int cmd, unsigned long arg)
1820f3e72b5SJason Gunthorpe {
1830f3e72b5SJason Gunthorpe 	if (cmd == VFIO_CHECK_EXTENSION)
1840f3e72b5SJason Gunthorpe 		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
1850f3e72b5SJason Gunthorpe 
1860f3e72b5SJason Gunthorpe 	return -ENOTTY;
1870f3e72b5SJason Gunthorpe }
1880f3e72b5SJason Gunthorpe 
/* ->attach_group callback: nothing to program without an IOMMU; always ok. */
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}
1940f3e72b5SJason Gunthorpe 
/* ->detach_group callback: no-op counterpart to vfio_noiommu_attach_group(). */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
1990f3e72b5SJason Gunthorpe 
/* Backend ops for the no-IOMMU pseudo driver; all callbacks are stubs. */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
2090f3e72b5SJason Gunthorpe 
/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	/* True when the container's noiommu flag matches the driver's kind. */
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
#else
/* Without CONFIG_VFIO_NOIOMMU there is no restriction to enforce. */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return true;
}
#endif /* CONFIG_VFIO_NOIOMMU */
2260f3e72b5SJason Gunthorpe 
2270f3e72b5SJason Gunthorpe /*
2280f3e72b5SJason Gunthorpe  * IOMMU driver registration
2290f3e72b5SJason Gunthorpe  */
2300f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2310f3e72b5SJason Gunthorpe {
2320f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver, *tmp;
2330f3e72b5SJason Gunthorpe 
2340f3e72b5SJason Gunthorpe 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
2350f3e72b5SJason Gunthorpe 		return -EINVAL;
2360f3e72b5SJason Gunthorpe 
2370f3e72b5SJason Gunthorpe 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
2380f3e72b5SJason Gunthorpe 	if (!driver)
2390f3e72b5SJason Gunthorpe 		return -ENOMEM;
2400f3e72b5SJason Gunthorpe 
2410f3e72b5SJason Gunthorpe 	driver->ops = ops;
2420f3e72b5SJason Gunthorpe 
2430f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2440f3e72b5SJason Gunthorpe 
2450f3e72b5SJason Gunthorpe 	/* Check for duplicates */
2460f3e72b5SJason Gunthorpe 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
2470f3e72b5SJason Gunthorpe 		if (tmp->ops == ops) {
2480f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2490f3e72b5SJason Gunthorpe 			kfree(driver);
2500f3e72b5SJason Gunthorpe 			return -EINVAL;
2510f3e72b5SJason Gunthorpe 		}
2520f3e72b5SJason Gunthorpe 	}
2530f3e72b5SJason Gunthorpe 
2540f3e72b5SJason Gunthorpe 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
2550f3e72b5SJason Gunthorpe 
2560f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2570f3e72b5SJason Gunthorpe 
2580f3e72b5SJason Gunthorpe 	return 0;
2590f3e72b5SJason Gunthorpe }
2600f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
2610f3e72b5SJason Gunthorpe 
2620f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2630f3e72b5SJason Gunthorpe {
2640f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
2650f3e72b5SJason Gunthorpe 
2660f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2670f3e72b5SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
2680f3e72b5SJason Gunthorpe 		if (driver->ops == ops) {
2690f3e72b5SJason Gunthorpe 			list_del(&driver->vfio_next);
2700f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2710f3e72b5SJason Gunthorpe 			kfree(driver);
2720f3e72b5SJason Gunthorpe 			return;
2730f3e72b5SJason Gunthorpe 		}
2740f3e72b5SJason Gunthorpe 	}
2750f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2760f3e72b5SJason Gunthorpe }
2770f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
2780f3e72b5SJason Gunthorpe 
2790f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group);
2800f3e72b5SJason Gunthorpe 
2810f3e72b5SJason Gunthorpe /*
2820f3e72b5SJason Gunthorpe  * Container objects - containers are created when /dev/vfio/vfio is
2830f3e72b5SJason Gunthorpe  * opened, but their lifecycle extends until the last user is done, so
2840f3e72b5SJason Gunthorpe  * it's freed via kref.  Must support container/group/device being
2850f3e72b5SJason Gunthorpe  * closed in any order.
2860f3e72b5SJason Gunthorpe  */
/* Take an additional reference on @container. */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
2910f3e72b5SJason Gunthorpe 
2920f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref)
2930f3e72b5SJason Gunthorpe {
2940f3e72b5SJason Gunthorpe 	struct vfio_container *container;
2950f3e72b5SJason Gunthorpe 	container = container_of(kref, struct vfio_container, kref);
2960f3e72b5SJason Gunthorpe 
2970f3e72b5SJason Gunthorpe 	kfree(container);
2980f3e72b5SJason Gunthorpe }
2990f3e72b5SJason Gunthorpe 
/* Drop a reference on @container; frees it when the count reaches zero. */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
3040f3e72b5SJason Gunthorpe 
3050f3e72b5SJason Gunthorpe /*
3060f3e72b5SJason Gunthorpe  * Group objects - create, release, get, put, search
3070f3e72b5SJason Gunthorpe  */
3080f3e72b5SJason Gunthorpe static struct vfio_group *
3090f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3100f3e72b5SJason Gunthorpe {
3110f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3120f3e72b5SJason Gunthorpe 
3130f3e72b5SJason Gunthorpe 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
3140f3e72b5SJason Gunthorpe 		if (group->iommu_group == iommu_group) {
3150f3e72b5SJason Gunthorpe 			vfio_group_get(group);
3160f3e72b5SJason Gunthorpe 			return group;
3170f3e72b5SJason Gunthorpe 		}
3180f3e72b5SJason Gunthorpe 	}
3190f3e72b5SJason Gunthorpe 	return NULL;
3200f3e72b5SJason Gunthorpe }
3210f3e72b5SJason Gunthorpe 
3220f3e72b5SJason Gunthorpe static struct vfio_group *
3230f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3240f3e72b5SJason Gunthorpe {
3250f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3260f3e72b5SJason Gunthorpe 
3270f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.group_lock);
3280f3e72b5SJason Gunthorpe 	group = __vfio_group_get_from_iommu(iommu_group);
3290f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
3300f3e72b5SJason Gunthorpe 	return group;
3310f3e72b5SJason Gunthorpe }
3320f3e72b5SJason Gunthorpe 
/*
 * struct device release callback for a vfio_group: runs when the embedded
 * device's refcount hits zero. Undoes vfio_group_alloc(): drops the
 * iommu_group reference, returns the minor to the ida, then frees the group.
 */
static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	iommu_group_put(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}
3420f3e72b5SJason Gunthorpe 
/*
 * Allocate and initialize a vfio_group for @iommu_group (not yet published
 * on vfio.group_list and not yet visible to userspace). Takes a reference
 * on @iommu_group, released in vfio_group_release(). Returns ERR_PTR on
 * allocation or minor-number exhaustion.
 */
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	/* Reserve a chardev minor; returned to the ida on release. */
	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->users, 1);
	init_rwsem(&group->group_rwsem);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}
3780f3e72b5SJason Gunthorpe 
/*
 * Create a vfio_group for @iommu_group, publish its chardev and add it to
 * vfio.group_list. If another thread created a group for the same
 * iommu_group in the meantime, the freshly allocated group is discarded and
 * the existing one is returned (referenced). Returns ERR_PTR on failure.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	/* noiommu groups are distinguished by name: "noiommu-<id>". */
	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	/* Drops the only reference; vfio_group_release() frees the group. */
	put_device(&group->dev);
	return ret;
}
4220f3e72b5SJason Gunthorpe 
/*
 * Drop a reference on @group. The last reference atomically takes
 * vfio.group_lock (via refcount_dec_and_mutex_lock) so the group can be
 * unpublished from the list and chardev without racing a concurrent lookup.
 */
static void vfio_group_put(struct vfio_group *group)
{
	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
		return;

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group. Since all
	 * pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);

	list_del(&group->vfio_next);
	cdev_device_del(&group->cdev, &group->dev);
	mutex_unlock(&vfio.group_lock);

	/* Final device reference; vfio_group_release() runs from here. */
	put_device(&group->dev);
}
4440f3e72b5SJason Gunthorpe 
/* Take an additional reference on @group; paired with vfio_group_put(). */
static void vfio_group_get(struct vfio_group *group)
{
	refcount_inc(&group->users);
}
4490f3e72b5SJason Gunthorpe 
4500f3e72b5SJason Gunthorpe /*
4510f3e72b5SJason Gunthorpe  * Device objects - create, release, get, put, search
4520f3e72b5SJason Gunthorpe  */
4530f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */
4540f3e72b5SJason Gunthorpe static void vfio_device_put(struct vfio_device *device)
4550f3e72b5SJason Gunthorpe {
4560f3e72b5SJason Gunthorpe 	if (refcount_dec_and_test(&device->refcount))
4570f3e72b5SJason Gunthorpe 		complete(&device->comp);
4580f3e72b5SJason Gunthorpe }
4590f3e72b5SJason Gunthorpe 
/* Take a reference on @device unless it is already being torn down. */
static bool vfio_device_try_get(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
4640f3e72b5SJason Gunthorpe 
4650f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
4660f3e72b5SJason Gunthorpe 						 struct device *dev)
4670f3e72b5SJason Gunthorpe {
4680f3e72b5SJason Gunthorpe 	struct vfio_device *device;
4690f3e72b5SJason Gunthorpe 
4700f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
4710f3e72b5SJason Gunthorpe 	list_for_each_entry(device, &group->device_list, group_next) {
4720f3e72b5SJason Gunthorpe 		if (device->dev == dev && vfio_device_try_get(device)) {
4730f3e72b5SJason Gunthorpe 			mutex_unlock(&group->device_lock);
4740f3e72b5SJason Gunthorpe 			return device;
4750f3e72b5SJason Gunthorpe 		}
4760f3e72b5SJason Gunthorpe 	}
4770f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
4780f3e72b5SJason Gunthorpe 	return NULL;
4790f3e72b5SJason Gunthorpe }
4800f3e72b5SJason Gunthorpe 
4810f3e72b5SJason Gunthorpe /*
4820f3e72b5SJason Gunthorpe  * VFIO driver API
4830f3e72b5SJason Gunthorpe  */
4840f3e72b5SJason Gunthorpe void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
4850f3e72b5SJason Gunthorpe 			 const struct vfio_device_ops *ops)
4860f3e72b5SJason Gunthorpe {
4870f3e72b5SJason Gunthorpe 	init_completion(&device->comp);
4880f3e72b5SJason Gunthorpe 	device->dev = dev;
4890f3e72b5SJason Gunthorpe 	device->ops = ops;
4900f3e72b5SJason Gunthorpe }
4910f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_init_group_dev);
4920f3e72b5SJason Gunthorpe 
/* Undo vfio_init_group_dev(): drop the device's device-set membership. */
void vfio_uninit_group_dev(struct vfio_device *device)
{
	vfio_release_device_set(device);
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
4980f3e72b5SJason Gunthorpe 
/*
 * Synthesize an iommu_group named "vfio-noiommu" for a device that has no
 * real IOMMU backing (VFIO_NO_IOMMU or VFIO_EMULATED_IOMMU), add @dev to it
 * and wrap it in a vfio_group. On success the local iommu_group reference
 * is dropped (the vfio_group holds its own). Returns ERR_PTR on failure
 * with the device removed and the iommu_group released.
 */
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}
5310f3e72b5SJason Gunthorpe 
/*
 * Resolve the vfio_group for @dev: reuse the existing group for the
 * device's iommu_group, create one if needed, or — with noiommu enabled —
 * synthesize a group for an IOMMU-less device (tainting the kernel).
 * Returns a referenced group or ERR_PTR.
 */
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}
5750f3e72b5SJason Gunthorpe 
5760f3e72b5SJason Gunthorpe static int __vfio_register_dev(struct vfio_device *device,
5770f3e72b5SJason Gunthorpe 		struct vfio_group *group)
5780f3e72b5SJason Gunthorpe {
5790f3e72b5SJason Gunthorpe 	struct vfio_device *existing_device;
5800f3e72b5SJason Gunthorpe 
5810f3e72b5SJason Gunthorpe 	if (IS_ERR(group))
5820f3e72b5SJason Gunthorpe 		return PTR_ERR(group);
5830f3e72b5SJason Gunthorpe 
5840f3e72b5SJason Gunthorpe 	/*
5850f3e72b5SJason Gunthorpe 	 * If the driver doesn't specify a set then the device is added to a
5860f3e72b5SJason Gunthorpe 	 * singleton set just for itself.
5870f3e72b5SJason Gunthorpe 	 */
5880f3e72b5SJason Gunthorpe 	if (!device->dev_set)
5890f3e72b5SJason Gunthorpe 		vfio_assign_device_set(device, device);
5900f3e72b5SJason Gunthorpe 
5910f3e72b5SJason Gunthorpe 	existing_device = vfio_group_get_device(group, device->dev);
5920f3e72b5SJason Gunthorpe 	if (existing_device) {
5930f3e72b5SJason Gunthorpe 		dev_WARN(device->dev, "Device already exists on group %d\n",
5940f3e72b5SJason Gunthorpe 			 iommu_group_id(group->iommu_group));
5950f3e72b5SJason Gunthorpe 		vfio_device_put(existing_device);
5960f3e72b5SJason Gunthorpe 		if (group->type == VFIO_NO_IOMMU ||
5970f3e72b5SJason Gunthorpe 		    group->type == VFIO_EMULATED_IOMMU)
5980f3e72b5SJason Gunthorpe 			iommu_group_remove_device(device->dev);
5990f3e72b5SJason Gunthorpe 		vfio_group_put(group);
6000f3e72b5SJason Gunthorpe 		return -EBUSY;
6010f3e72b5SJason Gunthorpe 	}
6020f3e72b5SJason Gunthorpe 
6030f3e72b5SJason Gunthorpe 	/* Our reference on group is moved to the device */
6040f3e72b5SJason Gunthorpe 	device->group = group;
6050f3e72b5SJason Gunthorpe 
6060f3e72b5SJason Gunthorpe 	/* Refcounting can't start until the driver calls register */
6070f3e72b5SJason Gunthorpe 	refcount_set(&device->refcount, 1);
6080f3e72b5SJason Gunthorpe 
6090f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
6100f3e72b5SJason Gunthorpe 	list_add(&device->group_next, &group->device_list);
6110f3e72b5SJason Gunthorpe 	group->dev_counter++;
6120f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
6130f3e72b5SJason Gunthorpe 
6140f3e72b5SJason Gunthorpe 	return 0;
6150f3e72b5SJason Gunthorpe }
6160f3e72b5SJason Gunthorpe 
6170f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device)
6180f3e72b5SJason Gunthorpe {
6190f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
6200f3e72b5SJason Gunthorpe 		vfio_group_find_or_alloc(device->dev));
6210f3e72b5SJason Gunthorpe }
6220f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev);
6230f3e72b5SJason Gunthorpe 
6240f3e72b5SJason Gunthorpe /*
6250f3e72b5SJason Gunthorpe  * Register a virtual device without IOMMU backing.  The user of this
6260f3e72b5SJason Gunthorpe  * device must not be able to directly trigger unmediated DMA.
6270f3e72b5SJason Gunthorpe  */
6280f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device)
6290f3e72b5SJason Gunthorpe {
6300f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
6310f3e72b5SJason Gunthorpe 		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
6320f3e72b5SJason Gunthorpe }
6330f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
6340f3e72b5SJason Gunthorpe 
6350f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
6360f3e72b5SJason Gunthorpe 						     char *buf)
6370f3e72b5SJason Gunthorpe {
6380f3e72b5SJason Gunthorpe 	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
6390f3e72b5SJason Gunthorpe 
6400f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
6410f3e72b5SJason Gunthorpe 	list_for_each_entry(it, &group->device_list, group_next) {
6420f3e72b5SJason Gunthorpe 		int ret;
6430f3e72b5SJason Gunthorpe 
6440f3e72b5SJason Gunthorpe 		if (it->ops->match) {
6450f3e72b5SJason Gunthorpe 			ret = it->ops->match(it, buf);
6460f3e72b5SJason Gunthorpe 			if (ret < 0) {
6470f3e72b5SJason Gunthorpe 				device = ERR_PTR(ret);
6480f3e72b5SJason Gunthorpe 				break;
6490f3e72b5SJason Gunthorpe 			}
6500f3e72b5SJason Gunthorpe 		} else {
6510f3e72b5SJason Gunthorpe 			ret = !strcmp(dev_name(it->dev), buf);
6520f3e72b5SJason Gunthorpe 		}
6530f3e72b5SJason Gunthorpe 
6540f3e72b5SJason Gunthorpe 		if (ret && vfio_device_try_get(it)) {
6550f3e72b5SJason Gunthorpe 			device = it;
6560f3e72b5SJason Gunthorpe 			break;
6570f3e72b5SJason Gunthorpe 		}
6580f3e72b5SJason Gunthorpe 	}
6590f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
6600f3e72b5SJason Gunthorpe 
6610f3e72b5SJason Gunthorpe 	return device;
6620f3e72b5SJason Gunthorpe }
6630f3e72b5SJason Gunthorpe 
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device hold references that
 * keep the completion pending, so this blocks until they are released;
 * the driver's ->request() callback (if any) is invoked periodically to
 * prod the user into closing them.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/* Drop the initial reference set in __vfio_register_dev() */
	vfio_device_put(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/* @i counts how many times the user has been nudged */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			/*
			 * Allow signals to interrupt the wait initially, but
			 * after the first interruption keep waiting
			 * uninterruptibly: unregistration must not be
			 * abandoned half-way.
			 */
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	/* No users remain; unlink the device from its group */
	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	/* Fake iommu groups were created at registration; remove them */
	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
7090f3e72b5SJason Gunthorpe 
7100f3e72b5SJason Gunthorpe /*
7110f3e72b5SJason Gunthorpe  * VFIO base fd, /dev/vfio/vfio
7120f3e72b5SJason Gunthorpe  */
/*
 * VFIO_CHECK_EXTENSION handler for the container fd.  Returns > 0 when
 * the extension identified by @arg is supported, 0 when it is not, or a
 * negative errno from the driver.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/*
				 * Once groups are attached, only drivers
				 * allowed for this container may be polled.
				 */
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				/* Skip drivers whose module is unloading */
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
7610f3e72b5SJason Gunthorpe 
/*
 * Attach every group already in @container to @driver (with iommu state
 * @data).  All-or-nothing: on any attach failure, groups attached so far
 * are detached again in reverse order and the error is returned.
 *
 * hold write lock on container->group_lock
 */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Walk back from the failing entry, detaching what was attached */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
7870f3e72b5SJason Gunthorpe 
/*
 * VFIO_SET_IOMMU handler: bind an iommu backend driver to a container.
 * @arg selects the iommu type; registered drivers are probed in turn via
 * VFIO_CHECK_EXTENSION until one claims the type and all of the
 * container's groups attach successfully.
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		/* Pin the driver module while we call into it */
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Attach all existing groups; on failure try the next driver */
		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Module reference is held until the driver is unset */
		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
8540f3e72b5SJason Gunthorpe 
8550f3e72b5SJason Gunthorpe static long vfio_fops_unl_ioctl(struct file *filep,
8560f3e72b5SJason Gunthorpe 				unsigned int cmd, unsigned long arg)
8570f3e72b5SJason Gunthorpe {
8580f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
8590f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
8600f3e72b5SJason Gunthorpe 	void *data;
8610f3e72b5SJason Gunthorpe 	long ret = -EINVAL;
8620f3e72b5SJason Gunthorpe 
8630f3e72b5SJason Gunthorpe 	if (!container)
8640f3e72b5SJason Gunthorpe 		return ret;
8650f3e72b5SJason Gunthorpe 
8660f3e72b5SJason Gunthorpe 	switch (cmd) {
8670f3e72b5SJason Gunthorpe 	case VFIO_GET_API_VERSION:
8680f3e72b5SJason Gunthorpe 		ret = VFIO_API_VERSION;
8690f3e72b5SJason Gunthorpe 		break;
8700f3e72b5SJason Gunthorpe 	case VFIO_CHECK_EXTENSION:
8710f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(container, arg);
8720f3e72b5SJason Gunthorpe 		break;
8730f3e72b5SJason Gunthorpe 	case VFIO_SET_IOMMU:
8740f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_set_iommu(container, arg);
8750f3e72b5SJason Gunthorpe 		break;
8760f3e72b5SJason Gunthorpe 	default:
8770f3e72b5SJason Gunthorpe 		driver = container->iommu_driver;
8780f3e72b5SJason Gunthorpe 		data = container->iommu_data;
8790f3e72b5SJason Gunthorpe 
8800f3e72b5SJason Gunthorpe 		if (driver) /* passthrough all unrecognized ioctls */
8810f3e72b5SJason Gunthorpe 			ret = driver->ops->ioctl(data, cmd, arg);
8820f3e72b5SJason Gunthorpe 	}
8830f3e72b5SJason Gunthorpe 
8840f3e72b5SJason Gunthorpe 	return ret;
8850f3e72b5SJason Gunthorpe }
8860f3e72b5SJason Gunthorpe 
8870f3e72b5SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep)
8880f3e72b5SJason Gunthorpe {
8890f3e72b5SJason Gunthorpe 	struct vfio_container *container;
8900f3e72b5SJason Gunthorpe 
8910f3e72b5SJason Gunthorpe 	container = kzalloc(sizeof(*container), GFP_KERNEL);
8920f3e72b5SJason Gunthorpe 	if (!container)
8930f3e72b5SJason Gunthorpe 		return -ENOMEM;
8940f3e72b5SJason Gunthorpe 
8950f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&container->group_list);
8960f3e72b5SJason Gunthorpe 	init_rwsem(&container->group_lock);
8970f3e72b5SJason Gunthorpe 	kref_init(&container->kref);
8980f3e72b5SJason Gunthorpe 
8990f3e72b5SJason Gunthorpe 	filep->private_data = container;
9000f3e72b5SJason Gunthorpe 
9010f3e72b5SJason Gunthorpe 	return 0;
9020f3e72b5SJason Gunthorpe }
9030f3e72b5SJason Gunthorpe 
9040f3e72b5SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep)
9050f3e72b5SJason Gunthorpe {
9060f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
9070f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver = container->iommu_driver;
9080f3e72b5SJason Gunthorpe 
9090f3e72b5SJason Gunthorpe 	if (driver && driver->ops->notify)
9100f3e72b5SJason Gunthorpe 		driver->ops->notify(container->iommu_data,
9110f3e72b5SJason Gunthorpe 				    VFIO_IOMMU_CONTAINER_CLOSE);
9120f3e72b5SJason Gunthorpe 
9130f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
9140f3e72b5SJason Gunthorpe 
9150f3e72b5SJason Gunthorpe 	vfio_container_put(container);
9160f3e72b5SJason Gunthorpe 
9170f3e72b5SJason Gunthorpe 	return 0;
9180f3e72b5SJason Gunthorpe }
9190f3e72b5SJason Gunthorpe 
/* File operations for the /dev/vfio/vfio container chardev */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
9270f3e72b5SJason Gunthorpe 
9280f3e72b5SJason Gunthorpe /*
9290f3e72b5SJason Gunthorpe  * VFIO Group fd, /dev/vfio/$GROUP
9300f3e72b5SJason Gunthorpe  */
/*
 * Detach @group from its container.  Consumes the group's reference on
 * the container.  Caller must hold group->group_rwsem for write.
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	/* Return the DMA ownership claimed in vfio_group_set_container() */
	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		/* Matches try_module_get() in vfio_ioctl_set_iommu() */
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	/* Drop the reference taken in vfio_group_set_container() */
	vfio_container_put(container);
}
9640f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 *
 * Caller must hold group->group_rwsem for write.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container)
		return -EINVAL;
	/* Open device fds count as extra container users; refuse if any */
	if (group->container_users != 1)
		return -EBUSY;
	__vfio_group_unset_container(group);
	return 0;
}
9820f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_SET_CONTAINER handler: attach @group to the container
 * identified by @container_fd.  Takes a container reference and sets
 * container_users to 1 on success.  Caller must hold group->group_rwsem
 * for write.
 */
static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held_write(&group->group_rwsem);

	if (group->container || WARN_ON(group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto unlock_out;
	}

	/*
	 * Claim exclusive DMA ownership of the iommu group for this
	 * container file; released in __vfio_group_unset_container().
	 */
	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* Roll back the ownership claimed above */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto unlock_out;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}
10520f3e72b5SJason Gunthorpe 
10530f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops;
10540f3e72b5SJason Gunthorpe 
/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	/*
	 * open_count is updated under dev_set->lock (see vfio_device_open());
	 * READ_ONCE lets this sanity check run from paths not holding it.
	 */
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
10600f3e72b5SJason Gunthorpe 
/*
 * Account a new container user for @device's group and pin the group's
 * opened file so the container cannot go away while the device fd
 * exists.  Undone by vfio_device_unassign_container().
 *
 * Fails unless the group is attached to a container with an iommu driver
 * set; no-iommu groups additionally require CAP_SYS_RAWIO.  Caller must
 * hold group->group_rwsem for write.
 */
static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}
10780f3e72b5SJason Gunthorpe 
10790f3e72b5SJason Gunthorpe static void vfio_device_unassign_container(struct vfio_device *device)
10800f3e72b5SJason Gunthorpe {
10810f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
10820f3e72b5SJason Gunthorpe 	WARN_ON(device->group->container_users <= 1);
10830f3e72b5SJason Gunthorpe 	device->group->container_users--;
10840f3e72b5SJason Gunthorpe 	fput(device->group->opened_file);
10850f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
10860f3e72b5SJason Gunthorpe }
10870f3e72b5SJason Gunthorpe 
/*
 * Create and return the struct file for a device fd.  On success the
 * caller's reference on @device is transferred to the file and released
 * by vfio_device_fops_release(); on failure an ERR_PTR is returned.
 *
 * The first opener (open_count 0 -> 1) latches the group's KVM pointer
 * and runs the driver's open_device() hook under dev_set->lock.
 */
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	/* Pin the bound driver's module while the device fd is open */
	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		/* Let the iommu backend track this opened device */
		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	/* Only unwind the driver open if we were the first opener */
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* Drop the latched KVM pointer when the last opener unwinds */
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}
11800f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_GET_DEVICE_FD handler: look up a device in @group by the
 * user-supplied name string at @arg and return a new fd for it.
 * Returns the fd number on success or a negative errno.
 */
static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
					  char __user *arg)
{
	struct vfio_device *device;
	struct file *filep;
	char *buf;
	int fdno;
	int ret;

	/* Copy the name from userspace, bounded to one page */
	buf = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* On success this holds a reference on the device */
	device = vfio_device_get_from_name(group, buf);
	kfree(buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	/* The device reference moves into the file on success */
	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put(device);
	return ret;
}
12200f3e72b5SJason Gunthorpe 
/*
 * ioctl dispatcher for a VFIO group file.  Handles device FD creation,
 * group status queries, and container attach/detach; unknown commands
 * fall through and return -ENOTTY.
 */
static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	void __user *uarg = (void __user *)arg;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_DEVICE_FD:
		return vfio_group_ioctl_get_device_fd(group, uarg);
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		/* Only the header through the flags field is exchanged. */
		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		/* group_rwsem protects group->container */
		down_read(&group->group_rwsem);
		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
					VFIO_GROUP_FLAGS_VIABLE;
		else if (!iommu_group_dma_owner_claimed(group->iommu_group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
		up_read(&group->group_rwsem);

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		/* userspace passes the container FD directly as the arg */
		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		down_write(&group->group_rwsem);
		ret = vfio_group_set_container(group, fd);
		up_write(&group->group_rwsem);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		down_write(&group->group_rwsem);
		ret = vfio_group_unset_container(group);
		up_write(&group->group_rwsem);
		break;
	}

	return ret;
}
12840f3e72b5SJason Gunthorpe 
/*
 * Open handler for the group character device.  Only a single open file
 * per group is permitted (enforced via group->opened_file below).
 */
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	down_write(&group->group_rwsem);

	/* users can be zero if this races with vfio_group_put() */
	if (!refcount_inc_not_zero(&group->users)) {
		ret = -ENODEV;
		goto err_unlock;
	}

	/* no-iommu mode bypasses IOMMU protection; require raw I/O privilege */
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto err_put;
	}

	/*
	 * Do we need multiple instances of the group open?  Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto err_put;
	}
	group->opened_file = filep;
	filep->private_data = group;

	up_write(&group->group_rwsem);
	return 0;
err_put:
	/* drop the reference taken by refcount_inc_not_zero() above */
	vfio_group_put(group);
err_unlock:
	up_write(&group->group_rwsem);
	return ret;
}
13220f3e72b5SJason Gunthorpe 
/*
 * Release handler for the group character device: tears down any
 * remaining container association and drops the open's group reference.
 */
static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	down_write(&group->group_rwsem);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container) {
		/* only the group's own container user should remain */
		WARN_ON(group->container_users != 1);
		__vfio_group_unset_container(group);
	}
	group->opened_file = NULL;
	up_write(&group->group_rwsem);

	/* pairs with the reference taken in vfio_group_fops_open() */
	vfio_group_put(group);

	return 0;
}
13460f3e72b5SJason Gunthorpe 
/* File operations backing the VFIO group character device. */
static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
13540f3e72b5SJason Gunthorpe 
/*
 * VFIO Device fd
 */
/*
 * Release handler for a device FD: quiesces the device on last close,
 * unregisters it from the IOMMU backend, and unwinds the references
 * taken when the FD was created.
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	/* dev_set->lock serializes open_count; group_rwsem nests inside it */
	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	/* last close: let the driver quiesce the device */
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	/*
	 * NOTE(review): group->container is dereferenced unconditionally,
	 * relying on the container staying attached while a device FD is
	 * open — confirm against vfio_device_open()/unset_container paths.
	 */
	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* the KVM association only lives as long as some FD is open */
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	/* release the driver module pin taken at open time */
	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	vfio_device_put(device);

	return 0;
}
13870f3e72b5SJason Gunthorpe 
/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @device - The vfio_device whose migration capabilities bound the walk
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 *
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/*
	 * Flags that must all be present in device->migration_flags for a
	 * state to be supported.  ERROR maps to ~0U, which can never be
	 * satisfied, so ERROR is always rejected as cur_fsm/new_fsm below.
	 */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	/* A current state the device cannot support indicates a driver bug. */
	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	   (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
15270f3e72b5SJason Gunthorpe 
15280f3e72b5SJason Gunthorpe /*
15290f3e72b5SJason Gunthorpe  * Convert the drivers's struct file into a FD number and return it to userspace
15300f3e72b5SJason Gunthorpe  */
15310f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
15320f3e72b5SJason Gunthorpe 				   struct vfio_device_feature_mig_state *mig)
15330f3e72b5SJason Gunthorpe {
15340f3e72b5SJason Gunthorpe 	int ret;
15350f3e72b5SJason Gunthorpe 	int fd;
15360f3e72b5SJason Gunthorpe 
15370f3e72b5SJason Gunthorpe 	fd = get_unused_fd_flags(O_CLOEXEC);
15380f3e72b5SJason Gunthorpe 	if (fd < 0) {
15390f3e72b5SJason Gunthorpe 		ret = fd;
15400f3e72b5SJason Gunthorpe 		goto out_fput;
15410f3e72b5SJason Gunthorpe 	}
15420f3e72b5SJason Gunthorpe 
15430f3e72b5SJason Gunthorpe 	mig->data_fd = fd;
15440f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, mig, sizeof(*mig))) {
15450f3e72b5SJason Gunthorpe 		ret = -EFAULT;
15460f3e72b5SJason Gunthorpe 		goto out_put_unused;
15470f3e72b5SJason Gunthorpe 	}
15480f3e72b5SJason Gunthorpe 	fd_install(fd, filp);
15490f3e72b5SJason Gunthorpe 	return 0;
15500f3e72b5SJason Gunthorpe 
15510f3e72b5SJason Gunthorpe out_put_unused:
15520f3e72b5SJason Gunthorpe 	put_unused_fd(fd);
15530f3e72b5SJason Gunthorpe out_fput:
15540f3e72b5SJason Gunthorpe 	fput(filp);
15550f3e72b5SJason Gunthorpe 	return ret;
15560f3e72b5SJason Gunthorpe }
15570f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE handler: on GET, report the current
 * migration state; on SET, ask the driver to transition to the requested
 * state.  A SET may yield a struct file for migration data transfer, which
 * is returned to userspace as mig.data_fd.
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	/*
	 * NULL means the transition needs no data-transfer FD; an ERR_PTR is
	 * reported to userspace after data_fd has been set to -1 below.
	 */
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
16070f3e72b5SJason Gunthorpe 
16080f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
16090f3e72b5SJason Gunthorpe 					       u32 flags, void __user *arg,
16100f3e72b5SJason Gunthorpe 					       size_t argsz)
16110f3e72b5SJason Gunthorpe {
16120f3e72b5SJason Gunthorpe 	struct vfio_device_feature_migration mig = {
16130f3e72b5SJason Gunthorpe 		.flags = device->migration_flags,
16140f3e72b5SJason Gunthorpe 	};
16150f3e72b5SJason Gunthorpe 	int ret;
16160f3e72b5SJason Gunthorpe 
16170f3e72b5SJason Gunthorpe 	if (!device->mig_ops)
16180f3e72b5SJason Gunthorpe 		return -ENOTTY;
16190f3e72b5SJason Gunthorpe 
16200f3e72b5SJason Gunthorpe 	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
16210f3e72b5SJason Gunthorpe 				 sizeof(mig));
16220f3e72b5SJason Gunthorpe 	if (ret != 1)
16230f3e72b5SJason Gunthorpe 		return ret;
16240f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, &mig, sizeof(mig)))
16250f3e72b5SJason Gunthorpe 		return -EFAULT;
16260f3e72b5SJason Gunthorpe 	return 0;
16270f3e72b5SJason Gunthorpe }
16280f3e72b5SJason Gunthorpe 
16290f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature(struct vfio_device *device,
16300f3e72b5SJason Gunthorpe 				     struct vfio_device_feature __user *arg)
16310f3e72b5SJason Gunthorpe {
16320f3e72b5SJason Gunthorpe 	size_t minsz = offsetofend(struct vfio_device_feature, flags);
16330f3e72b5SJason Gunthorpe 	struct vfio_device_feature feature;
16340f3e72b5SJason Gunthorpe 
16350f3e72b5SJason Gunthorpe 	if (copy_from_user(&feature, arg, minsz))
16360f3e72b5SJason Gunthorpe 		return -EFAULT;
16370f3e72b5SJason Gunthorpe 
16380f3e72b5SJason Gunthorpe 	if (feature.argsz < minsz)
16390f3e72b5SJason Gunthorpe 		return -EINVAL;
16400f3e72b5SJason Gunthorpe 
16410f3e72b5SJason Gunthorpe 	/* Check unknown flags */
16420f3e72b5SJason Gunthorpe 	if (feature.flags &
16430f3e72b5SJason Gunthorpe 	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
16440f3e72b5SJason Gunthorpe 	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
16450f3e72b5SJason Gunthorpe 		return -EINVAL;
16460f3e72b5SJason Gunthorpe 
16470f3e72b5SJason Gunthorpe 	/* GET & SET are mutually exclusive except with PROBE */
16480f3e72b5SJason Gunthorpe 	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
16490f3e72b5SJason Gunthorpe 	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
16500f3e72b5SJason Gunthorpe 	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
16510f3e72b5SJason Gunthorpe 		return -EINVAL;
16520f3e72b5SJason Gunthorpe 
16530f3e72b5SJason Gunthorpe 	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
16540f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE_MIGRATION:
16550f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature_migration(
16560f3e72b5SJason Gunthorpe 			device, feature.flags, arg->data,
16570f3e72b5SJason Gunthorpe 			feature.argsz - minsz);
16580f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
16590f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature_mig_device_state(
16600f3e72b5SJason Gunthorpe 			device, feature.flags, arg->data,
16610f3e72b5SJason Gunthorpe 			feature.argsz - minsz);
16620f3e72b5SJason Gunthorpe 	default:
16630f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->device_feature))
16640f3e72b5SJason Gunthorpe 			return -EINVAL;
16650f3e72b5SJason Gunthorpe 		return device->ops->device_feature(device, feature.flags,
16660f3e72b5SJason Gunthorpe 						   arg->data,
16670f3e72b5SJason Gunthorpe 						   feature.argsz - minsz);
16680f3e72b5SJason Gunthorpe 	}
16690f3e72b5SJason Gunthorpe }
16700f3e72b5SJason Gunthorpe 
16710f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep,
16720f3e72b5SJason Gunthorpe 				       unsigned int cmd, unsigned long arg)
16730f3e72b5SJason Gunthorpe {
16740f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
16750f3e72b5SJason Gunthorpe 
16760f3e72b5SJason Gunthorpe 	switch (cmd) {
16770f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE:
16780f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature(device, (void __user *)arg);
16790f3e72b5SJason Gunthorpe 	default:
16800f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->ioctl))
16810f3e72b5SJason Gunthorpe 			return -EINVAL;
16820f3e72b5SJason Gunthorpe 		return device->ops->ioctl(device, cmd, arg);
16830f3e72b5SJason Gunthorpe 	}
16840f3e72b5SJason Gunthorpe }
16850f3e72b5SJason Gunthorpe 
16860f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
16870f3e72b5SJason Gunthorpe 				     size_t count, loff_t *ppos)
16880f3e72b5SJason Gunthorpe {
16890f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
16900f3e72b5SJason Gunthorpe 
16910f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->read))
16920f3e72b5SJason Gunthorpe 		return -EINVAL;
16930f3e72b5SJason Gunthorpe 
16940f3e72b5SJason Gunthorpe 	return device->ops->read(device, buf, count, ppos);
16950f3e72b5SJason Gunthorpe }
16960f3e72b5SJason Gunthorpe 
16970f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep,
16980f3e72b5SJason Gunthorpe 				      const char __user *buf,
16990f3e72b5SJason Gunthorpe 				      size_t count, loff_t *ppos)
17000f3e72b5SJason Gunthorpe {
17010f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
17020f3e72b5SJason Gunthorpe 
17030f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->write))
17040f3e72b5SJason Gunthorpe 		return -EINVAL;
17050f3e72b5SJason Gunthorpe 
17060f3e72b5SJason Gunthorpe 	return device->ops->write(device, buf, count, ppos);
17070f3e72b5SJason Gunthorpe }
17080f3e72b5SJason Gunthorpe 
17090f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
17100f3e72b5SJason Gunthorpe {
17110f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
17120f3e72b5SJason Gunthorpe 
17130f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->mmap))
17140f3e72b5SJason Gunthorpe 		return -EINVAL;
17150f3e72b5SJason Gunthorpe 
17160f3e72b5SJason Gunthorpe 	return device->ops->mmap(device, vma);
17170f3e72b5SJason Gunthorpe }
17180f3e72b5SJason Gunthorpe 
/* File operations backing VFIO device file descriptors. */
static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
17280f3e72b5SJason Gunthorpe 
17290f3e72b5SJason Gunthorpe /**
17300f3e72b5SJason Gunthorpe  * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
17310f3e72b5SJason Gunthorpe  * @file: VFIO group file
17320f3e72b5SJason Gunthorpe  *
17330f3e72b5SJason Gunthorpe  * The returned iommu_group is valid as long as a ref is held on the file.
17340f3e72b5SJason Gunthorpe  */
17350f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file)
17360f3e72b5SJason Gunthorpe {
17370f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
17380f3e72b5SJason Gunthorpe 
17390f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
17400f3e72b5SJason Gunthorpe 		return NULL;
17410f3e72b5SJason Gunthorpe 	return group->iommu_group;
17420f3e72b5SJason Gunthorpe }
17430f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
17440f3e72b5SJason Gunthorpe 
17450f3e72b5SJason Gunthorpe /**
17460f3e72b5SJason Gunthorpe  * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
17470f3e72b5SJason Gunthorpe  *        is always CPU cache coherent
17480f3e72b5SJason Gunthorpe  * @file: VFIO group file
17490f3e72b5SJason Gunthorpe  *
17500f3e72b5SJason Gunthorpe  * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
17510f3e72b5SJason Gunthorpe  * bit in DMA transactions. A return of false indicates that the user has
17520f3e72b5SJason Gunthorpe  * rights to access additional instructions such as wbinvd on x86.
17530f3e72b5SJason Gunthorpe  */
17540f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file)
17550f3e72b5SJason Gunthorpe {
17560f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
17570f3e72b5SJason Gunthorpe 	bool ret;
17580f3e72b5SJason Gunthorpe 
17590f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
17600f3e72b5SJason Gunthorpe 		return true;
17610f3e72b5SJason Gunthorpe 
17620f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
17630f3e72b5SJason Gunthorpe 	if (group->container) {
17640f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(group->container,
17650f3e72b5SJason Gunthorpe 						 VFIO_DMA_CC_IOMMU);
17660f3e72b5SJason Gunthorpe 	} else {
17670f3e72b5SJason Gunthorpe 		/*
17680f3e72b5SJason Gunthorpe 		 * Since the coherency state is determined only once a container
17690f3e72b5SJason Gunthorpe 		 * is attached the user must do so before they can prove they
17700f3e72b5SJason Gunthorpe 		 * have permission.
17710f3e72b5SJason Gunthorpe 		 */
17720f3e72b5SJason Gunthorpe 		ret = true;
17730f3e72b5SJason Gunthorpe 	}
17740f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
17750f3e72b5SJason Gunthorpe 	return ret;
17760f3e72b5SJason Gunthorpe }
17770f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
17780f3e72b5SJason Gunthorpe 
17790f3e72b5SJason Gunthorpe /**
17800f3e72b5SJason Gunthorpe  * vfio_file_set_kvm - Link a kvm with VFIO drivers
17810f3e72b5SJason Gunthorpe  * @file: VFIO group file
17820f3e72b5SJason Gunthorpe  * @kvm: KVM to link
17830f3e72b5SJason Gunthorpe  *
17840f3e72b5SJason Gunthorpe  * When a VFIO device is first opened the KVM will be available in
17850f3e72b5SJason Gunthorpe  * device->kvm if one was associated with the group.
17860f3e72b5SJason Gunthorpe  */
17870f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
17880f3e72b5SJason Gunthorpe {
17890f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
17900f3e72b5SJason Gunthorpe 
17910f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
17920f3e72b5SJason Gunthorpe 		return;
17930f3e72b5SJason Gunthorpe 
17940f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
17950f3e72b5SJason Gunthorpe 	group->kvm = kvm;
17960f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
17970f3e72b5SJason Gunthorpe }
17980f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
17990f3e72b5SJason Gunthorpe 
18000f3e72b5SJason Gunthorpe /**
18010f3e72b5SJason Gunthorpe  * vfio_file_has_dev - True if the VFIO file is a handle for device
18020f3e72b5SJason Gunthorpe  * @file: VFIO file to check
18030f3e72b5SJason Gunthorpe  * @device: Device that must be part of the file
18040f3e72b5SJason Gunthorpe  *
18050f3e72b5SJason Gunthorpe  * Returns true if given file has permission to manipulate the given device.
18060f3e72b5SJason Gunthorpe  */
18070f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
18080f3e72b5SJason Gunthorpe {
18090f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
18100f3e72b5SJason Gunthorpe 
18110f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
18120f3e72b5SJason Gunthorpe 		return false;
18130f3e72b5SJason Gunthorpe 
18140f3e72b5SJason Gunthorpe 	return group == device->group;
18150f3e72b5SJason Gunthorpe }
18160f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev);
18170f3e72b5SJason Gunthorpe 
18180f3e72b5SJason Gunthorpe /*
18190f3e72b5SJason Gunthorpe  * Sub-module support
18200f3e72b5SJason Gunthorpe  */
18210f3e72b5SJason Gunthorpe /*
18220f3e72b5SJason Gunthorpe  * Helper for managing a buffer of info chain capabilities, allocate or
18230f3e72b5SJason Gunthorpe  * reallocate a buffer with additional @size, filling in @id and @version
18240f3e72b5SJason Gunthorpe  * of the capability.  A pointer to the new capability is returned.
18250f3e72b5SJason Gunthorpe  *
18260f3e72b5SJason Gunthorpe  * NB. The chain is based at the head of the buffer, so new entries are
18270f3e72b5SJason Gunthorpe  * added to the tail, vfio_info_cap_shift() should be called to fixup the
18280f3e72b5SJason Gunthorpe  * next offsets prior to copying to the user buffer.
18290f3e72b5SJason Gunthorpe  */
18300f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
18310f3e72b5SJason Gunthorpe 					       size_t size, u16 id, u16 version)
18320f3e72b5SJason Gunthorpe {
18330f3e72b5SJason Gunthorpe 	void *buf;
18340f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header, *tmp;
18350f3e72b5SJason Gunthorpe 
18360f3e72b5SJason Gunthorpe 	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
18370f3e72b5SJason Gunthorpe 	if (!buf) {
18380f3e72b5SJason Gunthorpe 		kfree(caps->buf);
18390f3e72b5SJason Gunthorpe 		caps->buf = NULL;
18400f3e72b5SJason Gunthorpe 		caps->size = 0;
18410f3e72b5SJason Gunthorpe 		return ERR_PTR(-ENOMEM);
18420f3e72b5SJason Gunthorpe 	}
18430f3e72b5SJason Gunthorpe 
18440f3e72b5SJason Gunthorpe 	caps->buf = buf;
18450f3e72b5SJason Gunthorpe 	header = buf + caps->size;
18460f3e72b5SJason Gunthorpe 
18470f3e72b5SJason Gunthorpe 	/* Eventually copied to user buffer, zero */
18480f3e72b5SJason Gunthorpe 	memset(header, 0, size);
18490f3e72b5SJason Gunthorpe 
18500f3e72b5SJason Gunthorpe 	header->id = id;
18510f3e72b5SJason Gunthorpe 	header->version = version;
18520f3e72b5SJason Gunthorpe 
18530f3e72b5SJason Gunthorpe 	/* Add to the end of the capability chain */
18540f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
18550f3e72b5SJason Gunthorpe 		; /* nothing */
18560f3e72b5SJason Gunthorpe 
18570f3e72b5SJason Gunthorpe 	tmp->next = caps->size;
18580f3e72b5SJason Gunthorpe 	caps->size += size;
18590f3e72b5SJason Gunthorpe 
18600f3e72b5SJason Gunthorpe 	return header;
18610f3e72b5SJason Gunthorpe }
18620f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add);
18630f3e72b5SJason Gunthorpe 
18640f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
18650f3e72b5SJason Gunthorpe {
18660f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *tmp;
18670f3e72b5SJason Gunthorpe 	void *buf = (void *)caps->buf;
18680f3e72b5SJason Gunthorpe 
18690f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
18700f3e72b5SJason Gunthorpe 		tmp->next += offset;
18710f3e72b5SJason Gunthorpe }
18720f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift);
18730f3e72b5SJason Gunthorpe 
18740f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps,
18750f3e72b5SJason Gunthorpe 			     struct vfio_info_cap_header *cap, size_t size)
18760f3e72b5SJason Gunthorpe {
18770f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header;
18780f3e72b5SJason Gunthorpe 
18790f3e72b5SJason Gunthorpe 	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
18800f3e72b5SJason Gunthorpe 	if (IS_ERR(header))
18810f3e72b5SJason Gunthorpe 		return PTR_ERR(header);
18820f3e72b5SJason Gunthorpe 
18830f3e72b5SJason Gunthorpe 	memcpy(header + 1, cap + 1, size - sizeof(*header));
18840f3e72b5SJason Gunthorpe 
18850f3e72b5SJason Gunthorpe 	return 0;
18860f3e72b5SJason Gunthorpe }
18870f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability);
18880f3e72b5SJason Gunthorpe 
/*
 * Validate a VFIO_DEVICE_SET_IRQS header from userspace.
 * @hdr [in]          : header to validate
 * @num_irqs [in]     : number of IRQs the device exposes at this index
 * @max_irq_type [in] : number of valid IRQ index types for the device
 * @data_size [out]   : size in bytes of the payload implied by @hdr
 *                      (0 for VFIO_IRQ_SET_DATA_NONE); may be NULL only
 *                      when no payload is expected
 * Returns 0 if @hdr is self-consistent, -EINVAL otherwise.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/*
	 * Reject a short argsz, an out-of-range index, a start/count pair
	 * that would overflow u32, and any flag bits outside the data-type
	 * and action masks.
	 */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	/* Default to "no payload"; refined below once size is known */
	if (data_size)
		*data_size = 0;

	/* The requested [start, start + count) range must fit the device */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	/* Per-IRQ payload element size is dictated by the data type flag */
	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		/* argsz must cover the payload, and the caller must want it */
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
19360f3e72b5SJason Gunthorpe 
19370f3e72b5SJason Gunthorpe /*
19380f3e72b5SJason Gunthorpe  * Pin contiguous user pages and return their associated host pages for local
19390f3e72b5SJason Gunthorpe  * domain only.
19400f3e72b5SJason Gunthorpe  * @device [in]  : device
19410f3e72b5SJason Gunthorpe  * @iova [in]    : starting IOVA of user pages to be pinned.
19420f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be pinned.  This count should not
19430f3e72b5SJason Gunthorpe  *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
19440f3e72b5SJason Gunthorpe  * @prot [in]    : protection flags
19450f3e72b5SJason Gunthorpe  * @pages[out]   : array of host pages
19460f3e72b5SJason Gunthorpe  * Return error or number of pages pinned.
19470f3e72b5SJason Gunthorpe  */
19480f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
19490f3e72b5SJason Gunthorpe 		   int npage, int prot, struct page **pages)
19500f3e72b5SJason Gunthorpe {
19510f3e72b5SJason Gunthorpe 	struct vfio_container *container;
19520f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
19530f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
19540f3e72b5SJason Gunthorpe 	int ret;
19550f3e72b5SJason Gunthorpe 
19560f3e72b5SJason Gunthorpe 	if (!pages || !npage || !vfio_assert_device_open(device))
19570f3e72b5SJason Gunthorpe 		return -EINVAL;
19580f3e72b5SJason Gunthorpe 
19590f3e72b5SJason Gunthorpe 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
19600f3e72b5SJason Gunthorpe 		return -E2BIG;
19610f3e72b5SJason Gunthorpe 
19620f3e72b5SJason Gunthorpe 	if (group->dev_counter > 1)
19630f3e72b5SJason Gunthorpe 		return -EINVAL;
19640f3e72b5SJason Gunthorpe 
19650f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
19660f3e72b5SJason Gunthorpe 	container = group->container;
19670f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
19680f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->pin_pages))
19690f3e72b5SJason Gunthorpe 		ret = driver->ops->pin_pages(container->iommu_data,
19700f3e72b5SJason Gunthorpe 					     group->iommu_group, iova,
19710f3e72b5SJason Gunthorpe 					     npage, prot, pages);
19720f3e72b5SJason Gunthorpe 	else
19730f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
19740f3e72b5SJason Gunthorpe 
19750f3e72b5SJason Gunthorpe 	return ret;
19760f3e72b5SJason Gunthorpe }
19770f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages);
19780f3e72b5SJason Gunthorpe 
19790f3e72b5SJason Gunthorpe /*
19800f3e72b5SJason Gunthorpe  * Unpin contiguous host pages for local domain only.
19810f3e72b5SJason Gunthorpe  * @device [in]  : device
19820f3e72b5SJason Gunthorpe  * @iova [in]    : starting address of user pages to be unpinned.
19830f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be unpinned.  This count should not
19840f3e72b5SJason Gunthorpe  *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
19850f3e72b5SJason Gunthorpe  */
19860f3e72b5SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
19870f3e72b5SJason Gunthorpe {
19880f3e72b5SJason Gunthorpe 	struct vfio_container *container;
19890f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
19900f3e72b5SJason Gunthorpe 
19910f3e72b5SJason Gunthorpe 	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
19920f3e72b5SJason Gunthorpe 		return;
19930f3e72b5SJason Gunthorpe 
19940f3e72b5SJason Gunthorpe 	if (WARN_ON(!vfio_assert_device_open(device)))
19950f3e72b5SJason Gunthorpe 		return;
19960f3e72b5SJason Gunthorpe 
19970f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
19980f3e72b5SJason Gunthorpe 	container = device->group->container;
19990f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
20000f3e72b5SJason Gunthorpe 
20010f3e72b5SJason Gunthorpe 	driver->ops->unpin_pages(container->iommu_data, iova, npage);
20020f3e72b5SJason Gunthorpe }
20030f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages);
20040f3e72b5SJason Gunthorpe 
20050f3e72b5SJason Gunthorpe /*
20060f3e72b5SJason Gunthorpe  * This interface allows the CPUs to perform some sort of virtual DMA on
20070f3e72b5SJason Gunthorpe  * behalf of the device.
20080f3e72b5SJason Gunthorpe  *
20090f3e72b5SJason Gunthorpe  * CPUs read/write from/into a range of IOVAs pointing to user space memory
20100f3e72b5SJason Gunthorpe  * into/from a kernel buffer.
20110f3e72b5SJason Gunthorpe  *
20120f3e72b5SJason Gunthorpe  * As the read/write of user space memory is conducted via the CPUs and is
20130f3e72b5SJason Gunthorpe  * not a real device DMA, it is not necessary to pin the user space memory.
20140f3e72b5SJason Gunthorpe  *
20150f3e72b5SJason Gunthorpe  * @device [in]		: VFIO device
20160f3e72b5SJason Gunthorpe  * @iova [in]		: base IOVA of a user space buffer
20170f3e72b5SJason Gunthorpe  * @data [in]		: pointer to kernel buffer
20180f3e72b5SJason Gunthorpe  * @len [in]		: kernel buffer length
20190f3e72b5SJason Gunthorpe  * @write		: indicate read or write
20200f3e72b5SJason Gunthorpe  * Return error code on failure or 0 on success.
20210f3e72b5SJason Gunthorpe  */
20220f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
20230f3e72b5SJason Gunthorpe 		size_t len, bool write)
20240f3e72b5SJason Gunthorpe {
20250f3e72b5SJason Gunthorpe 	struct vfio_container *container;
20260f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
20270f3e72b5SJason Gunthorpe 	int ret = 0;
20280f3e72b5SJason Gunthorpe 
20290f3e72b5SJason Gunthorpe 	if (!data || len <= 0 || !vfio_assert_device_open(device))
20300f3e72b5SJason Gunthorpe 		return -EINVAL;
20310f3e72b5SJason Gunthorpe 
20320f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
20330f3e72b5SJason Gunthorpe 	container = device->group->container;
20340f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
20350f3e72b5SJason Gunthorpe 
20360f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->dma_rw))
20370f3e72b5SJason Gunthorpe 		ret = driver->ops->dma_rw(container->iommu_data,
20380f3e72b5SJason Gunthorpe 					  iova, data, len, write);
20390f3e72b5SJason Gunthorpe 	else
20400f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
20410f3e72b5SJason Gunthorpe 	return ret;
20420f3e72b5SJason Gunthorpe }
20430f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw);
20440f3e72b5SJason Gunthorpe 
20450f3e72b5SJason Gunthorpe /*
20460f3e72b5SJason Gunthorpe  * Module/class support
20470f3e72b5SJason Gunthorpe  */
/* devtmpfs node name callback: place all vfio devices under /dev/vfio/ */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
20520f3e72b5SJason Gunthorpe 
/* /dev/vfio/vfio container chardev; world read/write, access is
 * gated by group/container semantics rather than file permissions. */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
20600f3e72b5SJason Gunthorpe 
/*
 * Module init: set up global state, register /dev/vfio/vfio, create the
 * class and chrdev region backing /dev/vfio/$GROUP, and (optionally)
 * register the no-iommu driver.  Unwinds in reverse order on failure.
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	/* /dev/vfio/vfio container chardev */
	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	/*
	 * Without CONFIG_VFIO_NOIOMMU, ret is still 0 from the
	 * alloc_chrdev_region() check above, so the if below is a no-op.
	 */
#ifdef CONFIG_VFIO_NOIOMMU
	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	if (ret)
		goto err_driver_register;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_driver_register:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}
21080f3e72b5SJason Gunthorpe 
/* Module exit: tear down everything vfio_init() set up, in reverse order. */
static void __exit vfio_cleanup(void)
{
	/* All groups must have been released before module unload */
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}
21230f3e72b5SJason Gunthorpe 
21240f3e72b5SJason Gunthorpe module_init(vfio_init);
21250f3e72b5SJason Gunthorpe module_exit(vfio_cleanup);
21260f3e72b5SJason Gunthorpe 
21270f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION);
21280f3e72b5SJason Gunthorpe MODULE_LICENSE("GPL v2");
21290f3e72b5SJason Gunthorpe MODULE_AUTHOR(DRIVER_AUTHOR);
21300f3e72b5SJason Gunthorpe MODULE_DESCRIPTION(DRIVER_DESC);
21310f3e72b5SJason Gunthorpe MODULE_ALIAS_MISCDEV(VFIO_MINOR);
21320f3e72b5SJason Gunthorpe MODULE_ALIAS("devname:vfio/vfio");
21330f3e72b5SJason Gunthorpe MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
2134