xref: /openbmc/linux/drivers/vfio/vfio_main.c (revision 8e5c6995)
10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
20f3e72b5SJason Gunthorpe /*
30f3e72b5SJason Gunthorpe  * VFIO core
40f3e72b5SJason Gunthorpe  *
50f3e72b5SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
60f3e72b5SJason Gunthorpe  *     Author: Alex Williamson <alex.williamson@redhat.com>
70f3e72b5SJason Gunthorpe  *
80f3e72b5SJason Gunthorpe  * Derived from original vfio:
90f3e72b5SJason Gunthorpe  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
100f3e72b5SJason Gunthorpe  * Author: Tom Lyon, pugs@cisco.com
110f3e72b5SJason Gunthorpe  */
120f3e72b5SJason Gunthorpe 
130f3e72b5SJason Gunthorpe #include <linux/cdev.h>
140f3e72b5SJason Gunthorpe #include <linux/compat.h>
150f3e72b5SJason Gunthorpe #include <linux/device.h>
160f3e72b5SJason Gunthorpe #include <linux/file.h>
170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h>
180f3e72b5SJason Gunthorpe #include <linux/fs.h>
190f3e72b5SJason Gunthorpe #include <linux/idr.h>
200f3e72b5SJason Gunthorpe #include <linux/iommu.h>
210f3e72b5SJason Gunthorpe #include <linux/list.h>
220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h>
230f3e72b5SJason Gunthorpe #include <linux/module.h>
240f3e72b5SJason Gunthorpe #include <linux/mutex.h>
250f3e72b5SJason Gunthorpe #include <linux/pci.h>
260f3e72b5SJason Gunthorpe #include <linux/rwsem.h>
270f3e72b5SJason Gunthorpe #include <linux/sched.h>
280f3e72b5SJason Gunthorpe #include <linux/slab.h>
290f3e72b5SJason Gunthorpe #include <linux/stat.h>
300f3e72b5SJason Gunthorpe #include <linux/string.h>
310f3e72b5SJason Gunthorpe #include <linux/uaccess.h>
320f3e72b5SJason Gunthorpe #include <linux/vfio.h>
330f3e72b5SJason Gunthorpe #include <linux/wait.h>
340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h>
35*8e5c6995SAbhishek Sahu #include <linux/pm_runtime.h>
360f3e72b5SJason Gunthorpe #include "vfio.h"
370f3e72b5SJason Gunthorpe 
380f3e72b5SJason Gunthorpe #define DRIVER_VERSION	"0.3"
390f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
400f3e72b5SJason Gunthorpe #define DRIVER_DESC	"VFIO - User Level meta-driver"
410f3e72b5SJason Gunthorpe 
420f3e72b5SJason Gunthorpe static struct vfio {
430f3e72b5SJason Gunthorpe 	struct class			*class;
440f3e72b5SJason Gunthorpe 	struct list_head		iommu_drivers_list;
450f3e72b5SJason Gunthorpe 	struct mutex			iommu_drivers_lock;
460f3e72b5SJason Gunthorpe 	struct list_head		group_list;
470f3e72b5SJason Gunthorpe 	struct mutex			group_lock; /* locks group_list */
480f3e72b5SJason Gunthorpe 	struct ida			group_ida;
490f3e72b5SJason Gunthorpe 	dev_t				group_devt;
500f3e72b5SJason Gunthorpe } vfio;
510f3e72b5SJason Gunthorpe 
520f3e72b5SJason Gunthorpe struct vfio_iommu_driver {
530f3e72b5SJason Gunthorpe 	const struct vfio_iommu_driver_ops	*ops;
540f3e72b5SJason Gunthorpe 	struct list_head			vfio_next;
550f3e72b5SJason Gunthorpe };
560f3e72b5SJason Gunthorpe 
570f3e72b5SJason Gunthorpe struct vfio_container {
580f3e72b5SJason Gunthorpe 	struct kref			kref;
590f3e72b5SJason Gunthorpe 	struct list_head		group_list;
600f3e72b5SJason Gunthorpe 	struct rw_semaphore		group_lock;
610f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver	*iommu_driver;
620f3e72b5SJason Gunthorpe 	void				*iommu_data;
630f3e72b5SJason Gunthorpe 	bool				noiommu;
640f3e72b5SJason Gunthorpe };
650f3e72b5SJason Gunthorpe 
660f3e72b5SJason Gunthorpe struct vfio_group {
670f3e72b5SJason Gunthorpe 	struct device 			dev;
680f3e72b5SJason Gunthorpe 	struct cdev			cdev;
690f3e72b5SJason Gunthorpe 	refcount_t			users;
700f3e72b5SJason Gunthorpe 	unsigned int			container_users;
710f3e72b5SJason Gunthorpe 	struct iommu_group		*iommu_group;
720f3e72b5SJason Gunthorpe 	struct vfio_container		*container;
730f3e72b5SJason Gunthorpe 	struct list_head		device_list;
740f3e72b5SJason Gunthorpe 	struct mutex			device_lock;
750f3e72b5SJason Gunthorpe 	struct list_head		vfio_next;
760f3e72b5SJason Gunthorpe 	struct list_head		container_next;
770f3e72b5SJason Gunthorpe 	enum vfio_group_type		type;
780f3e72b5SJason Gunthorpe 	unsigned int			dev_counter;
790f3e72b5SJason Gunthorpe 	struct rw_semaphore		group_rwsem;
800f3e72b5SJason Gunthorpe 	struct kvm			*kvm;
810f3e72b5SJason Gunthorpe 	struct file			*opened_file;
820f3e72b5SJason Gunthorpe 	struct blocking_notifier_head	notifier;
830f3e72b5SJason Gunthorpe };
840f3e72b5SJason Gunthorpe 
850f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
860f3e72b5SJason Gunthorpe static bool noiommu __read_mostly;
870f3e72b5SJason Gunthorpe module_param_named(enable_unsafe_noiommu_mode,
880f3e72b5SJason Gunthorpe 		   noiommu, bool, S_IRUGO | S_IWUSR);
890f3e72b5SJason Gunthorpe MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
900f3e72b5SJason Gunthorpe #endif
910f3e72b5SJason Gunthorpe 
920f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa);
930f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops;
940f3e72b5SJason Gunthorpe 
950f3e72b5SJason Gunthorpe int vfio_assign_device_set(struct vfio_device *device, void *set_id)
960f3e72b5SJason Gunthorpe {
970f3e72b5SJason Gunthorpe 	unsigned long idx = (unsigned long)set_id;
980f3e72b5SJason Gunthorpe 	struct vfio_device_set *new_dev_set;
990f3e72b5SJason Gunthorpe 	struct vfio_device_set *dev_set;
1000f3e72b5SJason Gunthorpe 
1010f3e72b5SJason Gunthorpe 	if (WARN_ON(!set_id))
1020f3e72b5SJason Gunthorpe 		return -EINVAL;
1030f3e72b5SJason Gunthorpe 
1040f3e72b5SJason Gunthorpe 	/*
1050f3e72b5SJason Gunthorpe 	 * Atomically acquire a singleton object in the xarray for this set_id
1060f3e72b5SJason Gunthorpe 	 */
1070f3e72b5SJason Gunthorpe 	xa_lock(&vfio_device_set_xa);
1080f3e72b5SJason Gunthorpe 	dev_set = xa_load(&vfio_device_set_xa, idx);
1090f3e72b5SJason Gunthorpe 	if (dev_set)
1100f3e72b5SJason Gunthorpe 		goto found_get_ref;
1110f3e72b5SJason Gunthorpe 	xa_unlock(&vfio_device_set_xa);
1120f3e72b5SJason Gunthorpe 
1130f3e72b5SJason Gunthorpe 	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
1140f3e72b5SJason Gunthorpe 	if (!new_dev_set)
1150f3e72b5SJason Gunthorpe 		return -ENOMEM;
1160f3e72b5SJason Gunthorpe 	mutex_init(&new_dev_set->lock);
1170f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&new_dev_set->device_list);
1180f3e72b5SJason Gunthorpe 	new_dev_set->set_id = set_id;
1190f3e72b5SJason Gunthorpe 
1200f3e72b5SJason Gunthorpe 	xa_lock(&vfio_device_set_xa);
1210f3e72b5SJason Gunthorpe 	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
1220f3e72b5SJason Gunthorpe 			       GFP_KERNEL);
1230f3e72b5SJason Gunthorpe 	if (!dev_set) {
1240f3e72b5SJason Gunthorpe 		dev_set = new_dev_set;
1250f3e72b5SJason Gunthorpe 		goto found_get_ref;
1260f3e72b5SJason Gunthorpe 	}
1270f3e72b5SJason Gunthorpe 
1280f3e72b5SJason Gunthorpe 	kfree(new_dev_set);
1290f3e72b5SJason Gunthorpe 	if (xa_is_err(dev_set)) {
1300f3e72b5SJason Gunthorpe 		xa_unlock(&vfio_device_set_xa);
1310f3e72b5SJason Gunthorpe 		return xa_err(dev_set);
1320f3e72b5SJason Gunthorpe 	}
1330f3e72b5SJason Gunthorpe 
1340f3e72b5SJason Gunthorpe found_get_ref:
1350f3e72b5SJason Gunthorpe 	dev_set->device_count++;
1360f3e72b5SJason Gunthorpe 	xa_unlock(&vfio_device_set_xa);
1370f3e72b5SJason Gunthorpe 	mutex_lock(&dev_set->lock);
1380f3e72b5SJason Gunthorpe 	device->dev_set = dev_set;
1390f3e72b5SJason Gunthorpe 	list_add_tail(&device->dev_set_list, &dev_set->device_list);
1400f3e72b5SJason Gunthorpe 	mutex_unlock(&dev_set->lock);
1410f3e72b5SJason Gunthorpe 	return 0;
1420f3e72b5SJason Gunthorpe }
1430f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_assign_device_set);
1440f3e72b5SJason Gunthorpe 
1450f3e72b5SJason Gunthorpe static void vfio_release_device_set(struct vfio_device *device)
1460f3e72b5SJason Gunthorpe {
1470f3e72b5SJason Gunthorpe 	struct vfio_device_set *dev_set = device->dev_set;
1480f3e72b5SJason Gunthorpe 
1490f3e72b5SJason Gunthorpe 	if (!dev_set)
1500f3e72b5SJason Gunthorpe 		return;
1510f3e72b5SJason Gunthorpe 
1520f3e72b5SJason Gunthorpe 	mutex_lock(&dev_set->lock);
1530f3e72b5SJason Gunthorpe 	list_del(&device->dev_set_list);
1540f3e72b5SJason Gunthorpe 	mutex_unlock(&dev_set->lock);
1550f3e72b5SJason Gunthorpe 
1560f3e72b5SJason Gunthorpe 	xa_lock(&vfio_device_set_xa);
1570f3e72b5SJason Gunthorpe 	if (!--dev_set->device_count) {
1580f3e72b5SJason Gunthorpe 		__xa_erase(&vfio_device_set_xa,
1590f3e72b5SJason Gunthorpe 			   (unsigned long)dev_set->set_id);
1600f3e72b5SJason Gunthorpe 		mutex_destroy(&dev_set->lock);
1610f3e72b5SJason Gunthorpe 		kfree(dev_set);
1620f3e72b5SJason Gunthorpe 	}
1630f3e72b5SJason Gunthorpe 	xa_unlock(&vfio_device_set_xa);
1640f3e72b5SJason Gunthorpe }
1650f3e72b5SJason Gunthorpe 
1660f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
1670f3e72b5SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg)
1680f3e72b5SJason Gunthorpe {
1690f3e72b5SJason Gunthorpe 	if (arg != VFIO_NOIOMMU_IOMMU)
1700f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
1710f3e72b5SJason Gunthorpe 	if (!capable(CAP_SYS_RAWIO))
1720f3e72b5SJason Gunthorpe 		return ERR_PTR(-EPERM);
1730f3e72b5SJason Gunthorpe 
1740f3e72b5SJason Gunthorpe 	return NULL;
1750f3e72b5SJason Gunthorpe }
1760f3e72b5SJason Gunthorpe 
1770f3e72b5SJason Gunthorpe static void vfio_noiommu_release(void *iommu_data)
1780f3e72b5SJason Gunthorpe {
1790f3e72b5SJason Gunthorpe }
1800f3e72b5SJason Gunthorpe 
1810f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data,
1820f3e72b5SJason Gunthorpe 			       unsigned int cmd, unsigned long arg)
1830f3e72b5SJason Gunthorpe {
1840f3e72b5SJason Gunthorpe 	if (cmd == VFIO_CHECK_EXTENSION)
1850f3e72b5SJason Gunthorpe 		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
1860f3e72b5SJason Gunthorpe 
1870f3e72b5SJason Gunthorpe 	return -ENOTTY;
1880f3e72b5SJason Gunthorpe }
1890f3e72b5SJason Gunthorpe 
1900f3e72b5SJason Gunthorpe static int vfio_noiommu_attach_group(void *iommu_data,
1910f3e72b5SJason Gunthorpe 		struct iommu_group *iommu_group, enum vfio_group_type type)
1920f3e72b5SJason Gunthorpe {
1930f3e72b5SJason Gunthorpe 	return 0;
1940f3e72b5SJason Gunthorpe }
1950f3e72b5SJason Gunthorpe 
1960f3e72b5SJason Gunthorpe static void vfio_noiommu_detach_group(void *iommu_data,
1970f3e72b5SJason Gunthorpe 				      struct iommu_group *iommu_group)
1980f3e72b5SJason Gunthorpe {
1990f3e72b5SJason Gunthorpe }
2000f3e72b5SJason Gunthorpe 
2010f3e72b5SJason Gunthorpe static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
2020f3e72b5SJason Gunthorpe 	.name = "vfio-noiommu",
2030f3e72b5SJason Gunthorpe 	.owner = THIS_MODULE,
2040f3e72b5SJason Gunthorpe 	.open = vfio_noiommu_open,
2050f3e72b5SJason Gunthorpe 	.release = vfio_noiommu_release,
2060f3e72b5SJason Gunthorpe 	.ioctl = vfio_noiommu_ioctl,
2070f3e72b5SJason Gunthorpe 	.attach_group = vfio_noiommu_attach_group,
2080f3e72b5SJason Gunthorpe 	.detach_group = vfio_noiommu_detach_group,
2090f3e72b5SJason Gunthorpe };
2100f3e72b5SJason Gunthorpe 
2110f3e72b5SJason Gunthorpe /*
2120f3e72b5SJason Gunthorpe  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
2130f3e72b5SJason Gunthorpe  * use vfio-noiommu.
2140f3e72b5SJason Gunthorpe  */
2150f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
2160f3e72b5SJason Gunthorpe 		const struct vfio_iommu_driver *driver)
2170f3e72b5SJason Gunthorpe {
2180f3e72b5SJason Gunthorpe 	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
2190f3e72b5SJason Gunthorpe }
2200f3e72b5SJason Gunthorpe #else
2210f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
2220f3e72b5SJason Gunthorpe 		const struct vfio_iommu_driver *driver)
2230f3e72b5SJason Gunthorpe {
2240f3e72b5SJason Gunthorpe 	return true;
2250f3e72b5SJason Gunthorpe }
2260f3e72b5SJason Gunthorpe #endif /* CONFIG_VFIO_NOIOMMU */
2270f3e72b5SJason Gunthorpe 
2280f3e72b5SJason Gunthorpe /*
2290f3e72b5SJason Gunthorpe  * IOMMU driver registration
2300f3e72b5SJason Gunthorpe  */
2310f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2320f3e72b5SJason Gunthorpe {
2330f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver, *tmp;
2340f3e72b5SJason Gunthorpe 
2350f3e72b5SJason Gunthorpe 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
2360f3e72b5SJason Gunthorpe 		return -EINVAL;
2370f3e72b5SJason Gunthorpe 
2380f3e72b5SJason Gunthorpe 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
2390f3e72b5SJason Gunthorpe 	if (!driver)
2400f3e72b5SJason Gunthorpe 		return -ENOMEM;
2410f3e72b5SJason Gunthorpe 
2420f3e72b5SJason Gunthorpe 	driver->ops = ops;
2430f3e72b5SJason Gunthorpe 
2440f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2450f3e72b5SJason Gunthorpe 
2460f3e72b5SJason Gunthorpe 	/* Check for duplicates */
2470f3e72b5SJason Gunthorpe 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
2480f3e72b5SJason Gunthorpe 		if (tmp->ops == ops) {
2490f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2500f3e72b5SJason Gunthorpe 			kfree(driver);
2510f3e72b5SJason Gunthorpe 			return -EINVAL;
2520f3e72b5SJason Gunthorpe 		}
2530f3e72b5SJason Gunthorpe 	}
2540f3e72b5SJason Gunthorpe 
2550f3e72b5SJason Gunthorpe 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
2560f3e72b5SJason Gunthorpe 
2570f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2580f3e72b5SJason Gunthorpe 
2590f3e72b5SJason Gunthorpe 	return 0;
2600f3e72b5SJason Gunthorpe }
2610f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
2620f3e72b5SJason Gunthorpe 
2630f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2640f3e72b5SJason Gunthorpe {
2650f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
2660f3e72b5SJason Gunthorpe 
2670f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2680f3e72b5SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
2690f3e72b5SJason Gunthorpe 		if (driver->ops == ops) {
2700f3e72b5SJason Gunthorpe 			list_del(&driver->vfio_next);
2710f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2720f3e72b5SJason Gunthorpe 			kfree(driver);
2730f3e72b5SJason Gunthorpe 			return;
2740f3e72b5SJason Gunthorpe 		}
2750f3e72b5SJason Gunthorpe 	}
2760f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2770f3e72b5SJason Gunthorpe }
2780f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
2790f3e72b5SJason Gunthorpe 
2800f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group);
2810f3e72b5SJason Gunthorpe 
2820f3e72b5SJason Gunthorpe /*
2830f3e72b5SJason Gunthorpe  * Container objects - containers are created when /dev/vfio/vfio is
2840f3e72b5SJason Gunthorpe  * opened, but their lifecycle extends until the last user is done, so
2850f3e72b5SJason Gunthorpe  * it's freed via kref.  Must support container/group/device being
2860f3e72b5SJason Gunthorpe  * closed in any order.
2870f3e72b5SJason Gunthorpe  */
2880f3e72b5SJason Gunthorpe static void vfio_container_get(struct vfio_container *container)
2890f3e72b5SJason Gunthorpe {
2900f3e72b5SJason Gunthorpe 	kref_get(&container->kref);
2910f3e72b5SJason Gunthorpe }
2920f3e72b5SJason Gunthorpe 
2930f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref)
2940f3e72b5SJason Gunthorpe {
2950f3e72b5SJason Gunthorpe 	struct vfio_container *container;
2960f3e72b5SJason Gunthorpe 	container = container_of(kref, struct vfio_container, kref);
2970f3e72b5SJason Gunthorpe 
2980f3e72b5SJason Gunthorpe 	kfree(container);
2990f3e72b5SJason Gunthorpe }
3000f3e72b5SJason Gunthorpe 
3010f3e72b5SJason Gunthorpe static void vfio_container_put(struct vfio_container *container)
3020f3e72b5SJason Gunthorpe {
3030f3e72b5SJason Gunthorpe 	kref_put(&container->kref, vfio_container_release);
3040f3e72b5SJason Gunthorpe }
3050f3e72b5SJason Gunthorpe 
3060f3e72b5SJason Gunthorpe /*
3070f3e72b5SJason Gunthorpe  * Group objects - create, release, get, put, search
3080f3e72b5SJason Gunthorpe  */
3090f3e72b5SJason Gunthorpe static struct vfio_group *
3100f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3110f3e72b5SJason Gunthorpe {
3120f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3130f3e72b5SJason Gunthorpe 
3140f3e72b5SJason Gunthorpe 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
3150f3e72b5SJason Gunthorpe 		if (group->iommu_group == iommu_group) {
3160f3e72b5SJason Gunthorpe 			vfio_group_get(group);
3170f3e72b5SJason Gunthorpe 			return group;
3180f3e72b5SJason Gunthorpe 		}
3190f3e72b5SJason Gunthorpe 	}
3200f3e72b5SJason Gunthorpe 	return NULL;
3210f3e72b5SJason Gunthorpe }
3220f3e72b5SJason Gunthorpe 
3230f3e72b5SJason Gunthorpe static struct vfio_group *
3240f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3250f3e72b5SJason Gunthorpe {
3260f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3270f3e72b5SJason Gunthorpe 
3280f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.group_lock);
3290f3e72b5SJason Gunthorpe 	group = __vfio_group_get_from_iommu(iommu_group);
3300f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
3310f3e72b5SJason Gunthorpe 	return group;
3320f3e72b5SJason Gunthorpe }
3330f3e72b5SJason Gunthorpe 
3340f3e72b5SJason Gunthorpe static void vfio_group_release(struct device *dev)
3350f3e72b5SJason Gunthorpe {
3360f3e72b5SJason Gunthorpe 	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
3370f3e72b5SJason Gunthorpe 
3380f3e72b5SJason Gunthorpe 	mutex_destroy(&group->device_lock);
3390f3e72b5SJason Gunthorpe 	iommu_group_put(group->iommu_group);
3400f3e72b5SJason Gunthorpe 	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
3410f3e72b5SJason Gunthorpe 	kfree(group);
3420f3e72b5SJason Gunthorpe }
3430f3e72b5SJason Gunthorpe 
3440f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
3450f3e72b5SJason Gunthorpe 					   enum vfio_group_type type)
3460f3e72b5SJason Gunthorpe {
3470f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3480f3e72b5SJason Gunthorpe 	int minor;
3490f3e72b5SJason Gunthorpe 
3500f3e72b5SJason Gunthorpe 	group = kzalloc(sizeof(*group), GFP_KERNEL);
3510f3e72b5SJason Gunthorpe 	if (!group)
3520f3e72b5SJason Gunthorpe 		return ERR_PTR(-ENOMEM);
3530f3e72b5SJason Gunthorpe 
3540f3e72b5SJason Gunthorpe 	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
3550f3e72b5SJason Gunthorpe 	if (minor < 0) {
3560f3e72b5SJason Gunthorpe 		kfree(group);
3570f3e72b5SJason Gunthorpe 		return ERR_PTR(minor);
3580f3e72b5SJason Gunthorpe 	}
3590f3e72b5SJason Gunthorpe 
3600f3e72b5SJason Gunthorpe 	device_initialize(&group->dev);
3610f3e72b5SJason Gunthorpe 	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
3620f3e72b5SJason Gunthorpe 	group->dev.class = vfio.class;
3630f3e72b5SJason Gunthorpe 	group->dev.release = vfio_group_release;
3640f3e72b5SJason Gunthorpe 	cdev_init(&group->cdev, &vfio_group_fops);
3650f3e72b5SJason Gunthorpe 	group->cdev.owner = THIS_MODULE;
3660f3e72b5SJason Gunthorpe 
3670f3e72b5SJason Gunthorpe 	refcount_set(&group->users, 1);
3680f3e72b5SJason Gunthorpe 	init_rwsem(&group->group_rwsem);
3690f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&group->device_list);
3700f3e72b5SJason Gunthorpe 	mutex_init(&group->device_lock);
3710f3e72b5SJason Gunthorpe 	group->iommu_group = iommu_group;
3720f3e72b5SJason Gunthorpe 	/* put in vfio_group_release() */
3730f3e72b5SJason Gunthorpe 	iommu_group_ref_get(iommu_group);
3740f3e72b5SJason Gunthorpe 	group->type = type;
3750f3e72b5SJason Gunthorpe 	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
3760f3e72b5SJason Gunthorpe 
3770f3e72b5SJason Gunthorpe 	return group;
3780f3e72b5SJason Gunthorpe }
3790f3e72b5SJason Gunthorpe 
3800f3e72b5SJason Gunthorpe static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
3810f3e72b5SJason Gunthorpe 		enum vfio_group_type type)
3820f3e72b5SJason Gunthorpe {
3830f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3840f3e72b5SJason Gunthorpe 	struct vfio_group *ret;
3850f3e72b5SJason Gunthorpe 	int err;
3860f3e72b5SJason Gunthorpe 
3870f3e72b5SJason Gunthorpe 	group = vfio_group_alloc(iommu_group, type);
3880f3e72b5SJason Gunthorpe 	if (IS_ERR(group))
3890f3e72b5SJason Gunthorpe 		return group;
3900f3e72b5SJason Gunthorpe 
3910f3e72b5SJason Gunthorpe 	err = dev_set_name(&group->dev, "%s%d",
3920f3e72b5SJason Gunthorpe 			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
3930f3e72b5SJason Gunthorpe 			   iommu_group_id(iommu_group));
3940f3e72b5SJason Gunthorpe 	if (err) {
3950f3e72b5SJason Gunthorpe 		ret = ERR_PTR(err);
3960f3e72b5SJason Gunthorpe 		goto err_put;
3970f3e72b5SJason Gunthorpe 	}
3980f3e72b5SJason Gunthorpe 
3990f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.group_lock);
4000f3e72b5SJason Gunthorpe 
4010f3e72b5SJason Gunthorpe 	/* Did we race creating this group? */
4020f3e72b5SJason Gunthorpe 	ret = __vfio_group_get_from_iommu(iommu_group);
4030f3e72b5SJason Gunthorpe 	if (ret)
4040f3e72b5SJason Gunthorpe 		goto err_unlock;
4050f3e72b5SJason Gunthorpe 
4060f3e72b5SJason Gunthorpe 	err = cdev_device_add(&group->cdev, &group->dev);
4070f3e72b5SJason Gunthorpe 	if (err) {
4080f3e72b5SJason Gunthorpe 		ret = ERR_PTR(err);
4090f3e72b5SJason Gunthorpe 		goto err_unlock;
4100f3e72b5SJason Gunthorpe 	}
4110f3e72b5SJason Gunthorpe 
4120f3e72b5SJason Gunthorpe 	list_add(&group->vfio_next, &vfio.group_list);
4130f3e72b5SJason Gunthorpe 
4140f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
4150f3e72b5SJason Gunthorpe 	return group;
4160f3e72b5SJason Gunthorpe 
4170f3e72b5SJason Gunthorpe err_unlock:
4180f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
4190f3e72b5SJason Gunthorpe err_put:
4200f3e72b5SJason Gunthorpe 	put_device(&group->dev);
4210f3e72b5SJason Gunthorpe 	return ret;
4220f3e72b5SJason Gunthorpe }
4230f3e72b5SJason Gunthorpe 
4240f3e72b5SJason Gunthorpe static void vfio_group_put(struct vfio_group *group)
4250f3e72b5SJason Gunthorpe {
4260f3e72b5SJason Gunthorpe 	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
4270f3e72b5SJason Gunthorpe 		return;
4280f3e72b5SJason Gunthorpe 
4290f3e72b5SJason Gunthorpe 	/*
4300f3e72b5SJason Gunthorpe 	 * These data structures all have paired operations that can only be
4310f3e72b5SJason Gunthorpe 	 * undone when the caller holds a live reference on the group. Since all
4320f3e72b5SJason Gunthorpe 	 * pairs must be undone these WARN_ON's indicate some caller did not
4330f3e72b5SJason Gunthorpe 	 * properly hold the group reference.
4340f3e72b5SJason Gunthorpe 	 */
4350f3e72b5SJason Gunthorpe 	WARN_ON(!list_empty(&group->device_list));
4360f3e72b5SJason Gunthorpe 	WARN_ON(group->container || group->container_users);
4370f3e72b5SJason Gunthorpe 	WARN_ON(group->notifier.head);
4380f3e72b5SJason Gunthorpe 
4390f3e72b5SJason Gunthorpe 	list_del(&group->vfio_next);
4400f3e72b5SJason Gunthorpe 	cdev_device_del(&group->cdev, &group->dev);
4410f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
4420f3e72b5SJason Gunthorpe 
4430f3e72b5SJason Gunthorpe 	put_device(&group->dev);
4440f3e72b5SJason Gunthorpe }
4450f3e72b5SJason Gunthorpe 
4460f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group)
4470f3e72b5SJason Gunthorpe {
4480f3e72b5SJason Gunthorpe 	refcount_inc(&group->users);
4490f3e72b5SJason Gunthorpe }
4500f3e72b5SJason Gunthorpe 
4510f3e72b5SJason Gunthorpe /*
4520f3e72b5SJason Gunthorpe  * Device objects - create, release, get, put, search
4530f3e72b5SJason Gunthorpe  */
4540f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */
4550f3e72b5SJason Gunthorpe static void vfio_device_put(struct vfio_device *device)
4560f3e72b5SJason Gunthorpe {
4570f3e72b5SJason Gunthorpe 	if (refcount_dec_and_test(&device->refcount))
4580f3e72b5SJason Gunthorpe 		complete(&device->comp);
4590f3e72b5SJason Gunthorpe }
4600f3e72b5SJason Gunthorpe 
4610f3e72b5SJason Gunthorpe static bool vfio_device_try_get(struct vfio_device *device)
4620f3e72b5SJason Gunthorpe {
4630f3e72b5SJason Gunthorpe 	return refcount_inc_not_zero(&device->refcount);
4640f3e72b5SJason Gunthorpe }
4650f3e72b5SJason Gunthorpe 
4660f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
4670f3e72b5SJason Gunthorpe 						 struct device *dev)
4680f3e72b5SJason Gunthorpe {
4690f3e72b5SJason Gunthorpe 	struct vfio_device *device;
4700f3e72b5SJason Gunthorpe 
4710f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
4720f3e72b5SJason Gunthorpe 	list_for_each_entry(device, &group->device_list, group_next) {
4730f3e72b5SJason Gunthorpe 		if (device->dev == dev && vfio_device_try_get(device)) {
4740f3e72b5SJason Gunthorpe 			mutex_unlock(&group->device_lock);
4750f3e72b5SJason Gunthorpe 			return device;
4760f3e72b5SJason Gunthorpe 		}
4770f3e72b5SJason Gunthorpe 	}
4780f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
4790f3e72b5SJason Gunthorpe 	return NULL;
4800f3e72b5SJason Gunthorpe }
4810f3e72b5SJason Gunthorpe 
4820f3e72b5SJason Gunthorpe /*
4830f3e72b5SJason Gunthorpe  * VFIO driver API
4840f3e72b5SJason Gunthorpe  */
4850f3e72b5SJason Gunthorpe void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
4860f3e72b5SJason Gunthorpe 			 const struct vfio_device_ops *ops)
4870f3e72b5SJason Gunthorpe {
4880f3e72b5SJason Gunthorpe 	init_completion(&device->comp);
4890f3e72b5SJason Gunthorpe 	device->dev = dev;
4900f3e72b5SJason Gunthorpe 	device->ops = ops;
4910f3e72b5SJason Gunthorpe }
4920f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_init_group_dev);
4930f3e72b5SJason Gunthorpe 
4940f3e72b5SJason Gunthorpe void vfio_uninit_group_dev(struct vfio_device *device)
4950f3e72b5SJason Gunthorpe {
4960f3e72b5SJason Gunthorpe 	vfio_release_device_set(device);
4970f3e72b5SJason Gunthorpe }
4980f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
4990f3e72b5SJason Gunthorpe 
5000f3e72b5SJason Gunthorpe static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
5010f3e72b5SJason Gunthorpe 		enum vfio_group_type type)
5020f3e72b5SJason Gunthorpe {
5030f3e72b5SJason Gunthorpe 	struct iommu_group *iommu_group;
5040f3e72b5SJason Gunthorpe 	struct vfio_group *group;
5050f3e72b5SJason Gunthorpe 	int ret;
5060f3e72b5SJason Gunthorpe 
5070f3e72b5SJason Gunthorpe 	iommu_group = iommu_group_alloc();
5080f3e72b5SJason Gunthorpe 	if (IS_ERR(iommu_group))
5090f3e72b5SJason Gunthorpe 		return ERR_CAST(iommu_group);
5100f3e72b5SJason Gunthorpe 
5110f3e72b5SJason Gunthorpe 	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
5120f3e72b5SJason Gunthorpe 	if (ret)
5130f3e72b5SJason Gunthorpe 		goto out_put_group;
5140f3e72b5SJason Gunthorpe 	ret = iommu_group_add_device(iommu_group, dev);
5150f3e72b5SJason Gunthorpe 	if (ret)
5160f3e72b5SJason Gunthorpe 		goto out_put_group;
5170f3e72b5SJason Gunthorpe 
5180f3e72b5SJason Gunthorpe 	group = vfio_create_group(iommu_group, type);
5190f3e72b5SJason Gunthorpe 	if (IS_ERR(group)) {
5200f3e72b5SJason Gunthorpe 		ret = PTR_ERR(group);
5210f3e72b5SJason Gunthorpe 		goto out_remove_device;
5220f3e72b5SJason Gunthorpe 	}
5230f3e72b5SJason Gunthorpe 	iommu_group_put(iommu_group);
5240f3e72b5SJason Gunthorpe 	return group;
5250f3e72b5SJason Gunthorpe 
5260f3e72b5SJason Gunthorpe out_remove_device:
5270f3e72b5SJason Gunthorpe 	iommu_group_remove_device(dev);
5280f3e72b5SJason Gunthorpe out_put_group:
5290f3e72b5SJason Gunthorpe 	iommu_group_put(iommu_group);
5300f3e72b5SJason Gunthorpe 	return ERR_PTR(ret);
5310f3e72b5SJason Gunthorpe }
5320f3e72b5SJason Gunthorpe 
5330f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
5340f3e72b5SJason Gunthorpe {
5350f3e72b5SJason Gunthorpe 	struct iommu_group *iommu_group;
5360f3e72b5SJason Gunthorpe 	struct vfio_group *group;
5370f3e72b5SJason Gunthorpe 
5380f3e72b5SJason Gunthorpe 	iommu_group = iommu_group_get(dev);
5390f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
5400f3e72b5SJason Gunthorpe 	if (!iommu_group && noiommu) {
5410f3e72b5SJason Gunthorpe 		/*
5420f3e72b5SJason Gunthorpe 		 * With noiommu enabled, create an IOMMU group for devices that
5430f3e72b5SJason Gunthorpe 		 * don't already have one, implying no IOMMU hardware/driver
5440f3e72b5SJason Gunthorpe 		 * exists.  Taint the kernel because we're about to give a DMA
5450f3e72b5SJason Gunthorpe 		 * capable device to a user without IOMMU protection.
5460f3e72b5SJason Gunthorpe 		 */
5470f3e72b5SJason Gunthorpe 		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
5480f3e72b5SJason Gunthorpe 		if (!IS_ERR(group)) {
5490f3e72b5SJason Gunthorpe 			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
5500f3e72b5SJason Gunthorpe 			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
5510f3e72b5SJason Gunthorpe 		}
5520f3e72b5SJason Gunthorpe 		return group;
5530f3e72b5SJason Gunthorpe 	}
5540f3e72b5SJason Gunthorpe #endif
5550f3e72b5SJason Gunthorpe 	if (!iommu_group)
5560f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
5570f3e72b5SJason Gunthorpe 
5580f3e72b5SJason Gunthorpe 	/*
5590f3e72b5SJason Gunthorpe 	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
5600f3e72b5SJason Gunthorpe 	 * restore cache coherency. It has to be checked here because it is only
5610f3e72b5SJason Gunthorpe 	 * valid for cases where we are using iommu groups.
5620f3e72b5SJason Gunthorpe 	 */
5630f3e72b5SJason Gunthorpe 	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
5640f3e72b5SJason Gunthorpe 		iommu_group_put(iommu_group);
5650f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
5660f3e72b5SJason Gunthorpe 	}
5670f3e72b5SJason Gunthorpe 
5680f3e72b5SJason Gunthorpe 	group = vfio_group_get_from_iommu(iommu_group);
5690f3e72b5SJason Gunthorpe 	if (!group)
5700f3e72b5SJason Gunthorpe 		group = vfio_create_group(iommu_group, VFIO_IOMMU);
5710f3e72b5SJason Gunthorpe 
5720f3e72b5SJason Gunthorpe 	/* The vfio_group holds a reference to the iommu_group */
5730f3e72b5SJason Gunthorpe 	iommu_group_put(iommu_group);
5740f3e72b5SJason Gunthorpe 	return group;
5750f3e72b5SJason Gunthorpe }
5760f3e72b5SJason Gunthorpe 
5770f3e72b5SJason Gunthorpe static int __vfio_register_dev(struct vfio_device *device,
5780f3e72b5SJason Gunthorpe 		struct vfio_group *group)
5790f3e72b5SJason Gunthorpe {
5800f3e72b5SJason Gunthorpe 	struct vfio_device *existing_device;
5810f3e72b5SJason Gunthorpe 
5820f3e72b5SJason Gunthorpe 	if (IS_ERR(group))
5830f3e72b5SJason Gunthorpe 		return PTR_ERR(group);
5840f3e72b5SJason Gunthorpe 
5850f3e72b5SJason Gunthorpe 	/*
5860f3e72b5SJason Gunthorpe 	 * If the driver doesn't specify a set then the device is added to a
5870f3e72b5SJason Gunthorpe 	 * singleton set just for itself.
5880f3e72b5SJason Gunthorpe 	 */
5890f3e72b5SJason Gunthorpe 	if (!device->dev_set)
5900f3e72b5SJason Gunthorpe 		vfio_assign_device_set(device, device);
5910f3e72b5SJason Gunthorpe 
5920f3e72b5SJason Gunthorpe 	existing_device = vfio_group_get_device(group, device->dev);
5930f3e72b5SJason Gunthorpe 	if (existing_device) {
5940f3e72b5SJason Gunthorpe 		dev_WARN(device->dev, "Device already exists on group %d\n",
5950f3e72b5SJason Gunthorpe 			 iommu_group_id(group->iommu_group));
5960f3e72b5SJason Gunthorpe 		vfio_device_put(existing_device);
5970f3e72b5SJason Gunthorpe 		if (group->type == VFIO_NO_IOMMU ||
5980f3e72b5SJason Gunthorpe 		    group->type == VFIO_EMULATED_IOMMU)
5990f3e72b5SJason Gunthorpe 			iommu_group_remove_device(device->dev);
6000f3e72b5SJason Gunthorpe 		vfio_group_put(group);
6010f3e72b5SJason Gunthorpe 		return -EBUSY;
6020f3e72b5SJason Gunthorpe 	}
6030f3e72b5SJason Gunthorpe 
6040f3e72b5SJason Gunthorpe 	/* Our reference on group is moved to the device */
6050f3e72b5SJason Gunthorpe 	device->group = group;
6060f3e72b5SJason Gunthorpe 
6070f3e72b5SJason Gunthorpe 	/* Refcounting can't start until the driver calls register */
6080f3e72b5SJason Gunthorpe 	refcount_set(&device->refcount, 1);
6090f3e72b5SJason Gunthorpe 
6100f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
6110f3e72b5SJason Gunthorpe 	list_add(&device->group_next, &group->device_list);
6120f3e72b5SJason Gunthorpe 	group->dev_counter++;
6130f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
6140f3e72b5SJason Gunthorpe 
6150f3e72b5SJason Gunthorpe 	return 0;
6160f3e72b5SJason Gunthorpe }
6170f3e72b5SJason Gunthorpe 
6180f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device)
6190f3e72b5SJason Gunthorpe {
6200f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
6210f3e72b5SJason Gunthorpe 		vfio_group_find_or_alloc(device->dev));
6220f3e72b5SJason Gunthorpe }
6230f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev);
6240f3e72b5SJason Gunthorpe 
6250f3e72b5SJason Gunthorpe /*
6260f3e72b5SJason Gunthorpe  * Register a virtual device without IOMMU backing.  The user of this
6270f3e72b5SJason Gunthorpe  * device must not be able to directly trigger unmediated DMA.
6280f3e72b5SJason Gunthorpe  */
6290f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device)
6300f3e72b5SJason Gunthorpe {
6310f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
6320f3e72b5SJason Gunthorpe 		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
6330f3e72b5SJason Gunthorpe }
6340f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
6350f3e72b5SJason Gunthorpe 
6360f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
6370f3e72b5SJason Gunthorpe 						     char *buf)
6380f3e72b5SJason Gunthorpe {
6390f3e72b5SJason Gunthorpe 	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
6400f3e72b5SJason Gunthorpe 
6410f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
6420f3e72b5SJason Gunthorpe 	list_for_each_entry(it, &group->device_list, group_next) {
6430f3e72b5SJason Gunthorpe 		int ret;
6440f3e72b5SJason Gunthorpe 
6450f3e72b5SJason Gunthorpe 		if (it->ops->match) {
6460f3e72b5SJason Gunthorpe 			ret = it->ops->match(it, buf);
6470f3e72b5SJason Gunthorpe 			if (ret < 0) {
6480f3e72b5SJason Gunthorpe 				device = ERR_PTR(ret);
6490f3e72b5SJason Gunthorpe 				break;
6500f3e72b5SJason Gunthorpe 			}
6510f3e72b5SJason Gunthorpe 		} else {
6520f3e72b5SJason Gunthorpe 			ret = !strcmp(dev_name(it->dev), buf);
6530f3e72b5SJason Gunthorpe 		}
6540f3e72b5SJason Gunthorpe 
6550f3e72b5SJason Gunthorpe 		if (ret && vfio_device_try_get(it)) {
6560f3e72b5SJason Gunthorpe 			device = it;
6570f3e72b5SJason Gunthorpe 			break;
6580f3e72b5SJason Gunthorpe 		}
6590f3e72b5SJason Gunthorpe 	}
6600f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
6610f3e72b5SJason Gunthorpe 
6620f3e72b5SJason Gunthorpe 	return device;
6630f3e72b5SJason Gunthorpe }
6640f3e72b5SJason Gunthorpe 
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device hold references, so
 * this blocks until all users release the device; ops->request is
 * invoked repeatedly to ask them to let go.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/* Drop the initial reference set in __vfio_register_dev() */
	vfio_device_put(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/*
		 * Nudge users to release the device; the escalating counter
		 * lets the driver grow more insistent on each pass.
		 */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				/* Signal received: warn once, then wait uninterruptibly */
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	/* No users remain; unlink the device from its group */
	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	/* Undo the iommu_group membership added at group allocation time */
	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
7100f3e72b5SJason Gunthorpe 
7110f3e72b5SJason Gunthorpe /*
7120f3e72b5SJason Gunthorpe  * VFIO base fd, /dev/vfio/vfio
7130f3e72b5SJason Gunthorpe  */
/*
 * VFIO_CHECK_EXTENSION handler for the container fd.  Before an iommu
 * driver is set, every registered driver is polled and the first
 * positive answer wins; once a driver is attached, only that driver is
 * asked.  Returns the driver's answer (>0 supported, 0 not) or 0 when
 * no driver responds.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	/* Protects container->iommu_driver against concurrent SET_IOMMU */
	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/*
				 * Once groups are attached, only drivers
				 * permitted for this container may answer.
				 */
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				/* Pin the driver module across the ioctl call */
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
7620f3e72b5SJason Gunthorpe 
/*
 * Attach all groups currently in @container to the iommu driver
 * instance @data.  If any attach fails, the groups attached so far are
 * detached again and the error is returned.  Returns -ENODEV when the
 * container holds no groups.
 *
 * hold write lock on container->group_lock
 */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Detach, in reverse order, every group attached before the failure */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
7880f3e72b5SJason Gunthorpe 
/*
 * VFIO_SET_IOMMU handler: probe the registered iommu drivers for one
 * that supports the extension magic in @arg, open it, and attach all of
 * the container's groups to it.  Fails with -EINVAL if the container
 * has no groups or already has a driver set.
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		/* Pin the driver module while we probe and possibly use it */
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			/* Remember the error but keep trying other drivers */
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Success: the module reference is kept until unset */
		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
8550f3e72b5SJason Gunthorpe 
8560f3e72b5SJason Gunthorpe static long vfio_fops_unl_ioctl(struct file *filep,
8570f3e72b5SJason Gunthorpe 				unsigned int cmd, unsigned long arg)
8580f3e72b5SJason Gunthorpe {
8590f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
8600f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
8610f3e72b5SJason Gunthorpe 	void *data;
8620f3e72b5SJason Gunthorpe 	long ret = -EINVAL;
8630f3e72b5SJason Gunthorpe 
8640f3e72b5SJason Gunthorpe 	if (!container)
8650f3e72b5SJason Gunthorpe 		return ret;
8660f3e72b5SJason Gunthorpe 
8670f3e72b5SJason Gunthorpe 	switch (cmd) {
8680f3e72b5SJason Gunthorpe 	case VFIO_GET_API_VERSION:
8690f3e72b5SJason Gunthorpe 		ret = VFIO_API_VERSION;
8700f3e72b5SJason Gunthorpe 		break;
8710f3e72b5SJason Gunthorpe 	case VFIO_CHECK_EXTENSION:
8720f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(container, arg);
8730f3e72b5SJason Gunthorpe 		break;
8740f3e72b5SJason Gunthorpe 	case VFIO_SET_IOMMU:
8750f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_set_iommu(container, arg);
8760f3e72b5SJason Gunthorpe 		break;
8770f3e72b5SJason Gunthorpe 	default:
8780f3e72b5SJason Gunthorpe 		driver = container->iommu_driver;
8790f3e72b5SJason Gunthorpe 		data = container->iommu_data;
8800f3e72b5SJason Gunthorpe 
8810f3e72b5SJason Gunthorpe 		if (driver) /* passthrough all unrecognized ioctls */
8820f3e72b5SJason Gunthorpe 			ret = driver->ops->ioctl(data, cmd, arg);
8830f3e72b5SJason Gunthorpe 	}
8840f3e72b5SJason Gunthorpe 
8850f3e72b5SJason Gunthorpe 	return ret;
8860f3e72b5SJason Gunthorpe }
8870f3e72b5SJason Gunthorpe 
8880f3e72b5SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep)
8890f3e72b5SJason Gunthorpe {
8900f3e72b5SJason Gunthorpe 	struct vfio_container *container;
8910f3e72b5SJason Gunthorpe 
8920f3e72b5SJason Gunthorpe 	container = kzalloc(sizeof(*container), GFP_KERNEL);
8930f3e72b5SJason Gunthorpe 	if (!container)
8940f3e72b5SJason Gunthorpe 		return -ENOMEM;
8950f3e72b5SJason Gunthorpe 
8960f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&container->group_list);
8970f3e72b5SJason Gunthorpe 	init_rwsem(&container->group_lock);
8980f3e72b5SJason Gunthorpe 	kref_init(&container->kref);
8990f3e72b5SJason Gunthorpe 
9000f3e72b5SJason Gunthorpe 	filep->private_data = container;
9010f3e72b5SJason Gunthorpe 
9020f3e72b5SJason Gunthorpe 	return 0;
9030f3e72b5SJason Gunthorpe }
9040f3e72b5SJason Gunthorpe 
9050f3e72b5SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep)
9060f3e72b5SJason Gunthorpe {
9070f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
9080f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver = container->iommu_driver;
9090f3e72b5SJason Gunthorpe 
9100f3e72b5SJason Gunthorpe 	if (driver && driver->ops->notify)
9110f3e72b5SJason Gunthorpe 		driver->ops->notify(container->iommu_data,
9120f3e72b5SJason Gunthorpe 				    VFIO_IOMMU_CONTAINER_CLOSE);
9130f3e72b5SJason Gunthorpe 
9140f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
9150f3e72b5SJason Gunthorpe 
9160f3e72b5SJason Gunthorpe 	vfio_container_put(container);
9170f3e72b5SJason Gunthorpe 
9180f3e72b5SJason Gunthorpe 	return 0;
9190f3e72b5SJason Gunthorpe }
9200f3e72b5SJason Gunthorpe 
/* File operations for the container chardev, /dev/vfio/vfio */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
9280f3e72b5SJason Gunthorpe 
9290f3e72b5SJason Gunthorpe /*
9300f3e72b5SJason Gunthorpe  * VFIO Group fd, /dev/vfio/$GROUP
9310f3e72b5SJason Gunthorpe  */
/*
 * Detach @group from its container and drop the group's container
 * reference.  When the last group leaves, the container's iommu driver
 * instance is released and the container returns to the unset state.
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	/* Return DMA ownership claimed in vfio_group_ioctl_set_container() */
	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	/* Matches the get taken when the group joined the container */
	vfio_container_put(container);
}
9650f3e72b5SJason Gunthorpe 
9660f3e72b5SJason Gunthorpe /*
9670f3e72b5SJason Gunthorpe  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
9680f3e72b5SJason Gunthorpe  * if there was no container to unset.  Since the ioctl is called on
9690f3e72b5SJason Gunthorpe  * the group, we know that still exists, therefore the only valid
9700f3e72b5SJason Gunthorpe  * transition here is 1->0.
9710f3e72b5SJason Gunthorpe  */
972b3b43590SJason Gunthorpe static int vfio_group_ioctl_unset_container(struct vfio_group *group)
9730f3e72b5SJason Gunthorpe {
974b3b43590SJason Gunthorpe 	int ret = 0;
9750f3e72b5SJason Gunthorpe 
976b3b43590SJason Gunthorpe 	down_write(&group->group_rwsem);
977b3b43590SJason Gunthorpe 	if (!group->container) {
978b3b43590SJason Gunthorpe 		ret = -EINVAL;
979b3b43590SJason Gunthorpe 		goto out_unlock;
980b3b43590SJason Gunthorpe 	}
981b3b43590SJason Gunthorpe 	if (group->container_users != 1) {
982b3b43590SJason Gunthorpe 		ret = -EBUSY;
983b3b43590SJason Gunthorpe 		goto out_unlock;
984b3b43590SJason Gunthorpe 	}
9850f3e72b5SJason Gunthorpe 	__vfio_group_unset_container(group);
986b3b43590SJason Gunthorpe 
987b3b43590SJason Gunthorpe out_unlock:
988b3b43590SJason Gunthorpe 	up_write(&group->group_rwsem);
989b3b43590SJason Gunthorpe 	return ret;
9900f3e72b5SJason Gunthorpe }
9910f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_SET_CONTAINER handler: bind @group to the container whose
 * fd is read from @arg.  For VFIO_IOMMU groups this claims DMA
 * ownership of the iommu_group and, if the container already has an
 * iommu driver, attaches the group to it.  On success the group holds
 * its first container user reference.
 */
static int vfio_group_ioctl_set_container(struct vfio_group *group,
					  int __user *arg)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int container_fd;
	int ret = 0;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	if (get_user(container_fd, arg))
		return -EFAULT;
	if (container_fd < 0)
		return -EINVAL;
	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		ret = -EINVAL;
		goto out_fdput;
	}
	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&group->group_rwsem);

	/* A group may be attached to only one container at a time */
	if (group->container || WARN_ON(group->container_users)) {
		ret = -EINVAL;
		goto out_unlock_group;
	}

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	/*
	 * Claim DMA ownership on behalf of the container file so other
	 * kernel users cannot attach domains to this iommu_group.
	 */
	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* Undo the ownership claim taken above */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
out_unlock_group:
	up_write(&group->group_rwsem);
out_fdput:
	fdput(f);
	return ret;
}
10710f3e72b5SJason Gunthorpe 
/* Forward declaration; used by vfio_device_open() for anon_inode_getfile() */
static const struct file_operations vfio_device_fops;
10730f3e72b5SJason Gunthorpe 
/*
 * true if the vfio_device has open_device() called but not close_device()
 *
 * WARN_ON_ONCE() flags callers that run while no open users exist;
 * READ_ONCE() marks the lockless read of open_count as intentional.
 */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
10790f3e72b5SJason Gunthorpe 
/*
 * Take a container user reference on behalf of @device so the container
 * stays usable while the device fd is open.  Requires the group to be
 * attached to a container that has an iommu driver set.
 *
 * Caller must hold group->group_rwsem for write.
 */
static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	/* The group's opened file pins the container; hold it for us too */
	get_file(group->opened_file);
	group->container_users++;
	return 0;
}
10970f3e72b5SJason Gunthorpe 
10980f3e72b5SJason Gunthorpe static void vfio_device_unassign_container(struct vfio_device *device)
10990f3e72b5SJason Gunthorpe {
11000f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
11010f3e72b5SJason Gunthorpe 	WARN_ON(device->group->container_users <= 1);
11020f3e72b5SJason Gunthorpe 	device->group->container_users--;
11030f3e72b5SJason Gunthorpe 	fput(device->group->opened_file);
11040f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
11050f3e72b5SJason Gunthorpe }
11060f3e72b5SJason Gunthorpe 
/*
 * Open @device and return a new anonymous file for it.  The first open
 * within the dev_set triggers ops->open_device() and registers the
 * device with the container's iommu driver.  On success the caller's
 * device reference is transferred to the returned file; on failure an
 * ERR_PTR is returned and all state is unwound.
 */
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	/* Pin the driver module while the device fd exists */
	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	/* Undo the first-open work if we were the only opener */
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* Drop the KVM pointer published above once no opens remain */
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}
11990f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_GET_DEVICE_FD: look up a device in this group by the
 * user-supplied name string and return a new open file descriptor for it.
 *
 * The fd number is reserved *before* the device is opened so the open can
 * be fully unwound if either step fails; fd_install() only runs once
 * vfio_device_open() has succeeded, at which point the device reference is
 * owned by the new file.
 */
static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
					  char __user *arg)
{
	struct vfio_device *device;
	struct file *filep;
	char *buf;
	int fdno;
	int ret;

	/* Bound the user-controlled name to PAGE_SIZE */
	buf = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* On success this takes a reference on the device */
	device = vfio_device_get_from_name(group, buf);
	kfree(buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	/* On success the device reference moves into filep */
	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put(device);
	return ret;
}
12390f3e72b5SJason Gunthorpe 
124099a27c08SJason Gunthorpe static int vfio_group_ioctl_get_status(struct vfio_group *group,
124199a27c08SJason Gunthorpe 				       struct vfio_group_status __user *arg)
12420f3e72b5SJason Gunthorpe {
124399a27c08SJason Gunthorpe 	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
12440f3e72b5SJason Gunthorpe 	struct vfio_group_status status;
12450f3e72b5SJason Gunthorpe 
124699a27c08SJason Gunthorpe 	if (copy_from_user(&status, arg, minsz))
12470f3e72b5SJason Gunthorpe 		return -EFAULT;
12480f3e72b5SJason Gunthorpe 
12490f3e72b5SJason Gunthorpe 	if (status.argsz < minsz)
12500f3e72b5SJason Gunthorpe 		return -EINVAL;
12510f3e72b5SJason Gunthorpe 
12520f3e72b5SJason Gunthorpe 	status.flags = 0;
12530f3e72b5SJason Gunthorpe 
12540f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
12550f3e72b5SJason Gunthorpe 	if (group->container)
12560f3e72b5SJason Gunthorpe 		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
12570f3e72b5SJason Gunthorpe 				VFIO_GROUP_FLAGS_VIABLE;
12580f3e72b5SJason Gunthorpe 	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
12590f3e72b5SJason Gunthorpe 		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
12600f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
12610f3e72b5SJason Gunthorpe 
126299a27c08SJason Gunthorpe 	if (copy_to_user(arg, &status, minsz))
12630f3e72b5SJason Gunthorpe 		return -EFAULT;
126499a27c08SJason Gunthorpe 	return 0;
12650f3e72b5SJason Gunthorpe }
126699a27c08SJason Gunthorpe 
126799a27c08SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep,
126899a27c08SJason Gunthorpe 				      unsigned int cmd, unsigned long arg)
126999a27c08SJason Gunthorpe {
127099a27c08SJason Gunthorpe 	struct vfio_group *group = filep->private_data;
127199a27c08SJason Gunthorpe 	void __user *uarg = (void __user *)arg;
127299a27c08SJason Gunthorpe 
127399a27c08SJason Gunthorpe 	switch (cmd) {
127499a27c08SJason Gunthorpe 	case VFIO_GROUP_GET_DEVICE_FD:
127599a27c08SJason Gunthorpe 		return vfio_group_ioctl_get_device_fd(group, uarg);
127699a27c08SJason Gunthorpe 	case VFIO_GROUP_GET_STATUS:
127799a27c08SJason Gunthorpe 		return vfio_group_ioctl_get_status(group, uarg);
12780f3e72b5SJason Gunthorpe 	case VFIO_GROUP_SET_CONTAINER:
127967671f15SJason Gunthorpe 		return vfio_group_ioctl_set_container(group, uarg);
12800f3e72b5SJason Gunthorpe 	case VFIO_GROUP_UNSET_CONTAINER:
1281b3b43590SJason Gunthorpe 		return vfio_group_ioctl_unset_container(group);
128299a27c08SJason Gunthorpe 	default:
128399a27c08SJason Gunthorpe 		return -ENOTTY;
12840f3e72b5SJason Gunthorpe 	}
12850f3e72b5SJason Gunthorpe }
12860f3e72b5SJason Gunthorpe 
/*
 * Open of the VFIO group character device.  Takes a group reference and
 * records this file as the single allowed opener; all checks are done
 * under the group rwsem held for write.
 */
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	down_write(&group->group_rwsem);

	/* users can be zero if this races with vfio_group_put() */
	if (!refcount_inc_not_zero(&group->users)) {
		ret = -ENODEV;
		goto err_unlock;
	}

	/* Opening a no-iommu group requires CAP_SYS_RAWIO */
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto err_put;
	}

	/*
	 * Do we need multiple instances of the group open?  Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto err_put;
	}
	group->opened_file = filep;
	filep->private_data = group;

	up_write(&group->group_rwsem);
	return 0;
err_put:
	vfio_group_put(group);
err_unlock:
	up_write(&group->group_rwsem);
	return ret;
}
13240f3e72b5SJason Gunthorpe 
/*
 * Release of the group file: detach any container, clear the single-opener
 * slot, and drop the reference taken in vfio_group_fops_open().
 */
static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	down_write(&group->group_rwsem);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container) {
		WARN_ON(group->container_users != 1);
		__vfio_group_unset_container(group);
	}
	group->opened_file = NULL;
	up_write(&group->group_rwsem);

	vfio_group_put(group);

	return 0;
}
13480f3e72b5SJason Gunthorpe 
/* File operations for the VFIO group character device */
static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
13560f3e72b5SJason Gunthorpe 
13570f3e72b5SJason Gunthorpe /*
1358*8e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_resume_and_get().
1359*8e5c6995SAbhishek Sahu  * Return error code on failure or 0 on success.
1360*8e5c6995SAbhishek Sahu  */
1361*8e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
1362*8e5c6995SAbhishek Sahu {
1363*8e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
1364*8e5c6995SAbhishek Sahu 
1365*8e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm) {
1366*8e5c6995SAbhishek Sahu 		int ret;
1367*8e5c6995SAbhishek Sahu 
1368*8e5c6995SAbhishek Sahu 		ret = pm_runtime_resume_and_get(dev);
1369*8e5c6995SAbhishek Sahu 		if (ret) {
1370*8e5c6995SAbhishek Sahu 			dev_info_ratelimited(dev,
1371*8e5c6995SAbhishek Sahu 				"vfio: runtime resume failed %d\n", ret);
1372*8e5c6995SAbhishek Sahu 			return -EIO;
1373*8e5c6995SAbhishek Sahu 		}
1374*8e5c6995SAbhishek Sahu 	}
1375*8e5c6995SAbhishek Sahu 
1376*8e5c6995SAbhishek Sahu 	return 0;
1377*8e5c6995SAbhishek Sahu }
1378*8e5c6995SAbhishek Sahu 
1379*8e5c6995SAbhishek Sahu /*
1380*8e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_put().
1381*8e5c6995SAbhishek Sahu  */
1382*8e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
1383*8e5c6995SAbhishek Sahu {
1384*8e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
1385*8e5c6995SAbhishek Sahu 
1386*8e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm)
1387*8e5c6995SAbhishek Sahu 		pm_runtime_put(dev);
1388*8e5c6995SAbhishek Sahu }
1389*8e5c6995SAbhishek Sahu 
1390*8e5c6995SAbhishek Sahu /*
13910f3e72b5SJason Gunthorpe  * VFIO Device fd
13920f3e72b5SJason Gunthorpe  */
/*
 * Release of a device fd: mirror of the open path — close the driver on
 * last close, unregister from the iommu driver, drop the open count, the
 * module reference, the container assignment, and finally the device
 * reference that was moved into the file at open time.
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	/* Only the last opener invokes the driver's close_device() */
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* The KVM association only lives while the device is open */
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	/* Drop the reference moved to the file by vfio_device_open() */
	vfio_device_put(device);

	return 0;
}
14220f3e72b5SJason Gunthorpe 
/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 *
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	/* vfio_from_fsm_table[cur][target] == the next single-step state */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/* Migration-capability flags a device must advertise to enter each state */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	/* cur_fsm is driver-supplied; an unsupported current state is a driver bug */
	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	/* new_fsm may come from userspace; reject without warning */
	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	   (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
15620f3e72b5SJason Gunthorpe 
/*
 * Convert the drivers's struct file into a FD number and return it to userspace
 *
 * Ownership of @filp is consumed on every path: on success it is installed
 * into the fd table, on failure it is fput().  The fd number is written
 * into @mig and copied to userspace *before* fd_install() so the fd never
 * becomes visible with a stale user copy.
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}
15920f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE handler.
 *
 * GET reports the current migration state.  SET asks the driver to move to
 * mig.device_state; the driver may return a data-transfer struct file, in
 * which case an fd for it is returned to userspace, otherwise data_fd is
 * reported as -1.  A driver ERR_PTR is still copied out (with data_fd ==
 * -1) before the error is returned.
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	/* Validate flags/argsz; ret == 1 means "proceed with the operation" */
	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
16420f3e72b5SJason Gunthorpe 
16430f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
16440f3e72b5SJason Gunthorpe 					       u32 flags, void __user *arg,
16450f3e72b5SJason Gunthorpe 					       size_t argsz)
16460f3e72b5SJason Gunthorpe {
16470f3e72b5SJason Gunthorpe 	struct vfio_device_feature_migration mig = {
16480f3e72b5SJason Gunthorpe 		.flags = device->migration_flags,
16490f3e72b5SJason Gunthorpe 	};
16500f3e72b5SJason Gunthorpe 	int ret;
16510f3e72b5SJason Gunthorpe 
16520f3e72b5SJason Gunthorpe 	if (!device->mig_ops)
16530f3e72b5SJason Gunthorpe 		return -ENOTTY;
16540f3e72b5SJason Gunthorpe 
16550f3e72b5SJason Gunthorpe 	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
16560f3e72b5SJason Gunthorpe 				 sizeof(mig));
16570f3e72b5SJason Gunthorpe 	if (ret != 1)
16580f3e72b5SJason Gunthorpe 		return ret;
16590f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, &mig, sizeof(mig)))
16600f3e72b5SJason Gunthorpe 		return -EFAULT;
16610f3e72b5SJason Gunthorpe 	return 0;
16620f3e72b5SJason Gunthorpe }
16630f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE ioctl entry: validate the common header, then route
 * to the core feature handlers or fall back to the driver's
 * device_feature op.  Handlers receive the flags and the payload area
 * (everything after the header).
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		/* Unknown features go to the driver, if it handles any */
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
17050f3e72b5SJason Gunthorpe 
/*
 * Device-fd ioctl entry point.  Every command is bracketed by a runtime-PM
 * get/put so the device is resumed while the ioctl executes.
 */
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;
	int ret;

	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
		break;

	default:
		/* All other commands are handled by the bound driver */
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}

	vfio_device_pm_runtime_put(device);
	return ret;
}
17320f3e72b5SJason Gunthorpe 
17330f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
17340f3e72b5SJason Gunthorpe 				     size_t count, loff_t *ppos)
17350f3e72b5SJason Gunthorpe {
17360f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
17370f3e72b5SJason Gunthorpe 
17380f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->read))
17390f3e72b5SJason Gunthorpe 		return -EINVAL;
17400f3e72b5SJason Gunthorpe 
17410f3e72b5SJason Gunthorpe 	return device->ops->read(device, buf, count, ppos);
17420f3e72b5SJason Gunthorpe }
17430f3e72b5SJason Gunthorpe 
17440f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep,
17450f3e72b5SJason Gunthorpe 				      const char __user *buf,
17460f3e72b5SJason Gunthorpe 				      size_t count, loff_t *ppos)
17470f3e72b5SJason Gunthorpe {
17480f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
17490f3e72b5SJason Gunthorpe 
17500f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->write))
17510f3e72b5SJason Gunthorpe 		return -EINVAL;
17520f3e72b5SJason Gunthorpe 
17530f3e72b5SJason Gunthorpe 	return device->ops->write(device, buf, count, ppos);
17540f3e72b5SJason Gunthorpe }
17550f3e72b5SJason Gunthorpe 
17560f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
17570f3e72b5SJason Gunthorpe {
17580f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
17590f3e72b5SJason Gunthorpe 
17600f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->mmap))
17610f3e72b5SJason Gunthorpe 		return -EINVAL;
17620f3e72b5SJason Gunthorpe 
17630f3e72b5SJason Gunthorpe 	return device->ops->mmap(device, vma);
17640f3e72b5SJason Gunthorpe }
17650f3e72b5SJason Gunthorpe 
/*
 * fops for the per-device file descriptor.  Every operation forwards to
 * the vfio_device driver's ops; filep->private_data holds the
 * struct vfio_device.  No .open is provided here.
 */
static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
17750f3e72b5SJason Gunthorpe 
17760f3e72b5SJason Gunthorpe /**
17770f3e72b5SJason Gunthorpe  * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
17780f3e72b5SJason Gunthorpe  * @file: VFIO group file
17790f3e72b5SJason Gunthorpe  *
17800f3e72b5SJason Gunthorpe  * The returned iommu_group is valid as long as a ref is held on the file.
17810f3e72b5SJason Gunthorpe  */
17820f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file)
17830f3e72b5SJason Gunthorpe {
17840f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
17850f3e72b5SJason Gunthorpe 
17860f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
17870f3e72b5SJason Gunthorpe 		return NULL;
17880f3e72b5SJason Gunthorpe 	return group->iommu_group;
17890f3e72b5SJason Gunthorpe }
17900f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
17910f3e72b5SJason Gunthorpe 
17920f3e72b5SJason Gunthorpe /**
17930f3e72b5SJason Gunthorpe  * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
17940f3e72b5SJason Gunthorpe  *        is always CPU cache coherent
17950f3e72b5SJason Gunthorpe  * @file: VFIO group file
17960f3e72b5SJason Gunthorpe  *
17970f3e72b5SJason Gunthorpe  * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
17980f3e72b5SJason Gunthorpe  * bit in DMA transactions. A return of false indicates that the user has
17990f3e72b5SJason Gunthorpe  * rights to access additional instructions such as wbinvd on x86.
18000f3e72b5SJason Gunthorpe  */
18010f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file)
18020f3e72b5SJason Gunthorpe {
18030f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
18040f3e72b5SJason Gunthorpe 	bool ret;
18050f3e72b5SJason Gunthorpe 
18060f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
18070f3e72b5SJason Gunthorpe 		return true;
18080f3e72b5SJason Gunthorpe 
18090f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
18100f3e72b5SJason Gunthorpe 	if (group->container) {
18110f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(group->container,
18120f3e72b5SJason Gunthorpe 						 VFIO_DMA_CC_IOMMU);
18130f3e72b5SJason Gunthorpe 	} else {
18140f3e72b5SJason Gunthorpe 		/*
18150f3e72b5SJason Gunthorpe 		 * Since the coherency state is determined only once a container
18160f3e72b5SJason Gunthorpe 		 * is attached the user must do so before they can prove they
18170f3e72b5SJason Gunthorpe 		 * have permission.
18180f3e72b5SJason Gunthorpe 		 */
18190f3e72b5SJason Gunthorpe 		ret = true;
18200f3e72b5SJason Gunthorpe 	}
18210f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
18220f3e72b5SJason Gunthorpe 	return ret;
18230f3e72b5SJason Gunthorpe }
18240f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
18250f3e72b5SJason Gunthorpe 
18260f3e72b5SJason Gunthorpe /**
18270f3e72b5SJason Gunthorpe  * vfio_file_set_kvm - Link a kvm with VFIO drivers
18280f3e72b5SJason Gunthorpe  * @file: VFIO group file
18290f3e72b5SJason Gunthorpe  * @kvm: KVM to link
18300f3e72b5SJason Gunthorpe  *
18310f3e72b5SJason Gunthorpe  * When a VFIO device is first opened the KVM will be available in
18320f3e72b5SJason Gunthorpe  * device->kvm if one was associated with the group.
18330f3e72b5SJason Gunthorpe  */
18340f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
18350f3e72b5SJason Gunthorpe {
18360f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
18370f3e72b5SJason Gunthorpe 
18380f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
18390f3e72b5SJason Gunthorpe 		return;
18400f3e72b5SJason Gunthorpe 
18410f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
18420f3e72b5SJason Gunthorpe 	group->kvm = kvm;
18430f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
18440f3e72b5SJason Gunthorpe }
18450f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
18460f3e72b5SJason Gunthorpe 
18470f3e72b5SJason Gunthorpe /**
18480f3e72b5SJason Gunthorpe  * vfio_file_has_dev - True if the VFIO file is a handle for device
18490f3e72b5SJason Gunthorpe  * @file: VFIO file to check
18500f3e72b5SJason Gunthorpe  * @device: Device that must be part of the file
18510f3e72b5SJason Gunthorpe  *
18520f3e72b5SJason Gunthorpe  * Returns true if given file has permission to manipulate the given device.
18530f3e72b5SJason Gunthorpe  */
18540f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
18550f3e72b5SJason Gunthorpe {
18560f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
18570f3e72b5SJason Gunthorpe 
18580f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
18590f3e72b5SJason Gunthorpe 		return false;
18600f3e72b5SJason Gunthorpe 
18610f3e72b5SJason Gunthorpe 	return group == device->group;
18620f3e72b5SJason Gunthorpe }
18630f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev);
18640f3e72b5SJason Gunthorpe 
18650f3e72b5SJason Gunthorpe /*
18660f3e72b5SJason Gunthorpe  * Sub-module support
18670f3e72b5SJason Gunthorpe  */
18680f3e72b5SJason Gunthorpe /*
18690f3e72b5SJason Gunthorpe  * Helper for managing a buffer of info chain capabilities, allocate or
18700f3e72b5SJason Gunthorpe  * reallocate a buffer with additional @size, filling in @id and @version
18710f3e72b5SJason Gunthorpe  * of the capability.  A pointer to the new capability is returned.
18720f3e72b5SJason Gunthorpe  *
18730f3e72b5SJason Gunthorpe  * NB. The chain is based at the head of the buffer, so new entries are
18740f3e72b5SJason Gunthorpe  * added to the tail, vfio_info_cap_shift() should be called to fixup the
18750f3e72b5SJason Gunthorpe  * next offsets prior to copying to the user buffer.
18760f3e72b5SJason Gunthorpe  */
18770f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
18780f3e72b5SJason Gunthorpe 					       size_t size, u16 id, u16 version)
18790f3e72b5SJason Gunthorpe {
18800f3e72b5SJason Gunthorpe 	void *buf;
18810f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header, *tmp;
18820f3e72b5SJason Gunthorpe 
18830f3e72b5SJason Gunthorpe 	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
18840f3e72b5SJason Gunthorpe 	if (!buf) {
18850f3e72b5SJason Gunthorpe 		kfree(caps->buf);
18860f3e72b5SJason Gunthorpe 		caps->buf = NULL;
18870f3e72b5SJason Gunthorpe 		caps->size = 0;
18880f3e72b5SJason Gunthorpe 		return ERR_PTR(-ENOMEM);
18890f3e72b5SJason Gunthorpe 	}
18900f3e72b5SJason Gunthorpe 
18910f3e72b5SJason Gunthorpe 	caps->buf = buf;
18920f3e72b5SJason Gunthorpe 	header = buf + caps->size;
18930f3e72b5SJason Gunthorpe 
18940f3e72b5SJason Gunthorpe 	/* Eventually copied to user buffer, zero */
18950f3e72b5SJason Gunthorpe 	memset(header, 0, size);
18960f3e72b5SJason Gunthorpe 
18970f3e72b5SJason Gunthorpe 	header->id = id;
18980f3e72b5SJason Gunthorpe 	header->version = version;
18990f3e72b5SJason Gunthorpe 
19000f3e72b5SJason Gunthorpe 	/* Add to the end of the capability chain */
19010f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
19020f3e72b5SJason Gunthorpe 		; /* nothing */
19030f3e72b5SJason Gunthorpe 
19040f3e72b5SJason Gunthorpe 	tmp->next = caps->size;
19050f3e72b5SJason Gunthorpe 	caps->size += size;
19060f3e72b5SJason Gunthorpe 
19070f3e72b5SJason Gunthorpe 	return header;
19080f3e72b5SJason Gunthorpe }
19090f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add);
19100f3e72b5SJason Gunthorpe 
19110f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
19120f3e72b5SJason Gunthorpe {
19130f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *tmp;
19140f3e72b5SJason Gunthorpe 	void *buf = (void *)caps->buf;
19150f3e72b5SJason Gunthorpe 
19160f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
19170f3e72b5SJason Gunthorpe 		tmp->next += offset;
19180f3e72b5SJason Gunthorpe }
19190f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift);
19200f3e72b5SJason Gunthorpe 
19210f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps,
19220f3e72b5SJason Gunthorpe 			     struct vfio_info_cap_header *cap, size_t size)
19230f3e72b5SJason Gunthorpe {
19240f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header;
19250f3e72b5SJason Gunthorpe 
19260f3e72b5SJason Gunthorpe 	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
19270f3e72b5SJason Gunthorpe 	if (IS_ERR(header))
19280f3e72b5SJason Gunthorpe 		return PTR_ERR(header);
19290f3e72b5SJason Gunthorpe 
19300f3e72b5SJason Gunthorpe 	memcpy(header + 1, cap + 1, size - sizeof(*header));
19310f3e72b5SJason Gunthorpe 
19320f3e72b5SJason Gunthorpe 	return 0;
19330f3e72b5SJason Gunthorpe }
19340f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability);
19350f3e72b5SJason Gunthorpe 
/*
 * Validate a user-supplied VFIO_DEVICE_SET_IRQS header against the
 * device's IRQ layout and compute the size of the trailing data payload.
 * @hdr [in]        : irq_set header already copied from userspace
 * @num_irqs [in]   : number of IRQs in the index named by hdr->index
 * @max_irq_type [in]: number of valid IRQ indexes for this device
 * @data_size [out] : set to hdr->count * per-element size (0 for
 *                    DATA_NONE); may only be NULL when no data follows
 * Returns 0 on success or -EINVAL for any malformed field.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/*
	 * argsz must cover at least the fixed header, index must name a
	 * valid IRQ type, start + count must not wrap a u32, and only
	 * recognized data/action flag bits may be set.
	 */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	/* Zeroed before the range check so callers see 0 even on -EINVAL */
	if (data_size)
		*data_size = 0;

	/* The requested [start, start + count) must fit within the index */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	/* Per-element payload size, keyed by the data type flag */
	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		/* argsz must be large enough to carry the whole payload */
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
19830f3e72b5SJason Gunthorpe 
19840f3e72b5SJason Gunthorpe /*
19850f3e72b5SJason Gunthorpe  * Pin contiguous user pages and return their associated host pages for local
19860f3e72b5SJason Gunthorpe  * domain only.
19870f3e72b5SJason Gunthorpe  * @device [in]  : device
19880f3e72b5SJason Gunthorpe  * @iova [in]    : starting IOVA of user pages to be pinned.
19890f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be pinned.  This count should not
19900f3e72b5SJason Gunthorpe  *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
19910f3e72b5SJason Gunthorpe  * @prot [in]    : protection flags
19920f3e72b5SJason Gunthorpe  * @pages[out]   : array of host pages
19930f3e72b5SJason Gunthorpe  * Return error or number of pages pinned.
19940f3e72b5SJason Gunthorpe  */
19950f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
19960f3e72b5SJason Gunthorpe 		   int npage, int prot, struct page **pages)
19970f3e72b5SJason Gunthorpe {
19980f3e72b5SJason Gunthorpe 	struct vfio_container *container;
19990f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
20000f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
20010f3e72b5SJason Gunthorpe 	int ret;
20020f3e72b5SJason Gunthorpe 
20030f3e72b5SJason Gunthorpe 	if (!pages || !npage || !vfio_assert_device_open(device))
20040f3e72b5SJason Gunthorpe 		return -EINVAL;
20050f3e72b5SJason Gunthorpe 
20060f3e72b5SJason Gunthorpe 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
20070f3e72b5SJason Gunthorpe 		return -E2BIG;
20080f3e72b5SJason Gunthorpe 
20090f3e72b5SJason Gunthorpe 	if (group->dev_counter > 1)
20100f3e72b5SJason Gunthorpe 		return -EINVAL;
20110f3e72b5SJason Gunthorpe 
20120f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
20130f3e72b5SJason Gunthorpe 	container = group->container;
20140f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
20150f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->pin_pages))
20160f3e72b5SJason Gunthorpe 		ret = driver->ops->pin_pages(container->iommu_data,
20170f3e72b5SJason Gunthorpe 					     group->iommu_group, iova,
20180f3e72b5SJason Gunthorpe 					     npage, prot, pages);
20190f3e72b5SJason Gunthorpe 	else
20200f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
20210f3e72b5SJason Gunthorpe 
20220f3e72b5SJason Gunthorpe 	return ret;
20230f3e72b5SJason Gunthorpe }
20240f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages);
20250f3e72b5SJason Gunthorpe 
/*
 * Unpin contiguous host pages for local domain only.
 * @device [in]  : device
 * @iova [in]    : starting address of user pages to be unpinned.
 * @npage [in]   : count of pages to be unpinned.  This count should not
 *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	/* Caller bugs: bad page count or device not actually open */
	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	if (WARN_ON(!vfio_assert_device_open(device)))
		return;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	/*
	 * NOTE(review): unlike vfio_pin_pages() there is no NULL check on
	 * driver or ->unpin_pages here; presumably a successful pin implies
	 * the driver implements unpin — confirm against the pin path.
	 */
	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
EXPORT_SYMBOL(vfio_unpin_pages);
20510f3e72b5SJason Gunthorpe 
20520f3e72b5SJason Gunthorpe /*
20530f3e72b5SJason Gunthorpe  * This interface allows the CPUs to perform some sort of virtual DMA on
20540f3e72b5SJason Gunthorpe  * behalf of the device.
20550f3e72b5SJason Gunthorpe  *
20560f3e72b5SJason Gunthorpe  * CPUs read/write from/into a range of IOVAs pointing to user space memory
20570f3e72b5SJason Gunthorpe  * into/from a kernel buffer.
20580f3e72b5SJason Gunthorpe  *
20590f3e72b5SJason Gunthorpe  * As the read/write of user space memory is conducted via the CPUs and is
20600f3e72b5SJason Gunthorpe  * not a real device DMA, it is not necessary to pin the user space memory.
20610f3e72b5SJason Gunthorpe  *
20620f3e72b5SJason Gunthorpe  * @device [in]		: VFIO device
20630f3e72b5SJason Gunthorpe  * @iova [in]		: base IOVA of a user space buffer
20640f3e72b5SJason Gunthorpe  * @data [in]		: pointer to kernel buffer
20650f3e72b5SJason Gunthorpe  * @len [in]		: kernel buffer length
20660f3e72b5SJason Gunthorpe  * @write		: indicate read or write
20670f3e72b5SJason Gunthorpe  * Return error code on failure or 0 on success.
20680f3e72b5SJason Gunthorpe  */
20690f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
20700f3e72b5SJason Gunthorpe 		size_t len, bool write)
20710f3e72b5SJason Gunthorpe {
20720f3e72b5SJason Gunthorpe 	struct vfio_container *container;
20730f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
20740f3e72b5SJason Gunthorpe 	int ret = 0;
20750f3e72b5SJason Gunthorpe 
20760f3e72b5SJason Gunthorpe 	if (!data || len <= 0 || !vfio_assert_device_open(device))
20770f3e72b5SJason Gunthorpe 		return -EINVAL;
20780f3e72b5SJason Gunthorpe 
20790f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
20800f3e72b5SJason Gunthorpe 	container = device->group->container;
20810f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
20820f3e72b5SJason Gunthorpe 
20830f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->dma_rw))
20840f3e72b5SJason Gunthorpe 		ret = driver->ops->dma_rw(container->iommu_data,
20850f3e72b5SJason Gunthorpe 					  iova, data, len, write);
20860f3e72b5SJason Gunthorpe 	else
20870f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
20880f3e72b5SJason Gunthorpe 	return ret;
20890f3e72b5SJason Gunthorpe }
20900f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw);
20910f3e72b5SJason Gunthorpe 
20920f3e72b5SJason Gunthorpe /*
20930f3e72b5SJason Gunthorpe  * Module/class support
20940f3e72b5SJason Gunthorpe  */
/* Place device nodes of the vfio class under /dev/vfio/<dev_name> */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
20990f3e72b5SJason Gunthorpe 
/*
 * Misc device backing /dev/vfio/vfio (the container node), created
 * world read/write (S_IRUGO | S_IWUGO).
 */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
21070f3e72b5SJason Gunthorpe 
/*
 * Module init: set up global state, register the /dev/vfio/vfio misc
 * device, the vfio class for group nodes, the group char-dev region, and
 * optionally the no-iommu driver.  Unwinds in reverse order on failure.
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	/* Reserve a char-dev region wide enough for any group minor */
	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

#ifdef CONFIG_VFIO_NOIOMMU
	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	/*
	 * With CONFIG_VFIO_NOIOMMU disabled, ret is still the 0 returned by
	 * alloc_chrdev_region() above, so this check is a no-op.
	 */
	if (ret)
		goto err_driver_register;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

	/* Error unwind: tear down in reverse order of setup */
err_driver_register:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}
21550f3e72b5SJason Gunthorpe 
/* Module exit: tear down in reverse order of vfio_init() */
static void __exit vfio_cleanup(void)
{
	/* All groups should have been released before module unload */
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}
21700f3e72b5SJason Gunthorpe 
21710f3e72b5SJason Gunthorpe module_init(vfio_init);
21720f3e72b5SJason Gunthorpe module_exit(vfio_cleanup);
21730f3e72b5SJason Gunthorpe 
21740f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION);
21750f3e72b5SJason Gunthorpe MODULE_LICENSE("GPL v2");
21760f3e72b5SJason Gunthorpe MODULE_AUTHOR(DRIVER_AUTHOR);
21770f3e72b5SJason Gunthorpe MODULE_DESCRIPTION(DRIVER_DESC);
21780f3e72b5SJason Gunthorpe MODULE_ALIAS_MISCDEV(VFIO_MINOR);
21790f3e72b5SJason Gunthorpe MODULE_ALIAS("devname:vfio/vfio");
21800f3e72b5SJason Gunthorpe MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
2181