xref: /openbmc/linux/drivers/vfio/vfio_main.c (revision 03e650f6)
10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
20f3e72b5SJason Gunthorpe /*
30f3e72b5SJason Gunthorpe  * VFIO core
40f3e72b5SJason Gunthorpe  *
50f3e72b5SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
60f3e72b5SJason Gunthorpe  *     Author: Alex Williamson <alex.williamson@redhat.com>
70f3e72b5SJason Gunthorpe  *
80f3e72b5SJason Gunthorpe  * Derived from original vfio:
90f3e72b5SJason Gunthorpe  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
100f3e72b5SJason Gunthorpe  * Author: Tom Lyon, pugs@cisco.com
110f3e72b5SJason Gunthorpe  */
120f3e72b5SJason Gunthorpe 
130f3e72b5SJason Gunthorpe #include <linux/cdev.h>
140f3e72b5SJason Gunthorpe #include <linux/compat.h>
150f3e72b5SJason Gunthorpe #include <linux/device.h>
160f3e72b5SJason Gunthorpe #include <linux/file.h>
170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h>
180f3e72b5SJason Gunthorpe #include <linux/fs.h>
190f3e72b5SJason Gunthorpe #include <linux/idr.h>
200f3e72b5SJason Gunthorpe #include <linux/iommu.h>
210f3e72b5SJason Gunthorpe #include <linux/list.h>
220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h>
230f3e72b5SJason Gunthorpe #include <linux/module.h>
240f3e72b5SJason Gunthorpe #include <linux/mutex.h>
250f3e72b5SJason Gunthorpe #include <linux/pci.h>
260f3e72b5SJason Gunthorpe #include <linux/rwsem.h>
270f3e72b5SJason Gunthorpe #include <linux/sched.h>
280f3e72b5SJason Gunthorpe #include <linux/slab.h>
290f3e72b5SJason Gunthorpe #include <linux/stat.h>
300f3e72b5SJason Gunthorpe #include <linux/string.h>
310f3e72b5SJason Gunthorpe #include <linux/uaccess.h>
320f3e72b5SJason Gunthorpe #include <linux/vfio.h>
330f3e72b5SJason Gunthorpe #include <linux/wait.h>
340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h>
358e5c6995SAbhishek Sahu #include <linux/pm_runtime.h>
3680c4b92aSYishai Hadas #include <linux/interval_tree.h>
3780c4b92aSYishai Hadas #include <linux/iova_bitmap.h>
380f3e72b5SJason Gunthorpe #include "vfio.h"
390f3e72b5SJason Gunthorpe 
400f3e72b5SJason Gunthorpe #define DRIVER_VERSION	"0.3"
410f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
420f3e72b5SJason Gunthorpe #define DRIVER_DESC	"VFIO - User Level meta-driver"
430f3e72b5SJason Gunthorpe 
/* Singleton holding all global state of the VFIO core. */
static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock; /* locks iommu_drivers_list */
	struct list_head		group_list;
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;	/* minor numbers for group cdevs */
	dev_t				group_devt;
	struct class			*device_class;
	struct ida			device_ida;	/* per-device index allocation */
} vfio;
550f3e72b5SJason Gunthorpe 
/* One registered IOMMU backend; linked on vfio.iommu_drivers_list. */
struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};
600f3e72b5SJason Gunthorpe 
/*
 * A container is created per open of /dev/vfio/vfio and aggregates one
 * or more groups behind a single IOMMU driver instance.
 */
struct vfio_container {
	struct kref			kref;	/* freed via vfio_container_release() */
	struct list_head		group_list;
	struct rw_semaphore		group_lock;	/* protects group_list/iommu_driver */
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;	/* opaque state from driver ->open() */
	bool				noiommu;	/* only vfio-noiommu may attach */
};
690f3e72b5SJason Gunthorpe 
/*
 * User-visible unit of ownership: one chardev per IOMMU group through
 * which userspace gains access to the devices in that group.
 */
struct vfio_group {
	struct device 			dev;
	struct cdev			cdev;
	/* Lifetime refcount; the final put tears down the cdev (vfio_group_put()). */
	refcount_t			users;
	unsigned int			container_users;
	struct iommu_group		*iommu_group;	/* ref held until vfio_group_release() */
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;	/* locks device_list */
	struct list_head		vfio_next;	/* on vfio.group_list */
	struct list_head		container_next;	/* on container->group_list */
	enum vfio_group_type		type;
	struct rw_semaphore		group_rwsem;
	struct kvm			*kvm;	/* NOTE(review): set/used outside this chunk — semantics not visible here */
	struct file			*opened_file;
	struct blocking_notifier_head	notifier;
};
870f3e72b5SJason Gunthorpe 
#ifdef CONFIG_VFIO_NOIOMMU
/* Runtime opt-in for unsafe no-IOMMU mode; using it taints the kernel. */
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

/* Maps set_id -> vfio_device_set; see vfio_assign_device_set(). */
static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;
970f3e72b5SJason Gunthorpe 
/*
 * vfio_assign_device_set() - Join @device to the device set keyed by
 * @set_id, creating the set on first use.
 *
 * The set is looked up in the global vfio_device_set_xa; each member
 * accounts for one unit of dev_set->device_count (dropped again in
 * vfio_release_device_set()).
 *
 * Returns 0 on success, -EINVAL for a NULL set_id, -ENOMEM on
 * allocation failure, or an xarray error code.
 */
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	/* Slot empty: allocate a candidate set outside the lock. */
	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	/*
	 * Re-check under the lock: another thread may have installed a set
	 * for this idx while we allocated.  __xa_cmpxchg installs ours only
	 * if the slot is still NULL.
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	/* Lost the race (or xarray error): drop our unused candidate. */
	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	/* device_count is protected by the xa_lock. */
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
1470f3e72b5SJason Gunthorpe 
/*
 * Undo vfio_assign_device_set(): remove @device from its set and free
 * the set when the last member leaves.  No-op if the device was never
 * assigned a set.
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* device_count and the xarray slot are both protected by xa_lock. */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
1680f3e72b5SJason Gunthorpe 
1690f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
1700f3e72b5SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg)
1710f3e72b5SJason Gunthorpe {
1720f3e72b5SJason Gunthorpe 	if (arg != VFIO_NOIOMMU_IOMMU)
1730f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
1740f3e72b5SJason Gunthorpe 	if (!capable(CAP_SYS_RAWIO))
1750f3e72b5SJason Gunthorpe 		return ERR_PTR(-EPERM);
1760f3e72b5SJason Gunthorpe 
1770f3e72b5SJason Gunthorpe 	return NULL;
1780f3e72b5SJason Gunthorpe }
1790f3e72b5SJason Gunthorpe 
/* ->release() stub: the no-IOMMU backend keeps no state to tear down. */
static void vfio_noiommu_release(void *iommu_data)
{
}
1830f3e72b5SJason Gunthorpe 
1840f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data,
1850f3e72b5SJason Gunthorpe 			       unsigned int cmd, unsigned long arg)
1860f3e72b5SJason Gunthorpe {
1870f3e72b5SJason Gunthorpe 	if (cmd == VFIO_CHECK_EXTENSION)
1880f3e72b5SJason Gunthorpe 		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
1890f3e72b5SJason Gunthorpe 
1900f3e72b5SJason Gunthorpe 	return -ENOTTY;
1910f3e72b5SJason Gunthorpe }
1920f3e72b5SJason Gunthorpe 
/* ->attach_group() stub: nothing to program, always succeeds. */
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}
1980f3e72b5SJason Gunthorpe 
/* ->detach_group() stub: attach did nothing, so neither does detach. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
2030f3e72b5SJason Gunthorpe 
/* IOMMU backend used by no-IOMMU containers; all callbacks are stubs. */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
2130f3e72b5SJason Gunthorpe 
2140f3e72b5SJason Gunthorpe /*
2150f3e72b5SJason Gunthorpe  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
2160f3e72b5SJason Gunthorpe  * use vfio-noiommu.
2170f3e72b5SJason Gunthorpe  */
2180f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
2190f3e72b5SJason Gunthorpe 		const struct vfio_iommu_driver *driver)
2200f3e72b5SJason Gunthorpe {
2210f3e72b5SJason Gunthorpe 	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
2220f3e72b5SJason Gunthorpe }
2230f3e72b5SJason Gunthorpe #else
/* Without CONFIG_VFIO_NOIOMMU there is no restriction on driver choice. */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return true;
}
2290f3e72b5SJason Gunthorpe #endif /* CONFIG_VFIO_NOIOMMU */
2300f3e72b5SJason Gunthorpe 
2310f3e72b5SJason Gunthorpe /*
2320f3e72b5SJason Gunthorpe  * IOMMU driver registration
2330f3e72b5SJason Gunthorpe  */
2340f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2350f3e72b5SJason Gunthorpe {
2360f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver, *tmp;
2370f3e72b5SJason Gunthorpe 
2380f3e72b5SJason Gunthorpe 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
2390f3e72b5SJason Gunthorpe 		return -EINVAL;
2400f3e72b5SJason Gunthorpe 
2410f3e72b5SJason Gunthorpe 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
2420f3e72b5SJason Gunthorpe 	if (!driver)
2430f3e72b5SJason Gunthorpe 		return -ENOMEM;
2440f3e72b5SJason Gunthorpe 
2450f3e72b5SJason Gunthorpe 	driver->ops = ops;
2460f3e72b5SJason Gunthorpe 
2470f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2480f3e72b5SJason Gunthorpe 
2490f3e72b5SJason Gunthorpe 	/* Check for duplicates */
2500f3e72b5SJason Gunthorpe 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
2510f3e72b5SJason Gunthorpe 		if (tmp->ops == ops) {
2520f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2530f3e72b5SJason Gunthorpe 			kfree(driver);
2540f3e72b5SJason Gunthorpe 			return -EINVAL;
2550f3e72b5SJason Gunthorpe 		}
2560f3e72b5SJason Gunthorpe 	}
2570f3e72b5SJason Gunthorpe 
2580f3e72b5SJason Gunthorpe 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
2590f3e72b5SJason Gunthorpe 
2600f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2610f3e72b5SJason Gunthorpe 
2620f3e72b5SJason Gunthorpe 	return 0;
2630f3e72b5SJason Gunthorpe }
2640f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
2650f3e72b5SJason Gunthorpe 
2660f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2670f3e72b5SJason Gunthorpe {
2680f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
2690f3e72b5SJason Gunthorpe 
2700f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2710f3e72b5SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
2720f3e72b5SJason Gunthorpe 		if (driver->ops == ops) {
2730f3e72b5SJason Gunthorpe 			list_del(&driver->vfio_next);
2740f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2750f3e72b5SJason Gunthorpe 			kfree(driver);
2760f3e72b5SJason Gunthorpe 			return;
2770f3e72b5SJason Gunthorpe 		}
2780f3e72b5SJason Gunthorpe 	}
2790f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2800f3e72b5SJason Gunthorpe }
2810f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
2820f3e72b5SJason Gunthorpe 
2830f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group);
2840f3e72b5SJason Gunthorpe 
/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
/* Take an additional reference on @container. */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
2950f3e72b5SJason Gunthorpe 
2960f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref)
2970f3e72b5SJason Gunthorpe {
2980f3e72b5SJason Gunthorpe 	struct vfio_container *container;
2990f3e72b5SJason Gunthorpe 	container = container_of(kref, struct vfio_container, kref);
3000f3e72b5SJason Gunthorpe 
3010f3e72b5SJason Gunthorpe 	kfree(container);
3020f3e72b5SJason Gunthorpe }
3030f3e72b5SJason Gunthorpe 
/* Drop a reference; frees the container when it was the last one. */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
3080f3e72b5SJason Gunthorpe 
/*
 * Group objects - create, release, get, put, search
 */
/*
 * Look up the vfio_group wrapping @iommu_group and take a reference.
 * Caller must hold vfio.group_lock (see vfio_group_get_from_iommu()
 * and vfio_create_group()).  Returns NULL if no such group exists.
 */
static struct vfio_group *
__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			return group;
		}
	}
	return NULL;
}
3250f3e72b5SJason Gunthorpe 
3260f3e72b5SJason Gunthorpe static struct vfio_group *
3270f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3280f3e72b5SJason Gunthorpe {
3290f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3300f3e72b5SJason Gunthorpe 
3310f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.group_lock);
3320f3e72b5SJason Gunthorpe 	group = __vfio_group_get_from_iommu(iommu_group);
3330f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
3340f3e72b5SJason Gunthorpe 	return group;
3350f3e72b5SJason Gunthorpe }
3360f3e72b5SJason Gunthorpe 
/*
 * struct device release callback: final teardown once put_device()
 * drops the last device-model reference.  Returns the iommu_group
 * reference and the minor taken in vfio_group_alloc().
 */
static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	iommu_group_put(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}
3460f3e72b5SJason Gunthorpe 
/*
 * Allocate and initialize a vfio_group for @iommu_group.  The group is
 * not yet visible: vfio_create_group() adds it to vfio.group_list and
 * registers the cdev.  On success the caller owns one users reference
 * plus the device-model reference released via put_device().
 */
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	/* Minor number for the /dev/vfio/<group> chardev. */
	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	/* After this point put_device() frees everything via the release. */
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->users, 1);
	init_rwsem(&group->group_rwsem);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}
3820f3e72b5SJason Gunthorpe 
/*
 * Create and publish a vfio_group for @iommu_group: allocate it, name
 * its chardev (noiommu groups get a "noiommu-" prefix), and register it
 * under vfio.group_lock.  If another thread published a group for the
 * same iommu_group first, that existing group is returned instead and
 * the newly allocated one is dropped.  Returns the group or ERR_PTR.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;	/* return the winner, free ours below */

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	/* Releases our allocation through vfio_group_release(). */
	put_device(&group->dev);
	return ret;
}
4260f3e72b5SJason Gunthorpe 
/*
 * Drop a users reference.  The last put atomically takes vfio.group_lock
 * (refcount_dec_and_mutex_lock), unpublishes the group from the list and
 * the cdev, then lets put_device() run the final release.
 */
static void vfio_group_put(struct vfio_group *group)
{
	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
		return;

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group. Since all
	 * pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);

	list_del(&group->vfio_next);
	cdev_device_del(&group->cdev, &group->dev);
	mutex_unlock(&vfio.group_lock);

	put_device(&group->dev);
}
4480f3e72b5SJason Gunthorpe 
/* Take an additional users reference; caller must already hold one. */
static void vfio_group_get(struct vfio_group *group)
{
	refcount_inc(&group->users);
}
4530f3e72b5SJason Gunthorpe 
/*
 * Device objects - create, release, get, put, search
 */
/* Device reference always implies a group reference */
/*
 * Drop a registration reference; the last put completes device->comp,
 * unblocking the unregistration path waiting for all users to finish.
 */
static void vfio_device_put_registration(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}
4630f3e72b5SJason Gunthorpe 
/*
 * Try to take a registration reference; fails (returns false) once the
 * device has begun unregistering and its refcount reached zero.
 */
static bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
4680f3e72b5SJason Gunthorpe 
4690f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
4700f3e72b5SJason Gunthorpe 						 struct device *dev)
4710f3e72b5SJason Gunthorpe {
4720f3e72b5SJason Gunthorpe 	struct vfio_device *device;
4730f3e72b5SJason Gunthorpe 
4740f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
4750f3e72b5SJason Gunthorpe 	list_for_each_entry(device, &group->device_list, group_next) {
4764a725b8dSKevin Tian 		if (device->dev == dev &&
4774a725b8dSKevin Tian 		    vfio_device_try_get_registration(device)) {
4780f3e72b5SJason Gunthorpe 			mutex_unlock(&group->device_lock);
4790f3e72b5SJason Gunthorpe 			return device;
4800f3e72b5SJason Gunthorpe 		}
4810f3e72b5SJason Gunthorpe 	}
4820f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
4830f3e72b5SJason Gunthorpe 	return NULL;
4840f3e72b5SJason Gunthorpe }
4850f3e72b5SJason Gunthorpe 
/*
 * VFIO driver API
 */
/* Release helper called by vfio_put_device() */
/*
 * struct device release callback for the embedded device->device.
 * Undoes vfio_init_device() (device set, index) and delegates the final
 * free to the driver's @release (see the vfio-ccw note below).
 */
static void vfio_device_release(struct device *dev)
{
	struct vfio_device *device =
			container_of(dev, struct vfio_device, device);

	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);

	/*
	 * kvfree() cannot be done here due to a life cycle mess in
	 * vfio-ccw. Before the ccw part is fixed all drivers are
	 * required to support @release and call vfio_free_device()
	 * from there.
	 */
	device->ops->release(device);
}
506cb9ff3f3SKevin Tian 
507cb9ff3f3SKevin Tian /*
508cb9ff3f3SKevin Tian  * Allocate and initialize vfio_device so it can be registered to vfio
509cb9ff3f3SKevin Tian  * core.
510cb9ff3f3SKevin Tian  *
511cb9ff3f3SKevin Tian  * Drivers should use the wrapper vfio_alloc_device() for allocation.
512cb9ff3f3SKevin Tian  * @size is the size of the structure to be allocated, including any
513cb9ff3f3SKevin Tian  * private data used by the driver.
514cb9ff3f3SKevin Tian  *
515cb9ff3f3SKevin Tian  * Driver may provide an @init callback to cover device private data.
516cb9ff3f3SKevin Tian  *
517cb9ff3f3SKevin Tian  * Use vfio_put_device() to release the structure after success return.
518cb9ff3f3SKevin Tian  */
519cb9ff3f3SKevin Tian struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
520cb9ff3f3SKevin Tian 				       const struct vfio_device_ops *ops)
521cb9ff3f3SKevin Tian {
522cb9ff3f3SKevin Tian 	struct vfio_device *device;
523cb9ff3f3SKevin Tian 	int ret;
524cb9ff3f3SKevin Tian 
525cb9ff3f3SKevin Tian 	if (WARN_ON(size < sizeof(struct vfio_device)))
526cb9ff3f3SKevin Tian 		return ERR_PTR(-EINVAL);
527cb9ff3f3SKevin Tian 
528cb9ff3f3SKevin Tian 	device = kvzalloc(size, GFP_KERNEL);
529cb9ff3f3SKevin Tian 	if (!device)
530cb9ff3f3SKevin Tian 		return ERR_PTR(-ENOMEM);
531cb9ff3f3SKevin Tian 
532cb9ff3f3SKevin Tian 	ret = vfio_init_device(device, dev, ops);
533cb9ff3f3SKevin Tian 	if (ret)
534cb9ff3f3SKevin Tian 		goto out_free;
535cb9ff3f3SKevin Tian 	return device;
536cb9ff3f3SKevin Tian 
537cb9ff3f3SKevin Tian out_free:
538cb9ff3f3SKevin Tian 	kvfree(device);
539cb9ff3f3SKevin Tian 	return ERR_PTR(ret);
540cb9ff3f3SKevin Tian }
541cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(_vfio_alloc_device);
542cb9ff3f3SKevin Tian 
/*
 * Initialize a vfio_device so it can be registered to vfio core.
 *
 * Only vfio-ccw driver should call this interface.
 */
/*
 * Allocates the global device index, runs the driver's optional @init,
 * then initializes the embedded struct device whose release callback
 * (vfio_device_release) undoes all of this.  On @init failure the index
 * and any device set @init assigned are unwound here directly, since
 * the release callback is not yet armed.
 */
int vfio_init_device(struct vfio_device *device, struct device *dev,
		     const struct vfio_device_ops *ops)
{
	int ret;

	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
	if (ret < 0) {
		dev_dbg(dev, "Error to alloc index\n");
		return ret;
	}

	device->index = ret;
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;

	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	device_initialize(&device->device);
	device->device.release = vfio_device_release;
	device->device.class = vfio.device_class;
	device->device.parent = device->dev;
	return 0;

out_uninit:
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_init_device);
582cb9ff3f3SKevin Tian 
/*
 * The helper called by driver @release callback to free the device
 * structure. Drivers which don't have private data to clean can
 * simply use this helper as its @release.
 */
void vfio_free_device(struct vfio_device *device)
{
	/* Matches the kvzalloc() in _vfio_alloc_device(). */
	kvfree(device);
}
EXPORT_SYMBOL_GPL(vfio_free_device);
593cb9ff3f3SKevin Tian 
/*
 * Build a synthetic IOMMU group for a device without real IOMMU backing
 * (no-IOMMU or mdev-style use): allocate an iommu_group, add @dev to it,
 * and wrap it in a vfio_group of @type.  The local iommu_group reference
 * is dropped on both paths because vfio_create_group() takes its own.
 */
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		/* Device was already added above, so remove it first. */
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}
6260f3e72b5SJason Gunthorpe 
/*
 * Find the vfio_group already associated with @dev's iommu_group, or
 * create one if none exists yet.  Returns the vfio_group or an ERR_PTR.
 */
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	/* Without noiommu, a device lacking an iommu_group cannot be used */
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}
6700f3e72b5SJason Gunthorpe 
/*
 * Common tail of device registration: attach @device to @group, publish
 * the device via the driver core and start the registration refcount.
 * The caller's reference on @group is consumed - moved into device->group
 * on success, dropped on any failure.  Returns 0 or a negative errno.
 */
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;
	int ret;

	/* Propagate a group lookup/allocation failure from the caller */
	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put_registration(existing_device);
		ret = -EBUSY;
		goto err_out;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	ret = dev_set_name(&device->device, "vfio%d", device->index);
	if (ret)
		goto err_out;

	ret = device_add(&device->device);
	if (ret)
		goto err_out;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return 0;
err_out:
	/*
	 * Groups fabricated by vfio_noiommu_group_alloc() added the device
	 * to the iommu_group there; undo that on failure.
	 */
	if (group->type == VFIO_NO_IOMMU ||
	    group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);
	vfio_group_put(group);
	return ret;
}
7220f3e72b5SJason Gunthorpe 
7230f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device)
7240f3e72b5SJason Gunthorpe {
7250f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
7260f3e72b5SJason Gunthorpe 		vfio_group_find_or_alloc(device->dev));
7270f3e72b5SJason Gunthorpe }
7280f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev);
7290f3e72b5SJason Gunthorpe 
7300f3e72b5SJason Gunthorpe /*
7310f3e72b5SJason Gunthorpe  * Register a virtual device without IOMMU backing.  The user of this
7320f3e72b5SJason Gunthorpe  * device must not be able to directly trigger unmediated DMA.
7330f3e72b5SJason Gunthorpe  */
7340f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device)
7350f3e72b5SJason Gunthorpe {
7360f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
7370f3e72b5SJason Gunthorpe 		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
7380f3e72b5SJason Gunthorpe }
7390f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
7400f3e72b5SJason Gunthorpe 
7410f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
7420f3e72b5SJason Gunthorpe 						     char *buf)
7430f3e72b5SJason Gunthorpe {
7440f3e72b5SJason Gunthorpe 	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
7450f3e72b5SJason Gunthorpe 
7460f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
7470f3e72b5SJason Gunthorpe 	list_for_each_entry(it, &group->device_list, group_next) {
7480f3e72b5SJason Gunthorpe 		int ret;
7490f3e72b5SJason Gunthorpe 
7500f3e72b5SJason Gunthorpe 		if (it->ops->match) {
7510f3e72b5SJason Gunthorpe 			ret = it->ops->match(it, buf);
7520f3e72b5SJason Gunthorpe 			if (ret < 0) {
7530f3e72b5SJason Gunthorpe 				device = ERR_PTR(ret);
7540f3e72b5SJason Gunthorpe 				break;
7550f3e72b5SJason Gunthorpe 			}
7560f3e72b5SJason Gunthorpe 		} else {
7570f3e72b5SJason Gunthorpe 			ret = !strcmp(dev_name(it->dev), buf);
7580f3e72b5SJason Gunthorpe 		}
7590f3e72b5SJason Gunthorpe 
7604a725b8dSKevin Tian 		if (ret && vfio_device_try_get_registration(it)) {
7610f3e72b5SJason Gunthorpe 			device = it;
7620f3e72b5SJason Gunthorpe 			break;
7630f3e72b5SJason Gunthorpe 		}
7640f3e72b5SJason Gunthorpe 	}
7650f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
7660f3e72b5SJason Gunthorpe 
7670f3e72b5SJason Gunthorpe 	return device;
7680f3e72b5SJason Gunthorpe }
7690f3e72b5SJason Gunthorpe 
/*
 * Decrement the device registration reference count and wait for the
 * device to be removed.  Open file descriptors for the device keep it
 * alive; while waiting, the driver's ->request() callback (if any) is
 * invoked periodically to ask users to release the device.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/* Matches refcount_set(&device->refcount, 1) at registration */
	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/* @i counts the requests so the driver can escalate */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			/*
			 * Wait interruptibly at first; after the first signal
			 * switch to an uninterruptible wait (unregistration
			 * cannot be aborted) and warn the blocked task.
			 */
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	/* Balances device_add in register path */
	device_del(&device->device);

	/* Undo the iommu_group membership fabricated for these group types */
	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
8170f3e72b5SJason Gunthorpe 
8180f3e72b5SJason Gunthorpe /*
8190f3e72b5SJason Gunthorpe  * VFIO base fd, /dev/vfio/vfio
8200f3e72b5SJason Gunthorpe  */
/*
 * VFIO_CHECK_EXTENSION handler for the container fd.  Returns > 0 when the
 * extension identified by @arg is supported, 0 otherwise, or a negative
 * errno from the iommu driver.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/*
				 * Once groups are attached, only drivers
				 * permitted for this container may answer.
				 */
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
8690f3e72b5SJason Gunthorpe 
/*
 * Attach every group already in the container to a newly selected iommu
 * driver.  On failure, groups attached so far are detached again so the
 * container is left unchanged.  Caller must hold write lock on
 * container->group_lock.
 */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;	/* reported if group_list is empty */

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Walk backwards from the entry that failed, detaching the rest */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
8950f3e72b5SJason Gunthorpe 
/*
 * VFIO_SET_IOMMU handler: pick an iommu backend driver matching the type
 * magic in @arg, open it, and attach all currently present groups to it.
 * Returns 0 on success or a negative errno (last failure seen, or -ENODEV
 * if no driver matched).
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* On attach failure, release this driver and try the next */
		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
9620f3e72b5SJason Gunthorpe 
9630f3e72b5SJason Gunthorpe static long vfio_fops_unl_ioctl(struct file *filep,
9640f3e72b5SJason Gunthorpe 				unsigned int cmd, unsigned long arg)
9650f3e72b5SJason Gunthorpe {
9660f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
9670f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
9680f3e72b5SJason Gunthorpe 	void *data;
9690f3e72b5SJason Gunthorpe 	long ret = -EINVAL;
9700f3e72b5SJason Gunthorpe 
9710f3e72b5SJason Gunthorpe 	if (!container)
9720f3e72b5SJason Gunthorpe 		return ret;
9730f3e72b5SJason Gunthorpe 
9740f3e72b5SJason Gunthorpe 	switch (cmd) {
9750f3e72b5SJason Gunthorpe 	case VFIO_GET_API_VERSION:
9760f3e72b5SJason Gunthorpe 		ret = VFIO_API_VERSION;
9770f3e72b5SJason Gunthorpe 		break;
9780f3e72b5SJason Gunthorpe 	case VFIO_CHECK_EXTENSION:
9790f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(container, arg);
9800f3e72b5SJason Gunthorpe 		break;
9810f3e72b5SJason Gunthorpe 	case VFIO_SET_IOMMU:
9820f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_set_iommu(container, arg);
9830f3e72b5SJason Gunthorpe 		break;
9840f3e72b5SJason Gunthorpe 	default:
9850f3e72b5SJason Gunthorpe 		driver = container->iommu_driver;
9860f3e72b5SJason Gunthorpe 		data = container->iommu_data;
9870f3e72b5SJason Gunthorpe 
9880f3e72b5SJason Gunthorpe 		if (driver) /* passthrough all unrecognized ioctls */
9890f3e72b5SJason Gunthorpe 			ret = driver->ops->ioctl(data, cmd, arg);
9900f3e72b5SJason Gunthorpe 	}
9910f3e72b5SJason Gunthorpe 
9920f3e72b5SJason Gunthorpe 	return ret;
9930f3e72b5SJason Gunthorpe }
9940f3e72b5SJason Gunthorpe 
9950f3e72b5SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep)
9960f3e72b5SJason Gunthorpe {
9970f3e72b5SJason Gunthorpe 	struct vfio_container *container;
9980f3e72b5SJason Gunthorpe 
9990f3e72b5SJason Gunthorpe 	container = kzalloc(sizeof(*container), GFP_KERNEL);
10000f3e72b5SJason Gunthorpe 	if (!container)
10010f3e72b5SJason Gunthorpe 		return -ENOMEM;
10020f3e72b5SJason Gunthorpe 
10030f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&container->group_list);
10040f3e72b5SJason Gunthorpe 	init_rwsem(&container->group_lock);
10050f3e72b5SJason Gunthorpe 	kref_init(&container->kref);
10060f3e72b5SJason Gunthorpe 
10070f3e72b5SJason Gunthorpe 	filep->private_data = container;
10080f3e72b5SJason Gunthorpe 
10090f3e72b5SJason Gunthorpe 	return 0;
10100f3e72b5SJason Gunthorpe }
10110f3e72b5SJason Gunthorpe 
10120f3e72b5SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep)
10130f3e72b5SJason Gunthorpe {
10140f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
10150f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver = container->iommu_driver;
10160f3e72b5SJason Gunthorpe 
10170f3e72b5SJason Gunthorpe 	if (driver && driver->ops->notify)
10180f3e72b5SJason Gunthorpe 		driver->ops->notify(container->iommu_data,
10190f3e72b5SJason Gunthorpe 				    VFIO_IOMMU_CONTAINER_CLOSE);
10200f3e72b5SJason Gunthorpe 
10210f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
10220f3e72b5SJason Gunthorpe 
10230f3e72b5SJason Gunthorpe 	vfio_container_put(container);
10240f3e72b5SJason Gunthorpe 
10250f3e72b5SJason Gunthorpe 	return 0;
10260f3e72b5SJason Gunthorpe }
10270f3e72b5SJason Gunthorpe 
/*
 * File operations for the container fd (/dev/vfio/vfio).  Only open,
 * release and ioctl are provided here; the container fd itself exposes
 * no read/write/mmap interface.
 */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
10350f3e72b5SJason Gunthorpe 
10360f3e72b5SJason Gunthorpe /*
10370f3e72b5SJason Gunthorpe  * VFIO Group fd, /dev/vfio/$GROUP
10380f3e72b5SJason Gunthorpe  */
/*
 * Detach a group from its container: remove it from the iommu backend,
 * release DMA ownership for real iommu groups, and if this was the
 * container's last group, tear down the iommu backend entirely.  Caller
 * must hold group->group_rwsem for write with exactly one container user
 * remaining.
 */
static void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	/* Matches iommu_group_claim_dma_owner() done at attach time */
	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	/* Drops the reference taken by vfio_container_get() at attach */
	vfio_container_put(container);
}
10730f3e72b5SJason Gunthorpe 
10740f3e72b5SJason Gunthorpe /*
10750f3e72b5SJason Gunthorpe  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
10760f3e72b5SJason Gunthorpe  * if there was no container to unset.  Since the ioctl is called on
10770f3e72b5SJason Gunthorpe  * the group, we know that still exists, therefore the only valid
10780f3e72b5SJason Gunthorpe  * transition here is 1->0.
10790f3e72b5SJason Gunthorpe  */
1080b3b43590SJason Gunthorpe static int vfio_group_ioctl_unset_container(struct vfio_group *group)
10810f3e72b5SJason Gunthorpe {
1082b3b43590SJason Gunthorpe 	int ret = 0;
10830f3e72b5SJason Gunthorpe 
1084b3b43590SJason Gunthorpe 	down_write(&group->group_rwsem);
1085b3b43590SJason Gunthorpe 	if (!group->container) {
1086b3b43590SJason Gunthorpe 		ret = -EINVAL;
1087b3b43590SJason Gunthorpe 		goto out_unlock;
1088b3b43590SJason Gunthorpe 	}
1089b3b43590SJason Gunthorpe 	if (group->container_users != 1) {
1090b3b43590SJason Gunthorpe 		ret = -EBUSY;
1091b3b43590SJason Gunthorpe 		goto out_unlock;
1092b3b43590SJason Gunthorpe 	}
1093429a781cSJason Gunthorpe 	vfio_group_detach_container(group);
1094b3b43590SJason Gunthorpe 
1095b3b43590SJason Gunthorpe out_unlock:
1096b3b43590SJason Gunthorpe 	up_write(&group->group_rwsem);
1097b3b43590SJason Gunthorpe 	return ret;
10980f3e72b5SJason Gunthorpe }
10990f3e72b5SJason Gunthorpe 
/*
 * Resolve a user-supplied file to its vfio_container, or return NULL if
 * the file is not a /dev/vfio/vfio container fd.
 */
static struct vfio_container *vfio_container_from_file(struct file *file)
{
	struct vfio_container *container;

	/* Sanity check, is this really our fd? */
	if (file->f_op != &vfio_fops)
		return NULL;

	container = file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */
	return container;
}
1112*03e650f6SJason Gunthorpe 
/*
 * Attach @group to @container.  Claims DMA ownership for real iommu
 * groups and, if the container already has an iommu backend, attaches the
 * group to it.  On success the group holds one container user and a
 * reference on the container.  Caller must hold group->group_rwsem for
 * write.
 */
static int vfio_container_attach_group(struct vfio_container *container,
				       struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held_write(&group->group_rwsem);

	/* noiommu groups are restricted to privileged users */
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* Unwind the DMA ownership claimed above */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}
1164*03e650f6SJason Gunthorpe 
1165*03e650f6SJason Gunthorpe static int vfio_group_ioctl_set_container(struct vfio_group *group,
1166*03e650f6SJason Gunthorpe 					  int __user *arg)
1167*03e650f6SJason Gunthorpe {
1168*03e650f6SJason Gunthorpe 	struct vfio_container *container;
1169*03e650f6SJason Gunthorpe 	struct fd f;
1170*03e650f6SJason Gunthorpe 	int ret;
1171*03e650f6SJason Gunthorpe 	int fd;
1172*03e650f6SJason Gunthorpe 
1173*03e650f6SJason Gunthorpe 	if (get_user(fd, arg))
1174*03e650f6SJason Gunthorpe 		return -EFAULT;
1175*03e650f6SJason Gunthorpe 
1176*03e650f6SJason Gunthorpe 	f = fdget(fd);
1177*03e650f6SJason Gunthorpe 	if (!f.file)
1178*03e650f6SJason Gunthorpe 		return -EBADF;
1179*03e650f6SJason Gunthorpe 
1180*03e650f6SJason Gunthorpe 	down_write(&group->group_rwsem);
1181*03e650f6SJason Gunthorpe 	if (group->container || WARN_ON(group->container_users)) {
1182*03e650f6SJason Gunthorpe 		ret = -EINVAL;
1183*03e650f6SJason Gunthorpe 		goto out_unlock;
1184*03e650f6SJason Gunthorpe 	}
1185*03e650f6SJason Gunthorpe 	container = vfio_container_from_file(f.file);
1186*03e650f6SJason Gunthorpe 	ret = -EINVAL;
1187*03e650f6SJason Gunthorpe 	if (container) {
1188*03e650f6SJason Gunthorpe 		ret = vfio_container_attach_group(container, group);
1189*03e650f6SJason Gunthorpe 		goto out_unlock;
1190*03e650f6SJason Gunthorpe 	}
1191*03e650f6SJason Gunthorpe 
1192*03e650f6SJason Gunthorpe out_unlock:
119367671f15SJason Gunthorpe 	up_write(&group->group_rwsem);
11940f3e72b5SJason Gunthorpe 	fdput(f);
11950f3e72b5SJason Gunthorpe 	return ret;
11960f3e72b5SJason Gunthorpe }
11970f3e72b5SJason Gunthorpe 
11980f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops;
11990f3e72b5SJason Gunthorpe 
/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	/* open_count is written under dev_set->lock; READ_ONCE for lockless read */
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
12050f3e72b5SJason Gunthorpe 
/*
 * Take an additional container user on behalf of a device fd being opened.
 * Requires the group to already be attached to a container that has an
 * iommu driver set.  Caller must hold group->group_rwsem for write.
 */
static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	/* noiommu device access is restricted to privileged users */
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	/* Pin the group's file; released in vfio_device_unassign_container() */
	get_file(group->opened_file);
	group->container_users++;
	return 0;
}
12230f3e72b5SJason Gunthorpe 
12240f3e72b5SJason Gunthorpe static void vfio_device_unassign_container(struct vfio_device *device)
12250f3e72b5SJason Gunthorpe {
12260f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
12270f3e72b5SJason Gunthorpe 	WARN_ON(device->group->container_users <= 1);
12280f3e72b5SJason Gunthorpe 	device->group->container_users--;
12290f3e72b5SJason Gunthorpe 	fput(device->group->opened_file);
12300f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
12310f3e72b5SJason Gunthorpe }
12320f3e72b5SJason Gunthorpe 
/*
 * Create the struct file backing a device FD.
 *
 * Assigns the group's container to the device, pins the driver module,
 * and — for the first opener in the dev_set — calls the driver's
 * open_device() and registers the device with the IOMMU driver.  On
 * success the caller's registration reference on @device is logically
 * moved into the returned file and dropped in vfio_device_fops_release().
 *
 * Return: the new file on success, ERR_PTR(-errno) on failure.
 */
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	/* Keep the driver module alive while the device FD is open */
	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		/* Let the IOMMU driver know about the newly opened device */
		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
/* Reached with dev_set->lock held and group_rwsem held for read */
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}
13250f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_GET_DEVICE_FD handler: look up a device in the group by the
 * user-supplied name string and return a newly installed device FD for it.
 *
 * Return: the new fd number on success, negative errno on failure.
 */
static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
					  char __user *arg)
{
	struct vfio_device *device;
	struct file *filep;
	char *buf;
	int fdno;
	int ret;

	/* Copy the NUL-terminated device name from userspace (<= PAGE_SIZE) */
	buf = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* Takes a registration reference on the device, put on error below */
	device = vfio_device_get_from_name(group, buf);
	kfree(buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	/* On success the device reference moves into filep */
	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put_registration(device);
	return ret;
}
13650f3e72b5SJason Gunthorpe 
136699a27c08SJason Gunthorpe static int vfio_group_ioctl_get_status(struct vfio_group *group,
136799a27c08SJason Gunthorpe 				       struct vfio_group_status __user *arg)
13680f3e72b5SJason Gunthorpe {
136999a27c08SJason Gunthorpe 	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
13700f3e72b5SJason Gunthorpe 	struct vfio_group_status status;
13710f3e72b5SJason Gunthorpe 
137299a27c08SJason Gunthorpe 	if (copy_from_user(&status, arg, minsz))
13730f3e72b5SJason Gunthorpe 		return -EFAULT;
13740f3e72b5SJason Gunthorpe 
13750f3e72b5SJason Gunthorpe 	if (status.argsz < minsz)
13760f3e72b5SJason Gunthorpe 		return -EINVAL;
13770f3e72b5SJason Gunthorpe 
13780f3e72b5SJason Gunthorpe 	status.flags = 0;
13790f3e72b5SJason Gunthorpe 
13800f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
13810f3e72b5SJason Gunthorpe 	if (group->container)
13820f3e72b5SJason Gunthorpe 		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
13830f3e72b5SJason Gunthorpe 				VFIO_GROUP_FLAGS_VIABLE;
13840f3e72b5SJason Gunthorpe 	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
13850f3e72b5SJason Gunthorpe 		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
13860f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
13870f3e72b5SJason Gunthorpe 
138899a27c08SJason Gunthorpe 	if (copy_to_user(arg, &status, minsz))
13890f3e72b5SJason Gunthorpe 		return -EFAULT;
139099a27c08SJason Gunthorpe 	return 0;
13910f3e72b5SJason Gunthorpe }
139299a27c08SJason Gunthorpe 
/*
 * Dispatch ioctls issued on a group FD.  @arg is reinterpreted per
 * command as a pointer into userspace.
 */
static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	void __user *uarg = (void __user *)arg;

	switch (cmd) {
	case VFIO_GROUP_GET_DEVICE_FD:
		return vfio_group_ioctl_get_device_fd(group, uarg);
	case VFIO_GROUP_GET_STATUS:
		return vfio_group_ioctl_get_status(group, uarg);
	case VFIO_GROUP_SET_CONTAINER:
		return vfio_group_ioctl_set_container(group, uarg);
	case VFIO_GROUP_UNSET_CONTAINER:
		return vfio_group_ioctl_unset_container(group);
	default:
		return -ENOTTY;
	}
}
14120f3e72b5SJason Gunthorpe 
/*
 * Open handler for the group cdev.  Takes a users reference on the
 * group and records this file as the single allowed opener; only one
 * open instance of a group file is permitted at a time.
 */
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	down_write(&group->group_rwsem);

	/* users can be zero if this races with vfio_group_put() */
	if (!refcount_inc_not_zero(&group->users)) {
		ret = -ENODEV;
		goto err_unlock;
	}

	/* no-iommu groups are only usable by privileged users */
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto err_put;
	}

	/*
	 * Do we need multiple instances of the group open?  Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto err_put;
	}
	group->opened_file = filep;
	filep->private_data = group;

	up_write(&group->group_rwsem);
	return 0;
err_put:
	vfio_group_put(group);
err_unlock:
	up_write(&group->group_rwsem);
	return ret;
}
14500f3e72b5SJason Gunthorpe 
/*
 * Release handler for the group cdev: detach any container, clear the
 * single-opener slot, and drop the users reference taken at open time.
 */
static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	down_write(&group->group_rwsem);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container)
		vfio_group_detach_container(group);
	group->opened_file = NULL;
	up_write(&group->group_rwsem);

	vfio_group_put(group);

	return 0;
}
14720f3e72b5SJason Gunthorpe 
/* File operations for the vfio group cdev */
static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
14800f3e72b5SJason Gunthorpe 
14810f3e72b5SJason Gunthorpe /*
14828e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_resume_and_get().
14838e5c6995SAbhishek Sahu  * Return error code on failure or 0 on success.
14848e5c6995SAbhishek Sahu  */
14858e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
14868e5c6995SAbhishek Sahu {
14878e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
14888e5c6995SAbhishek Sahu 
14898e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm) {
14908e5c6995SAbhishek Sahu 		int ret;
14918e5c6995SAbhishek Sahu 
14928e5c6995SAbhishek Sahu 		ret = pm_runtime_resume_and_get(dev);
14938e5c6995SAbhishek Sahu 		if (ret) {
14948e5c6995SAbhishek Sahu 			dev_info_ratelimited(dev,
14958e5c6995SAbhishek Sahu 				"vfio: runtime resume failed %d\n", ret);
14968e5c6995SAbhishek Sahu 			return -EIO;
14978e5c6995SAbhishek Sahu 		}
14988e5c6995SAbhishek Sahu 	}
14998e5c6995SAbhishek Sahu 
15008e5c6995SAbhishek Sahu 	return 0;
15018e5c6995SAbhishek Sahu }
15028e5c6995SAbhishek Sahu 
15038e5c6995SAbhishek Sahu /*
15048e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_put().
15058e5c6995SAbhishek Sahu  */
15068e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
15078e5c6995SAbhishek Sahu {
15088e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
15098e5c6995SAbhishek Sahu 
15108e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm)
15118e5c6995SAbhishek Sahu 		pm_runtime_put(dev);
15128e5c6995SAbhishek Sahu }
15138e5c6995SAbhishek Sahu 
15148e5c6995SAbhishek Sahu /*
15150f3e72b5SJason Gunthorpe  * VFIO Device fd
15160f3e72b5SJason Gunthorpe  */
/*
 * Release handler for a device FD: unwind vfio_device_open() — call the
 * driver's close_device() on the last close, unregister from the IOMMU
 * driver, drop the module pin, the container user, and the device
 * registration reference that was moved into this file.
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	/* close_device only on the last close of this device */
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* KVM association only lives while the device is open */
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	vfio_device_put_registration(device);

	return 0;
}
15460f3e72b5SJason Gunthorpe 
/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @device - The device whose supported migration_flags gate which states
 *           and arcs are usable
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 *
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/* Feature flags a device must support to enter each state at all */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	/* The current state must be in range and supported by the device */
	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	   (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
16860f3e72b5SJason Gunthorpe 
/*
 * Convert the driver's struct file into a FD number and return it to
 * userspace.
 *
 * Consumes the reference on @filp: on success it moves into the
 * installed fd; on failure filp is fput() here and a negative errno is
 * returned with no fd installed.
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	/* Publish the fd number to userspace before installing the file */
	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}
17160f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE handler.
 *
 * GET returns the current migration state via the driver's
 * migration_get_state().  SET asks the driver to move to the requested
 * state; if the driver returns a data-transfer file it is converted to
 * an fd for userspace, otherwise data_fd is reported as -1.
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	/* Ownership of filp passes to vfio_ioct_mig_return_fd() */
	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	/* No data-transfer file: report -1, then surface any driver error */
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
17660f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE_MIGRATION handler: report the device's supported
 * migration feature flags (GET only).  -ENOTTY if the driver does not
 * implement migration at all.
 */
static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
17870f3e72b5SJason Gunthorpe 
/*
 * Upper bound on the number of DMA logging ranges accepted from userspace,
 * chosen so that the array of ranges always fits into a single kernel page.
 */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
179180c4b92aSYishai Hadas 
/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_START: validate the userspace-supplied set
 * of IOVA ranges, build an interval tree from them, and hand the tree to the
 * driver's log_start op to begin dirty-page tracking.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bounded so the node array below stays within a single page. */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		/* Both ends of every range must be page_size aligned. */
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		/* Reject ranges that wrap or exceed the unsigned long space. */
		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	/* The driver may update control.page_size; it is copied back below. */
	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		/* Userspace never saw the final page_size; undo the start. */
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}
187680c4b92aSYishai Hadas 
187780c4b92aSYishai Hadas static int
187880c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
187980c4b92aSYishai Hadas 				       u32 flags, void __user *arg,
188080c4b92aSYishai Hadas 				       size_t argsz)
188180c4b92aSYishai Hadas {
188280c4b92aSYishai Hadas 	int ret;
188380c4b92aSYishai Hadas 
188480c4b92aSYishai Hadas 	if (!device->log_ops)
188580c4b92aSYishai Hadas 		return -ENOTTY;
188680c4b92aSYishai Hadas 
188780c4b92aSYishai Hadas 	ret = vfio_check_feature(flags, argsz,
188880c4b92aSYishai Hadas 				 VFIO_DEVICE_FEATURE_SET, 0);
188980c4b92aSYishai Hadas 	if (ret != 1)
189080c4b92aSYishai Hadas 		return ret;
189180c4b92aSYishai Hadas 
189280c4b92aSYishai Hadas 	return device->log_ops->log_stop(device);
189380c4b92aSYishai Hadas }
189480c4b92aSYishai Hadas 
189580c4b92aSYishai Hadas static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
189680c4b92aSYishai Hadas 					  unsigned long iova, size_t length,
189780c4b92aSYishai Hadas 					  void *opaque)
189880c4b92aSYishai Hadas {
189980c4b92aSYishai Hadas 	struct vfio_device *device = opaque;
190080c4b92aSYishai Hadas 
190180c4b92aSYishai Hadas 	return device->log_ops->log_read_and_clear(device, iova, length, iter);
190280c4b92aSYishai Hadas }
190380c4b92aSYishai Hadas 
/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: have the driver fill the
 * user-supplied bitmap with dirty bits for the requested IOVA range,
 * via its log_read_and_clear op.
 */
static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	/* Report granularity must be a power of two and at least 4K. */
	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	/* Reject ranges that wrap or exceed the unsigned long space. */
	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	/* Wrap the userspace bitmap so the driver can set bits directly. */
	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}
194880c4b92aSYishai Hadas 
/*
 * Common entry point for the VFIO_DEVICE_FEATURE ioctl: validate the feature
 * header, dispatch core-handled features, and fall through to the driver's
 * device_feature op for anything else.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	/* argsz must at least cover the header; payload starts at arg->data. */
	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	/* Handlers receive only the payload portion of the user buffer. */
	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		/* Unknown features go to the driver, if it handles any. */
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
20020f3e72b5SJason Gunthorpe 
20030f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep,
20040f3e72b5SJason Gunthorpe 				       unsigned int cmd, unsigned long arg)
20050f3e72b5SJason Gunthorpe {
20060f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
20078e5c6995SAbhishek Sahu 	int ret;
20088e5c6995SAbhishek Sahu 
20098e5c6995SAbhishek Sahu 	ret = vfio_device_pm_runtime_get(device);
20108e5c6995SAbhishek Sahu 	if (ret)
20118e5c6995SAbhishek Sahu 		return ret;
20120f3e72b5SJason Gunthorpe 
20130f3e72b5SJason Gunthorpe 	switch (cmd) {
20140f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE:
20158e5c6995SAbhishek Sahu 		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
20168e5c6995SAbhishek Sahu 		break;
20178e5c6995SAbhishek Sahu 
20180f3e72b5SJason Gunthorpe 	default:
20190f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->ioctl))
20208e5c6995SAbhishek Sahu 			ret = -EINVAL;
20218e5c6995SAbhishek Sahu 		else
20228e5c6995SAbhishek Sahu 			ret = device->ops->ioctl(device, cmd, arg);
20238e5c6995SAbhishek Sahu 		break;
20240f3e72b5SJason Gunthorpe 	}
20258e5c6995SAbhishek Sahu 
20268e5c6995SAbhishek Sahu 	vfio_device_pm_runtime_put(device);
20278e5c6995SAbhishek Sahu 	return ret;
20280f3e72b5SJason Gunthorpe }
20290f3e72b5SJason Gunthorpe 
20300f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
20310f3e72b5SJason Gunthorpe 				     size_t count, loff_t *ppos)
20320f3e72b5SJason Gunthorpe {
20330f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
20340f3e72b5SJason Gunthorpe 
20350f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->read))
20360f3e72b5SJason Gunthorpe 		return -EINVAL;
20370f3e72b5SJason Gunthorpe 
20380f3e72b5SJason Gunthorpe 	return device->ops->read(device, buf, count, ppos);
20390f3e72b5SJason Gunthorpe }
20400f3e72b5SJason Gunthorpe 
20410f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep,
20420f3e72b5SJason Gunthorpe 				      const char __user *buf,
20430f3e72b5SJason Gunthorpe 				      size_t count, loff_t *ppos)
20440f3e72b5SJason Gunthorpe {
20450f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
20460f3e72b5SJason Gunthorpe 
20470f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->write))
20480f3e72b5SJason Gunthorpe 		return -EINVAL;
20490f3e72b5SJason Gunthorpe 
20500f3e72b5SJason Gunthorpe 	return device->ops->write(device, buf, count, ppos);
20510f3e72b5SJason Gunthorpe }
20520f3e72b5SJason Gunthorpe 
20530f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
20540f3e72b5SJason Gunthorpe {
20550f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
20560f3e72b5SJason Gunthorpe 
20570f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->mmap))
20580f3e72b5SJason Gunthorpe 		return -EINVAL;
20590f3e72b5SJason Gunthorpe 
20600f3e72b5SJason Gunthorpe 	return device->ops->mmap(device, vma);
20610f3e72b5SJason Gunthorpe }
20620f3e72b5SJason Gunthorpe 
/* File operations installed on every open vfio device file descriptor. */
static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
20720f3e72b5SJason Gunthorpe 
20730f3e72b5SJason Gunthorpe /**
20740f3e72b5SJason Gunthorpe  * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
20750f3e72b5SJason Gunthorpe  * @file: VFIO group file
20760f3e72b5SJason Gunthorpe  *
20770f3e72b5SJason Gunthorpe  * The returned iommu_group is valid as long as a ref is held on the file.
20780f3e72b5SJason Gunthorpe  */
20790f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file)
20800f3e72b5SJason Gunthorpe {
20810f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
20820f3e72b5SJason Gunthorpe 
20830f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
20840f3e72b5SJason Gunthorpe 		return NULL;
20850f3e72b5SJason Gunthorpe 	return group->iommu_group;
20860f3e72b5SJason Gunthorpe }
20870f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
20880f3e72b5SJason Gunthorpe 
20890f3e72b5SJason Gunthorpe /**
20900f3e72b5SJason Gunthorpe  * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
20910f3e72b5SJason Gunthorpe  *        is always CPU cache coherent
20920f3e72b5SJason Gunthorpe  * @file: VFIO group file
20930f3e72b5SJason Gunthorpe  *
20940f3e72b5SJason Gunthorpe  * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
20950f3e72b5SJason Gunthorpe  * bit in DMA transactions. A return of false indicates that the user has
20960f3e72b5SJason Gunthorpe  * rights to access additional instructions such as wbinvd on x86.
20970f3e72b5SJason Gunthorpe  */
20980f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file)
20990f3e72b5SJason Gunthorpe {
21000f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
21010f3e72b5SJason Gunthorpe 	bool ret;
21020f3e72b5SJason Gunthorpe 
21030f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
21040f3e72b5SJason Gunthorpe 		return true;
21050f3e72b5SJason Gunthorpe 
21060f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
21070f3e72b5SJason Gunthorpe 	if (group->container) {
21080f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(group->container,
21090f3e72b5SJason Gunthorpe 						 VFIO_DMA_CC_IOMMU);
21100f3e72b5SJason Gunthorpe 	} else {
21110f3e72b5SJason Gunthorpe 		/*
21120f3e72b5SJason Gunthorpe 		 * Since the coherency state is determined only once a container
21130f3e72b5SJason Gunthorpe 		 * is attached the user must do so before they can prove they
21140f3e72b5SJason Gunthorpe 		 * have permission.
21150f3e72b5SJason Gunthorpe 		 */
21160f3e72b5SJason Gunthorpe 		ret = true;
21170f3e72b5SJason Gunthorpe 	}
21180f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
21190f3e72b5SJason Gunthorpe 	return ret;
21200f3e72b5SJason Gunthorpe }
21210f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
21220f3e72b5SJason Gunthorpe 
21230f3e72b5SJason Gunthorpe /**
21240f3e72b5SJason Gunthorpe  * vfio_file_set_kvm - Link a kvm with VFIO drivers
21250f3e72b5SJason Gunthorpe  * @file: VFIO group file
21260f3e72b5SJason Gunthorpe  * @kvm: KVM to link
21270f3e72b5SJason Gunthorpe  *
21280f3e72b5SJason Gunthorpe  * When a VFIO device is first opened the KVM will be available in
21290f3e72b5SJason Gunthorpe  * device->kvm if one was associated with the group.
21300f3e72b5SJason Gunthorpe  */
21310f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
21320f3e72b5SJason Gunthorpe {
21330f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
21340f3e72b5SJason Gunthorpe 
21350f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
21360f3e72b5SJason Gunthorpe 		return;
21370f3e72b5SJason Gunthorpe 
21380f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
21390f3e72b5SJason Gunthorpe 	group->kvm = kvm;
21400f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
21410f3e72b5SJason Gunthorpe }
21420f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
21430f3e72b5SJason Gunthorpe 
21440f3e72b5SJason Gunthorpe /**
21450f3e72b5SJason Gunthorpe  * vfio_file_has_dev - True if the VFIO file is a handle for device
21460f3e72b5SJason Gunthorpe  * @file: VFIO file to check
21470f3e72b5SJason Gunthorpe  * @device: Device that must be part of the file
21480f3e72b5SJason Gunthorpe  *
21490f3e72b5SJason Gunthorpe  * Returns true if given file has permission to manipulate the given device.
21500f3e72b5SJason Gunthorpe  */
21510f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
21520f3e72b5SJason Gunthorpe {
21530f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
21540f3e72b5SJason Gunthorpe 
21550f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
21560f3e72b5SJason Gunthorpe 		return false;
21570f3e72b5SJason Gunthorpe 
21580f3e72b5SJason Gunthorpe 	return group == device->group;
21590f3e72b5SJason Gunthorpe }
21600f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev);
21610f3e72b5SJason Gunthorpe 
/*
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	/*
	 * krealloc() does not free the original on failure, so discard the
	 * whole chain explicitly rather than leak it.
	 */
	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	/* The new capability occupies the freshly grown tail of the buffer. */
	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	/* ->next holds the buffer-relative offset of the new entry. */
	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);
22070f3e72b5SJason Gunthorpe 
/*
 * Adjust every capability's ->next offset by @offset, for when the caps
 * buffer is placed at a non-zero offset inside the final user buffer.
 * Must be called before copying the chain to userspace.
 */
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	/*
	 * ->next is rewritten in place, so subtract @offset back when
	 * stepping to keep walking with buffer-relative offsets.
	 */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
22170f3e72b5SJason Gunthorpe 
22180f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps,
22190f3e72b5SJason Gunthorpe 			     struct vfio_info_cap_header *cap, size_t size)
22200f3e72b5SJason Gunthorpe {
22210f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header;
22220f3e72b5SJason Gunthorpe 
22230f3e72b5SJason Gunthorpe 	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
22240f3e72b5SJason Gunthorpe 	if (IS_ERR(header))
22250f3e72b5SJason Gunthorpe 		return PTR_ERR(header);
22260f3e72b5SJason Gunthorpe 
22270f3e72b5SJason Gunthorpe 	memcpy(header + 1, cap + 1, size - sizeof(*header));
22280f3e72b5SJason Gunthorpe 
22290f3e72b5SJason Gunthorpe 	return 0;
22300f3e72b5SJason Gunthorpe }
22310f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability);
22320f3e72b5SJason Gunthorpe 
/*
 * Validate a VFIO_DEVICE_SET_IRQS header against the device's IRQ layout and
 * compute the size of the trailing data payload.
 *
 * @hdr:          user-supplied vfio_irq_set header (already copied in)
 * @num_irqs:     number of IRQs in the index being addressed
 * @max_irq_type: number of valid IRQ indexes for the device
 * @data_size:    out: hdr->count * per-element size (0 for DATA_NONE);
 *                may be NULL only when no payload is expected
 *
 * Returns 0 on success, -EINVAL for any malformed header.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/*
	 * argsz must cover the header, the index must be valid,
	 * start + count must not wrap u32, and only known data/action
	 * flag bits may be set.
	 */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	/* The [start, start + count) window must fall inside num_irqs. */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	/* Per-element payload size depends on the data type flag. */
	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		/* argsz must be large enough for count payload elements. */
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
22800f3e72b5SJason Gunthorpe 
22810f3e72b5SJason Gunthorpe /*
22820f3e72b5SJason Gunthorpe  * Pin contiguous user pages and return their associated host pages for local
22830f3e72b5SJason Gunthorpe  * domain only.
22840f3e72b5SJason Gunthorpe  * @device [in]  : device
22850f3e72b5SJason Gunthorpe  * @iova [in]    : starting IOVA of user pages to be pinned.
22860f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be pinned.  This count should not
22870f3e72b5SJason Gunthorpe  *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
22880f3e72b5SJason Gunthorpe  * @prot [in]    : protection flags
22890f3e72b5SJason Gunthorpe  * @pages[out]   : array of host pages
22900f3e72b5SJason Gunthorpe  * Return error or number of pages pinned.
229121c13829SJason Gunthorpe  *
229221c13829SJason Gunthorpe  * A driver may only call this function if the vfio_device was created
229321c13829SJason Gunthorpe  * by vfio_register_emulated_iommu_dev().
22940f3e72b5SJason Gunthorpe  */
22950f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
22960f3e72b5SJason Gunthorpe 		   int npage, int prot, struct page **pages)
22970f3e72b5SJason Gunthorpe {
22980f3e72b5SJason Gunthorpe 	struct vfio_container *container;
22990f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
23000f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
23010f3e72b5SJason Gunthorpe 	int ret;
23020f3e72b5SJason Gunthorpe 
23030f3e72b5SJason Gunthorpe 	if (!pages || !npage || !vfio_assert_device_open(device))
23040f3e72b5SJason Gunthorpe 		return -EINVAL;
23050f3e72b5SJason Gunthorpe 
23060f3e72b5SJason Gunthorpe 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
23070f3e72b5SJason Gunthorpe 		return -E2BIG;
23080f3e72b5SJason Gunthorpe 
23090f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
23100f3e72b5SJason Gunthorpe 	container = group->container;
23110f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
23120f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->pin_pages))
23130f3e72b5SJason Gunthorpe 		ret = driver->ops->pin_pages(container->iommu_data,
23140f3e72b5SJason Gunthorpe 					     group->iommu_group, iova,
23150f3e72b5SJason Gunthorpe 					     npage, prot, pages);
23160f3e72b5SJason Gunthorpe 	else
23170f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
23180f3e72b5SJason Gunthorpe 
23190f3e72b5SJason Gunthorpe 	return ret;
23200f3e72b5SJason Gunthorpe }
23210f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages);
23220f3e72b5SJason Gunthorpe 
/*
 * Unpin contiguous host pages for local domain only.
 * @device [in]  : device
 * @iova [in]    : starting address of user pages to be unpinned.
 * @npage [in]   : count of pages to be unpinned.  This count should not
 *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	/* A bogus count or a closed device is a caller bug, hence WARN. */
	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	if (WARN_ON(!vfio_assert_device_open(device)))
		return;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	/*
	 * NOTE(review): unlike vfio_pin_pages(), driver and its unpin_pages
	 * op are dereferenced unconditionally here — presumably a successful
	 * earlier pin guarantees both exist; confirm against the pin path.
	 */
	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
EXPORT_SYMBOL(vfio_unpin_pages);
23480f3e72b5SJason Gunthorpe 
23490f3e72b5SJason Gunthorpe /*
23500f3e72b5SJason Gunthorpe  * This interface allows the CPUs to perform some sort of virtual DMA on
23510f3e72b5SJason Gunthorpe  * behalf of the device.
23520f3e72b5SJason Gunthorpe  *
23530f3e72b5SJason Gunthorpe  * CPUs read/write from/into a range of IOVAs pointing to user space memory
23540f3e72b5SJason Gunthorpe  * into/from a kernel buffer.
23550f3e72b5SJason Gunthorpe  *
23560f3e72b5SJason Gunthorpe  * As the read/write of user space memory is conducted via the CPUs and is
23570f3e72b5SJason Gunthorpe  * not a real device DMA, it is not necessary to pin the user space memory.
23580f3e72b5SJason Gunthorpe  *
23590f3e72b5SJason Gunthorpe  * @device [in]		: VFIO device
23600f3e72b5SJason Gunthorpe  * @iova [in]		: base IOVA of a user space buffer
23610f3e72b5SJason Gunthorpe  * @data [in]		: pointer to kernel buffer
23620f3e72b5SJason Gunthorpe  * @len [in]		: kernel buffer length
23630f3e72b5SJason Gunthorpe  * @write		: indicate read or write
23640f3e72b5SJason Gunthorpe  * Return error code on failure or 0 on success.
23650f3e72b5SJason Gunthorpe  */
23660f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
23670f3e72b5SJason Gunthorpe 		size_t len, bool write)
23680f3e72b5SJason Gunthorpe {
23690f3e72b5SJason Gunthorpe 	struct vfio_container *container;
23700f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
23710f3e72b5SJason Gunthorpe 	int ret = 0;
23720f3e72b5SJason Gunthorpe 
23730f3e72b5SJason Gunthorpe 	if (!data || len <= 0 || !vfio_assert_device_open(device))
23740f3e72b5SJason Gunthorpe 		return -EINVAL;
23750f3e72b5SJason Gunthorpe 
23760f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
23770f3e72b5SJason Gunthorpe 	container = device->group->container;
23780f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
23790f3e72b5SJason Gunthorpe 
23800f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->dma_rw))
23810f3e72b5SJason Gunthorpe 		ret = driver->ops->dma_rw(container->iommu_data,
23820f3e72b5SJason Gunthorpe 					  iova, data, len, write);
23830f3e72b5SJason Gunthorpe 	else
23840f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
23850f3e72b5SJason Gunthorpe 	return ret;
23860f3e72b5SJason Gunthorpe }
23870f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw);
23880f3e72b5SJason Gunthorpe 
23890f3e72b5SJason Gunthorpe /*
23900f3e72b5SJason Gunthorpe  * Module/class support
23910f3e72b5SJason Gunthorpe  */
23920f3e72b5SJason Gunthorpe static char *vfio_devnode(struct device *dev, umode_t *mode)
23930f3e72b5SJason Gunthorpe {
23940f3e72b5SJason Gunthorpe 	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
23950f3e72b5SJason Gunthorpe }
23960f3e72b5SJason Gunthorpe 
23970f3e72b5SJason Gunthorpe static struct miscdevice vfio_dev = {
23980f3e72b5SJason Gunthorpe 	.minor = VFIO_MINOR,
23990f3e72b5SJason Gunthorpe 	.name = "vfio",
24000f3e72b5SJason Gunthorpe 	.fops = &vfio_fops,
24010f3e72b5SJason Gunthorpe 	.nodename = "vfio/vfio",
24020f3e72b5SJason Gunthorpe 	.mode = S_IRUGO | S_IWUGO,
24030f3e72b5SJason Gunthorpe };
24040f3e72b5SJason Gunthorpe 
24050f3e72b5SJason Gunthorpe static int __init vfio_init(void)
24060f3e72b5SJason Gunthorpe {
24070f3e72b5SJason Gunthorpe 	int ret;
24080f3e72b5SJason Gunthorpe 
24090f3e72b5SJason Gunthorpe 	ida_init(&vfio.group_ida);
24103c28a761SYi Liu 	ida_init(&vfio.device_ida);
24110f3e72b5SJason Gunthorpe 	mutex_init(&vfio.group_lock);
24120f3e72b5SJason Gunthorpe 	mutex_init(&vfio.iommu_drivers_lock);
24130f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&vfio.group_list);
24140f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
24150f3e72b5SJason Gunthorpe 
24160f3e72b5SJason Gunthorpe 	ret = misc_register(&vfio_dev);
24170f3e72b5SJason Gunthorpe 	if (ret) {
24180f3e72b5SJason Gunthorpe 		pr_err("vfio: misc device register failed\n");
24190f3e72b5SJason Gunthorpe 		return ret;
24200f3e72b5SJason Gunthorpe 	}
24210f3e72b5SJason Gunthorpe 
24220f3e72b5SJason Gunthorpe 	/* /dev/vfio/$GROUP */
24230f3e72b5SJason Gunthorpe 	vfio.class = class_create(THIS_MODULE, "vfio");
24240f3e72b5SJason Gunthorpe 	if (IS_ERR(vfio.class)) {
24250f3e72b5SJason Gunthorpe 		ret = PTR_ERR(vfio.class);
24263c28a761SYi Liu 		goto err_group_class;
24270f3e72b5SJason Gunthorpe 	}
24280f3e72b5SJason Gunthorpe 
24290f3e72b5SJason Gunthorpe 	vfio.class->devnode = vfio_devnode;
24300f3e72b5SJason Gunthorpe 
24313c28a761SYi Liu 	/* /sys/class/vfio-dev/vfioX */
24323c28a761SYi Liu 	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
24333c28a761SYi Liu 	if (IS_ERR(vfio.device_class)) {
24343c28a761SYi Liu 		ret = PTR_ERR(vfio.device_class);
24353c28a761SYi Liu 		goto err_dev_class;
24363c28a761SYi Liu 	}
24373c28a761SYi Liu 
24380f3e72b5SJason Gunthorpe 	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
24390f3e72b5SJason Gunthorpe 	if (ret)
24400f3e72b5SJason Gunthorpe 		goto err_alloc_chrdev;
24410f3e72b5SJason Gunthorpe 
24420f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
24430f3e72b5SJason Gunthorpe 	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
24440f3e72b5SJason Gunthorpe #endif
24450f3e72b5SJason Gunthorpe 	if (ret)
24460f3e72b5SJason Gunthorpe 		goto err_driver_register;
24470f3e72b5SJason Gunthorpe 
24480f3e72b5SJason Gunthorpe 	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
24490f3e72b5SJason Gunthorpe 	return 0;
24500f3e72b5SJason Gunthorpe 
24510f3e72b5SJason Gunthorpe err_driver_register:
24520f3e72b5SJason Gunthorpe 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
24530f3e72b5SJason Gunthorpe err_alloc_chrdev:
24543c28a761SYi Liu 	class_destroy(vfio.device_class);
24553c28a761SYi Liu 	vfio.device_class = NULL;
24563c28a761SYi Liu err_dev_class:
24570f3e72b5SJason Gunthorpe 	class_destroy(vfio.class);
24580f3e72b5SJason Gunthorpe 	vfio.class = NULL;
24593c28a761SYi Liu err_group_class:
24600f3e72b5SJason Gunthorpe 	misc_deregister(&vfio_dev);
24610f3e72b5SJason Gunthorpe 	return ret;
24620f3e72b5SJason Gunthorpe }
24630f3e72b5SJason Gunthorpe 
24640f3e72b5SJason Gunthorpe static void __exit vfio_cleanup(void)
24650f3e72b5SJason Gunthorpe {
24660f3e72b5SJason Gunthorpe 	WARN_ON(!list_empty(&vfio.group_list));
24670f3e72b5SJason Gunthorpe 
24680f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
24690f3e72b5SJason Gunthorpe 	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
24700f3e72b5SJason Gunthorpe #endif
24713c28a761SYi Liu 	ida_destroy(&vfio.device_ida);
24720f3e72b5SJason Gunthorpe 	ida_destroy(&vfio.group_ida);
24730f3e72b5SJason Gunthorpe 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
24743c28a761SYi Liu 	class_destroy(vfio.device_class);
24753c28a761SYi Liu 	vfio.device_class = NULL;
24760f3e72b5SJason Gunthorpe 	class_destroy(vfio.class);
24770f3e72b5SJason Gunthorpe 	vfio.class = NULL;
24780f3e72b5SJason Gunthorpe 	misc_deregister(&vfio_dev);
24790f3e72b5SJason Gunthorpe 	xa_destroy(&vfio_device_set_xa);
24800f3e72b5SJason Gunthorpe }
24810f3e72b5SJason Gunthorpe 
24820f3e72b5SJason Gunthorpe module_init(vfio_init);
24830f3e72b5SJason Gunthorpe module_exit(vfio_cleanup);
24840f3e72b5SJason Gunthorpe 
24850f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION);
24860f3e72b5SJason Gunthorpe MODULE_LICENSE("GPL v2");
24870f3e72b5SJason Gunthorpe MODULE_AUTHOR(DRIVER_AUTHOR);
24880f3e72b5SJason Gunthorpe MODULE_DESCRIPTION(DRIVER_DESC);
24890f3e72b5SJason Gunthorpe MODULE_ALIAS_MISCDEV(VFIO_MINOR);
24900f3e72b5SJason Gunthorpe MODULE_ALIAS("devname:vfio/vfio");
24910f3e72b5SJason Gunthorpe MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
2492