xref: /openbmc/linux/drivers/vfio/vfio_main.c (revision cb9ff3f3)
10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
20f3e72b5SJason Gunthorpe /*
30f3e72b5SJason Gunthorpe  * VFIO core
40f3e72b5SJason Gunthorpe  *
50f3e72b5SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
60f3e72b5SJason Gunthorpe  *     Author: Alex Williamson <alex.williamson@redhat.com>
70f3e72b5SJason Gunthorpe  *
80f3e72b5SJason Gunthorpe  * Derived from original vfio:
90f3e72b5SJason Gunthorpe  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
100f3e72b5SJason Gunthorpe  * Author: Tom Lyon, pugs@cisco.com
110f3e72b5SJason Gunthorpe  */
120f3e72b5SJason Gunthorpe 
130f3e72b5SJason Gunthorpe #include <linux/cdev.h>
140f3e72b5SJason Gunthorpe #include <linux/compat.h>
150f3e72b5SJason Gunthorpe #include <linux/device.h>
160f3e72b5SJason Gunthorpe #include <linux/file.h>
170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h>
180f3e72b5SJason Gunthorpe #include <linux/fs.h>
190f3e72b5SJason Gunthorpe #include <linux/idr.h>
200f3e72b5SJason Gunthorpe #include <linux/iommu.h>
210f3e72b5SJason Gunthorpe #include <linux/list.h>
220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h>
230f3e72b5SJason Gunthorpe #include <linux/module.h>
240f3e72b5SJason Gunthorpe #include <linux/mutex.h>
250f3e72b5SJason Gunthorpe #include <linux/pci.h>
260f3e72b5SJason Gunthorpe #include <linux/rwsem.h>
270f3e72b5SJason Gunthorpe #include <linux/sched.h>
280f3e72b5SJason Gunthorpe #include <linux/slab.h>
290f3e72b5SJason Gunthorpe #include <linux/stat.h>
300f3e72b5SJason Gunthorpe #include <linux/string.h>
310f3e72b5SJason Gunthorpe #include <linux/uaccess.h>
320f3e72b5SJason Gunthorpe #include <linux/vfio.h>
330f3e72b5SJason Gunthorpe #include <linux/wait.h>
340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h>
358e5c6995SAbhishek Sahu #include <linux/pm_runtime.h>
3680c4b92aSYishai Hadas #include <linux/interval_tree.h>
3780c4b92aSYishai Hadas #include <linux/iova_bitmap.h>
380f3e72b5SJason Gunthorpe #include "vfio.h"
390f3e72b5SJason Gunthorpe 
400f3e72b5SJason Gunthorpe #define DRIVER_VERSION	"0.3"
410f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
420f3e72b5SJason Gunthorpe #define DRIVER_DESC	"VFIO - User Level meta-driver"
430f3e72b5SJason Gunthorpe 
/*
 * Module-global VFIO state: the chardev class, the list of registered
 * backend IOMMU drivers, and every vfio_group known to the core.
 */
static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;	/* under iommu_drivers_lock */
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;	/* allocates group chardev minors */
	dev_t				group_devt;	/* major for group chardevs */
} vfio;
530f3e72b5SJason Gunthorpe 
/* One registered backend IOMMU driver, linked on vfio.iommu_drivers_list */
struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;	/* vfio.iommu_drivers_list */
};
580f3e72b5SJason Gunthorpe 
/*
 * A container binds one IOMMU backend (iommu_driver/iommu_data) to a set
 * of attached groups.  Created on open of /dev/vfio/vfio; kref'd because
 * container, groups and devices may be closed in any order.
 */
struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;	/* attached vfio_groups */
	struct rw_semaphore		group_lock;	/* protects group_list */
	struct vfio_iommu_driver	*iommu_driver;	/* bound backend */
	void				*iommu_data;	/* backend-private state */
	bool				noiommu;	/* container is in no-IOMMU mode */
};
670f3e72b5SJason Gunthorpe 
/*
 * The userspace-visible chardev wrapper around one struct iommu_group.
 * Lifetime is driven by @users (see vfio_group_get/put); the embedded
 * @dev's release callback frees the structure.
 */
struct vfio_group {
	struct device 			dev;	/* release: vfio_group_release() */
	struct cdev			cdev;
	refcount_t			users;
	unsigned int			container_users;	/* outstanding container uses */
	struct iommu_group		*iommu_group;	/* ref held; put on release */
	struct vfio_container		*container;
	struct list_head		device_list;	/* vfio_devices in this group */
	struct mutex			device_lock;	/* protects device_list */
	struct list_head		vfio_next;	/* link in vfio.group_list */
	struct list_head		container_next;	/* link in container->group_list */
	enum vfio_group_type		type;
	struct rw_semaphore		group_rwsem;
	struct kvm			*kvm;
	struct file			*opened_file;
	struct blocking_notifier_head	notifier;
};
850f3e72b5SJason Gunthorpe 
#ifdef CONFIG_VFIO_NOIOMMU
/* Gate for the unsafe no-IOMMU mode; root-writable at runtime (0644) */
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif
920f3e72b5SJason Gunthorpe 
930f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa);
940f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops;
950f3e72b5SJason Gunthorpe 
/**
 * vfio_assign_device_set - Place @device into the device set named by
 * @set_id, creating the set on first use.
 * @device: device to add to a set
 * @set_id: opaque cookie; devices passing equal cookies share one set
 *
 * Returns 0 on success, -EINVAL for a NULL @set_id, -ENOMEM or an
 * xarray error on allocation failure.  On success @device->dev_set is
 * set and the device is linked on the set's device_list.
 */
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	/* The cookie's pointer value doubles as the xarray index */
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	/* No set yet: allocate outside the lock, then race to insert it */
	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		/* We won the race; our allocation becomes the set */
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	/* Lost the race (or xarray error): drop the unused allocation */
	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	/* device_count is protected by the xarray lock */
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
1450f3e72b5SJason Gunthorpe 
/*
 * Undo vfio_assign_device_set(): unlink @device from its set and free
 * the set when the last member leaves.  A no-op when the device was
 * never assigned a set.
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* device_count and xarray membership both change under xa_lock */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
1660f3e72b5SJason Gunthorpe 
1670f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
1680f3e72b5SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg)
1690f3e72b5SJason Gunthorpe {
1700f3e72b5SJason Gunthorpe 	if (arg != VFIO_NOIOMMU_IOMMU)
1710f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
1720f3e72b5SJason Gunthorpe 	if (!capable(CAP_SYS_RAWIO))
1730f3e72b5SJason Gunthorpe 		return ERR_PTR(-EPERM);
1740f3e72b5SJason Gunthorpe 
1750f3e72b5SJason Gunthorpe 	return NULL;
1760f3e72b5SJason Gunthorpe }
1770f3e72b5SJason Gunthorpe 
/* .release for the no-iommu backend: open allocated nothing to free */
static void vfio_noiommu_release(void *iommu_data)
{
}
1810f3e72b5SJason Gunthorpe 
1820f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data,
1830f3e72b5SJason Gunthorpe 			       unsigned int cmd, unsigned long arg)
1840f3e72b5SJason Gunthorpe {
1850f3e72b5SJason Gunthorpe 	if (cmd == VFIO_CHECK_EXTENSION)
1860f3e72b5SJason Gunthorpe 		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
1870f3e72b5SJason Gunthorpe 
1880f3e72b5SJason Gunthorpe 	return -ENOTTY;
1890f3e72b5SJason Gunthorpe }
1900f3e72b5SJason Gunthorpe 
/* .attach_group for the no-iommu backend: nothing to program, always ok */
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}
1960f3e72b5SJason Gunthorpe 
/* .detach_group for the no-iommu backend: nothing was attached */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
2010f3e72b5SJason Gunthorpe 
/* Stub backend used when userspace opts into unsafe no-IOMMU operation */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
2110f3e72b5SJason Gunthorpe 
/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
#else
/* Without CONFIG_VFIO_NOIOMMU every registered driver is eligible */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return true;
}
#endif /* CONFIG_VFIO_NOIOMMU */
2280f3e72b5SJason Gunthorpe 
2290f3e72b5SJason Gunthorpe /*
2300f3e72b5SJason Gunthorpe  * IOMMU driver registration
2310f3e72b5SJason Gunthorpe  */
2320f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2330f3e72b5SJason Gunthorpe {
2340f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver, *tmp;
2350f3e72b5SJason Gunthorpe 
2360f3e72b5SJason Gunthorpe 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
2370f3e72b5SJason Gunthorpe 		return -EINVAL;
2380f3e72b5SJason Gunthorpe 
2390f3e72b5SJason Gunthorpe 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
2400f3e72b5SJason Gunthorpe 	if (!driver)
2410f3e72b5SJason Gunthorpe 		return -ENOMEM;
2420f3e72b5SJason Gunthorpe 
2430f3e72b5SJason Gunthorpe 	driver->ops = ops;
2440f3e72b5SJason Gunthorpe 
2450f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2460f3e72b5SJason Gunthorpe 
2470f3e72b5SJason Gunthorpe 	/* Check for duplicates */
2480f3e72b5SJason Gunthorpe 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
2490f3e72b5SJason Gunthorpe 		if (tmp->ops == ops) {
2500f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2510f3e72b5SJason Gunthorpe 			kfree(driver);
2520f3e72b5SJason Gunthorpe 			return -EINVAL;
2530f3e72b5SJason Gunthorpe 		}
2540f3e72b5SJason Gunthorpe 	}
2550f3e72b5SJason Gunthorpe 
2560f3e72b5SJason Gunthorpe 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
2570f3e72b5SJason Gunthorpe 
2580f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2590f3e72b5SJason Gunthorpe 
2600f3e72b5SJason Gunthorpe 	return 0;
2610f3e72b5SJason Gunthorpe }
2620f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
2630f3e72b5SJason Gunthorpe 
2640f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
2650f3e72b5SJason Gunthorpe {
2660f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
2670f3e72b5SJason Gunthorpe 
2680f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
2690f3e72b5SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
2700f3e72b5SJason Gunthorpe 		if (driver->ops == ops) {
2710f3e72b5SJason Gunthorpe 			list_del(&driver->vfio_next);
2720f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
2730f3e72b5SJason Gunthorpe 			kfree(driver);
2740f3e72b5SJason Gunthorpe 			return;
2750f3e72b5SJason Gunthorpe 		}
2760f3e72b5SJason Gunthorpe 	}
2770f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
2780f3e72b5SJason Gunthorpe }
2790f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
2800f3e72b5SJason Gunthorpe 
2810f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group);
2820f3e72b5SJason Gunthorpe 
2830f3e72b5SJason Gunthorpe /*
2840f3e72b5SJason Gunthorpe  * Container objects - containers are created when /dev/vfio/vfio is
2850f3e72b5SJason Gunthorpe  * opened, but their lifecycle extends until the last user is done, so
2860f3e72b5SJason Gunthorpe  * it's freed via kref.  Must support container/group/device being
2870f3e72b5SJason Gunthorpe  * closed in any order.
2880f3e72b5SJason Gunthorpe  */
/* Take a container reference; paired with vfio_container_put() */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
2930f3e72b5SJason Gunthorpe 
2940f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref)
2950f3e72b5SJason Gunthorpe {
2960f3e72b5SJason Gunthorpe 	struct vfio_container *container;
2970f3e72b5SJason Gunthorpe 	container = container_of(kref, struct vfio_container, kref);
2980f3e72b5SJason Gunthorpe 
2990f3e72b5SJason Gunthorpe 	kfree(container);
3000f3e72b5SJason Gunthorpe }
3010f3e72b5SJason Gunthorpe 
/* Drop a container reference; last put frees via vfio_container_release() */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
3060f3e72b5SJason Gunthorpe 
3070f3e72b5SJason Gunthorpe /*
3080f3e72b5SJason Gunthorpe  * Group objects - create, release, get, put, search
3090f3e72b5SJason Gunthorpe  */
3100f3e72b5SJason Gunthorpe static struct vfio_group *
3110f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
3120f3e72b5SJason Gunthorpe {
3130f3e72b5SJason Gunthorpe 	struct vfio_group *group;
3140f3e72b5SJason Gunthorpe 
3150f3e72b5SJason Gunthorpe 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
3160f3e72b5SJason Gunthorpe 		if (group->iommu_group == iommu_group) {
3170f3e72b5SJason Gunthorpe 			vfio_group_get(group);
3180f3e72b5SJason Gunthorpe 			return group;
3190f3e72b5SJason Gunthorpe 		}
3200f3e72b5SJason Gunthorpe 	}
3210f3e72b5SJason Gunthorpe 	return NULL;
3220f3e72b5SJason Gunthorpe }
3230f3e72b5SJason Gunthorpe 
/*
 * Locked wrapper of __vfio_group_get_from_iommu(): returns a referenced
 * vfio_group for @iommu_group, or NULL when none exists.
 */
static struct vfio_group *
vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = __vfio_group_get_from_iommu(iommu_group);
	mutex_unlock(&vfio.group_lock);
	return group;
}
3340f3e72b5SJason Gunthorpe 
/*
 * struct device release callback for the group's embedded device; runs
 * when the last put_device() drops it.  Undoes vfio_group_alloc() in
 * reverse order.
 */
static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	/* pairs with iommu_group_ref_get() in vfio_group_alloc() */
	iommu_group_put(group->iommu_group);
	/* return the chardev minor taken in vfio_group_alloc() */
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}
3440f3e72b5SJason Gunthorpe 
/*
 * Allocate and initialize a vfio_group for @iommu_group, including its
 * chardev minor and embedded struct device (not yet added to sysfs —
 * that happens in vfio_create_group()).  Returns ERR_PTR on failure;
 * on success all teardown is deferred to vfio_group_release() via
 * put_device().
 */
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	/* chardev minor; released in vfio_group_release() */
	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->users, 1);
	init_rwsem(&group->group_rwsem);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}
3800f3e72b5SJason Gunthorpe 
/*
 * Create and publish a vfio_group for @iommu_group: allocate it, name
 * the chardev ("noiommu-<id>" for no-iommu groups), and add it to sysfs
 * and vfio.group_list under vfio.group_lock.  If another thread created
 * a group for the same iommu_group first, that group is returned
 * (referenced) and ours is dropped.  Returns ERR_PTR on failure.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	/* drops the initial reference; vfio_group_release() frees it all */
	put_device(&group->dev);
	return ret;
}
4240f3e72b5SJason Gunthorpe 
/*
 * Drop a group reference.  The final put atomically takes
 * vfio.group_lock, unlinks the group from the global list, and tears
 * down the chardev before releasing the struct device reference.
 */
static void vfio_group_put(struct vfio_group *group)
{
	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
		return;

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group. Since all
	 * pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);

	list_del(&group->vfio_next);
	cdev_device_del(&group->cdev, &group->dev);
	mutex_unlock(&vfio.group_lock);

	/* final device reference; vfio_group_release() runs from here */
	put_device(&group->dev);
}
4460f3e72b5SJason Gunthorpe 
/* Take a group reference; paired with vfio_group_put() */
static void vfio_group_get(struct vfio_group *group)
{
	refcount_inc(&group->users);
}
4510f3e72b5SJason Gunthorpe 
4520f3e72b5SJason Gunthorpe /*
4530f3e72b5SJason Gunthorpe  * Device objects - create, release, get, put, search
4540f3e72b5SJason Gunthorpe  */
4550f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */
/*
 * Drop a device reference.  The final put completes device->comp for
 * any waiter (presumably the unregistration path — it is outside this
 * chunk, so confirm against the full file).
 */
static void vfio_device_put(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}
4610f3e72b5SJason Gunthorpe 
/* Take a device reference unless the count has already hit zero */
static bool vfio_device_try_get(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
4660f3e72b5SJason Gunthorpe 
4670f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
4680f3e72b5SJason Gunthorpe 						 struct device *dev)
4690f3e72b5SJason Gunthorpe {
4700f3e72b5SJason Gunthorpe 	struct vfio_device *device;
4710f3e72b5SJason Gunthorpe 
4720f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
4730f3e72b5SJason Gunthorpe 	list_for_each_entry(device, &group->device_list, group_next) {
4740f3e72b5SJason Gunthorpe 		if (device->dev == dev && vfio_device_try_get(device)) {
4750f3e72b5SJason Gunthorpe 			mutex_unlock(&group->device_lock);
4760f3e72b5SJason Gunthorpe 			return device;
4770f3e72b5SJason Gunthorpe 		}
4780f3e72b5SJason Gunthorpe 	}
4790f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
4800f3e72b5SJason Gunthorpe 	return NULL;
4810f3e72b5SJason Gunthorpe }
4820f3e72b5SJason Gunthorpe 
4830f3e72b5SJason Gunthorpe /*
4840f3e72b5SJason Gunthorpe  * VFIO driver API
4850f3e72b5SJason Gunthorpe  */
/*
 * Minimal initialization of an embedded vfio_device: wires up the
 * backing struct device and the driver ops, and prepares the completion
 * used to synchronize teardown.
 */
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
			 const struct vfio_device_ops *ops)
{
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;
}
EXPORT_SYMBOL_GPL(vfio_init_group_dev);
4940f3e72b5SJason Gunthorpe 
/* Undo vfio_init_group_dev(): currently only detaches the device set */
void vfio_uninit_group_dev(struct vfio_device *device)
{
	vfio_release_device_set(device);
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
5000f3e72b5SJason Gunthorpe 
/* Release helper called by vfio_put_device() */
void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device =
			container_of(kref, struct vfio_device, kref);

	vfio_uninit_group_dev(device);

	/*
	 * kvfree() cannot be done here due to a life cycle mess in
	 * vfio-ccw. Before the ccw part is fixed all drivers are
	 * required to support @release and call vfio_free_device()
	 * from there.
	 */
	device->ops->release(device);
}
EXPORT_SYMBOL_GPL(vfio_device_release);
518*cb9ff3f3SKevin Tian 
519*cb9ff3f3SKevin Tian /*
520*cb9ff3f3SKevin Tian  * Allocate and initialize vfio_device so it can be registered to vfio
521*cb9ff3f3SKevin Tian  * core.
522*cb9ff3f3SKevin Tian  *
523*cb9ff3f3SKevin Tian  * Drivers should use the wrapper vfio_alloc_device() for allocation.
524*cb9ff3f3SKevin Tian  * @size is the size of the structure to be allocated, including any
525*cb9ff3f3SKevin Tian  * private data used by the driver.
526*cb9ff3f3SKevin Tian  *
527*cb9ff3f3SKevin Tian  * Driver may provide an @init callback to cover device private data.
528*cb9ff3f3SKevin Tian  *
529*cb9ff3f3SKevin Tian  * Use vfio_put_device() to release the structure after success return.
530*cb9ff3f3SKevin Tian  */
531*cb9ff3f3SKevin Tian struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
532*cb9ff3f3SKevin Tian 				       const struct vfio_device_ops *ops)
533*cb9ff3f3SKevin Tian {
534*cb9ff3f3SKevin Tian 	struct vfio_device *device;
535*cb9ff3f3SKevin Tian 	int ret;
536*cb9ff3f3SKevin Tian 
537*cb9ff3f3SKevin Tian 	if (WARN_ON(size < sizeof(struct vfio_device)))
538*cb9ff3f3SKevin Tian 		return ERR_PTR(-EINVAL);
539*cb9ff3f3SKevin Tian 
540*cb9ff3f3SKevin Tian 	device = kvzalloc(size, GFP_KERNEL);
541*cb9ff3f3SKevin Tian 	if (!device)
542*cb9ff3f3SKevin Tian 		return ERR_PTR(-ENOMEM);
543*cb9ff3f3SKevin Tian 
544*cb9ff3f3SKevin Tian 	ret = vfio_init_device(device, dev, ops);
545*cb9ff3f3SKevin Tian 	if (ret)
546*cb9ff3f3SKevin Tian 		goto out_free;
547*cb9ff3f3SKevin Tian 	return device;
548*cb9ff3f3SKevin Tian 
549*cb9ff3f3SKevin Tian out_free:
550*cb9ff3f3SKevin Tian 	kvfree(device);
551*cb9ff3f3SKevin Tian 	return ERR_PTR(ret);
552*cb9ff3f3SKevin Tian }
553*cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(_vfio_alloc_device);
554*cb9ff3f3SKevin Tian 
/*
 * Initialize a vfio_device so it can be registered to vfio core.
 *
 * Only vfio-ccw driver should call this interface.
 */
int vfio_init_device(struct vfio_device *device, struct device *dev,
		     const struct vfio_device_ops *ops)
{
	int ret;

	vfio_init_group_dev(device, dev, ops);

	/* optional driver hook to set up device-private data */
	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	/* refcount goes live only after init fully succeeded */
	kref_init(&device->kref);
	return 0;

out_uninit:
	vfio_uninit_group_dev(device);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_init_device);
581*cb9ff3f3SKevin Tian 
/*
 * The helper called by driver @release callback to free the device
 * structure. Drivers which don't have private data to clean can
 * simply use this helper as its @release.
 */
void vfio_free_device(struct vfio_device *device)
{
	/* matches the kvzalloc() in _vfio_alloc_device() */
	kvfree(device);
}
EXPORT_SYMBOL_GPL(vfio_free_device);
592*cb9ff3f3SKevin Tian 
/*
 * Create a fake IOMMU group for a device with no real IOMMU backing and
 * wrap it in a new vfio_group of @type.  On success the local
 * iommu_group reference is dropped — the vfio_group holds its own (see
 * vfio_group_alloc()).  Returns ERR_PTR on failure.
 */
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}
6250f3e72b5SJason Gunthorpe 
/*
 * Resolve the vfio_group for @dev: reuse the vfio_group already bound
 * to the device's iommu_group if one exists, otherwise create it.
 * With CONFIG_VFIO_NOIOMMU and the noiommu option set, devices lacking
 * an iommu_group get a fake one and taint the kernel.
 */
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}
6690f3e72b5SJason Gunthorpe 
/*
 * Common tail of device registration: consumes the group reference
 * produced by the caller (vfio_group_find_or_alloc() or
 * vfio_noiommu_group_alloc()) and links the device into the group.
 */
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;

	/* Propagate a failed group lookup/allocation from the caller */
	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put(existing_device);
		/*
		 * Noiommu/emulated groups added the device to the
		 * iommu_group in vfio_noiommu_group_alloc(); undo that here.
		 */
		if (group->type == VFIO_NO_IOMMU ||
		    group->type == VFIO_EMULATED_IOMMU)
			iommu_group_remove_device(device->dev);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return 0;
}
7090f3e72b5SJason Gunthorpe 
7100f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device)
7110f3e72b5SJason Gunthorpe {
7120f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
7130f3e72b5SJason Gunthorpe 		vfio_group_find_or_alloc(device->dev));
7140f3e72b5SJason Gunthorpe }
7150f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev);
7160f3e72b5SJason Gunthorpe 
7170f3e72b5SJason Gunthorpe /*
7180f3e72b5SJason Gunthorpe  * Register a virtual device without IOMMU backing.  The user of this
7190f3e72b5SJason Gunthorpe  * device must not be able to directly trigger unmediated DMA.
7200f3e72b5SJason Gunthorpe  */
7210f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device)
7220f3e72b5SJason Gunthorpe {
7230f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
7240f3e72b5SJason Gunthorpe 		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
7250f3e72b5SJason Gunthorpe }
7260f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
7270f3e72b5SJason Gunthorpe 
7280f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
7290f3e72b5SJason Gunthorpe 						     char *buf)
7300f3e72b5SJason Gunthorpe {
7310f3e72b5SJason Gunthorpe 	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
7320f3e72b5SJason Gunthorpe 
7330f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
7340f3e72b5SJason Gunthorpe 	list_for_each_entry(it, &group->device_list, group_next) {
7350f3e72b5SJason Gunthorpe 		int ret;
7360f3e72b5SJason Gunthorpe 
7370f3e72b5SJason Gunthorpe 		if (it->ops->match) {
7380f3e72b5SJason Gunthorpe 			ret = it->ops->match(it, buf);
7390f3e72b5SJason Gunthorpe 			if (ret < 0) {
7400f3e72b5SJason Gunthorpe 				device = ERR_PTR(ret);
7410f3e72b5SJason Gunthorpe 				break;
7420f3e72b5SJason Gunthorpe 			}
7430f3e72b5SJason Gunthorpe 		} else {
7440f3e72b5SJason Gunthorpe 			ret = !strcmp(dev_name(it->dev), buf);
7450f3e72b5SJason Gunthorpe 		}
7460f3e72b5SJason Gunthorpe 
7470f3e72b5SJason Gunthorpe 		if (ret && vfio_device_try_get(it)) {
7480f3e72b5SJason Gunthorpe 			device = it;
7490f3e72b5SJason Gunthorpe 			break;
7500f3e72b5SJason Gunthorpe 		}
7510f3e72b5SJason Gunthorpe 	}
7520f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
7530f3e72b5SJason Gunthorpe 
7540f3e72b5SJason Gunthorpe 	return device;
7550f3e72b5SJason Gunthorpe }
7560f3e72b5SJason Gunthorpe 
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device keep it alive, so this
 * blocks (periodically nudging users via the driver's @request
 * callback) until the last reference is dropped and device->comp
 * completes.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/* Drop the initial reference set in __vfio_register_dev() */
	vfio_device_put(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/* Ask the driver to prod users into releasing the device */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				/*
				 * A signal arrived; warn the task and fall
				 * back to uninterruptible waits from now on.
				 */
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	/* Noiommu/emulated devices were added to the iommu_group manually */
	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
8000f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
8010f3e72b5SJason Gunthorpe 
8020f3e72b5SJason Gunthorpe /*
8030f3e72b5SJason Gunthorpe  * VFIO base fd, /dev/vfio/vfio
8040f3e72b5SJason Gunthorpe  */
/*
 * VFIO_CHECK_EXTENSION handler for the container fd.  Returns a
 * positive value when the extension identified by @arg is supported,
 * 0 when it is not, or a negative value from the backend driver.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/* Skip drivers this container may not use */
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
8530f3e72b5SJason Gunthorpe 
/*
 * Attach every group in the container to a newly opened IOMMU driver
 * instance (@data), unwinding all successful attaches on failure.
 * Returns -ENODEV when the group list is empty.  Caller must hold
 * write lock on container->group_lock.
 */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Detach, in reverse order, every group attached before the failure */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
8790f3e72b5SJason Gunthorpe 
/*
 * VFIO_SET_IOMMU: bind the container to an IOMMU backend driver.  The
 * ioctl @arg doubles as the extension magic; the first registered
 * driver that claims support for it and successfully attaches all of
 * the container's current groups becomes the container's driver.
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Attach all existing groups; unwinds internally on failure */
		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Success: the module reference is held until release */
		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
9460f3e72b5SJason Gunthorpe 
/*
 * ioctl dispatcher for the container fd (/dev/vfio/vfio).  Commands not
 * handled by the core are forwarded to the bound IOMMU driver, if any.
 */
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}
9780f3e72b5SJason Gunthorpe 
9790f3e72b5SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep)
9800f3e72b5SJason Gunthorpe {
9810f3e72b5SJason Gunthorpe 	struct vfio_container *container;
9820f3e72b5SJason Gunthorpe 
9830f3e72b5SJason Gunthorpe 	container = kzalloc(sizeof(*container), GFP_KERNEL);
9840f3e72b5SJason Gunthorpe 	if (!container)
9850f3e72b5SJason Gunthorpe 		return -ENOMEM;
9860f3e72b5SJason Gunthorpe 
9870f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&container->group_list);
9880f3e72b5SJason Gunthorpe 	init_rwsem(&container->group_lock);
9890f3e72b5SJason Gunthorpe 	kref_init(&container->kref);
9900f3e72b5SJason Gunthorpe 
9910f3e72b5SJason Gunthorpe 	filep->private_data = container;
9920f3e72b5SJason Gunthorpe 
9930f3e72b5SJason Gunthorpe 	return 0;
9940f3e72b5SJason Gunthorpe }
9950f3e72b5SJason Gunthorpe 
9960f3e72b5SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep)
9970f3e72b5SJason Gunthorpe {
9980f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
9990f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver = container->iommu_driver;
10000f3e72b5SJason Gunthorpe 
10010f3e72b5SJason Gunthorpe 	if (driver && driver->ops->notify)
10020f3e72b5SJason Gunthorpe 		driver->ops->notify(container->iommu_data,
10030f3e72b5SJason Gunthorpe 				    VFIO_IOMMU_CONTAINER_CLOSE);
10040f3e72b5SJason Gunthorpe 
10050f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
10060f3e72b5SJason Gunthorpe 
10070f3e72b5SJason Gunthorpe 	vfio_container_put(container);
10080f3e72b5SJason Gunthorpe 
10090f3e72b5SJason Gunthorpe 	return 0;
10100f3e72b5SJason Gunthorpe }
10110f3e72b5SJason Gunthorpe 
/* File operations for the container fd, /dev/vfio/vfio */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
10190f3e72b5SJason Gunthorpe 
10200f3e72b5SJason Gunthorpe /*
10210f3e72b5SJason Gunthorpe  * VFIO Group fd, /dev/vfio/$GROUP
10220f3e72b5SJason Gunthorpe  */
/*
 * Disconnect the group from its container: detach from the IOMMU
 * driver, release DMA ownership for real (VFIO_IOMMU) groups, and drop
 * the container reference.  If this was the last group, the container's
 * IOMMU driver instance is torn down.  Caller must hold
 * group->group_rwsem for write (asserted below).
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	/* Drop the reference taken when the group joined the container */
	vfio_container_put(container);
}
10560f3e72b5SJason Gunthorpe 
10570f3e72b5SJason Gunthorpe /*
10580f3e72b5SJason Gunthorpe  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
10590f3e72b5SJason Gunthorpe  * if there was no container to unset.  Since the ioctl is called on
10600f3e72b5SJason Gunthorpe  * the group, we know that still exists, therefore the only valid
10610f3e72b5SJason Gunthorpe  * transition here is 1->0.
10620f3e72b5SJason Gunthorpe  */
1063b3b43590SJason Gunthorpe static int vfio_group_ioctl_unset_container(struct vfio_group *group)
10640f3e72b5SJason Gunthorpe {
1065b3b43590SJason Gunthorpe 	int ret = 0;
10660f3e72b5SJason Gunthorpe 
1067b3b43590SJason Gunthorpe 	down_write(&group->group_rwsem);
1068b3b43590SJason Gunthorpe 	if (!group->container) {
1069b3b43590SJason Gunthorpe 		ret = -EINVAL;
1070b3b43590SJason Gunthorpe 		goto out_unlock;
1071b3b43590SJason Gunthorpe 	}
1072b3b43590SJason Gunthorpe 	if (group->container_users != 1) {
1073b3b43590SJason Gunthorpe 		ret = -EBUSY;
1074b3b43590SJason Gunthorpe 		goto out_unlock;
1075b3b43590SJason Gunthorpe 	}
10760f3e72b5SJason Gunthorpe 	__vfio_group_unset_container(group);
1077b3b43590SJason Gunthorpe 
1078b3b43590SJason Gunthorpe out_unlock:
1079b3b43590SJason Gunthorpe 	up_write(&group->group_rwsem);
1080b3b43590SJason Gunthorpe 	return ret;
10810f3e72b5SJason Gunthorpe }
10820f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_SET_CONTAINER: connect the group to the container whose
 * fd userspace passes via @arg.  Lock order is group->group_rwsem,
 * then container->group_lock.  For VFIO_IOMMU groups, DMA ownership of
 * the iommu_group is claimed before attaching to any IOMMU driver
 * already bound to the container.
 */
static int vfio_group_ioctl_set_container(struct vfio_group *group,
					  int __user *arg)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int container_fd;
	int ret = 0;

	/* Noiommu groups hand out unprotected DMA; require raw I/O caps */
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	if (get_user(container_fd, arg))
		return -EFAULT;
	if (container_fd < 0)
		return -EINVAL;
	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		ret = -EINVAL;
		goto out_fdput;
	}
	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&group->group_rwsem);

	/* A group can be connected to only one container at a time */
	if (group->container || WARN_ON(group->container_users)) {
		ret = -EINVAL;
		goto out_unlock_group;
	}

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* Roll back the DMA ownership claimed above */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
out_unlock_group:
	up_write(&group->group_rwsem);
out_fdput:
	fdput(f);
	return ret;
}
11620f3e72b5SJason Gunthorpe 
11630f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops;
11640f3e72b5SJason Gunthorpe 
/*
 * true if the vfio_device has open_device() called but not close_device().
 * WARNs (once) when the count is zero, since callers depend on an
 * already-elevated open_count.
 */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
11700f3e72b5SJason Gunthorpe 
/*
 * Charge one container user against the device's group and pin the
 * group's opened file.  Fails unless the group is attached to a
 * container that has an IOMMU driver set.  Caller must hold
 * group->group_rwsem for write.
 */
static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	/* Noiommu device access requires raw I/O privilege */
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}
11880f3e72b5SJason Gunthorpe 
11890f3e72b5SJason Gunthorpe static void vfio_device_unassign_container(struct vfio_device *device)
11900f3e72b5SJason Gunthorpe {
11910f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
11920f3e72b5SJason Gunthorpe 	WARN_ON(device->group->container_users <= 1);
11930f3e72b5SJason Gunthorpe 	device->group->container_users--;
11940f3e72b5SJason Gunthorpe 	fput(device->group->opened_file);
11950f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
11960f3e72b5SJason Gunthorpe }
11970f3e72b5SJason Gunthorpe 
/*
 * Open @device and return a new anon-inode file wrapping it.
 *
 * On success the caller's device reference is transferred to the returned
 * file and dropped in vfio_device_fops_release().  Only the first opener
 * (open_count transitioning to 1) latches the group's KVM pointer, runs the
 * driver's open_device op and registers the device with the container's
 * iommu driver.  Returns an ERR_PTR() on failure.
 */
11980f3e72b5SJason Gunthorpe static struct file *vfio_device_open(struct vfio_device *device)
11990f3e72b5SJason Gunthorpe {
12000f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *iommu_driver;
12010f3e72b5SJason Gunthorpe 	struct file *filep;
12020f3e72b5SJason Gunthorpe 	int ret;
12030f3e72b5SJason Gunthorpe 
	/* Take a container user reference; fails if no container is set */
12040f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
12050f3e72b5SJason Gunthorpe 	ret = vfio_device_assign_container(device);
12060f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
12070f3e72b5SJason Gunthorpe 	if (ret)
12080f3e72b5SJason Gunthorpe 		return ERR_PTR(ret);
12090f3e72b5SJason Gunthorpe 
	/* Pin the device driver's module for the lifetime of the device fd */
12100f3e72b5SJason Gunthorpe 	if (!try_module_get(device->dev->driver->owner)) {
12110f3e72b5SJason Gunthorpe 		ret = -ENODEV;
12120f3e72b5SJason Gunthorpe 		goto err_unassign_container;
12130f3e72b5SJason Gunthorpe 	}
12140f3e72b5SJason Gunthorpe 
12150f3e72b5SJason Gunthorpe 	mutex_lock(&device->dev_set->lock);
12160f3e72b5SJason Gunthorpe 	device->open_count++;
12170f3e72b5SJason Gunthorpe 	if (device->open_count == 1) {
12180f3e72b5SJason Gunthorpe 		/*
12190f3e72b5SJason Gunthorpe 		 * Here we pass the KVM pointer with the group under the read
12200f3e72b5SJason Gunthorpe 		 * lock.  If the device driver will use it, it must obtain a
12210f3e72b5SJason Gunthorpe 		 * reference and release it during close_device.
12220f3e72b5SJason Gunthorpe 		 */
12230f3e72b5SJason Gunthorpe 		down_read(&device->group->group_rwsem);
12240f3e72b5SJason Gunthorpe 		device->kvm = device->group->kvm;
12250f3e72b5SJason Gunthorpe 
12260f3e72b5SJason Gunthorpe 		if (device->ops->open_device) {
12270f3e72b5SJason Gunthorpe 			ret = device->ops->open_device(device);
12280f3e72b5SJason Gunthorpe 			if (ret)
12290f3e72b5SJason Gunthorpe 				goto err_undo_count;
12300f3e72b5SJason Gunthorpe 		}
12310f3e72b5SJason Gunthorpe 
		/* register_device is optional in the iommu driver's ops */
12320f3e72b5SJason Gunthorpe 		iommu_driver = device->group->container->iommu_driver;
12330f3e72b5SJason Gunthorpe 		if (iommu_driver && iommu_driver->ops->register_device)
12340f3e72b5SJason Gunthorpe 			iommu_driver->ops->register_device(
12350f3e72b5SJason Gunthorpe 				device->group->container->iommu_data, device);
12360f3e72b5SJason Gunthorpe 
12370f3e72b5SJason Gunthorpe 		up_read(&device->group->group_rwsem);
12380f3e72b5SJason Gunthorpe 	}
12390f3e72b5SJason Gunthorpe 	mutex_unlock(&device->dev_set->lock);
12400f3e72b5SJason Gunthorpe 
12410f3e72b5SJason Gunthorpe 	/*
12420f3e72b5SJason Gunthorpe 	 * We can't use anon_inode_getfd() because we need to modify
12430f3e72b5SJason Gunthorpe 	 * the f_mode flags directly to allow more than just ioctls
12440f3e72b5SJason Gunthorpe 	 */
12450f3e72b5SJason Gunthorpe 	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
12460f3e72b5SJason Gunthorpe 				   device, O_RDWR);
12470f3e72b5SJason Gunthorpe 	if (IS_ERR(filep)) {
12480f3e72b5SJason Gunthorpe 		ret = PTR_ERR(filep);
12490f3e72b5SJason Gunthorpe 		goto err_close_device;
12500f3e72b5SJason Gunthorpe 	}
12510f3e72b5SJason Gunthorpe 
12520f3e72b5SJason Gunthorpe 	/*
12530f3e72b5SJason Gunthorpe 	 * TODO: add an anon_inode interface to do this.
12540f3e72b5SJason Gunthorpe 	 * Appears to be missing by lack of need rather than
12550f3e72b5SJason Gunthorpe 	 * explicitly prevented.  Now there's need.
12560f3e72b5SJason Gunthorpe 	 */
12570f3e72b5SJason Gunthorpe 	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
12580f3e72b5SJason Gunthorpe 
12590f3e72b5SJason Gunthorpe 	if (device->group->type == VFIO_NO_IOMMU)
12600f3e72b5SJason Gunthorpe 		dev_warn(device->dev, "vfio-noiommu device opened by user "
12610f3e72b5SJason Gunthorpe 			 "(%s:%d)\n", current->comm, task_pid_nr(current));
12620f3e72b5SJason Gunthorpe 	/*
12630f3e72b5SJason Gunthorpe 	 * On success the ref of device is moved to the file and
12640f3e72b5SJason Gunthorpe 	 * put in vfio_device_fops_release()
12650f3e72b5SJason Gunthorpe 	 */
12660f3e72b5SJason Gunthorpe 	return filep;
12670f3e72b5SJason Gunthorpe 
	/*
	 * Only the first opener ran open_device/register_device, hence the
	 * open_count == 1 check before undoing them here.
	 */
12680f3e72b5SJason Gunthorpe err_close_device:
12690f3e72b5SJason Gunthorpe 	mutex_lock(&device->dev_set->lock);
12700f3e72b5SJason Gunthorpe 	down_read(&device->group->group_rwsem);
12710f3e72b5SJason Gunthorpe 	if (device->open_count == 1 && device->ops->close_device) {
12720f3e72b5SJason Gunthorpe 		device->ops->close_device(device);
12730f3e72b5SJason Gunthorpe 
12740f3e72b5SJason Gunthorpe 		iommu_driver = device->group->container->iommu_driver;
12750f3e72b5SJason Gunthorpe 		if (iommu_driver && iommu_driver->ops->unregister_device)
12760f3e72b5SJason Gunthorpe 			iommu_driver->ops->unregister_device(
12770f3e72b5SJason Gunthorpe 				device->group->container->iommu_data, device);
12780f3e72b5SJason Gunthorpe 	}
	/* Reached with group_rwsem held for read from both error paths */
12790f3e72b5SJason Gunthorpe err_undo_count:
12800f3e72b5SJason Gunthorpe 	up_read(&device->group->group_rwsem);
12810f3e72b5SJason Gunthorpe 	device->open_count--;
12820f3e72b5SJason Gunthorpe 	if (device->open_count == 0 && device->kvm)
12830f3e72b5SJason Gunthorpe 		device->kvm = NULL;
12840f3e72b5SJason Gunthorpe 	mutex_unlock(&device->dev_set->lock);
12850f3e72b5SJason Gunthorpe 	module_put(device->dev->driver->owner);
12860f3e72b5SJason Gunthorpe err_unassign_container:
12870f3e72b5SJason Gunthorpe 	vfio_device_unassign_container(device);
12880f3e72b5SJason Gunthorpe 	return ERR_PTR(ret);
12890f3e72b5SJason Gunthorpe }
12900f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_GET_DEVICE_FD: look up a device in @group by the user-supplied
 * name string at @arg and return a new device fd for it.
 *
 * The fd number is reserved before opening the device so that installing
 * the file cannot fail after vfio_device_open() has succeeded.
 */
1291150ee2f9SJason Gunthorpe static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
1292150ee2f9SJason Gunthorpe 					  char __user *arg)
12930f3e72b5SJason Gunthorpe {
12940f3e72b5SJason Gunthorpe 	struct vfio_device *device;
12950f3e72b5SJason Gunthorpe 	struct file *filep;
1296150ee2f9SJason Gunthorpe 	char *buf;
12970f3e72b5SJason Gunthorpe 	int fdno;
12980f3e72b5SJason Gunthorpe 	int ret;
12990f3e72b5SJason Gunthorpe 
	/* Bounded kernel copy of the NUL-terminated device name */
1300150ee2f9SJason Gunthorpe 	buf = strndup_user(arg, PAGE_SIZE);
1301150ee2f9SJason Gunthorpe 	if (IS_ERR(buf))
1302150ee2f9SJason Gunthorpe 		return PTR_ERR(buf);
1303150ee2f9SJason Gunthorpe 
13040f3e72b5SJason Gunthorpe 	device = vfio_device_get_from_name(group, buf);
1305150ee2f9SJason Gunthorpe 	kfree(buf);
13060f3e72b5SJason Gunthorpe 	if (IS_ERR(device))
13070f3e72b5SJason Gunthorpe 		return PTR_ERR(device);
13080f3e72b5SJason Gunthorpe 
13090f3e72b5SJason Gunthorpe 	fdno = get_unused_fd_flags(O_CLOEXEC);
13100f3e72b5SJason Gunthorpe 	if (fdno < 0) {
13110f3e72b5SJason Gunthorpe 		ret = fdno;
13120f3e72b5SJason Gunthorpe 		goto err_put_device;
13130f3e72b5SJason Gunthorpe 	}
13140f3e72b5SJason Gunthorpe 
	/* On success the device reference is owned by filep, not by us */
13150f3e72b5SJason Gunthorpe 	filep = vfio_device_open(device);
13160f3e72b5SJason Gunthorpe 	if (IS_ERR(filep)) {
13170f3e72b5SJason Gunthorpe 		ret = PTR_ERR(filep);
13180f3e72b5SJason Gunthorpe 		goto err_put_fdno;
13190f3e72b5SJason Gunthorpe 	}
13200f3e72b5SJason Gunthorpe 
13210f3e72b5SJason Gunthorpe 	fd_install(fdno, filep);
13220f3e72b5SJason Gunthorpe 	return fdno;
13230f3e72b5SJason Gunthorpe 
13240f3e72b5SJason Gunthorpe err_put_fdno:
13250f3e72b5SJason Gunthorpe 	put_unused_fd(fdno);
13260f3e72b5SJason Gunthorpe err_put_device:
13270f3e72b5SJason Gunthorpe 	vfio_device_put(device);
13280f3e72b5SJason Gunthorpe 	return ret;
13290f3e72b5SJason Gunthorpe }
13300f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_GET_STATUS: fill in the group status flags for userspace.
 * The group reports VIABLE when a container is already attached, or when
 * no other entity has claimed DMA ownership of the iommu group;
 * CONTAINER_SET is reported only in the former case.
 */
133199a27c08SJason Gunthorpe static int vfio_group_ioctl_get_status(struct vfio_group *group,
133299a27c08SJason Gunthorpe 				       struct vfio_group_status __user *arg)
13330f3e72b5SJason Gunthorpe {
133499a27c08SJason Gunthorpe 	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
13350f3e72b5SJason Gunthorpe 	struct vfio_group_status status;
13360f3e72b5SJason Gunthorpe 
133799a27c08SJason Gunthorpe 	if (copy_from_user(&status, arg, minsz))
13380f3e72b5SJason Gunthorpe 		return -EFAULT;
13390f3e72b5SJason Gunthorpe 
	/* Standard vfio uAPI contract: argsz must cover the known fields */
13400f3e72b5SJason Gunthorpe 	if (status.argsz < minsz)
13410f3e72b5SJason Gunthorpe 		return -EINVAL;
13420f3e72b5SJason Gunthorpe 
13430f3e72b5SJason Gunthorpe 	status.flags = 0;
13440f3e72b5SJason Gunthorpe 
13450f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
13460f3e72b5SJason Gunthorpe 	if (group->container)
13470f3e72b5SJason Gunthorpe 		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
13480f3e72b5SJason Gunthorpe 				VFIO_GROUP_FLAGS_VIABLE;
13490f3e72b5SJason Gunthorpe 	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
13500f3e72b5SJason Gunthorpe 		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
13510f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
13520f3e72b5SJason Gunthorpe 
135399a27c08SJason Gunthorpe 	if (copy_to_user(arg, &status, minsz))
13540f3e72b5SJason Gunthorpe 		return -EFAULT;
135599a27c08SJason Gunthorpe 	return 0;
13560f3e72b5SJason Gunthorpe }
135799a27c08SJason Gunthorpe 
/*
 * Dispatch table for the group fd ioctls; each command is handled by its
 * own vfio_group_ioctl_*() helper.
 */
135899a27c08SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep,
135999a27c08SJason Gunthorpe 				      unsigned int cmd, unsigned long arg)
136099a27c08SJason Gunthorpe {
136199a27c08SJason Gunthorpe 	struct vfio_group *group = filep->private_data;
136299a27c08SJason Gunthorpe 	void __user *uarg = (void __user *)arg;
136399a27c08SJason Gunthorpe 
136499a27c08SJason Gunthorpe 	switch (cmd) {
136599a27c08SJason Gunthorpe 	case VFIO_GROUP_GET_DEVICE_FD:
136699a27c08SJason Gunthorpe 		return vfio_group_ioctl_get_device_fd(group, uarg);
136799a27c08SJason Gunthorpe 	case VFIO_GROUP_GET_STATUS:
136899a27c08SJason Gunthorpe 		return vfio_group_ioctl_get_status(group, uarg);
13690f3e72b5SJason Gunthorpe 	case VFIO_GROUP_SET_CONTAINER:
137067671f15SJason Gunthorpe 		return vfio_group_ioctl_set_container(group, uarg);
13710f3e72b5SJason Gunthorpe 	case VFIO_GROUP_UNSET_CONTAINER:
1372b3b43590SJason Gunthorpe 		return vfio_group_ioctl_unset_container(group);
137399a27c08SJason Gunthorpe 	default:
137499a27c08SJason Gunthorpe 		return -ENOTTY;
13750f3e72b5SJason Gunthorpe 	}
13760f3e72b5SJason Gunthorpe }
13770f3e72b5SJason Gunthorpe 
/*
 * Open handler for the vfio group cdev.
 *
 * Takes a group reference (racing safely against teardown via
 * refcount_inc_not_zero), requires CAP_SYS_RAWIO for no-iommu groups, and
 * permits only a single concurrent open — group->opened_file doubles as
 * the busy flag, all under the group rwsem held for write.
 */
13780f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep)
13790f3e72b5SJason Gunthorpe {
13800f3e72b5SJason Gunthorpe 	struct vfio_group *group =
13810f3e72b5SJason Gunthorpe 		container_of(inode->i_cdev, struct vfio_group, cdev);
13820f3e72b5SJason Gunthorpe 	int ret;
13830f3e72b5SJason Gunthorpe 
13840f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
13850f3e72b5SJason Gunthorpe 
13860f3e72b5SJason Gunthorpe 	/* users can be zero if this races with vfio_group_put() */
13870f3e72b5SJason Gunthorpe 	if (!refcount_inc_not_zero(&group->users)) {
13880f3e72b5SJason Gunthorpe 		ret = -ENODEV;
13890f3e72b5SJason Gunthorpe 		goto err_unlock;
13900f3e72b5SJason Gunthorpe 	}
13910f3e72b5SJason Gunthorpe 
13920f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
13930f3e72b5SJason Gunthorpe 		ret = -EPERM;
13940f3e72b5SJason Gunthorpe 		goto err_put;
13950f3e72b5SJason Gunthorpe 	}
13960f3e72b5SJason Gunthorpe 
13970f3e72b5SJason Gunthorpe 	/*
13980f3e72b5SJason Gunthorpe 	 * Do we need multiple instances of the group open?  Seems not.
13990f3e72b5SJason Gunthorpe 	 */
14000f3e72b5SJason Gunthorpe 	if (group->opened_file) {
14010f3e72b5SJason Gunthorpe 		ret = -EBUSY;
14020f3e72b5SJason Gunthorpe 		goto err_put;
14030f3e72b5SJason Gunthorpe 	}
14040f3e72b5SJason Gunthorpe 	group->opened_file = filep;
14050f3e72b5SJason Gunthorpe 	filep->private_data = group;
14060f3e72b5SJason Gunthorpe 
14070f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
14080f3e72b5SJason Gunthorpe 	return 0;
14090f3e72b5SJason Gunthorpe err_put:
14100f3e72b5SJason Gunthorpe 	vfio_group_put(group);
14110f3e72b5SJason Gunthorpe err_unlock:
14120f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
14130f3e72b5SJason Gunthorpe 	return ret;
14140f3e72b5SJason Gunthorpe }
14150f3e72b5SJason Gunthorpe 
/*
 * Release handler for the group fd: detach the container (if any) and
 * drop the group reference taken in vfio_group_fops_open().
 */
14160f3e72b5SJason Gunthorpe static int vfio_group_fops_release(struct inode *inode, struct file *filep)
14170f3e72b5SJason Gunthorpe {
14180f3e72b5SJason Gunthorpe 	struct vfio_group *group = filep->private_data;
14190f3e72b5SJason Gunthorpe 
14200f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
14210f3e72b5SJason Gunthorpe 
14220f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
14230f3e72b5SJason Gunthorpe 	/*
14240f3e72b5SJason Gunthorpe 	 * Device FDs hold a group file reference, therefore the group release
14250f3e72b5SJason Gunthorpe 	 * is only called when there are no open devices.
14260f3e72b5SJason Gunthorpe 	 */
14270f3e72b5SJason Gunthorpe 	WARN_ON(group->notifier.head);
14280f3e72b5SJason Gunthorpe 	if (group->container) {
		/* Only the group fd's own container user may remain */
14290f3e72b5SJason Gunthorpe 		WARN_ON(group->container_users != 1);
14300f3e72b5SJason Gunthorpe 		__vfio_group_unset_container(group);
14310f3e72b5SJason Gunthorpe 	}
14320f3e72b5SJason Gunthorpe 	group->opened_file = NULL;
14330f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
14340f3e72b5SJason Gunthorpe 
14350f3e72b5SJason Gunthorpe 	vfio_group_put(group);
14360f3e72b5SJason Gunthorpe 
14370f3e72b5SJason Gunthorpe 	return 0;
14380f3e72b5SJason Gunthorpe }
14390f3e72b5SJason Gunthorpe 
/* File operations for the vfio group character device (group->cdev). */
14400f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops = {
14410f3e72b5SJason Gunthorpe 	.owner		= THIS_MODULE,
14420f3e72b5SJason Gunthorpe 	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
14430f3e72b5SJason Gunthorpe 	.compat_ioctl	= compat_ptr_ioctl,
14440f3e72b5SJason Gunthorpe 	.open		= vfio_group_fops_open,
14450f3e72b5SJason Gunthorpe 	.release	= vfio_group_fops_release,
14460f3e72b5SJason Gunthorpe };
14470f3e72b5SJason Gunthorpe 
14480f3e72b5SJason Gunthorpe /*
14498e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_resume_and_get().
14508e5c6995SAbhishek Sahu  * Return error code on failure or 0 on success.
14518e5c6995SAbhishek Sahu  */
14528e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
14538e5c6995SAbhishek Sahu {
14548e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
14558e5c6995SAbhishek Sahu 
14568e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm) {
14578e5c6995SAbhishek Sahu 		int ret;
14588e5c6995SAbhishek Sahu 
14598e5c6995SAbhishek Sahu 		ret = pm_runtime_resume_and_get(dev);
14608e5c6995SAbhishek Sahu 		if (ret) {
14618e5c6995SAbhishek Sahu 			dev_info_ratelimited(dev,
14628e5c6995SAbhishek Sahu 				"vfio: runtime resume failed %d\n", ret);
14638e5c6995SAbhishek Sahu 			return -EIO;
14648e5c6995SAbhishek Sahu 		}
14658e5c6995SAbhishek Sahu 	}
14668e5c6995SAbhishek Sahu 
14678e5c6995SAbhishek Sahu 	return 0;
14688e5c6995SAbhishek Sahu }
14698e5c6995SAbhishek Sahu 
14708e5c6995SAbhishek Sahu /*
14718e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_put().
14728e5c6995SAbhishek Sahu  */
14738e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
14748e5c6995SAbhishek Sahu {
14758e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
14768e5c6995SAbhishek Sahu 
14778e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm)
14788e5c6995SAbhishek Sahu 		pm_runtime_put(dev);
14798e5c6995SAbhishek Sahu }
14808e5c6995SAbhishek Sahu 
14818e5c6995SAbhishek Sahu /*
14820f3e72b5SJason Gunthorpe  * VFIO Device fd
14830f3e72b5SJason Gunthorpe  */
/*
 * Release handler for a device fd created by vfio_device_open().
 *
 * The driver's close_device op runs only on the last close (open_count 1),
 * after which the KVM pointer is cleared.  The module, container and device
 * references taken at open time are then dropped.
 *
 * NOTE(review): unregister_device is invoked on every fd release, while
 * register_device in vfio_device_open() runs only on the first open —
 * confirm this asymmetry is intended for multi-open devices.
 */
14840f3e72b5SJason Gunthorpe static int vfio_device_fops_release(struct inode *inode, struct file *filep)
14850f3e72b5SJason Gunthorpe {
14860f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
14870f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *iommu_driver;
14880f3e72b5SJason Gunthorpe 
14890f3e72b5SJason Gunthorpe 	mutex_lock(&device->dev_set->lock);
14900f3e72b5SJason Gunthorpe 	vfio_assert_device_open(device);
14910f3e72b5SJason Gunthorpe 	down_read(&device->group->group_rwsem);
14920f3e72b5SJason Gunthorpe 	if (device->open_count == 1 && device->ops->close_device)
14930f3e72b5SJason Gunthorpe 		device->ops->close_device(device);
14940f3e72b5SJason Gunthorpe 
14950f3e72b5SJason Gunthorpe 	iommu_driver = device->group->container->iommu_driver;
14960f3e72b5SJason Gunthorpe 	if (iommu_driver && iommu_driver->ops->unregister_device)
14970f3e72b5SJason Gunthorpe 		iommu_driver->ops->unregister_device(
14980f3e72b5SJason Gunthorpe 			device->group->container->iommu_data, device);
14990f3e72b5SJason Gunthorpe 	up_read(&device->group->group_rwsem);
15000f3e72b5SJason Gunthorpe 	device->open_count--;
15010f3e72b5SJason Gunthorpe 	if (device->open_count == 0)
15020f3e72b5SJason Gunthorpe 		device->kvm = NULL;
15030f3e72b5SJason Gunthorpe 	mutex_unlock(&device->dev_set->lock);
15040f3e72b5SJason Gunthorpe 
15050f3e72b5SJason Gunthorpe 	module_put(device->dev->driver->owner);
15060f3e72b5SJason Gunthorpe 
15070f3e72b5SJason Gunthorpe 	vfio_device_unassign_container(device);
15080f3e72b5SJason Gunthorpe 
	/* Drop the device reference moved into this file at open time */
15090f3e72b5SJason Gunthorpe 	vfio_device_put(device);
15100f3e72b5SJason Gunthorpe 
15110f3e72b5SJason Gunthorpe 	return 0;
15120f3e72b5SJason Gunthorpe }
15130f3e72b5SJason Gunthorpe 
15140f3e72b5SJason Gunthorpe /*
15150f3e72b5SJason Gunthorpe  * vfio_mig_get_next_state - Compute the next step in the FSM
 * @device - The vfio_device whose migration_flags gate the optional states
15160f3e72b5SJason Gunthorpe  * @cur_fsm - The current state the device is in
15170f3e72b5SJason Gunthorpe  * @new_fsm - The target state to reach
15180f3e72b5SJason Gunthorpe  * @next_fsm - Pointer to the next step to get to new_fsm
15190f3e72b5SJason Gunthorpe  *
15200f3e72b5SJason Gunthorpe  * Return 0 upon success, otherwise -errno
15210f3e72b5SJason Gunthorpe  * Upon success the next step in the state progression between cur_fsm and
15220f3e72b5SJason Gunthorpe  * new_fsm will be set in next_fsm.
15230f3e72b5SJason Gunthorpe  *
15240f3e72b5SJason Gunthorpe  * This breaks down requests for combination transitions into smaller steps and
15250f3e72b5SJason Gunthorpe  * returns the next step to get to new_fsm. The function may need to be called
15260f3e72b5SJason Gunthorpe  * multiple times before reaching new_fsm.
15270f3e72b5SJason Gunthorpe  *
15280f3e72b5SJason Gunthorpe  */
15290f3e72b5SJason Gunthorpe int vfio_mig_get_next_state(struct vfio_device *device,
15300f3e72b5SJason Gunthorpe 			    enum vfio_device_mig_state cur_fsm,
15310f3e72b5SJason Gunthorpe 			    enum vfio_device_mig_state new_fsm,
15320f3e72b5SJason Gunthorpe 			    enum vfio_device_mig_state *next_fsm)
15330f3e72b5SJason Gunthorpe {
15340f3e72b5SJason Gunthorpe 	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
15350f3e72b5SJason Gunthorpe 	/*
15360f3e72b5SJason Gunthorpe 	 * The coding in this table requires the driver to implement the
15370f3e72b5SJason Gunthorpe 	 * following FSM arcs:
15380f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP
15390f3e72b5SJason Gunthorpe 	 *         STOP -> RESUMING
15400f3e72b5SJason Gunthorpe 	 *         STOP -> STOP_COPY
15410f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP
15420f3e72b5SJason Gunthorpe 	 *
15430f3e72b5SJason Gunthorpe 	 * If P2P is supported then the driver must also implement these FSM
15440f3e72b5SJason Gunthorpe 	 * arcs:
15450f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P
15460f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> RUNNING
15470f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> STOP
15480f3e72b5SJason Gunthorpe 	 *         STOP -> RUNNING_P2P
15490f3e72b5SJason Gunthorpe 	 * Without P2P the driver must implement:
15500f3e72b5SJason Gunthorpe 	 *         RUNNING -> STOP
15510f3e72b5SJason Gunthorpe 	 *         STOP -> RUNNING
15520f3e72b5SJason Gunthorpe 	 *
15530f3e72b5SJason Gunthorpe 	 * The coding will step through multiple states for some combination
15540f3e72b5SJason Gunthorpe 	 * transitions; if all optional features are supported, this means the
15550f3e72b5SJason Gunthorpe 	 * following ones:
15560f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP -> RUNNING_P2P
15570f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
15580f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP -> STOP_COPY
15590f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P -> STOP
15600f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
15610f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
15620f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> STOP -> RESUMING
15630f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> STOP -> STOP_COPY
15640f3e72b5SJason Gunthorpe 	 *         STOP -> RUNNING_P2P -> RUNNING
15650f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP -> RESUMING
15660f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP -> RUNNING_P2P
15670f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
15680f3e72b5SJason Gunthorpe 	 */
	/* vfio_from_fsm_table[cur][target] gives the next single-arc hop */
15690f3e72b5SJason Gunthorpe 	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
15700f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP] = {
15710f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
15720f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
15730f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
15740f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
15750f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
15760f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
15770f3e72b5SJason Gunthorpe 		},
15780f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING] = {
15790f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
15800f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
15810f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
15820f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
15830f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
15840f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
15850f3e72b5SJason Gunthorpe 		},
15860f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP_COPY] = {
15870f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
15880f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
15890f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
15900f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
15910f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
15920f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
15930f3e72b5SJason Gunthorpe 		},
15940f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RESUMING] = {
15950f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
15960f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
15970f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
15980f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
15990f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
16000f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
16010f3e72b5SJason Gunthorpe 		},
16020f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
16030f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
16040f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
16050f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
16060f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
16070f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
16080f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
16090f3e72b5SJason Gunthorpe 		},
16100f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_ERROR] = {
16110f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
16120f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
16130f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
16140f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
16150f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
16160f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
16170f3e72b5SJason Gunthorpe 		},
16180f3e72b5SJason Gunthorpe 	};
16190f3e72b5SJason Gunthorpe 
	/* Feature flags a device must advertise for each state to be legal */
16200f3e72b5SJason Gunthorpe 	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
16210f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
16220f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
16230f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
16240f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
16250f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING_P2P] =
16260f3e72b5SJason Gunthorpe 			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
16270f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_ERROR] = ~0U,
16280f3e72b5SJason Gunthorpe 	};
16290f3e72b5SJason Gunthorpe 
	/* cur_fsm must be a known state that this device actually supports */
16300f3e72b5SJason Gunthorpe 	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
16310f3e72b5SJason Gunthorpe 		    (state_flags_table[cur_fsm] & device->migration_flags) !=
16320f3e72b5SJason Gunthorpe 			state_flags_table[cur_fsm]))
16330f3e72b5SJason Gunthorpe 		return -EINVAL;
16340f3e72b5SJason Gunthorpe 
16350f3e72b5SJason Gunthorpe 	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
16360f3e72b5SJason Gunthorpe 	   (state_flags_table[new_fsm] & device->migration_flags) !=
16370f3e72b5SJason Gunthorpe 			state_flags_table[new_fsm])
16380f3e72b5SJason Gunthorpe 		return -EINVAL;
16390f3e72b5SJason Gunthorpe 
16400f3e72b5SJason Gunthorpe 	/*
16410f3e72b5SJason Gunthorpe 	 * Arcs touching optional and unsupported states are skipped over. The
16420f3e72b5SJason Gunthorpe 	 * driver will instead see an arc from the original state to the next
16430f3e72b5SJason Gunthorpe 	 * logical state, as per the above comment.
16440f3e72b5SJason Gunthorpe 	 */
16450f3e72b5SJason Gunthorpe 	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
16460f3e72b5SJason Gunthorpe 	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
16470f3e72b5SJason Gunthorpe 			state_flags_table[*next_fsm])
16480f3e72b5SJason Gunthorpe 		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];
16490f3e72b5SJason Gunthorpe 
16500f3e72b5SJason Gunthorpe 	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
16510f3e72b5SJason Gunthorpe }
16520f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
16530f3e72b5SJason Gunthorpe 
16540f3e72b5SJason Gunthorpe /*
16550f3e72b5SJason Gunthorpe  * Convert the drivers's struct file into a FD number and return it to userspace
16560f3e72b5SJason Gunthorpe  */
/*
 * Ownership: this function always consumes @filp — on success the
 * reference is transferred to the installed fd, on any failure it is
 * dropped via fput() before returning the error.
 */
16570f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
16580f3e72b5SJason Gunthorpe 				   struct vfio_device_feature_mig_state *mig)
16590f3e72b5SJason Gunthorpe {
16600f3e72b5SJason Gunthorpe 	int ret;
16610f3e72b5SJason Gunthorpe 	int fd;
16620f3e72b5SJason Gunthorpe 
16630f3e72b5SJason Gunthorpe 	fd = get_unused_fd_flags(O_CLOEXEC);
16640f3e72b5SJason Gunthorpe 	if (fd < 0) {
16650f3e72b5SJason Gunthorpe 		ret = fd;
16660f3e72b5SJason Gunthorpe 		goto out_fput;
16670f3e72b5SJason Gunthorpe 	}
16680f3e72b5SJason Gunthorpe 
	/* Report the fd number to userspace before making it live */
16690f3e72b5SJason Gunthorpe 	mig->data_fd = fd;
16700f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, mig, sizeof(*mig))) {
16710f3e72b5SJason Gunthorpe 		ret = -EFAULT;
16720f3e72b5SJason Gunthorpe 		goto out_put_unused;
16730f3e72b5SJason Gunthorpe 	}
16740f3e72b5SJason Gunthorpe 	fd_install(fd, filp);
16750f3e72b5SJason Gunthorpe 	return 0;
16760f3e72b5SJason Gunthorpe 
16770f3e72b5SJason Gunthorpe out_put_unused:
16780f3e72b5SJason Gunthorpe 	put_unused_fd(fd);
16790f3e72b5SJason Gunthorpe out_fput:
16800f3e72b5SJason Gunthorpe 	fput(filp);
16810f3e72b5SJason Gunthorpe 	return ret;
16820f3e72b5SJason Gunthorpe }
16830f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE handler.
 *
 * GET reports the current migration state via the driver's
 * migration_get_state op.  SET asks migration_set_state to transition the
 * device; the driver may return a struct file carrying migration data, in
 * which case an fd for it is returned to userspace, otherwise data_fd is
 * reported as -1 through the out_copy path.
 */
16840f3e72b5SJason Gunthorpe static int
16850f3e72b5SJason Gunthorpe vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
16860f3e72b5SJason Gunthorpe 					   u32 flags, void __user *arg,
16870f3e72b5SJason Gunthorpe 					   size_t argsz)
16880f3e72b5SJason Gunthorpe {
16890f3e72b5SJason Gunthorpe 	size_t minsz =
16900f3e72b5SJason Gunthorpe 		offsetofend(struct vfio_device_feature_mig_state, data_fd);
16910f3e72b5SJason Gunthorpe 	struct vfio_device_feature_mig_state mig;
16920f3e72b5SJason Gunthorpe 	struct file *filp = NULL;
16930f3e72b5SJason Gunthorpe 	int ret;
16940f3e72b5SJason Gunthorpe 
16950f3e72b5SJason Gunthorpe 	if (!device->mig_ops)
16960f3e72b5SJason Gunthorpe 		return -ENOTTY;
16970f3e72b5SJason Gunthorpe 
16980f3e72b5SJason Gunthorpe 	ret = vfio_check_feature(flags, argsz,
16990f3e72b5SJason Gunthorpe 				 VFIO_DEVICE_FEATURE_SET |
17000f3e72b5SJason Gunthorpe 				 VFIO_DEVICE_FEATURE_GET,
17010f3e72b5SJason Gunthorpe 				 sizeof(mig));
	/* vfio_check_feature() returns 1 when the payload should be handled */
17020f3e72b5SJason Gunthorpe 	if (ret != 1)
17030f3e72b5SJason Gunthorpe 		return ret;
17040f3e72b5SJason Gunthorpe 
17050f3e72b5SJason Gunthorpe 	if (copy_from_user(&mig, arg, minsz))
17060f3e72b5SJason Gunthorpe 		return -EFAULT;
17070f3e72b5SJason Gunthorpe 
17080f3e72b5SJason Gunthorpe 	if (flags & VFIO_DEVICE_FEATURE_GET) {
17090f3e72b5SJason Gunthorpe 		enum vfio_device_mig_state curr_state;
17100f3e72b5SJason Gunthorpe 
17110f3e72b5SJason Gunthorpe 		ret = device->mig_ops->migration_get_state(device,
17120f3e72b5SJason Gunthorpe 							   &curr_state);
17130f3e72b5SJason Gunthorpe 		if (ret)
17140f3e72b5SJason Gunthorpe 			return ret;
17150f3e72b5SJason Gunthorpe 		mig.device_state = curr_state;
17160f3e72b5SJason Gunthorpe 		goto out_copy;
17170f3e72b5SJason Gunthorpe 	}
17180f3e72b5SJason Gunthorpe 
17190f3e72b5SJason Gunthorpe 	/* Handle the VFIO_DEVICE_FEATURE_SET */
17200f3e72b5SJason Gunthorpe 	filp = device->mig_ops->migration_set_state(device, mig.device_state);
17210f3e72b5SJason Gunthorpe 	if (IS_ERR(filp) || !filp)
17220f3e72b5SJason Gunthorpe 		goto out_copy;
17230f3e72b5SJason Gunthorpe 
17240f3e72b5SJason Gunthorpe 	return vfio_ioct_mig_return_fd(filp, arg, &mig);
	/* No data file: report data_fd == -1, then surface any set_state error */
17250f3e72b5SJason Gunthorpe out_copy:
17260f3e72b5SJason Gunthorpe 	mig.data_fd = -1;
17270f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, &mig, sizeof(mig)))
17280f3e72b5SJason Gunthorpe 		return -EFAULT;
17290f3e72b5SJason Gunthorpe 	if (IS_ERR(filp))
17300f3e72b5SJason Gunthorpe 		return PTR_ERR(filp);
17310f3e72b5SJason Gunthorpe 	return 0;
17320f3e72b5SJason Gunthorpe }
17330f3e72b5SJason Gunthorpe 
/*
 * VFIO_DEVICE_FEATURE_MIGRATION handler (GET only): report the device's
 * supported migration feature flags (device->migration_flags) to userspace.
 * -ENOTTY when the driver provides no migration ops.
 */
17340f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
17350f3e72b5SJason Gunthorpe 					       u32 flags, void __user *arg,
17360f3e72b5SJason Gunthorpe 					       size_t argsz)
17370f3e72b5SJason Gunthorpe {
17380f3e72b5SJason Gunthorpe 	struct vfio_device_feature_migration mig = {
17390f3e72b5SJason Gunthorpe 		.flags = device->migration_flags,
17400f3e72b5SJason Gunthorpe 	};
17410f3e72b5SJason Gunthorpe 	int ret;
17420f3e72b5SJason Gunthorpe 
17430f3e72b5SJason Gunthorpe 	if (!device->mig_ops)
17440f3e72b5SJason Gunthorpe 		return -ENOTTY;
17450f3e72b5SJason Gunthorpe 
17460f3e72b5SJason Gunthorpe 	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
17470f3e72b5SJason Gunthorpe 				 sizeof(mig));
17480f3e72b5SJason Gunthorpe 	if (ret != 1)
17490f3e72b5SJason Gunthorpe 		return ret;
17500f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, &mig, sizeof(mig)))
17510f3e72b5SJason Gunthorpe 		return -EFAULT;
17520f3e72b5SJason Gunthorpe 	return 0;
17530f3e72b5SJason Gunthorpe }
17540f3e72b5SJason Gunthorpe 
175580c4b92aSYishai Hadas /* Ranges should fit into a single kernel page */
/* Upper bound on control.num_ranges accepted by logging_start below */
175680c4b92aSYishai Hadas #define LOG_MAX_RANGES \
175780c4b92aSYishai Hadas 	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
175880c4b92aSYishai Hadas 
/*
 * Handle VFIO_DEVICE_FEATURE_DMA_LOGGING_START (SET only): validate the
 * user-supplied set of IOVA ranges, build an interval tree from them, and
 * hand the tree to the driver to begin dirty-page tracking.
 *
 * The driver may adjust control.page_size via log_start(); the (possibly
 * updated) control struct is copied back to userspace on success.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bounds the allocation below: all nodes must fit in one page */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		/* Each range must be aligned to the requested page size */
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		/* Reject ranges that wrap or exceed the addressable space */
		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/*
	 * Return the possibly driver-adjusted page_size; tear tracking back
	 * down if we cannot report it, so userspace never sees stale state.
	 */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}
184380c4b92aSYishai Hadas 
184480c4b92aSYishai Hadas static int
184580c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
184680c4b92aSYishai Hadas 				       u32 flags, void __user *arg,
184780c4b92aSYishai Hadas 				       size_t argsz)
184880c4b92aSYishai Hadas {
184980c4b92aSYishai Hadas 	int ret;
185080c4b92aSYishai Hadas 
185180c4b92aSYishai Hadas 	if (!device->log_ops)
185280c4b92aSYishai Hadas 		return -ENOTTY;
185380c4b92aSYishai Hadas 
185480c4b92aSYishai Hadas 	ret = vfio_check_feature(flags, argsz,
185580c4b92aSYishai Hadas 				 VFIO_DEVICE_FEATURE_SET, 0);
185680c4b92aSYishai Hadas 	if (ret != 1)
185780c4b92aSYishai Hadas 		return ret;
185880c4b92aSYishai Hadas 
185980c4b92aSYishai Hadas 	return device->log_ops->log_stop(device);
186080c4b92aSYishai Hadas }
186180c4b92aSYishai Hadas 
/*
 * iova_bitmap_for_each() callback: forward one chunk of the iteration to
 * the driver's log_read_and_clear op, which marks dirty bits for
 * [iova, iova + length) in @iter and clears the device's tracking state.
 */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}
187080c4b92aSYishai Hadas 
/*
 * Handle VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT (GET only): fill the
 * user-provided bitmap with the dirty state of the requested IOVA range,
 * clearing the device's tracking state as it is read.
 */
static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	/* One bitmap bit covers page_size bytes; must be a pow2 >= 4K */
	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	/* Reject ranges that wrap or exceed the addressable space */
	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}
191580c4b92aSYishai Hadas 
/*
 * Common entry for the VFIO_DEVICE_FEATURE ioctl: validate the user header
 * and flags, then dispatch to a core-handled feature or fall back to the
 * driver's device_feature op.  The feature payload pointer (arg->data) and
 * the remaining argsz are passed down to the handler.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		/* Unknown features are offered to the driver, if any */
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
19690f3e72b5SJason Gunthorpe 
19700f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep,
19710f3e72b5SJason Gunthorpe 				       unsigned int cmd, unsigned long arg)
19720f3e72b5SJason Gunthorpe {
19730f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
19748e5c6995SAbhishek Sahu 	int ret;
19758e5c6995SAbhishek Sahu 
19768e5c6995SAbhishek Sahu 	ret = vfio_device_pm_runtime_get(device);
19778e5c6995SAbhishek Sahu 	if (ret)
19788e5c6995SAbhishek Sahu 		return ret;
19790f3e72b5SJason Gunthorpe 
19800f3e72b5SJason Gunthorpe 	switch (cmd) {
19810f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE:
19828e5c6995SAbhishek Sahu 		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
19838e5c6995SAbhishek Sahu 		break;
19848e5c6995SAbhishek Sahu 
19850f3e72b5SJason Gunthorpe 	default:
19860f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->ioctl))
19878e5c6995SAbhishek Sahu 			ret = -EINVAL;
19888e5c6995SAbhishek Sahu 		else
19898e5c6995SAbhishek Sahu 			ret = device->ops->ioctl(device, cmd, arg);
19908e5c6995SAbhishek Sahu 		break;
19910f3e72b5SJason Gunthorpe 	}
19928e5c6995SAbhishek Sahu 
19938e5c6995SAbhishek Sahu 	vfio_device_pm_runtime_put(device);
19948e5c6995SAbhishek Sahu 	return ret;
19950f3e72b5SJason Gunthorpe }
19960f3e72b5SJason Gunthorpe 
19970f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
19980f3e72b5SJason Gunthorpe 				     size_t count, loff_t *ppos)
19990f3e72b5SJason Gunthorpe {
20000f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
20010f3e72b5SJason Gunthorpe 
20020f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->read))
20030f3e72b5SJason Gunthorpe 		return -EINVAL;
20040f3e72b5SJason Gunthorpe 
20050f3e72b5SJason Gunthorpe 	return device->ops->read(device, buf, count, ppos);
20060f3e72b5SJason Gunthorpe }
20070f3e72b5SJason Gunthorpe 
20080f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep,
20090f3e72b5SJason Gunthorpe 				      const char __user *buf,
20100f3e72b5SJason Gunthorpe 				      size_t count, loff_t *ppos)
20110f3e72b5SJason Gunthorpe {
20120f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
20130f3e72b5SJason Gunthorpe 
20140f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->write))
20150f3e72b5SJason Gunthorpe 		return -EINVAL;
20160f3e72b5SJason Gunthorpe 
20170f3e72b5SJason Gunthorpe 	return device->ops->write(device, buf, count, ppos);
20180f3e72b5SJason Gunthorpe }
20190f3e72b5SJason Gunthorpe 
20200f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
20210f3e72b5SJason Gunthorpe {
20220f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
20230f3e72b5SJason Gunthorpe 
20240f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->mmap))
20250f3e72b5SJason Gunthorpe 		return -EINVAL;
20260f3e72b5SJason Gunthorpe 
20270f3e72b5SJason Gunthorpe 	return device->ops->mmap(device, vma);
20280f3e72b5SJason Gunthorpe }
20290f3e72b5SJason Gunthorpe 
/* File operations for an open vfio device file. */
static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
20390f3e72b5SJason Gunthorpe 
20400f3e72b5SJason Gunthorpe /**
20410f3e72b5SJason Gunthorpe  * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
20420f3e72b5SJason Gunthorpe  * @file: VFIO group file
20430f3e72b5SJason Gunthorpe  *
20440f3e72b5SJason Gunthorpe  * The returned iommu_group is valid as long as a ref is held on the file.
20450f3e72b5SJason Gunthorpe  */
20460f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file)
20470f3e72b5SJason Gunthorpe {
20480f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
20490f3e72b5SJason Gunthorpe 
20500f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
20510f3e72b5SJason Gunthorpe 		return NULL;
20520f3e72b5SJason Gunthorpe 	return group->iommu_group;
20530f3e72b5SJason Gunthorpe }
20540f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
20550f3e72b5SJason Gunthorpe 
20560f3e72b5SJason Gunthorpe /**
20570f3e72b5SJason Gunthorpe  * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
20580f3e72b5SJason Gunthorpe  *        is always CPU cache coherent
20590f3e72b5SJason Gunthorpe  * @file: VFIO group file
20600f3e72b5SJason Gunthorpe  *
20610f3e72b5SJason Gunthorpe  * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
20620f3e72b5SJason Gunthorpe  * bit in DMA transactions. A return of false indicates that the user has
20630f3e72b5SJason Gunthorpe  * rights to access additional instructions such as wbinvd on x86.
20640f3e72b5SJason Gunthorpe  */
20650f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file)
20660f3e72b5SJason Gunthorpe {
20670f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
20680f3e72b5SJason Gunthorpe 	bool ret;
20690f3e72b5SJason Gunthorpe 
20700f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
20710f3e72b5SJason Gunthorpe 		return true;
20720f3e72b5SJason Gunthorpe 
20730f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
20740f3e72b5SJason Gunthorpe 	if (group->container) {
20750f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(group->container,
20760f3e72b5SJason Gunthorpe 						 VFIO_DMA_CC_IOMMU);
20770f3e72b5SJason Gunthorpe 	} else {
20780f3e72b5SJason Gunthorpe 		/*
20790f3e72b5SJason Gunthorpe 		 * Since the coherency state is determined only once a container
20800f3e72b5SJason Gunthorpe 		 * is attached the user must do so before they can prove they
20810f3e72b5SJason Gunthorpe 		 * have permission.
20820f3e72b5SJason Gunthorpe 		 */
20830f3e72b5SJason Gunthorpe 		ret = true;
20840f3e72b5SJason Gunthorpe 	}
20850f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
20860f3e72b5SJason Gunthorpe 	return ret;
20870f3e72b5SJason Gunthorpe }
20880f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
20890f3e72b5SJason Gunthorpe 
20900f3e72b5SJason Gunthorpe /**
20910f3e72b5SJason Gunthorpe  * vfio_file_set_kvm - Link a kvm with VFIO drivers
20920f3e72b5SJason Gunthorpe  * @file: VFIO group file
20930f3e72b5SJason Gunthorpe  * @kvm: KVM to link
20940f3e72b5SJason Gunthorpe  *
20950f3e72b5SJason Gunthorpe  * When a VFIO device is first opened the KVM will be available in
20960f3e72b5SJason Gunthorpe  * device->kvm if one was associated with the group.
20970f3e72b5SJason Gunthorpe  */
20980f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
20990f3e72b5SJason Gunthorpe {
21000f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
21010f3e72b5SJason Gunthorpe 
21020f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
21030f3e72b5SJason Gunthorpe 		return;
21040f3e72b5SJason Gunthorpe 
21050f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
21060f3e72b5SJason Gunthorpe 	group->kvm = kvm;
21070f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
21080f3e72b5SJason Gunthorpe }
21090f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
21100f3e72b5SJason Gunthorpe 
21110f3e72b5SJason Gunthorpe /**
21120f3e72b5SJason Gunthorpe  * vfio_file_has_dev - True if the VFIO file is a handle for device
21130f3e72b5SJason Gunthorpe  * @file: VFIO file to check
21140f3e72b5SJason Gunthorpe  * @device: Device that must be part of the file
21150f3e72b5SJason Gunthorpe  *
21160f3e72b5SJason Gunthorpe  * Returns true if given file has permission to manipulate the given device.
21170f3e72b5SJason Gunthorpe  */
21180f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
21190f3e72b5SJason Gunthorpe {
21200f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
21210f3e72b5SJason Gunthorpe 
21220f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
21230f3e72b5SJason Gunthorpe 		return false;
21240f3e72b5SJason Gunthorpe 
21250f3e72b5SJason Gunthorpe 	return group == device->group;
21260f3e72b5SJason Gunthorpe }
21270f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev);
21280f3e72b5SJason Gunthorpe 
21290f3e72b5SJason Gunthorpe /*
21300f3e72b5SJason Gunthorpe  * Sub-module support
21310f3e72b5SJason Gunthorpe  */
21320f3e72b5SJason Gunthorpe /*
21330f3e72b5SJason Gunthorpe  * Helper for managing a buffer of info chain capabilities, allocate or
21340f3e72b5SJason Gunthorpe  * reallocate a buffer with additional @size, filling in @id and @version
21350f3e72b5SJason Gunthorpe  * of the capability.  A pointer to the new capability is returned.
21360f3e72b5SJason Gunthorpe  *
21370f3e72b5SJason Gunthorpe  * NB. The chain is based at the head of the buffer, so new entries are
21380f3e72b5SJason Gunthorpe  * added to the tail, vfio_info_cap_shift() should be called to fixup the
21390f3e72b5SJason Gunthorpe  * next offsets prior to copying to the user buffer.
21400f3e72b5SJason Gunthorpe  */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	/* On allocation failure the whole chain is released and reset */
	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	/* 'next' offsets are relative to the start of caps->buf */
	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);
21740f3e72b5SJason Gunthorpe 
/*
 * Rebase the 'next' offsets of a capability chain by @offset, used before
 * the chain is copied @offset bytes into the user buffer.  The chain still
 * lives at the start of caps->buf, so the walk must subtract @offset back
 * out of each already-shifted 'next' to find the following header.
 */
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
21840f3e72b5SJason Gunthorpe 
21850f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps,
21860f3e72b5SJason Gunthorpe 			     struct vfio_info_cap_header *cap, size_t size)
21870f3e72b5SJason Gunthorpe {
21880f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header;
21890f3e72b5SJason Gunthorpe 
21900f3e72b5SJason Gunthorpe 	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
21910f3e72b5SJason Gunthorpe 	if (IS_ERR(header))
21920f3e72b5SJason Gunthorpe 		return PTR_ERR(header);
21930f3e72b5SJason Gunthorpe 
21940f3e72b5SJason Gunthorpe 	memcpy(header + 1, cap + 1, size - sizeof(*header));
21950f3e72b5SJason Gunthorpe 
21960f3e72b5SJason Gunthorpe 	return 0;
21970f3e72b5SJason Gunthorpe }
21980f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability);
21990f3e72b5SJason Gunthorpe 
/*
 * Validate a VFIO_DEVICE_SET_IRQS header against the device's IRQ layout
 * and, when the data type carries a payload, compute its size.
 *
 * @hdr          : header copied in from userspace
 * @num_irqs     : number of IRQs the device exposes for hdr->index
 * @max_irq_type : number of valid IRQ index values
 * @data_size    : out, payload size in bytes (0 for DATA_NONE); may be NULL
 *                 when the caller wants validation only
 *
 * Returns 0 on success, -EINVAL on malformed input.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/* Short argsz, bad index, start+count overflow, or unknown flags */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	/* Requested range must fall inside the device's IRQ count */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		/* The per-IRQ payload array must fit within argsz */
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
22470f3e72b5SJason Gunthorpe 
22480f3e72b5SJason Gunthorpe /*
22490f3e72b5SJason Gunthorpe  * Pin contiguous user pages and return their associated host pages for local
22500f3e72b5SJason Gunthorpe  * domain only.
22510f3e72b5SJason Gunthorpe  * @device [in]  : device
22520f3e72b5SJason Gunthorpe  * @iova [in]    : starting IOVA of user pages to be pinned.
22530f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be pinned.  This count should not
22540f3e72b5SJason Gunthorpe  *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
22550f3e72b5SJason Gunthorpe  * @prot [in]    : protection flags
22560f3e72b5SJason Gunthorpe  * @pages[out]   : array of host pages
22570f3e72b5SJason Gunthorpe  * Return error or number of pages pinned.
225821c13829SJason Gunthorpe  *
225921c13829SJason Gunthorpe  * A driver may only call this function if the vfio_device was created
226021c13829SJason Gunthorpe  * by vfio_register_emulated_iommu_dev().
22610f3e72b5SJason Gunthorpe  */
22620f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
22630f3e72b5SJason Gunthorpe 		   int npage, int prot, struct page **pages)
22640f3e72b5SJason Gunthorpe {
22650f3e72b5SJason Gunthorpe 	struct vfio_container *container;
22660f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
22670f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
22680f3e72b5SJason Gunthorpe 	int ret;
22690f3e72b5SJason Gunthorpe 
22700f3e72b5SJason Gunthorpe 	if (!pages || !npage || !vfio_assert_device_open(device))
22710f3e72b5SJason Gunthorpe 		return -EINVAL;
22720f3e72b5SJason Gunthorpe 
22730f3e72b5SJason Gunthorpe 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
22740f3e72b5SJason Gunthorpe 		return -E2BIG;
22750f3e72b5SJason Gunthorpe 
22760f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
22770f3e72b5SJason Gunthorpe 	container = group->container;
22780f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
22790f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->pin_pages))
22800f3e72b5SJason Gunthorpe 		ret = driver->ops->pin_pages(container->iommu_data,
22810f3e72b5SJason Gunthorpe 					     group->iommu_group, iova,
22820f3e72b5SJason Gunthorpe 					     npage, prot, pages);
22830f3e72b5SJason Gunthorpe 	else
22840f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
22850f3e72b5SJason Gunthorpe 
22860f3e72b5SJason Gunthorpe 	return ret;
22870f3e72b5SJason Gunthorpe }
22880f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages);
22890f3e72b5SJason Gunthorpe 
22900f3e72b5SJason Gunthorpe /*
22910f3e72b5SJason Gunthorpe  * Unpin contiguous host pages for local domain only.
22920f3e72b5SJason Gunthorpe  * @device [in]  : device
22930f3e72b5SJason Gunthorpe  * @iova [in]    : starting address of user pages to be unpinned.
22940f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be unpinned.  This count should not
22950f3e72b5SJason Gunthorpe  *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
22960f3e72b5SJason Gunthorpe  */
22970f3e72b5SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
22980f3e72b5SJason Gunthorpe {
22990f3e72b5SJason Gunthorpe 	struct vfio_container *container;
23000f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
23010f3e72b5SJason Gunthorpe 
23020f3e72b5SJason Gunthorpe 	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
23030f3e72b5SJason Gunthorpe 		return;
23040f3e72b5SJason Gunthorpe 
23050f3e72b5SJason Gunthorpe 	if (WARN_ON(!vfio_assert_device_open(device)))
23060f3e72b5SJason Gunthorpe 		return;
23070f3e72b5SJason Gunthorpe 
23080f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
23090f3e72b5SJason Gunthorpe 	container = device->group->container;
23100f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
23110f3e72b5SJason Gunthorpe 
23120f3e72b5SJason Gunthorpe 	driver->ops->unpin_pages(container->iommu_data, iova, npage);
23130f3e72b5SJason Gunthorpe }
23140f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages);
23150f3e72b5SJason Gunthorpe 
23160f3e72b5SJason Gunthorpe /*
23170f3e72b5SJason Gunthorpe  * This interface allows the CPUs to perform some sort of virtual DMA on
23180f3e72b5SJason Gunthorpe  * behalf of the device.
23190f3e72b5SJason Gunthorpe  *
23200f3e72b5SJason Gunthorpe  * CPUs read/write from/into a range of IOVAs pointing to user space memory
23210f3e72b5SJason Gunthorpe  * into/from a kernel buffer.
23220f3e72b5SJason Gunthorpe  *
23230f3e72b5SJason Gunthorpe  * As the read/write of user space memory is conducted via the CPUs and is
23240f3e72b5SJason Gunthorpe  * not a real device DMA, it is not necessary to pin the user space memory.
23250f3e72b5SJason Gunthorpe  *
23260f3e72b5SJason Gunthorpe  * @device [in]		: VFIO device
23270f3e72b5SJason Gunthorpe  * @iova [in]		: base IOVA of a user space buffer
23280f3e72b5SJason Gunthorpe  * @data [in]		: pointer to kernel buffer
23290f3e72b5SJason Gunthorpe  * @len [in]		: kernel buffer length
23300f3e72b5SJason Gunthorpe  * @write		: indicate read or write
23310f3e72b5SJason Gunthorpe  * Return error code on failure or 0 on success.
23320f3e72b5SJason Gunthorpe  */
23330f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
23340f3e72b5SJason Gunthorpe 		size_t len, bool write)
23350f3e72b5SJason Gunthorpe {
23360f3e72b5SJason Gunthorpe 	struct vfio_container *container;
23370f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
23380f3e72b5SJason Gunthorpe 	int ret = 0;
23390f3e72b5SJason Gunthorpe 
23400f3e72b5SJason Gunthorpe 	if (!data || len <= 0 || !vfio_assert_device_open(device))
23410f3e72b5SJason Gunthorpe 		return -EINVAL;
23420f3e72b5SJason Gunthorpe 
23430f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
23440f3e72b5SJason Gunthorpe 	container = device->group->container;
23450f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
23460f3e72b5SJason Gunthorpe 
23470f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->dma_rw))
23480f3e72b5SJason Gunthorpe 		ret = driver->ops->dma_rw(container->iommu_data,
23490f3e72b5SJason Gunthorpe 					  iova, data, len, write);
23500f3e72b5SJason Gunthorpe 	else
23510f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
23520f3e72b5SJason Gunthorpe 	return ret;
23530f3e72b5SJason Gunthorpe }
23540f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw);
23550f3e72b5SJason Gunthorpe 
23560f3e72b5SJason Gunthorpe /*
23570f3e72b5SJason Gunthorpe  * Module/class support
23580f3e72b5SJason Gunthorpe  */
23590f3e72b5SJason Gunthorpe static char *vfio_devnode(struct device *dev, umode_t *mode)
23600f3e72b5SJason Gunthorpe {
23610f3e72b5SJason Gunthorpe 	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
23620f3e72b5SJason Gunthorpe }
23630f3e72b5SJason Gunthorpe 
/* The core /dev/vfio/vfio misc device, world read/write accessible. */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
23710f3e72b5SJason Gunthorpe 
23720f3e72b5SJason Gunthorpe static int __init vfio_init(void)
23730f3e72b5SJason Gunthorpe {
23740f3e72b5SJason Gunthorpe 	int ret;
23750f3e72b5SJason Gunthorpe 
23760f3e72b5SJason Gunthorpe 	ida_init(&vfio.group_ida);
23770f3e72b5SJason Gunthorpe 	mutex_init(&vfio.group_lock);
23780f3e72b5SJason Gunthorpe 	mutex_init(&vfio.iommu_drivers_lock);
23790f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&vfio.group_list);
23800f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
23810f3e72b5SJason Gunthorpe 
23820f3e72b5SJason Gunthorpe 	ret = misc_register(&vfio_dev);
23830f3e72b5SJason Gunthorpe 	if (ret) {
23840f3e72b5SJason Gunthorpe 		pr_err("vfio: misc device register failed\n");
23850f3e72b5SJason Gunthorpe 		return ret;
23860f3e72b5SJason Gunthorpe 	}
23870f3e72b5SJason Gunthorpe 
23880f3e72b5SJason Gunthorpe 	/* /dev/vfio/$GROUP */
23890f3e72b5SJason Gunthorpe 	vfio.class = class_create(THIS_MODULE, "vfio");
23900f3e72b5SJason Gunthorpe 	if (IS_ERR(vfio.class)) {
23910f3e72b5SJason Gunthorpe 		ret = PTR_ERR(vfio.class);
23920f3e72b5SJason Gunthorpe 		goto err_class;
23930f3e72b5SJason Gunthorpe 	}
23940f3e72b5SJason Gunthorpe 
23950f3e72b5SJason Gunthorpe 	vfio.class->devnode = vfio_devnode;
23960f3e72b5SJason Gunthorpe 
23970f3e72b5SJason Gunthorpe 	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
23980f3e72b5SJason Gunthorpe 	if (ret)
23990f3e72b5SJason Gunthorpe 		goto err_alloc_chrdev;
24000f3e72b5SJason Gunthorpe 
24010f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
24020f3e72b5SJason Gunthorpe 	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
24030f3e72b5SJason Gunthorpe #endif
24040f3e72b5SJason Gunthorpe 	if (ret)
24050f3e72b5SJason Gunthorpe 		goto err_driver_register;
24060f3e72b5SJason Gunthorpe 
24070f3e72b5SJason Gunthorpe 	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
24080f3e72b5SJason Gunthorpe 	return 0;
24090f3e72b5SJason Gunthorpe 
24100f3e72b5SJason Gunthorpe err_driver_register:
24110f3e72b5SJason Gunthorpe 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
24120f3e72b5SJason Gunthorpe err_alloc_chrdev:
24130f3e72b5SJason Gunthorpe 	class_destroy(vfio.class);
24140f3e72b5SJason Gunthorpe 	vfio.class = NULL;
24150f3e72b5SJason Gunthorpe err_class:
24160f3e72b5SJason Gunthorpe 	misc_deregister(&vfio_dev);
24170f3e72b5SJason Gunthorpe 	return ret;
24180f3e72b5SJason Gunthorpe }
24190f3e72b5SJason Gunthorpe 
/*
 * Module exit: tear down everything vfio_init() set up, in reverse
 * order of initialization.  Statement order matters here — do not
 * reorder without mirroring a change in vfio_init().
 */
static void __exit vfio_cleanup(void)
{
	/* All groups should already be gone by the time we can unload */
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}
24340f3e72b5SJason Gunthorpe 
/* Module entry and exit points */
module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
/* Aliases tying the module to the fixed misc minor and /dev node name */
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
/* Pull in an IOMMU backend after this module when one is available */
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
2445