xref: /openbmc/linux/drivers/vfio/vfio_main.c (revision a881b496)
10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
20f3e72b5SJason Gunthorpe /*
30f3e72b5SJason Gunthorpe  * VFIO core
40f3e72b5SJason Gunthorpe  *
50f3e72b5SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
60f3e72b5SJason Gunthorpe  *     Author: Alex Williamson <alex.williamson@redhat.com>
70f3e72b5SJason Gunthorpe  *
80f3e72b5SJason Gunthorpe  * Derived from original vfio:
90f3e72b5SJason Gunthorpe  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
100f3e72b5SJason Gunthorpe  * Author: Tom Lyon, pugs@cisco.com
110f3e72b5SJason Gunthorpe  */
120f3e72b5SJason Gunthorpe 
130f3e72b5SJason Gunthorpe #include <linux/cdev.h>
140f3e72b5SJason Gunthorpe #include <linux/compat.h>
150f3e72b5SJason Gunthorpe #include <linux/device.h>
160f3e72b5SJason Gunthorpe #include <linux/fs.h>
170f3e72b5SJason Gunthorpe #include <linux/idr.h>
180f3e72b5SJason Gunthorpe #include <linux/iommu.h>
192b48f52fSMatthew Rosato #ifdef CONFIG_HAVE_KVM
202b48f52fSMatthew Rosato #include <linux/kvm_host.h>
212b48f52fSMatthew Rosato #endif
220f3e72b5SJason Gunthorpe #include <linux/list.h>
230f3e72b5SJason Gunthorpe #include <linux/miscdevice.h>
240f3e72b5SJason Gunthorpe #include <linux/module.h>
250f3e72b5SJason Gunthorpe #include <linux/mutex.h>
260f3e72b5SJason Gunthorpe #include <linux/pci.h>
270f3e72b5SJason Gunthorpe #include <linux/rwsem.h>
280f3e72b5SJason Gunthorpe #include <linux/sched.h>
290f3e72b5SJason Gunthorpe #include <linux/slab.h>
300f3e72b5SJason Gunthorpe #include <linux/stat.h>
310f3e72b5SJason Gunthorpe #include <linux/string.h>
320f3e72b5SJason Gunthorpe #include <linux/uaccess.h>
330f3e72b5SJason Gunthorpe #include <linux/vfio.h>
340f3e72b5SJason Gunthorpe #include <linux/wait.h>
350f3e72b5SJason Gunthorpe #include <linux/sched/signal.h>
368e5c6995SAbhishek Sahu #include <linux/pm_runtime.h>
3780c4b92aSYishai Hadas #include <linux/interval_tree.h>
3880c4b92aSYishai Hadas #include <linux/iova_bitmap.h>
392a3dab19SJason Gunthorpe #include <linux/iommufd.h>
400f3e72b5SJason Gunthorpe #include "vfio.h"
410f3e72b5SJason Gunthorpe 
420f3e72b5SJason Gunthorpe #define DRIVER_VERSION	"0.3"
430f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
440f3e72b5SJason Gunthorpe #define DRIVER_DESC	"VFIO - User Level meta-driver"
450f3e72b5SJason Gunthorpe 
/* Singleton holding vfio core global state. */
static struct vfio {
	struct class			*device_class;	/* class assigned to each vfio core device */
	struct ida			device_ida;	/* allocator for vfio_device->index ("vfio%d") */
} vfio;
500f3e72b5SJason Gunthorpe 
#ifdef CONFIG_VFIO_NOIOMMU
/* Opt-in switch for the unsafe no-IOMMU mode described below. */
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

/* Maps set_id (cast to unsigned long) to its struct vfio_device_set. */
static DEFINE_XARRAY(vfio_device_set_xa);
590f3e72b5SJason Gunthorpe 
/*
 * Associate @device with the vfio_device_set keyed by @set_id, creating the
 * set on first use.  On success the set's device_count is incremented and
 * the device is linked onto the set's device_list.  Returns 0 or -errno.
 */
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	/* Not found: allocate a candidate set outside the xarray lock. */
	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	/*
	 * Re-check under the lock: another thread may have installed a set
	 * for this id while we were allocating.  __xa_cmpxchg() only stores
	 * our candidate when the slot is still empty.
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		/* Our candidate won the race and is now installed. */
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	/* Lost the race (or xarray error): drop our unused candidate. */
	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	/* Pin the set while this device is a member; see vfio_release_device_set(). */
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
1090f3e72b5SJason Gunthorpe 
/*
 * Undo vfio_assign_device_set(): unlink @device from its set and free the
 * set once the last member is gone.  Safe to call on a device that never
 * got a set (device->dev_set == NULL).
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* Drop the count under the xarray lock, mirroring found_get_ref. */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
1300f3e72b5SJason Gunthorpe 
vfio_device_set_open_count(struct vfio_device_set * dev_set)1315cd189e4SAnthony DeRossi unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
1325cd189e4SAnthony DeRossi {
1335cd189e4SAnthony DeRossi 	struct vfio_device *cur;
1345cd189e4SAnthony DeRossi 	unsigned int open_count = 0;
1355cd189e4SAnthony DeRossi 
1365cd189e4SAnthony DeRossi 	lockdep_assert_held(&dev_set->lock);
1375cd189e4SAnthony DeRossi 
1385cd189e4SAnthony DeRossi 	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
1395cd189e4SAnthony DeRossi 		open_count += cur->open_count;
1405cd189e4SAnthony DeRossi 	return open_count;
1415cd189e4SAnthony DeRossi }
1425cd189e4SAnthony DeRossi EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
1435cd189e4SAnthony DeRossi 
144a80e1de9SYi Liu struct vfio_device *
vfio_find_device_in_devset(struct vfio_device_set * dev_set,struct device * dev)145a80e1de9SYi Liu vfio_find_device_in_devset(struct vfio_device_set *dev_set,
146a80e1de9SYi Liu 			   struct device *dev)
147a80e1de9SYi Liu {
148a80e1de9SYi Liu 	struct vfio_device *cur;
149a80e1de9SYi Liu 
150a80e1de9SYi Liu 	lockdep_assert_held(&dev_set->lock);
151a80e1de9SYi Liu 
152a80e1de9SYi Liu 	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
153a80e1de9SYi Liu 		if (cur->dev == dev)
154a80e1de9SYi Liu 			return cur;
155a80e1de9SYi Liu 	return NULL;
156a80e1de9SYi Liu }
157a80e1de9SYi Liu EXPORT_SYMBOL_GPL(vfio_find_device_in_devset);
158a80e1de9SYi Liu 
1590f3e72b5SJason Gunthorpe /*
1600f3e72b5SJason Gunthorpe  * Device objects - create, release, get, put, search
1610f3e72b5SJason Gunthorpe  */
1620f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */
vfio_device_put_registration(struct vfio_device * device)1639eefba80SYi Liu void vfio_device_put_registration(struct vfio_device *device)
1640f3e72b5SJason Gunthorpe {
1650f3e72b5SJason Gunthorpe 	if (refcount_dec_and_test(&device->refcount))
1660f3e72b5SJason Gunthorpe 		complete(&device->comp);
1670f3e72b5SJason Gunthorpe }
1680f3e72b5SJason Gunthorpe 
/*
 * Try to take a registration reference.  Returns false once the refcount
 * has already dropped to zero, i.e. after unregistration has begun.
 */
bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
1730f3e72b5SJason Gunthorpe 
1740f3e72b5SJason Gunthorpe /*
1750f3e72b5SJason Gunthorpe  * VFIO driver API
1760f3e72b5SJason Gunthorpe  */
/* Release helper called by vfio_put_device() */
static void vfio_device_release(struct device *dev)
{
	struct vfio_device *device =
			container_of(dev, struct vfio_device, device);

	/* Detach from the device set and return the index before freeing. */
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);

	/* Let the driver tear down its private state (optional callback). */
	if (device->ops->release)
		device->ops->release(device);

	/* Matches the kvzalloc() in _vfio_alloc_device(). */
	kvfree(device);
}
191cb9ff3f3SKevin Tian 
192d1104f93SEric Farman static int vfio_init_device(struct vfio_device *device, struct device *dev,
193d1104f93SEric Farman 			    const struct vfio_device_ops *ops);
194d1104f93SEric Farman 
195cb9ff3f3SKevin Tian /*
196cb9ff3f3SKevin Tian  * Allocate and initialize vfio_device so it can be registered to vfio
197cb9ff3f3SKevin Tian  * core.
198cb9ff3f3SKevin Tian  *
199cb9ff3f3SKevin Tian  * Drivers should use the wrapper vfio_alloc_device() for allocation.
200cb9ff3f3SKevin Tian  * @size is the size of the structure to be allocated, including any
201cb9ff3f3SKevin Tian  * private data used by the driver.
202cb9ff3f3SKevin Tian  *
203cb9ff3f3SKevin Tian  * Driver may provide an @init callback to cover device private data.
204cb9ff3f3SKevin Tian  *
205cb9ff3f3SKevin Tian  * Use vfio_put_device() to release the structure after success return.
206cb9ff3f3SKevin Tian  */
struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
				       const struct vfio_device_ops *ops)
{
	struct vfio_device *device;
	int ret;

	/* @size must at least cover the embedded core structure. */
	if (WARN_ON(size < sizeof(struct vfio_device)))
		return ERR_PTR(-EINVAL);

	device = kvzalloc(size, GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	ret = vfio_init_device(device, dev, ops);
	if (ret) {
		kvfree(device);
		return ERR_PTR(ret);
	}

	return device;
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);
230cb9ff3f3SKevin Tian 
/*
 * Initialize a vfio_device so it can be registered to vfio core.
 *
 * Allocates the device index, wires up the embedded struct device and runs
 * the driver's optional init callback.  Returns 0 or -errno; on failure
 * everything acquired here is rolled back.
 */
static int vfio_init_device(struct vfio_device *device, struct device *dev,
			    const struct vfio_device_ops *ops)
{
	int ret;

	/* Index used to name the device ("vfio%d", see __vfio_register_dev). */
	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
	if (ret < 0) {
		dev_dbg(dev, "Error to alloc index\n");
		return ret;
	}

	device->index = ret;
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;

	/* Give the driver a chance to set up its private data. */
	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	/* The release callback frees the device; see vfio_device_release(). */
	device_initialize(&device->device);
	device->device.release = vfio_device_release;
	device->device.class = vfio.device_class;
	device->device.parent = device->dev;
	return 0;

out_uninit:
	/* ops->init may have assigned a dev_set; drop it on failure. */
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);
	return ret;
}
267cb9ff3f3SKevin Tian 
/*
 * Common registration path for the iommu-group backed and emulated-iommu
 * device types.  On success the device is visible to userspace; on failure
 * the group association is torn down.  Returns 0 or -errno.
 */
static int __vfio_register_dev(struct vfio_device *device,
			       enum vfio_group_type type)
{
	int ret;

	/* iommufd-enabled kernels require the full iommufd callback set. */
	if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) &&
		    (!device->ops->bind_iommufd ||
		     !device->ops->unbind_iommufd ||
		     !device->ops->attach_ioas ||
		     !device->ops->detach_ioas)))
		return -EINVAL;

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	ret = dev_set_name(&device->device, "vfio%d", device->index);
	if (ret)
		return ret;

	ret = vfio_device_set_group(device, type);
	if (ret)
		return ret;

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
	    !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) {
		ret = -EINVAL;
		goto err_out;
	}

	ret = vfio_device_add(device);
	if (ret)
		goto err_out;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	vfio_device_group_register(device);

	return 0;
err_out:
	/* Balances vfio_device_set_group() above. */
	vfio_device_remove_group(device);
	return ret;
}
3200f3e72b5SJason Gunthorpe 
/* Register @device through the iommu-group backed path (VFIO_IOMMU). */
int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device, VFIO_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
3260f3e72b5SJason Gunthorpe 
/*
 * Register a virtual device without IOMMU backing.  The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
3360f3e72b5SJason Gunthorpe 
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/*
	 * Prevent new device opened by userspace via the
	 * VFIO_GROUP_GET_DEVICE_FD in the group path.
	 */
	vfio_device_group_unregister(device);

	/*
	 * Balances vfio_device_add() in register path, also prevents
	 * new device opened by userspace in the cdev path.
	 */
	vfio_device_del(device);

	/* Drop the registration ref set in __vfio_register_dev(). */
	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/* Re-issue the driver's release request every 10s until done. */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			/*
			 * Wait interruptibly first; after a signal arrives,
			 * warn once and fall back to uninterruptible waits.
			 */
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	/* Balances vfio_device_set_group in register path */
	vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
3850f3e72b5SJason Gunthorpe 
3862b48f52fSMatthew Rosato #ifdef CONFIG_HAVE_KVM
/*
 * Take a reference on @kvm and remember it on @device.  kvm_get_kvm_safe()
 * and kvm_put_kvm() are resolved at runtime via symbol_get() so vfio does
 * not carry a hard module dependency on kvm.  On any failure the device is
 * simply left without a kvm reference.  Requires dev_set->lock.
 */
void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
{
	void (*pfn)(struct kvm *kvm);
	bool (*fn)(struct kvm *kvm);
	bool ret;

	lockdep_assert_held(&device->dev_set->lock);

	if (!kvm)
		return;

	/* Resolve the put side first so a taken ref can always be dropped. */
	pfn = symbol_get(kvm_put_kvm);
	if (WARN_ON(!pfn))
		return;

	fn = symbol_get(kvm_get_kvm_safe);
	if (WARN_ON(!fn)) {
		symbol_put(kvm_put_kvm);
		return;
	}

	/* kvm_get_kvm_safe() returns false if the kvm refcount is already 0. */
	ret = fn(kvm);
	symbol_put(kvm_get_kvm_safe);
	if (!ret) {
		symbol_put(kvm_put_kvm);
		return;
	}

	/* Keep kvm_put_kvm pinned until vfio_device_put_kvm(). */
	device->put_kvm = pfn;
	device->kvm = kvm;
}
4182b48f52fSMatthew Rosato 
vfio_device_put_kvm(struct vfio_device * device)4192b48f52fSMatthew Rosato void vfio_device_put_kvm(struct vfio_device *device)
4202b48f52fSMatthew Rosato {
4212b48f52fSMatthew Rosato 	lockdep_assert_held(&device->dev_set->lock);
4222b48f52fSMatthew Rosato 
4232b48f52fSMatthew Rosato 	if (!device->kvm)
4242b48f52fSMatthew Rosato 		return;
4252b48f52fSMatthew Rosato 
4262b48f52fSMatthew Rosato 	if (WARN_ON(!device->put_kvm))
4272b48f52fSMatthew Rosato 		goto clear;
4282b48f52fSMatthew Rosato 
4292b48f52fSMatthew Rosato 	device->put_kvm(device->kvm);
4302b48f52fSMatthew Rosato 	device->put_kvm = NULL;
4312b48f52fSMatthew Rosato 	symbol_put(kvm_put_kvm);
4322b48f52fSMatthew Rosato 
4332b48f52fSMatthew Rosato clear:
4342b48f52fSMatthew Rosato 	device->kvm = NULL;
4352b48f52fSMatthew Rosato }
4362b48f52fSMatthew Rosato #endif
4372b48f52fSMatthew Rosato 
/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	/* WARNs (once) and returns false if the device has no open users. */
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
4430f3e72b5SJason Gunthorpe 
444b1a3b5c6SYi Liu struct vfio_device_file *
vfio_allocate_device_file(struct vfio_device * device)445b1a3b5c6SYi Liu vfio_allocate_device_file(struct vfio_device *device)
446b1a3b5c6SYi Liu {
447b1a3b5c6SYi Liu 	struct vfio_device_file *df;
448b1a3b5c6SYi Liu 
449b1a3b5c6SYi Liu 	df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT);
450b1a3b5c6SYi Liu 	if (!df)
451b1a3b5c6SYi Liu 		return ERR_PTR(-ENOMEM);
452b1a3b5c6SYi Liu 
453b1a3b5c6SYi Liu 	df->device = device;
45434aeeecdSYi Liu 	spin_lock_init(&df->kvm_ref_lock);
455b1a3b5c6SYi Liu 
456b1a3b5c6SYi Liu 	return df;
457b1a3b5c6SYi Liu }
458b1a3b5c6SYi Liu 
/*
 * First open of a device: bind it to its IOMMU backend (iommufd or the
 * legacy group path) and call the driver's open_device().  Called with
 * dev_set->lock held.  A module reference pins the driver while open.
 */
static int vfio_df_device_first_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;
	int ret;

	lockdep_assert_held(&device->dev_set->lock);

	/* Pin the driver module for the lifetime of the open. */
	if (!try_module_get(device->dev->driver->owner))
		return -ENODEV;

	/* df->iommufd selects between the iommufd and group backends. */
	if (iommufd)
		ret = vfio_df_iommufd_bind(df);
	else
		ret = vfio_device_group_use_iommu(device);
	if (ret)
		goto err_module_put;

	if (device->ops->open_device) {
		ret = device->ops->open_device(device);
		if (ret)
			goto err_unuse_iommu;
	}
	return 0;

err_unuse_iommu:
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
err_module_put:
	module_put(device->dev->driver->owner);
	return ret;
}
493294aaccbSJason Gunthorpe 
/*
 * Last close of a device: reverse of vfio_df_device_first_open() — call
 * close_device(), unbind from the IOMMU backend and drop the driver module
 * reference.  Called with dev_set->lock held.
 */
static void vfio_df_device_last_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;

	lockdep_assert_held(&device->dev_set->lock);

	if (device->ops->close_device)
		device->ops->close_device(device);
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
	module_put(device->dev->driver->owner);
}
509294aaccbSJason Gunthorpe 
/*
 * Open @df's device, running first-open setup when this is the first user.
 * Must be called with dev_set->lock held.  Returns 0 or -errno.
 */
int vfio_df_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	int ret = 0;

	lockdep_assert_held(&device->dev_set->lock);

	/*
	 * Only the group path allows the device to be opened multiple
	 * times.  The device cdev path doesn't have a secure way for it.
	 */
	if (device->open_count != 0 && !df->group)
		return -EINVAL;

	device->open_count++;
	if (device->open_count == 1) {
		ret = vfio_df_device_first_open(df);
		if (ret)
			device->open_count--;	/* undo on failed first open */
	}

	return ret;
}
5335cfff077SYi Liu 
/*
 * Close @df's device, tearing down the IOMMU binding when this is the last
 * user.  Must be called with dev_set->lock held.
 */
void vfio_df_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;

	lockdep_assert_held(&device->dev_set->lock);

	/* WARNs if close is called without a matching open. */
	vfio_assert_device_open(device);
	if (device->open_count == 1)
		vfio_df_device_last_close(df);
	device->open_count--;
}
5455cfff077SYi Liu 
5460f3e72b5SJason Gunthorpe /*
5478e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_resume_and_get().
5488e5c6995SAbhishek Sahu  * Return error code on failure or 0 on success.
5498e5c6995SAbhishek Sahu  */
vfio_device_pm_runtime_get(struct vfio_device * device)5508e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
5518e5c6995SAbhishek Sahu {
5528e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
5538e5c6995SAbhishek Sahu 
5548e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm) {
5558e5c6995SAbhishek Sahu 		int ret;
5568e5c6995SAbhishek Sahu 
5578e5c6995SAbhishek Sahu 		ret = pm_runtime_resume_and_get(dev);
5588e5c6995SAbhishek Sahu 		if (ret) {
5598e5c6995SAbhishek Sahu 			dev_info_ratelimited(dev,
5608e5c6995SAbhishek Sahu 				"vfio: runtime resume failed %d\n", ret);
5618e5c6995SAbhishek Sahu 			return -EIO;
5628e5c6995SAbhishek Sahu 		}
5638e5c6995SAbhishek Sahu 	}
5648e5c6995SAbhishek Sahu 
5658e5c6995SAbhishek Sahu 	return 0;
5668e5c6995SAbhishek Sahu }
5678e5c6995SAbhishek Sahu 
5688e5c6995SAbhishek Sahu /*
5698e5c6995SAbhishek Sahu  * Wrapper around pm_runtime_put().
5708e5c6995SAbhishek Sahu  */
vfio_device_pm_runtime_put(struct vfio_device * device)5718e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
5728e5c6995SAbhishek Sahu {
5738e5c6995SAbhishek Sahu 	struct device *dev = device->dev;
5748e5c6995SAbhishek Sahu 
5758e5c6995SAbhishek Sahu 	if (dev->driver && dev->driver->pm)
5768e5c6995SAbhishek Sahu 		pm_runtime_put(dev);
5778e5c6995SAbhishek Sahu }
5788e5c6995SAbhishek Sahu 
5798e5c6995SAbhishek Sahu /*
5800f3e72b5SJason Gunthorpe  * VFIO Device fd
5810f3e72b5SJason Gunthorpe  */
/* f_op->release for the vfio device fd: close and free the per-fd state. */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;

	/* Tear down via whichever path (group or cdev/iommufd) owns the fd. */
	if (df->group)
		vfio_df_group_close(df);
	else
		vfio_df_unbind_iommufd(df);

	/* Release this file's registration reference on the device. */
	vfio_device_put_registration(device);

	kfree(df);

	return 0;
}
5980f3e72b5SJason Gunthorpe 
5990f3e72b5SJason Gunthorpe /*
6000f3e72b5SJason Gunthorpe  * vfio_mig_get_next_state - Compute the next step in the FSM
6010f3e72b5SJason Gunthorpe  * @cur_fsm - The current state the device is in
6020f3e72b5SJason Gunthorpe  * @new_fsm - The target state to reach
6030f3e72b5SJason Gunthorpe  * @next_fsm - Pointer to the next step to get to new_fsm
6040f3e72b5SJason Gunthorpe  *
6050f3e72b5SJason Gunthorpe  * Return 0 upon success, otherwise -errno
6060f3e72b5SJason Gunthorpe  * Upon success the next step in the state progression between cur_fsm and
6070f3e72b5SJason Gunthorpe  * new_fsm will be set in next_fsm.
6080f3e72b5SJason Gunthorpe  *
6090f3e72b5SJason Gunthorpe  * This breaks down requests for combination transitions into smaller steps and
6100f3e72b5SJason Gunthorpe  * returns the next step to get to new_fsm. The function may need to be called
6110f3e72b5SJason Gunthorpe  * multiple times before reaching new_fsm.
6120f3e72b5SJason Gunthorpe  *
6130f3e72b5SJason Gunthorpe  */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	/* Table dimension: one slot per FSM state, PRE_COPY_P2P is the last */
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 *
	 * If precopy is supported then the driver must support these additional
	 * FSM arcs:
	 *         RUNNING -> PRE_COPY
	 *         PRE_COPY -> RUNNING
	 *         PRE_COPY -> STOP_COPY
	 * However, if precopy and P2P are supported together then the driver
	 * must support these additional arcs beyond the P2P arcs above:
	 *         PRE_COPY -> RUNNING
	 *         PRE_COPY -> PRE_COPY_P2P
	 *         PRE_COPY_P2P -> PRE_COPY
	 *         PRE_COPY_P2P -> RUNNING_P2P
	 *         PRE_COPY_P2P -> STOP_COPY
	 *         RUNNING -> PRE_COPY
	 *         RUNNING_P2P -> PRE_COPY_P2P
	 *
	 * Without P2P and precopy the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         PRE_COPY -> PRE_COPY_P2P -> STOP_COPY
	 *         PRE_COPY -> RUNNING -> RUNNING_P2P
	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP
	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         PRE_COPY_P2P -> RUNNING_P2P -> RUNNING
	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP
	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> PRE_COPY_P2P
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> PRE_COPY_P2P
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 *
	 *  The following transitions are blocked:
	 *         STOP_COPY -> PRE_COPY
	 *         STOP_COPY -> PRE_COPY_P2P
	 */
	/*
	 * vfio_from_fsm_table[cur][target] yields the next single-step state
	 * on the path from cur to target. Blocked arcs map to ERROR, which
	 * the final return converts to -EINVAL.
	 */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_PRE_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/*
	 * Migration feature flags (VFIO_MIGRATION_*) the device must support
	 * for each state to be usable. Compared against
	 * device->migration_flags below; ERROR requires everything (~0U) so
	 * it is never skipped as "unsupported" by the loop.
	 */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_PRE_COPY] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY,
		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY |
						   VFIO_MIGRATION_P2P |
						   VFIO_MIGRATION_PRE_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	/*
	 * cur_fsm is under kernel control, so an out-of-range or unsupported
	 * current state is a driver bug worth a WARN_ON.
	 */
	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	/* new_fsm comes from userspace; reject it quietly if unsupported */
	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	   (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	/* Blocked transitions resolve to ERROR in the table */
	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
8080f3e72b5SJason Gunthorpe 
8090f3e72b5SJason Gunthorpe /*
8100f3e72b5SJason Gunthorpe  * Convert the drivers's struct file into a FD number and return it to userspace
8110f3e72b5SJason Gunthorpe  */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int fd;

	/*
	 * This function consumes the filp reference: it is released on every
	 * error path and handed to the fd table on success.
	 */
	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		fput(filp);
		return fd;
	}

	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		/* Release the reserved fd before dropping the file */
		put_unused_fd(fd);
		fput(filp);
		return -EFAULT;
	}

	/* Publish the file only after userspace has learned the fd number */
	fd_install(fd, filp);
	return 0;
}
8380f3e72b5SJason Gunthorpe 
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	/* Userspace must provide at least up to and including data_fd */
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	/* Migration state control requires driver mig_ops */
	if (!device->mig_ops)
		return -ENOTTY;

	/* Either GET or SET is allowed here; ret == 1 means "proceed" */
	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		/* GET: report the driver's current state, no data_fd */
		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	/*
	 * NULL means the transition produced no data-transfer file; an
	 * ERR_PTR means failure. Both fall through to out_copy, which
	 * reports data_fd = -1 and then returns 0 or the PTR_ERR code.
	 */
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	/* Success with a data-transfer file: convert it to an fd */
	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
8880f3e72b5SJason Gunthorpe 
8894e016f96SYishai Hadas static int
vfio_ioctl_device_feature_migration_data_size(struct vfio_device * device,u32 flags,void __user * arg,size_t argsz)8904e016f96SYishai Hadas vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
8914e016f96SYishai Hadas 					      u32 flags, void __user *arg,
8924e016f96SYishai Hadas 					      size_t argsz)
8934e016f96SYishai Hadas {
8944e016f96SYishai Hadas 	struct vfio_device_feature_mig_data_size data_size = {};
8954e016f96SYishai Hadas 	unsigned long stop_copy_length;
8964e016f96SYishai Hadas 	int ret;
8974e016f96SYishai Hadas 
8984e016f96SYishai Hadas 	if (!device->mig_ops)
8994e016f96SYishai Hadas 		return -ENOTTY;
9004e016f96SYishai Hadas 
9014e016f96SYishai Hadas 	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
9024e016f96SYishai Hadas 				 sizeof(data_size));
9034e016f96SYishai Hadas 	if (ret != 1)
9044e016f96SYishai Hadas 		return ret;
9054e016f96SYishai Hadas 
9064e016f96SYishai Hadas 	ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length);
9074e016f96SYishai Hadas 	if (ret)
9084e016f96SYishai Hadas 		return ret;
9094e016f96SYishai Hadas 
9104e016f96SYishai Hadas 	data_size.stop_copy_length = stop_copy_length;
9114e016f96SYishai Hadas 	if (copy_to_user(arg, &data_size, sizeof(data_size)))
9124e016f96SYishai Hadas 		return -EFAULT;
9134e016f96SYishai Hadas 
9144e016f96SYishai Hadas 	return 0;
9154e016f96SYishai Hadas }
9164e016f96SYishai Hadas 
static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	/* Report the device's supported migration feature flags */
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	/* GET-only feature; ret == 1 means "proceed" */
	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	return copy_to_user(arg, &mig, sizeof(mig)) ? -EFAULT : 0;
}
9370f3e72b5SJason Gunthorpe 
/*
 * vfio_combine_iova_ranges - Reduce an interval tree to at most @req_nodes
 * ranges by repeatedly merging the two neighboring ranges separated by the
 * smallest gap.
 * @root: interval tree holding the candidate ranges
 * @cur_nodes: number of nodes currently in the tree
 * @req_nodes: maximum number of ranges the caller can accept (must be >= 1)
 *
 * Merged neighbors are removed from the tree; the surviving node is widened
 * to cover the removed one.
 */
void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
			      u32 req_nodes)
{
	struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
	unsigned long min_gap, curr_gap;

	/* Special shortcut when a single range is required */
	if (req_nodes == 1) {
		unsigned long last;

		comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);

		/*
		 * An empty tree would leave comb_start NULL and 'last'
		 * uninitialized below; that indicates a caller bug.
		 */
		if (WARN_ON_ONCE(!comb_start))
			return;

		curr = comb_start;
		while (curr) {
			last = curr->last;
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
			if (prev != comb_start)
				interval_tree_remove(prev, root);
		}
		/* Widen the first node to span every removed range */
		comb_start->last = last;
		return;
	}

	/* Combine ranges which have the smallest gap */
	while (cur_nodes > req_nodes) {
		prev = NULL;
		min_gap = ULONG_MAX;
		curr = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			if (prev) {
				curr_gap = curr->start - prev->last;
				if (curr_gap < min_gap) {
					min_gap = curr_gap;
					comb_start = prev;
					comb_end = curr;
				}
			}
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
		}

		/*
		 * Fewer than two nodes were found, so no pair was selected
		 * and comb_start/comb_end are uninitialized; cur_nodes did
		 * not match the tree contents.
		 */
		if (WARN_ON_ONCE(min_gap == ULONG_MAX))
			break;

		comb_start->last = comb_end->last;
		interval_tree_remove(comb_end, root);
		cur_nodes--;
	}
}
EXPORT_SYMBOL_GPL(vfio_combine_iova_ranges);
9849a4087faSBrett Creeley 
98580c4b92aSYishai Hadas /* Ranges should fit into a single kernel page */
98680c4b92aSYishai Hadas #define LOG_MAX_RANGES \
98780c4b92aSYishai Hadas 	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
98880c4b92aSYishai Hadas 
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	/* Userspace must provide at least up to and including 'ranges' */
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	/* DMA dirty logging requires driver log_ops */
	if (!device->log_ops)
		return -ENOTTY;

	/* SET-only feature; ret == 1 means "proceed" */
	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bound the allocation below; LOG_MAX_RANGES fits one page */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	/* Validate each range and build a non-overlapping interval tree */
	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		/* Both ends must be aligned to the requested page size */
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		/* Reject ranges that wrap or exceed the addressable space */
		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	/* The driver may adjust control.page_size to what it supports */
	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/* Report the (possibly adjusted) control back; undo start on fault */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	/* The tree nodes live in this array; the root was local */
	kfree(nodes);
	return ret;
}
107380c4b92aSYishai Hadas 
107480c4b92aSYishai Hadas static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device * device,u32 flags,void __user * arg,size_t argsz)107580c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
107680c4b92aSYishai Hadas 				       u32 flags, void __user *arg,
107780c4b92aSYishai Hadas 				       size_t argsz)
107880c4b92aSYishai Hadas {
107980c4b92aSYishai Hadas 	int ret;
108080c4b92aSYishai Hadas 
108180c4b92aSYishai Hadas 	if (!device->log_ops)
108280c4b92aSYishai Hadas 		return -ENOTTY;
108380c4b92aSYishai Hadas 
108480c4b92aSYishai Hadas 	ret = vfio_check_feature(flags, argsz,
108580c4b92aSYishai Hadas 				 VFIO_DEVICE_FEATURE_SET, 0);
108680c4b92aSYishai Hadas 	if (ret != 1)
108780c4b92aSYishai Hadas 		return ret;
108880c4b92aSYishai Hadas 
108980c4b92aSYishai Hadas 	return device->log_ops->log_stop(device);
109080c4b92aSYishai Hadas }
109180c4b92aSYishai Hadas 
/*
 * iova_bitmap_for_each() callback: recover the device from the opaque
 * cookie and forward the chunk to the driver's log_read_and_clear op.
 */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *vdev = opaque;

	return vdev->log_ops->log_read_and_clear(vdev, iova, length, iter);
}
110080c4b92aSYishai Hadas 
110180c4b92aSYishai Hadas static int
vfio_ioctl_device_feature_logging_report(struct vfio_device * device,u32 flags,void __user * arg,size_t argsz)110280c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
110380c4b92aSYishai Hadas 					 u32 flags, void __user *arg,
110480c4b92aSYishai Hadas 					 size_t argsz)
110580c4b92aSYishai Hadas {
110680c4b92aSYishai Hadas 	size_t minsz =
110780c4b92aSYishai Hadas 		offsetofend(struct vfio_device_feature_dma_logging_report,
110880c4b92aSYishai Hadas 			    bitmap);
110980c4b92aSYishai Hadas 	struct vfio_device_feature_dma_logging_report report;
111080c4b92aSYishai Hadas 	struct iova_bitmap *iter;
111180c4b92aSYishai Hadas 	u64 iova_end;
111280c4b92aSYishai Hadas 	int ret;
111380c4b92aSYishai Hadas 
111480c4b92aSYishai Hadas 	if (!device->log_ops)
111580c4b92aSYishai Hadas 		return -ENOTTY;
111680c4b92aSYishai Hadas 
111780c4b92aSYishai Hadas 	ret = vfio_check_feature(flags, argsz,
111880c4b92aSYishai Hadas 				 VFIO_DEVICE_FEATURE_GET,
111980c4b92aSYishai Hadas 				 sizeof(report));
112080c4b92aSYishai Hadas 	if (ret != 1)
112180c4b92aSYishai Hadas 		return ret;
112280c4b92aSYishai Hadas 
112380c4b92aSYishai Hadas 	if (copy_from_user(&report, arg, minsz))
112480c4b92aSYishai Hadas 		return -EFAULT;
112580c4b92aSYishai Hadas 
112680c4b92aSYishai Hadas 	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
112780c4b92aSYishai Hadas 		return -EINVAL;
112880c4b92aSYishai Hadas 
112980c4b92aSYishai Hadas 	if (check_add_overflow(report.iova, report.length, &iova_end) ||
113080c4b92aSYishai Hadas 	    iova_end > ULONG_MAX)
113180c4b92aSYishai Hadas 		return -EOVERFLOW;
113280c4b92aSYishai Hadas 
113380c4b92aSYishai Hadas 	iter = iova_bitmap_alloc(report.iova, report.length,
113480c4b92aSYishai Hadas 				 report.page_size,
113580c4b92aSYishai Hadas 				 u64_to_user_ptr(report.bitmap));
113680c4b92aSYishai Hadas 	if (IS_ERR(iter))
113780c4b92aSYishai Hadas 		return PTR_ERR(iter);
113880c4b92aSYishai Hadas 
113980c4b92aSYishai Hadas 	ret = iova_bitmap_for_each(iter, device,
114080c4b92aSYishai Hadas 				   vfio_device_log_read_and_clear);
114180c4b92aSYishai Hadas 
114280c4b92aSYishai Hadas 	iova_bitmap_free(iter);
114380c4b92aSYishai Hadas 	return ret;
114480c4b92aSYishai Hadas }
114580c4b92aSYishai Hadas 
/*
 * Decode a VFIO_DEVICE_FEATURE ioctl header and dispatch to the handler for
 * the selected feature.  Core-handled features are routed explicitly; any
 * other feature ID falls through to the driver's device_feature op.
 * Returns 0 or a negative errno.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;
	void __user *data;
	size_t rest;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Reject any flag outside the feature ID mask and GET/SET/PROBE */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET and SET may only be combined when PROBE is also set */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	/* The feature payload follows the fixed header in the user buffer */
	data = arg->data;
	rest = feature.argsz - minsz;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(device,
							   feature.flags,
							   data, rest);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(device,
								  feature.flags,
								  data, rest);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(device,
							       feature.flags,
							       data, rest);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(device,
							      feature.flags,
							      data, rest);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(device,
								feature.flags,
								data, rest);
	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
		return vfio_ioctl_device_feature_migration_data_size(device,
								     feature.flags,
								     data, rest);
	default:
		/* Unknown feature IDs go to the driver, if it has a handler */
		if (!device->ops->device_feature)
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   data, rest);
	}
}
12030f3e72b5SJason Gunthorpe 
/*
 * Main ioctl entry for a VFIO device file.
 *
 * Ordering here is deliberate: BIND_IOMMUFD is handled before the
 * access_granted check because binding is what grants access on the cdev
 * path; everything else requires an already-opened device and a powered-up
 * device (pm runtime reference held across the handler).
 */
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;
	void __user *uptr = (void __user *)arg;
	int ret;

	/* Bind must be allowed before access is granted (it grants access) */
	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
		return vfio_df_ioctl_bind_iommufd(df, uptr);

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	/* Keep the device resumed for the duration of the ioctl */
	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	/* cdev only ioctls */
	if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) {
		switch (cmd) {
		case VFIO_DEVICE_ATTACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_attach_pt(df, uptr);
			goto out;

		case VFIO_DEVICE_DETACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_detach_pt(df, uptr);
			goto out;
		}
	}

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, uptr);
		break;

	default:
		/* Everything else is delegated to the driver's ioctl op */
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}
out:
	vfio_device_pm_runtime_put(device);
	return ret;
}
12520f3e72b5SJason Gunthorpe 
vfio_device_fops_read(struct file * filep,char __user * buf,size_t count,loff_t * ppos)12530f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
12540f3e72b5SJason Gunthorpe 				     size_t count, loff_t *ppos)
12550f3e72b5SJason Gunthorpe {
1256b1a3b5c6SYi Liu 	struct vfio_device_file *df = filep->private_data;
1257b1a3b5c6SYi Liu 	struct vfio_device *device = df->device;
12580f3e72b5SJason Gunthorpe 
125982d93f58SYi Liu 	/* Paired with smp_store_release() following vfio_df_open() */
126082d93f58SYi Liu 	if (!smp_load_acquire(&df->access_granted))
126182d93f58SYi Liu 		return -EINVAL;
126282d93f58SYi Liu 
12630f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->read))
12640f3e72b5SJason Gunthorpe 		return -EINVAL;
12650f3e72b5SJason Gunthorpe 
12660f3e72b5SJason Gunthorpe 	return device->ops->read(device, buf, count, ppos);
12670f3e72b5SJason Gunthorpe }
12680f3e72b5SJason Gunthorpe 
vfio_device_fops_write(struct file * filep,const char __user * buf,size_t count,loff_t * ppos)12690f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep,
12700f3e72b5SJason Gunthorpe 				      const char __user *buf,
12710f3e72b5SJason Gunthorpe 				      size_t count, loff_t *ppos)
12720f3e72b5SJason Gunthorpe {
1273b1a3b5c6SYi Liu 	struct vfio_device_file *df = filep->private_data;
1274b1a3b5c6SYi Liu 	struct vfio_device *device = df->device;
12750f3e72b5SJason Gunthorpe 
127682d93f58SYi Liu 	/* Paired with smp_store_release() following vfio_df_open() */
127782d93f58SYi Liu 	if (!smp_load_acquire(&df->access_granted))
127882d93f58SYi Liu 		return -EINVAL;
127982d93f58SYi Liu 
12800f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->write))
12810f3e72b5SJason Gunthorpe 		return -EINVAL;
12820f3e72b5SJason Gunthorpe 
12830f3e72b5SJason Gunthorpe 	return device->ops->write(device, buf, count, ppos);
12840f3e72b5SJason Gunthorpe }
12850f3e72b5SJason Gunthorpe 
vfio_device_fops_mmap(struct file * filep,struct vm_area_struct * vma)12860f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
12870f3e72b5SJason Gunthorpe {
1288b1a3b5c6SYi Liu 	struct vfio_device_file *df = filep->private_data;
1289b1a3b5c6SYi Liu 	struct vfio_device *device = df->device;
12900f3e72b5SJason Gunthorpe 
129182d93f58SYi Liu 	/* Paired with smp_store_release() following vfio_df_open() */
129282d93f58SYi Liu 	if (!smp_load_acquire(&df->access_granted))
129382d93f58SYi Liu 		return -EINVAL;
129482d93f58SYi Liu 
12950f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->mmap))
12960f3e72b5SJason Gunthorpe 		return -EINVAL;
12970f3e72b5SJason Gunthorpe 
12980f3e72b5SJason Gunthorpe 	return device->ops->mmap(device, vma);
12990f3e72b5SJason Gunthorpe }
13000f3e72b5SJason Gunthorpe 
13019eefba80SYi Liu const struct file_operations vfio_device_fops = {
13020f3e72b5SJason Gunthorpe 	.owner		= THIS_MODULE,
13038b6f173aSYi Liu 	.open		= vfio_device_fops_cdev_open,
13040f3e72b5SJason Gunthorpe 	.release	= vfio_device_fops_release,
13050f3e72b5SJason Gunthorpe 	.read		= vfio_device_fops_read,
13060f3e72b5SJason Gunthorpe 	.write		= vfio_device_fops_write,
13070f3e72b5SJason Gunthorpe 	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
13080f3e72b5SJason Gunthorpe 	.compat_ioctl	= compat_ptr_ioctl,
13090f3e72b5SJason Gunthorpe 	.mmap		= vfio_device_fops_mmap,
13100f3e72b5SJason Gunthorpe };
13110f3e72b5SJason Gunthorpe 
vfio_device_from_file(struct file * file)131234aeeecdSYi Liu static struct vfio_device *vfio_device_from_file(struct file *file)
131334aeeecdSYi Liu {
131434aeeecdSYi Liu 	struct vfio_device_file *df = file->private_data;
131534aeeecdSYi Liu 
131634aeeecdSYi Liu 	if (file->f_op != &vfio_device_fops)
131734aeeecdSYi Liu 		return NULL;
131834aeeecdSYi Liu 	return df->device;
131934aeeecdSYi Liu }
132034aeeecdSYi Liu 
1321b1a59be8SYi Liu /**
1322b1a59be8SYi Liu  * vfio_file_is_valid - True if the file is valid vfio file
1323b1a59be8SYi Liu  * @file: VFIO group file or VFIO device file
1324b1a59be8SYi Liu  */
vfio_file_is_valid(struct file * file)1325b1a59be8SYi Liu bool vfio_file_is_valid(struct file *file)
1326b1a59be8SYi Liu {
132734aeeecdSYi Liu 	return vfio_group_from_file(file) ||
132834aeeecdSYi Liu 	       vfio_device_from_file(file);
1329b1a59be8SYi Liu }
1330b1a59be8SYi Liu EXPORT_SYMBOL_GPL(vfio_file_is_valid);
1331b1a59be8SYi Liu 
1332b1a59be8SYi Liu /**
1333b1a59be8SYi Liu  * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
1334b1a59be8SYi Liu  *        is always CPU cache coherent
1335b1a59be8SYi Liu  * @file: VFIO group file or VFIO device file
1336b1a59be8SYi Liu  *
1337b1a59be8SYi Liu  * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
1338b1a59be8SYi Liu  * bit in DMA transactions. A return of false indicates that the user has
1339b1a59be8SYi Liu  * rights to access additional instructions such as wbinvd on x86.
1340b1a59be8SYi Liu  */
bool vfio_file_enforced_coherent(struct file *file)
{
	struct vfio_device *device;
	struct vfio_group *group;

	/* Group files delegate to the group-level coherency state */
	group = vfio_group_from_file(file);
	if (group)
		return vfio_group_enforced_coherent(group);

	/* Device (cdev) files query the IOMMU capability directly */
	device = vfio_device_from_file(file);
	if (device)
		return device_iommu_capable(device->dev,
					    IOMMU_CAP_ENFORCE_CACHE_COHERENCY);

	/*
	 * Not a VFIO file at all: report coherent so the caller grants no
	 * additional rights (e.g. wbinvd) on its account.
	 */
	return true;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
1358b1a59be8SYi Liu 
vfio_device_file_set_kvm(struct file * file,struct kvm * kvm)135934aeeecdSYi Liu static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm)
136034aeeecdSYi Liu {
136134aeeecdSYi Liu 	struct vfio_device_file *df = file->private_data;
136234aeeecdSYi Liu 
136334aeeecdSYi Liu 	/*
136434aeeecdSYi Liu 	 * The kvm is first recorded in the vfio_device_file, and will
136534aeeecdSYi Liu 	 * be propagated to vfio_device::kvm when the file is bound to
136634aeeecdSYi Liu 	 * iommufd successfully in the vfio device cdev path.
136734aeeecdSYi Liu 	 */
136834aeeecdSYi Liu 	spin_lock(&df->kvm_ref_lock);
136934aeeecdSYi Liu 	df->kvm = kvm;
137034aeeecdSYi Liu 	spin_unlock(&df->kvm_ref_lock);
137134aeeecdSYi Liu }
137234aeeecdSYi Liu 
1373b1a59be8SYi Liu /**
1374b1a59be8SYi Liu  * vfio_file_set_kvm - Link a kvm with VFIO drivers
1375b1a59be8SYi Liu  * @file: VFIO group file or VFIO device file
1376b1a59be8SYi Liu  * @kvm: KVM to link
1377b1a59be8SYi Liu  *
1378b1a59be8SYi Liu  * When a VFIO device is first opened the KVM will be available in
1379b1a59be8SYi Liu  * device->kvm if one was associated with the file.
1380b1a59be8SYi Liu  */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = vfio_group_from_file(file);

	/* A file is at most one of group/device, discriminated by f_op */
	if (group) {
		vfio_group_set_kvm(group, kvm);
		return;
	}

	if (vfio_device_from_file(file))
		vfio_device_file_set_kvm(file, kvm);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
1393b1a59be8SYi Liu 
13940f3e72b5SJason Gunthorpe /*
13950f3e72b5SJason Gunthorpe  * Sub-module support
13960f3e72b5SJason Gunthorpe  */
13970f3e72b5SJason Gunthorpe /*
13980f3e72b5SJason Gunthorpe  * Helper for managing a buffer of info chain capabilities, allocate or
13990f3e72b5SJason Gunthorpe  * reallocate a buffer with additional @size, filling in @id and @version
14000f3e72b5SJason Gunthorpe  * of the capability.  A pointer to the new capability is returned.
14010f3e72b5SJason Gunthorpe  *
14020f3e72b5SJason Gunthorpe  * NB. The chain is based at the head of the buffer, so new entries are
14030f3e72b5SJason Gunthorpe  * added to the tail, vfio_info_cap_shift() should be called to fixup the
14040f3e72b5SJason Gunthorpe  * next offsets prior to copying to the user buffer.
14050f3e72b5SJason Gunthorpe  */
vfio_info_cap_add(struct vfio_info_cap * caps,size_t size,u16 id,u16 version)14060f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
14070f3e72b5SJason Gunthorpe 					       size_t size, u16 id, u16 version)
14080f3e72b5SJason Gunthorpe {
14090f3e72b5SJason Gunthorpe 	void *buf;
14100f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header, *tmp;
14110f3e72b5SJason Gunthorpe 
1412*a881b496SStefan Hajnoczi 	/* Ensure that the next capability struct will be aligned */
1413*a881b496SStefan Hajnoczi 	size = ALIGN(size, sizeof(u64));
1414*a881b496SStefan Hajnoczi 
14150f3e72b5SJason Gunthorpe 	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
14160f3e72b5SJason Gunthorpe 	if (!buf) {
14170f3e72b5SJason Gunthorpe 		kfree(caps->buf);
14180f3e72b5SJason Gunthorpe 		caps->buf = NULL;
14190f3e72b5SJason Gunthorpe 		caps->size = 0;
14200f3e72b5SJason Gunthorpe 		return ERR_PTR(-ENOMEM);
14210f3e72b5SJason Gunthorpe 	}
14220f3e72b5SJason Gunthorpe 
14230f3e72b5SJason Gunthorpe 	caps->buf = buf;
14240f3e72b5SJason Gunthorpe 	header = buf + caps->size;
14250f3e72b5SJason Gunthorpe 
14260f3e72b5SJason Gunthorpe 	/* Eventually copied to user buffer, zero */
14270f3e72b5SJason Gunthorpe 	memset(header, 0, size);
14280f3e72b5SJason Gunthorpe 
14290f3e72b5SJason Gunthorpe 	header->id = id;
14300f3e72b5SJason Gunthorpe 	header->version = version;
14310f3e72b5SJason Gunthorpe 
14320f3e72b5SJason Gunthorpe 	/* Add to the end of the capability chain */
14330f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
14340f3e72b5SJason Gunthorpe 		; /* nothing */
14350f3e72b5SJason Gunthorpe 
14360f3e72b5SJason Gunthorpe 	tmp->next = caps->size;
14370f3e72b5SJason Gunthorpe 	caps->size += size;
14380f3e72b5SJason Gunthorpe 
14390f3e72b5SJason Gunthorpe 	return header;
14400f3e72b5SJason Gunthorpe }
14410f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add);
14420f3e72b5SJason Gunthorpe 
vfio_info_cap_shift(struct vfio_info_cap * caps,size_t offset)14430f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
14440f3e72b5SJason Gunthorpe {
14450f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *tmp;
14460f3e72b5SJason Gunthorpe 	void *buf = (void *)caps->buf;
14470f3e72b5SJason Gunthorpe 
1448*a881b496SStefan Hajnoczi 	/* Capability structs should start with proper alignment */
1449*a881b496SStefan Hajnoczi 	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));
1450*a881b496SStefan Hajnoczi 
14510f3e72b5SJason Gunthorpe 	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
14520f3e72b5SJason Gunthorpe 		tmp->next += offset;
14530f3e72b5SJason Gunthorpe }
14540f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift);
14550f3e72b5SJason Gunthorpe 
vfio_info_add_capability(struct vfio_info_cap * caps,struct vfio_info_cap_header * cap,size_t size)14560f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps,
14570f3e72b5SJason Gunthorpe 			     struct vfio_info_cap_header *cap, size_t size)
14580f3e72b5SJason Gunthorpe {
14590f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header;
14600f3e72b5SJason Gunthorpe 
14610f3e72b5SJason Gunthorpe 	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
14620f3e72b5SJason Gunthorpe 	if (IS_ERR(header))
14630f3e72b5SJason Gunthorpe 		return PTR_ERR(header);
14640f3e72b5SJason Gunthorpe 
14650f3e72b5SJason Gunthorpe 	memcpy(header + 1, cap + 1, size - sizeof(*header));
14660f3e72b5SJason Gunthorpe 
14670f3e72b5SJason Gunthorpe 	return 0;
14680f3e72b5SJason Gunthorpe }
14690f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability);
14700f3e72b5SJason Gunthorpe 
vfio_set_irqs_validate_and_prepare(struct vfio_irq_set * hdr,int num_irqs,int max_irq_type,size_t * data_size)14710f3e72b5SJason Gunthorpe int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
14720f3e72b5SJason Gunthorpe 				       int max_irq_type, size_t *data_size)
14730f3e72b5SJason Gunthorpe {
14740f3e72b5SJason Gunthorpe 	unsigned long minsz;
14750f3e72b5SJason Gunthorpe 	size_t size;
14760f3e72b5SJason Gunthorpe 
14770f3e72b5SJason Gunthorpe 	minsz = offsetofend(struct vfio_irq_set, count);
14780f3e72b5SJason Gunthorpe 
14790f3e72b5SJason Gunthorpe 	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
14800f3e72b5SJason Gunthorpe 	    (hdr->count >= (U32_MAX - hdr->start)) ||
14810f3e72b5SJason Gunthorpe 	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
14820f3e72b5SJason Gunthorpe 				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
14830f3e72b5SJason Gunthorpe 		return -EINVAL;
14840f3e72b5SJason Gunthorpe 
14850f3e72b5SJason Gunthorpe 	if (data_size)
14860f3e72b5SJason Gunthorpe 		*data_size = 0;
14870f3e72b5SJason Gunthorpe 
14880f3e72b5SJason Gunthorpe 	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
14890f3e72b5SJason Gunthorpe 		return -EINVAL;
14900f3e72b5SJason Gunthorpe 
14910f3e72b5SJason Gunthorpe 	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
14920f3e72b5SJason Gunthorpe 	case VFIO_IRQ_SET_DATA_NONE:
14930f3e72b5SJason Gunthorpe 		size = 0;
14940f3e72b5SJason Gunthorpe 		break;
14950f3e72b5SJason Gunthorpe 	case VFIO_IRQ_SET_DATA_BOOL:
14960f3e72b5SJason Gunthorpe 		size = sizeof(uint8_t);
14970f3e72b5SJason Gunthorpe 		break;
14980f3e72b5SJason Gunthorpe 	case VFIO_IRQ_SET_DATA_EVENTFD:
14990f3e72b5SJason Gunthorpe 		size = sizeof(int32_t);
15000f3e72b5SJason Gunthorpe 		break;
15010f3e72b5SJason Gunthorpe 	default:
15020f3e72b5SJason Gunthorpe 		return -EINVAL;
15030f3e72b5SJason Gunthorpe 	}
15040f3e72b5SJason Gunthorpe 
15050f3e72b5SJason Gunthorpe 	if (size) {
15060f3e72b5SJason Gunthorpe 		if (hdr->argsz - minsz < hdr->count * size)
15070f3e72b5SJason Gunthorpe 			return -EINVAL;
15080f3e72b5SJason Gunthorpe 
15090f3e72b5SJason Gunthorpe 		if (!data_size)
15100f3e72b5SJason Gunthorpe 			return -EINVAL;
15110f3e72b5SJason Gunthorpe 
15120f3e72b5SJason Gunthorpe 		*data_size = hdr->count * size;
15130f3e72b5SJason Gunthorpe 	}
15140f3e72b5SJason Gunthorpe 
15150f3e72b5SJason Gunthorpe 	return 0;
15160f3e72b5SJason Gunthorpe }
15170f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
15180f3e72b5SJason Gunthorpe 
15190f3e72b5SJason Gunthorpe /*
15204741f2e9SJason Gunthorpe  * Pin contiguous user pages and return their associated host pages for local
15214741f2e9SJason Gunthorpe  * domain only.
15224741f2e9SJason Gunthorpe  * @device [in]  : device
15234741f2e9SJason Gunthorpe  * @iova [in]    : starting IOVA of user pages to be pinned.
15244741f2e9SJason Gunthorpe  * @npage [in]   : count of pages to be pinned.  This count should not
15254741f2e9SJason Gunthorpe  *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
15264741f2e9SJason Gunthorpe  * @prot [in]    : protection flags
15274741f2e9SJason Gunthorpe  * @pages[out]   : array of host pages
15284741f2e9SJason Gunthorpe  * Return error or number of pages pinned.
15294741f2e9SJason Gunthorpe  *
15304741f2e9SJason Gunthorpe  * A driver may only call this function if the vfio_device was created
15318da7a0e7SYi Liu  * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages().
15324741f2e9SJason Gunthorpe  */
vfio_pin_pages(struct vfio_device * device,dma_addr_t iova,int npage,int prot,struct page ** pages)15334741f2e9SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
15344741f2e9SJason Gunthorpe 		   int npage, int prot, struct page **pages)
15354741f2e9SJason Gunthorpe {
15364741f2e9SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
15374741f2e9SJason Gunthorpe 	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
15384741f2e9SJason Gunthorpe 		return -EINVAL;
15398da7a0e7SYi Liu 	if (!device->ops->dma_unmap)
15408da7a0e7SYi Liu 		return -EINVAL;
15418da7a0e7SYi Liu 	if (vfio_device_has_container(device))
15424741f2e9SJason Gunthorpe 		return vfio_device_container_pin_pages(device, iova,
15434741f2e9SJason Gunthorpe 						       npage, prot, pages);
15444741f2e9SJason Gunthorpe 	if (device->iommufd_access) {
15454741f2e9SJason Gunthorpe 		int ret;
15464741f2e9SJason Gunthorpe 
15474741f2e9SJason Gunthorpe 		if (iova > ULONG_MAX)
15484741f2e9SJason Gunthorpe 			return -EINVAL;
15494741f2e9SJason Gunthorpe 		/*
15504741f2e9SJason Gunthorpe 		 * VFIO ignores the sub page offset, npages is from the start of
15514741f2e9SJason Gunthorpe 		 * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
15524741f2e9SJason Gunthorpe 		 * the sub page offset by doing:
15534741f2e9SJason Gunthorpe 		 *     pages[0] + (iova % PAGE_SIZE)
15544741f2e9SJason Gunthorpe 		 */
15554741f2e9SJason Gunthorpe 		ret = iommufd_access_pin_pages(
15564741f2e9SJason Gunthorpe 			device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
15574741f2e9SJason Gunthorpe 			npage * PAGE_SIZE, pages,
15584741f2e9SJason Gunthorpe 			(prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
15594741f2e9SJason Gunthorpe 		if (ret)
15604741f2e9SJason Gunthorpe 			return ret;
15614741f2e9SJason Gunthorpe 		return npage;
15624741f2e9SJason Gunthorpe 	}
15634741f2e9SJason Gunthorpe 	return -EINVAL;
15644741f2e9SJason Gunthorpe }
15654741f2e9SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages);
15664741f2e9SJason Gunthorpe 
15674741f2e9SJason Gunthorpe /*
15684741f2e9SJason Gunthorpe  * Unpin contiguous host pages for local domain only.
15694741f2e9SJason Gunthorpe  * @device [in]  : device
15704741f2e9SJason Gunthorpe  * @iova [in]    : starting address of user pages to be unpinned.
15714741f2e9SJason Gunthorpe  * @npage [in]   : count of pages to be unpinned.  This count should not
15724741f2e9SJason Gunthorpe  *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
15734741f2e9SJason Gunthorpe  */
vfio_unpin_pages(struct vfio_device * device,dma_addr_t iova,int npage)15744741f2e9SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
15754741f2e9SJason Gunthorpe {
15764741f2e9SJason Gunthorpe 	if (WARN_ON(!vfio_assert_device_open(device)))
15778da7a0e7SYi Liu 		return;
15788da7a0e7SYi Liu 	if (WARN_ON(!device->ops->dma_unmap))
15794741f2e9SJason Gunthorpe 		return;
15804741f2e9SJason Gunthorpe 
15814741f2e9SJason Gunthorpe 	if (vfio_device_has_container(device)) {
15824741f2e9SJason Gunthorpe 		vfio_device_container_unpin_pages(device, iova, npage);
15834741f2e9SJason Gunthorpe 		return;
15844741f2e9SJason Gunthorpe 	}
15854741f2e9SJason Gunthorpe 	if (device->iommufd_access) {
15864741f2e9SJason Gunthorpe 		if (WARN_ON(iova > ULONG_MAX))
15874741f2e9SJason Gunthorpe 			return;
15884741f2e9SJason Gunthorpe 		iommufd_access_unpin_pages(device->iommufd_access,
15894741f2e9SJason Gunthorpe 					   ALIGN_DOWN(iova, PAGE_SIZE),
15904741f2e9SJason Gunthorpe 					   npage * PAGE_SIZE);
15914741f2e9SJason Gunthorpe 		return;
15924741f2e9SJason Gunthorpe 	}
15934741f2e9SJason Gunthorpe }
15944741f2e9SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages);
15954741f2e9SJason Gunthorpe 
15964741f2e9SJason Gunthorpe /*
15974741f2e9SJason Gunthorpe  * This interface allows the CPUs to perform some sort of virtual DMA on
15984741f2e9SJason Gunthorpe  * behalf of the device.
15994741f2e9SJason Gunthorpe  *
16004741f2e9SJason Gunthorpe  * CPUs read/write from/into a range of IOVAs pointing to user space memory
16014741f2e9SJason Gunthorpe  * into/from a kernel buffer.
16024741f2e9SJason Gunthorpe  *
16034741f2e9SJason Gunthorpe  * As the read/write of user space memory is conducted via the CPUs and is
16044741f2e9SJason Gunthorpe  * not a real device DMA, it is not necessary to pin the user space memory.
16054741f2e9SJason Gunthorpe  *
16064741f2e9SJason Gunthorpe  * @device [in]		: VFIO device
16074741f2e9SJason Gunthorpe  * @iova [in]		: base IOVA of a user space buffer
16084741f2e9SJason Gunthorpe  * @data [in]		: pointer to kernel buffer
16094741f2e9SJason Gunthorpe  * @len [in]		: kernel buffer length
16104741f2e9SJason Gunthorpe  * @write		: indicate read or write
16114741f2e9SJason Gunthorpe  * Return error code on failure or 0 on success.
16124741f2e9SJason Gunthorpe  */
vfio_dma_rw(struct vfio_device * device,dma_addr_t iova,void * data,size_t len,bool write)16134741f2e9SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
16144741f2e9SJason Gunthorpe 		size_t len, bool write)
16158da7a0e7SYi Liu {
16168da7a0e7SYi Liu 	if (!data || len <= 0 || !vfio_assert_device_open(device))
16174741f2e9SJason Gunthorpe 		return -EINVAL;
16184741f2e9SJason Gunthorpe 
16194741f2e9SJason Gunthorpe 	if (vfio_device_has_container(device))
16204741f2e9SJason Gunthorpe 		return vfio_device_container_dma_rw(device, iova,
16214741f2e9SJason Gunthorpe 						    data, len, write);
16224741f2e9SJason Gunthorpe 
16234741f2e9SJason Gunthorpe 	if (device->iommufd_access) {
16244741f2e9SJason Gunthorpe 		unsigned int flags = 0;
16254741f2e9SJason Gunthorpe 
16264741f2e9SJason Gunthorpe 		if (iova > ULONG_MAX)
16274741f2e9SJason Gunthorpe 			return -EINVAL;
16284741f2e9SJason Gunthorpe 
16294741f2e9SJason Gunthorpe 		/* VFIO historically tries to auto-detect a kthread */
16304741f2e9SJason Gunthorpe 		if (!current->mm)
16314741f2e9SJason Gunthorpe 			flags |= IOMMUFD_ACCESS_RW_KTHREAD;
16324741f2e9SJason Gunthorpe 		if (write)
16334741f2e9SJason Gunthorpe 			flags |= IOMMUFD_ACCESS_RW_WRITE;
16344741f2e9SJason Gunthorpe 		return iommufd_access_rw(device->iommufd_access, iova, data,
16354741f2e9SJason Gunthorpe 					 len, flags);
16364741f2e9SJason Gunthorpe 	}
16374741f2e9SJason Gunthorpe 	return -EINVAL;
16380f3e72b5SJason Gunthorpe }
16390f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw);
16401334e47eSYi Liu 
16411334e47eSYi Liu /*
16421334e47eSYi Liu  * Module/class support
16431334e47eSYi Liu  */
vfio_init(void)16441334e47eSYi Liu static int __init vfio_init(void)
16451334e47eSYi Liu {
16461334e47eSYi Liu 	int ret;
16471334e47eSYi Liu 
16481334e47eSYi Liu 	ida_init(&vfio.device_ida);
16491334e47eSYi Liu 
1650e2d55709SJason Gunthorpe 	ret = vfio_group_init();
1651e2d55709SJason Gunthorpe 	if (ret)
1652e2d55709SJason Gunthorpe 		return ret;
1653e2d55709SJason Gunthorpe 
16541334e47eSYi Liu 	ret = vfio_virqfd_init();
16551aaba11dSGreg Kroah-Hartman 	if (ret)
16561334e47eSYi Liu 		goto err_virqfd;
16571334e47eSYi Liu 
16581334e47eSYi Liu 	/* /sys/class/vfio-dev/vfioX */
16591334e47eSYi Liu 	vfio.device_class = class_create("vfio-dev");
16601334e47eSYi Liu 	if (IS_ERR(vfio.device_class)) {
16618b6f173aSYi Liu 		ret = PTR_ERR(vfio.device_class);
16628b6f173aSYi Liu 		goto err_dev_class;
16638b6f173aSYi Liu 	}
16648b6f173aSYi Liu 
16651334e47eSYi Liu 	ret = vfio_cdev_init(vfio.device_class);
16661334e47eSYi Liu 	if (ret)
16671334e47eSYi Liu 		goto err_alloc_dev_chrdev;
16688b6f173aSYi Liu 
16698b6f173aSYi Liu 	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
16708b6f173aSYi Liu 	return 0;
16711334e47eSYi Liu 
1672e2d55709SJason Gunthorpe err_alloc_dev_chrdev:
1673e2d55709SJason Gunthorpe 	class_destroy(vfio.device_class);
16741334e47eSYi Liu 	vfio.device_class = NULL;
16751334e47eSYi Liu err_dev_class:
16761334e47eSYi Liu 	vfio_virqfd_exit();
16771334e47eSYi Liu err_virqfd:
16781334e47eSYi Liu 	vfio_group_cleanup();
16791334e47eSYi Liu 	return ret;
16801334e47eSYi Liu }
16818b6f173aSYi Liu 
vfio_cleanup(void)16823c28a761SYi Liu static void __exit vfio_cleanup(void)
16833c28a761SYi Liu {
1684e2d55709SJason Gunthorpe 	ida_destroy(&vfio.device_ida);
16851334e47eSYi Liu 	vfio_cdev_cleanup();
16860f3e72b5SJason Gunthorpe 	class_destroy(vfio.device_class);
16870f3e72b5SJason Gunthorpe 	vfio.device_class = NULL;
16880f3e72b5SJason Gunthorpe 	vfio_virqfd_exit();
16890f3e72b5SJason Gunthorpe 	vfio_group_cleanup();
16900f3e72b5SJason Gunthorpe 	xa_destroy(&vfio_device_set_xa);
16910f3e72b5SJason Gunthorpe }
16920f3e72b5SJason Gunthorpe 
16930f3e72b5SJason Gunthorpe module_init(vfio_init);
16940f3e72b5SJason Gunthorpe module_exit(vfio_cleanup);
16950f3e72b5SJason Gunthorpe 
16960f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION);
1697 MODULE_LICENSE("GPL v2");
1698 MODULE_AUTHOR(DRIVER_AUTHOR);
1699 MODULE_DESCRIPTION(DRIVER_DESC);
1700 MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
1701