xref: /openbmc/linux/drivers/vfio/vfio_main.c (revision 0f3e72b5)
1*0f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
2*0f3e72b5SJason Gunthorpe /*
3*0f3e72b5SJason Gunthorpe  * VFIO core
4*0f3e72b5SJason Gunthorpe  *
5*0f3e72b5SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6*0f3e72b5SJason Gunthorpe  *     Author: Alex Williamson <alex.williamson@redhat.com>
7*0f3e72b5SJason Gunthorpe  *
8*0f3e72b5SJason Gunthorpe  * Derived from original vfio:
9*0f3e72b5SJason Gunthorpe  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10*0f3e72b5SJason Gunthorpe  * Author: Tom Lyon, pugs@cisco.com
11*0f3e72b5SJason Gunthorpe  */
12*0f3e72b5SJason Gunthorpe 
13*0f3e72b5SJason Gunthorpe #include <linux/cdev.h>
14*0f3e72b5SJason Gunthorpe #include <linux/compat.h>
15*0f3e72b5SJason Gunthorpe #include <linux/device.h>
16*0f3e72b5SJason Gunthorpe #include <linux/file.h>
17*0f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h>
18*0f3e72b5SJason Gunthorpe #include <linux/fs.h>
19*0f3e72b5SJason Gunthorpe #include <linux/idr.h>
20*0f3e72b5SJason Gunthorpe #include <linux/iommu.h>
21*0f3e72b5SJason Gunthorpe #include <linux/list.h>
22*0f3e72b5SJason Gunthorpe #include <linux/miscdevice.h>
23*0f3e72b5SJason Gunthorpe #include <linux/module.h>
24*0f3e72b5SJason Gunthorpe #include <linux/mutex.h>
25*0f3e72b5SJason Gunthorpe #include <linux/pci.h>
26*0f3e72b5SJason Gunthorpe #include <linux/rwsem.h>
27*0f3e72b5SJason Gunthorpe #include <linux/sched.h>
28*0f3e72b5SJason Gunthorpe #include <linux/slab.h>
29*0f3e72b5SJason Gunthorpe #include <linux/stat.h>
30*0f3e72b5SJason Gunthorpe #include <linux/string.h>
31*0f3e72b5SJason Gunthorpe #include <linux/uaccess.h>
32*0f3e72b5SJason Gunthorpe #include <linux/vfio.h>
33*0f3e72b5SJason Gunthorpe #include <linux/wait.h>
34*0f3e72b5SJason Gunthorpe #include <linux/sched/signal.h>
35*0f3e72b5SJason Gunthorpe #include "vfio.h"
36*0f3e72b5SJason Gunthorpe 
/* Driver identification strings (module version/author/description). */
37*0f3e72b5SJason Gunthorpe #define DRIVER_VERSION	"0.3"
38*0f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
39*0f3e72b5SJason Gunthorpe #define DRIVER_DESC	"VFIO - User Level meta-driver"
40*0f3e72b5SJason Gunthorpe 
/*
 * Singleton holding driver-wide VFIO state: the registered IOMMU backend
 * drivers, every live vfio_group, and the char-device resources (class,
 * minor ida, dev_t base) used to expose group device nodes.
 */
41*0f3e72b5SJason Gunthorpe static struct vfio {
42*0f3e72b5SJason Gunthorpe 	struct class			*class;
	/* registered vfio_iommu_driver instances, under iommu_drivers_lock */
43*0f3e72b5SJason Gunthorpe 	struct list_head		iommu_drivers_list;
44*0f3e72b5SJason Gunthorpe 	struct mutex			iommu_drivers_lock;
	/* all vfio_group objects, linked via vfio_group.vfio_next */
45*0f3e72b5SJason Gunthorpe 	struct list_head		group_list;
46*0f3e72b5SJason Gunthorpe 	struct mutex			group_lock; /* locks group_list */
	/* allocator for group char-dev minor numbers */
47*0f3e72b5SJason Gunthorpe 	struct ida			group_ida;
48*0f3e72b5SJason Gunthorpe 	dev_t				group_devt;
49*0f3e72b5SJason Gunthorpe } vfio;
50*0f3e72b5SJason Gunthorpe 
/*
 * Registration record for one IOMMU backend; links its ops into
 * vfio.iommu_drivers_list (see vfio_register_iommu_driver()).
 */
51*0f3e72b5SJason Gunthorpe struct vfio_iommu_driver {
52*0f3e72b5SJason Gunthorpe 	const struct vfio_iommu_driver_ops	*ops;
53*0f3e72b5SJason Gunthorpe 	struct list_head			vfio_next;
54*0f3e72b5SJason Gunthorpe };
55*0f3e72b5SJason Gunthorpe 
/*
 * A container aggregates groups under one IOMMU backend.  kref-managed
 * (see vfio_container_get/put); iommu_driver/iommu_data identify the
 * selected backend and its private state.  noiommu marks the unsafe
 * no-IOMMU mode (see vfio_iommu_driver_allowed()).
 */
56*0f3e72b5SJason Gunthorpe struct vfio_container {
57*0f3e72b5SJason Gunthorpe 	struct kref			kref;
58*0f3e72b5SJason Gunthorpe 	struct list_head		group_list;
59*0f3e72b5SJason Gunthorpe 	struct rw_semaphore		group_lock;
60*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver	*iommu_driver;
61*0f3e72b5SJason Gunthorpe 	void				*iommu_data;
62*0f3e72b5SJason Gunthorpe 	bool				noiommu;
63*0f3e72b5SJason Gunthorpe };
64*0f3e72b5SJason Gunthorpe 
/*
 * VFIO's per-iommu_group object, exposed to userspace as a char device
 * (dev + cdev).  Lifetime is managed by 'users' (vfio_group_get/put);
 * device_list holds the vfio_devices registered into the group, guarded
 * by device_lock.  group_rwsem guards the container/kvm/opened_file
 * state established while the group file is open.
 */
65*0f3e72b5SJason Gunthorpe struct vfio_group {
66*0f3e72b5SJason Gunthorpe 	struct device 			dev;
67*0f3e72b5SJason Gunthorpe 	struct cdev			cdev;
68*0f3e72b5SJason Gunthorpe 	refcount_t			users;
69*0f3e72b5SJason Gunthorpe 	unsigned int			container_users;
70*0f3e72b5SJason Gunthorpe 	struct iommu_group		*iommu_group;
71*0f3e72b5SJason Gunthorpe 	struct vfio_container		*container;
72*0f3e72b5SJason Gunthorpe 	struct list_head		device_list;
73*0f3e72b5SJason Gunthorpe 	struct mutex			device_lock;
74*0f3e72b5SJason Gunthorpe 	struct list_head		vfio_next;
75*0f3e72b5SJason Gunthorpe 	struct list_head		container_next;
76*0f3e72b5SJason Gunthorpe 	enum vfio_group_type		type;
77*0f3e72b5SJason Gunthorpe 	unsigned int			dev_counter;
78*0f3e72b5SJason Gunthorpe 	struct rw_semaphore		group_rwsem;
79*0f3e72b5SJason Gunthorpe 	struct kvm			*kvm;
80*0f3e72b5SJason Gunthorpe 	struct file			*opened_file;
81*0f3e72b5SJason Gunthorpe 	struct blocking_notifier_head	notifier;
82*0f3e72b5SJason Gunthorpe };
83*0f3e72b5SJason Gunthorpe 
84*0f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
/*
 * Runtime opt-in for the unsafe no-IOMMU mode (writable by root).  When
 * set, devices without an iommu_group get a VFIO-created one and taint
 * the kernel on use; see vfio_group_find_or_alloc().
 */
85*0f3e72b5SJason Gunthorpe static bool noiommu __read_mostly;
86*0f3e72b5SJason Gunthorpe module_param_named(enable_unsafe_noiommu_mode,
87*0f3e72b5SJason Gunthorpe 		   noiommu, bool, S_IRUGO | S_IWUSR);
88*0f3e72b5SJason Gunthorpe MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
89*0f3e72b5SJason Gunthorpe #endif
90*0f3e72b5SJason Gunthorpe 
/* Maps (unsigned long)set_id -> vfio_device_set; see vfio_assign_device_set(). */
91*0f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa);
/* Defined later in this file; needed by vfio_group_alloc() for cdev_init(). */
92*0f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops;
93*0f3e72b5SJason Gunthorpe 
/*
 * Associate @device with the vfio_device_set keyed by @set_id (any
 * stable non-NULL pointer).  The first caller for a given key allocates
 * the set; concurrent/later callers share it.  The set's device_count
 * and membership list track the devices joined to it; pairs with
 * vfio_release_device_set().  Returns 0 or -errno.
 */
94*0f3e72b5SJason Gunthorpe int vfio_assign_device_set(struct vfio_device *device, void *set_id)
95*0f3e72b5SJason Gunthorpe {
96*0f3e72b5SJason Gunthorpe 	unsigned long idx = (unsigned long)set_id;
97*0f3e72b5SJason Gunthorpe 	struct vfio_device_set *new_dev_set;
98*0f3e72b5SJason Gunthorpe 	struct vfio_device_set *dev_set;
99*0f3e72b5SJason Gunthorpe 
100*0f3e72b5SJason Gunthorpe 	if (WARN_ON(!set_id))
101*0f3e72b5SJason Gunthorpe 		return -EINVAL;
102*0f3e72b5SJason Gunthorpe 
103*0f3e72b5SJason Gunthorpe 	/*
104*0f3e72b5SJason Gunthorpe 	 * Atomically acquire a singleton object in the xarray for this set_id
105*0f3e72b5SJason Gunthorpe 	 */
106*0f3e72b5SJason Gunthorpe 	xa_lock(&vfio_device_set_xa);
107*0f3e72b5SJason Gunthorpe 	dev_set = xa_load(&vfio_device_set_xa, idx);
108*0f3e72b5SJason Gunthorpe 	if (dev_set)
109*0f3e72b5SJason Gunthorpe 		goto found_get_ref;
110*0f3e72b5SJason Gunthorpe 	xa_unlock(&vfio_device_set_xa);
111*0f3e72b5SJason Gunthorpe 
	/* Not found: allocate outside the lock, then racily re-insert. */
112*0f3e72b5SJason Gunthorpe 	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
113*0f3e72b5SJason Gunthorpe 	if (!new_dev_set)
114*0f3e72b5SJason Gunthorpe 		return -ENOMEM;
115*0f3e72b5SJason Gunthorpe 	mutex_init(&new_dev_set->lock);
116*0f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&new_dev_set->device_list);
117*0f3e72b5SJason Gunthorpe 	new_dev_set->set_id = set_id;
118*0f3e72b5SJason Gunthorpe 
	/*
	 * cmpxchg against NULL resolves the race with another thread that
	 * created the set while we were allocating: non-NULL result means
	 * someone beat us (use theirs), xa_is_err means insertion failed.
	 */
119*0f3e72b5SJason Gunthorpe 	xa_lock(&vfio_device_set_xa);
120*0f3e72b5SJason Gunthorpe 	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
121*0f3e72b5SJason Gunthorpe 			       GFP_KERNEL);
122*0f3e72b5SJason Gunthorpe 	if (!dev_set) {
123*0f3e72b5SJason Gunthorpe 		dev_set = new_dev_set;
124*0f3e72b5SJason Gunthorpe 		goto found_get_ref;
125*0f3e72b5SJason Gunthorpe 	}
126*0f3e72b5SJason Gunthorpe 
127*0f3e72b5SJason Gunthorpe 	kfree(new_dev_set);
128*0f3e72b5SJason Gunthorpe 	if (xa_is_err(dev_set)) {
129*0f3e72b5SJason Gunthorpe 		xa_unlock(&vfio_device_set_xa);
130*0f3e72b5SJason Gunthorpe 		return xa_err(dev_set);
131*0f3e72b5SJason Gunthorpe 	}
132*0f3e72b5SJason Gunthorpe 
133*0f3e72b5SJason Gunthorpe found_get_ref:
	/* Count under xa_lock so vfio_release_device_set() can't free it. */
134*0f3e72b5SJason Gunthorpe 	dev_set->device_count++;
135*0f3e72b5SJason Gunthorpe 	xa_unlock(&vfio_device_set_xa);
136*0f3e72b5SJason Gunthorpe 	mutex_lock(&dev_set->lock);
137*0f3e72b5SJason Gunthorpe 	device->dev_set = dev_set;
138*0f3e72b5SJason Gunthorpe 	list_add_tail(&device->dev_set_list, &dev_set->device_list);
139*0f3e72b5SJason Gunthorpe 	mutex_unlock(&dev_set->lock);
140*0f3e72b5SJason Gunthorpe 	return 0;
141*0f3e72b5SJason Gunthorpe }
142*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_assign_device_set);
143*0f3e72b5SJason Gunthorpe 
/*
 * Undo vfio_assign_device_set(): drop @device from its set and free the
 * set when the last member leaves.  Safe to call with a NULL dev_set
 * (i.e. if assignment never happened).  The decrement and erase occur
 * under xa_lock, pairing with the increment in vfio_assign_device_set().
 */
144*0f3e72b5SJason Gunthorpe static void vfio_release_device_set(struct vfio_device *device)
145*0f3e72b5SJason Gunthorpe {
146*0f3e72b5SJason Gunthorpe 	struct vfio_device_set *dev_set = device->dev_set;
147*0f3e72b5SJason Gunthorpe 
148*0f3e72b5SJason Gunthorpe 	if (!dev_set)
149*0f3e72b5SJason Gunthorpe 		return;
150*0f3e72b5SJason Gunthorpe 
151*0f3e72b5SJason Gunthorpe 	mutex_lock(&dev_set->lock);
152*0f3e72b5SJason Gunthorpe 	list_del(&device->dev_set_list);
153*0f3e72b5SJason Gunthorpe 	mutex_unlock(&dev_set->lock);
154*0f3e72b5SJason Gunthorpe 
155*0f3e72b5SJason Gunthorpe 	xa_lock(&vfio_device_set_xa);
156*0f3e72b5SJason Gunthorpe 	if (!--dev_set->device_count) {
157*0f3e72b5SJason Gunthorpe 		__xa_erase(&vfio_device_set_xa,
158*0f3e72b5SJason Gunthorpe 			   (unsigned long)dev_set->set_id);
159*0f3e72b5SJason Gunthorpe 		mutex_destroy(&dev_set->lock);
160*0f3e72b5SJason Gunthorpe 		kfree(dev_set);
161*0f3e72b5SJason Gunthorpe 	}
162*0f3e72b5SJason Gunthorpe 	xa_unlock(&vfio_device_set_xa);
163*0f3e72b5SJason Gunthorpe }
164*0f3e72b5SJason Gunthorpe 
165*0f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
166*0f3e72b5SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg)
167*0f3e72b5SJason Gunthorpe {
168*0f3e72b5SJason Gunthorpe 	if (arg != VFIO_NOIOMMU_IOMMU)
169*0f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
170*0f3e72b5SJason Gunthorpe 	if (!capable(CAP_SYS_RAWIO))
171*0f3e72b5SJason Gunthorpe 		return ERR_PTR(-EPERM);
172*0f3e72b5SJason Gunthorpe 
173*0f3e72b5SJason Gunthorpe 	return NULL;
174*0f3e72b5SJason Gunthorpe }
175*0f3e72b5SJason Gunthorpe 
/* No per-container state was allocated by vfio_noiommu_open(), so nothing to free. */
176*0f3e72b5SJason Gunthorpe static void vfio_noiommu_release(void *iommu_data)
177*0f3e72b5SJason Gunthorpe {
178*0f3e72b5SJason Gunthorpe }
179*0f3e72b5SJason Gunthorpe 
180*0f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data,
181*0f3e72b5SJason Gunthorpe 			       unsigned int cmd, unsigned long arg)
182*0f3e72b5SJason Gunthorpe {
183*0f3e72b5SJason Gunthorpe 	if (cmd == VFIO_CHECK_EXTENSION)
184*0f3e72b5SJason Gunthorpe 		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
185*0f3e72b5SJason Gunthorpe 
186*0f3e72b5SJason Gunthorpe 	return -ENOTTY;
187*0f3e72b5SJason Gunthorpe }
188*0f3e72b5SJason Gunthorpe 
/* No IOMMU to program: attaching a group to a no-IOMMU container always succeeds. */
189*0f3e72b5SJason Gunthorpe static int vfio_noiommu_attach_group(void *iommu_data,
190*0f3e72b5SJason Gunthorpe 		struct iommu_group *iommu_group, enum vfio_group_type type)
191*0f3e72b5SJason Gunthorpe {
192*0f3e72b5SJason Gunthorpe 	return 0;
193*0f3e72b5SJason Gunthorpe }
194*0f3e72b5SJason Gunthorpe 
/* Counterpart of vfio_noiommu_attach_group(); nothing was set up, so a no-op. */
195*0f3e72b5SJason Gunthorpe static void vfio_noiommu_detach_group(void *iommu_data,
196*0f3e72b5SJason Gunthorpe 				      struct iommu_group *iommu_group)
197*0f3e72b5SJason Gunthorpe {
198*0f3e72b5SJason Gunthorpe }
199*0f3e72b5SJason Gunthorpe 
/*
 * Stub IOMMU "backend" for no-IOMMU mode: no mapping ops at all, just
 * the minimal open/release/ioctl/attach/detach surface.  Selected only
 * for containers flagged noiommu (see vfio_iommu_driver_allowed()).
 */
200*0f3e72b5SJason Gunthorpe static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
201*0f3e72b5SJason Gunthorpe 	.name = "vfio-noiommu",
202*0f3e72b5SJason Gunthorpe 	.owner = THIS_MODULE,
203*0f3e72b5SJason Gunthorpe 	.open = vfio_noiommu_open,
204*0f3e72b5SJason Gunthorpe 	.release = vfio_noiommu_release,
205*0f3e72b5SJason Gunthorpe 	.ioctl = vfio_noiommu_ioctl,
206*0f3e72b5SJason Gunthorpe 	.attach_group = vfio_noiommu_attach_group,
207*0f3e72b5SJason Gunthorpe 	.detach_group = vfio_noiommu_detach_group,
208*0f3e72b5SJason Gunthorpe };
209*0f3e72b5SJason Gunthorpe 
210*0f3e72b5SJason Gunthorpe /*
211*0f3e72b5SJason Gunthorpe  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
212*0f3e72b5SJason Gunthorpe  * use vfio-noiommu.
213*0f3e72b5SJason Gunthorpe  */
/* Pair noiommu containers with vfio_noiommu_ops and nothing else (and vice versa). */
214*0f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
215*0f3e72b5SJason Gunthorpe 		const struct vfio_iommu_driver *driver)
216*0f3e72b5SJason Gunthorpe {
217*0f3e72b5SJason Gunthorpe 	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
218*0f3e72b5SJason Gunthorpe }
219*0f3e72b5SJason Gunthorpe #else
/* Without CONFIG_VFIO_NOIOMMU there is no noiommu pairing to enforce. */
220*0f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
221*0f3e72b5SJason Gunthorpe 		const struct vfio_iommu_driver *driver)
222*0f3e72b5SJason Gunthorpe {
223*0f3e72b5SJason Gunthorpe 	return true;
224*0f3e72b5SJason Gunthorpe }
225*0f3e72b5SJason Gunthorpe #endif /* CONFIG_VFIO_NOIOMMU */
226*0f3e72b5SJason Gunthorpe 
227*0f3e72b5SJason Gunthorpe /*
228*0f3e72b5SJason Gunthorpe  * IOMMU driver registration
229*0f3e72b5SJason Gunthorpe  */
230*0f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
231*0f3e72b5SJason Gunthorpe {
232*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver, *tmp;
233*0f3e72b5SJason Gunthorpe 
234*0f3e72b5SJason Gunthorpe 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
235*0f3e72b5SJason Gunthorpe 		return -EINVAL;
236*0f3e72b5SJason Gunthorpe 
237*0f3e72b5SJason Gunthorpe 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
238*0f3e72b5SJason Gunthorpe 	if (!driver)
239*0f3e72b5SJason Gunthorpe 		return -ENOMEM;
240*0f3e72b5SJason Gunthorpe 
241*0f3e72b5SJason Gunthorpe 	driver->ops = ops;
242*0f3e72b5SJason Gunthorpe 
243*0f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
244*0f3e72b5SJason Gunthorpe 
245*0f3e72b5SJason Gunthorpe 	/* Check for duplicates */
246*0f3e72b5SJason Gunthorpe 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
247*0f3e72b5SJason Gunthorpe 		if (tmp->ops == ops) {
248*0f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
249*0f3e72b5SJason Gunthorpe 			kfree(driver);
250*0f3e72b5SJason Gunthorpe 			return -EINVAL;
251*0f3e72b5SJason Gunthorpe 		}
252*0f3e72b5SJason Gunthorpe 	}
253*0f3e72b5SJason Gunthorpe 
254*0f3e72b5SJason Gunthorpe 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
255*0f3e72b5SJason Gunthorpe 
256*0f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
257*0f3e72b5SJason Gunthorpe 
258*0f3e72b5SJason Gunthorpe 	return 0;
259*0f3e72b5SJason Gunthorpe }
260*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
261*0f3e72b5SJason Gunthorpe 
262*0f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
263*0f3e72b5SJason Gunthorpe {
264*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
265*0f3e72b5SJason Gunthorpe 
266*0f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
267*0f3e72b5SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
268*0f3e72b5SJason Gunthorpe 		if (driver->ops == ops) {
269*0f3e72b5SJason Gunthorpe 			list_del(&driver->vfio_next);
270*0f3e72b5SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
271*0f3e72b5SJason Gunthorpe 			kfree(driver);
272*0f3e72b5SJason Gunthorpe 			return;
273*0f3e72b5SJason Gunthorpe 		}
274*0f3e72b5SJason Gunthorpe 	}
275*0f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
276*0f3e72b5SJason Gunthorpe }
277*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
278*0f3e72b5SJason Gunthorpe 
279*0f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group);
280*0f3e72b5SJason Gunthorpe 
281*0f3e72b5SJason Gunthorpe /*
282*0f3e72b5SJason Gunthorpe  * Container objects - containers are created when /dev/vfio/vfio is
283*0f3e72b5SJason Gunthorpe  * opened, but their lifecycle extends until the last user is done, so
284*0f3e72b5SJason Gunthorpe  * it's freed via kref.  Must support container/group/device being
285*0f3e72b5SJason Gunthorpe  * closed in any order.
286*0f3e72b5SJason Gunthorpe  */
/* Take an additional reference on @container (caller must already hold one). */
287*0f3e72b5SJason Gunthorpe static void vfio_container_get(struct vfio_container *container)
288*0f3e72b5SJason Gunthorpe {
289*0f3e72b5SJason Gunthorpe 	kref_get(&container->kref);
290*0f3e72b5SJason Gunthorpe }
291*0f3e72b5SJason Gunthorpe 
292*0f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref)
293*0f3e72b5SJason Gunthorpe {
294*0f3e72b5SJason Gunthorpe 	struct vfio_container *container;
295*0f3e72b5SJason Gunthorpe 	container = container_of(kref, struct vfio_container, kref);
296*0f3e72b5SJason Gunthorpe 
297*0f3e72b5SJason Gunthorpe 	kfree(container);
298*0f3e72b5SJason Gunthorpe }
299*0f3e72b5SJason Gunthorpe 
/* Drop a reference; vfio_container_release() runs on the final put. */
300*0f3e72b5SJason Gunthorpe static void vfio_container_put(struct vfio_container *container)
301*0f3e72b5SJason Gunthorpe {
302*0f3e72b5SJason Gunthorpe 	kref_put(&container->kref, vfio_container_release);
303*0f3e72b5SJason Gunthorpe }
304*0f3e72b5SJason Gunthorpe 
305*0f3e72b5SJason Gunthorpe /*
306*0f3e72b5SJason Gunthorpe  * Group objects - create, release, get, put, search
307*0f3e72b5SJason Gunthorpe  */
/*
 * Find the vfio_group wrapping @iommu_group and take a reference on it,
 * or return NULL.  Caller must hold vfio.group_lock — that is what makes
 * the "find + get" atomic against the final put in vfio_group_put().
 */
308*0f3e72b5SJason Gunthorpe static struct vfio_group *
309*0f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
310*0f3e72b5SJason Gunthorpe {
311*0f3e72b5SJason Gunthorpe 	struct vfio_group *group;
312*0f3e72b5SJason Gunthorpe 
313*0f3e72b5SJason Gunthorpe 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
314*0f3e72b5SJason Gunthorpe 		if (group->iommu_group == iommu_group) {
315*0f3e72b5SJason Gunthorpe 			vfio_group_get(group);
316*0f3e72b5SJason Gunthorpe 			return group;
317*0f3e72b5SJason Gunthorpe 		}
318*0f3e72b5SJason Gunthorpe 	}
319*0f3e72b5SJason Gunthorpe 	return NULL;
320*0f3e72b5SJason Gunthorpe }
321*0f3e72b5SJason Gunthorpe 
/* Locked wrapper for __vfio_group_get_from_iommu(); NULL if no group exists. */
322*0f3e72b5SJason Gunthorpe static struct vfio_group *
323*0f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group)
324*0f3e72b5SJason Gunthorpe {
325*0f3e72b5SJason Gunthorpe 	struct vfio_group *group;
326*0f3e72b5SJason Gunthorpe 
327*0f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.group_lock);
328*0f3e72b5SJason Gunthorpe 	group = __vfio_group_get_from_iommu(iommu_group);
329*0f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
330*0f3e72b5SJason Gunthorpe 	return group;
331*0f3e72b5SJason Gunthorpe }
332*0f3e72b5SJason Gunthorpe 
/*
 * struct device release callback, invoked when the last put_device() on
 * group->dev lands.  Unwinds what vfio_group_alloc() acquired: the
 * device_lock mutex, the iommu_group reference, and the cdev minor.
 */
333*0f3e72b5SJason Gunthorpe static void vfio_group_release(struct device *dev)
334*0f3e72b5SJason Gunthorpe {
335*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
336*0f3e72b5SJason Gunthorpe 
337*0f3e72b5SJason Gunthorpe 	mutex_destroy(&group->device_lock);
338*0f3e72b5SJason Gunthorpe 	iommu_group_put(group->iommu_group);
339*0f3e72b5SJason Gunthorpe 	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
340*0f3e72b5SJason Gunthorpe 	kfree(group);
341*0f3e72b5SJason Gunthorpe }
342*0f3e72b5SJason Gunthorpe 
/*
 * Allocate and initialize (but do not publish) a vfio_group for
 * @iommu_group: reserve a char-dev minor, set up the embedded device
 * and cdev, and take a reference on the iommu_group.  Everything here
 * is undone by vfio_group_release() when the last put_device() runs;
 * vfio_create_group() is responsible for making the group visible.
 */
343*0f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
344*0f3e72b5SJason Gunthorpe 					   enum vfio_group_type type)
345*0f3e72b5SJason Gunthorpe {
346*0f3e72b5SJason Gunthorpe 	struct vfio_group *group;
347*0f3e72b5SJason Gunthorpe 	int minor;
348*0f3e72b5SJason Gunthorpe 
349*0f3e72b5SJason Gunthorpe 	group = kzalloc(sizeof(*group), GFP_KERNEL);
350*0f3e72b5SJason Gunthorpe 	if (!group)
351*0f3e72b5SJason Gunthorpe 		return ERR_PTR(-ENOMEM);
352*0f3e72b5SJason Gunthorpe 
353*0f3e72b5SJason Gunthorpe 	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
354*0f3e72b5SJason Gunthorpe 	if (minor < 0) {
355*0f3e72b5SJason Gunthorpe 		kfree(group);
356*0f3e72b5SJason Gunthorpe 		return ERR_PTR(minor);
357*0f3e72b5SJason Gunthorpe 	}
358*0f3e72b5SJason Gunthorpe 
	/* From here on, cleanup happens through put_device()/vfio_group_release(). */
359*0f3e72b5SJason Gunthorpe 	device_initialize(&group->dev);
360*0f3e72b5SJason Gunthorpe 	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
361*0f3e72b5SJason Gunthorpe 	group->dev.class = vfio.class;
362*0f3e72b5SJason Gunthorpe 	group->dev.release = vfio_group_release;
363*0f3e72b5SJason Gunthorpe 	cdev_init(&group->cdev, &vfio_group_fops);
364*0f3e72b5SJason Gunthorpe 	group->cdev.owner = THIS_MODULE;
365*0f3e72b5SJason Gunthorpe 
366*0f3e72b5SJason Gunthorpe 	refcount_set(&group->users, 1);
367*0f3e72b5SJason Gunthorpe 	init_rwsem(&group->group_rwsem);
368*0f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&group->device_list);
369*0f3e72b5SJason Gunthorpe 	mutex_init(&group->device_lock);
370*0f3e72b5SJason Gunthorpe 	group->iommu_group = iommu_group;
371*0f3e72b5SJason Gunthorpe 	/* put in vfio_group_release() */
372*0f3e72b5SJason Gunthorpe 	iommu_group_ref_get(iommu_group);
373*0f3e72b5SJason Gunthorpe 	group->type = type;
374*0f3e72b5SJason Gunthorpe 	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
375*0f3e72b5SJason Gunthorpe 
376*0f3e72b5SJason Gunthorpe 	return group;
377*0f3e72b5SJason Gunthorpe }
378*0f3e72b5SJason Gunthorpe 
/*
 * Create and publish a vfio_group for @iommu_group: allocate it, name
 * it (with a "noiommu-" prefix for VFIO_NO_IOMMU groups), add the char
 * device and link it into vfio.group_list — all under vfio.group_lock
 * so a concurrent creator is detected and its group returned instead.
 * Returns the live group or an ERR_PTR.
 */
379*0f3e72b5SJason Gunthorpe static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
380*0f3e72b5SJason Gunthorpe 		enum vfio_group_type type)
381*0f3e72b5SJason Gunthorpe {
382*0f3e72b5SJason Gunthorpe 	struct vfio_group *group;
383*0f3e72b5SJason Gunthorpe 	struct vfio_group *ret;
384*0f3e72b5SJason Gunthorpe 	int err;
385*0f3e72b5SJason Gunthorpe 
386*0f3e72b5SJason Gunthorpe 	group = vfio_group_alloc(iommu_group, type);
387*0f3e72b5SJason Gunthorpe 	if (IS_ERR(group))
388*0f3e72b5SJason Gunthorpe 		return group;
389*0f3e72b5SJason Gunthorpe 
390*0f3e72b5SJason Gunthorpe 	err = dev_set_name(&group->dev, "%s%d",
391*0f3e72b5SJason Gunthorpe 			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
392*0f3e72b5SJason Gunthorpe 			   iommu_group_id(iommu_group));
393*0f3e72b5SJason Gunthorpe 	if (err) {
394*0f3e72b5SJason Gunthorpe 		ret = ERR_PTR(err);
395*0f3e72b5SJason Gunthorpe 		goto err_put;
396*0f3e72b5SJason Gunthorpe 	}
397*0f3e72b5SJason Gunthorpe 
398*0f3e72b5SJason Gunthorpe 	mutex_lock(&vfio.group_lock);
399*0f3e72b5SJason Gunthorpe 
400*0f3e72b5SJason Gunthorpe 	/* Did we race creating this group? */
401*0f3e72b5SJason Gunthorpe 	ret = __vfio_group_get_from_iommu(iommu_group);
402*0f3e72b5SJason Gunthorpe 	if (ret)
	/* Yes: return the winner's group; ours is dropped via err_unlock/err_put. */
403*0f3e72b5SJason Gunthorpe 		goto err_unlock;
404*0f3e72b5SJason Gunthorpe 
405*0f3e72b5SJason Gunthorpe 	err = cdev_device_add(&group->cdev, &group->dev);
406*0f3e72b5SJason Gunthorpe 	if (err) {
407*0f3e72b5SJason Gunthorpe 		ret = ERR_PTR(err);
408*0f3e72b5SJason Gunthorpe 		goto err_unlock;
409*0f3e72b5SJason Gunthorpe 	}
410*0f3e72b5SJason Gunthorpe 
411*0f3e72b5SJason Gunthorpe 	list_add(&group->vfio_next, &vfio.group_list);
412*0f3e72b5SJason Gunthorpe 
413*0f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
414*0f3e72b5SJason Gunthorpe 	return group;
415*0f3e72b5SJason Gunthorpe 
416*0f3e72b5SJason Gunthorpe err_unlock:
417*0f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
418*0f3e72b5SJason Gunthorpe err_put:
419*0f3e72b5SJason Gunthorpe 	put_device(&group->dev);
420*0f3e72b5SJason Gunthorpe 	return ret;
421*0f3e72b5SJason Gunthorpe }
422*0f3e72b5SJason Gunthorpe 
/*
 * Drop a group reference.  The final decrement is taken together with
 * vfio.group_lock (refcount_dec_and_mutex_lock) so the group is removed
 * from vfio.group_list atomically with hitting zero — lookups can never
 * hand out a reference to a group that is being torn down.
 */
423*0f3e72b5SJason Gunthorpe static void vfio_group_put(struct vfio_group *group)
424*0f3e72b5SJason Gunthorpe {
425*0f3e72b5SJason Gunthorpe 	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
426*0f3e72b5SJason Gunthorpe 		return;
427*0f3e72b5SJason Gunthorpe 
428*0f3e72b5SJason Gunthorpe 	/*
429*0f3e72b5SJason Gunthorpe 	 * These data structures all have paired operations that can only be
430*0f3e72b5SJason Gunthorpe 	 * undone when the caller holds a live reference on the group. Since all
431*0f3e72b5SJason Gunthorpe 	 * pairs must be undone these WARN_ON's indicate some caller did not
432*0f3e72b5SJason Gunthorpe 	 * properly hold the group reference.
433*0f3e72b5SJason Gunthorpe 	 */
434*0f3e72b5SJason Gunthorpe 	WARN_ON(!list_empty(&group->device_list));
435*0f3e72b5SJason Gunthorpe 	WARN_ON(group->container || group->container_users);
436*0f3e72b5SJason Gunthorpe 	WARN_ON(group->notifier.head);
437*0f3e72b5SJason Gunthorpe 
438*0f3e72b5SJason Gunthorpe 	list_del(&group->vfio_next);
439*0f3e72b5SJason Gunthorpe 	cdev_device_del(&group->cdev, &group->dev);
440*0f3e72b5SJason Gunthorpe 	mutex_unlock(&vfio.group_lock);
441*0f3e72b5SJason Gunthorpe 
	/* Final teardown (free, iommu_group put, minor release) in vfio_group_release(). */
442*0f3e72b5SJason Gunthorpe 	put_device(&group->dev);
443*0f3e72b5SJason Gunthorpe }
444*0f3e72b5SJason Gunthorpe 
/* Take an additional group reference (caller must already hold one). */
445*0f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group)
446*0f3e72b5SJason Gunthorpe {
447*0f3e72b5SJason Gunthorpe 	refcount_inc(&group->users);
448*0f3e72b5SJason Gunthorpe }
449*0f3e72b5SJason Gunthorpe 
450*0f3e72b5SJason Gunthorpe /*
451*0f3e72b5SJason Gunthorpe  * Device objects - create, release, get, put, search
452*0f3e72b5SJason Gunthorpe  */
453*0f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */
/* Drop a device reference; the last put wakes waiters on device->comp. */
454*0f3e72b5SJason Gunthorpe static void vfio_device_put(struct vfio_device *device)
455*0f3e72b5SJason Gunthorpe {
456*0f3e72b5SJason Gunthorpe 	if (refcount_dec_and_test(&device->refcount))
457*0f3e72b5SJason Gunthorpe 		complete(&device->comp);
458*0f3e72b5SJason Gunthorpe }
459*0f3e72b5SJason Gunthorpe 
/* Take a device reference unless the count already hit zero; true on success. */
460*0f3e72b5SJason Gunthorpe static bool vfio_device_try_get(struct vfio_device *device)
461*0f3e72b5SJason Gunthorpe {
462*0f3e72b5SJason Gunthorpe 	return refcount_inc_not_zero(&device->refcount);
463*0f3e72b5SJason Gunthorpe }
464*0f3e72b5SJason Gunthorpe 
465*0f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
466*0f3e72b5SJason Gunthorpe 						 struct device *dev)
467*0f3e72b5SJason Gunthorpe {
468*0f3e72b5SJason Gunthorpe 	struct vfio_device *device;
469*0f3e72b5SJason Gunthorpe 
470*0f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
471*0f3e72b5SJason Gunthorpe 	list_for_each_entry(device, &group->device_list, group_next) {
472*0f3e72b5SJason Gunthorpe 		if (device->dev == dev && vfio_device_try_get(device)) {
473*0f3e72b5SJason Gunthorpe 			mutex_unlock(&group->device_lock);
474*0f3e72b5SJason Gunthorpe 			return device;
475*0f3e72b5SJason Gunthorpe 		}
476*0f3e72b5SJason Gunthorpe 	}
477*0f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
478*0f3e72b5SJason Gunthorpe 	return NULL;
479*0f3e72b5SJason Gunthorpe }
480*0f3e72b5SJason Gunthorpe 
481*0f3e72b5SJason Gunthorpe /*
482*0f3e72b5SJason Gunthorpe  * VFIO driver API
483*0f3e72b5SJason Gunthorpe  */
/*
 * Initialize the driver-embedded vfio_device: completion used for
 * unregistration waiting, plus the dev/ops back-pointers.  Pairs with
 * vfio_uninit_group_dev().
 */
484*0f3e72b5SJason Gunthorpe void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
485*0f3e72b5SJason Gunthorpe 			 const struct vfio_device_ops *ops)
486*0f3e72b5SJason Gunthorpe {
487*0f3e72b5SJason Gunthorpe 	init_completion(&device->comp);
488*0f3e72b5SJason Gunthorpe 	device->dev = dev;
489*0f3e72b5SJason Gunthorpe 	device->ops = ops;
490*0f3e72b5SJason Gunthorpe }
491*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_init_group_dev);
492*0f3e72b5SJason Gunthorpe 
/* Undo vfio_init_group_dev(): detach the device from its dev_set, if any. */
493*0f3e72b5SJason Gunthorpe void vfio_uninit_group_dev(struct vfio_device *device)
494*0f3e72b5SJason Gunthorpe {
495*0f3e72b5SJason Gunthorpe 	vfio_release_device_set(device);
496*0f3e72b5SJason Gunthorpe }
497*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
498*0f3e72b5SJason Gunthorpe 
/*
 * Build a VFIO-owned iommu_group (no kernel IOMMU backing) for @dev and
 * wrap it in a new vfio_group of @type — used for no-iommu and emulated
 * devices that arrive without an iommu_group.  On success the vfio_group
 * holds its own iommu_group reference, so the local one is dropped
 * either way.  Returns the group or an ERR_PTR with everything unwound.
 */
499*0f3e72b5SJason Gunthorpe static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
500*0f3e72b5SJason Gunthorpe 		enum vfio_group_type type)
501*0f3e72b5SJason Gunthorpe {
502*0f3e72b5SJason Gunthorpe 	struct iommu_group *iommu_group;
503*0f3e72b5SJason Gunthorpe 	struct vfio_group *group;
504*0f3e72b5SJason Gunthorpe 	int ret;
505*0f3e72b5SJason Gunthorpe 
506*0f3e72b5SJason Gunthorpe 	iommu_group = iommu_group_alloc();
507*0f3e72b5SJason Gunthorpe 	if (IS_ERR(iommu_group))
508*0f3e72b5SJason Gunthorpe 		return ERR_CAST(iommu_group);
509*0f3e72b5SJason Gunthorpe 
510*0f3e72b5SJason Gunthorpe 	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
511*0f3e72b5SJason Gunthorpe 	if (ret)
512*0f3e72b5SJason Gunthorpe 		goto out_put_group;
513*0f3e72b5SJason Gunthorpe 	ret = iommu_group_add_device(iommu_group, dev);
514*0f3e72b5SJason Gunthorpe 	if (ret)
515*0f3e72b5SJason Gunthorpe 		goto out_put_group;
516*0f3e72b5SJason Gunthorpe 
517*0f3e72b5SJason Gunthorpe 	group = vfio_create_group(iommu_group, type);
518*0f3e72b5SJason Gunthorpe 	if (IS_ERR(group)) {
519*0f3e72b5SJason Gunthorpe 		ret = PTR_ERR(group);
520*0f3e72b5SJason Gunthorpe 		goto out_remove_device;
521*0f3e72b5SJason Gunthorpe 	}
522*0f3e72b5SJason Gunthorpe 	iommu_group_put(iommu_group);
523*0f3e72b5SJason Gunthorpe 	return group;
524*0f3e72b5SJason Gunthorpe 
525*0f3e72b5SJason Gunthorpe out_remove_device:
526*0f3e72b5SJason Gunthorpe 	iommu_group_remove_device(dev);
527*0f3e72b5SJason Gunthorpe out_put_group:
528*0f3e72b5SJason Gunthorpe 	iommu_group_put(iommu_group);
529*0f3e72b5SJason Gunthorpe 	return ERR_PTR(ret);
530*0f3e72b5SJason Gunthorpe }
531*0f3e72b5SJason Gunthorpe 
/*
 * Resolve the vfio_group for @dev, with a reference held: use the
 * device's existing iommu_group (finding or creating the wrapping
 * vfio_group), or — with the noiommu module parameter set — fabricate
 * an unsafe no-IOMMU group, tainting the kernel.  Returns ERR_PTR when
 * the device has no iommu_group and noiommu is off, or when the IOMMU
 * can't guarantee cache-coherent DMA.
 */
532*0f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
533*0f3e72b5SJason Gunthorpe {
534*0f3e72b5SJason Gunthorpe 	struct iommu_group *iommu_group;
535*0f3e72b5SJason Gunthorpe 	struct vfio_group *group;
536*0f3e72b5SJason Gunthorpe 
537*0f3e72b5SJason Gunthorpe 	iommu_group = iommu_group_get(dev);
538*0f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU
539*0f3e72b5SJason Gunthorpe 	if (!iommu_group && noiommu) {
540*0f3e72b5SJason Gunthorpe 		/*
541*0f3e72b5SJason Gunthorpe 		 * With noiommu enabled, create an IOMMU group for devices that
542*0f3e72b5SJason Gunthorpe 		 * don't already have one, implying no IOMMU hardware/driver
543*0f3e72b5SJason Gunthorpe 		 * exists.  Taint the kernel because we're about to give a DMA
544*0f3e72b5SJason Gunthorpe 		 * capable device to a user without IOMMU protection.
545*0f3e72b5SJason Gunthorpe 		 */
546*0f3e72b5SJason Gunthorpe 		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
547*0f3e72b5SJason Gunthorpe 		if (!IS_ERR(group)) {
548*0f3e72b5SJason Gunthorpe 			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
549*0f3e72b5SJason Gunthorpe 			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
550*0f3e72b5SJason Gunthorpe 		}
551*0f3e72b5SJason Gunthorpe 		return group;
552*0f3e72b5SJason Gunthorpe 	}
553*0f3e72b5SJason Gunthorpe #endif
554*0f3e72b5SJason Gunthorpe 	if (!iommu_group)
555*0f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
556*0f3e72b5SJason Gunthorpe 
557*0f3e72b5SJason Gunthorpe 	/*
558*0f3e72b5SJason Gunthorpe 	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
559*0f3e72b5SJason Gunthorpe 	 * restore cache coherency. It has to be checked here because it is only
560*0f3e72b5SJason Gunthorpe 	 * valid for cases where we are using iommu groups.
561*0f3e72b5SJason Gunthorpe 	 */
562*0f3e72b5SJason Gunthorpe 	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
563*0f3e72b5SJason Gunthorpe 		iommu_group_put(iommu_group);
564*0f3e72b5SJason Gunthorpe 		return ERR_PTR(-EINVAL);
565*0f3e72b5SJason Gunthorpe 	}
566*0f3e72b5SJason Gunthorpe 
567*0f3e72b5SJason Gunthorpe 	group = vfio_group_get_from_iommu(iommu_group);
568*0f3e72b5SJason Gunthorpe 	if (!group)
569*0f3e72b5SJason Gunthorpe 		group = vfio_create_group(iommu_group, VFIO_IOMMU);
570*0f3e72b5SJason Gunthorpe 
571*0f3e72b5SJason Gunthorpe 	/* The vfio_group holds a reference to the iommu_group */
572*0f3e72b5SJason Gunthorpe 	iommu_group_put(iommu_group);
573*0f3e72b5SJason Gunthorpe 	return group;
574*0f3e72b5SJason Gunthorpe }
575*0f3e72b5SJason Gunthorpe 
/*
 * Common tail for device registration: pair @device with @group, give the
 * device a singleton dev_set if the driver didn't supply one, and make it
 * visible on the group's device list.  On success the caller's reference
 * on @group is transferred to device->group; on failure it is dropped.
 */
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;

	/* Group lookup/allocation may have failed; propagate the error. */
	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put(existing_device);
		/*
		 * no-iommu and emulated groups created the iommu_group
		 * membership for this device themselves; undo it on failure.
		 */
		if (group->type == VFIO_NO_IOMMU ||
		    group->type == VFIO_EMULATED_IOMMU)
			iommu_group_remove_device(device->dev);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	group->dev_counter++;
	mutex_unlock(&group->device_lock);

	return 0;
}
616*0f3e72b5SJason Gunthorpe 
617*0f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device)
618*0f3e72b5SJason Gunthorpe {
619*0f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
620*0f3e72b5SJason Gunthorpe 		vfio_group_find_or_alloc(device->dev));
621*0f3e72b5SJason Gunthorpe }
622*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev);
623*0f3e72b5SJason Gunthorpe 
624*0f3e72b5SJason Gunthorpe /*
625*0f3e72b5SJason Gunthorpe  * Register a virtual device without IOMMU backing.  The user of this
626*0f3e72b5SJason Gunthorpe  * device must not be able to directly trigger unmediated DMA.
627*0f3e72b5SJason Gunthorpe  */
628*0f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device)
629*0f3e72b5SJason Gunthorpe {
630*0f3e72b5SJason Gunthorpe 	return __vfio_register_dev(device,
631*0f3e72b5SJason Gunthorpe 		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
632*0f3e72b5SJason Gunthorpe }
633*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
634*0f3e72b5SJason Gunthorpe 
635*0f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
636*0f3e72b5SJason Gunthorpe 						     char *buf)
637*0f3e72b5SJason Gunthorpe {
638*0f3e72b5SJason Gunthorpe 	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
639*0f3e72b5SJason Gunthorpe 
640*0f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
641*0f3e72b5SJason Gunthorpe 	list_for_each_entry(it, &group->device_list, group_next) {
642*0f3e72b5SJason Gunthorpe 		int ret;
643*0f3e72b5SJason Gunthorpe 
644*0f3e72b5SJason Gunthorpe 		if (it->ops->match) {
645*0f3e72b5SJason Gunthorpe 			ret = it->ops->match(it, buf);
646*0f3e72b5SJason Gunthorpe 			if (ret < 0) {
647*0f3e72b5SJason Gunthorpe 				device = ERR_PTR(ret);
648*0f3e72b5SJason Gunthorpe 				break;
649*0f3e72b5SJason Gunthorpe 			}
650*0f3e72b5SJason Gunthorpe 		} else {
651*0f3e72b5SJason Gunthorpe 			ret = !strcmp(dev_name(it->dev), buf);
652*0f3e72b5SJason Gunthorpe 		}
653*0f3e72b5SJason Gunthorpe 
654*0f3e72b5SJason Gunthorpe 		if (ret && vfio_device_try_get(it)) {
655*0f3e72b5SJason Gunthorpe 			device = it;
656*0f3e72b5SJason Gunthorpe 			break;
657*0f3e72b5SJason Gunthorpe 		}
658*0f3e72b5SJason Gunthorpe 	}
659*0f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
660*0f3e72b5SJason Gunthorpe 
661*0f3e72b5SJason Gunthorpe 	return device;
662*0f3e72b5SJason Gunthorpe }
663*0f3e72b5SJason Gunthorpe 
664*0f3e72b5SJason Gunthorpe /*
665*0f3e72b5SJason Gunthorpe  * Decrement the device reference count and wait for the device to be
666*0f3e72b5SJason Gunthorpe  * removed.  Open file descriptors for the device... */
667*0f3e72b5SJason Gunthorpe void vfio_unregister_group_dev(struct vfio_device *device)
668*0f3e72b5SJason Gunthorpe {
669*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
670*0f3e72b5SJason Gunthorpe 	unsigned int i = 0;
671*0f3e72b5SJason Gunthorpe 	bool interrupted = false;
672*0f3e72b5SJason Gunthorpe 	long rc;
673*0f3e72b5SJason Gunthorpe 
674*0f3e72b5SJason Gunthorpe 	vfio_device_put(device);
675*0f3e72b5SJason Gunthorpe 	rc = try_wait_for_completion(&device->comp);
676*0f3e72b5SJason Gunthorpe 	while (rc <= 0) {
677*0f3e72b5SJason Gunthorpe 		if (device->ops->request)
678*0f3e72b5SJason Gunthorpe 			device->ops->request(device, i++);
679*0f3e72b5SJason Gunthorpe 
680*0f3e72b5SJason Gunthorpe 		if (interrupted) {
681*0f3e72b5SJason Gunthorpe 			rc = wait_for_completion_timeout(&device->comp,
682*0f3e72b5SJason Gunthorpe 							 HZ * 10);
683*0f3e72b5SJason Gunthorpe 		} else {
684*0f3e72b5SJason Gunthorpe 			rc = wait_for_completion_interruptible_timeout(
685*0f3e72b5SJason Gunthorpe 				&device->comp, HZ * 10);
686*0f3e72b5SJason Gunthorpe 			if (rc < 0) {
687*0f3e72b5SJason Gunthorpe 				interrupted = true;
688*0f3e72b5SJason Gunthorpe 				dev_warn(device->dev,
689*0f3e72b5SJason Gunthorpe 					 "Device is currently in use, task"
690*0f3e72b5SJason Gunthorpe 					 " \"%s\" (%d) "
691*0f3e72b5SJason Gunthorpe 					 "blocked until device is released",
692*0f3e72b5SJason Gunthorpe 					 current->comm, task_pid_nr(current));
693*0f3e72b5SJason Gunthorpe 			}
694*0f3e72b5SJason Gunthorpe 		}
695*0f3e72b5SJason Gunthorpe 	}
696*0f3e72b5SJason Gunthorpe 
697*0f3e72b5SJason Gunthorpe 	mutex_lock(&group->device_lock);
698*0f3e72b5SJason Gunthorpe 	list_del(&device->group_next);
699*0f3e72b5SJason Gunthorpe 	group->dev_counter--;
700*0f3e72b5SJason Gunthorpe 	mutex_unlock(&group->device_lock);
701*0f3e72b5SJason Gunthorpe 
702*0f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
703*0f3e72b5SJason Gunthorpe 		iommu_group_remove_device(device->dev);
704*0f3e72b5SJason Gunthorpe 
705*0f3e72b5SJason Gunthorpe 	/* Matches the get in vfio_register_group_dev() */
706*0f3e72b5SJason Gunthorpe 	vfio_group_put(group);
707*0f3e72b5SJason Gunthorpe }
708*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
709*0f3e72b5SJason Gunthorpe 
710*0f3e72b5SJason Gunthorpe /*
711*0f3e72b5SJason Gunthorpe  * VFIO base fd, /dev/vfio/vfio
712*0f3e72b5SJason Gunthorpe  */
/*
 * VFIO_CHECK_EXTENSION handler for the container fd.  Returns > 0 when
 * the extension identified by @arg is supported, 0 when it is not, or a
 * negative error reported by an iommu backend driver.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/*
				 * Once groups are attached, only drivers this
				 * container is allowed to use may answer
				 * (e.g. noiommu containers are restricted).
				 */
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				/* Skip drivers whose module is unloading */
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
761*0f3e72b5SJason Gunthorpe 
/*
 * Attach every group already in @container to the iommu driver instance
 * @data.  On any failure the groups attached so far are detached again and
 * the error is returned.  Returns -ENODEV when the group list is empty.
 *
 * hold write lock on container->group_lock
 */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Walk back over only the groups that successfully attached */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
787*0f3e72b5SJason Gunthorpe 
/*
 * VFIO_SET_IOMMU handler: find an iommu backend driver that claims support
 * for the type magic in @arg, open an instance of it, and attach all of
 * the container's groups to that instance.
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		/* e.g. noiommu containers may only use the noiommu driver */
		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		/* Skip drivers whose module is being unloaded */
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Module ref is held until the last group detaches */
		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
854*0f3e72b5SJason Gunthorpe 
855*0f3e72b5SJason Gunthorpe static long vfio_fops_unl_ioctl(struct file *filep,
856*0f3e72b5SJason Gunthorpe 				unsigned int cmd, unsigned long arg)
857*0f3e72b5SJason Gunthorpe {
858*0f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
859*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
860*0f3e72b5SJason Gunthorpe 	void *data;
861*0f3e72b5SJason Gunthorpe 	long ret = -EINVAL;
862*0f3e72b5SJason Gunthorpe 
863*0f3e72b5SJason Gunthorpe 	if (!container)
864*0f3e72b5SJason Gunthorpe 		return ret;
865*0f3e72b5SJason Gunthorpe 
866*0f3e72b5SJason Gunthorpe 	switch (cmd) {
867*0f3e72b5SJason Gunthorpe 	case VFIO_GET_API_VERSION:
868*0f3e72b5SJason Gunthorpe 		ret = VFIO_API_VERSION;
869*0f3e72b5SJason Gunthorpe 		break;
870*0f3e72b5SJason Gunthorpe 	case VFIO_CHECK_EXTENSION:
871*0f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(container, arg);
872*0f3e72b5SJason Gunthorpe 		break;
873*0f3e72b5SJason Gunthorpe 	case VFIO_SET_IOMMU:
874*0f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_set_iommu(container, arg);
875*0f3e72b5SJason Gunthorpe 		break;
876*0f3e72b5SJason Gunthorpe 	default:
877*0f3e72b5SJason Gunthorpe 		driver = container->iommu_driver;
878*0f3e72b5SJason Gunthorpe 		data = container->iommu_data;
879*0f3e72b5SJason Gunthorpe 
880*0f3e72b5SJason Gunthorpe 		if (driver) /* passthrough all unrecognized ioctls */
881*0f3e72b5SJason Gunthorpe 			ret = driver->ops->ioctl(data, cmd, arg);
882*0f3e72b5SJason Gunthorpe 	}
883*0f3e72b5SJason Gunthorpe 
884*0f3e72b5SJason Gunthorpe 	return ret;
885*0f3e72b5SJason Gunthorpe }
886*0f3e72b5SJason Gunthorpe 
887*0f3e72b5SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep)
888*0f3e72b5SJason Gunthorpe {
889*0f3e72b5SJason Gunthorpe 	struct vfio_container *container;
890*0f3e72b5SJason Gunthorpe 
891*0f3e72b5SJason Gunthorpe 	container = kzalloc(sizeof(*container), GFP_KERNEL);
892*0f3e72b5SJason Gunthorpe 	if (!container)
893*0f3e72b5SJason Gunthorpe 		return -ENOMEM;
894*0f3e72b5SJason Gunthorpe 
895*0f3e72b5SJason Gunthorpe 	INIT_LIST_HEAD(&container->group_list);
896*0f3e72b5SJason Gunthorpe 	init_rwsem(&container->group_lock);
897*0f3e72b5SJason Gunthorpe 	kref_init(&container->kref);
898*0f3e72b5SJason Gunthorpe 
899*0f3e72b5SJason Gunthorpe 	filep->private_data = container;
900*0f3e72b5SJason Gunthorpe 
901*0f3e72b5SJason Gunthorpe 	return 0;
902*0f3e72b5SJason Gunthorpe }
903*0f3e72b5SJason Gunthorpe 
904*0f3e72b5SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep)
905*0f3e72b5SJason Gunthorpe {
906*0f3e72b5SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
907*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver = container->iommu_driver;
908*0f3e72b5SJason Gunthorpe 
909*0f3e72b5SJason Gunthorpe 	if (driver && driver->ops->notify)
910*0f3e72b5SJason Gunthorpe 		driver->ops->notify(container->iommu_data,
911*0f3e72b5SJason Gunthorpe 				    VFIO_IOMMU_CONTAINER_CLOSE);
912*0f3e72b5SJason Gunthorpe 
913*0f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
914*0f3e72b5SJason Gunthorpe 
915*0f3e72b5SJason Gunthorpe 	vfio_container_put(container);
916*0f3e72b5SJason Gunthorpe 
917*0f3e72b5SJason Gunthorpe 	return 0;
918*0f3e72b5SJason Gunthorpe }
919*0f3e72b5SJason Gunthorpe 
/* File operations for the /dev/vfio/vfio container character device */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
927*0f3e72b5SJason Gunthorpe 
928*0f3e72b5SJason Gunthorpe /*
929*0f3e72b5SJason Gunthorpe  * VFIO Group fd, /dev/vfio/$GROUP
930*0f3e72b5SJason Gunthorpe  */
/*
 * Detach @group from its container: detach it from the iommu backend,
 * release DMA ownership of the iommu group and, if this was the last group
 * in the container, tear down the backend driver instance.
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	/* Matches the claim in vfio_group_set_container() */
	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	/* Drops the group's container reference from set_container */
	vfio_container_put(container);
}
964*0f3e72b5SJason Gunthorpe 
965*0f3e72b5SJason Gunthorpe /*
966*0f3e72b5SJason Gunthorpe  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
967*0f3e72b5SJason Gunthorpe  * if there was no container to unset.  Since the ioctl is called on
968*0f3e72b5SJason Gunthorpe  * the group, we know that still exists, therefore the only valid
969*0f3e72b5SJason Gunthorpe  * transition here is 1->0.
970*0f3e72b5SJason Gunthorpe  */
971*0f3e72b5SJason Gunthorpe static int vfio_group_unset_container(struct vfio_group *group)
972*0f3e72b5SJason Gunthorpe {
973*0f3e72b5SJason Gunthorpe 	lockdep_assert_held_write(&group->group_rwsem);
974*0f3e72b5SJason Gunthorpe 
975*0f3e72b5SJason Gunthorpe 	if (!group->container)
976*0f3e72b5SJason Gunthorpe 		return -EINVAL;
977*0f3e72b5SJason Gunthorpe 	if (group->container_users != 1)
978*0f3e72b5SJason Gunthorpe 		return -EBUSY;
979*0f3e72b5SJason Gunthorpe 	__vfio_group_unset_container(group);
980*0f3e72b5SJason Gunthorpe 	return 0;
981*0f3e72b5SJason Gunthorpe }
982*0f3e72b5SJason Gunthorpe 
/*
 * VFIO_GROUP_SET_CONTAINER: bind @group to the container behind
 * @container_fd.  Claims DMA ownership of the iommu group and, if the
 * container already has an iommu driver set, attaches the group to it.
 */
static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held_write(&group->group_rwsem);

	if (group->container || WARN_ON(group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto unlock_out;
	}

	/* Released again in __vfio_group_unset_container() */
	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* Undo the claim taken just above */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto unlock_out;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}
1052*0f3e72b5SJason Gunthorpe 
1053*0f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops;
1054*0f3e72b5SJason Gunthorpe 
1055*0f3e72b5SJason Gunthorpe /* true if the vfio_device has open_device() called but not close_device() */
1056*0f3e72b5SJason Gunthorpe static bool vfio_assert_device_open(struct vfio_device *device)
1057*0f3e72b5SJason Gunthorpe {
1058*0f3e72b5SJason Gunthorpe 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
1059*0f3e72b5SJason Gunthorpe }
1060*0f3e72b5SJason Gunthorpe 
1061*0f3e72b5SJason Gunthorpe static int vfio_device_assign_container(struct vfio_device *device)
1062*0f3e72b5SJason Gunthorpe {
1063*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
1064*0f3e72b5SJason Gunthorpe 
1065*0f3e72b5SJason Gunthorpe 	lockdep_assert_held_write(&group->group_rwsem);
1066*0f3e72b5SJason Gunthorpe 
1067*0f3e72b5SJason Gunthorpe 	if (!group->container || !group->container->iommu_driver ||
1068*0f3e72b5SJason Gunthorpe 	    WARN_ON(!group->container_users))
1069*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1070*0f3e72b5SJason Gunthorpe 
1071*0f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
1072*0f3e72b5SJason Gunthorpe 		return -EPERM;
1073*0f3e72b5SJason Gunthorpe 
1074*0f3e72b5SJason Gunthorpe 	get_file(group->opened_file);
1075*0f3e72b5SJason Gunthorpe 	group->container_users++;
1076*0f3e72b5SJason Gunthorpe 	return 0;
1077*0f3e72b5SJason Gunthorpe }
1078*0f3e72b5SJason Gunthorpe 
1079*0f3e72b5SJason Gunthorpe static void vfio_device_unassign_container(struct vfio_device *device)
1080*0f3e72b5SJason Gunthorpe {
1081*0f3e72b5SJason Gunthorpe 	down_write(&device->group->group_rwsem);
1082*0f3e72b5SJason Gunthorpe 	WARN_ON(device->group->container_users <= 1);
1083*0f3e72b5SJason Gunthorpe 	device->group->container_users--;
1084*0f3e72b5SJason Gunthorpe 	fput(device->group->opened_file);
1085*0f3e72b5SJason Gunthorpe 	up_write(&device->group->group_rwsem);
1086*0f3e72b5SJason Gunthorpe }
1087*0f3e72b5SJason Gunthorpe 
/*
 * Open @device and return an anonymous file for it.  On the first open the
 * driver's open_device() op runs and the device is registered with the
 * container's iommu backend.  On success the caller's device reference is
 * consumed by the returned file (put in vfio_device_fops_release()).
 */
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	/* Pin the vfio driver module while the device is open */
	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	/* Only undo open_device()/register_device() if we were first */
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* The KVM pointer only lives while the device is open */
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}
1180*0f3e72b5SJason Gunthorpe 
1181*0f3e72b5SJason Gunthorpe static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1182*0f3e72b5SJason Gunthorpe {
1183*0f3e72b5SJason Gunthorpe 	struct vfio_device *device;
1184*0f3e72b5SJason Gunthorpe 	struct file *filep;
1185*0f3e72b5SJason Gunthorpe 	int fdno;
1186*0f3e72b5SJason Gunthorpe 	int ret;
1187*0f3e72b5SJason Gunthorpe 
1188*0f3e72b5SJason Gunthorpe 	device = vfio_device_get_from_name(group, buf);
1189*0f3e72b5SJason Gunthorpe 	if (IS_ERR(device))
1190*0f3e72b5SJason Gunthorpe 		return PTR_ERR(device);
1191*0f3e72b5SJason Gunthorpe 
1192*0f3e72b5SJason Gunthorpe 	fdno = get_unused_fd_flags(O_CLOEXEC);
1193*0f3e72b5SJason Gunthorpe 	if (fdno < 0) {
1194*0f3e72b5SJason Gunthorpe 		ret = fdno;
1195*0f3e72b5SJason Gunthorpe 		goto err_put_device;
1196*0f3e72b5SJason Gunthorpe 	}
1197*0f3e72b5SJason Gunthorpe 
1198*0f3e72b5SJason Gunthorpe 	filep = vfio_device_open(device);
1199*0f3e72b5SJason Gunthorpe 	if (IS_ERR(filep)) {
1200*0f3e72b5SJason Gunthorpe 		ret = PTR_ERR(filep);
1201*0f3e72b5SJason Gunthorpe 		goto err_put_fdno;
1202*0f3e72b5SJason Gunthorpe 	}
1203*0f3e72b5SJason Gunthorpe 
1204*0f3e72b5SJason Gunthorpe 	fd_install(fdno, filep);
1205*0f3e72b5SJason Gunthorpe 	return fdno;
1206*0f3e72b5SJason Gunthorpe 
1207*0f3e72b5SJason Gunthorpe err_put_fdno:
1208*0f3e72b5SJason Gunthorpe 	put_unused_fd(fdno);
1209*0f3e72b5SJason Gunthorpe err_put_device:
1210*0f3e72b5SJason Gunthorpe 	vfio_device_put(device);
1211*0f3e72b5SJason Gunthorpe 	return ret;
1212*0f3e72b5SJason Gunthorpe }
1213*0f3e72b5SJason Gunthorpe 
1214*0f3e72b5SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep,
1215*0f3e72b5SJason Gunthorpe 				      unsigned int cmd, unsigned long arg)
1216*0f3e72b5SJason Gunthorpe {
1217*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = filep->private_data;
1218*0f3e72b5SJason Gunthorpe 	long ret = -ENOTTY;
1219*0f3e72b5SJason Gunthorpe 
1220*0f3e72b5SJason Gunthorpe 	switch (cmd) {
1221*0f3e72b5SJason Gunthorpe 	case VFIO_GROUP_GET_STATUS:
1222*0f3e72b5SJason Gunthorpe 	{
1223*0f3e72b5SJason Gunthorpe 		struct vfio_group_status status;
1224*0f3e72b5SJason Gunthorpe 		unsigned long minsz;
1225*0f3e72b5SJason Gunthorpe 
1226*0f3e72b5SJason Gunthorpe 		minsz = offsetofend(struct vfio_group_status, flags);
1227*0f3e72b5SJason Gunthorpe 
1228*0f3e72b5SJason Gunthorpe 		if (copy_from_user(&status, (void __user *)arg, minsz))
1229*0f3e72b5SJason Gunthorpe 			return -EFAULT;
1230*0f3e72b5SJason Gunthorpe 
1231*0f3e72b5SJason Gunthorpe 		if (status.argsz < minsz)
1232*0f3e72b5SJason Gunthorpe 			return -EINVAL;
1233*0f3e72b5SJason Gunthorpe 
1234*0f3e72b5SJason Gunthorpe 		status.flags = 0;
1235*0f3e72b5SJason Gunthorpe 
1236*0f3e72b5SJason Gunthorpe 		down_read(&group->group_rwsem);
1237*0f3e72b5SJason Gunthorpe 		if (group->container)
1238*0f3e72b5SJason Gunthorpe 			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
1239*0f3e72b5SJason Gunthorpe 					VFIO_GROUP_FLAGS_VIABLE;
1240*0f3e72b5SJason Gunthorpe 		else if (!iommu_group_dma_owner_claimed(group->iommu_group))
1241*0f3e72b5SJason Gunthorpe 			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
1242*0f3e72b5SJason Gunthorpe 		up_read(&group->group_rwsem);
1243*0f3e72b5SJason Gunthorpe 
1244*0f3e72b5SJason Gunthorpe 		if (copy_to_user((void __user *)arg, &status, minsz))
1245*0f3e72b5SJason Gunthorpe 			return -EFAULT;
1246*0f3e72b5SJason Gunthorpe 
1247*0f3e72b5SJason Gunthorpe 		ret = 0;
1248*0f3e72b5SJason Gunthorpe 		break;
1249*0f3e72b5SJason Gunthorpe 	}
1250*0f3e72b5SJason Gunthorpe 	case VFIO_GROUP_SET_CONTAINER:
1251*0f3e72b5SJason Gunthorpe 	{
1252*0f3e72b5SJason Gunthorpe 		int fd;
1253*0f3e72b5SJason Gunthorpe 
1254*0f3e72b5SJason Gunthorpe 		if (get_user(fd, (int __user *)arg))
1255*0f3e72b5SJason Gunthorpe 			return -EFAULT;
1256*0f3e72b5SJason Gunthorpe 
1257*0f3e72b5SJason Gunthorpe 		if (fd < 0)
1258*0f3e72b5SJason Gunthorpe 			return -EINVAL;
1259*0f3e72b5SJason Gunthorpe 
1260*0f3e72b5SJason Gunthorpe 		down_write(&group->group_rwsem);
1261*0f3e72b5SJason Gunthorpe 		ret = vfio_group_set_container(group, fd);
1262*0f3e72b5SJason Gunthorpe 		up_write(&group->group_rwsem);
1263*0f3e72b5SJason Gunthorpe 		break;
1264*0f3e72b5SJason Gunthorpe 	}
1265*0f3e72b5SJason Gunthorpe 	case VFIO_GROUP_UNSET_CONTAINER:
1266*0f3e72b5SJason Gunthorpe 		down_write(&group->group_rwsem);
1267*0f3e72b5SJason Gunthorpe 		ret = vfio_group_unset_container(group);
1268*0f3e72b5SJason Gunthorpe 		up_write(&group->group_rwsem);
1269*0f3e72b5SJason Gunthorpe 		break;
1270*0f3e72b5SJason Gunthorpe 	case VFIO_GROUP_GET_DEVICE_FD:
1271*0f3e72b5SJason Gunthorpe 	{
1272*0f3e72b5SJason Gunthorpe 		char *buf;
1273*0f3e72b5SJason Gunthorpe 
1274*0f3e72b5SJason Gunthorpe 		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
1275*0f3e72b5SJason Gunthorpe 		if (IS_ERR(buf))
1276*0f3e72b5SJason Gunthorpe 			return PTR_ERR(buf);
1277*0f3e72b5SJason Gunthorpe 
1278*0f3e72b5SJason Gunthorpe 		ret = vfio_group_get_device_fd(group, buf);
1279*0f3e72b5SJason Gunthorpe 		kfree(buf);
1280*0f3e72b5SJason Gunthorpe 		break;
1281*0f3e72b5SJason Gunthorpe 	}
1282*0f3e72b5SJason Gunthorpe 	}
1283*0f3e72b5SJason Gunthorpe 
1284*0f3e72b5SJason Gunthorpe 	return ret;
1285*0f3e72b5SJason Gunthorpe }
1286*0f3e72b5SJason Gunthorpe 
1287*0f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1288*0f3e72b5SJason Gunthorpe {
1289*0f3e72b5SJason Gunthorpe 	struct vfio_group *group =
1290*0f3e72b5SJason Gunthorpe 		container_of(inode->i_cdev, struct vfio_group, cdev);
1291*0f3e72b5SJason Gunthorpe 	int ret;
1292*0f3e72b5SJason Gunthorpe 
1293*0f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
1294*0f3e72b5SJason Gunthorpe 
1295*0f3e72b5SJason Gunthorpe 	/* users can be zero if this races with vfio_group_put() */
1296*0f3e72b5SJason Gunthorpe 	if (!refcount_inc_not_zero(&group->users)) {
1297*0f3e72b5SJason Gunthorpe 		ret = -ENODEV;
1298*0f3e72b5SJason Gunthorpe 		goto err_unlock;
1299*0f3e72b5SJason Gunthorpe 	}
1300*0f3e72b5SJason Gunthorpe 
1301*0f3e72b5SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
1302*0f3e72b5SJason Gunthorpe 		ret = -EPERM;
1303*0f3e72b5SJason Gunthorpe 		goto err_put;
1304*0f3e72b5SJason Gunthorpe 	}
1305*0f3e72b5SJason Gunthorpe 
1306*0f3e72b5SJason Gunthorpe 	/*
1307*0f3e72b5SJason Gunthorpe 	 * Do we need multiple instances of the group open?  Seems not.
1308*0f3e72b5SJason Gunthorpe 	 */
1309*0f3e72b5SJason Gunthorpe 	if (group->opened_file) {
1310*0f3e72b5SJason Gunthorpe 		ret = -EBUSY;
1311*0f3e72b5SJason Gunthorpe 		goto err_put;
1312*0f3e72b5SJason Gunthorpe 	}
1313*0f3e72b5SJason Gunthorpe 	group->opened_file = filep;
1314*0f3e72b5SJason Gunthorpe 	filep->private_data = group;
1315*0f3e72b5SJason Gunthorpe 
1316*0f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
1317*0f3e72b5SJason Gunthorpe 	return 0;
1318*0f3e72b5SJason Gunthorpe err_put:
1319*0f3e72b5SJason Gunthorpe 	vfio_group_put(group);
1320*0f3e72b5SJason Gunthorpe err_unlock:
1321*0f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
1322*0f3e72b5SJason Gunthorpe 	return ret;
1323*0f3e72b5SJason Gunthorpe }
1324*0f3e72b5SJason Gunthorpe 
1325*0f3e72b5SJason Gunthorpe static int vfio_group_fops_release(struct inode *inode, struct file *filep)
1326*0f3e72b5SJason Gunthorpe {
1327*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = filep->private_data;
1328*0f3e72b5SJason Gunthorpe 
1329*0f3e72b5SJason Gunthorpe 	filep->private_data = NULL;
1330*0f3e72b5SJason Gunthorpe 
1331*0f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
1332*0f3e72b5SJason Gunthorpe 	/*
1333*0f3e72b5SJason Gunthorpe 	 * Device FDs hold a group file reference, therefore the group release
1334*0f3e72b5SJason Gunthorpe 	 * is only called when there are no open devices.
1335*0f3e72b5SJason Gunthorpe 	 */
1336*0f3e72b5SJason Gunthorpe 	WARN_ON(group->notifier.head);
1337*0f3e72b5SJason Gunthorpe 	if (group->container) {
1338*0f3e72b5SJason Gunthorpe 		WARN_ON(group->container_users != 1);
1339*0f3e72b5SJason Gunthorpe 		__vfio_group_unset_container(group);
1340*0f3e72b5SJason Gunthorpe 	}
1341*0f3e72b5SJason Gunthorpe 	group->opened_file = NULL;
1342*0f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
1343*0f3e72b5SJason Gunthorpe 
1344*0f3e72b5SJason Gunthorpe 	vfio_group_put(group);
1345*0f3e72b5SJason Gunthorpe 
1346*0f3e72b5SJason Gunthorpe 	return 0;
1347*0f3e72b5SJason Gunthorpe }
1348*0f3e72b5SJason Gunthorpe 
1349*0f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops = {
1350*0f3e72b5SJason Gunthorpe 	.owner		= THIS_MODULE,
1351*0f3e72b5SJason Gunthorpe 	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
1352*0f3e72b5SJason Gunthorpe 	.compat_ioctl	= compat_ptr_ioctl,
1353*0f3e72b5SJason Gunthorpe 	.open		= vfio_group_fops_open,
1354*0f3e72b5SJason Gunthorpe 	.release	= vfio_group_fops_release,
1355*0f3e72b5SJason Gunthorpe };
1356*0f3e72b5SJason Gunthorpe 
1357*0f3e72b5SJason Gunthorpe /*
1358*0f3e72b5SJason Gunthorpe  * VFIO Device fd
1359*0f3e72b5SJason Gunthorpe  */
1360*0f3e72b5SJason Gunthorpe static int vfio_device_fops_release(struct inode *inode, struct file *filep)
1361*0f3e72b5SJason Gunthorpe {
1362*0f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
1363*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *iommu_driver;
1364*0f3e72b5SJason Gunthorpe 
1365*0f3e72b5SJason Gunthorpe 	mutex_lock(&device->dev_set->lock);
1366*0f3e72b5SJason Gunthorpe 	vfio_assert_device_open(device);
1367*0f3e72b5SJason Gunthorpe 	down_read(&device->group->group_rwsem);
1368*0f3e72b5SJason Gunthorpe 	if (device->open_count == 1 && device->ops->close_device)
1369*0f3e72b5SJason Gunthorpe 		device->ops->close_device(device);
1370*0f3e72b5SJason Gunthorpe 
1371*0f3e72b5SJason Gunthorpe 	iommu_driver = device->group->container->iommu_driver;
1372*0f3e72b5SJason Gunthorpe 	if (iommu_driver && iommu_driver->ops->unregister_device)
1373*0f3e72b5SJason Gunthorpe 		iommu_driver->ops->unregister_device(
1374*0f3e72b5SJason Gunthorpe 			device->group->container->iommu_data, device);
1375*0f3e72b5SJason Gunthorpe 	up_read(&device->group->group_rwsem);
1376*0f3e72b5SJason Gunthorpe 	device->open_count--;
1377*0f3e72b5SJason Gunthorpe 	if (device->open_count == 0)
1378*0f3e72b5SJason Gunthorpe 		device->kvm = NULL;
1379*0f3e72b5SJason Gunthorpe 	mutex_unlock(&device->dev_set->lock);
1380*0f3e72b5SJason Gunthorpe 
1381*0f3e72b5SJason Gunthorpe 	module_put(device->dev->driver->owner);
1382*0f3e72b5SJason Gunthorpe 
1383*0f3e72b5SJason Gunthorpe 	vfio_device_unassign_container(device);
1384*0f3e72b5SJason Gunthorpe 
1385*0f3e72b5SJason Gunthorpe 	vfio_device_put(device);
1386*0f3e72b5SJason Gunthorpe 
1387*0f3e72b5SJason Gunthorpe 	return 0;
1388*0f3e72b5SJason Gunthorpe }
1389*0f3e72b5SJason Gunthorpe 
1390*0f3e72b5SJason Gunthorpe /*
1391*0f3e72b5SJason Gunthorpe  * vfio_mig_get_next_state - Compute the next step in the FSM
1392*0f3e72b5SJason Gunthorpe  * @cur_fsm - The current state the device is in
1393*0f3e72b5SJason Gunthorpe  * @new_fsm - The target state to reach
1394*0f3e72b5SJason Gunthorpe  * @next_fsm - Pointer to the next step to get to new_fsm
1395*0f3e72b5SJason Gunthorpe  *
1396*0f3e72b5SJason Gunthorpe  * Return 0 upon success, otherwise -errno
1397*0f3e72b5SJason Gunthorpe  * Upon success the next step in the state progression between cur_fsm and
1398*0f3e72b5SJason Gunthorpe  * new_fsm will be set in next_fsm.
1399*0f3e72b5SJason Gunthorpe  *
1400*0f3e72b5SJason Gunthorpe  * This breaks down requests for combination transitions into smaller steps and
1401*0f3e72b5SJason Gunthorpe  * returns the next step to get to new_fsm. The function may need to be called
1402*0f3e72b5SJason Gunthorpe  * multiple times before reaching new_fsm.
1403*0f3e72b5SJason Gunthorpe  *
1404*0f3e72b5SJason Gunthorpe  */
1405*0f3e72b5SJason Gunthorpe int vfio_mig_get_next_state(struct vfio_device *device,
1406*0f3e72b5SJason Gunthorpe 			    enum vfio_device_mig_state cur_fsm,
1407*0f3e72b5SJason Gunthorpe 			    enum vfio_device_mig_state new_fsm,
1408*0f3e72b5SJason Gunthorpe 			    enum vfio_device_mig_state *next_fsm)
1409*0f3e72b5SJason Gunthorpe {
1410*0f3e72b5SJason Gunthorpe 	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
1411*0f3e72b5SJason Gunthorpe 	/*
1412*0f3e72b5SJason Gunthorpe 	 * The coding in this table requires the driver to implement the
1413*0f3e72b5SJason Gunthorpe 	 * following FSM arcs:
1414*0f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP
1415*0f3e72b5SJason Gunthorpe 	 *         STOP -> RESUMING
1416*0f3e72b5SJason Gunthorpe 	 *         STOP -> STOP_COPY
1417*0f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP
1418*0f3e72b5SJason Gunthorpe 	 *
1419*0f3e72b5SJason Gunthorpe 	 * If P2P is supported then the driver must also implement these FSM
1420*0f3e72b5SJason Gunthorpe 	 * arcs:
1421*0f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P
1422*0f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> RUNNING
1423*0f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> STOP
1424*0f3e72b5SJason Gunthorpe 	 *         STOP -> RUNNING_P2P
1425*0f3e72b5SJason Gunthorpe 	 * Without P2P the driver must implement:
1426*0f3e72b5SJason Gunthorpe 	 *         RUNNING -> STOP
1427*0f3e72b5SJason Gunthorpe 	 *         STOP -> RUNNING
1428*0f3e72b5SJason Gunthorpe 	 *
1429*0f3e72b5SJason Gunthorpe 	 * The coding will step through multiple states for some combination
1430*0f3e72b5SJason Gunthorpe 	 * transitions; if all optional features are supported, this means the
1431*0f3e72b5SJason Gunthorpe 	 * following ones:
1432*0f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP -> RUNNING_P2P
1433*0f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
1434*0f3e72b5SJason Gunthorpe 	 *         RESUMING -> STOP -> STOP_COPY
1435*0f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P -> STOP
1436*0f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
1437*0f3e72b5SJason Gunthorpe 	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
1438*0f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> STOP -> RESUMING
1439*0f3e72b5SJason Gunthorpe 	 *         RUNNING_P2P -> STOP -> STOP_COPY
1440*0f3e72b5SJason Gunthorpe 	 *         STOP -> RUNNING_P2P -> RUNNING
1441*0f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP -> RESUMING
1442*0f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP -> RUNNING_P2P
1443*0f3e72b5SJason Gunthorpe 	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
1444*0f3e72b5SJason Gunthorpe 	 */
1445*0f3e72b5SJason Gunthorpe 	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
1446*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP] = {
1447*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
1448*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
1449*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
1450*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
1451*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
1452*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
1453*0f3e72b5SJason Gunthorpe 		},
1454*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING] = {
1455*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
1456*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
1457*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
1458*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
1459*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
1460*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
1461*0f3e72b5SJason Gunthorpe 		},
1462*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP_COPY] = {
1463*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
1464*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
1465*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
1466*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
1467*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
1468*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
1469*0f3e72b5SJason Gunthorpe 		},
1470*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RESUMING] = {
1471*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
1472*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
1473*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
1474*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
1475*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
1476*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
1477*0f3e72b5SJason Gunthorpe 		},
1478*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
1479*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
1480*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
1481*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
1482*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
1483*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
1484*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
1485*0f3e72b5SJason Gunthorpe 		},
1486*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_ERROR] = {
1487*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
1488*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
1489*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
1490*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
1491*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
1492*0f3e72b5SJason Gunthorpe 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
1493*0f3e72b5SJason Gunthorpe 		},
1494*0f3e72b5SJason Gunthorpe 	};
1495*0f3e72b5SJason Gunthorpe 
1496*0f3e72b5SJason Gunthorpe 	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
1497*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
1498*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
1499*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
1500*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
1501*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_RUNNING_P2P] =
1502*0f3e72b5SJason Gunthorpe 			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
1503*0f3e72b5SJason Gunthorpe 		[VFIO_DEVICE_STATE_ERROR] = ~0U,
1504*0f3e72b5SJason Gunthorpe 	};
1505*0f3e72b5SJason Gunthorpe 
1506*0f3e72b5SJason Gunthorpe 	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
1507*0f3e72b5SJason Gunthorpe 		    (state_flags_table[cur_fsm] & device->migration_flags) !=
1508*0f3e72b5SJason Gunthorpe 			state_flags_table[cur_fsm]))
1509*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1510*0f3e72b5SJason Gunthorpe 
1511*0f3e72b5SJason Gunthorpe 	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
1512*0f3e72b5SJason Gunthorpe 	   (state_flags_table[new_fsm] & device->migration_flags) !=
1513*0f3e72b5SJason Gunthorpe 			state_flags_table[new_fsm])
1514*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1515*0f3e72b5SJason Gunthorpe 
1516*0f3e72b5SJason Gunthorpe 	/*
1517*0f3e72b5SJason Gunthorpe 	 * Arcs touching optional and unsupported states are skipped over. The
1518*0f3e72b5SJason Gunthorpe 	 * driver will instead see an arc from the original state to the next
1519*0f3e72b5SJason Gunthorpe 	 * logical state, as per the above comment.
1520*0f3e72b5SJason Gunthorpe 	 */
1521*0f3e72b5SJason Gunthorpe 	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
1522*0f3e72b5SJason Gunthorpe 	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
1523*0f3e72b5SJason Gunthorpe 			state_flags_table[*next_fsm])
1524*0f3e72b5SJason Gunthorpe 		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];
1525*0f3e72b5SJason Gunthorpe 
1526*0f3e72b5SJason Gunthorpe 	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
1527*0f3e72b5SJason Gunthorpe }
1528*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
1529*0f3e72b5SJason Gunthorpe 
1530*0f3e72b5SJason Gunthorpe /*
1531*0f3e72b5SJason Gunthorpe  * Convert the drivers's struct file into a FD number and return it to userspace
1532*0f3e72b5SJason Gunthorpe  */
1533*0f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
1534*0f3e72b5SJason Gunthorpe 				   struct vfio_device_feature_mig_state *mig)
1535*0f3e72b5SJason Gunthorpe {
1536*0f3e72b5SJason Gunthorpe 	int ret;
1537*0f3e72b5SJason Gunthorpe 	int fd;
1538*0f3e72b5SJason Gunthorpe 
1539*0f3e72b5SJason Gunthorpe 	fd = get_unused_fd_flags(O_CLOEXEC);
1540*0f3e72b5SJason Gunthorpe 	if (fd < 0) {
1541*0f3e72b5SJason Gunthorpe 		ret = fd;
1542*0f3e72b5SJason Gunthorpe 		goto out_fput;
1543*0f3e72b5SJason Gunthorpe 	}
1544*0f3e72b5SJason Gunthorpe 
1545*0f3e72b5SJason Gunthorpe 	mig->data_fd = fd;
1546*0f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, mig, sizeof(*mig))) {
1547*0f3e72b5SJason Gunthorpe 		ret = -EFAULT;
1548*0f3e72b5SJason Gunthorpe 		goto out_put_unused;
1549*0f3e72b5SJason Gunthorpe 	}
1550*0f3e72b5SJason Gunthorpe 	fd_install(fd, filp);
1551*0f3e72b5SJason Gunthorpe 	return 0;
1552*0f3e72b5SJason Gunthorpe 
1553*0f3e72b5SJason Gunthorpe out_put_unused:
1554*0f3e72b5SJason Gunthorpe 	put_unused_fd(fd);
1555*0f3e72b5SJason Gunthorpe out_fput:
1556*0f3e72b5SJason Gunthorpe 	fput(filp);
1557*0f3e72b5SJason Gunthorpe 	return ret;
1558*0f3e72b5SJason Gunthorpe }
1559*0f3e72b5SJason Gunthorpe 
1560*0f3e72b5SJason Gunthorpe static int
1561*0f3e72b5SJason Gunthorpe vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
1562*0f3e72b5SJason Gunthorpe 					   u32 flags, void __user *arg,
1563*0f3e72b5SJason Gunthorpe 					   size_t argsz)
1564*0f3e72b5SJason Gunthorpe {
1565*0f3e72b5SJason Gunthorpe 	size_t minsz =
1566*0f3e72b5SJason Gunthorpe 		offsetofend(struct vfio_device_feature_mig_state, data_fd);
1567*0f3e72b5SJason Gunthorpe 	struct vfio_device_feature_mig_state mig;
1568*0f3e72b5SJason Gunthorpe 	struct file *filp = NULL;
1569*0f3e72b5SJason Gunthorpe 	int ret;
1570*0f3e72b5SJason Gunthorpe 
1571*0f3e72b5SJason Gunthorpe 	if (!device->mig_ops)
1572*0f3e72b5SJason Gunthorpe 		return -ENOTTY;
1573*0f3e72b5SJason Gunthorpe 
1574*0f3e72b5SJason Gunthorpe 	ret = vfio_check_feature(flags, argsz,
1575*0f3e72b5SJason Gunthorpe 				 VFIO_DEVICE_FEATURE_SET |
1576*0f3e72b5SJason Gunthorpe 				 VFIO_DEVICE_FEATURE_GET,
1577*0f3e72b5SJason Gunthorpe 				 sizeof(mig));
1578*0f3e72b5SJason Gunthorpe 	if (ret != 1)
1579*0f3e72b5SJason Gunthorpe 		return ret;
1580*0f3e72b5SJason Gunthorpe 
1581*0f3e72b5SJason Gunthorpe 	if (copy_from_user(&mig, arg, minsz))
1582*0f3e72b5SJason Gunthorpe 		return -EFAULT;
1583*0f3e72b5SJason Gunthorpe 
1584*0f3e72b5SJason Gunthorpe 	if (flags & VFIO_DEVICE_FEATURE_GET) {
1585*0f3e72b5SJason Gunthorpe 		enum vfio_device_mig_state curr_state;
1586*0f3e72b5SJason Gunthorpe 
1587*0f3e72b5SJason Gunthorpe 		ret = device->mig_ops->migration_get_state(device,
1588*0f3e72b5SJason Gunthorpe 							   &curr_state);
1589*0f3e72b5SJason Gunthorpe 		if (ret)
1590*0f3e72b5SJason Gunthorpe 			return ret;
1591*0f3e72b5SJason Gunthorpe 		mig.device_state = curr_state;
1592*0f3e72b5SJason Gunthorpe 		goto out_copy;
1593*0f3e72b5SJason Gunthorpe 	}
1594*0f3e72b5SJason Gunthorpe 
1595*0f3e72b5SJason Gunthorpe 	/* Handle the VFIO_DEVICE_FEATURE_SET */
1596*0f3e72b5SJason Gunthorpe 	filp = device->mig_ops->migration_set_state(device, mig.device_state);
1597*0f3e72b5SJason Gunthorpe 	if (IS_ERR(filp) || !filp)
1598*0f3e72b5SJason Gunthorpe 		goto out_copy;
1599*0f3e72b5SJason Gunthorpe 
1600*0f3e72b5SJason Gunthorpe 	return vfio_ioct_mig_return_fd(filp, arg, &mig);
1601*0f3e72b5SJason Gunthorpe out_copy:
1602*0f3e72b5SJason Gunthorpe 	mig.data_fd = -1;
1603*0f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, &mig, sizeof(mig)))
1604*0f3e72b5SJason Gunthorpe 		return -EFAULT;
1605*0f3e72b5SJason Gunthorpe 	if (IS_ERR(filp))
1606*0f3e72b5SJason Gunthorpe 		return PTR_ERR(filp);
1607*0f3e72b5SJason Gunthorpe 	return 0;
1608*0f3e72b5SJason Gunthorpe }
1609*0f3e72b5SJason Gunthorpe 
1610*0f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
1611*0f3e72b5SJason Gunthorpe 					       u32 flags, void __user *arg,
1612*0f3e72b5SJason Gunthorpe 					       size_t argsz)
1613*0f3e72b5SJason Gunthorpe {
1614*0f3e72b5SJason Gunthorpe 	struct vfio_device_feature_migration mig = {
1615*0f3e72b5SJason Gunthorpe 		.flags = device->migration_flags,
1616*0f3e72b5SJason Gunthorpe 	};
1617*0f3e72b5SJason Gunthorpe 	int ret;
1618*0f3e72b5SJason Gunthorpe 
1619*0f3e72b5SJason Gunthorpe 	if (!device->mig_ops)
1620*0f3e72b5SJason Gunthorpe 		return -ENOTTY;
1621*0f3e72b5SJason Gunthorpe 
1622*0f3e72b5SJason Gunthorpe 	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
1623*0f3e72b5SJason Gunthorpe 				 sizeof(mig));
1624*0f3e72b5SJason Gunthorpe 	if (ret != 1)
1625*0f3e72b5SJason Gunthorpe 		return ret;
1626*0f3e72b5SJason Gunthorpe 	if (copy_to_user(arg, &mig, sizeof(mig)))
1627*0f3e72b5SJason Gunthorpe 		return -EFAULT;
1628*0f3e72b5SJason Gunthorpe 	return 0;
1629*0f3e72b5SJason Gunthorpe }
1630*0f3e72b5SJason Gunthorpe 
1631*0f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature(struct vfio_device *device,
1632*0f3e72b5SJason Gunthorpe 				     struct vfio_device_feature __user *arg)
1633*0f3e72b5SJason Gunthorpe {
1634*0f3e72b5SJason Gunthorpe 	size_t minsz = offsetofend(struct vfio_device_feature, flags);
1635*0f3e72b5SJason Gunthorpe 	struct vfio_device_feature feature;
1636*0f3e72b5SJason Gunthorpe 
1637*0f3e72b5SJason Gunthorpe 	if (copy_from_user(&feature, arg, minsz))
1638*0f3e72b5SJason Gunthorpe 		return -EFAULT;
1639*0f3e72b5SJason Gunthorpe 
1640*0f3e72b5SJason Gunthorpe 	if (feature.argsz < minsz)
1641*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1642*0f3e72b5SJason Gunthorpe 
1643*0f3e72b5SJason Gunthorpe 	/* Check unknown flags */
1644*0f3e72b5SJason Gunthorpe 	if (feature.flags &
1645*0f3e72b5SJason Gunthorpe 	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
1646*0f3e72b5SJason Gunthorpe 	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
1647*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1648*0f3e72b5SJason Gunthorpe 
1649*0f3e72b5SJason Gunthorpe 	/* GET & SET are mutually exclusive except with PROBE */
1650*0f3e72b5SJason Gunthorpe 	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
1651*0f3e72b5SJason Gunthorpe 	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
1652*0f3e72b5SJason Gunthorpe 	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
1653*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1654*0f3e72b5SJason Gunthorpe 
1655*0f3e72b5SJason Gunthorpe 	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
1656*0f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE_MIGRATION:
1657*0f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature_migration(
1658*0f3e72b5SJason Gunthorpe 			device, feature.flags, arg->data,
1659*0f3e72b5SJason Gunthorpe 			feature.argsz - minsz);
1660*0f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
1661*0f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature_mig_device_state(
1662*0f3e72b5SJason Gunthorpe 			device, feature.flags, arg->data,
1663*0f3e72b5SJason Gunthorpe 			feature.argsz - minsz);
1664*0f3e72b5SJason Gunthorpe 	default:
1665*0f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->device_feature))
1666*0f3e72b5SJason Gunthorpe 			return -EINVAL;
1667*0f3e72b5SJason Gunthorpe 		return device->ops->device_feature(device, feature.flags,
1668*0f3e72b5SJason Gunthorpe 						   arg->data,
1669*0f3e72b5SJason Gunthorpe 						   feature.argsz - minsz);
1670*0f3e72b5SJason Gunthorpe 	}
1671*0f3e72b5SJason Gunthorpe }
1672*0f3e72b5SJason Gunthorpe 
1673*0f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep,
1674*0f3e72b5SJason Gunthorpe 				       unsigned int cmd, unsigned long arg)
1675*0f3e72b5SJason Gunthorpe {
1676*0f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
1677*0f3e72b5SJason Gunthorpe 
1678*0f3e72b5SJason Gunthorpe 	switch (cmd) {
1679*0f3e72b5SJason Gunthorpe 	case VFIO_DEVICE_FEATURE:
1680*0f3e72b5SJason Gunthorpe 		return vfio_ioctl_device_feature(device, (void __user *)arg);
1681*0f3e72b5SJason Gunthorpe 	default:
1682*0f3e72b5SJason Gunthorpe 		if (unlikely(!device->ops->ioctl))
1683*0f3e72b5SJason Gunthorpe 			return -EINVAL;
1684*0f3e72b5SJason Gunthorpe 		return device->ops->ioctl(device, cmd, arg);
1685*0f3e72b5SJason Gunthorpe 	}
1686*0f3e72b5SJason Gunthorpe }
1687*0f3e72b5SJason Gunthorpe 
1688*0f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1689*0f3e72b5SJason Gunthorpe 				     size_t count, loff_t *ppos)
1690*0f3e72b5SJason Gunthorpe {
1691*0f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
1692*0f3e72b5SJason Gunthorpe 
1693*0f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->read))
1694*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1695*0f3e72b5SJason Gunthorpe 
1696*0f3e72b5SJason Gunthorpe 	return device->ops->read(device, buf, count, ppos);
1697*0f3e72b5SJason Gunthorpe }
1698*0f3e72b5SJason Gunthorpe 
1699*0f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep,
1700*0f3e72b5SJason Gunthorpe 				      const char __user *buf,
1701*0f3e72b5SJason Gunthorpe 				      size_t count, loff_t *ppos)
1702*0f3e72b5SJason Gunthorpe {
1703*0f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
1704*0f3e72b5SJason Gunthorpe 
1705*0f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->write))
1706*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1707*0f3e72b5SJason Gunthorpe 
1708*0f3e72b5SJason Gunthorpe 	return device->ops->write(device, buf, count, ppos);
1709*0f3e72b5SJason Gunthorpe }
1710*0f3e72b5SJason Gunthorpe 
1711*0f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1712*0f3e72b5SJason Gunthorpe {
1713*0f3e72b5SJason Gunthorpe 	struct vfio_device *device = filep->private_data;
1714*0f3e72b5SJason Gunthorpe 
1715*0f3e72b5SJason Gunthorpe 	if (unlikely(!device->ops->mmap))
1716*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1717*0f3e72b5SJason Gunthorpe 
1718*0f3e72b5SJason Gunthorpe 	return device->ops->mmap(device, vma);
1719*0f3e72b5SJason Gunthorpe }
1720*0f3e72b5SJason Gunthorpe 
1721*0f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops = {
1722*0f3e72b5SJason Gunthorpe 	.owner		= THIS_MODULE,
1723*0f3e72b5SJason Gunthorpe 	.release	= vfio_device_fops_release,
1724*0f3e72b5SJason Gunthorpe 	.read		= vfio_device_fops_read,
1725*0f3e72b5SJason Gunthorpe 	.write		= vfio_device_fops_write,
1726*0f3e72b5SJason Gunthorpe 	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
1727*0f3e72b5SJason Gunthorpe 	.compat_ioctl	= compat_ptr_ioctl,
1728*0f3e72b5SJason Gunthorpe 	.mmap		= vfio_device_fops_mmap,
1729*0f3e72b5SJason Gunthorpe };
1730*0f3e72b5SJason Gunthorpe 
1731*0f3e72b5SJason Gunthorpe /**
1732*0f3e72b5SJason Gunthorpe  * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
1733*0f3e72b5SJason Gunthorpe  * @file: VFIO group file
1734*0f3e72b5SJason Gunthorpe  *
1735*0f3e72b5SJason Gunthorpe  * The returned iommu_group is valid as long as a ref is held on the file.
1736*0f3e72b5SJason Gunthorpe  */
1737*0f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file)
1738*0f3e72b5SJason Gunthorpe {
1739*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
1740*0f3e72b5SJason Gunthorpe 
1741*0f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
1742*0f3e72b5SJason Gunthorpe 		return NULL;
1743*0f3e72b5SJason Gunthorpe 	return group->iommu_group;
1744*0f3e72b5SJason Gunthorpe }
1745*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
1746*0f3e72b5SJason Gunthorpe 
1747*0f3e72b5SJason Gunthorpe /**
1748*0f3e72b5SJason Gunthorpe  * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
1749*0f3e72b5SJason Gunthorpe  *        is always CPU cache coherent
1750*0f3e72b5SJason Gunthorpe  * @file: VFIO group file
1751*0f3e72b5SJason Gunthorpe  *
1752*0f3e72b5SJason Gunthorpe  * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
1753*0f3e72b5SJason Gunthorpe  * bit in DMA transactions. A return of false indicates that the user has
1754*0f3e72b5SJason Gunthorpe  * rights to access additional instructions such as wbinvd on x86.
1755*0f3e72b5SJason Gunthorpe  */
1756*0f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file)
1757*0f3e72b5SJason Gunthorpe {
1758*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
1759*0f3e72b5SJason Gunthorpe 	bool ret;
1760*0f3e72b5SJason Gunthorpe 
1761*0f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
1762*0f3e72b5SJason Gunthorpe 		return true;
1763*0f3e72b5SJason Gunthorpe 
1764*0f3e72b5SJason Gunthorpe 	down_read(&group->group_rwsem);
1765*0f3e72b5SJason Gunthorpe 	if (group->container) {
1766*0f3e72b5SJason Gunthorpe 		ret = vfio_ioctl_check_extension(group->container,
1767*0f3e72b5SJason Gunthorpe 						 VFIO_DMA_CC_IOMMU);
1768*0f3e72b5SJason Gunthorpe 	} else {
1769*0f3e72b5SJason Gunthorpe 		/*
1770*0f3e72b5SJason Gunthorpe 		 * Since the coherency state is determined only once a container
1771*0f3e72b5SJason Gunthorpe 		 * is attached the user must do so before they can prove they
1772*0f3e72b5SJason Gunthorpe 		 * have permission.
1773*0f3e72b5SJason Gunthorpe 		 */
1774*0f3e72b5SJason Gunthorpe 		ret = true;
1775*0f3e72b5SJason Gunthorpe 	}
1776*0f3e72b5SJason Gunthorpe 	up_read(&group->group_rwsem);
1777*0f3e72b5SJason Gunthorpe 	return ret;
1778*0f3e72b5SJason Gunthorpe }
1779*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
1780*0f3e72b5SJason Gunthorpe 
1781*0f3e72b5SJason Gunthorpe /**
1782*0f3e72b5SJason Gunthorpe  * vfio_file_set_kvm - Link a kvm with VFIO drivers
1783*0f3e72b5SJason Gunthorpe  * @file: VFIO group file
1784*0f3e72b5SJason Gunthorpe  * @kvm: KVM to link
1785*0f3e72b5SJason Gunthorpe  *
1786*0f3e72b5SJason Gunthorpe  * When a VFIO device is first opened the KVM will be available in
1787*0f3e72b5SJason Gunthorpe  * device->kvm if one was associated with the group.
1788*0f3e72b5SJason Gunthorpe  */
1789*0f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
1790*0f3e72b5SJason Gunthorpe {
1791*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
1792*0f3e72b5SJason Gunthorpe 
1793*0f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
1794*0f3e72b5SJason Gunthorpe 		return;
1795*0f3e72b5SJason Gunthorpe 
1796*0f3e72b5SJason Gunthorpe 	down_write(&group->group_rwsem);
1797*0f3e72b5SJason Gunthorpe 	group->kvm = kvm;
1798*0f3e72b5SJason Gunthorpe 	up_write(&group->group_rwsem);
1799*0f3e72b5SJason Gunthorpe }
1800*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
1801*0f3e72b5SJason Gunthorpe 
1802*0f3e72b5SJason Gunthorpe /**
1803*0f3e72b5SJason Gunthorpe  * vfio_file_has_dev - True if the VFIO file is a handle for device
1804*0f3e72b5SJason Gunthorpe  * @file: VFIO file to check
1805*0f3e72b5SJason Gunthorpe  * @device: Device that must be part of the file
1806*0f3e72b5SJason Gunthorpe  *
1807*0f3e72b5SJason Gunthorpe  * Returns true if given file has permission to manipulate the given device.
1808*0f3e72b5SJason Gunthorpe  */
1809*0f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
1810*0f3e72b5SJason Gunthorpe {
1811*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = file->private_data;
1812*0f3e72b5SJason Gunthorpe 
1813*0f3e72b5SJason Gunthorpe 	if (file->f_op != &vfio_group_fops)
1814*0f3e72b5SJason Gunthorpe 		return false;
1815*0f3e72b5SJason Gunthorpe 
1816*0f3e72b5SJason Gunthorpe 	return group == device->group;
1817*0f3e72b5SJason Gunthorpe }
1818*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev);
1819*0f3e72b5SJason Gunthorpe 
1820*0f3e72b5SJason Gunthorpe /*
1821*0f3e72b5SJason Gunthorpe  * Sub-module support
1822*0f3e72b5SJason Gunthorpe  */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	/*
	 * On allocation failure drop the old buffer too, leaving the caps
	 * struct in a consistent empty state rather than holding a stale
	 * partial chain.
	 */
	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	/* New capability lives at the old end of the buffer */
	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	/* tmp is the last entry; point it at the new capability's offset */
	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);
1865*0f3e72b5SJason Gunthorpe 
/*
 * Add @offset to every capability's ->next offset, converting the
 * buffer-relative chain offsets into offsets relative to the user's info
 * structure prior to copying the buffer out (see vfio_info_cap_add()).
 *
 * Note the traversal subtracts @offset again: once a header's ->next has
 * been shifted it no longer indexes directly into the local buffer.
 */
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
1875*0f3e72b5SJason Gunthorpe 
1876*0f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps,
1877*0f3e72b5SJason Gunthorpe 			     struct vfio_info_cap_header *cap, size_t size)
1878*0f3e72b5SJason Gunthorpe {
1879*0f3e72b5SJason Gunthorpe 	struct vfio_info_cap_header *header;
1880*0f3e72b5SJason Gunthorpe 
1881*0f3e72b5SJason Gunthorpe 	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
1882*0f3e72b5SJason Gunthorpe 	if (IS_ERR(header))
1883*0f3e72b5SJason Gunthorpe 		return PTR_ERR(header);
1884*0f3e72b5SJason Gunthorpe 
1885*0f3e72b5SJason Gunthorpe 	memcpy(header + 1, cap + 1, size - sizeof(*header));
1886*0f3e72b5SJason Gunthorpe 
1887*0f3e72b5SJason Gunthorpe 	return 0;
1888*0f3e72b5SJason Gunthorpe }
1889*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability);
1890*0f3e72b5SJason Gunthorpe 
/*
 * vfio_set_irqs_validate_and_prepare - validate a VFIO_DEVICE_SET_IRQS header
 * @hdr: header copied in from userspace by the caller
 * @num_irqs: number of IRQs the device exposes for this index
 * @max_irq_type: number of valid IRQ index values for this device
 * @data_size: out - size of the trailing data area implied by @hdr (0 for
 *	       DATA_NONE); may be NULL if the caller accepts no trailing data
 *
 * Returns 0 when the header is self-consistent, -EINVAL otherwise.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/*
	 * Reject a short argsz, an out-of-range index, a start+count that
	 * would wrap a u32, or any flag bits outside the known masks.
	 */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	/* The requested range must fall entirely within the device's IRQs */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	/* Per-element size of the trailing data, keyed by the DATA_* flag */
	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		/* argsz must be large enough to carry count elements */
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		/* Data is present but the caller cannot accept any */
		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
1938*0f3e72b5SJason Gunthorpe 
1939*0f3e72b5SJason Gunthorpe /*
1940*0f3e72b5SJason Gunthorpe  * Pin contiguous user pages and return their associated host pages for local
1941*0f3e72b5SJason Gunthorpe  * domain only.
1942*0f3e72b5SJason Gunthorpe  * @device [in]  : device
1943*0f3e72b5SJason Gunthorpe  * @iova [in]    : starting IOVA of user pages to be pinned.
1944*0f3e72b5SJason Gunthorpe  * @npage [in]   : count of pages to be pinned.  This count should not
1945*0f3e72b5SJason Gunthorpe  *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1946*0f3e72b5SJason Gunthorpe  * @prot [in]    : protection flags
1947*0f3e72b5SJason Gunthorpe  * @pages[out]   : array of host pages
1948*0f3e72b5SJason Gunthorpe  * Return error or number of pages pinned.
1949*0f3e72b5SJason Gunthorpe  */
1950*0f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
1951*0f3e72b5SJason Gunthorpe 		   int npage, int prot, struct page **pages)
1952*0f3e72b5SJason Gunthorpe {
1953*0f3e72b5SJason Gunthorpe 	struct vfio_container *container;
1954*0f3e72b5SJason Gunthorpe 	struct vfio_group *group = device->group;
1955*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
1956*0f3e72b5SJason Gunthorpe 	int ret;
1957*0f3e72b5SJason Gunthorpe 
1958*0f3e72b5SJason Gunthorpe 	if (!pages || !npage || !vfio_assert_device_open(device))
1959*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1960*0f3e72b5SJason Gunthorpe 
1961*0f3e72b5SJason Gunthorpe 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1962*0f3e72b5SJason Gunthorpe 		return -E2BIG;
1963*0f3e72b5SJason Gunthorpe 
1964*0f3e72b5SJason Gunthorpe 	if (group->dev_counter > 1)
1965*0f3e72b5SJason Gunthorpe 		return -EINVAL;
1966*0f3e72b5SJason Gunthorpe 
1967*0f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
1968*0f3e72b5SJason Gunthorpe 	container = group->container;
1969*0f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
1970*0f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->pin_pages))
1971*0f3e72b5SJason Gunthorpe 		ret = driver->ops->pin_pages(container->iommu_data,
1972*0f3e72b5SJason Gunthorpe 					     group->iommu_group, iova,
1973*0f3e72b5SJason Gunthorpe 					     npage, prot, pages);
1974*0f3e72b5SJason Gunthorpe 	else
1975*0f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
1976*0f3e72b5SJason Gunthorpe 
1977*0f3e72b5SJason Gunthorpe 	return ret;
1978*0f3e72b5SJason Gunthorpe }
1979*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages);
1980*0f3e72b5SJason Gunthorpe 
/*
 * Unpin contiguous host pages for local domain only.
 * @device [in]  : device
 * @iova [in]    : starting address of user pages to be unpinned.
 * @npage [in]   : count of pages to be unpinned.  This count should not
 *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	/* Same bounds as vfio_pin_pages(); a bad count is a caller bug */
	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	if (WARN_ON(!vfio_assert_device_open(device)))
		return;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	/*
	 * No ops check here: a caller can only unpin what it pinned, and
	 * pinning already required ->pin_pages (and thus a driver) to exist.
	 */
	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
EXPORT_SYMBOL(vfio_unpin_pages);
2006*0f3e72b5SJason Gunthorpe 
2007*0f3e72b5SJason Gunthorpe /*
2008*0f3e72b5SJason Gunthorpe  * This interface allows the CPUs to perform some sort of virtual DMA on
2009*0f3e72b5SJason Gunthorpe  * behalf of the device.
2010*0f3e72b5SJason Gunthorpe  *
2011*0f3e72b5SJason Gunthorpe  * CPUs read/write from/into a range of IOVAs pointing to user space memory
2012*0f3e72b5SJason Gunthorpe  * into/from a kernel buffer.
2013*0f3e72b5SJason Gunthorpe  *
2014*0f3e72b5SJason Gunthorpe  * As the read/write of user space memory is conducted via the CPUs and is
2015*0f3e72b5SJason Gunthorpe  * not a real device DMA, it is not necessary to pin the user space memory.
2016*0f3e72b5SJason Gunthorpe  *
2017*0f3e72b5SJason Gunthorpe  * @device [in]		: VFIO device
2018*0f3e72b5SJason Gunthorpe  * @iova [in]		: base IOVA of a user space buffer
2019*0f3e72b5SJason Gunthorpe  * @data [in]		: pointer to kernel buffer
2020*0f3e72b5SJason Gunthorpe  * @len [in]		: kernel buffer length
2021*0f3e72b5SJason Gunthorpe  * @write		: indicate read or write
2022*0f3e72b5SJason Gunthorpe  * Return error code on failure or 0 on success.
2023*0f3e72b5SJason Gunthorpe  */
2024*0f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
2025*0f3e72b5SJason Gunthorpe 		size_t len, bool write)
2026*0f3e72b5SJason Gunthorpe {
2027*0f3e72b5SJason Gunthorpe 	struct vfio_container *container;
2028*0f3e72b5SJason Gunthorpe 	struct vfio_iommu_driver *driver;
2029*0f3e72b5SJason Gunthorpe 	int ret = 0;
2030*0f3e72b5SJason Gunthorpe 
2031*0f3e72b5SJason Gunthorpe 	if (!data || len <= 0 || !vfio_assert_device_open(device))
2032*0f3e72b5SJason Gunthorpe 		return -EINVAL;
2033*0f3e72b5SJason Gunthorpe 
2034*0f3e72b5SJason Gunthorpe 	/* group->container cannot change while a vfio device is open */
2035*0f3e72b5SJason Gunthorpe 	container = device->group->container;
2036*0f3e72b5SJason Gunthorpe 	driver = container->iommu_driver;
2037*0f3e72b5SJason Gunthorpe 
2038*0f3e72b5SJason Gunthorpe 	if (likely(driver && driver->ops->dma_rw))
2039*0f3e72b5SJason Gunthorpe 		ret = driver->ops->dma_rw(container->iommu_data,
2040*0f3e72b5SJason Gunthorpe 					  iova, data, len, write);
2041*0f3e72b5SJason Gunthorpe 	else
2042*0f3e72b5SJason Gunthorpe 		ret = -ENOTTY;
2043*0f3e72b5SJason Gunthorpe 	return ret;
2044*0f3e72b5SJason Gunthorpe }
2045*0f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw);
2046*0f3e72b5SJason Gunthorpe 
/*
 * Module/class support
 */
/* devnode callback: place all vfio class devices under the vfio/ subdir */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
2054*0f3e72b5SJason Gunthorpe 
/* Misc device backing /dev/vfio/vfio; mode lets any user open it */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
2062*0f3e72b5SJason Gunthorpe 
/*
 * Module init: set up global state, register /dev/vfio/vfio, create the
 * "vfio" class and group chrdev region, and optionally register the
 * no-iommu backend.  Errors unwind in reverse order via gotos.
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

#ifdef CONFIG_VFIO_NOIOMMU
	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	/*
	 * Without CONFIG_VFIO_NOIOMMU this re-tests the (zero) result of
	 * alloc_chrdev_region() above, so the branch is never taken.
	 */
	if (ret)
		goto err_driver_register;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_driver_register:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}
2110*0f3e72b5SJason Gunthorpe 
/* Module exit: tear down everything vfio_init() created, in reverse order */
static void __exit vfio_cleanup(void)
{
	/* All groups must have been released before the module can unload */
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}
2125*0f3e72b5SJason Gunthorpe 
module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
/* Hint to load the IOMMU backend modules after this module */
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
2136