xref: /openbmc/linux/drivers/vfio/container.c (revision 9a87ffc99ec8eb8d35eed7c4f816d75f5cc9662e)
1cdc71fe4SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only
2cdc71fe4SJason Gunthorpe /*
3cdc71fe4SJason Gunthorpe  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
4cdc71fe4SJason Gunthorpe  *
5cdc71fe4SJason Gunthorpe  * VFIO container (/dev/vfio/vfio)
6cdc71fe4SJason Gunthorpe  */
7cdc71fe4SJason Gunthorpe #include <linux/file.h>
8cdc71fe4SJason Gunthorpe #include <linux/slab.h>
9cdc71fe4SJason Gunthorpe #include <linux/fs.h>
10cdc71fe4SJason Gunthorpe #include <linux/capability.h>
11cdc71fe4SJason Gunthorpe #include <linux/iommu.h>
12cdc71fe4SJason Gunthorpe #include <linux/miscdevice.h>
13cdc71fe4SJason Gunthorpe #include <linux/vfio.h>
14cdc71fe4SJason Gunthorpe #include <uapi/linux/vfio.h>
15cdc71fe4SJason Gunthorpe 
16cdc71fe4SJason Gunthorpe #include "vfio.h"
17cdc71fe4SJason Gunthorpe 
18cdc71fe4SJason Gunthorpe struct vfio_container {
19cdc71fe4SJason Gunthorpe 	struct kref			kref;
20cdc71fe4SJason Gunthorpe 	struct list_head		group_list;
21cdc71fe4SJason Gunthorpe 	struct rw_semaphore		group_lock;
22cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver	*iommu_driver;
23cdc71fe4SJason Gunthorpe 	void				*iommu_data;
24cdc71fe4SJason Gunthorpe 	bool				noiommu;
25cdc71fe4SJason Gunthorpe };
26cdc71fe4SJason Gunthorpe 
27cdc71fe4SJason Gunthorpe static struct vfio {
28cdc71fe4SJason Gunthorpe 	struct list_head		iommu_drivers_list;
29cdc71fe4SJason Gunthorpe 	struct mutex			iommu_drivers_lock;
30cdc71fe4SJason Gunthorpe } vfio;
31cdc71fe4SJason Gunthorpe 
vfio_noiommu_open(unsigned long arg)32cdc71fe4SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg)
33cdc71fe4SJason Gunthorpe {
34cdc71fe4SJason Gunthorpe 	if (arg != VFIO_NOIOMMU_IOMMU)
35cdc71fe4SJason Gunthorpe 		return ERR_PTR(-EINVAL);
36cdc71fe4SJason Gunthorpe 	if (!capable(CAP_SYS_RAWIO))
37cdc71fe4SJason Gunthorpe 		return ERR_PTR(-EPERM);
38cdc71fe4SJason Gunthorpe 
39cdc71fe4SJason Gunthorpe 	return NULL;
40cdc71fe4SJason Gunthorpe }
41cdc71fe4SJason Gunthorpe 
/* ->release() for the no-IOMMU backend: intentionally a no-op. */
static void vfio_noiommu_release(void *iommu_data)
{
	/* vfio_noiommu_open() hands back NULL, so there is nothing to free. */
}
45cdc71fe4SJason Gunthorpe 
vfio_noiommu_ioctl(void * iommu_data,unsigned int cmd,unsigned long arg)46cdc71fe4SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data,
47cdc71fe4SJason Gunthorpe 			       unsigned int cmd, unsigned long arg)
48cdc71fe4SJason Gunthorpe {
49cdc71fe4SJason Gunthorpe 	if (cmd == VFIO_CHECK_EXTENSION)
50cdc71fe4SJason Gunthorpe 		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
51cdc71fe4SJason Gunthorpe 
52cdc71fe4SJason Gunthorpe 	return -ENOTTY;
53cdc71fe4SJason Gunthorpe }
54cdc71fe4SJason Gunthorpe 
/* ->attach_group() for the no-IOMMU backend: always succeeds, does nothing. */
static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group,
				     enum vfio_group_type type)
{
	return 0;
}
60cdc71fe4SJason Gunthorpe 
/* ->detach_group() for the no-IOMMU backend: nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* vfio_noiommu_attach_group() records no state. */
}
65cdc71fe4SJason Gunthorpe 
66cdc71fe4SJason Gunthorpe static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
67cdc71fe4SJason Gunthorpe 	.name = "vfio-noiommu",
68cdc71fe4SJason Gunthorpe 	.owner = THIS_MODULE,
69cdc71fe4SJason Gunthorpe 	.open = vfio_noiommu_open,
70cdc71fe4SJason Gunthorpe 	.release = vfio_noiommu_release,
71cdc71fe4SJason Gunthorpe 	.ioctl = vfio_noiommu_ioctl,
72cdc71fe4SJason Gunthorpe 	.attach_group = vfio_noiommu_attach_group,
73cdc71fe4SJason Gunthorpe 	.detach_group = vfio_noiommu_detach_group,
74cdc71fe4SJason Gunthorpe };
75cdc71fe4SJason Gunthorpe 
76cdc71fe4SJason Gunthorpe /*
77cdc71fe4SJason Gunthorpe  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
78cdc71fe4SJason Gunthorpe  * use vfio-noiommu.
79cdc71fe4SJason Gunthorpe  */
vfio_iommu_driver_allowed(struct vfio_container * container,const struct vfio_iommu_driver * driver)80cdc71fe4SJason Gunthorpe static bool vfio_iommu_driver_allowed(struct vfio_container *container,
81cdc71fe4SJason Gunthorpe 				      const struct vfio_iommu_driver *driver)
82cdc71fe4SJason Gunthorpe {
83cdc71fe4SJason Gunthorpe 	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
84cdc71fe4SJason Gunthorpe 		return true;
85cdc71fe4SJason Gunthorpe 	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
86cdc71fe4SJason Gunthorpe }
87cdc71fe4SJason Gunthorpe 
88cdc71fe4SJason Gunthorpe /*
89cdc71fe4SJason Gunthorpe  * IOMMU driver registration
90cdc71fe4SJason Gunthorpe  */
vfio_register_iommu_driver(const struct vfio_iommu_driver_ops * ops)91cdc71fe4SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
92cdc71fe4SJason Gunthorpe {
93cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *driver, *tmp;
94cdc71fe4SJason Gunthorpe 
95cdc71fe4SJason Gunthorpe 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
96cdc71fe4SJason Gunthorpe 		return -EINVAL;
97cdc71fe4SJason Gunthorpe 
98cdc71fe4SJason Gunthorpe 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
99cdc71fe4SJason Gunthorpe 	if (!driver)
100cdc71fe4SJason Gunthorpe 		return -ENOMEM;
101cdc71fe4SJason Gunthorpe 
102cdc71fe4SJason Gunthorpe 	driver->ops = ops;
103cdc71fe4SJason Gunthorpe 
104cdc71fe4SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
105cdc71fe4SJason Gunthorpe 
106cdc71fe4SJason Gunthorpe 	/* Check for duplicates */
107cdc71fe4SJason Gunthorpe 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
108cdc71fe4SJason Gunthorpe 		if (tmp->ops == ops) {
109cdc71fe4SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
110cdc71fe4SJason Gunthorpe 			kfree(driver);
111cdc71fe4SJason Gunthorpe 			return -EINVAL;
112cdc71fe4SJason Gunthorpe 		}
113cdc71fe4SJason Gunthorpe 	}
114cdc71fe4SJason Gunthorpe 
115cdc71fe4SJason Gunthorpe 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
116cdc71fe4SJason Gunthorpe 
117cdc71fe4SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
118cdc71fe4SJason Gunthorpe 
119cdc71fe4SJason Gunthorpe 	return 0;
120cdc71fe4SJason Gunthorpe }
121cdc71fe4SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
122cdc71fe4SJason Gunthorpe 
vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops * ops)123cdc71fe4SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
124cdc71fe4SJason Gunthorpe {
125cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *driver;
126cdc71fe4SJason Gunthorpe 
127cdc71fe4SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
128cdc71fe4SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
129cdc71fe4SJason Gunthorpe 		if (driver->ops == ops) {
130cdc71fe4SJason Gunthorpe 			list_del(&driver->vfio_next);
131cdc71fe4SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
132cdc71fe4SJason Gunthorpe 			kfree(driver);
133cdc71fe4SJason Gunthorpe 			return;
134cdc71fe4SJason Gunthorpe 		}
135cdc71fe4SJason Gunthorpe 	}
136cdc71fe4SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
137cdc71fe4SJason Gunthorpe }
138cdc71fe4SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
139cdc71fe4SJason Gunthorpe 
140cdc71fe4SJason Gunthorpe /*
141cdc71fe4SJason Gunthorpe  * Container objects - containers are created when /dev/vfio/vfio is
142cdc71fe4SJason Gunthorpe  * opened, but their lifecycle extends until the last user is done, so
143cdc71fe4SJason Gunthorpe  * it's freed via kref.  Must support container/group/device being
144cdc71fe4SJason Gunthorpe  * closed in any order.
145cdc71fe4SJason Gunthorpe  */
vfio_container_release(struct kref * kref)146cdc71fe4SJason Gunthorpe static void vfio_container_release(struct kref *kref)
147cdc71fe4SJason Gunthorpe {
148cdc71fe4SJason Gunthorpe 	struct vfio_container *container;
149cdc71fe4SJason Gunthorpe 	container = container_of(kref, struct vfio_container, kref);
150cdc71fe4SJason Gunthorpe 
151cdc71fe4SJason Gunthorpe 	kfree(container);
152cdc71fe4SJason Gunthorpe }
153cdc71fe4SJason Gunthorpe 
vfio_container_get(struct vfio_container * container)154cdc71fe4SJason Gunthorpe static void vfio_container_get(struct vfio_container *container)
155cdc71fe4SJason Gunthorpe {
156cdc71fe4SJason Gunthorpe 	kref_get(&container->kref);
157cdc71fe4SJason Gunthorpe }
158cdc71fe4SJason Gunthorpe 
vfio_container_put(struct vfio_container * container)159cdc71fe4SJason Gunthorpe static void vfio_container_put(struct vfio_container *container)
160cdc71fe4SJason Gunthorpe {
161cdc71fe4SJason Gunthorpe 	kref_put(&container->kref, vfio_container_release);
162cdc71fe4SJason Gunthorpe }
163cdc71fe4SJason Gunthorpe 
vfio_device_container_register(struct vfio_device * device)164cdc71fe4SJason Gunthorpe void vfio_device_container_register(struct vfio_device *device)
165cdc71fe4SJason Gunthorpe {
166cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *iommu_driver =
167cdc71fe4SJason Gunthorpe 		device->group->container->iommu_driver;
168cdc71fe4SJason Gunthorpe 
169cdc71fe4SJason Gunthorpe 	if (iommu_driver && iommu_driver->ops->register_device)
170cdc71fe4SJason Gunthorpe 		iommu_driver->ops->register_device(
171cdc71fe4SJason Gunthorpe 			device->group->container->iommu_data, device);
172cdc71fe4SJason Gunthorpe }
173cdc71fe4SJason Gunthorpe 
vfio_device_container_unregister(struct vfio_device * device)174cdc71fe4SJason Gunthorpe void vfio_device_container_unregister(struct vfio_device *device)
175cdc71fe4SJason Gunthorpe {
176cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *iommu_driver =
177cdc71fe4SJason Gunthorpe 		device->group->container->iommu_driver;
178cdc71fe4SJason Gunthorpe 
179cdc71fe4SJason Gunthorpe 	if (iommu_driver && iommu_driver->ops->unregister_device)
180cdc71fe4SJason Gunthorpe 		iommu_driver->ops->unregister_device(
181cdc71fe4SJason Gunthorpe 			device->group->container->iommu_data, device);
182cdc71fe4SJason Gunthorpe }
183cdc71fe4SJason Gunthorpe 
1840d8227b6SJason Gunthorpe static long
vfio_container_ioctl_check_extension(struct vfio_container * container,unsigned long arg)1850d8227b6SJason Gunthorpe vfio_container_ioctl_check_extension(struct vfio_container *container,
186cdc71fe4SJason Gunthorpe 				     unsigned long arg)
187cdc71fe4SJason Gunthorpe {
188cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *driver;
189cdc71fe4SJason Gunthorpe 	long ret = 0;
190cdc71fe4SJason Gunthorpe 
191cdc71fe4SJason Gunthorpe 	down_read(&container->group_lock);
192cdc71fe4SJason Gunthorpe 
193cdc71fe4SJason Gunthorpe 	driver = container->iommu_driver;
194cdc71fe4SJason Gunthorpe 
195cdc71fe4SJason Gunthorpe 	switch (arg) {
196cdc71fe4SJason Gunthorpe 		/* No base extensions yet */
197cdc71fe4SJason Gunthorpe 	default:
198cdc71fe4SJason Gunthorpe 		/*
199cdc71fe4SJason Gunthorpe 		 * If no driver is set, poll all registered drivers for
200cdc71fe4SJason Gunthorpe 		 * extensions and return the first positive result.  If
201cdc71fe4SJason Gunthorpe 		 * a driver is already set, further queries will be passed
202cdc71fe4SJason Gunthorpe 		 * only to that driver.
203cdc71fe4SJason Gunthorpe 		 */
204cdc71fe4SJason Gunthorpe 		if (!driver) {
205cdc71fe4SJason Gunthorpe 			mutex_lock(&vfio.iommu_drivers_lock);
206cdc71fe4SJason Gunthorpe 			list_for_each_entry(driver, &vfio.iommu_drivers_list,
207cdc71fe4SJason Gunthorpe 					    vfio_next) {
208cdc71fe4SJason Gunthorpe 
209cdc71fe4SJason Gunthorpe 				if (!list_empty(&container->group_list) &&
210cdc71fe4SJason Gunthorpe 				    !vfio_iommu_driver_allowed(container,
211cdc71fe4SJason Gunthorpe 							       driver))
212cdc71fe4SJason Gunthorpe 					continue;
213cdc71fe4SJason Gunthorpe 				if (!try_module_get(driver->ops->owner))
214cdc71fe4SJason Gunthorpe 					continue;
215cdc71fe4SJason Gunthorpe 
216cdc71fe4SJason Gunthorpe 				ret = driver->ops->ioctl(NULL,
217cdc71fe4SJason Gunthorpe 							 VFIO_CHECK_EXTENSION,
218cdc71fe4SJason Gunthorpe 							 arg);
219cdc71fe4SJason Gunthorpe 				module_put(driver->ops->owner);
220cdc71fe4SJason Gunthorpe 				if (ret > 0)
221cdc71fe4SJason Gunthorpe 					break;
222cdc71fe4SJason Gunthorpe 			}
223cdc71fe4SJason Gunthorpe 			mutex_unlock(&vfio.iommu_drivers_lock);
224cdc71fe4SJason Gunthorpe 		} else
225cdc71fe4SJason Gunthorpe 			ret = driver->ops->ioctl(container->iommu_data,
226cdc71fe4SJason Gunthorpe 						 VFIO_CHECK_EXTENSION, arg);
227cdc71fe4SJason Gunthorpe 	}
228cdc71fe4SJason Gunthorpe 
229cdc71fe4SJason Gunthorpe 	up_read(&container->group_lock);
230cdc71fe4SJason Gunthorpe 
231cdc71fe4SJason Gunthorpe 	return ret;
232cdc71fe4SJason Gunthorpe }
233cdc71fe4SJason Gunthorpe 
234cdc71fe4SJason Gunthorpe /* hold write lock on container->group_lock */
__vfio_container_attach_groups(struct vfio_container * container,struct vfio_iommu_driver * driver,void * data)235cdc71fe4SJason Gunthorpe static int __vfio_container_attach_groups(struct vfio_container *container,
236cdc71fe4SJason Gunthorpe 					  struct vfio_iommu_driver *driver,
237cdc71fe4SJason Gunthorpe 					  void *data)
238cdc71fe4SJason Gunthorpe {
239cdc71fe4SJason Gunthorpe 	struct vfio_group *group;
240cdc71fe4SJason Gunthorpe 	int ret = -ENODEV;
241cdc71fe4SJason Gunthorpe 
242cdc71fe4SJason Gunthorpe 	list_for_each_entry(group, &container->group_list, container_next) {
243cdc71fe4SJason Gunthorpe 		ret = driver->ops->attach_group(data, group->iommu_group,
244cdc71fe4SJason Gunthorpe 						group->type);
245cdc71fe4SJason Gunthorpe 		if (ret)
246cdc71fe4SJason Gunthorpe 			goto unwind;
247cdc71fe4SJason Gunthorpe 	}
248cdc71fe4SJason Gunthorpe 
249cdc71fe4SJason Gunthorpe 	return ret;
250cdc71fe4SJason Gunthorpe 
251cdc71fe4SJason Gunthorpe unwind:
252cdc71fe4SJason Gunthorpe 	list_for_each_entry_continue_reverse(group, &container->group_list,
253cdc71fe4SJason Gunthorpe 					     container_next) {
254cdc71fe4SJason Gunthorpe 		driver->ops->detach_group(data, group->iommu_group);
255cdc71fe4SJason Gunthorpe 	}
256cdc71fe4SJason Gunthorpe 
257cdc71fe4SJason Gunthorpe 	return ret;
258cdc71fe4SJason Gunthorpe }
259cdc71fe4SJason Gunthorpe 
vfio_ioctl_set_iommu(struct vfio_container * container,unsigned long arg)260cdc71fe4SJason Gunthorpe static long vfio_ioctl_set_iommu(struct vfio_container *container,
261cdc71fe4SJason Gunthorpe 				 unsigned long arg)
262cdc71fe4SJason Gunthorpe {
263cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *driver;
264cdc71fe4SJason Gunthorpe 	long ret = -ENODEV;
265cdc71fe4SJason Gunthorpe 
266cdc71fe4SJason Gunthorpe 	down_write(&container->group_lock);
267cdc71fe4SJason Gunthorpe 
268cdc71fe4SJason Gunthorpe 	/*
269cdc71fe4SJason Gunthorpe 	 * The container is designed to be an unprivileged interface while
270cdc71fe4SJason Gunthorpe 	 * the group can be assigned to specific users.  Therefore, only by
271cdc71fe4SJason Gunthorpe 	 * adding a group to a container does the user get the privilege of
272cdc71fe4SJason Gunthorpe 	 * enabling the iommu, which may allocate finite resources.  There
273cdc71fe4SJason Gunthorpe 	 * is no unset_iommu, but by removing all the groups from a container,
274cdc71fe4SJason Gunthorpe 	 * the container is deprivileged and returns to an unset state.
275cdc71fe4SJason Gunthorpe 	 */
276cdc71fe4SJason Gunthorpe 	if (list_empty(&container->group_list) || container->iommu_driver) {
277cdc71fe4SJason Gunthorpe 		up_write(&container->group_lock);
278cdc71fe4SJason Gunthorpe 		return -EINVAL;
279cdc71fe4SJason Gunthorpe 	}
280cdc71fe4SJason Gunthorpe 
281cdc71fe4SJason Gunthorpe 	mutex_lock(&vfio.iommu_drivers_lock);
282cdc71fe4SJason Gunthorpe 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
283cdc71fe4SJason Gunthorpe 		void *data;
284cdc71fe4SJason Gunthorpe 
285cdc71fe4SJason Gunthorpe 		if (!vfio_iommu_driver_allowed(container, driver))
286cdc71fe4SJason Gunthorpe 			continue;
287cdc71fe4SJason Gunthorpe 		if (!try_module_get(driver->ops->owner))
288cdc71fe4SJason Gunthorpe 			continue;
289cdc71fe4SJason Gunthorpe 
290cdc71fe4SJason Gunthorpe 		/*
291cdc71fe4SJason Gunthorpe 		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
292cdc71fe4SJason Gunthorpe 		 * so test which iommu driver reported support for this
293cdc71fe4SJason Gunthorpe 		 * extension and call open on them.  We also pass them the
294cdc71fe4SJason Gunthorpe 		 * magic, allowing a single driver to support multiple
295cdc71fe4SJason Gunthorpe 		 * interfaces if they'd like.
296cdc71fe4SJason Gunthorpe 		 */
297cdc71fe4SJason Gunthorpe 		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
298cdc71fe4SJason Gunthorpe 			module_put(driver->ops->owner);
299cdc71fe4SJason Gunthorpe 			continue;
300cdc71fe4SJason Gunthorpe 		}
301cdc71fe4SJason Gunthorpe 
302cdc71fe4SJason Gunthorpe 		data = driver->ops->open(arg);
303cdc71fe4SJason Gunthorpe 		if (IS_ERR(data)) {
304cdc71fe4SJason Gunthorpe 			ret = PTR_ERR(data);
305cdc71fe4SJason Gunthorpe 			module_put(driver->ops->owner);
306cdc71fe4SJason Gunthorpe 			continue;
307cdc71fe4SJason Gunthorpe 		}
308cdc71fe4SJason Gunthorpe 
309cdc71fe4SJason Gunthorpe 		ret = __vfio_container_attach_groups(container, driver, data);
310cdc71fe4SJason Gunthorpe 		if (ret) {
311cdc71fe4SJason Gunthorpe 			driver->ops->release(data);
312cdc71fe4SJason Gunthorpe 			module_put(driver->ops->owner);
313cdc71fe4SJason Gunthorpe 			continue;
314cdc71fe4SJason Gunthorpe 		}
315cdc71fe4SJason Gunthorpe 
316cdc71fe4SJason Gunthorpe 		container->iommu_driver = driver;
317cdc71fe4SJason Gunthorpe 		container->iommu_data = data;
318cdc71fe4SJason Gunthorpe 		break;
319cdc71fe4SJason Gunthorpe 	}
320cdc71fe4SJason Gunthorpe 
321cdc71fe4SJason Gunthorpe 	mutex_unlock(&vfio.iommu_drivers_lock);
322cdc71fe4SJason Gunthorpe 	up_write(&container->group_lock);
323cdc71fe4SJason Gunthorpe 
324cdc71fe4SJason Gunthorpe 	return ret;
325cdc71fe4SJason Gunthorpe }
326cdc71fe4SJason Gunthorpe 
vfio_fops_unl_ioctl(struct file * filep,unsigned int cmd,unsigned long arg)327cdc71fe4SJason Gunthorpe static long vfio_fops_unl_ioctl(struct file *filep,
328cdc71fe4SJason Gunthorpe 				unsigned int cmd, unsigned long arg)
329cdc71fe4SJason Gunthorpe {
330cdc71fe4SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
331cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *driver;
332cdc71fe4SJason Gunthorpe 	void *data;
333cdc71fe4SJason Gunthorpe 	long ret = -EINVAL;
334cdc71fe4SJason Gunthorpe 
335cdc71fe4SJason Gunthorpe 	if (!container)
336cdc71fe4SJason Gunthorpe 		return ret;
337cdc71fe4SJason Gunthorpe 
338cdc71fe4SJason Gunthorpe 	switch (cmd) {
339cdc71fe4SJason Gunthorpe 	case VFIO_GET_API_VERSION:
340cdc71fe4SJason Gunthorpe 		ret = VFIO_API_VERSION;
341cdc71fe4SJason Gunthorpe 		break;
342cdc71fe4SJason Gunthorpe 	case VFIO_CHECK_EXTENSION:
343cdc71fe4SJason Gunthorpe 		ret = vfio_container_ioctl_check_extension(container, arg);
344cdc71fe4SJason Gunthorpe 		break;
345cdc71fe4SJason Gunthorpe 	case VFIO_SET_IOMMU:
346cdc71fe4SJason Gunthorpe 		ret = vfio_ioctl_set_iommu(container, arg);
347cdc71fe4SJason Gunthorpe 		break;
348cdc71fe4SJason Gunthorpe 	default:
349cdc71fe4SJason Gunthorpe 		driver = container->iommu_driver;
350cdc71fe4SJason Gunthorpe 		data = container->iommu_data;
351cdc71fe4SJason Gunthorpe 
352cdc71fe4SJason Gunthorpe 		if (driver) /* passthrough all unrecognized ioctls */
353cdc71fe4SJason Gunthorpe 			ret = driver->ops->ioctl(data, cmd, arg);
354cdc71fe4SJason Gunthorpe 	}
355cdc71fe4SJason Gunthorpe 
356cdc71fe4SJason Gunthorpe 	return ret;
357cdc71fe4SJason Gunthorpe }
358cdc71fe4SJason Gunthorpe 
vfio_fops_open(struct inode * inode,struct file * filep)359cdc71fe4SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep)
360cdc71fe4SJason Gunthorpe {
361cdc71fe4SJason Gunthorpe 	struct vfio_container *container;
362cdc71fe4SJason Gunthorpe 
363*0886196cSJason Gunthorpe 	container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
364cdc71fe4SJason Gunthorpe 	if (!container)
365cdc71fe4SJason Gunthorpe 		return -ENOMEM;
366cdc71fe4SJason Gunthorpe 
367cdc71fe4SJason Gunthorpe 	INIT_LIST_HEAD(&container->group_list);
368cdc71fe4SJason Gunthorpe 	init_rwsem(&container->group_lock);
369cdc71fe4SJason Gunthorpe 	kref_init(&container->kref);
370cdc71fe4SJason Gunthorpe 
371cdc71fe4SJason Gunthorpe 	filep->private_data = container;
372cdc71fe4SJason Gunthorpe 
373cdc71fe4SJason Gunthorpe 	return 0;
374cdc71fe4SJason Gunthorpe }
375cdc71fe4SJason Gunthorpe 
vfio_fops_release(struct inode * inode,struct file * filep)376cdc71fe4SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep)
377cdc71fe4SJason Gunthorpe {
378cdc71fe4SJason Gunthorpe 	struct vfio_container *container = filep->private_data;
379cdc71fe4SJason Gunthorpe 
380cdc71fe4SJason Gunthorpe 	filep->private_data = NULL;
381cdc71fe4SJason Gunthorpe 
382cdc71fe4SJason Gunthorpe 	vfio_container_put(container);
383cdc71fe4SJason Gunthorpe 
384cdc71fe4SJason Gunthorpe 	return 0;
385cdc71fe4SJason Gunthorpe }
386cdc71fe4SJason Gunthorpe 
387cdc71fe4SJason Gunthorpe static const struct file_operations vfio_fops = {
388cdc71fe4SJason Gunthorpe 	.owner		= THIS_MODULE,
389cdc71fe4SJason Gunthorpe 	.open		= vfio_fops_open,
390cdc71fe4SJason Gunthorpe 	.release	= vfio_fops_release,
391cdc71fe4SJason Gunthorpe 	.unlocked_ioctl	= vfio_fops_unl_ioctl,
392cdc71fe4SJason Gunthorpe 	.compat_ioctl	= compat_ptr_ioctl,
393cdc71fe4SJason Gunthorpe };
394cdc71fe4SJason Gunthorpe 
vfio_container_from_file(struct file * file)395cdc71fe4SJason Gunthorpe struct vfio_container *vfio_container_from_file(struct file *file)
396cdc71fe4SJason Gunthorpe {
397cdc71fe4SJason Gunthorpe 	struct vfio_container *container;
398cdc71fe4SJason Gunthorpe 
399cdc71fe4SJason Gunthorpe 	/* Sanity check, is this really our fd? */
400cdc71fe4SJason Gunthorpe 	if (file->f_op != &vfio_fops)
401cdc71fe4SJason Gunthorpe 		return NULL;
402cdc71fe4SJason Gunthorpe 
403cdc71fe4SJason Gunthorpe 	container = file->private_data;
404cdc71fe4SJason Gunthorpe 	WARN_ON(!container); /* fget ensures we don't race vfio_release */
405cdc71fe4SJason Gunthorpe 	return container;
406cdc71fe4SJason Gunthorpe }
407cdc71fe4SJason Gunthorpe 
408cdc71fe4SJason Gunthorpe static struct miscdevice vfio_dev = {
409cdc71fe4SJason Gunthorpe 	.minor = VFIO_MINOR,
410cdc71fe4SJason Gunthorpe 	.name = "vfio",
411cdc71fe4SJason Gunthorpe 	.fops = &vfio_fops,
412cdc71fe4SJason Gunthorpe 	.nodename = "vfio/vfio",
413cdc71fe4SJason Gunthorpe 	.mode = S_IRUGO | S_IWUGO,
414cdc71fe4SJason Gunthorpe };
415cdc71fe4SJason Gunthorpe 
vfio_container_attach_group(struct vfio_container * container,struct vfio_group * group)416cdc71fe4SJason Gunthorpe int vfio_container_attach_group(struct vfio_container *container,
417cdc71fe4SJason Gunthorpe 				struct vfio_group *group)
418cdc71fe4SJason Gunthorpe {
419cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *driver;
420cdc71fe4SJason Gunthorpe 	int ret = 0;
421cdc71fe4SJason Gunthorpe 
422c82e81abSJason Gunthorpe 	lockdep_assert_held(&group->group_lock);
423cdc71fe4SJason Gunthorpe 
424cdc71fe4SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
425cdc71fe4SJason Gunthorpe 		return -EPERM;
426cdc71fe4SJason Gunthorpe 
427cdc71fe4SJason Gunthorpe 	down_write(&container->group_lock);
428cdc71fe4SJason Gunthorpe 
429cdc71fe4SJason Gunthorpe 	/* Real groups and fake groups cannot mix */
430cdc71fe4SJason Gunthorpe 	if (!list_empty(&container->group_list) &&
431cdc71fe4SJason Gunthorpe 	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
432cdc71fe4SJason Gunthorpe 		ret = -EPERM;
433cdc71fe4SJason Gunthorpe 		goto out_unlock_container;
434cdc71fe4SJason Gunthorpe 	}
435cdc71fe4SJason Gunthorpe 
436cdc71fe4SJason Gunthorpe 	if (group->type == VFIO_IOMMU) {
437cdc71fe4SJason Gunthorpe 		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
438cdc71fe4SJason Gunthorpe 		if (ret)
439cdc71fe4SJason Gunthorpe 			goto out_unlock_container;
440cdc71fe4SJason Gunthorpe 	}
441cdc71fe4SJason Gunthorpe 
442cdc71fe4SJason Gunthorpe 	driver = container->iommu_driver;
443cdc71fe4SJason Gunthorpe 	if (driver) {
444cdc71fe4SJason Gunthorpe 		ret = driver->ops->attach_group(container->iommu_data,
445cdc71fe4SJason Gunthorpe 						group->iommu_group,
446cdc71fe4SJason Gunthorpe 						group->type);
447cdc71fe4SJason Gunthorpe 		if (ret) {
448cdc71fe4SJason Gunthorpe 			if (group->type == VFIO_IOMMU)
449cdc71fe4SJason Gunthorpe 				iommu_group_release_dma_owner(
450cdc71fe4SJason Gunthorpe 					group->iommu_group);
451cdc71fe4SJason Gunthorpe 			goto out_unlock_container;
452cdc71fe4SJason Gunthorpe 		}
453cdc71fe4SJason Gunthorpe 	}
454cdc71fe4SJason Gunthorpe 
455cdc71fe4SJason Gunthorpe 	group->container = container;
456cdc71fe4SJason Gunthorpe 	group->container_users = 1;
457cdc71fe4SJason Gunthorpe 	container->noiommu = (group->type == VFIO_NO_IOMMU);
458cdc71fe4SJason Gunthorpe 	list_add(&group->container_next, &container->group_list);
459cdc71fe4SJason Gunthorpe 
460cdc71fe4SJason Gunthorpe 	/* Get a reference on the container and mark a user within the group */
461cdc71fe4SJason Gunthorpe 	vfio_container_get(container);
462cdc71fe4SJason Gunthorpe 
463cdc71fe4SJason Gunthorpe out_unlock_container:
464cdc71fe4SJason Gunthorpe 	up_write(&container->group_lock);
465cdc71fe4SJason Gunthorpe 	return ret;
466cdc71fe4SJason Gunthorpe }
467cdc71fe4SJason Gunthorpe 
vfio_group_detach_container(struct vfio_group * group)468cdc71fe4SJason Gunthorpe void vfio_group_detach_container(struct vfio_group *group)
469cdc71fe4SJason Gunthorpe {
470cdc71fe4SJason Gunthorpe 	struct vfio_container *container = group->container;
471cdc71fe4SJason Gunthorpe 	struct vfio_iommu_driver *driver;
472cdc71fe4SJason Gunthorpe 
473c82e81abSJason Gunthorpe 	lockdep_assert_held(&group->group_lock);
474cdc71fe4SJason Gunthorpe 	WARN_ON(group->container_users != 1);
475cdc71fe4SJason Gunthorpe 
476cdc71fe4SJason Gunthorpe 	down_write(&container->group_lock);
477cdc71fe4SJason Gunthorpe 
478cdc71fe4SJason Gunthorpe 	driver = container->iommu_driver;
479cdc71fe4SJason Gunthorpe 	if (driver)
480cdc71fe4SJason Gunthorpe 		driver->ops->detach_group(container->iommu_data,
481cdc71fe4SJason Gunthorpe 					  group->iommu_group);
482cdc71fe4SJason Gunthorpe 
483cdc71fe4SJason Gunthorpe 	if (group->type == VFIO_IOMMU)
484cdc71fe4SJason Gunthorpe 		iommu_group_release_dma_owner(group->iommu_group);
485cdc71fe4SJason Gunthorpe 
486cdc71fe4SJason Gunthorpe 	group->container = NULL;
487cdc71fe4SJason Gunthorpe 	group->container_users = 0;
488cdc71fe4SJason Gunthorpe 	list_del(&group->container_next);
489cdc71fe4SJason Gunthorpe 
490cdc71fe4SJason Gunthorpe 	/* Detaching the last group deprivileges a container, remove iommu */
491cdc71fe4SJason Gunthorpe 	if (driver && list_empty(&container->group_list)) {
492cdc71fe4SJason Gunthorpe 		driver->ops->release(container->iommu_data);
493cdc71fe4SJason Gunthorpe 		module_put(driver->ops->owner);
494cdc71fe4SJason Gunthorpe 		container->iommu_driver = NULL;
495cdc71fe4SJason Gunthorpe 		container->iommu_data = NULL;
496cdc71fe4SJason Gunthorpe 	}
497cdc71fe4SJason Gunthorpe 
498cdc71fe4SJason Gunthorpe 	up_write(&container->group_lock);
499cdc71fe4SJason Gunthorpe 
500cdc71fe4SJason Gunthorpe 	vfio_container_put(container);
501cdc71fe4SJason Gunthorpe }
502cdc71fe4SJason Gunthorpe 
vfio_group_use_container(struct vfio_group * group)50304f930c3SJason Gunthorpe int vfio_group_use_container(struct vfio_group *group)
504cdc71fe4SJason Gunthorpe {
505c82e81abSJason Gunthorpe 	lockdep_assert_held(&group->group_lock);
506cdc71fe4SJason Gunthorpe 
5072a3dab19SJason Gunthorpe 	/*
5082a3dab19SJason Gunthorpe 	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
5092a3dab19SJason Gunthorpe 	 * VFIO_SET_IOMMU hasn't been done yet.
5102a3dab19SJason Gunthorpe 	 */
5112a3dab19SJason Gunthorpe 	if (!group->container->iommu_driver)
512cdc71fe4SJason Gunthorpe 		return -EINVAL;
513cdc71fe4SJason Gunthorpe 
514cdc71fe4SJason Gunthorpe 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
515cdc71fe4SJason Gunthorpe 		return -EPERM;
516cdc71fe4SJason Gunthorpe 
517cdc71fe4SJason Gunthorpe 	get_file(group->opened_file);
518cdc71fe4SJason Gunthorpe 	group->container_users++;
519cdc71fe4SJason Gunthorpe 	return 0;
520cdc71fe4SJason Gunthorpe }
521cdc71fe4SJason Gunthorpe 
vfio_group_unuse_container(struct vfio_group * group)52204f930c3SJason Gunthorpe void vfio_group_unuse_container(struct vfio_group *group)
523cdc71fe4SJason Gunthorpe {
52404f930c3SJason Gunthorpe 	lockdep_assert_held(&group->group_lock);
525bab6fabcSJason Gunthorpe 
52604f930c3SJason Gunthorpe 	WARN_ON(group->container_users <= 1);
52704f930c3SJason Gunthorpe 	group->container_users--;
52804f930c3SJason Gunthorpe 	fput(group->opened_file);
529cdc71fe4SJason Gunthorpe }
530cdc71fe4SJason Gunthorpe 
/*
 * Pass a pin request for @device to the ->pin_pages() callback of the
 * IOMMU driver bound to the device's container.
 *
 * Returns -E2BIG if @npage exceeds VFIO_PIN_PAGES_MAX_ENTRIES, -ENOTTY if
 * no driver is bound or it lacks ->pin_pages(), otherwise whatever the
 * driver callback returns.
 */
int vfio_device_container_pin_pages(struct vfio_device *device,
				    dma_addr_t iova, int npage,
				    int prot, struct page **pages)
{
	struct vfio_container *container = device->group->container;
	struct iommu_group *iommu_group = device->group->iommu_group;
	struct vfio_iommu_driver *drv = container->iommu_driver;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (unlikely(!drv))
		return -ENOTTY;
	if (unlikely(!drv->ops->pin_pages))
		return -ENOTTY;

	return drv->ops->pin_pages(container->iommu_data, iommu_group, iova,
				   npage, prot, pages);
}
547cdc71fe4SJason Gunthorpe 
/*
 * Pass an unpin request for @device to the ->unpin_pages() callback of the
 * IOMMU driver bound to the device's container.
 *
 * Warns and returns without calling the driver when @npage is not in
 * 1..VFIO_PIN_PAGES_MAX_ENTRIES, when no IOMMU driver is bound, or when
 * the bound driver has no ->unpin_pages() callback.
 */
void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	/*
	 * Mirror the checks done on the pin and dma_rw paths: not every
	 * backend implements ->unpin_pages() (vfio-noiommu does not), and
	 * the container may have no driver at all.  Calling through a NULL
	 * pointer here would oops, so complain and bail out instead.
	 */
	if (WARN_ON(!driver || !driver->ops->unpin_pages))
		return;

	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
559cdc71fe4SJason Gunthorpe 
/*
 * Pass a DMA read/write request for @device to the ->dma_rw() callback of
 * the IOMMU driver bound to the device's container.
 *
 * Returns -ENOTTY if no driver is bound or it lacks ->dma_rw(), otherwise
 * whatever the driver callback returns.
 */
int vfio_device_container_dma_rw(struct vfio_device *device,
				 dma_addr_t iova, void *data,
				 size_t len, bool write)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *drv = container->iommu_driver;

	if (unlikely(!drv))
		return -ENOTTY;
	if (unlikely(!drv->ops->dma_rw))
		return -ENOTTY;

	return drv->ops->dma_rw(container->iommu_data, iova, data, len, write);
}
572cdc71fe4SJason Gunthorpe 
vfio_container_init(void)573cdc71fe4SJason Gunthorpe int __init vfio_container_init(void)
574cdc71fe4SJason Gunthorpe {
575cdc71fe4SJason Gunthorpe 	int ret;
576cdc71fe4SJason Gunthorpe 
577cdc71fe4SJason Gunthorpe 	mutex_init(&vfio.iommu_drivers_lock);
578cdc71fe4SJason Gunthorpe 	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
579cdc71fe4SJason Gunthorpe 
580cdc71fe4SJason Gunthorpe 	ret = misc_register(&vfio_dev);
581cdc71fe4SJason Gunthorpe 	if (ret) {
582cdc71fe4SJason Gunthorpe 		pr_err("vfio: misc device register failed\n");
583cdc71fe4SJason Gunthorpe 		return ret;
584cdc71fe4SJason Gunthorpe 	}
585cdc71fe4SJason Gunthorpe 
586cdc71fe4SJason Gunthorpe 	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
587cdc71fe4SJason Gunthorpe 		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
588cdc71fe4SJason Gunthorpe 		if (ret)
589cdc71fe4SJason Gunthorpe 			goto err_misc;
590cdc71fe4SJason Gunthorpe 	}
591cdc71fe4SJason Gunthorpe 	return 0;
592cdc71fe4SJason Gunthorpe 
593cdc71fe4SJason Gunthorpe err_misc:
594cdc71fe4SJason Gunthorpe 	misc_deregister(&vfio_dev);
595cdc71fe4SJason Gunthorpe 	return ret;
596cdc71fe4SJason Gunthorpe }
597cdc71fe4SJason Gunthorpe 
vfio_container_cleanup(void)598cdc71fe4SJason Gunthorpe void vfio_container_cleanup(void)
599cdc71fe4SJason Gunthorpe {
600cdc71fe4SJason Gunthorpe 	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
601cdc71fe4SJason Gunthorpe 		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
602cdc71fe4SJason Gunthorpe 	misc_deregister(&vfio_dev);
603cdc71fe4SJason Gunthorpe 	mutex_destroy(&vfio.iommu_drivers_lock);
604cdc71fe4SJason Gunthorpe }
60581ab9890SJason Gunthorpe 
60681ab9890SJason Gunthorpe MODULE_ALIAS_MISCDEV(VFIO_MINOR);
60781ab9890SJason Gunthorpe MODULE_ALIAS("devname:vfio/vfio");
608