xref: /openbmc/linux/drivers/vfio/container.c (revision 84cc6674)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
4  *
5  * VFIO container (/dev/vfio/vfio)
6  */
7 #include <linux/file.h>
8 #include <linux/slab.h>
9 #include <linux/fs.h>
10 #include <linux/capability.h>
11 #include <linux/iommu.h>
12 #include <linux/miscdevice.h>
13 #include <linux/vfio.h>
14 #include <uapi/linux/vfio.h>
15 
16 #include "vfio.h"
17 
/*
 * Per-open state of /dev/vfio/vfio.  One instance is allocated in
 * vfio_fops_open() and freed through @kref once the file and every attached
 * group have dropped their references.
 */
struct vfio_container {
	/* Lifetime reference count; the final put frees the container */
	struct kref			kref;
	/* Groups attached to this container, linked via container_next */
	struct list_head		group_list;
	/* Taken for write when groups or the iommu driver are changed */
	struct rw_semaphore		group_lock;
	/* Backend chosen by VFIO_SET_IOMMU; NULL while none is set */
	struct vfio_iommu_driver	*iommu_driver;
	/* Opaque cookie returned by the backend's ->open() */
	void				*iommu_data;
	/* True when the most recently attached group was VFIO_NO_IOMMU */
	bool				noiommu;
};
26 
/* File-global registry of IOMMU backend drivers */
static struct vfio {
	/* Registered struct vfio_iommu_driver entries, linked via vfio_next */
	struct list_head		iommu_drivers_list;
	/* Serializes every walk and modification of iommu_drivers_list */
	struct mutex			iommu_drivers_lock;
} vfio;
31 
32 static void *vfio_noiommu_open(unsigned long arg)
33 {
34 	if (arg != VFIO_NOIOMMU_IOMMU)
35 		return ERR_PTR(-EINVAL);
36 	if (!capable(CAP_SYS_RAWIO))
37 		return ERR_PTR(-EPERM);
38 
39 	return NULL;
40 }
41 
/* ->release() stub: vfio_noiommu_open() hands out no state to tear down. */
static void vfio_noiommu_release(void *iommu_data)
{
}
45 
46 static long vfio_noiommu_ioctl(void *iommu_data,
47 			       unsigned int cmd, unsigned long arg)
48 {
49 	if (cmd == VFIO_CHECK_EXTENSION)
50 		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
51 
52 	return -ENOTTY;
53 }
54 
/* ->attach_group() stub: does nothing and always reports success. */
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}
60 
/* ->detach_group() stub: attach did nothing, so there is nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
65 
/*
 * Callback table of the built-in "vfio-noiommu" backend.  It is registered
 * from vfio_container_init() when CONFIG_VFIO_NOIOMMU is enabled.
 */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
75 
76 /*
77  * Only noiommu containers can use vfio-noiommu and noiommu containers can only
78  * use vfio-noiommu.
79  */
80 static bool vfio_iommu_driver_allowed(struct vfio_container *container,
81 				      const struct vfio_iommu_driver *driver)
82 {
83 	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
84 		return true;
85 	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
86 }
87 
88 /*
89  * IOMMU driver registration
90  */
91 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
92 {
93 	struct vfio_iommu_driver *driver, *tmp;
94 
95 	if (WARN_ON(!ops->register_device != !ops->unregister_device))
96 		return -EINVAL;
97 
98 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
99 	if (!driver)
100 		return -ENOMEM;
101 
102 	driver->ops = ops;
103 
104 	mutex_lock(&vfio.iommu_drivers_lock);
105 
106 	/* Check for duplicates */
107 	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
108 		if (tmp->ops == ops) {
109 			mutex_unlock(&vfio.iommu_drivers_lock);
110 			kfree(driver);
111 			return -EINVAL;
112 		}
113 	}
114 
115 	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
116 
117 	mutex_unlock(&vfio.iommu_drivers_lock);
118 
119 	return 0;
120 }
121 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
122 
123 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
124 {
125 	struct vfio_iommu_driver *driver;
126 
127 	mutex_lock(&vfio.iommu_drivers_lock);
128 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
129 		if (driver->ops == ops) {
130 			list_del(&driver->vfio_next);
131 			mutex_unlock(&vfio.iommu_drivers_lock);
132 			kfree(driver);
133 			return;
134 		}
135 	}
136 	mutex_unlock(&vfio.iommu_drivers_lock);
137 }
138 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
139 
140 /*
141  * Container objects - containers are created when /dev/vfio/vfio is
142  * opened, but their lifecycle extends until the last user is done, so
143  * it's freed via kref.  Must support container/group/device being
144  * closed in any order.
145  */
146 static void vfio_container_release(struct kref *kref)
147 {
148 	struct vfio_container *container;
149 	container = container_of(kref, struct vfio_container, kref);
150 
151 	kfree(container);
152 }
153 
/* Take an additional reference on @container. */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
158 
/*
 * Drop a reference on @container; vfio_container_release() frees it when
 * the last reference goes away.
 */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
163 
164 void vfio_device_container_register(struct vfio_device *device)
165 {
166 	struct vfio_iommu_driver *iommu_driver =
167 		device->group->container->iommu_driver;
168 
169 	if (iommu_driver && iommu_driver->ops->register_device)
170 		iommu_driver->ops->register_device(
171 			device->group->container->iommu_data, device);
172 }
173 
174 void vfio_device_container_unregister(struct vfio_device *device)
175 {
176 	struct vfio_iommu_driver *iommu_driver =
177 		device->group->container->iommu_driver;
178 
179 	if (iommu_driver && iommu_driver->ops->unregister_device)
180 		iommu_driver->ops->unregister_device(
181 			device->group->container->iommu_data, device);
182 }
183 
184 static long
185 vfio_container_ioctl_check_extension(struct vfio_container *container,
186 				     unsigned long arg)
187 {
188 	struct vfio_iommu_driver *driver;
189 	long ret = 0;
190 
191 	down_read(&container->group_lock);
192 
193 	driver = container->iommu_driver;
194 
195 	switch (arg) {
196 		/* No base extensions yet */
197 	default:
198 		/*
199 		 * If no driver is set, poll all registered drivers for
200 		 * extensions and return the first positive result.  If
201 		 * a driver is already set, further queries will be passed
202 		 * only to that driver.
203 		 */
204 		if (!driver) {
205 			mutex_lock(&vfio.iommu_drivers_lock);
206 			list_for_each_entry(driver, &vfio.iommu_drivers_list,
207 					    vfio_next) {
208 
209 				if (!list_empty(&container->group_list) &&
210 				    !vfio_iommu_driver_allowed(container,
211 							       driver))
212 					continue;
213 				if (!try_module_get(driver->ops->owner))
214 					continue;
215 
216 				ret = driver->ops->ioctl(NULL,
217 							 VFIO_CHECK_EXTENSION,
218 							 arg);
219 				module_put(driver->ops->owner);
220 				if (ret > 0)
221 					break;
222 			}
223 			mutex_unlock(&vfio.iommu_drivers_lock);
224 		} else
225 			ret = driver->ops->ioctl(container->iommu_data,
226 						 VFIO_CHECK_EXTENSION, arg);
227 	}
228 
229 	up_read(&container->group_lock);
230 
231 	return ret;
232 }
233 
234 /* hold write lock on container->group_lock */
235 static int __vfio_container_attach_groups(struct vfio_container *container,
236 					  struct vfio_iommu_driver *driver,
237 					  void *data)
238 {
239 	struct vfio_group *group;
240 	int ret = -ENODEV;
241 
242 	list_for_each_entry(group, &container->group_list, container_next) {
243 		ret = driver->ops->attach_group(data, group->iommu_group,
244 						group->type);
245 		if (ret)
246 			goto unwind;
247 	}
248 
249 	return ret;
250 
251 unwind:
252 	list_for_each_entry_continue_reverse(group, &container->group_list,
253 					     container_next) {
254 		driver->ops->detach_group(data, group->iommu_group);
255 	}
256 
257 	return ret;
258 }
259 
260 static long vfio_ioctl_set_iommu(struct vfio_container *container,
261 				 unsigned long arg)
262 {
263 	struct vfio_iommu_driver *driver;
264 	long ret = -ENODEV;
265 
266 	down_write(&container->group_lock);
267 
268 	/*
269 	 * The container is designed to be an unprivileged interface while
270 	 * the group can be assigned to specific users.  Therefore, only by
271 	 * adding a group to a container does the user get the privilege of
272 	 * enabling the iommu, which may allocate finite resources.  There
273 	 * is no unset_iommu, but by removing all the groups from a container,
274 	 * the container is deprivileged and returns to an unset state.
275 	 */
276 	if (list_empty(&container->group_list) || container->iommu_driver) {
277 		up_write(&container->group_lock);
278 		return -EINVAL;
279 	}
280 
281 	mutex_lock(&vfio.iommu_drivers_lock);
282 	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
283 		void *data;
284 
285 		if (!vfio_iommu_driver_allowed(container, driver))
286 			continue;
287 		if (!try_module_get(driver->ops->owner))
288 			continue;
289 
290 		/*
291 		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
292 		 * so test which iommu driver reported support for this
293 		 * extension and call open on them.  We also pass them the
294 		 * magic, allowing a single driver to support multiple
295 		 * interfaces if they'd like.
296 		 */
297 		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
298 			module_put(driver->ops->owner);
299 			continue;
300 		}
301 
302 		data = driver->ops->open(arg);
303 		if (IS_ERR(data)) {
304 			ret = PTR_ERR(data);
305 			module_put(driver->ops->owner);
306 			continue;
307 		}
308 
309 		ret = __vfio_container_attach_groups(container, driver, data);
310 		if (ret) {
311 			driver->ops->release(data);
312 			module_put(driver->ops->owner);
313 			continue;
314 		}
315 
316 		container->iommu_driver = driver;
317 		container->iommu_data = data;
318 		break;
319 	}
320 
321 	mutex_unlock(&vfio.iommu_drivers_lock);
322 	up_write(&container->group_lock);
323 
324 	return ret;
325 }
326 
327 static long vfio_fops_unl_ioctl(struct file *filep,
328 				unsigned int cmd, unsigned long arg)
329 {
330 	struct vfio_container *container = filep->private_data;
331 	struct vfio_iommu_driver *driver;
332 	void *data;
333 	long ret = -EINVAL;
334 
335 	if (!container)
336 		return ret;
337 
338 	switch (cmd) {
339 	case VFIO_GET_API_VERSION:
340 		ret = VFIO_API_VERSION;
341 		break;
342 	case VFIO_CHECK_EXTENSION:
343 		ret = vfio_container_ioctl_check_extension(container, arg);
344 		break;
345 	case VFIO_SET_IOMMU:
346 		ret = vfio_ioctl_set_iommu(container, arg);
347 		break;
348 	default:
349 		driver = container->iommu_driver;
350 		data = container->iommu_data;
351 
352 		if (driver) /* passthrough all unrecognized ioctls */
353 			ret = driver->ops->ioctl(data, cmd, arg);
354 	}
355 
356 	return ret;
357 }
358 
359 static int vfio_fops_open(struct inode *inode, struct file *filep)
360 {
361 	struct vfio_container *container;
362 
363 	container = kzalloc(sizeof(*container), GFP_KERNEL);
364 	if (!container)
365 		return -ENOMEM;
366 
367 	INIT_LIST_HEAD(&container->group_list);
368 	init_rwsem(&container->group_lock);
369 	kref_init(&container->kref);
370 
371 	filep->private_data = container;
372 
373 	return 0;
374 }
375 
376 static int vfio_fops_release(struct inode *inode, struct file *filep)
377 {
378 	struct vfio_container *container = filep->private_data;
379 	struct vfio_iommu_driver *driver = container->iommu_driver;
380 
381 	if (driver && driver->ops->notify)
382 		driver->ops->notify(container->iommu_data,
383 				    VFIO_IOMMU_CONTAINER_CLOSE);
384 
385 	filep->private_data = NULL;
386 
387 	vfio_container_put(container);
388 
389 	return 0;
390 }
391 
/* File operations backing the container device node */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
399 
400 struct vfio_container *vfio_container_from_file(struct file *file)
401 {
402 	struct vfio_container *container;
403 
404 	/* Sanity check, is this really our fd? */
405 	if (file->f_op != &vfio_fops)
406 		return NULL;
407 
408 	container = file->private_data;
409 	WARN_ON(!container); /* fget ensures we don't race vfio_release */
410 	return container;
411 }
412 
/*
 * Misc device exposing the container interface at /dev/vfio/vfio.  The
 * mode grants read and write permission to every user.
 */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
420 
421 int vfio_container_attach_group(struct vfio_container *container,
422 				struct vfio_group *group)
423 {
424 	struct vfio_iommu_driver *driver;
425 	int ret = 0;
426 
427 	lockdep_assert_held(&group->group_lock);
428 
429 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
430 		return -EPERM;
431 
432 	down_write(&container->group_lock);
433 
434 	/* Real groups and fake groups cannot mix */
435 	if (!list_empty(&container->group_list) &&
436 	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
437 		ret = -EPERM;
438 		goto out_unlock_container;
439 	}
440 
441 	if (group->type == VFIO_IOMMU) {
442 		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
443 		if (ret)
444 			goto out_unlock_container;
445 	}
446 
447 	driver = container->iommu_driver;
448 	if (driver) {
449 		ret = driver->ops->attach_group(container->iommu_data,
450 						group->iommu_group,
451 						group->type);
452 		if (ret) {
453 			if (group->type == VFIO_IOMMU)
454 				iommu_group_release_dma_owner(
455 					group->iommu_group);
456 			goto out_unlock_container;
457 		}
458 	}
459 
460 	group->container = container;
461 	group->container_users = 1;
462 	container->noiommu = (group->type == VFIO_NO_IOMMU);
463 	list_add(&group->container_next, &container->group_list);
464 
465 	/* Get a reference on the container and mark a user within the group */
466 	vfio_container_get(container);
467 
468 out_unlock_container:
469 	up_write(&container->group_lock);
470 	return ret;
471 }
472 
473 void vfio_group_detach_container(struct vfio_group *group)
474 {
475 	struct vfio_container *container = group->container;
476 	struct vfio_iommu_driver *driver;
477 
478 	lockdep_assert_held(&group->group_lock);
479 	WARN_ON(group->container_users != 1);
480 
481 	down_write(&container->group_lock);
482 
483 	driver = container->iommu_driver;
484 	if (driver)
485 		driver->ops->detach_group(container->iommu_data,
486 					  group->iommu_group);
487 
488 	if (group->type == VFIO_IOMMU)
489 		iommu_group_release_dma_owner(group->iommu_group);
490 
491 	group->container = NULL;
492 	group->container_users = 0;
493 	list_del(&group->container_next);
494 
495 	/* Detaching the last group deprivileges a container, remove iommu */
496 	if (driver && list_empty(&container->group_list)) {
497 		driver->ops->release(container->iommu_data);
498 		module_put(driver->ops->owner);
499 		container->iommu_driver = NULL;
500 		container->iommu_data = NULL;
501 	}
502 
503 	up_write(&container->group_lock);
504 
505 	vfio_container_put(container);
506 }
507 
508 int vfio_group_use_container(struct vfio_group *group)
509 {
510 	lockdep_assert_held(&group->group_lock);
511 
512 	/*
513 	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
514 	 * VFIO_SET_IOMMU hasn't been done yet.
515 	 */
516 	if (!group->container->iommu_driver)
517 		return -EINVAL;
518 
519 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
520 		return -EPERM;
521 
522 	get_file(group->opened_file);
523 	group->container_users++;
524 	return 0;
525 }
526 
/*
 * Undo vfio_group_use_container(): drop one container user and the file
 * reference taken for it.  The caller must hold group->group_lock.
 */
void vfio_group_unuse_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	/* A count of 1 is the attachment itself, not a user added by use() */
	WARN_ON(group->container_users <= 1);
	group->container_users--;
	fput(group->opened_file);
}
535 
536 int vfio_device_container_pin_pages(struct vfio_device *device,
537 				    dma_addr_t iova, int npage,
538 				    int prot, struct page **pages)
539 {
540 	struct vfio_container *container = device->group->container;
541 	struct iommu_group *iommu_group = device->group->iommu_group;
542 	struct vfio_iommu_driver *driver = container->iommu_driver;
543 
544 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
545 		return -E2BIG;
546 
547 	if (unlikely(!driver || !driver->ops->pin_pages))
548 		return -ENOTTY;
549 	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
550 				      npage, prot, pages);
551 }
552 
553 void vfio_device_container_unpin_pages(struct vfio_device *device,
554 				       dma_addr_t iova, int npage)
555 {
556 	struct vfio_container *container = device->group->container;
557 
558 	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
559 		return;
560 
561 	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
562 						  npage);
563 }
564 
565 int vfio_device_container_dma_rw(struct vfio_device *device,
566 				 dma_addr_t iova, void *data,
567 				 size_t len, bool write)
568 {
569 	struct vfio_container *container = device->group->container;
570 	struct vfio_iommu_driver *driver = container->iommu_driver;
571 
572 	if (unlikely(!driver || !driver->ops->dma_rw))
573 		return -ENOTTY;
574 	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
575 				   write);
576 }
577 
578 int __init vfio_container_init(void)
579 {
580 	int ret;
581 
582 	mutex_init(&vfio.iommu_drivers_lock);
583 	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
584 
585 	ret = misc_register(&vfio_dev);
586 	if (ret) {
587 		pr_err("vfio: misc device register failed\n");
588 		return ret;
589 	}
590 
591 	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
592 		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
593 		if (ret)
594 			goto err_misc;
595 	}
596 	return 0;
597 
598 err_misc:
599 	misc_deregister(&vfio_dev);
600 	return ret;
601 }
602 
/*
 * Undo vfio_container_init(): unregister the vfio-noiommu backend (when
 * CONFIG_VFIO_NOIOMMU is enabled), deregister the misc device and destroy
 * the registry mutex.
 */
void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
	misc_deregister(&vfio_dev);
	mutex_destroy(&vfio.iommu_drivers_lock);
}
610 
/*
 * Module aliases for the container misc device (its minor number and its
 * "vfio/vfio" device node name).
 * NOTE(review): presumably these let the module be loaded on demand when
 * the device node is opened - confirm.
 */
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
613