10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only 20f3e72b5SJason Gunthorpe /* 30f3e72b5SJason Gunthorpe * VFIO core 40f3e72b5SJason Gunthorpe * 50f3e72b5SJason Gunthorpe * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 60f3e72b5SJason Gunthorpe * Author: Alex Williamson <alex.williamson@redhat.com> 70f3e72b5SJason Gunthorpe * 80f3e72b5SJason Gunthorpe * Derived from original vfio: 90f3e72b5SJason Gunthorpe * Copyright 2010 Cisco Systems, Inc. All rights reserved. 100f3e72b5SJason Gunthorpe * Author: Tom Lyon, pugs@cisco.com 110f3e72b5SJason Gunthorpe */ 120f3e72b5SJason Gunthorpe 130f3e72b5SJason Gunthorpe #include <linux/cdev.h> 140f3e72b5SJason Gunthorpe #include <linux/compat.h> 150f3e72b5SJason Gunthorpe #include <linux/device.h> 160f3e72b5SJason Gunthorpe #include <linux/file.h> 170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h> 180f3e72b5SJason Gunthorpe #include <linux/fs.h> 190f3e72b5SJason Gunthorpe #include <linux/idr.h> 200f3e72b5SJason Gunthorpe #include <linux/iommu.h> 210f3e72b5SJason Gunthorpe #include <linux/list.h> 220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h> 230f3e72b5SJason Gunthorpe #include <linux/module.h> 240f3e72b5SJason Gunthorpe #include <linux/mutex.h> 250f3e72b5SJason Gunthorpe #include <linux/pci.h> 260f3e72b5SJason Gunthorpe #include <linux/rwsem.h> 270f3e72b5SJason Gunthorpe #include <linux/sched.h> 280f3e72b5SJason Gunthorpe #include <linux/slab.h> 290f3e72b5SJason Gunthorpe #include <linux/stat.h> 300f3e72b5SJason Gunthorpe #include <linux/string.h> 310f3e72b5SJason Gunthorpe #include <linux/uaccess.h> 320f3e72b5SJason Gunthorpe #include <linux/vfio.h> 330f3e72b5SJason Gunthorpe #include <linux/wait.h> 340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h> 35*8e5c6995SAbhishek Sahu #include <linux/pm_runtime.h> 360f3e72b5SJason Gunthorpe #include "vfio.h" 370f3e72b5SJason Gunthorpe 380f3e72b5SJason Gunthorpe #define DRIVER_VERSION "0.3" 390f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR 
"Alex Williamson <alex.williamson@redhat.com>" 400f3e72b5SJason Gunthorpe #define DRIVER_DESC "VFIO - User Level meta-driver" 410f3e72b5SJason Gunthorpe 420f3e72b5SJason Gunthorpe static struct vfio { 430f3e72b5SJason Gunthorpe struct class *class; 440f3e72b5SJason Gunthorpe struct list_head iommu_drivers_list; 450f3e72b5SJason Gunthorpe struct mutex iommu_drivers_lock; 460f3e72b5SJason Gunthorpe struct list_head group_list; 470f3e72b5SJason Gunthorpe struct mutex group_lock; /* locks group_list */ 480f3e72b5SJason Gunthorpe struct ida group_ida; 490f3e72b5SJason Gunthorpe dev_t group_devt; 500f3e72b5SJason Gunthorpe } vfio; 510f3e72b5SJason Gunthorpe 520f3e72b5SJason Gunthorpe struct vfio_iommu_driver { 530f3e72b5SJason Gunthorpe const struct vfio_iommu_driver_ops *ops; 540f3e72b5SJason Gunthorpe struct list_head vfio_next; 550f3e72b5SJason Gunthorpe }; 560f3e72b5SJason Gunthorpe 570f3e72b5SJason Gunthorpe struct vfio_container { 580f3e72b5SJason Gunthorpe struct kref kref; 590f3e72b5SJason Gunthorpe struct list_head group_list; 600f3e72b5SJason Gunthorpe struct rw_semaphore group_lock; 610f3e72b5SJason Gunthorpe struct vfio_iommu_driver *iommu_driver; 620f3e72b5SJason Gunthorpe void *iommu_data; 630f3e72b5SJason Gunthorpe bool noiommu; 640f3e72b5SJason Gunthorpe }; 650f3e72b5SJason Gunthorpe 660f3e72b5SJason Gunthorpe struct vfio_group { 670f3e72b5SJason Gunthorpe struct device dev; 680f3e72b5SJason Gunthorpe struct cdev cdev; 690f3e72b5SJason Gunthorpe refcount_t users; 700f3e72b5SJason Gunthorpe unsigned int container_users; 710f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 720f3e72b5SJason Gunthorpe struct vfio_container *container; 730f3e72b5SJason Gunthorpe struct list_head device_list; 740f3e72b5SJason Gunthorpe struct mutex device_lock; 750f3e72b5SJason Gunthorpe struct list_head vfio_next; 760f3e72b5SJason Gunthorpe struct list_head container_next; 770f3e72b5SJason Gunthorpe enum vfio_group_type type; 780f3e72b5SJason Gunthorpe unsigned int 
dev_counter; 790f3e72b5SJason Gunthorpe struct rw_semaphore group_rwsem; 800f3e72b5SJason Gunthorpe struct kvm *kvm; 810f3e72b5SJason Gunthorpe struct file *opened_file; 820f3e72b5SJason Gunthorpe struct blocking_notifier_head notifier; 830f3e72b5SJason Gunthorpe }; 840f3e72b5SJason Gunthorpe 850f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 860f3e72b5SJason Gunthorpe static bool noiommu __read_mostly; 870f3e72b5SJason Gunthorpe module_param_named(enable_unsafe_noiommu_mode, 880f3e72b5SJason Gunthorpe noiommu, bool, S_IRUGO | S_IWUSR); 890f3e72b5SJason Gunthorpe MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); 900f3e72b5SJason Gunthorpe #endif 910f3e72b5SJason Gunthorpe 920f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa); 930f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops; 940f3e72b5SJason Gunthorpe 950f3e72b5SJason Gunthorpe int vfio_assign_device_set(struct vfio_device *device, void *set_id) 960f3e72b5SJason Gunthorpe { 970f3e72b5SJason Gunthorpe unsigned long idx = (unsigned long)set_id; 980f3e72b5SJason Gunthorpe struct vfio_device_set *new_dev_set; 990f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set; 1000f3e72b5SJason Gunthorpe 1010f3e72b5SJason Gunthorpe if (WARN_ON(!set_id)) 1020f3e72b5SJason Gunthorpe return -EINVAL; 1030f3e72b5SJason Gunthorpe 1040f3e72b5SJason Gunthorpe /* 1050f3e72b5SJason Gunthorpe * Atomically acquire a singleton object in the xarray for this set_id 1060f3e72b5SJason Gunthorpe */ 1070f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1080f3e72b5SJason Gunthorpe dev_set = xa_load(&vfio_device_set_xa, idx); 1090f3e72b5SJason Gunthorpe if (dev_set) 1100f3e72b5SJason Gunthorpe goto 
found_get_ref; 1110f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1120f3e72b5SJason Gunthorpe 1130f3e72b5SJason Gunthorpe new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); 1140f3e72b5SJason Gunthorpe if (!new_dev_set) 1150f3e72b5SJason Gunthorpe return -ENOMEM; 1160f3e72b5SJason Gunthorpe mutex_init(&new_dev_set->lock); 1170f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&new_dev_set->device_list); 1180f3e72b5SJason Gunthorpe new_dev_set->set_id = set_id; 1190f3e72b5SJason Gunthorpe 1200f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1210f3e72b5SJason Gunthorpe dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, 1220f3e72b5SJason Gunthorpe GFP_KERNEL); 1230f3e72b5SJason Gunthorpe if (!dev_set) { 1240f3e72b5SJason Gunthorpe dev_set = new_dev_set; 1250f3e72b5SJason Gunthorpe goto found_get_ref; 1260f3e72b5SJason Gunthorpe } 1270f3e72b5SJason Gunthorpe 1280f3e72b5SJason Gunthorpe kfree(new_dev_set); 1290f3e72b5SJason Gunthorpe if (xa_is_err(dev_set)) { 1300f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1310f3e72b5SJason Gunthorpe return xa_err(dev_set); 1320f3e72b5SJason Gunthorpe } 1330f3e72b5SJason Gunthorpe 1340f3e72b5SJason Gunthorpe found_get_ref: 1350f3e72b5SJason Gunthorpe dev_set->device_count++; 1360f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1370f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1380f3e72b5SJason Gunthorpe device->dev_set = dev_set; 1390f3e72b5SJason Gunthorpe list_add_tail(&device->dev_set_list, &dev_set->device_list); 1400f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1410f3e72b5SJason Gunthorpe return 0; 1420f3e72b5SJason Gunthorpe } 1430f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_assign_device_set); 1440f3e72b5SJason Gunthorpe 1450f3e72b5SJason Gunthorpe static void vfio_release_device_set(struct vfio_device *device) 1460f3e72b5SJason Gunthorpe { 1470f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set = device->dev_set; 1480f3e72b5SJason Gunthorpe 1490f3e72b5SJason 
Gunthorpe if (!dev_set) 1500f3e72b5SJason Gunthorpe return; 1510f3e72b5SJason Gunthorpe 1520f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1530f3e72b5SJason Gunthorpe list_del(&device->dev_set_list); 1540f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1550f3e72b5SJason Gunthorpe 1560f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1570f3e72b5SJason Gunthorpe if (!--dev_set->device_count) { 1580f3e72b5SJason Gunthorpe __xa_erase(&vfio_device_set_xa, 1590f3e72b5SJason Gunthorpe (unsigned long)dev_set->set_id); 1600f3e72b5SJason Gunthorpe mutex_destroy(&dev_set->lock); 1610f3e72b5SJason Gunthorpe kfree(dev_set); 1620f3e72b5SJason Gunthorpe } 1630f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1640f3e72b5SJason Gunthorpe } 1650f3e72b5SJason Gunthorpe 1660f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 1670f3e72b5SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg) 1680f3e72b5SJason Gunthorpe { 1690f3e72b5SJason Gunthorpe if (arg != VFIO_NOIOMMU_IOMMU) 1700f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 1710f3e72b5SJason Gunthorpe if (!capable(CAP_SYS_RAWIO)) 1720f3e72b5SJason Gunthorpe return ERR_PTR(-EPERM); 1730f3e72b5SJason Gunthorpe 1740f3e72b5SJason Gunthorpe return NULL; 1750f3e72b5SJason Gunthorpe } 1760f3e72b5SJason Gunthorpe 1770f3e72b5SJason Gunthorpe static void vfio_noiommu_release(void *iommu_data) 1780f3e72b5SJason Gunthorpe { 1790f3e72b5SJason Gunthorpe } 1800f3e72b5SJason Gunthorpe 1810f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data, 1820f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 1830f3e72b5SJason Gunthorpe { 1840f3e72b5SJason Gunthorpe if (cmd == VFIO_CHECK_EXTENSION) 1850f3e72b5SJason Gunthorpe return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 
1 : 0; 1860f3e72b5SJason Gunthorpe 1870f3e72b5SJason Gunthorpe return -ENOTTY; 1880f3e72b5SJason Gunthorpe } 1890f3e72b5SJason Gunthorpe 1900f3e72b5SJason Gunthorpe static int vfio_noiommu_attach_group(void *iommu_data, 1910f3e72b5SJason Gunthorpe struct iommu_group *iommu_group, enum vfio_group_type type) 1920f3e72b5SJason Gunthorpe { 1930f3e72b5SJason Gunthorpe return 0; 1940f3e72b5SJason Gunthorpe } 1950f3e72b5SJason Gunthorpe 1960f3e72b5SJason Gunthorpe static void vfio_noiommu_detach_group(void *iommu_data, 1970f3e72b5SJason Gunthorpe struct iommu_group *iommu_group) 1980f3e72b5SJason Gunthorpe { 1990f3e72b5SJason Gunthorpe } 2000f3e72b5SJason Gunthorpe 2010f3e72b5SJason Gunthorpe static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { 2020f3e72b5SJason Gunthorpe .name = "vfio-noiommu", 2030f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 2040f3e72b5SJason Gunthorpe .open = vfio_noiommu_open, 2050f3e72b5SJason Gunthorpe .release = vfio_noiommu_release, 2060f3e72b5SJason Gunthorpe .ioctl = vfio_noiommu_ioctl, 2070f3e72b5SJason Gunthorpe .attach_group = vfio_noiommu_attach_group, 2080f3e72b5SJason Gunthorpe .detach_group = vfio_noiommu_detach_group, 2090f3e72b5SJason Gunthorpe }; 2100f3e72b5SJason Gunthorpe 2110f3e72b5SJason Gunthorpe /* 2120f3e72b5SJason Gunthorpe * Only noiommu containers can use vfio-noiommu and noiommu containers can only 2130f3e72b5SJason Gunthorpe * use vfio-noiommu. 
2140f3e72b5SJason Gunthorpe */ 2150f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 2160f3e72b5SJason Gunthorpe const struct vfio_iommu_driver *driver) 2170f3e72b5SJason Gunthorpe { 2180f3e72b5SJason Gunthorpe return container->noiommu == (driver->ops == &vfio_noiommu_ops); 2190f3e72b5SJason Gunthorpe } 2200f3e72b5SJason Gunthorpe #else 2210f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 2220f3e72b5SJason Gunthorpe const struct vfio_iommu_driver *driver) 2230f3e72b5SJason Gunthorpe { 2240f3e72b5SJason Gunthorpe return true; 2250f3e72b5SJason Gunthorpe } 2260f3e72b5SJason Gunthorpe #endif /* CONFIG_VFIO_NOIOMMU */ 2270f3e72b5SJason Gunthorpe 2280f3e72b5SJason Gunthorpe /* 2290f3e72b5SJason Gunthorpe * IOMMU driver registration 2300f3e72b5SJason Gunthorpe */ 2310f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) 2320f3e72b5SJason Gunthorpe { 2330f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver, *tmp; 2340f3e72b5SJason Gunthorpe 2350f3e72b5SJason Gunthorpe if (WARN_ON(!ops->register_device != !ops->unregister_device)) 2360f3e72b5SJason Gunthorpe return -EINVAL; 2370f3e72b5SJason Gunthorpe 2380f3e72b5SJason Gunthorpe driver = kzalloc(sizeof(*driver), GFP_KERNEL); 2390f3e72b5SJason Gunthorpe if (!driver) 2400f3e72b5SJason Gunthorpe return -ENOMEM; 2410f3e72b5SJason Gunthorpe 2420f3e72b5SJason Gunthorpe driver->ops = ops; 2430f3e72b5SJason Gunthorpe 2440f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 2450f3e72b5SJason Gunthorpe 2460f3e72b5SJason Gunthorpe /* Check for duplicates */ 2470f3e72b5SJason Gunthorpe list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { 2480f3e72b5SJason Gunthorpe if (tmp->ops == ops) { 2490f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2500f3e72b5SJason Gunthorpe kfree(driver); 2510f3e72b5SJason Gunthorpe return -EINVAL; 2520f3e72b5SJason 
Gunthorpe } 2530f3e72b5SJason Gunthorpe } 2540f3e72b5SJason Gunthorpe 2550f3e72b5SJason Gunthorpe list_add(&driver->vfio_next, &vfio.iommu_drivers_list); 2560f3e72b5SJason Gunthorpe 2570f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2580f3e72b5SJason Gunthorpe 2590f3e72b5SJason Gunthorpe return 0; 2600f3e72b5SJason Gunthorpe } 2610f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); 2620f3e72b5SJason Gunthorpe 2630f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) 2640f3e72b5SJason Gunthorpe { 2650f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 2660f3e72b5SJason Gunthorpe 2670f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 2680f3e72b5SJason Gunthorpe list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 2690f3e72b5SJason Gunthorpe if (driver->ops == ops) { 2700f3e72b5SJason Gunthorpe list_del(&driver->vfio_next); 2710f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2720f3e72b5SJason Gunthorpe kfree(driver); 2730f3e72b5SJason Gunthorpe return; 2740f3e72b5SJason Gunthorpe } 2750f3e72b5SJason Gunthorpe } 2760f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2770f3e72b5SJason Gunthorpe } 2780f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); 2790f3e72b5SJason Gunthorpe 2800f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group); 2810f3e72b5SJason Gunthorpe 2820f3e72b5SJason Gunthorpe /* 2830f3e72b5SJason Gunthorpe * Container objects - containers are created when /dev/vfio/vfio is 2840f3e72b5SJason Gunthorpe * opened, but their lifecycle extends until the last user is done, so 2850f3e72b5SJason Gunthorpe * it's freed via kref. Must support container/group/device being 2860f3e72b5SJason Gunthorpe * closed in any order. 
2870f3e72b5SJason Gunthorpe */ 2880f3e72b5SJason Gunthorpe static void vfio_container_get(struct vfio_container *container) 2890f3e72b5SJason Gunthorpe { 2900f3e72b5SJason Gunthorpe kref_get(&container->kref); 2910f3e72b5SJason Gunthorpe } 2920f3e72b5SJason Gunthorpe 2930f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref) 2940f3e72b5SJason Gunthorpe { 2950f3e72b5SJason Gunthorpe struct vfio_container *container; 2960f3e72b5SJason Gunthorpe container = container_of(kref, struct vfio_container, kref); 2970f3e72b5SJason Gunthorpe 2980f3e72b5SJason Gunthorpe kfree(container); 2990f3e72b5SJason Gunthorpe } 3000f3e72b5SJason Gunthorpe 3010f3e72b5SJason Gunthorpe static void vfio_container_put(struct vfio_container *container) 3020f3e72b5SJason Gunthorpe { 3030f3e72b5SJason Gunthorpe kref_put(&container->kref, vfio_container_release); 3040f3e72b5SJason Gunthorpe } 3050f3e72b5SJason Gunthorpe 3060f3e72b5SJason Gunthorpe /* 3070f3e72b5SJason Gunthorpe * Group objects - create, release, get, put, search 3080f3e72b5SJason Gunthorpe */ 3090f3e72b5SJason Gunthorpe static struct vfio_group * 3100f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group) 3110f3e72b5SJason Gunthorpe { 3120f3e72b5SJason Gunthorpe struct vfio_group *group; 3130f3e72b5SJason Gunthorpe 3140f3e72b5SJason Gunthorpe list_for_each_entry(group, &vfio.group_list, vfio_next) { 3150f3e72b5SJason Gunthorpe if (group->iommu_group == iommu_group) { 3160f3e72b5SJason Gunthorpe vfio_group_get(group); 3170f3e72b5SJason Gunthorpe return group; 3180f3e72b5SJason Gunthorpe } 3190f3e72b5SJason Gunthorpe } 3200f3e72b5SJason Gunthorpe return NULL; 3210f3e72b5SJason Gunthorpe } 3220f3e72b5SJason Gunthorpe 3230f3e72b5SJason Gunthorpe static struct vfio_group * 3240f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group) 3250f3e72b5SJason Gunthorpe { 3260f3e72b5SJason Gunthorpe struct vfio_group *group; 3270f3e72b5SJason Gunthorpe 
3280f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 3290f3e72b5SJason Gunthorpe group = __vfio_group_get_from_iommu(iommu_group); 3300f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 3310f3e72b5SJason Gunthorpe return group; 3320f3e72b5SJason Gunthorpe } 3330f3e72b5SJason Gunthorpe 3340f3e72b5SJason Gunthorpe static void vfio_group_release(struct device *dev) 3350f3e72b5SJason Gunthorpe { 3360f3e72b5SJason Gunthorpe struct vfio_group *group = container_of(dev, struct vfio_group, dev); 3370f3e72b5SJason Gunthorpe 3380f3e72b5SJason Gunthorpe mutex_destroy(&group->device_lock); 3390f3e72b5SJason Gunthorpe iommu_group_put(group->iommu_group); 3400f3e72b5SJason Gunthorpe ida_free(&vfio.group_ida, MINOR(group->dev.devt)); 3410f3e72b5SJason Gunthorpe kfree(group); 3420f3e72b5SJason Gunthorpe } 3430f3e72b5SJason Gunthorpe 3440f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, 3450f3e72b5SJason Gunthorpe enum vfio_group_type type) 3460f3e72b5SJason Gunthorpe { 3470f3e72b5SJason Gunthorpe struct vfio_group *group; 3480f3e72b5SJason Gunthorpe int minor; 3490f3e72b5SJason Gunthorpe 3500f3e72b5SJason Gunthorpe group = kzalloc(sizeof(*group), GFP_KERNEL); 3510f3e72b5SJason Gunthorpe if (!group) 3520f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 3530f3e72b5SJason Gunthorpe 3540f3e72b5SJason Gunthorpe minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); 3550f3e72b5SJason Gunthorpe if (minor < 0) { 3560f3e72b5SJason Gunthorpe kfree(group); 3570f3e72b5SJason Gunthorpe return ERR_PTR(minor); 3580f3e72b5SJason Gunthorpe } 3590f3e72b5SJason Gunthorpe 3600f3e72b5SJason Gunthorpe device_initialize(&group->dev); 3610f3e72b5SJason Gunthorpe group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); 3620f3e72b5SJason Gunthorpe group->dev.class = vfio.class; 3630f3e72b5SJason Gunthorpe group->dev.release = vfio_group_release; 3640f3e72b5SJason Gunthorpe cdev_init(&group->cdev, &vfio_group_fops); 3650f3e72b5SJason 
Gunthorpe group->cdev.owner = THIS_MODULE; 3660f3e72b5SJason Gunthorpe 3670f3e72b5SJason Gunthorpe refcount_set(&group->users, 1); 3680f3e72b5SJason Gunthorpe init_rwsem(&group->group_rwsem); 3690f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&group->device_list); 3700f3e72b5SJason Gunthorpe mutex_init(&group->device_lock); 3710f3e72b5SJason Gunthorpe group->iommu_group = iommu_group; 3720f3e72b5SJason Gunthorpe /* put in vfio_group_release() */ 3730f3e72b5SJason Gunthorpe iommu_group_ref_get(iommu_group); 3740f3e72b5SJason Gunthorpe group->type = type; 3750f3e72b5SJason Gunthorpe BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); 3760f3e72b5SJason Gunthorpe 3770f3e72b5SJason Gunthorpe return group; 3780f3e72b5SJason Gunthorpe } 3790f3e72b5SJason Gunthorpe 3800f3e72b5SJason Gunthorpe static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, 3810f3e72b5SJason Gunthorpe enum vfio_group_type type) 3820f3e72b5SJason Gunthorpe { 3830f3e72b5SJason Gunthorpe struct vfio_group *group; 3840f3e72b5SJason Gunthorpe struct vfio_group *ret; 3850f3e72b5SJason Gunthorpe int err; 3860f3e72b5SJason Gunthorpe 3870f3e72b5SJason Gunthorpe group = vfio_group_alloc(iommu_group, type); 3880f3e72b5SJason Gunthorpe if (IS_ERR(group)) 3890f3e72b5SJason Gunthorpe return group; 3900f3e72b5SJason Gunthorpe 3910f3e72b5SJason Gunthorpe err = dev_set_name(&group->dev, "%s%d", 3920f3e72b5SJason Gunthorpe group->type == VFIO_NO_IOMMU ? "noiommu-" : "", 3930f3e72b5SJason Gunthorpe iommu_group_id(iommu_group)); 3940f3e72b5SJason Gunthorpe if (err) { 3950f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 3960f3e72b5SJason Gunthorpe goto err_put; 3970f3e72b5SJason Gunthorpe } 3980f3e72b5SJason Gunthorpe 3990f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 4000f3e72b5SJason Gunthorpe 4010f3e72b5SJason Gunthorpe /* Did we race creating this group? 
*/ 4020f3e72b5SJason Gunthorpe ret = __vfio_group_get_from_iommu(iommu_group); 4030f3e72b5SJason Gunthorpe if (ret) 4040f3e72b5SJason Gunthorpe goto err_unlock; 4050f3e72b5SJason Gunthorpe 4060f3e72b5SJason Gunthorpe err = cdev_device_add(&group->cdev, &group->dev); 4070f3e72b5SJason Gunthorpe if (err) { 4080f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 4090f3e72b5SJason Gunthorpe goto err_unlock; 4100f3e72b5SJason Gunthorpe } 4110f3e72b5SJason Gunthorpe 4120f3e72b5SJason Gunthorpe list_add(&group->vfio_next, &vfio.group_list); 4130f3e72b5SJason Gunthorpe 4140f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4150f3e72b5SJason Gunthorpe return group; 4160f3e72b5SJason Gunthorpe 4170f3e72b5SJason Gunthorpe err_unlock: 4180f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4190f3e72b5SJason Gunthorpe err_put: 4200f3e72b5SJason Gunthorpe put_device(&group->dev); 4210f3e72b5SJason Gunthorpe return ret; 4220f3e72b5SJason Gunthorpe } 4230f3e72b5SJason Gunthorpe 4240f3e72b5SJason Gunthorpe static void vfio_group_put(struct vfio_group *group) 4250f3e72b5SJason Gunthorpe { 4260f3e72b5SJason Gunthorpe if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock)) 4270f3e72b5SJason Gunthorpe return; 4280f3e72b5SJason Gunthorpe 4290f3e72b5SJason Gunthorpe /* 4300f3e72b5SJason Gunthorpe * These data structures all have paired operations that can only be 4310f3e72b5SJason Gunthorpe * undone when the caller holds a live reference on the group. Since all 4320f3e72b5SJason Gunthorpe * pairs must be undone these WARN_ON's indicate some caller did not 4330f3e72b5SJason Gunthorpe * properly hold the group reference. 
4340f3e72b5SJason Gunthorpe */ 4350f3e72b5SJason Gunthorpe WARN_ON(!list_empty(&group->device_list)); 4360f3e72b5SJason Gunthorpe WARN_ON(group->container || group->container_users); 4370f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 4380f3e72b5SJason Gunthorpe 4390f3e72b5SJason Gunthorpe list_del(&group->vfio_next); 4400f3e72b5SJason Gunthorpe cdev_device_del(&group->cdev, &group->dev); 4410f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4420f3e72b5SJason Gunthorpe 4430f3e72b5SJason Gunthorpe put_device(&group->dev); 4440f3e72b5SJason Gunthorpe } 4450f3e72b5SJason Gunthorpe 4460f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group) 4470f3e72b5SJason Gunthorpe { 4480f3e72b5SJason Gunthorpe refcount_inc(&group->users); 4490f3e72b5SJason Gunthorpe } 4500f3e72b5SJason Gunthorpe 4510f3e72b5SJason Gunthorpe /* 4520f3e72b5SJason Gunthorpe * Device objects - create, release, get, put, search 4530f3e72b5SJason Gunthorpe */ 4540f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */ 4550f3e72b5SJason Gunthorpe static void vfio_device_put(struct vfio_device *device) 4560f3e72b5SJason Gunthorpe { 4570f3e72b5SJason Gunthorpe if (refcount_dec_and_test(&device->refcount)) 4580f3e72b5SJason Gunthorpe complete(&device->comp); 4590f3e72b5SJason Gunthorpe } 4600f3e72b5SJason Gunthorpe 4610f3e72b5SJason Gunthorpe static bool vfio_device_try_get(struct vfio_device *device) 4620f3e72b5SJason Gunthorpe { 4630f3e72b5SJason Gunthorpe return refcount_inc_not_zero(&device->refcount); 4640f3e72b5SJason Gunthorpe } 4650f3e72b5SJason Gunthorpe 4660f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group, 4670f3e72b5SJason Gunthorpe struct device *dev) 4680f3e72b5SJason Gunthorpe { 4690f3e72b5SJason Gunthorpe struct vfio_device *device; 4700f3e72b5SJason Gunthorpe 4710f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 4720f3e72b5SJason Gunthorpe list_for_each_entry(device, 
&group->device_list, group_next) { 4730f3e72b5SJason Gunthorpe if (device->dev == dev && vfio_device_try_get(device)) { 4740f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 4750f3e72b5SJason Gunthorpe return device; 4760f3e72b5SJason Gunthorpe } 4770f3e72b5SJason Gunthorpe } 4780f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 4790f3e72b5SJason Gunthorpe return NULL; 4800f3e72b5SJason Gunthorpe } 4810f3e72b5SJason Gunthorpe 4820f3e72b5SJason Gunthorpe /* 4830f3e72b5SJason Gunthorpe * VFIO driver API 4840f3e72b5SJason Gunthorpe */ 4850f3e72b5SJason Gunthorpe void vfio_init_group_dev(struct vfio_device *device, struct device *dev, 4860f3e72b5SJason Gunthorpe const struct vfio_device_ops *ops) 4870f3e72b5SJason Gunthorpe { 4880f3e72b5SJason Gunthorpe init_completion(&device->comp); 4890f3e72b5SJason Gunthorpe device->dev = dev; 4900f3e72b5SJason Gunthorpe device->ops = ops; 4910f3e72b5SJason Gunthorpe } 4920f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_init_group_dev); 4930f3e72b5SJason Gunthorpe 4940f3e72b5SJason Gunthorpe void vfio_uninit_group_dev(struct vfio_device *device) 4950f3e72b5SJason Gunthorpe { 4960f3e72b5SJason Gunthorpe vfio_release_device_set(device); 4970f3e72b5SJason Gunthorpe } 4980f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); 4990f3e72b5SJason Gunthorpe 5000f3e72b5SJason Gunthorpe static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, 5010f3e72b5SJason Gunthorpe enum vfio_group_type type) 5020f3e72b5SJason Gunthorpe { 5030f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 5040f3e72b5SJason Gunthorpe struct vfio_group *group; 5050f3e72b5SJason Gunthorpe int ret; 5060f3e72b5SJason Gunthorpe 5070f3e72b5SJason Gunthorpe iommu_group = iommu_group_alloc(); 5080f3e72b5SJason Gunthorpe if (IS_ERR(iommu_group)) 5090f3e72b5SJason Gunthorpe return ERR_CAST(iommu_group); 5100f3e72b5SJason Gunthorpe 5110f3e72b5SJason Gunthorpe ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); 
5120f3e72b5SJason Gunthorpe if (ret) 5130f3e72b5SJason Gunthorpe goto out_put_group; 5140f3e72b5SJason Gunthorpe ret = iommu_group_add_device(iommu_group, dev); 5150f3e72b5SJason Gunthorpe if (ret) 5160f3e72b5SJason Gunthorpe goto out_put_group; 5170f3e72b5SJason Gunthorpe 5180f3e72b5SJason Gunthorpe group = vfio_create_group(iommu_group, type); 5190f3e72b5SJason Gunthorpe if (IS_ERR(group)) { 5200f3e72b5SJason Gunthorpe ret = PTR_ERR(group); 5210f3e72b5SJason Gunthorpe goto out_remove_device; 5220f3e72b5SJason Gunthorpe } 5230f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5240f3e72b5SJason Gunthorpe return group; 5250f3e72b5SJason Gunthorpe 5260f3e72b5SJason Gunthorpe out_remove_device: 5270f3e72b5SJason Gunthorpe iommu_group_remove_device(dev); 5280f3e72b5SJason Gunthorpe out_put_group: 5290f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5300f3e72b5SJason Gunthorpe return ERR_PTR(ret); 5310f3e72b5SJason Gunthorpe } 5320f3e72b5SJason Gunthorpe 5330f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) 5340f3e72b5SJason Gunthorpe { 5350f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 5360f3e72b5SJason Gunthorpe struct vfio_group *group; 5370f3e72b5SJason Gunthorpe 5380f3e72b5SJason Gunthorpe iommu_group = iommu_group_get(dev); 5390f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 5400f3e72b5SJason Gunthorpe if (!iommu_group && noiommu) { 5410f3e72b5SJason Gunthorpe /* 5420f3e72b5SJason Gunthorpe * With noiommu enabled, create an IOMMU group for devices that 5430f3e72b5SJason Gunthorpe * don't already have one, implying no IOMMU hardware/driver 5440f3e72b5SJason Gunthorpe * exists. Taint the kernel because we're about to give a DMA 5450f3e72b5SJason Gunthorpe * capable device to a user without IOMMU protection. 
5460f3e72b5SJason Gunthorpe */ 5470f3e72b5SJason Gunthorpe group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); 5480f3e72b5SJason Gunthorpe if (!IS_ERR(group)) { 5490f3e72b5SJason Gunthorpe add_taint(TAINT_USER, LOCKDEP_STILL_OK); 5500f3e72b5SJason Gunthorpe dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); 5510f3e72b5SJason Gunthorpe } 5520f3e72b5SJason Gunthorpe return group; 5530f3e72b5SJason Gunthorpe } 5540f3e72b5SJason Gunthorpe #endif 5550f3e72b5SJason Gunthorpe if (!iommu_group) 5560f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 5570f3e72b5SJason Gunthorpe 5580f3e72b5SJason Gunthorpe /* 5590f3e72b5SJason Gunthorpe * VFIO always sets IOMMU_CACHE because we offer no way for userspace to 5600f3e72b5SJason Gunthorpe * restore cache coherency. It has to be checked here because it is only 5610f3e72b5SJason Gunthorpe * valid for cases where we are using iommu groups. 5620f3e72b5SJason Gunthorpe */ 5630f3e72b5SJason Gunthorpe if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { 5640f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5650f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 5660f3e72b5SJason Gunthorpe } 5670f3e72b5SJason Gunthorpe 5680f3e72b5SJason Gunthorpe group = vfio_group_get_from_iommu(iommu_group); 5690f3e72b5SJason Gunthorpe if (!group) 5700f3e72b5SJason Gunthorpe group = vfio_create_group(iommu_group, VFIO_IOMMU); 5710f3e72b5SJason Gunthorpe 5720f3e72b5SJason Gunthorpe /* The vfio_group holds a reference to the iommu_group */ 5730f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5740f3e72b5SJason Gunthorpe return group; 5750f3e72b5SJason Gunthorpe } 5760f3e72b5SJason Gunthorpe 5770f3e72b5SJason Gunthorpe static int __vfio_register_dev(struct vfio_device *device, 5780f3e72b5SJason Gunthorpe struct vfio_group *group) 5790f3e72b5SJason Gunthorpe { 5800f3e72b5SJason Gunthorpe struct vfio_device *existing_device; 5810f3e72b5SJason Gunthorpe 5820f3e72b5SJason Gunthorpe if (IS_ERR(group)) 5830f3e72b5SJason 
Gunthorpe return PTR_ERR(group); 5840f3e72b5SJason Gunthorpe 5850f3e72b5SJason Gunthorpe /* 5860f3e72b5SJason Gunthorpe * If the driver doesn't specify a set then the device is added to a 5870f3e72b5SJason Gunthorpe * singleton set just for itself. 5880f3e72b5SJason Gunthorpe */ 5890f3e72b5SJason Gunthorpe if (!device->dev_set) 5900f3e72b5SJason Gunthorpe vfio_assign_device_set(device, device); 5910f3e72b5SJason Gunthorpe 5920f3e72b5SJason Gunthorpe existing_device = vfio_group_get_device(group, device->dev); 5930f3e72b5SJason Gunthorpe if (existing_device) { 5940f3e72b5SJason Gunthorpe dev_WARN(device->dev, "Device already exists on group %d\n", 5950f3e72b5SJason Gunthorpe iommu_group_id(group->iommu_group)); 5960f3e72b5SJason Gunthorpe vfio_device_put(existing_device); 5970f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU || 5980f3e72b5SJason Gunthorpe group->type == VFIO_EMULATED_IOMMU) 5990f3e72b5SJason Gunthorpe iommu_group_remove_device(device->dev); 6000f3e72b5SJason Gunthorpe vfio_group_put(group); 6010f3e72b5SJason Gunthorpe return -EBUSY; 6020f3e72b5SJason Gunthorpe } 6030f3e72b5SJason Gunthorpe 6040f3e72b5SJason Gunthorpe /* Our reference on group is moved to the device */ 6050f3e72b5SJason Gunthorpe device->group = group; 6060f3e72b5SJason Gunthorpe 6070f3e72b5SJason Gunthorpe /* Refcounting can't start until the driver calls register */ 6080f3e72b5SJason Gunthorpe refcount_set(&device->refcount, 1); 6090f3e72b5SJason Gunthorpe 6100f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 6110f3e72b5SJason Gunthorpe list_add(&device->group_next, &group->device_list); 6120f3e72b5SJason Gunthorpe group->dev_counter++; 6130f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 6140f3e72b5SJason Gunthorpe 6150f3e72b5SJason Gunthorpe return 0; 6160f3e72b5SJason Gunthorpe } 6170f3e72b5SJason Gunthorpe 6180f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device) 6190f3e72b5SJason Gunthorpe { 6200f3e72b5SJason Gunthorpe return 
__vfio_register_dev(device,
			   vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);

/*
 * Register a virtual device without IOMMU backing. The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);

/*
 * Look up a device in @group by name. Uses the driver's ->match() op when
 * provided, otherwise an exact dev_name() string compare. On success the
 * returned device carries a reference (vfio_device_try_get); otherwise
 * ERR_PTR(-ENODEV) or an error propagated from ->match().
 */
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed. Open file descriptors for the device... */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/* Drop the registration reference taken in __vfio_register_dev() */
	vfio_device_put(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/* Nudge users (e.g. to trigger hot-unplug) each 10s timeout */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				/* Signal received: warn once, then wait uninterruptibly */
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	/* fake (noiommu/emulated) groups added the device themselves */
	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);

/*
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result. If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret =
driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Detach, in reverse order, every group attached before the failure */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users. Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources. There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them. We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
	int ret = 0;

	down_write(&group->group_rwsem);
	if (!group->container) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (group->container_users != 1) {
		ret = -EBUSY;
		goto out_unlock;
	}
	__vfio_group_unset_container(group);

out_unlock:
	up_write(&group->group_rwsem);
	return ret;
}

/*
 * Attach @group to the container identified by the fd read from @arg.
 * Lock order: group->group_rwsem is taken before container->group_lock.
 */
static int vfio_group_ioctl_set_container(struct vfio_group *group,
					  int __user *arg)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int container_fd;
	int ret = 0;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	if (get_user(container_fd, arg))
		return -EFAULT;
	if (container_fd < 0)
		return -EINVAL;
	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		ret = -EINVAL;
		goto out_fdput;
	}
	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&group->group_rwsem);

	if (group->container || WARN_ON(group->container_users)) {
		ret = -EINVAL;
		goto out_unlock_group;
	}

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* Undo the dma-owner claim taken above */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
out_unlock_group:
	up_write(&group->group_rwsem);
out_fdput:
	fdput(f);
	return ret;
}

static const struct file_operations vfio_device_fops;

/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}

static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	/* The group fd pins the group open while a device fd exists */
	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

static void vfio_device_unassign_container(struct vfio_device *device)
{
	down_write(&device->group->group_rwsem);
	WARN_ON(device->group->container_users <= 1);
	device->group->container_users--;
	fput(device->group->opened_file);
	up_write(&device->group->group_rwsem);
}

/*
 * Open @device and return a new anon-inode file for it. On success the
 * caller's device reference is owned by the file and released in
 * vfio_device_fops_release(); on failure ERR_PTR is returned and all
 * intermediate state (open count, module ref, container user) is undone.
 */
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}

/*
 * VFIO_GROUP_GET_DEVICE_FD: @arg is a user string naming a device in the
 * group; returns a new fd for that device or a negative errno.
 */
static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
					  char __user *arg)
{
	struct vfio_device *device;
	struct file *filep;
	char *buf;
	int fdno;
	int ret;

	buf = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	device = vfio_device_get_from_name(group, buf);
	kfree(buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put(device);
	return ret;
}

static int vfio_group_ioctl_get_status(struct vfio_group *group,
				       struct vfio_group_status __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
	struct vfio_group_status status;

	if (copy_from_user(&status, arg, minsz))
		return -EFAULT;

	if (status.argsz < minsz)
		return -EINVAL;

	status.flags = 0;

	down_read(&group->group_rwsem);
	if (group->container)
		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
				VFIO_GROUP_FLAGS_VIABLE;
	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
	up_read(&group->group_rwsem);

	if (copy_to_user(arg, &status, minsz))
		return -EFAULT;
	return 0;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	void __user *uarg = (void __user *)arg;

	switch (cmd) {
	case VFIO_GROUP_GET_DEVICE_FD:
127599a27c08SJason Gunthorpe return vfio_group_ioctl_get_device_fd(group, uarg); 127699a27c08SJason Gunthorpe case VFIO_GROUP_GET_STATUS: 127799a27c08SJason Gunthorpe return vfio_group_ioctl_get_status(group, uarg); 12780f3e72b5SJason Gunthorpe case VFIO_GROUP_SET_CONTAINER: 127967671f15SJason Gunthorpe return vfio_group_ioctl_set_container(group, uarg); 12800f3e72b5SJason Gunthorpe case VFIO_GROUP_UNSET_CONTAINER: 1281b3b43590SJason Gunthorpe return vfio_group_ioctl_unset_container(group); 128299a27c08SJason Gunthorpe default: 128399a27c08SJason Gunthorpe return -ENOTTY; 12840f3e72b5SJason Gunthorpe } 12850f3e72b5SJason Gunthorpe } 12860f3e72b5SJason Gunthorpe 12870f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep) 12880f3e72b5SJason Gunthorpe { 12890f3e72b5SJason Gunthorpe struct vfio_group *group = 12900f3e72b5SJason Gunthorpe container_of(inode->i_cdev, struct vfio_group, cdev); 12910f3e72b5SJason Gunthorpe int ret; 12920f3e72b5SJason Gunthorpe 12930f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 12940f3e72b5SJason Gunthorpe 12950f3e72b5SJason Gunthorpe /* users can be zero if this races with vfio_group_put() */ 12960f3e72b5SJason Gunthorpe if (!refcount_inc_not_zero(&group->users)) { 12970f3e72b5SJason Gunthorpe ret = -ENODEV; 12980f3e72b5SJason Gunthorpe goto err_unlock; 12990f3e72b5SJason Gunthorpe } 13000f3e72b5SJason Gunthorpe 13010f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { 13020f3e72b5SJason Gunthorpe ret = -EPERM; 13030f3e72b5SJason Gunthorpe goto err_put; 13040f3e72b5SJason Gunthorpe } 13050f3e72b5SJason Gunthorpe 13060f3e72b5SJason Gunthorpe /* 13070f3e72b5SJason Gunthorpe * Do we need multiple instances of the group open? Seems not. 
13080f3e72b5SJason Gunthorpe */ 13090f3e72b5SJason Gunthorpe if (group->opened_file) { 13100f3e72b5SJason Gunthorpe ret = -EBUSY; 13110f3e72b5SJason Gunthorpe goto err_put; 13120f3e72b5SJason Gunthorpe } 13130f3e72b5SJason Gunthorpe group->opened_file = filep; 13140f3e72b5SJason Gunthorpe filep->private_data = group; 13150f3e72b5SJason Gunthorpe 13160f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 13170f3e72b5SJason Gunthorpe return 0; 13180f3e72b5SJason Gunthorpe err_put: 13190f3e72b5SJason Gunthorpe vfio_group_put(group); 13200f3e72b5SJason Gunthorpe err_unlock: 13210f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 13220f3e72b5SJason Gunthorpe return ret; 13230f3e72b5SJason Gunthorpe } 13240f3e72b5SJason Gunthorpe 13250f3e72b5SJason Gunthorpe static int vfio_group_fops_release(struct inode *inode, struct file *filep) 13260f3e72b5SJason Gunthorpe { 13270f3e72b5SJason Gunthorpe struct vfio_group *group = filep->private_data; 13280f3e72b5SJason Gunthorpe 13290f3e72b5SJason Gunthorpe filep->private_data = NULL; 13300f3e72b5SJason Gunthorpe 13310f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 13320f3e72b5SJason Gunthorpe /* 13330f3e72b5SJason Gunthorpe * Device FDs hold a group file reference, therefore the group release 13340f3e72b5SJason Gunthorpe * is only called when there are no open devices. 
13350f3e72b5SJason Gunthorpe */ 13360f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 13370f3e72b5SJason Gunthorpe if (group->container) { 13380f3e72b5SJason Gunthorpe WARN_ON(group->container_users != 1); 13390f3e72b5SJason Gunthorpe __vfio_group_unset_container(group); 13400f3e72b5SJason Gunthorpe } 13410f3e72b5SJason Gunthorpe group->opened_file = NULL; 13420f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 13430f3e72b5SJason Gunthorpe 13440f3e72b5SJason Gunthorpe vfio_group_put(group); 13450f3e72b5SJason Gunthorpe 13460f3e72b5SJason Gunthorpe return 0; 13470f3e72b5SJason Gunthorpe } 13480f3e72b5SJason Gunthorpe 13490f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops = { 13500f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 13510f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_group_fops_unl_ioctl, 13520f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 13530f3e72b5SJason Gunthorpe .open = vfio_group_fops_open, 13540f3e72b5SJason Gunthorpe .release = vfio_group_fops_release, 13550f3e72b5SJason Gunthorpe }; 13560f3e72b5SJason Gunthorpe 13570f3e72b5SJason Gunthorpe /* 1358*8e5c6995SAbhishek Sahu * Wrapper around pm_runtime_resume_and_get(). 1359*8e5c6995SAbhishek Sahu * Return error code on failure or 0 on success. 
1360*8e5c6995SAbhishek Sahu */ 1361*8e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device) 1362*8e5c6995SAbhishek Sahu { 1363*8e5c6995SAbhishek Sahu struct device *dev = device->dev; 1364*8e5c6995SAbhishek Sahu 1365*8e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) { 1366*8e5c6995SAbhishek Sahu int ret; 1367*8e5c6995SAbhishek Sahu 1368*8e5c6995SAbhishek Sahu ret = pm_runtime_resume_and_get(dev); 1369*8e5c6995SAbhishek Sahu if (ret) { 1370*8e5c6995SAbhishek Sahu dev_info_ratelimited(dev, 1371*8e5c6995SAbhishek Sahu "vfio: runtime resume failed %d\n", ret); 1372*8e5c6995SAbhishek Sahu return -EIO; 1373*8e5c6995SAbhishek Sahu } 1374*8e5c6995SAbhishek Sahu } 1375*8e5c6995SAbhishek Sahu 1376*8e5c6995SAbhishek Sahu return 0; 1377*8e5c6995SAbhishek Sahu } 1378*8e5c6995SAbhishek Sahu 1379*8e5c6995SAbhishek Sahu /* 1380*8e5c6995SAbhishek Sahu * Wrapper around pm_runtime_put(). 1381*8e5c6995SAbhishek Sahu */ 1382*8e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device) 1383*8e5c6995SAbhishek Sahu { 1384*8e5c6995SAbhishek Sahu struct device *dev = device->dev; 1385*8e5c6995SAbhishek Sahu 1386*8e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) 1387*8e5c6995SAbhishek Sahu pm_runtime_put(dev); 1388*8e5c6995SAbhishek Sahu } 1389*8e5c6995SAbhishek Sahu 1390*8e5c6995SAbhishek Sahu /* 13910f3e72b5SJason Gunthorpe * VFIO Device fd 13920f3e72b5SJason Gunthorpe */ 13930f3e72b5SJason Gunthorpe static int vfio_device_fops_release(struct inode *inode, struct file *filep) 13940f3e72b5SJason Gunthorpe { 13950f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 13960f3e72b5SJason Gunthorpe struct vfio_iommu_driver *iommu_driver; 13970f3e72b5SJason Gunthorpe 13980f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 13990f3e72b5SJason Gunthorpe vfio_assert_device_open(device); 14000f3e72b5SJason Gunthorpe down_read(&device->group->group_rwsem); 14010f3e72b5SJason 
Gunthorpe if (device->open_count == 1 && device->ops->close_device) 14020f3e72b5SJason Gunthorpe device->ops->close_device(device); 14030f3e72b5SJason Gunthorpe 14040f3e72b5SJason Gunthorpe iommu_driver = device->group->container->iommu_driver; 14050f3e72b5SJason Gunthorpe if (iommu_driver && iommu_driver->ops->unregister_device) 14060f3e72b5SJason Gunthorpe iommu_driver->ops->unregister_device( 14070f3e72b5SJason Gunthorpe device->group->container->iommu_data, device); 14080f3e72b5SJason Gunthorpe up_read(&device->group->group_rwsem); 14090f3e72b5SJason Gunthorpe device->open_count--; 14100f3e72b5SJason Gunthorpe if (device->open_count == 0) 14110f3e72b5SJason Gunthorpe device->kvm = NULL; 14120f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 14130f3e72b5SJason Gunthorpe 14140f3e72b5SJason Gunthorpe module_put(device->dev->driver->owner); 14150f3e72b5SJason Gunthorpe 14160f3e72b5SJason Gunthorpe vfio_device_unassign_container(device); 14170f3e72b5SJason Gunthorpe 14180f3e72b5SJason Gunthorpe vfio_device_put(device); 14190f3e72b5SJason Gunthorpe 14200f3e72b5SJason Gunthorpe return 0; 14210f3e72b5SJason Gunthorpe } 14220f3e72b5SJason Gunthorpe 14230f3e72b5SJason Gunthorpe /* 14240f3e72b5SJason Gunthorpe * vfio_mig_get_next_state - Compute the next step in the FSM 14250f3e72b5SJason Gunthorpe * @cur_fsm - The current state the device is in 14260f3e72b5SJason Gunthorpe * @new_fsm - The target state to reach 14270f3e72b5SJason Gunthorpe * @next_fsm - Pointer to the next step to get to new_fsm 14280f3e72b5SJason Gunthorpe * 14290f3e72b5SJason Gunthorpe * Return 0 upon success, otherwise -errno 14300f3e72b5SJason Gunthorpe * Upon success the next step in the state progression between cur_fsm and 14310f3e72b5SJason Gunthorpe * new_fsm will be set in next_fsm. 
14320f3e72b5SJason Gunthorpe * 14330f3e72b5SJason Gunthorpe * This breaks down requests for combination transitions into smaller steps and 14340f3e72b5SJason Gunthorpe * returns the next step to get to new_fsm. The function may need to be called 14350f3e72b5SJason Gunthorpe * multiple times before reaching new_fsm. 14360f3e72b5SJason Gunthorpe * 14370f3e72b5SJason Gunthorpe */ 14380f3e72b5SJason Gunthorpe int vfio_mig_get_next_state(struct vfio_device *device, 14390f3e72b5SJason Gunthorpe enum vfio_device_mig_state cur_fsm, 14400f3e72b5SJason Gunthorpe enum vfio_device_mig_state new_fsm, 14410f3e72b5SJason Gunthorpe enum vfio_device_mig_state *next_fsm) 14420f3e72b5SJason Gunthorpe { 14430f3e72b5SJason Gunthorpe enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; 14440f3e72b5SJason Gunthorpe /* 14450f3e72b5SJason Gunthorpe * The coding in this table requires the driver to implement the 14460f3e72b5SJason Gunthorpe * following FSM arcs: 14470f3e72b5SJason Gunthorpe * RESUMING -> STOP 14480f3e72b5SJason Gunthorpe * STOP -> RESUMING 14490f3e72b5SJason Gunthorpe * STOP -> STOP_COPY 14500f3e72b5SJason Gunthorpe * STOP_COPY -> STOP 14510f3e72b5SJason Gunthorpe * 14520f3e72b5SJason Gunthorpe * If P2P is supported then the driver must also implement these FSM 14530f3e72b5SJason Gunthorpe * arcs: 14540f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P 14550f3e72b5SJason Gunthorpe * RUNNING_P2P -> RUNNING 14560f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP 14570f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P 14580f3e72b5SJason Gunthorpe * Without P2P the driver must implement: 14590f3e72b5SJason Gunthorpe * RUNNING -> STOP 14600f3e72b5SJason Gunthorpe * STOP -> RUNNING 14610f3e72b5SJason Gunthorpe * 14620f3e72b5SJason Gunthorpe * The coding will step through multiple states for some combination 14630f3e72b5SJason Gunthorpe * transitions; if all optional features are supported, this means the 14640f3e72b5SJason Gunthorpe * following ones: 14650f3e72b5SJason 
Gunthorpe * RESUMING -> STOP -> RUNNING_P2P 14660f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 14670f3e72b5SJason Gunthorpe * RESUMING -> STOP -> STOP_COPY 14680f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP 14690f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 14700f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 14710f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> RESUMING 14720f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> STOP_COPY 14730f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P -> RUNNING 14740f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RESUMING 14750f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P 14760f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 14770f3e72b5SJason Gunthorpe */ 14780f3e72b5SJason Gunthorpe static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 14790f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = { 14800f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 14810f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 14820f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 14830f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 14840f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 14850f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14860f3e72b5SJason Gunthorpe }, 14870f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = { 14880f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 14890f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 14900f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 14910f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 14920f3e72b5SJason Gunthorpe 
[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 14930f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14940f3e72b5SJason Gunthorpe }, 14950f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = { 14960f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 14970f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 14980f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 14990f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 15000f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 15010f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 15020f3e72b5SJason Gunthorpe }, 15030f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = { 15040f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 15050f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 15060f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 15070f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 15080f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 15090f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 15100f3e72b5SJason Gunthorpe }, 15110f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = { 15120f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 15130f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 15140f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 15150f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 15160f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 15170f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 15180f3e72b5SJason Gunthorpe }, 
15190f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = { 15200f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, 15210f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, 15220f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, 15230f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, 15240f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, 15250f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 15260f3e72b5SJason Gunthorpe }, 15270f3e72b5SJason Gunthorpe }; 15280f3e72b5SJason Gunthorpe 15290f3e72b5SJason Gunthorpe static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { 15300f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, 15310f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, 15320f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, 15330f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, 15340f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = 15350f3e72b5SJason Gunthorpe VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, 15360f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = ~0U, 15370f3e72b5SJason Gunthorpe }; 15380f3e72b5SJason Gunthorpe 15390f3e72b5SJason Gunthorpe if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 15400f3e72b5SJason Gunthorpe (state_flags_table[cur_fsm] & device->migration_flags) != 15410f3e72b5SJason Gunthorpe state_flags_table[cur_fsm])) 15420f3e72b5SJason Gunthorpe return -EINVAL; 15430f3e72b5SJason Gunthorpe 15440f3e72b5SJason Gunthorpe if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 15450f3e72b5SJason Gunthorpe (state_flags_table[new_fsm] & device->migration_flags) != 15460f3e72b5SJason Gunthorpe state_flags_table[new_fsm]) 15470f3e72b5SJason Gunthorpe return -EINVAL; 15480f3e72b5SJason Gunthorpe 15490f3e72b5SJason Gunthorpe /* 
15500f3e72b5SJason Gunthorpe * Arcs touching optional and unsupported states are skipped over. The 15510f3e72b5SJason Gunthorpe * driver will instead see an arc from the original state to the next 15520f3e72b5SJason Gunthorpe * logical state, as per the above comment. 15530f3e72b5SJason Gunthorpe */ 15540f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; 15550f3e72b5SJason Gunthorpe while ((state_flags_table[*next_fsm] & device->migration_flags) != 15560f3e72b5SJason Gunthorpe state_flags_table[*next_fsm]) 15570f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; 15580f3e72b5SJason Gunthorpe 15590f3e72b5SJason Gunthorpe return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL; 15600f3e72b5SJason Gunthorpe } 15610f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_mig_get_next_state); 15620f3e72b5SJason Gunthorpe 15630f3e72b5SJason Gunthorpe /* 15640f3e72b5SJason Gunthorpe * Convert the drivers's struct file into a FD number and return it to userspace 15650f3e72b5SJason Gunthorpe */ 15660f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, 15670f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state *mig) 15680f3e72b5SJason Gunthorpe { 15690f3e72b5SJason Gunthorpe int ret; 15700f3e72b5SJason Gunthorpe int fd; 15710f3e72b5SJason Gunthorpe 15720f3e72b5SJason Gunthorpe fd = get_unused_fd_flags(O_CLOEXEC); 15730f3e72b5SJason Gunthorpe if (fd < 0) { 15740f3e72b5SJason Gunthorpe ret = fd; 15750f3e72b5SJason Gunthorpe goto out_fput; 15760f3e72b5SJason Gunthorpe } 15770f3e72b5SJason Gunthorpe 15780f3e72b5SJason Gunthorpe mig->data_fd = fd; 15790f3e72b5SJason Gunthorpe if (copy_to_user(arg, mig, sizeof(*mig))) { 15800f3e72b5SJason Gunthorpe ret = -EFAULT; 15810f3e72b5SJason Gunthorpe goto out_put_unused; 15820f3e72b5SJason Gunthorpe } 15830f3e72b5SJason Gunthorpe fd_install(fd, filp); 15840f3e72b5SJason Gunthorpe return 0; 15850f3e72b5SJason Gunthorpe 15860f3e72b5SJason Gunthorpe 
out_put_unused: 15870f3e72b5SJason Gunthorpe put_unused_fd(fd); 15880f3e72b5SJason Gunthorpe out_fput: 15890f3e72b5SJason Gunthorpe fput(filp); 15900f3e72b5SJason Gunthorpe return ret; 15910f3e72b5SJason Gunthorpe } 15920f3e72b5SJason Gunthorpe 15930f3e72b5SJason Gunthorpe static int 15940f3e72b5SJason Gunthorpe vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, 15950f3e72b5SJason Gunthorpe u32 flags, void __user *arg, 15960f3e72b5SJason Gunthorpe size_t argsz) 15970f3e72b5SJason Gunthorpe { 15980f3e72b5SJason Gunthorpe size_t minsz = 15990f3e72b5SJason Gunthorpe offsetofend(struct vfio_device_feature_mig_state, data_fd); 16000f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state mig; 16010f3e72b5SJason Gunthorpe struct file *filp = NULL; 16020f3e72b5SJason Gunthorpe int ret; 16030f3e72b5SJason Gunthorpe 16040f3e72b5SJason Gunthorpe if (!device->mig_ops) 16050f3e72b5SJason Gunthorpe return -ENOTTY; 16060f3e72b5SJason Gunthorpe 16070f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, 16080f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_SET | 16090f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET, 16100f3e72b5SJason Gunthorpe sizeof(mig)); 16110f3e72b5SJason Gunthorpe if (ret != 1) 16120f3e72b5SJason Gunthorpe return ret; 16130f3e72b5SJason Gunthorpe 16140f3e72b5SJason Gunthorpe if (copy_from_user(&mig, arg, minsz)) 16150f3e72b5SJason Gunthorpe return -EFAULT; 16160f3e72b5SJason Gunthorpe 16170f3e72b5SJason Gunthorpe if (flags & VFIO_DEVICE_FEATURE_GET) { 16180f3e72b5SJason Gunthorpe enum vfio_device_mig_state curr_state; 16190f3e72b5SJason Gunthorpe 16200f3e72b5SJason Gunthorpe ret = device->mig_ops->migration_get_state(device, 16210f3e72b5SJason Gunthorpe &curr_state); 16220f3e72b5SJason Gunthorpe if (ret) 16230f3e72b5SJason Gunthorpe return ret; 16240f3e72b5SJason Gunthorpe mig.device_state = curr_state; 16250f3e72b5SJason Gunthorpe goto out_copy; 16260f3e72b5SJason Gunthorpe } 16270f3e72b5SJason Gunthorpe 16280f3e72b5SJason 
Gunthorpe /* Handle the VFIO_DEVICE_FEATURE_SET */ 16290f3e72b5SJason Gunthorpe filp = device->mig_ops->migration_set_state(device, mig.device_state); 16300f3e72b5SJason Gunthorpe if (IS_ERR(filp) || !filp) 16310f3e72b5SJason Gunthorpe goto out_copy; 16320f3e72b5SJason Gunthorpe 16330f3e72b5SJason Gunthorpe return vfio_ioct_mig_return_fd(filp, arg, &mig); 16340f3e72b5SJason Gunthorpe out_copy: 16350f3e72b5SJason Gunthorpe mig.data_fd = -1; 16360f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 16370f3e72b5SJason Gunthorpe return -EFAULT; 16380f3e72b5SJason Gunthorpe if (IS_ERR(filp)) 16390f3e72b5SJason Gunthorpe return PTR_ERR(filp); 16400f3e72b5SJason Gunthorpe return 0; 16410f3e72b5SJason Gunthorpe } 16420f3e72b5SJason Gunthorpe 16430f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device, 16440f3e72b5SJason Gunthorpe u32 flags, void __user *arg, 16450f3e72b5SJason Gunthorpe size_t argsz) 16460f3e72b5SJason Gunthorpe { 16470f3e72b5SJason Gunthorpe struct vfio_device_feature_migration mig = { 16480f3e72b5SJason Gunthorpe .flags = device->migration_flags, 16490f3e72b5SJason Gunthorpe }; 16500f3e72b5SJason Gunthorpe int ret; 16510f3e72b5SJason Gunthorpe 16520f3e72b5SJason Gunthorpe if (!device->mig_ops) 16530f3e72b5SJason Gunthorpe return -ENOTTY; 16540f3e72b5SJason Gunthorpe 16550f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, 16560f3e72b5SJason Gunthorpe sizeof(mig)); 16570f3e72b5SJason Gunthorpe if (ret != 1) 16580f3e72b5SJason Gunthorpe return ret; 16590f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 16600f3e72b5SJason Gunthorpe return -EFAULT; 16610f3e72b5SJason Gunthorpe return 0; 16620f3e72b5SJason Gunthorpe } 16630f3e72b5SJason Gunthorpe 16640f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature(struct vfio_device *device, 16650f3e72b5SJason Gunthorpe struct vfio_device_feature __user *arg) 16660f3e72b5SJason Gunthorpe { 
16670f3e72b5SJason Gunthorpe size_t minsz = offsetofend(struct vfio_device_feature, flags); 16680f3e72b5SJason Gunthorpe struct vfio_device_feature feature; 16690f3e72b5SJason Gunthorpe 16700f3e72b5SJason Gunthorpe if (copy_from_user(&feature, arg, minsz)) 16710f3e72b5SJason Gunthorpe return -EFAULT; 16720f3e72b5SJason Gunthorpe 16730f3e72b5SJason Gunthorpe if (feature.argsz < minsz) 16740f3e72b5SJason Gunthorpe return -EINVAL; 16750f3e72b5SJason Gunthorpe 16760f3e72b5SJason Gunthorpe /* Check unknown flags */ 16770f3e72b5SJason Gunthorpe if (feature.flags & 16780f3e72b5SJason Gunthorpe ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET | 16790f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE)) 16800f3e72b5SJason Gunthorpe return -EINVAL; 16810f3e72b5SJason Gunthorpe 16820f3e72b5SJason Gunthorpe /* GET & SET are mutually exclusive except with PROBE */ 16830f3e72b5SJason Gunthorpe if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && 16840f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_SET) && 16850f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_GET)) 16860f3e72b5SJason Gunthorpe return -EINVAL; 16870f3e72b5SJason Gunthorpe 16880f3e72b5SJason Gunthorpe switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { 16890f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIGRATION: 16900f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_migration( 16910f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 16920f3e72b5SJason Gunthorpe feature.argsz - minsz); 16930f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: 16940f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_mig_device_state( 16950f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 16960f3e72b5SJason Gunthorpe feature.argsz - minsz); 16970f3e72b5SJason Gunthorpe default: 16980f3e72b5SJason Gunthorpe if (unlikely(!device->ops->device_feature)) 16990f3e72b5SJason Gunthorpe return -EINVAL; 17000f3e72b5SJason Gunthorpe return 
device->ops->device_feature(device, feature.flags, 17010f3e72b5SJason Gunthorpe arg->data, 17020f3e72b5SJason Gunthorpe feature.argsz - minsz); 17030f3e72b5SJason Gunthorpe } 17040f3e72b5SJason Gunthorpe } 17050f3e72b5SJason Gunthorpe 17060f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep, 17070f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 17080f3e72b5SJason Gunthorpe { 17090f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 1710*8e5c6995SAbhishek Sahu int ret; 1711*8e5c6995SAbhishek Sahu 1712*8e5c6995SAbhishek Sahu ret = vfio_device_pm_runtime_get(device); 1713*8e5c6995SAbhishek Sahu if (ret) 1714*8e5c6995SAbhishek Sahu return ret; 17150f3e72b5SJason Gunthorpe 17160f3e72b5SJason Gunthorpe switch (cmd) { 17170f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE: 1718*8e5c6995SAbhishek Sahu ret = vfio_ioctl_device_feature(device, (void __user *)arg); 1719*8e5c6995SAbhishek Sahu break; 1720*8e5c6995SAbhishek Sahu 17210f3e72b5SJason Gunthorpe default: 17220f3e72b5SJason Gunthorpe if (unlikely(!device->ops->ioctl)) 1723*8e5c6995SAbhishek Sahu ret = -EINVAL; 1724*8e5c6995SAbhishek Sahu else 1725*8e5c6995SAbhishek Sahu ret = device->ops->ioctl(device, cmd, arg); 1726*8e5c6995SAbhishek Sahu break; 17270f3e72b5SJason Gunthorpe } 1728*8e5c6995SAbhishek Sahu 1729*8e5c6995SAbhishek Sahu vfio_device_pm_runtime_put(device); 1730*8e5c6995SAbhishek Sahu return ret; 17310f3e72b5SJason Gunthorpe } 17320f3e72b5SJason Gunthorpe 17330f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, 17340f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 17350f3e72b5SJason Gunthorpe { 17360f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 17370f3e72b5SJason Gunthorpe 17380f3e72b5SJason Gunthorpe if (unlikely(!device->ops->read)) 17390f3e72b5SJason Gunthorpe return -EINVAL; 17400f3e72b5SJason Gunthorpe 17410f3e72b5SJason Gunthorpe return device->ops->read(device, 
buf, count, ppos); 17420f3e72b5SJason Gunthorpe } 17430f3e72b5SJason Gunthorpe 17440f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep, 17450f3e72b5SJason Gunthorpe const char __user *buf, 17460f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 17470f3e72b5SJason Gunthorpe { 17480f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 17490f3e72b5SJason Gunthorpe 17500f3e72b5SJason Gunthorpe if (unlikely(!device->ops->write)) 17510f3e72b5SJason Gunthorpe return -EINVAL; 17520f3e72b5SJason Gunthorpe 17530f3e72b5SJason Gunthorpe return device->ops->write(device, buf, count, ppos); 17540f3e72b5SJason Gunthorpe } 17550f3e72b5SJason Gunthorpe 17560f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 17570f3e72b5SJason Gunthorpe { 17580f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 17590f3e72b5SJason Gunthorpe 17600f3e72b5SJason Gunthorpe if (unlikely(!device->ops->mmap)) 17610f3e72b5SJason Gunthorpe return -EINVAL; 17620f3e72b5SJason Gunthorpe 17630f3e72b5SJason Gunthorpe return device->ops->mmap(device, vma); 17640f3e72b5SJason Gunthorpe } 17650f3e72b5SJason Gunthorpe 17660f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops = { 17670f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 17680f3e72b5SJason Gunthorpe .release = vfio_device_fops_release, 17690f3e72b5SJason Gunthorpe .read = vfio_device_fops_read, 17700f3e72b5SJason Gunthorpe .write = vfio_device_fops_write, 17710f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_device_fops_unl_ioctl, 17720f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 17730f3e72b5SJason Gunthorpe .mmap = vfio_device_fops_mmap, 17740f3e72b5SJason Gunthorpe }; 17750f3e72b5SJason Gunthorpe 17760f3e72b5SJason Gunthorpe /** 17770f3e72b5SJason Gunthorpe * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file 17780f3e72b5SJason Gunthorpe * @file: VFIO group file 
17790f3e72b5SJason Gunthorpe * 17800f3e72b5SJason Gunthorpe * The returned iommu_group is valid as long as a ref is held on the file. 17810f3e72b5SJason Gunthorpe */ 17820f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file) 17830f3e72b5SJason Gunthorpe { 17840f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 17850f3e72b5SJason Gunthorpe 17860f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 17870f3e72b5SJason Gunthorpe return NULL; 17880f3e72b5SJason Gunthorpe return group->iommu_group; 17890f3e72b5SJason Gunthorpe } 17900f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group); 17910f3e72b5SJason Gunthorpe 17920f3e72b5SJason Gunthorpe /** 17930f3e72b5SJason Gunthorpe * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file 17940f3e72b5SJason Gunthorpe * is always CPU cache coherent 17950f3e72b5SJason Gunthorpe * @file: VFIO group file 17960f3e72b5SJason Gunthorpe * 17970f3e72b5SJason Gunthorpe * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop 17980f3e72b5SJason Gunthorpe * bit in DMA transactions. A return of false indicates that the user has 17990f3e72b5SJason Gunthorpe * rights to access additional instructions such as wbinvd on x86. 
18000f3e72b5SJason Gunthorpe */ 18010f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file) 18020f3e72b5SJason Gunthorpe { 18030f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 18040f3e72b5SJason Gunthorpe bool ret; 18050f3e72b5SJason Gunthorpe 18060f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 18070f3e72b5SJason Gunthorpe return true; 18080f3e72b5SJason Gunthorpe 18090f3e72b5SJason Gunthorpe down_read(&group->group_rwsem); 18100f3e72b5SJason Gunthorpe if (group->container) { 18110f3e72b5SJason Gunthorpe ret = vfio_ioctl_check_extension(group->container, 18120f3e72b5SJason Gunthorpe VFIO_DMA_CC_IOMMU); 18130f3e72b5SJason Gunthorpe } else { 18140f3e72b5SJason Gunthorpe /* 18150f3e72b5SJason Gunthorpe * Since the coherency state is determined only once a container 18160f3e72b5SJason Gunthorpe * is attached the user must do so before they can prove they 18170f3e72b5SJason Gunthorpe * have permission. 18180f3e72b5SJason Gunthorpe */ 18190f3e72b5SJason Gunthorpe ret = true; 18200f3e72b5SJason Gunthorpe } 18210f3e72b5SJason Gunthorpe up_read(&group->group_rwsem); 18220f3e72b5SJason Gunthorpe return ret; 18230f3e72b5SJason Gunthorpe } 18240f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); 18250f3e72b5SJason Gunthorpe 18260f3e72b5SJason Gunthorpe /** 18270f3e72b5SJason Gunthorpe * vfio_file_set_kvm - Link a kvm with VFIO drivers 18280f3e72b5SJason Gunthorpe * @file: VFIO group file 18290f3e72b5SJason Gunthorpe * @kvm: KVM to link 18300f3e72b5SJason Gunthorpe * 18310f3e72b5SJason Gunthorpe * When a VFIO device is first opened the KVM will be available in 18320f3e72b5SJason Gunthorpe * device->kvm if one was associated with the group. 
18330f3e72b5SJason Gunthorpe */ 18340f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm) 18350f3e72b5SJason Gunthorpe { 18360f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 18370f3e72b5SJason Gunthorpe 18380f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 18390f3e72b5SJason Gunthorpe return; 18400f3e72b5SJason Gunthorpe 18410f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 18420f3e72b5SJason Gunthorpe group->kvm = kvm; 18430f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 18440f3e72b5SJason Gunthorpe } 18450f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm); 18460f3e72b5SJason Gunthorpe 18470f3e72b5SJason Gunthorpe /** 18480f3e72b5SJason Gunthorpe * vfio_file_has_dev - True if the VFIO file is a handle for device 18490f3e72b5SJason Gunthorpe * @file: VFIO file to check 18500f3e72b5SJason Gunthorpe * @device: Device that must be part of the file 18510f3e72b5SJason Gunthorpe * 18520f3e72b5SJason Gunthorpe * Returns true if given file has permission to manipulate the given device. 
18530f3e72b5SJason Gunthorpe */ 18540f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device) 18550f3e72b5SJason Gunthorpe { 18560f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 18570f3e72b5SJason Gunthorpe 18580f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 18590f3e72b5SJason Gunthorpe return false; 18600f3e72b5SJason Gunthorpe 18610f3e72b5SJason Gunthorpe return group == device->group; 18620f3e72b5SJason Gunthorpe } 18630f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev); 18640f3e72b5SJason Gunthorpe 18650f3e72b5SJason Gunthorpe /* 18660f3e72b5SJason Gunthorpe * Sub-module support 18670f3e72b5SJason Gunthorpe */ 18680f3e72b5SJason Gunthorpe /* 18690f3e72b5SJason Gunthorpe * Helper for managing a buffer of info chain capabilities, allocate or 18700f3e72b5SJason Gunthorpe * reallocate a buffer with additional @size, filling in @id and @version 18710f3e72b5SJason Gunthorpe * of the capability. A pointer to the new capability is returned. 18720f3e72b5SJason Gunthorpe * 18730f3e72b5SJason Gunthorpe * NB. The chain is based at the head of the buffer, so new entries are 18740f3e72b5SJason Gunthorpe * added to the tail, vfio_info_cap_shift() should be called to fixup the 18750f3e72b5SJason Gunthorpe * next offsets prior to copying to the user buffer. 
18760f3e72b5SJason Gunthorpe */ 18770f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, 18780f3e72b5SJason Gunthorpe size_t size, u16 id, u16 version) 18790f3e72b5SJason Gunthorpe { 18800f3e72b5SJason Gunthorpe void *buf; 18810f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header, *tmp; 18820f3e72b5SJason Gunthorpe 18830f3e72b5SJason Gunthorpe buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); 18840f3e72b5SJason Gunthorpe if (!buf) { 18850f3e72b5SJason Gunthorpe kfree(caps->buf); 18860f3e72b5SJason Gunthorpe caps->buf = NULL; 18870f3e72b5SJason Gunthorpe caps->size = 0; 18880f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 18890f3e72b5SJason Gunthorpe } 18900f3e72b5SJason Gunthorpe 18910f3e72b5SJason Gunthorpe caps->buf = buf; 18920f3e72b5SJason Gunthorpe header = buf + caps->size; 18930f3e72b5SJason Gunthorpe 18940f3e72b5SJason Gunthorpe /* Eventually copied to user buffer, zero */ 18950f3e72b5SJason Gunthorpe memset(header, 0, size); 18960f3e72b5SJason Gunthorpe 18970f3e72b5SJason Gunthorpe header->id = id; 18980f3e72b5SJason Gunthorpe header->version = version; 18990f3e72b5SJason Gunthorpe 19000f3e72b5SJason Gunthorpe /* Add to the end of the capability chain */ 19010f3e72b5SJason Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next) 19020f3e72b5SJason Gunthorpe ; /* nothing */ 19030f3e72b5SJason Gunthorpe 19040f3e72b5SJason Gunthorpe tmp->next = caps->size; 19050f3e72b5SJason Gunthorpe caps->size += size; 19060f3e72b5SJason Gunthorpe 19070f3e72b5SJason Gunthorpe return header; 19080f3e72b5SJason Gunthorpe } 19090f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add); 19100f3e72b5SJason Gunthorpe 19110f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) 19120f3e72b5SJason Gunthorpe { 19130f3e72b5SJason Gunthorpe struct vfio_info_cap_header *tmp; 19140f3e72b5SJason Gunthorpe void *buf = (void *)caps->buf; 19150f3e72b5SJason Gunthorpe 19160f3e72b5SJason 
Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) 19170f3e72b5SJason Gunthorpe tmp->next += offset; 19180f3e72b5SJason Gunthorpe } 19190f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift); 19200f3e72b5SJason Gunthorpe 19210f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps, 19220f3e72b5SJason Gunthorpe struct vfio_info_cap_header *cap, size_t size) 19230f3e72b5SJason Gunthorpe { 19240f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header; 19250f3e72b5SJason Gunthorpe 19260f3e72b5SJason Gunthorpe header = vfio_info_cap_add(caps, size, cap->id, cap->version); 19270f3e72b5SJason Gunthorpe if (IS_ERR(header)) 19280f3e72b5SJason Gunthorpe return PTR_ERR(header); 19290f3e72b5SJason Gunthorpe 19300f3e72b5SJason Gunthorpe memcpy(header + 1, cap + 1, size - sizeof(*header)); 19310f3e72b5SJason Gunthorpe 19320f3e72b5SJason Gunthorpe return 0; 19330f3e72b5SJason Gunthorpe } 19340f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability); 19350f3e72b5SJason Gunthorpe 19360f3e72b5SJason Gunthorpe int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, 19370f3e72b5SJason Gunthorpe int max_irq_type, size_t *data_size) 19380f3e72b5SJason Gunthorpe { 19390f3e72b5SJason Gunthorpe unsigned long minsz; 19400f3e72b5SJason Gunthorpe size_t size; 19410f3e72b5SJason Gunthorpe 19420f3e72b5SJason Gunthorpe minsz = offsetofend(struct vfio_irq_set, count); 19430f3e72b5SJason Gunthorpe 19440f3e72b5SJason Gunthorpe if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || 19450f3e72b5SJason Gunthorpe (hdr->count >= (U32_MAX - hdr->start)) || 19460f3e72b5SJason Gunthorpe (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | 19470f3e72b5SJason Gunthorpe VFIO_IRQ_SET_ACTION_TYPE_MASK))) 19480f3e72b5SJason Gunthorpe return -EINVAL; 19490f3e72b5SJason Gunthorpe 19500f3e72b5SJason Gunthorpe if (data_size) 19510f3e72b5SJason Gunthorpe *data_size = 0; 19520f3e72b5SJason Gunthorpe 19530f3e72b5SJason Gunthorpe if 
(hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) 19540f3e72b5SJason Gunthorpe return -EINVAL; 19550f3e72b5SJason Gunthorpe 19560f3e72b5SJason Gunthorpe switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { 19570f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_NONE: 19580f3e72b5SJason Gunthorpe size = 0; 19590f3e72b5SJason Gunthorpe break; 19600f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_BOOL: 19610f3e72b5SJason Gunthorpe size = sizeof(uint8_t); 19620f3e72b5SJason Gunthorpe break; 19630f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_EVENTFD: 19640f3e72b5SJason Gunthorpe size = sizeof(int32_t); 19650f3e72b5SJason Gunthorpe break; 19660f3e72b5SJason Gunthorpe default: 19670f3e72b5SJason Gunthorpe return -EINVAL; 19680f3e72b5SJason Gunthorpe } 19690f3e72b5SJason Gunthorpe 19700f3e72b5SJason Gunthorpe if (size) { 19710f3e72b5SJason Gunthorpe if (hdr->argsz - minsz < hdr->count * size) 19720f3e72b5SJason Gunthorpe return -EINVAL; 19730f3e72b5SJason Gunthorpe 19740f3e72b5SJason Gunthorpe if (!data_size) 19750f3e72b5SJason Gunthorpe return -EINVAL; 19760f3e72b5SJason Gunthorpe 19770f3e72b5SJason Gunthorpe *data_size = hdr->count * size; 19780f3e72b5SJason Gunthorpe } 19790f3e72b5SJason Gunthorpe 19800f3e72b5SJason Gunthorpe return 0; 19810f3e72b5SJason Gunthorpe } 19820f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); 19830f3e72b5SJason Gunthorpe 19840f3e72b5SJason Gunthorpe /* 19850f3e72b5SJason Gunthorpe * Pin contiguous user pages and return their associated host pages for local 19860f3e72b5SJason Gunthorpe * domain only. 19870f3e72b5SJason Gunthorpe * @device [in] : device 19880f3e72b5SJason Gunthorpe * @iova [in] : starting IOVA of user pages to be pinned. 19890f3e72b5SJason Gunthorpe * @npage [in] : count of pages to be pinned. This count should not 19900f3e72b5SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 
19910f3e72b5SJason Gunthorpe * @prot [in] : protection flags 19920f3e72b5SJason Gunthorpe * @pages[out] : array of host pages 19930f3e72b5SJason Gunthorpe * Return error or number of pages pinned. 19940f3e72b5SJason Gunthorpe */ 19950f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, 19960f3e72b5SJason Gunthorpe int npage, int prot, struct page **pages) 19970f3e72b5SJason Gunthorpe { 19980f3e72b5SJason Gunthorpe struct vfio_container *container; 19990f3e72b5SJason Gunthorpe struct vfio_group *group = device->group; 20000f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 20010f3e72b5SJason Gunthorpe int ret; 20020f3e72b5SJason Gunthorpe 20030f3e72b5SJason Gunthorpe if (!pages || !npage || !vfio_assert_device_open(device)) 20040f3e72b5SJason Gunthorpe return -EINVAL; 20050f3e72b5SJason Gunthorpe 20060f3e72b5SJason Gunthorpe if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 20070f3e72b5SJason Gunthorpe return -E2BIG; 20080f3e72b5SJason Gunthorpe 20090f3e72b5SJason Gunthorpe if (group->dev_counter > 1) 20100f3e72b5SJason Gunthorpe return -EINVAL; 20110f3e72b5SJason Gunthorpe 20120f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 20130f3e72b5SJason Gunthorpe container = group->container; 20140f3e72b5SJason Gunthorpe driver = container->iommu_driver; 20150f3e72b5SJason Gunthorpe if (likely(driver && driver->ops->pin_pages)) 20160f3e72b5SJason Gunthorpe ret = driver->ops->pin_pages(container->iommu_data, 20170f3e72b5SJason Gunthorpe group->iommu_group, iova, 20180f3e72b5SJason Gunthorpe npage, prot, pages); 20190f3e72b5SJason Gunthorpe else 20200f3e72b5SJason Gunthorpe ret = -ENOTTY; 20210f3e72b5SJason Gunthorpe 20220f3e72b5SJason Gunthorpe return ret; 20230f3e72b5SJason Gunthorpe } 20240f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages); 20250f3e72b5SJason Gunthorpe 20260f3e72b5SJason Gunthorpe /* 20270f3e72b5SJason Gunthorpe * Unpin contiguous host pages for local domain only. 
20280f3e72b5SJason Gunthorpe * @device [in] : device 20290f3e72b5SJason Gunthorpe * @iova [in] : starting address of user pages to be unpinned. 20300f3e72b5SJason Gunthorpe * @npage [in] : count of pages to be unpinned. This count should not 20310f3e72b5SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 20320f3e72b5SJason Gunthorpe */ 20330f3e72b5SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) 20340f3e72b5SJason Gunthorpe { 20350f3e72b5SJason Gunthorpe struct vfio_container *container; 20360f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 20370f3e72b5SJason Gunthorpe 20380f3e72b5SJason Gunthorpe if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) 20390f3e72b5SJason Gunthorpe return; 20400f3e72b5SJason Gunthorpe 20410f3e72b5SJason Gunthorpe if (WARN_ON(!vfio_assert_device_open(device))) 20420f3e72b5SJason Gunthorpe return; 20430f3e72b5SJason Gunthorpe 20440f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 20450f3e72b5SJason Gunthorpe container = device->group->container; 20460f3e72b5SJason Gunthorpe driver = container->iommu_driver; 20470f3e72b5SJason Gunthorpe 20480f3e72b5SJason Gunthorpe driver->ops->unpin_pages(container->iommu_data, iova, npage); 20490f3e72b5SJason Gunthorpe } 20500f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages); 20510f3e72b5SJason Gunthorpe 20520f3e72b5SJason Gunthorpe /* 20530f3e72b5SJason Gunthorpe * This interface allows the CPUs to perform some sort of virtual DMA on 20540f3e72b5SJason Gunthorpe * behalf of the device. 20550f3e72b5SJason Gunthorpe * 20560f3e72b5SJason Gunthorpe * CPUs read/write from/into a range of IOVAs pointing to user space memory 20570f3e72b5SJason Gunthorpe * into/from a kernel buffer. 
20580f3e72b5SJason Gunthorpe * 20590f3e72b5SJason Gunthorpe * As the read/write of user space memory is conducted via the CPUs and is 20600f3e72b5SJason Gunthorpe * not a real device DMA, it is not necessary to pin the user space memory. 20610f3e72b5SJason Gunthorpe * 20620f3e72b5SJason Gunthorpe * @device [in] : VFIO device 20630f3e72b5SJason Gunthorpe * @iova [in] : base IOVA of a user space buffer 20640f3e72b5SJason Gunthorpe * @data [in] : pointer to kernel buffer 20650f3e72b5SJason Gunthorpe * @len [in] : kernel buffer length 20660f3e72b5SJason Gunthorpe * @write : indicate read or write 20670f3e72b5SJason Gunthorpe * Return error code on failure or 0 on success. 20680f3e72b5SJason Gunthorpe */ 20690f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, 20700f3e72b5SJason Gunthorpe size_t len, bool write) 20710f3e72b5SJason Gunthorpe { 20720f3e72b5SJason Gunthorpe struct vfio_container *container; 20730f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 20740f3e72b5SJason Gunthorpe int ret = 0; 20750f3e72b5SJason Gunthorpe 20760f3e72b5SJason Gunthorpe if (!data || len <= 0 || !vfio_assert_device_open(device)) 20770f3e72b5SJason Gunthorpe return -EINVAL; 20780f3e72b5SJason Gunthorpe 20790f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 20800f3e72b5SJason Gunthorpe container = device->group->container; 20810f3e72b5SJason Gunthorpe driver = container->iommu_driver; 20820f3e72b5SJason Gunthorpe 20830f3e72b5SJason Gunthorpe if (likely(driver && driver->ops->dma_rw)) 20840f3e72b5SJason Gunthorpe ret = driver->ops->dma_rw(container->iommu_data, 20850f3e72b5SJason Gunthorpe iova, data, len, write); 20860f3e72b5SJason Gunthorpe else 20870f3e72b5SJason Gunthorpe ret = -ENOTTY; 20880f3e72b5SJason Gunthorpe return ret; 20890f3e72b5SJason Gunthorpe } 20900f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw); 20910f3e72b5SJason Gunthorpe 20920f3e72b5SJason Gunthorpe /* 20930f3e72b5SJason 
 * Module/class support
 */
/* devnode callback: place group chardevs under /dev/vfio/. */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

/* The /dev/vfio/vfio container device, world read/write. */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	/* Core bookkeeping shared by all groups and iommu drivers. */
	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	/* /dev/vfio/vfio container node */
	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	/*
	 * Without CONFIG_VFIO_NOIOMMU, ret is still 0 from the successful
	 * alloc_chrdev_region() above, so the if below is a no-op.
	 */
#ifdef CONFIG_VFIO_NOIOMMU
	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	if (ret)
		goto err_driver_register;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_driver_register:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	/* Tear down in reverse order of vfio_init(). */
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
/* Prefer loading the iommu backends right after the core, if built. */
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");