10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only 20f3e72b5SJason Gunthorpe /* 30f3e72b5SJason Gunthorpe * VFIO core 40f3e72b5SJason Gunthorpe * 50f3e72b5SJason Gunthorpe * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 60f3e72b5SJason Gunthorpe * Author: Alex Williamson <alex.williamson@redhat.com> 70f3e72b5SJason Gunthorpe * 80f3e72b5SJason Gunthorpe * Derived from original vfio: 90f3e72b5SJason Gunthorpe * Copyright 2010 Cisco Systems, Inc. All rights reserved. 100f3e72b5SJason Gunthorpe * Author: Tom Lyon, pugs@cisco.com 110f3e72b5SJason Gunthorpe */ 120f3e72b5SJason Gunthorpe 130f3e72b5SJason Gunthorpe #include <linux/cdev.h> 140f3e72b5SJason Gunthorpe #include <linux/compat.h> 150f3e72b5SJason Gunthorpe #include <linux/device.h> 160f3e72b5SJason Gunthorpe #include <linux/file.h> 170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h> 180f3e72b5SJason Gunthorpe #include <linux/fs.h> 190f3e72b5SJason Gunthorpe #include <linux/idr.h> 200f3e72b5SJason Gunthorpe #include <linux/iommu.h> 210f3e72b5SJason Gunthorpe #include <linux/list.h> 220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h> 230f3e72b5SJason Gunthorpe #include <linux/module.h> 240f3e72b5SJason Gunthorpe #include <linux/mutex.h> 250f3e72b5SJason Gunthorpe #include <linux/pci.h> 260f3e72b5SJason Gunthorpe #include <linux/rwsem.h> 270f3e72b5SJason Gunthorpe #include <linux/sched.h> 280f3e72b5SJason Gunthorpe #include <linux/slab.h> 290f3e72b5SJason Gunthorpe #include <linux/stat.h> 300f3e72b5SJason Gunthorpe #include <linux/string.h> 310f3e72b5SJason Gunthorpe #include <linux/uaccess.h> 320f3e72b5SJason Gunthorpe #include <linux/vfio.h> 330f3e72b5SJason Gunthorpe #include <linux/wait.h> 340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h> 350f3e72b5SJason Gunthorpe #include "vfio.h" 360f3e72b5SJason Gunthorpe 370f3e72b5SJason Gunthorpe #define DRIVER_VERSION "0.3" 380f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 390f3e72b5SJason Gunthorpe #define DRIVER_DESC "VFIO - User Level meta-driver" 400f3e72b5SJason Gunthorpe 410f3e72b5SJason Gunthorpe static struct vfio { 420f3e72b5SJason Gunthorpe struct class *class; 430f3e72b5SJason Gunthorpe struct list_head iommu_drivers_list; 440f3e72b5SJason Gunthorpe struct mutex iommu_drivers_lock; 450f3e72b5SJason Gunthorpe struct list_head group_list; 460f3e72b5SJason Gunthorpe struct mutex group_lock; /* locks group_list */ 470f3e72b5SJason Gunthorpe struct ida group_ida; 480f3e72b5SJason Gunthorpe dev_t group_devt; 490f3e72b5SJason Gunthorpe } vfio; 500f3e72b5SJason Gunthorpe 510f3e72b5SJason Gunthorpe struct vfio_iommu_driver { 520f3e72b5SJason Gunthorpe const struct vfio_iommu_driver_ops *ops; 530f3e72b5SJason Gunthorpe struct list_head vfio_next; 540f3e72b5SJason Gunthorpe }; 550f3e72b5SJason Gunthorpe 560f3e72b5SJason Gunthorpe struct vfio_container { 570f3e72b5SJason Gunthorpe struct kref kref; 580f3e72b5SJason Gunthorpe struct list_head group_list; 590f3e72b5SJason Gunthorpe struct rw_semaphore group_lock; 600f3e72b5SJason Gunthorpe struct vfio_iommu_driver *iommu_driver; 610f3e72b5SJason Gunthorpe void *iommu_data; 620f3e72b5SJason Gunthorpe bool noiommu; 630f3e72b5SJason Gunthorpe }; 640f3e72b5SJason Gunthorpe 650f3e72b5SJason Gunthorpe struct vfio_group { 660f3e72b5SJason Gunthorpe struct device dev; 670f3e72b5SJason Gunthorpe struct cdev cdev; 680f3e72b5SJason Gunthorpe refcount_t users; 690f3e72b5SJason Gunthorpe unsigned int container_users; 700f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 710f3e72b5SJason Gunthorpe struct vfio_container *container; 720f3e72b5SJason Gunthorpe struct list_head device_list; 730f3e72b5SJason Gunthorpe struct mutex device_lock; 740f3e72b5SJason Gunthorpe struct list_head vfio_next; 750f3e72b5SJason Gunthorpe struct list_head container_next; 760f3e72b5SJason Gunthorpe enum vfio_group_type type; 770f3e72b5SJason Gunthorpe unsigned int dev_counter; 780f3e72b5SJason Gunthorpe struct rw_semaphore group_rwsem; 790f3e72b5SJason Gunthorpe struct kvm *kvm; 800f3e72b5SJason Gunthorpe struct file *opened_file; 810f3e72b5SJason Gunthorpe struct blocking_notifier_head notifier; 820f3e72b5SJason Gunthorpe }; 830f3e72b5SJason Gunthorpe 840f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 850f3e72b5SJason Gunthorpe static bool noiommu __read_mostly; 860f3e72b5SJason Gunthorpe module_param_named(enable_unsafe_noiommu_mode, 870f3e72b5SJason Gunthorpe noiommu, bool, S_IRUGO | S_IWUSR); 880f3e72b5SJason Gunthorpe MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); 890f3e72b5SJason Gunthorpe #endif 900f3e72b5SJason Gunthorpe 910f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa); 920f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops; 930f3e72b5SJason Gunthorpe 940f3e72b5SJason Gunthorpe int vfio_assign_device_set(struct vfio_device *device, void *set_id) 950f3e72b5SJason Gunthorpe { 960f3e72b5SJason Gunthorpe unsigned long idx = (unsigned long)set_id; 970f3e72b5SJason Gunthorpe struct vfio_device_set *new_dev_set; 980f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set; 990f3e72b5SJason Gunthorpe 1000f3e72b5SJason Gunthorpe if (WARN_ON(!set_id)) 1010f3e72b5SJason Gunthorpe return -EINVAL; 1020f3e72b5SJason Gunthorpe 1030f3e72b5SJason Gunthorpe /* 1040f3e72b5SJason Gunthorpe * Atomically acquire a singleton object in the xarray for this set_id 1050f3e72b5SJason Gunthorpe */ 1060f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1070f3e72b5SJason Gunthorpe dev_set = xa_load(&vfio_device_set_xa, idx); 1080f3e72b5SJason Gunthorpe if (dev_set) 1090f3e72b5SJason Gunthorpe goto found_get_ref; 1100f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1110f3e72b5SJason Gunthorpe 1120f3e72b5SJason Gunthorpe new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); 1130f3e72b5SJason Gunthorpe if (!new_dev_set) 1140f3e72b5SJason Gunthorpe return -ENOMEM; 1150f3e72b5SJason Gunthorpe mutex_init(&new_dev_set->lock); 1160f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&new_dev_set->device_list); 1170f3e72b5SJason Gunthorpe new_dev_set->set_id = set_id; 1180f3e72b5SJason Gunthorpe 1190f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1200f3e72b5SJason Gunthorpe dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, 1210f3e72b5SJason Gunthorpe GFP_KERNEL); 1220f3e72b5SJason Gunthorpe if (!dev_set) { 1230f3e72b5SJason Gunthorpe dev_set = new_dev_set; 1240f3e72b5SJason Gunthorpe goto found_get_ref; 1250f3e72b5SJason Gunthorpe } 1260f3e72b5SJason Gunthorpe 1270f3e72b5SJason Gunthorpe kfree(new_dev_set); 1280f3e72b5SJason Gunthorpe if (xa_is_err(dev_set)) { 1290f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1300f3e72b5SJason Gunthorpe return xa_err(dev_set); 1310f3e72b5SJason Gunthorpe } 1320f3e72b5SJason Gunthorpe 1330f3e72b5SJason Gunthorpe found_get_ref: 1340f3e72b5SJason Gunthorpe dev_set->device_count++; 1350f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1360f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1370f3e72b5SJason Gunthorpe device->dev_set = dev_set; 1380f3e72b5SJason Gunthorpe list_add_tail(&device->dev_set_list, &dev_set->device_list); 1390f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1400f3e72b5SJason Gunthorpe return 0; 1410f3e72b5SJason Gunthorpe } 1420f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_assign_device_set); 1430f3e72b5SJason Gunthorpe 1440f3e72b5SJason Gunthorpe static void vfio_release_device_set(struct vfio_device *device) 1450f3e72b5SJason Gunthorpe { 1460f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set = device->dev_set; 1470f3e72b5SJason Gunthorpe 1480f3e72b5SJason Gunthorpe if (!dev_set) 1490f3e72b5SJason Gunthorpe return; 1500f3e72b5SJason Gunthorpe 1510f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1520f3e72b5SJason Gunthorpe list_del(&device->dev_set_list); 1530f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1540f3e72b5SJason Gunthorpe 1550f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1560f3e72b5SJason Gunthorpe if (!--dev_set->device_count) { 1570f3e72b5SJason Gunthorpe __xa_erase(&vfio_device_set_xa, 1580f3e72b5SJason Gunthorpe (unsigned long)dev_set->set_id); 1590f3e72b5SJason Gunthorpe mutex_destroy(&dev_set->lock); 1600f3e72b5SJason Gunthorpe kfree(dev_set); 1610f3e72b5SJason Gunthorpe } 1620f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1630f3e72b5SJason Gunthorpe } 1640f3e72b5SJason Gunthorpe 1650f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 1660f3e72b5SJason Gunthorpe static void *vfio_noiommu_open(unsigned long arg) 1670f3e72b5SJason Gunthorpe { 1680f3e72b5SJason Gunthorpe if (arg != VFIO_NOIOMMU_IOMMU) 1690f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 1700f3e72b5SJason Gunthorpe if (!capable(CAP_SYS_RAWIO)) 1710f3e72b5SJason Gunthorpe return ERR_PTR(-EPERM); 1720f3e72b5SJason Gunthorpe 1730f3e72b5SJason Gunthorpe return NULL; 1740f3e72b5SJason Gunthorpe } 1750f3e72b5SJason Gunthorpe 1760f3e72b5SJason Gunthorpe static void vfio_noiommu_release(void *iommu_data) 1770f3e72b5SJason Gunthorpe { 1780f3e72b5SJason Gunthorpe } 1790f3e72b5SJason Gunthorpe 1800f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data, 1810f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 1820f3e72b5SJason Gunthorpe { 1830f3e72b5SJason Gunthorpe if (cmd == VFIO_CHECK_EXTENSION) 1840f3e72b5SJason Gunthorpe return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; 1850f3e72b5SJason Gunthorpe 1860f3e72b5SJason Gunthorpe return -ENOTTY; 1870f3e72b5SJason Gunthorpe } 1880f3e72b5SJason Gunthorpe 1890f3e72b5SJason Gunthorpe static int vfio_noiommu_attach_group(void *iommu_data, 1900f3e72b5SJason Gunthorpe struct iommu_group *iommu_group, enum vfio_group_type type) 1910f3e72b5SJason Gunthorpe { 1920f3e72b5SJason Gunthorpe return 0; 1930f3e72b5SJason Gunthorpe } 1940f3e72b5SJason Gunthorpe 1950f3e72b5SJason Gunthorpe static void vfio_noiommu_detach_group(void *iommu_data, 1960f3e72b5SJason Gunthorpe struct iommu_group *iommu_group) 1970f3e72b5SJason Gunthorpe { 1980f3e72b5SJason Gunthorpe } 1990f3e72b5SJason Gunthorpe 2000f3e72b5SJason Gunthorpe static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { 2010f3e72b5SJason Gunthorpe .name = "vfio-noiommu", 2020f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 2030f3e72b5SJason Gunthorpe .open = vfio_noiommu_open, 2040f3e72b5SJason Gunthorpe .release = vfio_noiommu_release, 2050f3e72b5SJason Gunthorpe .ioctl = vfio_noiommu_ioctl, 2060f3e72b5SJason Gunthorpe .attach_group = vfio_noiommu_attach_group, 2070f3e72b5SJason Gunthorpe .detach_group = vfio_noiommu_detach_group, 2080f3e72b5SJason Gunthorpe }; 2090f3e72b5SJason Gunthorpe 2100f3e72b5SJason Gunthorpe /* 2110f3e72b5SJason Gunthorpe * Only noiommu containers can use vfio-noiommu and noiommu containers can only 2120f3e72b5SJason Gunthorpe * use vfio-noiommu. 2130f3e72b5SJason Gunthorpe */ 2140f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 2150f3e72b5SJason Gunthorpe const struct vfio_iommu_driver *driver) 2160f3e72b5SJason Gunthorpe { 2170f3e72b5SJason Gunthorpe return container->noiommu == (driver->ops == &vfio_noiommu_ops); 2180f3e72b5SJason Gunthorpe } 2190f3e72b5SJason Gunthorpe #else 2200f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 2210f3e72b5SJason Gunthorpe const struct vfio_iommu_driver *driver) 2220f3e72b5SJason Gunthorpe { 2230f3e72b5SJason Gunthorpe return true; 2240f3e72b5SJason Gunthorpe } 2250f3e72b5SJason Gunthorpe #endif /* CONFIG_VFIO_NOIOMMU */ 2260f3e72b5SJason Gunthorpe 2270f3e72b5SJason Gunthorpe /* 2280f3e72b5SJason Gunthorpe * IOMMU driver registration 2290f3e72b5SJason Gunthorpe */ 2300f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) 2310f3e72b5SJason Gunthorpe { 2320f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver, *tmp; 2330f3e72b5SJason Gunthorpe 2340f3e72b5SJason Gunthorpe if (WARN_ON(!ops->register_device != !ops->unregister_device)) 2350f3e72b5SJason Gunthorpe return -EINVAL; 2360f3e72b5SJason Gunthorpe 2370f3e72b5SJason Gunthorpe driver = kzalloc(sizeof(*driver), GFP_KERNEL); 2380f3e72b5SJason Gunthorpe if (!driver) 2390f3e72b5SJason Gunthorpe return -ENOMEM; 2400f3e72b5SJason Gunthorpe 2410f3e72b5SJason Gunthorpe driver->ops = ops; 2420f3e72b5SJason Gunthorpe 2430f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 2440f3e72b5SJason Gunthorpe 2450f3e72b5SJason Gunthorpe /* Check for duplicates */ 2460f3e72b5SJason Gunthorpe list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { 2470f3e72b5SJason Gunthorpe if (tmp->ops == ops) { 2480f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2490f3e72b5SJason Gunthorpe kfree(driver); 2500f3e72b5SJason Gunthorpe return -EINVAL; 2510f3e72b5SJason Gunthorpe } 2520f3e72b5SJason Gunthorpe } 2530f3e72b5SJason Gunthorpe 2540f3e72b5SJason Gunthorpe list_add(&driver->vfio_next, &vfio.iommu_drivers_list); 2550f3e72b5SJason Gunthorpe 2560f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2570f3e72b5SJason Gunthorpe 2580f3e72b5SJason Gunthorpe return 0; 2590f3e72b5SJason Gunthorpe } 2600f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); 2610f3e72b5SJason Gunthorpe 2620f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) 2630f3e72b5SJason Gunthorpe { 2640f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 2650f3e72b5SJason Gunthorpe 2660f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 2670f3e72b5SJason Gunthorpe list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 2680f3e72b5SJason Gunthorpe if (driver->ops == ops) { 2690f3e72b5SJason Gunthorpe list_del(&driver->vfio_next); 2700f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2710f3e72b5SJason Gunthorpe kfree(driver); 2720f3e72b5SJason Gunthorpe return; 2730f3e72b5SJason Gunthorpe } 2740f3e72b5SJason Gunthorpe } 2750f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2760f3e72b5SJason Gunthorpe } 2770f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); 2780f3e72b5SJason Gunthorpe 2790f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group); 2800f3e72b5SJason Gunthorpe 2810f3e72b5SJason Gunthorpe /* 2820f3e72b5SJason Gunthorpe * Container objects - containers are created when /dev/vfio/vfio is 2830f3e72b5SJason Gunthorpe * opened, but their lifecycle extends until the last user is done, so 2840f3e72b5SJason Gunthorpe * it's freed via kref. Must support container/group/device being 2850f3e72b5SJason Gunthorpe * closed in any order. 2860f3e72b5SJason Gunthorpe */ 2870f3e72b5SJason Gunthorpe static void vfio_container_get(struct vfio_container *container) 2880f3e72b5SJason Gunthorpe { 2890f3e72b5SJason Gunthorpe kref_get(&container->kref); 2900f3e72b5SJason Gunthorpe } 2910f3e72b5SJason Gunthorpe 2920f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref) 2930f3e72b5SJason Gunthorpe { 2940f3e72b5SJason Gunthorpe struct vfio_container *container; 2950f3e72b5SJason Gunthorpe container = container_of(kref, struct vfio_container, kref); 2960f3e72b5SJason Gunthorpe 2970f3e72b5SJason Gunthorpe kfree(container); 2980f3e72b5SJason Gunthorpe } 2990f3e72b5SJason Gunthorpe 3000f3e72b5SJason Gunthorpe static void vfio_container_put(struct vfio_container *container) 3010f3e72b5SJason Gunthorpe { 3020f3e72b5SJason Gunthorpe kref_put(&container->kref, vfio_container_release); 3030f3e72b5SJason Gunthorpe } 3040f3e72b5SJason Gunthorpe 3050f3e72b5SJason Gunthorpe /* 3060f3e72b5SJason Gunthorpe * Group objects - create, release, get, put, search 3070f3e72b5SJason Gunthorpe */ 3080f3e72b5SJason Gunthorpe static struct vfio_group * 3090f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group) 3100f3e72b5SJason Gunthorpe { 3110f3e72b5SJason Gunthorpe struct vfio_group *group; 3120f3e72b5SJason Gunthorpe 3130f3e72b5SJason Gunthorpe list_for_each_entry(group, &vfio.group_list, vfio_next) { 3140f3e72b5SJason Gunthorpe if (group->iommu_group == iommu_group) { 3150f3e72b5SJason Gunthorpe vfio_group_get(group); 3160f3e72b5SJason Gunthorpe return group; 3170f3e72b5SJason Gunthorpe } 3180f3e72b5SJason Gunthorpe } 3190f3e72b5SJason Gunthorpe return NULL; 3200f3e72b5SJason Gunthorpe } 3210f3e72b5SJason Gunthorpe 3220f3e72b5SJason Gunthorpe static struct vfio_group * 3230f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group) 3240f3e72b5SJason Gunthorpe { 3250f3e72b5SJason Gunthorpe struct vfio_group *group; 3260f3e72b5SJason Gunthorpe 3270f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 3280f3e72b5SJason Gunthorpe group = __vfio_group_get_from_iommu(iommu_group); 3290f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 3300f3e72b5SJason Gunthorpe return group; 3310f3e72b5SJason Gunthorpe } 3320f3e72b5SJason Gunthorpe 3330f3e72b5SJason Gunthorpe static void vfio_group_release(struct device *dev) 3340f3e72b5SJason Gunthorpe { 3350f3e72b5SJason Gunthorpe struct vfio_group *group = container_of(dev, struct vfio_group, dev); 3360f3e72b5SJason Gunthorpe 3370f3e72b5SJason Gunthorpe mutex_destroy(&group->device_lock); 3380f3e72b5SJason Gunthorpe iommu_group_put(group->iommu_group); 3390f3e72b5SJason Gunthorpe ida_free(&vfio.group_ida, MINOR(group->dev.devt)); 3400f3e72b5SJason Gunthorpe kfree(group); 3410f3e72b5SJason Gunthorpe } 3420f3e72b5SJason Gunthorpe 3430f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, 3440f3e72b5SJason Gunthorpe enum vfio_group_type type) 3450f3e72b5SJason Gunthorpe { 3460f3e72b5SJason Gunthorpe struct vfio_group *group; 3470f3e72b5SJason Gunthorpe int minor; 3480f3e72b5SJason Gunthorpe 3490f3e72b5SJason Gunthorpe group = kzalloc(sizeof(*group), GFP_KERNEL); 3500f3e72b5SJason Gunthorpe if (!group) 3510f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 3520f3e72b5SJason Gunthorpe 3530f3e72b5SJason Gunthorpe minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); 3540f3e72b5SJason Gunthorpe if (minor < 0) { 3550f3e72b5SJason Gunthorpe kfree(group); 3560f3e72b5SJason Gunthorpe return ERR_PTR(minor); 3570f3e72b5SJason Gunthorpe } 3580f3e72b5SJason Gunthorpe 3590f3e72b5SJason Gunthorpe device_initialize(&group->dev); 3600f3e72b5SJason Gunthorpe group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); 3610f3e72b5SJason Gunthorpe group->dev.class = vfio.class; 3620f3e72b5SJason Gunthorpe group->dev.release = vfio_group_release; 3630f3e72b5SJason Gunthorpe cdev_init(&group->cdev, &vfio_group_fops); 3640f3e72b5SJason Gunthorpe group->cdev.owner = THIS_MODULE; 3650f3e72b5SJason Gunthorpe 3660f3e72b5SJason Gunthorpe refcount_set(&group->users, 1); 3670f3e72b5SJason Gunthorpe init_rwsem(&group->group_rwsem); 3680f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&group->device_list); 3690f3e72b5SJason Gunthorpe mutex_init(&group->device_lock); 3700f3e72b5SJason Gunthorpe group->iommu_group = iommu_group; 3710f3e72b5SJason Gunthorpe /* put in vfio_group_release() */ 3720f3e72b5SJason Gunthorpe iommu_group_ref_get(iommu_group); 3730f3e72b5SJason Gunthorpe group->type = type; 3740f3e72b5SJason Gunthorpe BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); 3750f3e72b5SJason Gunthorpe 3760f3e72b5SJason Gunthorpe return group; 3770f3e72b5SJason Gunthorpe } 3780f3e72b5SJason Gunthorpe 3790f3e72b5SJason Gunthorpe static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, 3800f3e72b5SJason Gunthorpe enum vfio_group_type type) 3810f3e72b5SJason Gunthorpe { 3820f3e72b5SJason Gunthorpe struct vfio_group *group; 3830f3e72b5SJason Gunthorpe struct vfio_group *ret; 3840f3e72b5SJason Gunthorpe int err; 3850f3e72b5SJason Gunthorpe 3860f3e72b5SJason Gunthorpe group = vfio_group_alloc(iommu_group, type); 3870f3e72b5SJason Gunthorpe if (IS_ERR(group)) 3880f3e72b5SJason Gunthorpe return group; 3890f3e72b5SJason Gunthorpe 3900f3e72b5SJason Gunthorpe err = dev_set_name(&group->dev, "%s%d", 3910f3e72b5SJason Gunthorpe group->type == VFIO_NO_IOMMU ? "noiommu-" : "", 3920f3e72b5SJason Gunthorpe iommu_group_id(iommu_group)); 3930f3e72b5SJason Gunthorpe if (err) { 3940f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 3950f3e72b5SJason Gunthorpe goto err_put; 3960f3e72b5SJason Gunthorpe } 3970f3e72b5SJason Gunthorpe 3980f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 3990f3e72b5SJason Gunthorpe 4000f3e72b5SJason Gunthorpe /* Did we race creating this group? */ 4010f3e72b5SJason Gunthorpe ret = __vfio_group_get_from_iommu(iommu_group); 4020f3e72b5SJason Gunthorpe if (ret) 4030f3e72b5SJason Gunthorpe goto err_unlock; 4040f3e72b5SJason Gunthorpe 4050f3e72b5SJason Gunthorpe err = cdev_device_add(&group->cdev, &group->dev); 4060f3e72b5SJason Gunthorpe if (err) { 4070f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 4080f3e72b5SJason Gunthorpe goto err_unlock; 4090f3e72b5SJason Gunthorpe } 4100f3e72b5SJason Gunthorpe 4110f3e72b5SJason Gunthorpe list_add(&group->vfio_next, &vfio.group_list); 4120f3e72b5SJason Gunthorpe 4130f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4140f3e72b5SJason Gunthorpe return group; 4150f3e72b5SJason Gunthorpe 4160f3e72b5SJason Gunthorpe err_unlock: 4170f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4180f3e72b5SJason Gunthorpe err_put: 4190f3e72b5SJason Gunthorpe put_device(&group->dev); 4200f3e72b5SJason Gunthorpe return ret; 4210f3e72b5SJason Gunthorpe } 4220f3e72b5SJason Gunthorpe 4230f3e72b5SJason Gunthorpe static void vfio_group_put(struct vfio_group *group) 4240f3e72b5SJason Gunthorpe { 4250f3e72b5SJason Gunthorpe if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock)) 4260f3e72b5SJason Gunthorpe return; 4270f3e72b5SJason Gunthorpe 4280f3e72b5SJason Gunthorpe /* 4290f3e72b5SJason Gunthorpe * These data structures all have paired operations that can only be 4300f3e72b5SJason Gunthorpe * undone when the caller holds a live reference on the group. Since all 4310f3e72b5SJason Gunthorpe * pairs must be undone these WARN_ON's indicate some caller did not 4320f3e72b5SJason Gunthorpe * properly hold the group reference. 4330f3e72b5SJason Gunthorpe */ 4340f3e72b5SJason Gunthorpe WARN_ON(!list_empty(&group->device_list)); 4350f3e72b5SJason Gunthorpe WARN_ON(group->container || group->container_users); 4360f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 4370f3e72b5SJason Gunthorpe 4380f3e72b5SJason Gunthorpe list_del(&group->vfio_next); 4390f3e72b5SJason Gunthorpe cdev_device_del(&group->cdev, &group->dev); 4400f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4410f3e72b5SJason Gunthorpe 4420f3e72b5SJason Gunthorpe put_device(&group->dev); 4430f3e72b5SJason Gunthorpe } 4440f3e72b5SJason Gunthorpe 4450f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group) 4460f3e72b5SJason Gunthorpe { 4470f3e72b5SJason Gunthorpe refcount_inc(&group->users); 4480f3e72b5SJason Gunthorpe } 4490f3e72b5SJason Gunthorpe 4500f3e72b5SJason Gunthorpe /* 4510f3e72b5SJason Gunthorpe * Device objects - create, release, get, put, search 4520f3e72b5SJason Gunthorpe */ 4530f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */ 4540f3e72b5SJason Gunthorpe static void vfio_device_put(struct vfio_device *device) 4550f3e72b5SJason Gunthorpe { 4560f3e72b5SJason Gunthorpe if (refcount_dec_and_test(&device->refcount)) 4570f3e72b5SJason Gunthorpe complete(&device->comp); 4580f3e72b5SJason Gunthorpe } 4590f3e72b5SJason Gunthorpe 4600f3e72b5SJason Gunthorpe static bool vfio_device_try_get(struct vfio_device *device) 4610f3e72b5SJason Gunthorpe { 4620f3e72b5SJason Gunthorpe return refcount_inc_not_zero(&device->refcount); 4630f3e72b5SJason Gunthorpe } 4640f3e72b5SJason Gunthorpe 4650f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group, 4660f3e72b5SJason Gunthorpe struct device *dev) 4670f3e72b5SJason Gunthorpe { 4680f3e72b5SJason Gunthorpe struct vfio_device *device; 4690f3e72b5SJason Gunthorpe 4700f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 4710f3e72b5SJason Gunthorpe list_for_each_entry(device, &group->device_list, group_next) { 4720f3e72b5SJason Gunthorpe if (device->dev == dev && vfio_device_try_get(device)) { 4730f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 4740f3e72b5SJason Gunthorpe return device; 4750f3e72b5SJason Gunthorpe } 4760f3e72b5SJason Gunthorpe } 4770f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 4780f3e72b5SJason Gunthorpe return NULL; 4790f3e72b5SJason Gunthorpe } 4800f3e72b5SJason Gunthorpe 4810f3e72b5SJason Gunthorpe /* 4820f3e72b5SJason Gunthorpe * VFIO driver API 4830f3e72b5SJason Gunthorpe */ 4840f3e72b5SJason Gunthorpe void vfio_init_group_dev(struct vfio_device *device, struct device *dev, 4850f3e72b5SJason Gunthorpe const struct vfio_device_ops *ops) 4860f3e72b5SJason Gunthorpe { 4870f3e72b5SJason Gunthorpe init_completion(&device->comp); 4880f3e72b5SJason Gunthorpe device->dev = dev; 4890f3e72b5SJason Gunthorpe device->ops = ops; 4900f3e72b5SJason Gunthorpe } 4910f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_init_group_dev); 4920f3e72b5SJason Gunthorpe 4930f3e72b5SJason Gunthorpe void vfio_uninit_group_dev(struct vfio_device *device) 4940f3e72b5SJason Gunthorpe { 4950f3e72b5SJason Gunthorpe vfio_release_device_set(device); 4960f3e72b5SJason Gunthorpe } 4970f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); 4980f3e72b5SJason Gunthorpe 4990f3e72b5SJason Gunthorpe static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, 5000f3e72b5SJason Gunthorpe enum vfio_group_type type) 5010f3e72b5SJason Gunthorpe { 5020f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 5030f3e72b5SJason Gunthorpe struct vfio_group *group; 5040f3e72b5SJason Gunthorpe int ret; 5050f3e72b5SJason Gunthorpe 5060f3e72b5SJason Gunthorpe iommu_group = iommu_group_alloc(); 5070f3e72b5SJason Gunthorpe if (IS_ERR(iommu_group)) 5080f3e72b5SJason Gunthorpe return ERR_CAST(iommu_group); 5090f3e72b5SJason Gunthorpe 5100f3e72b5SJason Gunthorpe ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); 5110f3e72b5SJason Gunthorpe if (ret) 5120f3e72b5SJason Gunthorpe goto out_put_group; 5130f3e72b5SJason Gunthorpe ret = iommu_group_add_device(iommu_group, dev); 5140f3e72b5SJason Gunthorpe if (ret) 5150f3e72b5SJason Gunthorpe goto out_put_group; 5160f3e72b5SJason Gunthorpe 5170f3e72b5SJason Gunthorpe group = vfio_create_group(iommu_group, type); 5180f3e72b5SJason Gunthorpe if (IS_ERR(group)) { 5190f3e72b5SJason Gunthorpe ret = PTR_ERR(group); 5200f3e72b5SJason Gunthorpe goto out_remove_device; 5210f3e72b5SJason Gunthorpe } 5220f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5230f3e72b5SJason Gunthorpe return group; 5240f3e72b5SJason Gunthorpe 5250f3e72b5SJason Gunthorpe out_remove_device: 5260f3e72b5SJason Gunthorpe iommu_group_remove_device(dev); 5270f3e72b5SJason Gunthorpe out_put_group: 5280f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5290f3e72b5SJason Gunthorpe return ERR_PTR(ret); 5300f3e72b5SJason Gunthorpe } 5310f3e72b5SJason Gunthorpe 5320f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) 5330f3e72b5SJason Gunthorpe { 5340f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 5350f3e72b5SJason Gunthorpe struct vfio_group *group; 5360f3e72b5SJason Gunthorpe 5370f3e72b5SJason Gunthorpe iommu_group = iommu_group_get(dev); 5380f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 5390f3e72b5SJason Gunthorpe if (!iommu_group && noiommu) { 5400f3e72b5SJason Gunthorpe /* 5410f3e72b5SJason Gunthorpe * With noiommu enabled, create an IOMMU group for devices that 5420f3e72b5SJason Gunthorpe * don't already have one, implying no IOMMU hardware/driver 5430f3e72b5SJason Gunthorpe * exists. Taint the kernel because we're about to give a DMA 5440f3e72b5SJason Gunthorpe * capable device to a user without IOMMU protection. 5450f3e72b5SJason Gunthorpe */ 5460f3e72b5SJason Gunthorpe group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); 5470f3e72b5SJason Gunthorpe if (!IS_ERR(group)) { 5480f3e72b5SJason Gunthorpe add_taint(TAINT_USER, LOCKDEP_STILL_OK); 5490f3e72b5SJason Gunthorpe dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); 5500f3e72b5SJason Gunthorpe } 5510f3e72b5SJason Gunthorpe return group; 5520f3e72b5SJason Gunthorpe } 5530f3e72b5SJason Gunthorpe #endif 5540f3e72b5SJason Gunthorpe if (!iommu_group) 5550f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 5560f3e72b5SJason Gunthorpe 5570f3e72b5SJason Gunthorpe /* 5580f3e72b5SJason Gunthorpe * VFIO always sets IOMMU_CACHE because we offer no way for userspace to 5590f3e72b5SJason Gunthorpe * restore cache coherency. It has to be checked here because it is only 5600f3e72b5SJason Gunthorpe * valid for cases where we are using iommu groups. 5610f3e72b5SJason Gunthorpe */ 5620f3e72b5SJason Gunthorpe if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { 5630f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5640f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 5650f3e72b5SJason Gunthorpe } 5660f3e72b5SJason Gunthorpe 5670f3e72b5SJason Gunthorpe group = vfio_group_get_from_iommu(iommu_group); 5680f3e72b5SJason Gunthorpe if (!group) 5690f3e72b5SJason Gunthorpe group = vfio_create_group(iommu_group, VFIO_IOMMU); 5700f3e72b5SJason Gunthorpe 5710f3e72b5SJason Gunthorpe /* The vfio_group holds a reference to the iommu_group */ 5720f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5730f3e72b5SJason Gunthorpe return group; 5740f3e72b5SJason Gunthorpe } 5750f3e72b5SJason Gunthorpe 5760f3e72b5SJason Gunthorpe static int __vfio_register_dev(struct vfio_device *device, 5770f3e72b5SJason Gunthorpe struct vfio_group *group) 5780f3e72b5SJason Gunthorpe { 5790f3e72b5SJason Gunthorpe struct vfio_device *existing_device; 5800f3e72b5SJason Gunthorpe 5810f3e72b5SJason Gunthorpe if (IS_ERR(group)) 5820f3e72b5SJason Gunthorpe return PTR_ERR(group); 5830f3e72b5SJason Gunthorpe 5840f3e72b5SJason Gunthorpe /* 5850f3e72b5SJason Gunthorpe * If the driver doesn't specify a set then the device is added to a 5860f3e72b5SJason Gunthorpe * singleton set just for itself. 5870f3e72b5SJason Gunthorpe */ 5880f3e72b5SJason Gunthorpe if (!device->dev_set) 5890f3e72b5SJason Gunthorpe vfio_assign_device_set(device, device); 5900f3e72b5SJason Gunthorpe 5910f3e72b5SJason Gunthorpe existing_device = vfio_group_get_device(group, device->dev); 5920f3e72b5SJason Gunthorpe if (existing_device) { 5930f3e72b5SJason Gunthorpe dev_WARN(device->dev, "Device already exists on group %d\n", 5940f3e72b5SJason Gunthorpe iommu_group_id(group->iommu_group)); 5950f3e72b5SJason Gunthorpe vfio_device_put(existing_device); 5960f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU || 5970f3e72b5SJason Gunthorpe group->type == VFIO_EMULATED_IOMMU) 5980f3e72b5SJason Gunthorpe iommu_group_remove_device(device->dev); 5990f3e72b5SJason Gunthorpe vfio_group_put(group); 6000f3e72b5SJason Gunthorpe return -EBUSY; 6010f3e72b5SJason Gunthorpe } 6020f3e72b5SJason Gunthorpe 6030f3e72b5SJason Gunthorpe /* Our reference on group is moved to the device */ 6040f3e72b5SJason Gunthorpe device->group = group; 6050f3e72b5SJason Gunthorpe 6060f3e72b5SJason Gunthorpe /* Refcounting can't start until the driver calls register */ 6070f3e72b5SJason Gunthorpe refcount_set(&device->refcount, 1); 6080f3e72b5SJason Gunthorpe 6090f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 6100f3e72b5SJason Gunthorpe list_add(&device->group_next, &group->device_list); 6110f3e72b5SJason Gunthorpe group->dev_counter++; 6120f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 6130f3e72b5SJason Gunthorpe 6140f3e72b5SJason Gunthorpe return 0; 6150f3e72b5SJason Gunthorpe } 6160f3e72b5SJason Gunthorpe 6170f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device) 6180f3e72b5SJason Gunthorpe { 6190f3e72b5SJason Gunthorpe return __vfio_register_dev(device, 6200f3e72b5SJason Gunthorpe vfio_group_find_or_alloc(device->dev)); 6210f3e72b5SJason Gunthorpe } 6220f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev); 6230f3e72b5SJason Gunthorpe 6240f3e72b5SJason Gunthorpe /* 6250f3e72b5SJason Gunthorpe * Register a virtual device without IOMMU backing. The user of this 6260f3e72b5SJason Gunthorpe * device must not be able to directly trigger unmediated DMA. 6270f3e72b5SJason Gunthorpe */ 6280f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device) 6290f3e72b5SJason Gunthorpe { 6300f3e72b5SJason Gunthorpe return __vfio_register_dev(device, 6310f3e72b5SJason Gunthorpe vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU)); 6320f3e72b5SJason Gunthorpe } 6330f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); 6340f3e72b5SJason Gunthorpe 6350f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, 6360f3e72b5SJason Gunthorpe char *buf) 6370f3e72b5SJason Gunthorpe { 6380f3e72b5SJason Gunthorpe struct vfio_device *it, *device = ERR_PTR(-ENODEV); 6390f3e72b5SJason Gunthorpe 6400f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 6410f3e72b5SJason Gunthorpe list_for_each_entry(it, &group->device_list, group_next) { 6420f3e72b5SJason Gunthorpe int ret; 6430f3e72b5SJason Gunthorpe 6440f3e72b5SJason Gunthorpe if (it->ops->match) { 6450f3e72b5SJason Gunthorpe ret = it->ops->match(it, buf); 6460f3e72b5SJason Gunthorpe if (ret < 0) { 6470f3e72b5SJason Gunthorpe device = ERR_PTR(ret); 6480f3e72b5SJason Gunthorpe break; 6490f3e72b5SJason Gunthorpe } 6500f3e72b5SJason Gunthorpe } else { 6510f3e72b5SJason Gunthorpe ret = !strcmp(dev_name(it->dev), buf); 6520f3e72b5SJason Gunthorpe } 6530f3e72b5SJason Gunthorpe 6540f3e72b5SJason Gunthorpe if (ret && vfio_device_try_get(it)) { 6550f3e72b5SJason Gunthorpe device = it; 6560f3e72b5SJason Gunthorpe break; 6570f3e72b5SJason Gunthorpe } 6580f3e72b5SJason Gunthorpe } 6590f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 6600f3e72b5SJason Gunthorpe 6610f3e72b5SJason Gunthorpe return device; 6620f3e72b5SJason Gunthorpe } 6630f3e72b5SJason Gunthorpe 6640f3e72b5SJason Gunthorpe /* 6650f3e72b5SJason Gunthorpe * Decrement the device reference count and wait for the device to be 6660f3e72b5SJason Gunthorpe * removed. Open file descriptors for the device... */ 6670f3e72b5SJason Gunthorpe void vfio_unregister_group_dev(struct vfio_device *device) 6680f3e72b5SJason Gunthorpe { 6690f3e72b5SJason Gunthorpe struct vfio_group *group = device->group; 6700f3e72b5SJason Gunthorpe unsigned int i = 0; 6710f3e72b5SJason Gunthorpe bool interrupted = false; 6720f3e72b5SJason Gunthorpe long rc; 6730f3e72b5SJason Gunthorpe 6740f3e72b5SJason Gunthorpe vfio_device_put(device); 6750f3e72b5SJason Gunthorpe rc = try_wait_for_completion(&device->comp); 6760f3e72b5SJason Gunthorpe while (rc <= 0) { 6770f3e72b5SJason Gunthorpe if (device->ops->request) 6780f3e72b5SJason Gunthorpe device->ops->request(device, i++); 6790f3e72b5SJason Gunthorpe 6800f3e72b5SJason Gunthorpe if (interrupted) { 6810f3e72b5SJason Gunthorpe rc = wait_for_completion_timeout(&device->comp, 6820f3e72b5SJason Gunthorpe HZ * 10); 6830f3e72b5SJason Gunthorpe } else { 6840f3e72b5SJason Gunthorpe rc = wait_for_completion_interruptible_timeout( 6850f3e72b5SJason Gunthorpe &device->comp, HZ * 10); 6860f3e72b5SJason Gunthorpe if (rc < 0) { 6870f3e72b5SJason Gunthorpe interrupted = true; 6880f3e72b5SJason Gunthorpe dev_warn(device->dev, 6890f3e72b5SJason Gunthorpe "Device is currently in use, task" 6900f3e72b5SJason Gunthorpe " \"%s\" (%d) " 6910f3e72b5SJason Gunthorpe "blocked until device is released", 6920f3e72b5SJason Gunthorpe current->comm, task_pid_nr(current)); 6930f3e72b5SJason Gunthorpe } 6940f3e72b5SJason Gunthorpe } 6950f3e72b5SJason Gunthorpe } 6960f3e72b5SJason Gunthorpe 6970f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 6980f3e72b5SJason Gunthorpe list_del(&device->group_next); 6990f3e72b5SJason Gunthorpe group->dev_counter--; 7000f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 7010f3e72b5SJason Gunthorpe 7020f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) 7030f3e72b5SJason Gunthorpe iommu_group_remove_device(device->dev); 7040f3e72b5SJason Gunthorpe 7050f3e72b5SJason Gunthorpe /* Matches the get in vfio_register_group_dev() */ 7060f3e72b5SJason Gunthorpe vfio_group_put(group); 7070f3e72b5SJason Gunthorpe } 7080f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); 7090f3e72b5SJason Gunthorpe 7100f3e72b5SJason Gunthorpe /* 7110f3e72b5SJason Gunthorpe * VFIO base fd, /dev/vfio/vfio 7120f3e72b5SJason Gunthorpe */ 7130f3e72b5SJason Gunthorpe static long vfio_ioctl_check_extension(struct vfio_container *container, 7140f3e72b5SJason Gunthorpe unsigned long arg) 7150f3e72b5SJason Gunthorpe { 7160f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 7170f3e72b5SJason Gunthorpe long ret = 0; 7180f3e72b5SJason Gunthorpe 7190f3e72b5SJason Gunthorpe down_read(&container->group_lock); 7200f3e72b5SJason Gunthorpe 7210f3e72b5SJason Gunthorpe driver = container->iommu_driver; 7220f3e72b5SJason Gunthorpe 7230f3e72b5SJason Gunthorpe switch (arg) { 7240f3e72b5SJason Gunthorpe /* No base extensions yet */ 7250f3e72b5SJason Gunthorpe default: 7260f3e72b5SJason Gunthorpe /* 7270f3e72b5SJason Gunthorpe * If no driver is set, poll all registered drivers for 7280f3e72b5SJason Gunthorpe * extensions and return the first positive result. If 7290f3e72b5SJason Gunthorpe * a driver is already set, further queries will be passed 7300f3e72b5SJason Gunthorpe * only to that driver. 7310f3e72b5SJason Gunthorpe */ 7320f3e72b5SJason Gunthorpe if (!driver) { 7330f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 7340f3e72b5SJason Gunthorpe list_for_each_entry(driver, &vfio.iommu_drivers_list, 7350f3e72b5SJason Gunthorpe vfio_next) { 7360f3e72b5SJason Gunthorpe 7370f3e72b5SJason Gunthorpe if (!list_empty(&container->group_list) && 7380f3e72b5SJason Gunthorpe !vfio_iommu_driver_allowed(container, 7390f3e72b5SJason Gunthorpe driver)) 7400f3e72b5SJason Gunthorpe continue; 7410f3e72b5SJason Gunthorpe if (!try_module_get(driver->ops->owner)) 7420f3e72b5SJason Gunthorpe continue; 7430f3e72b5SJason Gunthorpe 7440f3e72b5SJason Gunthorpe ret = driver->ops->ioctl(NULL, 7450f3e72b5SJason Gunthorpe VFIO_CHECK_EXTENSION, 7460f3e72b5SJason Gunthorpe arg); 7470f3e72b5SJason Gunthorpe module_put(driver->ops->owner); 7480f3e72b5SJason Gunthorpe if (ret > 0) 7490f3e72b5SJason Gunthorpe break; 7500f3e72b5SJason Gunthorpe } 7510f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 7520f3e72b5SJason Gunthorpe } else 7530f3e72b5SJason Gunthorpe ret = driver->ops->ioctl(container->iommu_data, 7540f3e72b5SJason Gunthorpe VFIO_CHECK_EXTENSION, arg); 7550f3e72b5SJason Gunthorpe } 7560f3e72b5SJason Gunthorpe 7570f3e72b5SJason Gunthorpe up_read(&container->group_lock); 7580f3e72b5SJason Gunthorpe 7590f3e72b5SJason Gunthorpe return ret; 7600f3e72b5SJason Gunthorpe } 7610f3e72b5SJason Gunthorpe 7620f3e72b5SJason Gunthorpe /* hold write lock on container->group_lock */ 7630f3e72b5SJason Gunthorpe static int __vfio_container_attach_groups(struct vfio_container *container, 7640f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver, 7650f3e72b5SJason Gunthorpe void *data) 7660f3e72b5SJason Gunthorpe { 7670f3e72b5SJason Gunthorpe struct vfio_group *group; 7680f3e72b5SJason Gunthorpe int ret = -ENODEV; 7690f3e72b5SJason Gunthorpe 7700f3e72b5SJason Gunthorpe list_for_each_entry(group, &container->group_list, container_next) { 7710f3e72b5SJason Gunthorpe ret = driver->ops->attach_group(data, group->iommu_group, 7720f3e72b5SJason Gunthorpe group->type); 7730f3e72b5SJason Gunthorpe if (ret) 7740f3e72b5SJason Gunthorpe goto unwind; 7750f3e72b5SJason Gunthorpe } 7760f3e72b5SJason Gunthorpe 7770f3e72b5SJason Gunthorpe return ret; 7780f3e72b5SJason Gunthorpe 7790f3e72b5SJason Gunthorpe unwind: 7800f3e72b5SJason Gunthorpe list_for_each_entry_continue_reverse(group, &container->group_list, 7810f3e72b5SJason Gunthorpe container_next) { 7820f3e72b5SJason Gunthorpe driver->ops->detach_group(data, group->iommu_group); 7830f3e72b5SJason Gunthorpe } 7840f3e72b5SJason Gunthorpe 7850f3e72b5SJason Gunthorpe return ret; 7860f3e72b5SJason Gunthorpe } 7870f3e72b5SJason Gunthorpe 7880f3e72b5SJason Gunthorpe static long vfio_ioctl_set_iommu(struct vfio_container *container, 7890f3e72b5SJason Gunthorpe unsigned long arg) 7900f3e72b5SJason Gunthorpe { 7910f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 7920f3e72b5SJason Gunthorpe long ret = -ENODEV; 7930f3e72b5SJason Gunthorpe 7940f3e72b5SJason Gunthorpe down_write(&container->group_lock); 7950f3e72b5SJason Gunthorpe 7960f3e72b5SJason Gunthorpe /* 7970f3e72b5SJason Gunthorpe * The container is designed to be an unprivileged interface while 7980f3e72b5SJason Gunthorpe * the group can be assigned to specific users. Therefore, only by 7990f3e72b5SJason Gunthorpe * adding a group to a container does the user get the privilege of 8000f3e72b5SJason Gunthorpe * enabling the iommu, which may allocate finite resources. There 8010f3e72b5SJason Gunthorpe * is no unset_iommu, but by removing all the groups from a container, 8020f3e72b5SJason Gunthorpe * the container is deprivileged and returns to an unset state. 8030f3e72b5SJason Gunthorpe */ 8040f3e72b5SJason Gunthorpe if (list_empty(&container->group_list) || container->iommu_driver) { 8050f3e72b5SJason Gunthorpe up_write(&container->group_lock); 8060f3e72b5SJason Gunthorpe return -EINVAL; 8070f3e72b5SJason Gunthorpe } 8080f3e72b5SJason Gunthorpe 8090f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 8100f3e72b5SJason Gunthorpe list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 8110f3e72b5SJason Gunthorpe void *data; 8120f3e72b5SJason Gunthorpe 8130f3e72b5SJason Gunthorpe if (!vfio_iommu_driver_allowed(container, driver)) 8140f3e72b5SJason Gunthorpe continue; 8150f3e72b5SJason Gunthorpe if (!try_module_get(driver->ops->owner)) 8160f3e72b5SJason Gunthorpe continue; 8170f3e72b5SJason Gunthorpe 8180f3e72b5SJason Gunthorpe /* 8190f3e72b5SJason Gunthorpe * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION, 8200f3e72b5SJason Gunthorpe * so test which iommu driver reported support for this 8210f3e72b5SJason Gunthorpe * extension and call open on them. We also pass them the 8220f3e72b5SJason Gunthorpe * magic, allowing a single driver to support multiple 8230f3e72b5SJason Gunthorpe * interfaces if they'd like. 8240f3e72b5SJason Gunthorpe */ 8250f3e72b5SJason Gunthorpe if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) { 8260f3e72b5SJason Gunthorpe module_put(driver->ops->owner); 8270f3e72b5SJason Gunthorpe continue; 8280f3e72b5SJason Gunthorpe } 8290f3e72b5SJason Gunthorpe 8300f3e72b5SJason Gunthorpe data = driver->ops->open(arg); 8310f3e72b5SJason Gunthorpe if (IS_ERR(data)) { 8320f3e72b5SJason Gunthorpe ret = PTR_ERR(data); 8330f3e72b5SJason Gunthorpe module_put(driver->ops->owner); 8340f3e72b5SJason Gunthorpe continue; 8350f3e72b5SJason Gunthorpe } 8360f3e72b5SJason Gunthorpe 8370f3e72b5SJason Gunthorpe ret = __vfio_container_attach_groups(container, driver, data); 8380f3e72b5SJason Gunthorpe if (ret) { 8390f3e72b5SJason Gunthorpe driver->ops->release(data); 8400f3e72b5SJason Gunthorpe module_put(driver->ops->owner); 8410f3e72b5SJason Gunthorpe continue; 8420f3e72b5SJason Gunthorpe } 8430f3e72b5SJason Gunthorpe 8440f3e72b5SJason Gunthorpe container->iommu_driver = driver; 8450f3e72b5SJason Gunthorpe container->iommu_data = data; 8460f3e72b5SJason Gunthorpe break; 8470f3e72b5SJason Gunthorpe } 8480f3e72b5SJason Gunthorpe 8490f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 8500f3e72b5SJason Gunthorpe up_write(&container->group_lock); 8510f3e72b5SJason Gunthorpe 8520f3e72b5SJason Gunthorpe return ret; 8530f3e72b5SJason Gunthorpe } 8540f3e72b5SJason Gunthorpe 8550f3e72b5SJason Gunthorpe static long vfio_fops_unl_ioctl(struct file *filep, 8560f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 8570f3e72b5SJason Gunthorpe { 8580f3e72b5SJason Gunthorpe struct vfio_container *container = filep->private_data; 8590f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 8600f3e72b5SJason Gunthorpe void *data; 8610f3e72b5SJason Gunthorpe long ret = -EINVAL; 8620f3e72b5SJason Gunthorpe 8630f3e72b5SJason Gunthorpe if (!container) 8640f3e72b5SJason Gunthorpe return ret; 8650f3e72b5SJason Gunthorpe 8660f3e72b5SJason Gunthorpe switch (cmd) { 8670f3e72b5SJason Gunthorpe case VFIO_GET_API_VERSION: 8680f3e72b5SJason Gunthorpe ret = VFIO_API_VERSION; 8690f3e72b5SJason Gunthorpe break; 8700f3e72b5SJason Gunthorpe case VFIO_CHECK_EXTENSION: 8710f3e72b5SJason Gunthorpe ret = vfio_ioctl_check_extension(container, arg); 8720f3e72b5SJason Gunthorpe break; 8730f3e72b5SJason Gunthorpe case VFIO_SET_IOMMU: 8740f3e72b5SJason Gunthorpe ret = vfio_ioctl_set_iommu(container, arg); 8750f3e72b5SJason Gunthorpe break; 8760f3e72b5SJason Gunthorpe default: 8770f3e72b5SJason Gunthorpe driver = container->iommu_driver; 8780f3e72b5SJason Gunthorpe data = container->iommu_data; 8790f3e72b5SJason Gunthorpe 8800f3e72b5SJason Gunthorpe if (driver) /* passthrough all unrecognized ioctls */ 8810f3e72b5SJason Gunthorpe ret = driver->ops->ioctl(data, cmd, arg); 8820f3e72b5SJason Gunthorpe } 8830f3e72b5SJason Gunthorpe 8840f3e72b5SJason Gunthorpe return ret; 8850f3e72b5SJason Gunthorpe } 8860f3e72b5SJason Gunthorpe 8870f3e72b5SJason Gunthorpe static int vfio_fops_open(struct inode *inode, struct file *filep) 8880f3e72b5SJason Gunthorpe { 8890f3e72b5SJason Gunthorpe struct vfio_container *container; 8900f3e72b5SJason Gunthorpe 8910f3e72b5SJason Gunthorpe container = kzalloc(sizeof(*container), GFP_KERNEL); 8920f3e72b5SJason Gunthorpe if (!container) 8930f3e72b5SJason Gunthorpe return -ENOMEM; 8940f3e72b5SJason Gunthorpe 8950f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&container->group_list); 8960f3e72b5SJason Gunthorpe init_rwsem(&container->group_lock); 8970f3e72b5SJason Gunthorpe kref_init(&container->kref); 8980f3e72b5SJason Gunthorpe 8990f3e72b5SJason Gunthorpe filep->private_data = container; 9000f3e72b5SJason Gunthorpe 9010f3e72b5SJason Gunthorpe return 0; 9020f3e72b5SJason Gunthorpe } 9030f3e72b5SJason Gunthorpe 9040f3e72b5SJason Gunthorpe static int vfio_fops_release(struct inode *inode, struct file *filep) 9050f3e72b5SJason Gunthorpe { 9060f3e72b5SJason Gunthorpe struct vfio_container *container = filep->private_data; 9070f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver = container->iommu_driver; 9080f3e72b5SJason Gunthorpe 9090f3e72b5SJason Gunthorpe if (driver && driver->ops->notify) 9100f3e72b5SJason Gunthorpe driver->ops->notify(container->iommu_data, 9110f3e72b5SJason Gunthorpe VFIO_IOMMU_CONTAINER_CLOSE); 9120f3e72b5SJason Gunthorpe 9130f3e72b5SJason Gunthorpe filep->private_data = NULL; 9140f3e72b5SJason Gunthorpe 9150f3e72b5SJason Gunthorpe vfio_container_put(container); 9160f3e72b5SJason Gunthorpe 9170f3e72b5SJason Gunthorpe return 0; 9180f3e72b5SJason Gunthorpe } 9190f3e72b5SJason Gunthorpe 9200f3e72b5SJason Gunthorpe static const struct file_operations vfio_fops = { 9210f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 9220f3e72b5SJason Gunthorpe .open = vfio_fops_open, 9230f3e72b5SJason Gunthorpe .release = vfio_fops_release, 9240f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_fops_unl_ioctl, 9250f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 9260f3e72b5SJason Gunthorpe }; 9270f3e72b5SJason Gunthorpe 9280f3e72b5SJason Gunthorpe /* 9290f3e72b5SJason Gunthorpe * VFIO Group fd, /dev/vfio/$GROUP 9300f3e72b5SJason Gunthorpe */ 9310f3e72b5SJason Gunthorpe static void __vfio_group_unset_container(struct vfio_group *group) 9320f3e72b5SJason Gunthorpe { 9330f3e72b5SJason Gunthorpe struct vfio_container *container = group->container; 9340f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 9350f3e72b5SJason Gunthorpe 9360f3e72b5SJason Gunthorpe lockdep_assert_held_write(&group->group_rwsem); 9370f3e72b5SJason Gunthorpe 9380f3e72b5SJason Gunthorpe down_write(&container->group_lock); 9390f3e72b5SJason Gunthorpe 9400f3e72b5SJason Gunthorpe driver = container->iommu_driver; 9410f3e72b5SJason Gunthorpe if (driver) 9420f3e72b5SJason Gunthorpe driver->ops->detach_group(container->iommu_data, 9430f3e72b5SJason Gunthorpe group->iommu_group); 9440f3e72b5SJason Gunthorpe 9450f3e72b5SJason Gunthorpe if (group->type == VFIO_IOMMU) 9460f3e72b5SJason Gunthorpe iommu_group_release_dma_owner(group->iommu_group); 9470f3e72b5SJason Gunthorpe 9480f3e72b5SJason Gunthorpe group->container = NULL; 9490f3e72b5SJason Gunthorpe group->container_users = 0; 9500f3e72b5SJason Gunthorpe list_del(&group->container_next); 9510f3e72b5SJason Gunthorpe 9520f3e72b5SJason Gunthorpe /* Detaching the last group deprivileges a container, remove iommu */ 9530f3e72b5SJason Gunthorpe if (driver && list_empty(&container->group_list)) { 9540f3e72b5SJason Gunthorpe driver->ops->release(container->iommu_data); 9550f3e72b5SJason Gunthorpe module_put(driver->ops->owner); 9560f3e72b5SJason Gunthorpe container->iommu_driver = NULL; 9570f3e72b5SJason Gunthorpe container->iommu_data = NULL; 9580f3e72b5SJason Gunthorpe } 9590f3e72b5SJason Gunthorpe 9600f3e72b5SJason Gunthorpe up_write(&container->group_lock); 9610f3e72b5SJason Gunthorpe 9620f3e72b5SJason Gunthorpe vfio_container_put(container); 9630f3e72b5SJason Gunthorpe } 9640f3e72b5SJason Gunthorpe 9650f3e72b5SJason Gunthorpe /* 9660f3e72b5SJason Gunthorpe * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or 9670f3e72b5SJason Gunthorpe * if there was no container to unset. Since the ioctl is called on 9680f3e72b5SJason Gunthorpe * the group, we know that still exists, therefore the only valid 9690f3e72b5SJason Gunthorpe * transition here is 1->0. 9700f3e72b5SJason Gunthorpe */ 9710f3e72b5SJason Gunthorpe static int vfio_group_unset_container(struct vfio_group *group) 9720f3e72b5SJason Gunthorpe { 9730f3e72b5SJason Gunthorpe lockdep_assert_held_write(&group->group_rwsem); 9740f3e72b5SJason Gunthorpe 9750f3e72b5SJason Gunthorpe if (!group->container) 9760f3e72b5SJason Gunthorpe return -EINVAL; 9770f3e72b5SJason Gunthorpe if (group->container_users != 1) 9780f3e72b5SJason Gunthorpe return -EBUSY; 9790f3e72b5SJason Gunthorpe __vfio_group_unset_container(group); 9800f3e72b5SJason Gunthorpe return 0; 9810f3e72b5SJason Gunthorpe } 9820f3e72b5SJason Gunthorpe 9830f3e72b5SJason Gunthorpe static int vfio_group_set_container(struct vfio_group *group, int container_fd) 9840f3e72b5SJason Gunthorpe { 9850f3e72b5SJason Gunthorpe struct fd f; 9860f3e72b5SJason Gunthorpe struct vfio_container *container; 9870f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 9880f3e72b5SJason Gunthorpe int ret = 0; 9890f3e72b5SJason Gunthorpe 9900f3e72b5SJason Gunthorpe lockdep_assert_held_write(&group->group_rwsem); 9910f3e72b5SJason Gunthorpe 9920f3e72b5SJason Gunthorpe if (group->container || WARN_ON(group->container_users)) 9930f3e72b5SJason Gunthorpe return -EINVAL; 9940f3e72b5SJason Gunthorpe 9950f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 9960f3e72b5SJason Gunthorpe return -EPERM; 9970f3e72b5SJason Gunthorpe 9980f3e72b5SJason Gunthorpe f = fdget(container_fd); 9990f3e72b5SJason Gunthorpe if (!f.file) 10000f3e72b5SJason Gunthorpe return -EBADF; 10010f3e72b5SJason Gunthorpe 10020f3e72b5SJason Gunthorpe /* Sanity check, is this really our fd? */ 10030f3e72b5SJason Gunthorpe if (f.file->f_op != &vfio_fops) { 10040f3e72b5SJason Gunthorpe fdput(f); 10050f3e72b5SJason Gunthorpe return -EINVAL; 10060f3e72b5SJason Gunthorpe } 10070f3e72b5SJason Gunthorpe 10080f3e72b5SJason Gunthorpe container = f.file->private_data; 10090f3e72b5SJason Gunthorpe WARN_ON(!container); /* fget ensures we don't race vfio_release */ 10100f3e72b5SJason Gunthorpe 10110f3e72b5SJason Gunthorpe down_write(&container->group_lock); 10120f3e72b5SJason Gunthorpe 10130f3e72b5SJason Gunthorpe /* Real groups and fake groups cannot mix */ 10140f3e72b5SJason Gunthorpe if (!list_empty(&container->group_list) && 10150f3e72b5SJason Gunthorpe container->noiommu != (group->type == VFIO_NO_IOMMU)) { 10160f3e72b5SJason Gunthorpe ret = -EPERM; 10170f3e72b5SJason Gunthorpe goto unlock_out; 10180f3e72b5SJason Gunthorpe } 10190f3e72b5SJason Gunthorpe 10200f3e72b5SJason Gunthorpe if (group->type == VFIO_IOMMU) { 10210f3e72b5SJason Gunthorpe ret = iommu_group_claim_dma_owner(group->iommu_group, f.file); 10220f3e72b5SJason Gunthorpe if (ret) 10230f3e72b5SJason Gunthorpe goto unlock_out; 10240f3e72b5SJason Gunthorpe } 10250f3e72b5SJason Gunthorpe 10260f3e72b5SJason Gunthorpe driver = container->iommu_driver; 10270f3e72b5SJason Gunthorpe if (driver) { 10280f3e72b5SJason Gunthorpe ret = driver->ops->attach_group(container->iommu_data, 10290f3e72b5SJason Gunthorpe group->iommu_group, 10300f3e72b5SJason Gunthorpe group->type); 10310f3e72b5SJason Gunthorpe if (ret) { 10320f3e72b5SJason Gunthorpe if (group->type == VFIO_IOMMU) 10330f3e72b5SJason Gunthorpe iommu_group_release_dma_owner( 10340f3e72b5SJason Gunthorpe group->iommu_group); 10350f3e72b5SJason Gunthorpe goto unlock_out; 10360f3e72b5SJason Gunthorpe } 10370f3e72b5SJason Gunthorpe } 10380f3e72b5SJason Gunthorpe 10390f3e72b5SJason Gunthorpe group->container = container; 10400f3e72b5SJason Gunthorpe group->container_users = 1; 10410f3e72b5SJason Gunthorpe container->noiommu = (group->type == VFIO_NO_IOMMU); 10420f3e72b5SJason Gunthorpe list_add(&group->container_next, &container->group_list); 10430f3e72b5SJason Gunthorpe 10440f3e72b5SJason Gunthorpe /* Get a reference on the container and mark a user within the group */ 10450f3e72b5SJason Gunthorpe vfio_container_get(container); 10460f3e72b5SJason Gunthorpe 10470f3e72b5SJason Gunthorpe unlock_out: 10480f3e72b5SJason Gunthorpe up_write(&container->group_lock); 10490f3e72b5SJason Gunthorpe fdput(f); 10500f3e72b5SJason Gunthorpe return ret; 10510f3e72b5SJason Gunthorpe } 10520f3e72b5SJason Gunthorpe 10530f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops; 10540f3e72b5SJason Gunthorpe 10550f3e72b5SJason Gunthorpe /* true if the vfio_device has open_device() called but not close_device() */ 10560f3e72b5SJason Gunthorpe static bool vfio_assert_device_open(struct vfio_device *device) 10570f3e72b5SJason Gunthorpe { 10580f3e72b5SJason Gunthorpe return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); 10590f3e72b5SJason Gunthorpe } 10600f3e72b5SJason Gunthorpe 10610f3e72b5SJason Gunthorpe static int vfio_device_assign_container(struct vfio_device *device) 10620f3e72b5SJason Gunthorpe { 10630f3e72b5SJason Gunthorpe struct vfio_group *group = device->group; 10640f3e72b5SJason Gunthorpe 10650f3e72b5SJason Gunthorpe lockdep_assert_held_write(&group->group_rwsem); 10660f3e72b5SJason Gunthorpe 10670f3e72b5SJason Gunthorpe if (!group->container || !group->container->iommu_driver || 10680f3e72b5SJason Gunthorpe WARN_ON(!group->container_users)) 10690f3e72b5SJason Gunthorpe return -EINVAL; 10700f3e72b5SJason Gunthorpe 10710f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 10720f3e72b5SJason Gunthorpe return -EPERM; 10730f3e72b5SJason Gunthorpe 10740f3e72b5SJason Gunthorpe get_file(group->opened_file); 10750f3e72b5SJason Gunthorpe group->container_users++; 10760f3e72b5SJason Gunthorpe return 0; 10770f3e72b5SJason Gunthorpe } 10780f3e72b5SJason Gunthorpe 10790f3e72b5SJason Gunthorpe static void vfio_device_unassign_container(struct vfio_device *device) 10800f3e72b5SJason Gunthorpe { 10810f3e72b5SJason Gunthorpe down_write(&device->group->group_rwsem); 10820f3e72b5SJason Gunthorpe WARN_ON(device->group->container_users <= 1); 10830f3e72b5SJason Gunthorpe device->group->container_users--; 10840f3e72b5SJason Gunthorpe fput(device->group->opened_file); 10850f3e72b5SJason Gunthorpe up_write(&device->group->group_rwsem); 10860f3e72b5SJason Gunthorpe } 10870f3e72b5SJason Gunthorpe 10880f3e72b5SJason Gunthorpe static struct file *vfio_device_open(struct vfio_device *device) 10890f3e72b5SJason Gunthorpe { 10900f3e72b5SJason Gunthorpe struct vfio_iommu_driver *iommu_driver; 10910f3e72b5SJason Gunthorpe struct file *filep; 10920f3e72b5SJason Gunthorpe int ret; 10930f3e72b5SJason Gunthorpe 10940f3e72b5SJason Gunthorpe down_write(&device->group->group_rwsem); 10950f3e72b5SJason Gunthorpe ret = vfio_device_assign_container(device); 10960f3e72b5SJason Gunthorpe up_write(&device->group->group_rwsem); 10970f3e72b5SJason Gunthorpe if (ret) 10980f3e72b5SJason Gunthorpe return ERR_PTR(ret); 10990f3e72b5SJason Gunthorpe 11000f3e72b5SJason Gunthorpe if (!try_module_get(device->dev->driver->owner)) { 11010f3e72b5SJason Gunthorpe ret = -ENODEV; 11020f3e72b5SJason Gunthorpe goto err_unassign_container; 11030f3e72b5SJason Gunthorpe } 11040f3e72b5SJason Gunthorpe 11050f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 11060f3e72b5SJason Gunthorpe device->open_count++; 11070f3e72b5SJason Gunthorpe if (device->open_count == 1) { 11080f3e72b5SJason Gunthorpe /* 11090f3e72b5SJason Gunthorpe * Here we pass the KVM pointer with the group under the read 11100f3e72b5SJason Gunthorpe * lock. If the device driver will use it, it must obtain a 11110f3e72b5SJason Gunthorpe * reference and release it during close_device. 11120f3e72b5SJason Gunthorpe */ 11130f3e72b5SJason Gunthorpe down_read(&device->group->group_rwsem); 11140f3e72b5SJason Gunthorpe device->kvm = device->group->kvm; 11150f3e72b5SJason Gunthorpe 11160f3e72b5SJason Gunthorpe if (device->ops->open_device) { 11170f3e72b5SJason Gunthorpe ret = device->ops->open_device(device); 11180f3e72b5SJason Gunthorpe if (ret) 11190f3e72b5SJason Gunthorpe goto err_undo_count; 11200f3e72b5SJason Gunthorpe } 11210f3e72b5SJason Gunthorpe 11220f3e72b5SJason Gunthorpe iommu_driver = device->group->container->iommu_driver; 11230f3e72b5SJason Gunthorpe if (iommu_driver && iommu_driver->ops->register_device) 11240f3e72b5SJason Gunthorpe iommu_driver->ops->register_device( 11250f3e72b5SJason Gunthorpe device->group->container->iommu_data, device); 11260f3e72b5SJason Gunthorpe 11270f3e72b5SJason Gunthorpe up_read(&device->group->group_rwsem); 11280f3e72b5SJason Gunthorpe } 11290f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 11300f3e72b5SJason Gunthorpe 11310f3e72b5SJason Gunthorpe /* 11320f3e72b5SJason Gunthorpe * We can't use anon_inode_getfd() because we need to modify 11330f3e72b5SJason Gunthorpe * the f_mode flags directly to allow more than just ioctls 11340f3e72b5SJason Gunthorpe */ 11350f3e72b5SJason Gunthorpe filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, 11360f3e72b5SJason Gunthorpe device, O_RDWR); 11370f3e72b5SJason Gunthorpe if (IS_ERR(filep)) { 11380f3e72b5SJason Gunthorpe ret = PTR_ERR(filep); 11390f3e72b5SJason Gunthorpe goto err_close_device; 11400f3e72b5SJason Gunthorpe } 11410f3e72b5SJason Gunthorpe 11420f3e72b5SJason Gunthorpe /* 11430f3e72b5SJason Gunthorpe * TODO: add an anon_inode interface to do this. 11440f3e72b5SJason Gunthorpe * Appears to be missing by lack of need rather than 11450f3e72b5SJason Gunthorpe * explicitly prevented. Now there's need. 11460f3e72b5SJason Gunthorpe */ 11470f3e72b5SJason Gunthorpe filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); 11480f3e72b5SJason Gunthorpe 11490f3e72b5SJason Gunthorpe if (device->group->type == VFIO_NO_IOMMU) 11500f3e72b5SJason Gunthorpe dev_warn(device->dev, "vfio-noiommu device opened by user " 11510f3e72b5SJason Gunthorpe "(%s:%d)\n", current->comm, task_pid_nr(current)); 11520f3e72b5SJason Gunthorpe /* 11530f3e72b5SJason Gunthorpe * On success the ref of device is moved to the file and 11540f3e72b5SJason Gunthorpe * put in vfio_device_fops_release() 11550f3e72b5SJason Gunthorpe */ 11560f3e72b5SJason Gunthorpe return filep; 11570f3e72b5SJason Gunthorpe 11580f3e72b5SJason Gunthorpe err_close_device: 11590f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 11600f3e72b5SJason Gunthorpe down_read(&device->group->group_rwsem); 11610f3e72b5SJason Gunthorpe if (device->open_count == 1 && device->ops->close_device) { 11620f3e72b5SJason Gunthorpe device->ops->close_device(device); 11630f3e72b5SJason Gunthorpe 11640f3e72b5SJason Gunthorpe iommu_driver = device->group->container->iommu_driver; 11650f3e72b5SJason Gunthorpe if (iommu_driver && iommu_driver->ops->unregister_device) 11660f3e72b5SJason Gunthorpe iommu_driver->ops->unregister_device( 11670f3e72b5SJason Gunthorpe device->group->container->iommu_data, device); 11680f3e72b5SJason Gunthorpe } 11690f3e72b5SJason Gunthorpe err_undo_count: 11700f3e72b5SJason Gunthorpe up_read(&device->group->group_rwsem); 11710f3e72b5SJason Gunthorpe device->open_count--; 11720f3e72b5SJason Gunthorpe if (device->open_count == 0 && device->kvm) 11730f3e72b5SJason Gunthorpe device->kvm = NULL; 11740f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 11750f3e72b5SJason Gunthorpe module_put(device->dev->driver->owner); 11760f3e72b5SJason Gunthorpe err_unassign_container: 11770f3e72b5SJason Gunthorpe vfio_device_unassign_container(device); 11780f3e72b5SJason Gunthorpe return ERR_PTR(ret); 11790f3e72b5SJason Gunthorpe } 11800f3e72b5SJason Gunthorpe 1181*150ee2f9SJason Gunthorpe static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, 1182*150ee2f9SJason Gunthorpe char __user *arg) 11830f3e72b5SJason Gunthorpe { 11840f3e72b5SJason Gunthorpe struct vfio_device *device; 11850f3e72b5SJason Gunthorpe struct file *filep; 1186*150ee2f9SJason Gunthorpe char *buf; 11870f3e72b5SJason Gunthorpe int fdno; 11880f3e72b5SJason Gunthorpe int ret; 11890f3e72b5SJason Gunthorpe 1190*150ee2f9SJason Gunthorpe buf = strndup_user(arg, PAGE_SIZE); 1191*150ee2f9SJason Gunthorpe if (IS_ERR(buf)) 1192*150ee2f9SJason Gunthorpe return PTR_ERR(buf); 1193*150ee2f9SJason Gunthorpe 11940f3e72b5SJason Gunthorpe device = vfio_device_get_from_name(group, buf); 1195*150ee2f9SJason Gunthorpe kfree(buf); 11960f3e72b5SJason Gunthorpe if (IS_ERR(device)) 11970f3e72b5SJason Gunthorpe return PTR_ERR(device); 11980f3e72b5SJason Gunthorpe 11990f3e72b5SJason Gunthorpe fdno = get_unused_fd_flags(O_CLOEXEC); 12000f3e72b5SJason Gunthorpe if (fdno < 0) { 12010f3e72b5SJason Gunthorpe ret = fdno; 12020f3e72b5SJason Gunthorpe goto err_put_device; 12030f3e72b5SJason Gunthorpe } 12040f3e72b5SJason Gunthorpe 12050f3e72b5SJason Gunthorpe filep = vfio_device_open(device); 12060f3e72b5SJason Gunthorpe if (IS_ERR(filep)) { 12070f3e72b5SJason Gunthorpe ret = PTR_ERR(filep); 12080f3e72b5SJason Gunthorpe goto err_put_fdno; 12090f3e72b5SJason Gunthorpe } 12100f3e72b5SJason Gunthorpe 12110f3e72b5SJason Gunthorpe fd_install(fdno, filep); 12120f3e72b5SJason Gunthorpe return fdno; 12130f3e72b5SJason Gunthorpe 12140f3e72b5SJason Gunthorpe err_put_fdno: 12150f3e72b5SJason Gunthorpe put_unused_fd(fdno); 12160f3e72b5SJason Gunthorpe err_put_device: 12170f3e72b5SJason Gunthorpe vfio_device_put(device); 12180f3e72b5SJason Gunthorpe return ret; 12190f3e72b5SJason Gunthorpe } 12200f3e72b5SJason Gunthorpe 12210f3e72b5SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep, 12220f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 12230f3e72b5SJason Gunthorpe { 12240f3e72b5SJason Gunthorpe struct vfio_group *group = filep->private_data; 1225*150ee2f9SJason Gunthorpe void __user *uarg = (void __user *)arg; 12260f3e72b5SJason Gunthorpe long ret = -ENOTTY; 12270f3e72b5SJason Gunthorpe 12280f3e72b5SJason Gunthorpe switch (cmd) { 1229*150ee2f9SJason Gunthorpe case VFIO_GROUP_GET_DEVICE_FD: 1230*150ee2f9SJason Gunthorpe return vfio_group_ioctl_get_device_fd(group, uarg); 12310f3e72b5SJason Gunthorpe case VFIO_GROUP_GET_STATUS: 12320f3e72b5SJason Gunthorpe { 12330f3e72b5SJason Gunthorpe struct vfio_group_status status; 12340f3e72b5SJason Gunthorpe unsigned long minsz; 12350f3e72b5SJason Gunthorpe 12360f3e72b5SJason Gunthorpe minsz = offsetofend(struct vfio_group_status, flags); 12370f3e72b5SJason Gunthorpe 12380f3e72b5SJason Gunthorpe if (copy_from_user(&status, (void __user *)arg, minsz)) 12390f3e72b5SJason Gunthorpe return -EFAULT; 12400f3e72b5SJason Gunthorpe 12410f3e72b5SJason Gunthorpe if (status.argsz < minsz) 12420f3e72b5SJason Gunthorpe return -EINVAL; 12430f3e72b5SJason Gunthorpe 12440f3e72b5SJason Gunthorpe status.flags = 0; 12450f3e72b5SJason Gunthorpe 12460f3e72b5SJason Gunthorpe down_read(&group->group_rwsem); 12470f3e72b5SJason Gunthorpe if (group->container) 12480f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | 12490f3e72b5SJason Gunthorpe VFIO_GROUP_FLAGS_VIABLE; 12500f3e72b5SJason Gunthorpe else if (!iommu_group_dma_owner_claimed(group->iommu_group)) 12510f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_VIABLE; 12520f3e72b5SJason Gunthorpe up_read(&group->group_rwsem); 12530f3e72b5SJason Gunthorpe 12540f3e72b5SJason Gunthorpe if (copy_to_user((void __user *)arg, &status, minsz)) 12550f3e72b5SJason Gunthorpe return -EFAULT; 12560f3e72b5SJason Gunthorpe 12570f3e72b5SJason Gunthorpe ret = 0; 12580f3e72b5SJason Gunthorpe break; 12590f3e72b5SJason Gunthorpe } 12600f3e72b5SJason Gunthorpe case VFIO_GROUP_SET_CONTAINER: 12610f3e72b5SJason Gunthorpe { 12620f3e72b5SJason Gunthorpe int fd; 12630f3e72b5SJason Gunthorpe 12640f3e72b5SJason Gunthorpe if (get_user(fd, (int __user *)arg)) 12650f3e72b5SJason Gunthorpe return -EFAULT; 12660f3e72b5SJason Gunthorpe 12670f3e72b5SJason Gunthorpe if (fd < 0) 12680f3e72b5SJason Gunthorpe return -EINVAL; 12690f3e72b5SJason Gunthorpe 12700f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 12710f3e72b5SJason Gunthorpe ret = vfio_group_set_container(group, fd); 12720f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 12730f3e72b5SJason Gunthorpe break; 12740f3e72b5SJason Gunthorpe } 12750f3e72b5SJason Gunthorpe case VFIO_GROUP_UNSET_CONTAINER: 12760f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 12770f3e72b5SJason Gunthorpe ret = vfio_group_unset_container(group); 12780f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 12790f3e72b5SJason Gunthorpe break; 12800f3e72b5SJason Gunthorpe } 12810f3e72b5SJason Gunthorpe 12820f3e72b5SJason Gunthorpe return ret; 12830f3e72b5SJason Gunthorpe } 12840f3e72b5SJason Gunthorpe 12850f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep) 12860f3e72b5SJason Gunthorpe { 12870f3e72b5SJason Gunthorpe struct vfio_group *group = 12880f3e72b5SJason Gunthorpe container_of(inode->i_cdev, struct vfio_group, cdev); 12890f3e72b5SJason Gunthorpe int ret; 12900f3e72b5SJason Gunthorpe 12910f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 12920f3e72b5SJason Gunthorpe 12930f3e72b5SJason Gunthorpe /* users can be zero if this races with vfio_group_put() */ 12940f3e72b5SJason Gunthorpe if (!refcount_inc_not_zero(&group->users)) { 12950f3e72b5SJason Gunthorpe ret = -ENODEV; 12960f3e72b5SJason Gunthorpe goto err_unlock; 12970f3e72b5SJason Gunthorpe } 12980f3e72b5SJason Gunthorpe 12990f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { 13000f3e72b5SJason Gunthorpe ret = -EPERM; 13010f3e72b5SJason Gunthorpe goto err_put; 13020f3e72b5SJason Gunthorpe } 13030f3e72b5SJason Gunthorpe 13040f3e72b5SJason Gunthorpe /* 13050f3e72b5SJason Gunthorpe * Do we need multiple instances of the group open? Seems not. 13060f3e72b5SJason Gunthorpe */ 13070f3e72b5SJason Gunthorpe if (group->opened_file) { 13080f3e72b5SJason Gunthorpe ret = -EBUSY; 13090f3e72b5SJason Gunthorpe goto err_put; 13100f3e72b5SJason Gunthorpe } 13110f3e72b5SJason Gunthorpe group->opened_file = filep; 13120f3e72b5SJason Gunthorpe filep->private_data = group; 13130f3e72b5SJason Gunthorpe 13140f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 13150f3e72b5SJason Gunthorpe return 0; 13160f3e72b5SJason Gunthorpe err_put: 13170f3e72b5SJason Gunthorpe vfio_group_put(group); 13180f3e72b5SJason Gunthorpe err_unlock: 13190f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 13200f3e72b5SJason Gunthorpe return ret; 13210f3e72b5SJason Gunthorpe } 13220f3e72b5SJason Gunthorpe 13230f3e72b5SJason Gunthorpe static int vfio_group_fops_release(struct inode *inode, struct file *filep) 13240f3e72b5SJason Gunthorpe { 13250f3e72b5SJason Gunthorpe struct vfio_group *group = filep->private_data; 13260f3e72b5SJason Gunthorpe 13270f3e72b5SJason Gunthorpe filep->private_data = NULL; 13280f3e72b5SJason Gunthorpe 13290f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 13300f3e72b5SJason Gunthorpe /* 13310f3e72b5SJason Gunthorpe * Device FDs hold a group file reference, therefore the group release 13320f3e72b5SJason Gunthorpe * is only called when there are no open devices. 13330f3e72b5SJason Gunthorpe */ 13340f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 13350f3e72b5SJason Gunthorpe if (group->container) { 13360f3e72b5SJason Gunthorpe WARN_ON(group->container_users != 1); 13370f3e72b5SJason Gunthorpe __vfio_group_unset_container(group); 13380f3e72b5SJason Gunthorpe } 13390f3e72b5SJason Gunthorpe group->opened_file = NULL; 13400f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 13410f3e72b5SJason Gunthorpe 13420f3e72b5SJason Gunthorpe vfio_group_put(group); 13430f3e72b5SJason Gunthorpe 13440f3e72b5SJason Gunthorpe return 0; 13450f3e72b5SJason Gunthorpe } 13460f3e72b5SJason Gunthorpe 13470f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops = { 13480f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 13490f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_group_fops_unl_ioctl, 13500f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 13510f3e72b5SJason Gunthorpe .open = vfio_group_fops_open, 13520f3e72b5SJason Gunthorpe .release = vfio_group_fops_release, 13530f3e72b5SJason Gunthorpe }; 13540f3e72b5SJason Gunthorpe 13550f3e72b5SJason Gunthorpe /* 13560f3e72b5SJason Gunthorpe * VFIO Device fd 13570f3e72b5SJason Gunthorpe */ 13580f3e72b5SJason Gunthorpe static int vfio_device_fops_release(struct inode *inode, struct file *filep) 13590f3e72b5SJason Gunthorpe { 13600f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 13610f3e72b5SJason Gunthorpe struct vfio_iommu_driver *iommu_driver; 13620f3e72b5SJason Gunthorpe 13630f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 13640f3e72b5SJason Gunthorpe vfio_assert_device_open(device); 13650f3e72b5SJason Gunthorpe down_read(&device->group->group_rwsem); 13660f3e72b5SJason Gunthorpe if (device->open_count == 1 && device->ops->close_device) 13670f3e72b5SJason Gunthorpe device->ops->close_device(device); 13680f3e72b5SJason Gunthorpe 13690f3e72b5SJason Gunthorpe iommu_driver = device->group->container->iommu_driver; 13700f3e72b5SJason Gunthorpe if (iommu_driver && iommu_driver->ops->unregister_device) 13710f3e72b5SJason Gunthorpe iommu_driver->ops->unregister_device( 13720f3e72b5SJason Gunthorpe device->group->container->iommu_data, device); 13730f3e72b5SJason Gunthorpe up_read(&device->group->group_rwsem); 13740f3e72b5SJason Gunthorpe device->open_count--; 13750f3e72b5SJason Gunthorpe if (device->open_count == 0) 13760f3e72b5SJason Gunthorpe device->kvm = NULL; 13770f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 13780f3e72b5SJason Gunthorpe 13790f3e72b5SJason Gunthorpe module_put(device->dev->driver->owner); 13800f3e72b5SJason Gunthorpe 13810f3e72b5SJason Gunthorpe vfio_device_unassign_container(device); 13820f3e72b5SJason Gunthorpe 13830f3e72b5SJason Gunthorpe vfio_device_put(device); 13840f3e72b5SJason Gunthorpe 13850f3e72b5SJason Gunthorpe return 0; 13860f3e72b5SJason Gunthorpe } 13870f3e72b5SJason Gunthorpe 13880f3e72b5SJason Gunthorpe /* 13890f3e72b5SJason Gunthorpe * vfio_mig_get_next_state - Compute the next step in the FSM 13900f3e72b5SJason Gunthorpe * @cur_fsm - The current state the device is in 13910f3e72b5SJason Gunthorpe * @new_fsm - The target state to reach 13920f3e72b5SJason Gunthorpe * @next_fsm - Pointer to the next step to get to new_fsm 13930f3e72b5SJason Gunthorpe * 13940f3e72b5SJason Gunthorpe * Return 0 upon success, otherwise -errno 13950f3e72b5SJason Gunthorpe * Upon success the next step in the state progression between cur_fsm and 13960f3e72b5SJason Gunthorpe * new_fsm will be set in next_fsm. 13970f3e72b5SJason Gunthorpe * 13980f3e72b5SJason Gunthorpe * This breaks down requests for combination transitions into smaller steps and 13990f3e72b5SJason Gunthorpe * returns the next step to get to new_fsm. The function may need to be called 14000f3e72b5SJason Gunthorpe * multiple times before reaching new_fsm. 14010f3e72b5SJason Gunthorpe * 14020f3e72b5SJason Gunthorpe */ 14030f3e72b5SJason Gunthorpe int vfio_mig_get_next_state(struct vfio_device *device, 14040f3e72b5SJason Gunthorpe enum vfio_device_mig_state cur_fsm, 14050f3e72b5SJason Gunthorpe enum vfio_device_mig_state new_fsm, 14060f3e72b5SJason Gunthorpe enum vfio_device_mig_state *next_fsm) 14070f3e72b5SJason Gunthorpe { 14080f3e72b5SJason Gunthorpe enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; 14090f3e72b5SJason Gunthorpe /* 14100f3e72b5SJason Gunthorpe * The coding in this table requires the driver to implement the 14110f3e72b5SJason Gunthorpe * following FSM arcs: 14120f3e72b5SJason Gunthorpe * RESUMING -> STOP 14130f3e72b5SJason Gunthorpe * STOP -> RESUMING 14140f3e72b5SJason Gunthorpe * STOP -> STOP_COPY 14150f3e72b5SJason Gunthorpe * STOP_COPY -> STOP 14160f3e72b5SJason Gunthorpe * 14170f3e72b5SJason Gunthorpe * If P2P is supported then the driver must also implement these FSM 14180f3e72b5SJason Gunthorpe * arcs: 14190f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P 14200f3e72b5SJason Gunthorpe * RUNNING_P2P -> RUNNING 14210f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP 14220f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P 14230f3e72b5SJason Gunthorpe * Without P2P the driver must implement: 14240f3e72b5SJason Gunthorpe * RUNNING -> STOP 14250f3e72b5SJason Gunthorpe * STOP -> RUNNING 14260f3e72b5SJason Gunthorpe * 14270f3e72b5SJason Gunthorpe * The coding will step through multiple states for some combination 14280f3e72b5SJason Gunthorpe * transitions; if all optional features are supported, this means the 14290f3e72b5SJason Gunthorpe * following ones: 14300f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P 14310f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 14320f3e72b5SJason Gunthorpe * RESUMING -> STOP -> STOP_COPY 14330f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP 14340f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 14350f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 14360f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> RESUMING 14370f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> STOP_COPY 14380f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P -> RUNNING 14390f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RESUMING 14400f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P 14410f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 14420f3e72b5SJason Gunthorpe */ 14430f3e72b5SJason Gunthorpe static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 14440f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = { 14450f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 14460f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 14470f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 14480f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 14490f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 14500f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14510f3e72b5SJason Gunthorpe }, 14520f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = { 14530f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 14540f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 14550f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 14560f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 14570f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 14580f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14590f3e72b5SJason Gunthorpe }, 14600f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = { 14610f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 14620f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 14630f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 14640f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 14650f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 14660f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14670f3e72b5SJason Gunthorpe }, 14680f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = { 14690f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 14700f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 14710f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 14720f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 14730f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 14740f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14750f3e72b5SJason Gunthorpe }, 14760f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = { 14770f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 14780f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 14790f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 14800f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 14810f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 14820f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14830f3e72b5SJason Gunthorpe }, 14840f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = { 14850f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, 14860f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, 14870f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, 14880f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, 14890f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, 14900f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 14910f3e72b5SJason Gunthorpe }, 14920f3e72b5SJason Gunthorpe }; 14930f3e72b5SJason Gunthorpe 14940f3e72b5SJason Gunthorpe static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { 14950f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, 14960f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, 14970f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, 14980f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, 14990f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = 15000f3e72b5SJason Gunthorpe VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, 15010f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = ~0U, 15020f3e72b5SJason Gunthorpe }; 15030f3e72b5SJason Gunthorpe 15040f3e72b5SJason Gunthorpe if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 15050f3e72b5SJason Gunthorpe (state_flags_table[cur_fsm] & device->migration_flags) != 15060f3e72b5SJason Gunthorpe state_flags_table[cur_fsm])) 15070f3e72b5SJason Gunthorpe return -EINVAL; 15080f3e72b5SJason Gunthorpe 15090f3e72b5SJason Gunthorpe if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 15100f3e72b5SJason Gunthorpe (state_flags_table[new_fsm] & device->migration_flags) != 15110f3e72b5SJason Gunthorpe state_flags_table[new_fsm]) 15120f3e72b5SJason Gunthorpe return -EINVAL; 15130f3e72b5SJason Gunthorpe 15140f3e72b5SJason Gunthorpe /* 15150f3e72b5SJason Gunthorpe * Arcs touching optional and unsupported states are skipped over. The 15160f3e72b5SJason Gunthorpe * driver will instead see an arc from the original state to the next 15170f3e72b5SJason Gunthorpe * logical state, as per the above comment. 15180f3e72b5SJason Gunthorpe */ 15190f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; 15200f3e72b5SJason Gunthorpe while ((state_flags_table[*next_fsm] & device->migration_flags) != 15210f3e72b5SJason Gunthorpe state_flags_table[*next_fsm]) 15220f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; 15230f3e72b5SJason Gunthorpe 15240f3e72b5SJason Gunthorpe return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL; 15250f3e72b5SJason Gunthorpe } 15260f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_mig_get_next_state); 15270f3e72b5SJason Gunthorpe 15280f3e72b5SJason Gunthorpe /* 15290f3e72b5SJason Gunthorpe * Convert the drivers's struct file into a FD number and return it to userspace 15300f3e72b5SJason Gunthorpe */ 15310f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, 15320f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state *mig) 15330f3e72b5SJason Gunthorpe { 15340f3e72b5SJason Gunthorpe int ret; 15350f3e72b5SJason Gunthorpe int fd; 15360f3e72b5SJason Gunthorpe 15370f3e72b5SJason Gunthorpe fd = get_unused_fd_flags(O_CLOEXEC); 15380f3e72b5SJason Gunthorpe if (fd < 0) { 15390f3e72b5SJason Gunthorpe ret = fd; 15400f3e72b5SJason Gunthorpe goto out_fput; 15410f3e72b5SJason Gunthorpe } 15420f3e72b5SJason Gunthorpe 15430f3e72b5SJason Gunthorpe mig->data_fd = fd; 15440f3e72b5SJason Gunthorpe if (copy_to_user(arg, mig, sizeof(*mig))) { 15450f3e72b5SJason Gunthorpe ret = -EFAULT; 15460f3e72b5SJason Gunthorpe goto out_put_unused; 15470f3e72b5SJason Gunthorpe } 15480f3e72b5SJason Gunthorpe fd_install(fd, filp); 15490f3e72b5SJason Gunthorpe return 0; 15500f3e72b5SJason Gunthorpe 15510f3e72b5SJason Gunthorpe out_put_unused: 15520f3e72b5SJason Gunthorpe put_unused_fd(fd); 15530f3e72b5SJason Gunthorpe out_fput: 15540f3e72b5SJason Gunthorpe fput(filp); 15550f3e72b5SJason Gunthorpe return ret; 15560f3e72b5SJason Gunthorpe } 15570f3e72b5SJason Gunthorpe 15580f3e72b5SJason Gunthorpe static int 15590f3e72b5SJason Gunthorpe vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, 15600f3e72b5SJason Gunthorpe u32 flags, void __user *arg, 15610f3e72b5SJason Gunthorpe size_t argsz) 15620f3e72b5SJason Gunthorpe { 15630f3e72b5SJason Gunthorpe size_t minsz = 15640f3e72b5SJason Gunthorpe offsetofend(struct vfio_device_feature_mig_state, data_fd); 15650f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state mig; 15660f3e72b5SJason Gunthorpe struct file *filp = NULL; 15670f3e72b5SJason Gunthorpe int ret; 15680f3e72b5SJason Gunthorpe 15690f3e72b5SJason Gunthorpe if (!device->mig_ops) 15700f3e72b5SJason Gunthorpe return -ENOTTY; 15710f3e72b5SJason Gunthorpe 15720f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, 15730f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_SET | 15740f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET, 15750f3e72b5SJason Gunthorpe sizeof(mig)); 15760f3e72b5SJason Gunthorpe if (ret != 1) 15770f3e72b5SJason Gunthorpe return ret; 15780f3e72b5SJason Gunthorpe 15790f3e72b5SJason Gunthorpe if (copy_from_user(&mig, arg, minsz)) 15800f3e72b5SJason Gunthorpe return -EFAULT; 15810f3e72b5SJason Gunthorpe 15820f3e72b5SJason Gunthorpe if (flags & VFIO_DEVICE_FEATURE_GET) { 15830f3e72b5SJason Gunthorpe enum vfio_device_mig_state curr_state; 15840f3e72b5SJason Gunthorpe 15850f3e72b5SJason Gunthorpe ret = device->mig_ops->migration_get_state(device, 15860f3e72b5SJason Gunthorpe &curr_state); 15870f3e72b5SJason Gunthorpe if (ret) 15880f3e72b5SJason Gunthorpe return ret; 15890f3e72b5SJason Gunthorpe mig.device_state = curr_state; 15900f3e72b5SJason Gunthorpe goto out_copy; 15910f3e72b5SJason Gunthorpe } 15920f3e72b5SJason Gunthorpe 15930f3e72b5SJason Gunthorpe /* Handle the VFIO_DEVICE_FEATURE_SET */ 15940f3e72b5SJason Gunthorpe filp = device->mig_ops->migration_set_state(device, mig.device_state); 15950f3e72b5SJason Gunthorpe if (IS_ERR(filp) || !filp) 15960f3e72b5SJason Gunthorpe goto out_copy; 15970f3e72b5SJason Gunthorpe 15980f3e72b5SJason Gunthorpe return vfio_ioct_mig_return_fd(filp, arg, &mig); 15990f3e72b5SJason Gunthorpe out_copy: 16000f3e72b5SJason Gunthorpe mig.data_fd = -1; 16010f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 16020f3e72b5SJason Gunthorpe return -EFAULT; 16030f3e72b5SJason Gunthorpe if (IS_ERR(filp)) 16040f3e72b5SJason Gunthorpe return PTR_ERR(filp); 16050f3e72b5SJason Gunthorpe return 0; 16060f3e72b5SJason Gunthorpe } 16070f3e72b5SJason Gunthorpe 16080f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device, 16090f3e72b5SJason Gunthorpe u32 flags, void __user *arg, 16100f3e72b5SJason Gunthorpe size_t argsz) 16110f3e72b5SJason Gunthorpe { 16120f3e72b5SJason Gunthorpe struct vfio_device_feature_migration mig = { 16130f3e72b5SJason Gunthorpe .flags = device->migration_flags, 16140f3e72b5SJason Gunthorpe }; 16150f3e72b5SJason Gunthorpe int ret; 16160f3e72b5SJason Gunthorpe 16170f3e72b5SJason Gunthorpe if (!device->mig_ops) 16180f3e72b5SJason Gunthorpe return -ENOTTY; 16190f3e72b5SJason Gunthorpe 16200f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, 16210f3e72b5SJason Gunthorpe sizeof(mig)); 16220f3e72b5SJason Gunthorpe if (ret != 1) 16230f3e72b5SJason Gunthorpe return ret; 16240f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 16250f3e72b5SJason Gunthorpe return -EFAULT; 16260f3e72b5SJason Gunthorpe return 0; 16270f3e72b5SJason Gunthorpe } 16280f3e72b5SJason Gunthorpe 16290f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature(struct vfio_device *device, 16300f3e72b5SJason Gunthorpe struct vfio_device_feature __user *arg) 16310f3e72b5SJason Gunthorpe { 16320f3e72b5SJason Gunthorpe size_t minsz = offsetofend(struct vfio_device_feature, flags); 16330f3e72b5SJason Gunthorpe struct vfio_device_feature feature; 16340f3e72b5SJason Gunthorpe 16350f3e72b5SJason Gunthorpe if (copy_from_user(&feature, arg, minsz)) 16360f3e72b5SJason Gunthorpe return -EFAULT; 16370f3e72b5SJason Gunthorpe 16380f3e72b5SJason Gunthorpe if (feature.argsz < minsz) 16390f3e72b5SJason Gunthorpe return -EINVAL; 16400f3e72b5SJason Gunthorpe 16410f3e72b5SJason Gunthorpe /* Check unknown flags */ 16420f3e72b5SJason Gunthorpe if (feature.flags & 16430f3e72b5SJason Gunthorpe ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET | 16440f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE)) 16450f3e72b5SJason Gunthorpe return -EINVAL; 16460f3e72b5SJason Gunthorpe 16470f3e72b5SJason Gunthorpe /* GET & SET are mutually exclusive except with PROBE */ 16480f3e72b5SJason Gunthorpe if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && 16490f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_SET) && 16500f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_GET)) 16510f3e72b5SJason Gunthorpe return -EINVAL; 16520f3e72b5SJason Gunthorpe 16530f3e72b5SJason Gunthorpe switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { 16540f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIGRATION: 16550f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_migration( 16560f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 16570f3e72b5SJason Gunthorpe feature.argsz - minsz); 16580f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: 16590f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_mig_device_state( 16600f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 16610f3e72b5SJason Gunthorpe feature.argsz - minsz); 16620f3e72b5SJason Gunthorpe default: 16630f3e72b5SJason Gunthorpe if (unlikely(!device->ops->device_feature)) 16640f3e72b5SJason Gunthorpe return -EINVAL; 16650f3e72b5SJason Gunthorpe return device->ops->device_feature(device, feature.flags, 16660f3e72b5SJason Gunthorpe arg->data, 16670f3e72b5SJason Gunthorpe feature.argsz - minsz); 16680f3e72b5SJason Gunthorpe } 16690f3e72b5SJason Gunthorpe } 16700f3e72b5SJason Gunthorpe 16710f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep, 16720f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 16730f3e72b5SJason Gunthorpe { 16740f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 16750f3e72b5SJason Gunthorpe 16760f3e72b5SJason Gunthorpe switch (cmd) { 16770f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE: 16780f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature(device, (void __user *)arg); 16790f3e72b5SJason Gunthorpe default: 16800f3e72b5SJason Gunthorpe if (unlikely(!device->ops->ioctl)) 16810f3e72b5SJason Gunthorpe return -EINVAL; 16820f3e72b5SJason Gunthorpe return device->ops->ioctl(device, cmd, arg); 16830f3e72b5SJason Gunthorpe } 16840f3e72b5SJason Gunthorpe } 16850f3e72b5SJason Gunthorpe 16860f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, 16870f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 16880f3e72b5SJason Gunthorpe { 16890f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 16900f3e72b5SJason Gunthorpe 16910f3e72b5SJason Gunthorpe if (unlikely(!device->ops->read)) 16920f3e72b5SJason Gunthorpe return -EINVAL; 16930f3e72b5SJason Gunthorpe 16940f3e72b5SJason Gunthorpe return device->ops->read(device, buf, count, ppos); 16950f3e72b5SJason Gunthorpe } 16960f3e72b5SJason Gunthorpe 16970f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep, 16980f3e72b5SJason Gunthorpe const char __user *buf, 16990f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 17000f3e72b5SJason Gunthorpe { 17010f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 17020f3e72b5SJason Gunthorpe 17030f3e72b5SJason Gunthorpe if (unlikely(!device->ops->write)) 17040f3e72b5SJason Gunthorpe return -EINVAL; 17050f3e72b5SJason Gunthorpe 17060f3e72b5SJason Gunthorpe return device->ops->write(device, buf, count, ppos); 17070f3e72b5SJason Gunthorpe } 17080f3e72b5SJason Gunthorpe 17090f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 17100f3e72b5SJason Gunthorpe { 17110f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 17120f3e72b5SJason Gunthorpe 17130f3e72b5SJason Gunthorpe if (unlikely(!device->ops->mmap)) 17140f3e72b5SJason Gunthorpe return -EINVAL; 17150f3e72b5SJason Gunthorpe 17160f3e72b5SJason Gunthorpe return device->ops->mmap(device, vma); 17170f3e72b5SJason Gunthorpe } 17180f3e72b5SJason Gunthorpe 17190f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops = { 17200f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 17210f3e72b5SJason Gunthorpe .release = vfio_device_fops_release, 17220f3e72b5SJason Gunthorpe .read = vfio_device_fops_read, 17230f3e72b5SJason Gunthorpe .write = vfio_device_fops_write, 17240f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_device_fops_unl_ioctl, 17250f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 17260f3e72b5SJason Gunthorpe .mmap = vfio_device_fops_mmap, 17270f3e72b5SJason Gunthorpe }; 17280f3e72b5SJason Gunthorpe 17290f3e72b5SJason Gunthorpe /** 17300f3e72b5SJason Gunthorpe * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file 17310f3e72b5SJason Gunthorpe * @file: VFIO group file 17320f3e72b5SJason Gunthorpe * 17330f3e72b5SJason Gunthorpe * The returned iommu_group is valid as long as a ref is held on the file. 17340f3e72b5SJason Gunthorpe */ 17350f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file) 17360f3e72b5SJason Gunthorpe { 17370f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 17380f3e72b5SJason Gunthorpe 17390f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 17400f3e72b5SJason Gunthorpe return NULL; 17410f3e72b5SJason Gunthorpe return group->iommu_group; 17420f3e72b5SJason Gunthorpe } 17430f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group); 17440f3e72b5SJason Gunthorpe 17450f3e72b5SJason Gunthorpe /** 17460f3e72b5SJason Gunthorpe * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file 17470f3e72b5SJason Gunthorpe * is always CPU cache coherent 17480f3e72b5SJason Gunthorpe * @file: VFIO group file 17490f3e72b5SJason Gunthorpe * 17500f3e72b5SJason Gunthorpe * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop 17510f3e72b5SJason Gunthorpe * bit in DMA transactions. A return of false indicates that the user has 17520f3e72b5SJason Gunthorpe * rights to access additional instructions such as wbinvd on x86. 17530f3e72b5SJason Gunthorpe */ 17540f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file) 17550f3e72b5SJason Gunthorpe { 17560f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 17570f3e72b5SJason Gunthorpe bool ret; 17580f3e72b5SJason Gunthorpe 17590f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 17600f3e72b5SJason Gunthorpe return true; 17610f3e72b5SJason Gunthorpe 17620f3e72b5SJason Gunthorpe down_read(&group->group_rwsem); 17630f3e72b5SJason Gunthorpe if (group->container) { 17640f3e72b5SJason Gunthorpe ret = vfio_ioctl_check_extension(group->container, 17650f3e72b5SJason Gunthorpe VFIO_DMA_CC_IOMMU); 17660f3e72b5SJason Gunthorpe } else { 17670f3e72b5SJason Gunthorpe /* 17680f3e72b5SJason Gunthorpe * Since the coherency state is determined only once a container 17690f3e72b5SJason Gunthorpe * is attached the user must do so before they can prove they 17700f3e72b5SJason Gunthorpe * have permission. 17710f3e72b5SJason Gunthorpe */ 17720f3e72b5SJason Gunthorpe ret = true; 17730f3e72b5SJason Gunthorpe } 17740f3e72b5SJason Gunthorpe up_read(&group->group_rwsem); 17750f3e72b5SJason Gunthorpe return ret; 17760f3e72b5SJason Gunthorpe } 17770f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); 17780f3e72b5SJason Gunthorpe 17790f3e72b5SJason Gunthorpe /** 17800f3e72b5SJason Gunthorpe * vfio_file_set_kvm - Link a kvm with VFIO drivers 17810f3e72b5SJason Gunthorpe * @file: VFIO group file 17820f3e72b5SJason Gunthorpe * @kvm: KVM to link 17830f3e72b5SJason Gunthorpe * 17840f3e72b5SJason Gunthorpe * When a VFIO device is first opened the KVM will be available in 17850f3e72b5SJason Gunthorpe * device->kvm if one was associated with the group. 17860f3e72b5SJason Gunthorpe */ 17870f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm) 17880f3e72b5SJason Gunthorpe { 17890f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 17900f3e72b5SJason Gunthorpe 17910f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 17920f3e72b5SJason Gunthorpe return; 17930f3e72b5SJason Gunthorpe 17940f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 17950f3e72b5SJason Gunthorpe group->kvm = kvm; 17960f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 17970f3e72b5SJason Gunthorpe } 17980f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm); 17990f3e72b5SJason Gunthorpe 18000f3e72b5SJason Gunthorpe /** 18010f3e72b5SJason Gunthorpe * vfio_file_has_dev - True if the VFIO file is a handle for device 18020f3e72b5SJason Gunthorpe * @file: VFIO file to check 18030f3e72b5SJason Gunthorpe * @device: Device that must be part of the file 18040f3e72b5SJason Gunthorpe * 18050f3e72b5SJason Gunthorpe * Returns true if given file has permission to manipulate the given device. 18060f3e72b5SJason Gunthorpe */ 18070f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device) 18080f3e72b5SJason Gunthorpe { 18090f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 18100f3e72b5SJason Gunthorpe 18110f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 18120f3e72b5SJason Gunthorpe return false; 18130f3e72b5SJason Gunthorpe 18140f3e72b5SJason Gunthorpe return group == device->group; 18150f3e72b5SJason Gunthorpe } 18160f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev); 18170f3e72b5SJason Gunthorpe 18180f3e72b5SJason Gunthorpe /* 18190f3e72b5SJason Gunthorpe * Sub-module support 18200f3e72b5SJason Gunthorpe */ 18210f3e72b5SJason Gunthorpe /* 18220f3e72b5SJason Gunthorpe * Helper for managing a buffer of info chain capabilities, allocate or 18230f3e72b5SJason Gunthorpe * reallocate a buffer with additional @size, filling in @id and @version 18240f3e72b5SJason Gunthorpe * of the capability. A pointer to the new capability is returned. 18250f3e72b5SJason Gunthorpe * 18260f3e72b5SJason Gunthorpe * NB. The chain is based at the head of the buffer, so new entries are 18270f3e72b5SJason Gunthorpe * added to the tail, vfio_info_cap_shift() should be called to fixup the 18280f3e72b5SJason Gunthorpe * next offsets prior to copying to the user buffer. 18290f3e72b5SJason Gunthorpe */ 18300f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, 18310f3e72b5SJason Gunthorpe size_t size, u16 id, u16 version) 18320f3e72b5SJason Gunthorpe { 18330f3e72b5SJason Gunthorpe void *buf; 18340f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header, *tmp; 18350f3e72b5SJason Gunthorpe 18360f3e72b5SJason Gunthorpe buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); 18370f3e72b5SJason Gunthorpe if (!buf) { 18380f3e72b5SJason Gunthorpe kfree(caps->buf); 18390f3e72b5SJason Gunthorpe caps->buf = NULL; 18400f3e72b5SJason Gunthorpe caps->size = 0; 18410f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 18420f3e72b5SJason Gunthorpe } 18430f3e72b5SJason Gunthorpe 18440f3e72b5SJason Gunthorpe caps->buf = buf; 18450f3e72b5SJason Gunthorpe header = buf + caps->size; 18460f3e72b5SJason Gunthorpe 18470f3e72b5SJason Gunthorpe /* Eventually copied to user buffer, zero */ 18480f3e72b5SJason Gunthorpe memset(header, 0, size); 18490f3e72b5SJason Gunthorpe 18500f3e72b5SJason Gunthorpe header->id = id; 18510f3e72b5SJason Gunthorpe header->version = version; 18520f3e72b5SJason Gunthorpe 18530f3e72b5SJason Gunthorpe /* Add to the end of the capability chain */ 18540f3e72b5SJason Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next) 18550f3e72b5SJason Gunthorpe ; /* nothing */ 18560f3e72b5SJason Gunthorpe 18570f3e72b5SJason Gunthorpe tmp->next = caps->size; 18580f3e72b5SJason Gunthorpe caps->size += size; 18590f3e72b5SJason Gunthorpe 18600f3e72b5SJason Gunthorpe return header; 18610f3e72b5SJason Gunthorpe } 18620f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add); 18630f3e72b5SJason Gunthorpe 18640f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) 18650f3e72b5SJason Gunthorpe { 18660f3e72b5SJason Gunthorpe struct vfio_info_cap_header *tmp; 18670f3e72b5SJason Gunthorpe void *buf = (void *)caps->buf; 18680f3e72b5SJason Gunthorpe 18690f3e72b5SJason Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) 18700f3e72b5SJason Gunthorpe tmp->next += offset; 18710f3e72b5SJason Gunthorpe } 18720f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift); 18730f3e72b5SJason Gunthorpe 18740f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps, 18750f3e72b5SJason Gunthorpe struct vfio_info_cap_header *cap, size_t size) 18760f3e72b5SJason Gunthorpe { 18770f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header; 18780f3e72b5SJason Gunthorpe 18790f3e72b5SJason Gunthorpe header = vfio_info_cap_add(caps, size, cap->id, cap->version); 18800f3e72b5SJason Gunthorpe if (IS_ERR(header)) 18810f3e72b5SJason Gunthorpe return PTR_ERR(header); 18820f3e72b5SJason Gunthorpe 18830f3e72b5SJason Gunthorpe memcpy(header + 1, cap + 1, size - sizeof(*header)); 18840f3e72b5SJason Gunthorpe 18850f3e72b5SJason Gunthorpe return 0; 18860f3e72b5SJason Gunthorpe } 18870f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability); 18880f3e72b5SJason Gunthorpe 18890f3e72b5SJason Gunthorpe int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, 18900f3e72b5SJason Gunthorpe int max_irq_type, size_t *data_size) 18910f3e72b5SJason Gunthorpe { 18920f3e72b5SJason Gunthorpe unsigned long minsz; 18930f3e72b5SJason Gunthorpe size_t size; 18940f3e72b5SJason Gunthorpe 18950f3e72b5SJason Gunthorpe minsz = offsetofend(struct vfio_irq_set, count); 18960f3e72b5SJason Gunthorpe 18970f3e72b5SJason Gunthorpe if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || 18980f3e72b5SJason Gunthorpe (hdr->count >= (U32_MAX - hdr->start)) || 18990f3e72b5SJason Gunthorpe (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | 19000f3e72b5SJason Gunthorpe VFIO_IRQ_SET_ACTION_TYPE_MASK))) 19010f3e72b5SJason Gunthorpe return -EINVAL; 19020f3e72b5SJason Gunthorpe 19030f3e72b5SJason Gunthorpe if (data_size) 19040f3e72b5SJason Gunthorpe *data_size = 0; 19050f3e72b5SJason Gunthorpe 19060f3e72b5SJason Gunthorpe if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) 19070f3e72b5SJason Gunthorpe return -EINVAL; 19080f3e72b5SJason Gunthorpe 19090f3e72b5SJason Gunthorpe switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { 19100f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_NONE: 19110f3e72b5SJason Gunthorpe size = 0; 19120f3e72b5SJason Gunthorpe break; 19130f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_BOOL: 19140f3e72b5SJason Gunthorpe size = sizeof(uint8_t); 19150f3e72b5SJason Gunthorpe break; 19160f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_EVENTFD: 19170f3e72b5SJason Gunthorpe size = sizeof(int32_t); 19180f3e72b5SJason Gunthorpe break; 19190f3e72b5SJason Gunthorpe default: 19200f3e72b5SJason Gunthorpe return -EINVAL; 19210f3e72b5SJason Gunthorpe } 19220f3e72b5SJason Gunthorpe 19230f3e72b5SJason Gunthorpe if (size) { 19240f3e72b5SJason Gunthorpe if (hdr->argsz - minsz < hdr->count * size) 19250f3e72b5SJason Gunthorpe return -EINVAL; 19260f3e72b5SJason Gunthorpe 19270f3e72b5SJason Gunthorpe if (!data_size) 19280f3e72b5SJason Gunthorpe return -EINVAL; 19290f3e72b5SJason Gunthorpe 19300f3e72b5SJason Gunthorpe *data_size = hdr->count * size; 19310f3e72b5SJason Gunthorpe } 19320f3e72b5SJason Gunthorpe 19330f3e72b5SJason Gunthorpe return 0; 19340f3e72b5SJason Gunthorpe } 19350f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); 19360f3e72b5SJason Gunthorpe 19370f3e72b5SJason Gunthorpe /* 19380f3e72b5SJason Gunthorpe * Pin contiguous user pages and return their associated host pages for local 19390f3e72b5SJason Gunthorpe * domain only. 19400f3e72b5SJason Gunthorpe * @device [in] : device 19410f3e72b5SJason Gunthorpe * @iova [in] : starting IOVA of user pages to be pinned. 19420f3e72b5SJason Gunthorpe * @npage [in] : count of pages to be pinned. This count should not 19430f3e72b5SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 19440f3e72b5SJason Gunthorpe * @prot [in] : protection flags 19450f3e72b5SJason Gunthorpe * @pages[out] : array of host pages 19460f3e72b5SJason Gunthorpe * Return error or number of pages pinned. 19470f3e72b5SJason Gunthorpe */ 19480f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, 19490f3e72b5SJason Gunthorpe int npage, int prot, struct page **pages) 19500f3e72b5SJason Gunthorpe { 19510f3e72b5SJason Gunthorpe struct vfio_container *container; 19520f3e72b5SJason Gunthorpe struct vfio_group *group = device->group; 19530f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 19540f3e72b5SJason Gunthorpe int ret; 19550f3e72b5SJason Gunthorpe 19560f3e72b5SJason Gunthorpe if (!pages || !npage || !vfio_assert_device_open(device)) 19570f3e72b5SJason Gunthorpe return -EINVAL; 19580f3e72b5SJason Gunthorpe 19590f3e72b5SJason Gunthorpe if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 19600f3e72b5SJason Gunthorpe return -E2BIG; 19610f3e72b5SJason Gunthorpe 19620f3e72b5SJason Gunthorpe if (group->dev_counter > 1) 19630f3e72b5SJason Gunthorpe return -EINVAL; 19640f3e72b5SJason Gunthorpe 19650f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 19660f3e72b5SJason Gunthorpe container = group->container; 19670f3e72b5SJason Gunthorpe driver = container->iommu_driver; 19680f3e72b5SJason Gunthorpe if (likely(driver && driver->ops->pin_pages)) 19690f3e72b5SJason Gunthorpe ret = driver->ops->pin_pages(container->iommu_data, 19700f3e72b5SJason Gunthorpe group->iommu_group, iova, 19710f3e72b5SJason Gunthorpe npage, prot, pages); 19720f3e72b5SJason Gunthorpe else 19730f3e72b5SJason Gunthorpe ret = -ENOTTY; 19740f3e72b5SJason Gunthorpe 19750f3e72b5SJason Gunthorpe return ret; 19760f3e72b5SJason Gunthorpe } 19770f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages); 19780f3e72b5SJason Gunthorpe 19790f3e72b5SJason Gunthorpe /* 19800f3e72b5SJason Gunthorpe * Unpin contiguous host pages for local domain only. 19810f3e72b5SJason Gunthorpe * @device [in] : device 19820f3e72b5SJason Gunthorpe * @iova [in] : starting address of user pages to be unpinned. 19830f3e72b5SJason Gunthorpe * @npage [in] : count of pages to be unpinned. This count should not 19840f3e72b5SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 19850f3e72b5SJason Gunthorpe */ 19860f3e72b5SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) 19870f3e72b5SJason Gunthorpe { 19880f3e72b5SJason Gunthorpe struct vfio_container *container; 19890f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 19900f3e72b5SJason Gunthorpe 19910f3e72b5SJason Gunthorpe if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) 19920f3e72b5SJason Gunthorpe return; 19930f3e72b5SJason Gunthorpe 19940f3e72b5SJason Gunthorpe if (WARN_ON(!vfio_assert_device_open(device))) 19950f3e72b5SJason Gunthorpe return; 19960f3e72b5SJason Gunthorpe 19970f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 19980f3e72b5SJason Gunthorpe container = device->group->container; 19990f3e72b5SJason Gunthorpe driver = container->iommu_driver; 20000f3e72b5SJason Gunthorpe 20010f3e72b5SJason Gunthorpe driver->ops->unpin_pages(container->iommu_data, iova, npage); 20020f3e72b5SJason Gunthorpe } 20030f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages); 20040f3e72b5SJason Gunthorpe 20050f3e72b5SJason Gunthorpe /* 20060f3e72b5SJason Gunthorpe * This interface allows the CPUs to perform some sort of virtual DMA on 20070f3e72b5SJason Gunthorpe * behalf of the device. 20080f3e72b5SJason Gunthorpe * 20090f3e72b5SJason Gunthorpe * CPUs read/write from/into a range of IOVAs pointing to user space memory 20100f3e72b5SJason Gunthorpe * into/from a kernel buffer. 20110f3e72b5SJason Gunthorpe * 20120f3e72b5SJason Gunthorpe * As the read/write of user space memory is conducted via the CPUs and is 20130f3e72b5SJason Gunthorpe * not a real device DMA, it is not necessary to pin the user space memory. 20140f3e72b5SJason Gunthorpe * 20150f3e72b5SJason Gunthorpe * @device [in] : VFIO device 20160f3e72b5SJason Gunthorpe * @iova [in] : base IOVA of a user space buffer 20170f3e72b5SJason Gunthorpe * @data [in] : pointer to kernel buffer 20180f3e72b5SJason Gunthorpe * @len [in] : kernel buffer length 20190f3e72b5SJason Gunthorpe * @write : indicate read or write 20200f3e72b5SJason Gunthorpe * Return error code on failure or 0 on success. 20210f3e72b5SJason Gunthorpe */ 20220f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, 20230f3e72b5SJason Gunthorpe size_t len, bool write) 20240f3e72b5SJason Gunthorpe { 20250f3e72b5SJason Gunthorpe struct vfio_container *container; 20260f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 20270f3e72b5SJason Gunthorpe int ret = 0; 20280f3e72b5SJason Gunthorpe 20290f3e72b5SJason Gunthorpe if (!data || len <= 0 || !vfio_assert_device_open(device)) 20300f3e72b5SJason Gunthorpe return -EINVAL; 20310f3e72b5SJason Gunthorpe 20320f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 20330f3e72b5SJason Gunthorpe container = device->group->container; 20340f3e72b5SJason Gunthorpe driver = container->iommu_driver; 20350f3e72b5SJason Gunthorpe 20360f3e72b5SJason Gunthorpe if (likely(driver && driver->ops->dma_rw)) 20370f3e72b5SJason Gunthorpe ret = driver->ops->dma_rw(container->iommu_data, 20380f3e72b5SJason Gunthorpe iova, data, len, write); 20390f3e72b5SJason Gunthorpe else 20400f3e72b5SJason Gunthorpe ret = -ENOTTY; 20410f3e72b5SJason Gunthorpe return ret; 20420f3e72b5SJason Gunthorpe } 20430f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw); 20440f3e72b5SJason Gunthorpe 20450f3e72b5SJason Gunthorpe /* 20460f3e72b5SJason Gunthorpe * Module/class support 20470f3e72b5SJason Gunthorpe */ 20480f3e72b5SJason Gunthorpe static char *vfio_devnode(struct device *dev, umode_t *mode) 20490f3e72b5SJason Gunthorpe { 20500f3e72b5SJason Gunthorpe return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); 20510f3e72b5SJason Gunthorpe } 20520f3e72b5SJason Gunthorpe 20530f3e72b5SJason Gunthorpe static struct miscdevice vfio_dev = { 20540f3e72b5SJason Gunthorpe .minor = VFIO_MINOR, 20550f3e72b5SJason Gunthorpe .name = "vfio", 20560f3e72b5SJason Gunthorpe .fops = &vfio_fops, 20570f3e72b5SJason Gunthorpe .nodename = "vfio/vfio", 20580f3e72b5SJason Gunthorpe .mode = S_IRUGO | S_IWUGO, 20590f3e72b5SJason Gunthorpe }; 20600f3e72b5SJason Gunthorpe 20610f3e72b5SJason Gunthorpe static int __init vfio_init(void) 20620f3e72b5SJason Gunthorpe { 20630f3e72b5SJason Gunthorpe int ret; 20640f3e72b5SJason Gunthorpe 20650f3e72b5SJason Gunthorpe ida_init(&vfio.group_ida); 20660f3e72b5SJason Gunthorpe mutex_init(&vfio.group_lock); 20670f3e72b5SJason Gunthorpe mutex_init(&vfio.iommu_drivers_lock); 20680f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&vfio.group_list); 20690f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&vfio.iommu_drivers_list); 20700f3e72b5SJason Gunthorpe 20710f3e72b5SJason Gunthorpe ret = misc_register(&vfio_dev); 20720f3e72b5SJason Gunthorpe if (ret) { 20730f3e72b5SJason Gunthorpe pr_err("vfio: misc device register failed\n"); 20740f3e72b5SJason Gunthorpe return ret; 20750f3e72b5SJason Gunthorpe } 20760f3e72b5SJason Gunthorpe 20770f3e72b5SJason Gunthorpe /* /dev/vfio/$GROUP */ 20780f3e72b5SJason Gunthorpe vfio.class = class_create(THIS_MODULE, "vfio"); 20790f3e72b5SJason Gunthorpe if (IS_ERR(vfio.class)) { 20800f3e72b5SJason Gunthorpe ret = PTR_ERR(vfio.class); 20810f3e72b5SJason Gunthorpe goto err_class; 20820f3e72b5SJason Gunthorpe } 20830f3e72b5SJason Gunthorpe 20840f3e72b5SJason Gunthorpe vfio.class->devnode = vfio_devnode; 20850f3e72b5SJason Gunthorpe 20860f3e72b5SJason Gunthorpe ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); 20870f3e72b5SJason Gunthorpe if (ret) 20880f3e72b5SJason Gunthorpe goto err_alloc_chrdev; 20890f3e72b5SJason Gunthorpe 20900f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 20910f3e72b5SJason Gunthorpe ret = vfio_register_iommu_driver(&vfio_noiommu_ops); 20920f3e72b5SJason Gunthorpe #endif 20930f3e72b5SJason Gunthorpe if (ret) 20940f3e72b5SJason Gunthorpe goto err_driver_register; 20950f3e72b5SJason Gunthorpe 20960f3e72b5SJason Gunthorpe pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 20970f3e72b5SJason Gunthorpe return 0; 20980f3e72b5SJason Gunthorpe 20990f3e72b5SJason Gunthorpe err_driver_register: 21000f3e72b5SJason Gunthorpe unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); 21010f3e72b5SJason Gunthorpe err_alloc_chrdev: 21020f3e72b5SJason Gunthorpe class_destroy(vfio.class); 21030f3e72b5SJason Gunthorpe vfio.class = NULL; 21040f3e72b5SJason Gunthorpe err_class: 21050f3e72b5SJason Gunthorpe misc_deregister(&vfio_dev); 21060f3e72b5SJason Gunthorpe return ret; 21070f3e72b5SJason Gunthorpe } 21080f3e72b5SJason Gunthorpe 21090f3e72b5SJason Gunthorpe static void __exit vfio_cleanup(void) 21100f3e72b5SJason Gunthorpe { 21110f3e72b5SJason Gunthorpe WARN_ON(!list_empty(&vfio.group_list)); 21120f3e72b5SJason Gunthorpe 21130f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 21140f3e72b5SJason Gunthorpe vfio_unregister_iommu_driver(&vfio_noiommu_ops); 21150f3e72b5SJason Gunthorpe #endif 21160f3e72b5SJason Gunthorpe ida_destroy(&vfio.group_ida); 21170f3e72b5SJason Gunthorpe unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); 21180f3e72b5SJason Gunthorpe class_destroy(vfio.class); 21190f3e72b5SJason Gunthorpe vfio.class = NULL; 21200f3e72b5SJason Gunthorpe misc_deregister(&vfio_dev); 21210f3e72b5SJason Gunthorpe xa_destroy(&vfio_device_set_xa); 21220f3e72b5SJason Gunthorpe } 21230f3e72b5SJason Gunthorpe 21240f3e72b5SJason Gunthorpe module_init(vfio_init); 21250f3e72b5SJason Gunthorpe module_exit(vfio_cleanup); 21260f3e72b5SJason Gunthorpe 21270f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION); 21280f3e72b5SJason Gunthorpe MODULE_LICENSE("GPL v2"); 21290f3e72b5SJason Gunthorpe MODULE_AUTHOR(DRIVER_AUTHOR); 21300f3e72b5SJason Gunthorpe MODULE_DESCRIPTION(DRIVER_DESC); 21310f3e72b5SJason Gunthorpe MODULE_ALIAS_MISCDEV(VFIO_MINOR); 21320f3e72b5SJason Gunthorpe MODULE_ALIAS("devname:vfio/vfio"); 21330f3e72b5SJason Gunthorpe MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); 2134