10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only 20f3e72b5SJason Gunthorpe /* 30f3e72b5SJason Gunthorpe * VFIO core 40f3e72b5SJason Gunthorpe * 50f3e72b5SJason Gunthorpe * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 60f3e72b5SJason Gunthorpe * Author: Alex Williamson <alex.williamson@redhat.com> 70f3e72b5SJason Gunthorpe * 80f3e72b5SJason Gunthorpe * Derived from original vfio: 90f3e72b5SJason Gunthorpe * Copyright 2010 Cisco Systems, Inc. All rights reserved. 100f3e72b5SJason Gunthorpe * Author: Tom Lyon, pugs@cisco.com 110f3e72b5SJason Gunthorpe */ 120f3e72b5SJason Gunthorpe 130f3e72b5SJason Gunthorpe #include <linux/cdev.h> 140f3e72b5SJason Gunthorpe #include <linux/compat.h> 150f3e72b5SJason Gunthorpe #include <linux/device.h> 160f3e72b5SJason Gunthorpe #include <linux/file.h> 170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h> 180f3e72b5SJason Gunthorpe #include <linux/fs.h> 190f3e72b5SJason Gunthorpe #include <linux/idr.h> 200f3e72b5SJason Gunthorpe #include <linux/iommu.h> 210f3e72b5SJason Gunthorpe #include <linux/list.h> 220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h> 230f3e72b5SJason Gunthorpe #include <linux/module.h> 240f3e72b5SJason Gunthorpe #include <linux/mutex.h> 250f3e72b5SJason Gunthorpe #include <linux/pci.h> 260f3e72b5SJason Gunthorpe #include <linux/rwsem.h> 270f3e72b5SJason Gunthorpe #include <linux/sched.h> 280f3e72b5SJason Gunthorpe #include <linux/slab.h> 290f3e72b5SJason Gunthorpe #include <linux/stat.h> 300f3e72b5SJason Gunthorpe #include <linux/string.h> 310f3e72b5SJason Gunthorpe #include <linux/uaccess.h> 320f3e72b5SJason Gunthorpe #include <linux/vfio.h> 330f3e72b5SJason Gunthorpe #include <linux/wait.h> 340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h> 358e5c6995SAbhishek Sahu #include <linux/pm_runtime.h> 3680c4b92aSYishai Hadas #include <linux/interval_tree.h> 3780c4b92aSYishai Hadas #include <linux/iova_bitmap.h> 380f3e72b5SJason Gunthorpe #include "vfio.h" 390f3e72b5SJason 
Gunthorpe 400f3e72b5SJason Gunthorpe #define DRIVER_VERSION "0.3" 410f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 420f3e72b5SJason Gunthorpe #define DRIVER_DESC "VFIO - User Level meta-driver" 430f3e72b5SJason Gunthorpe 440f3e72b5SJason Gunthorpe static struct vfio { 450f3e72b5SJason Gunthorpe struct class *class; 460f3e72b5SJason Gunthorpe struct list_head iommu_drivers_list; 470f3e72b5SJason Gunthorpe struct mutex iommu_drivers_lock; 480f3e72b5SJason Gunthorpe struct list_head group_list; 490f3e72b5SJason Gunthorpe struct mutex group_lock; /* locks group_list */ 500f3e72b5SJason Gunthorpe struct ida group_ida; 510f3e72b5SJason Gunthorpe dev_t group_devt; 523c28a761SYi Liu struct class *device_class; 533c28a761SYi Liu struct ida device_ida; 540f3e72b5SJason Gunthorpe } vfio; 550f3e72b5SJason Gunthorpe 560f3e72b5SJason Gunthorpe struct vfio_iommu_driver { 570f3e72b5SJason Gunthorpe const struct vfio_iommu_driver_ops *ops; 580f3e72b5SJason Gunthorpe struct list_head vfio_next; 590f3e72b5SJason Gunthorpe }; 600f3e72b5SJason Gunthorpe 610f3e72b5SJason Gunthorpe struct vfio_container { 620f3e72b5SJason Gunthorpe struct kref kref; 630f3e72b5SJason Gunthorpe struct list_head group_list; 640f3e72b5SJason Gunthorpe struct rw_semaphore group_lock; 650f3e72b5SJason Gunthorpe struct vfio_iommu_driver *iommu_driver; 660f3e72b5SJason Gunthorpe void *iommu_data; 670f3e72b5SJason Gunthorpe bool noiommu; 680f3e72b5SJason Gunthorpe }; 690f3e72b5SJason Gunthorpe 700f3e72b5SJason Gunthorpe struct vfio_group { 710f3e72b5SJason Gunthorpe struct device dev; 720f3e72b5SJason Gunthorpe struct cdev cdev; 730f3e72b5SJason Gunthorpe refcount_t users; 740f3e72b5SJason Gunthorpe unsigned int container_users; 750f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 760f3e72b5SJason Gunthorpe struct vfio_container *container; 770f3e72b5SJason Gunthorpe struct list_head device_list; 780f3e72b5SJason Gunthorpe struct mutex device_lock; 
790f3e72b5SJason Gunthorpe struct list_head vfio_next; 800f3e72b5SJason Gunthorpe struct list_head container_next; 810f3e72b5SJason Gunthorpe enum vfio_group_type type; 820f3e72b5SJason Gunthorpe struct rw_semaphore group_rwsem; 830f3e72b5SJason Gunthorpe struct kvm *kvm; 840f3e72b5SJason Gunthorpe struct file *opened_file; 850f3e72b5SJason Gunthorpe struct blocking_notifier_head notifier; 860f3e72b5SJason Gunthorpe }; 870f3e72b5SJason Gunthorpe 880f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 890f3e72b5SJason Gunthorpe static bool noiommu __read_mostly; 900f3e72b5SJason Gunthorpe module_param_named(enable_unsafe_noiommu_mode, 910f3e72b5SJason Gunthorpe noiommu, bool, S_IRUGO | S_IWUSR); 920f3e72b5SJason Gunthorpe MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. 
(default: false)"); 930f3e72b5SJason Gunthorpe #endif 940f3e72b5SJason Gunthorpe 950f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa); 960f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops; 970f3e72b5SJason Gunthorpe 980f3e72b5SJason Gunthorpe int vfio_assign_device_set(struct vfio_device *device, void *set_id) 990f3e72b5SJason Gunthorpe { 1000f3e72b5SJason Gunthorpe unsigned long idx = (unsigned long)set_id; 1010f3e72b5SJason Gunthorpe struct vfio_device_set *new_dev_set; 1020f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set; 1030f3e72b5SJason Gunthorpe 1040f3e72b5SJason Gunthorpe if (WARN_ON(!set_id)) 1050f3e72b5SJason Gunthorpe return -EINVAL; 1060f3e72b5SJason Gunthorpe 1070f3e72b5SJason Gunthorpe /* 1080f3e72b5SJason Gunthorpe * Atomically acquire a singleton object in the xarray for this set_id 1090f3e72b5SJason Gunthorpe */ 1100f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1110f3e72b5SJason Gunthorpe dev_set = xa_load(&vfio_device_set_xa, idx); 1120f3e72b5SJason Gunthorpe if (dev_set) 1130f3e72b5SJason Gunthorpe goto found_get_ref; 1140f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1150f3e72b5SJason Gunthorpe 1160f3e72b5SJason Gunthorpe new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); 1170f3e72b5SJason Gunthorpe if (!new_dev_set) 1180f3e72b5SJason Gunthorpe return -ENOMEM; 1190f3e72b5SJason Gunthorpe mutex_init(&new_dev_set->lock); 1200f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&new_dev_set->device_list); 1210f3e72b5SJason Gunthorpe new_dev_set->set_id = set_id; 1220f3e72b5SJason Gunthorpe 1230f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1240f3e72b5SJason Gunthorpe dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, 1250f3e72b5SJason Gunthorpe GFP_KERNEL); 1260f3e72b5SJason Gunthorpe if (!dev_set) { 1270f3e72b5SJason Gunthorpe dev_set = new_dev_set; 1280f3e72b5SJason Gunthorpe goto found_get_ref; 1290f3e72b5SJason Gunthorpe } 1300f3e72b5SJason Gunthorpe 
1310f3e72b5SJason Gunthorpe kfree(new_dev_set); 1320f3e72b5SJason Gunthorpe if (xa_is_err(dev_set)) { 1330f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1340f3e72b5SJason Gunthorpe return xa_err(dev_set); 1350f3e72b5SJason Gunthorpe } 1360f3e72b5SJason Gunthorpe 1370f3e72b5SJason Gunthorpe found_get_ref: 1380f3e72b5SJason Gunthorpe dev_set->device_count++; 1390f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1400f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1410f3e72b5SJason Gunthorpe device->dev_set = dev_set; 1420f3e72b5SJason Gunthorpe list_add_tail(&device->dev_set_list, &dev_set->device_list); 1430f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1440f3e72b5SJason Gunthorpe return 0; 1450f3e72b5SJason Gunthorpe } 1460f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_assign_device_set); 1470f3e72b5SJason Gunthorpe 1480f3e72b5SJason Gunthorpe static void vfio_release_device_set(struct vfio_device *device) 1490f3e72b5SJason Gunthorpe { 1500f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set = device->dev_set; 1510f3e72b5SJason Gunthorpe 1520f3e72b5SJason Gunthorpe if (!dev_set) 1530f3e72b5SJason Gunthorpe return; 1540f3e72b5SJason Gunthorpe 1550f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1560f3e72b5SJason Gunthorpe list_del(&device->dev_set_list); 1570f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1580f3e72b5SJason Gunthorpe 1590f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1600f3e72b5SJason Gunthorpe if (!--dev_set->device_count) { 1610f3e72b5SJason Gunthorpe __xa_erase(&vfio_device_set_xa, 1620f3e72b5SJason Gunthorpe (unsigned long)dev_set->set_id); 1630f3e72b5SJason Gunthorpe mutex_destroy(&dev_set->lock); 1640f3e72b5SJason Gunthorpe kfree(dev_set); 1650f3e72b5SJason Gunthorpe } 1660f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1670f3e72b5SJason Gunthorpe } 1680f3e72b5SJason Gunthorpe 1690f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 1700f3e72b5SJason Gunthorpe static void 
*vfio_noiommu_open(unsigned long arg) 1710f3e72b5SJason Gunthorpe { 1720f3e72b5SJason Gunthorpe if (arg != VFIO_NOIOMMU_IOMMU) 1730f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 1740f3e72b5SJason Gunthorpe if (!capable(CAP_SYS_RAWIO)) 1750f3e72b5SJason Gunthorpe return ERR_PTR(-EPERM); 1760f3e72b5SJason Gunthorpe 1770f3e72b5SJason Gunthorpe return NULL; 1780f3e72b5SJason Gunthorpe } 1790f3e72b5SJason Gunthorpe 1800f3e72b5SJason Gunthorpe static void vfio_noiommu_release(void *iommu_data) 1810f3e72b5SJason Gunthorpe { 1820f3e72b5SJason Gunthorpe } 1830f3e72b5SJason Gunthorpe 1840f3e72b5SJason Gunthorpe static long vfio_noiommu_ioctl(void *iommu_data, 1850f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 1860f3e72b5SJason Gunthorpe { 1870f3e72b5SJason Gunthorpe if (cmd == VFIO_CHECK_EXTENSION) 1880f3e72b5SJason Gunthorpe return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; 1890f3e72b5SJason Gunthorpe 1900f3e72b5SJason Gunthorpe return -ENOTTY; 1910f3e72b5SJason Gunthorpe } 1920f3e72b5SJason Gunthorpe 1930f3e72b5SJason Gunthorpe static int vfio_noiommu_attach_group(void *iommu_data, 1940f3e72b5SJason Gunthorpe struct iommu_group *iommu_group, enum vfio_group_type type) 1950f3e72b5SJason Gunthorpe { 1960f3e72b5SJason Gunthorpe return 0; 1970f3e72b5SJason Gunthorpe } 1980f3e72b5SJason Gunthorpe 1990f3e72b5SJason Gunthorpe static void vfio_noiommu_detach_group(void *iommu_data, 2000f3e72b5SJason Gunthorpe struct iommu_group *iommu_group) 2010f3e72b5SJason Gunthorpe { 2020f3e72b5SJason Gunthorpe } 2030f3e72b5SJason Gunthorpe 2040f3e72b5SJason Gunthorpe static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { 2050f3e72b5SJason Gunthorpe .name = "vfio-noiommu", 2060f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 2070f3e72b5SJason Gunthorpe .open = vfio_noiommu_open, 2080f3e72b5SJason Gunthorpe .release = vfio_noiommu_release, 2090f3e72b5SJason Gunthorpe .ioctl = vfio_noiommu_ioctl, 2100f3e72b5SJason Gunthorpe .attach_group = vfio_noiommu_attach_group, 
2110f3e72b5SJason Gunthorpe .detach_group = vfio_noiommu_detach_group, 2120f3e72b5SJason Gunthorpe }; 2130f3e72b5SJason Gunthorpe 2140f3e72b5SJason Gunthorpe /* 2150f3e72b5SJason Gunthorpe * Only noiommu containers can use vfio-noiommu and noiommu containers can only 2160f3e72b5SJason Gunthorpe * use vfio-noiommu. 2170f3e72b5SJason Gunthorpe */ 2180f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 2190f3e72b5SJason Gunthorpe const struct vfio_iommu_driver *driver) 2200f3e72b5SJason Gunthorpe { 2210f3e72b5SJason Gunthorpe return container->noiommu == (driver->ops == &vfio_noiommu_ops); 2220f3e72b5SJason Gunthorpe } 2230f3e72b5SJason Gunthorpe #else 2240f3e72b5SJason Gunthorpe static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 2250f3e72b5SJason Gunthorpe const struct vfio_iommu_driver *driver) 2260f3e72b5SJason Gunthorpe { 2270f3e72b5SJason Gunthorpe return true; 2280f3e72b5SJason Gunthorpe } 2290f3e72b5SJason Gunthorpe #endif /* CONFIG_VFIO_NOIOMMU */ 2300f3e72b5SJason Gunthorpe 2310f3e72b5SJason Gunthorpe /* 2320f3e72b5SJason Gunthorpe * IOMMU driver registration 2330f3e72b5SJason Gunthorpe */ 2340f3e72b5SJason Gunthorpe int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) 2350f3e72b5SJason Gunthorpe { 2360f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver, *tmp; 2370f3e72b5SJason Gunthorpe 2380f3e72b5SJason Gunthorpe if (WARN_ON(!ops->register_device != !ops->unregister_device)) 2390f3e72b5SJason Gunthorpe return -EINVAL; 2400f3e72b5SJason Gunthorpe 2410f3e72b5SJason Gunthorpe driver = kzalloc(sizeof(*driver), GFP_KERNEL); 2420f3e72b5SJason Gunthorpe if (!driver) 2430f3e72b5SJason Gunthorpe return -ENOMEM; 2440f3e72b5SJason Gunthorpe 2450f3e72b5SJason Gunthorpe driver->ops = ops; 2460f3e72b5SJason Gunthorpe 2470f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 2480f3e72b5SJason Gunthorpe 2490f3e72b5SJason Gunthorpe /* Check for duplicates */ 
2500f3e72b5SJason Gunthorpe list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { 2510f3e72b5SJason Gunthorpe if (tmp->ops == ops) { 2520f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2530f3e72b5SJason Gunthorpe kfree(driver); 2540f3e72b5SJason Gunthorpe return -EINVAL; 2550f3e72b5SJason Gunthorpe } 2560f3e72b5SJason Gunthorpe } 2570f3e72b5SJason Gunthorpe 2580f3e72b5SJason Gunthorpe list_add(&driver->vfio_next, &vfio.iommu_drivers_list); 2590f3e72b5SJason Gunthorpe 2600f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2610f3e72b5SJason Gunthorpe 2620f3e72b5SJason Gunthorpe return 0; 2630f3e72b5SJason Gunthorpe } 2640f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); 2650f3e72b5SJason Gunthorpe 2660f3e72b5SJason Gunthorpe void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) 2670f3e72b5SJason Gunthorpe { 2680f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 2690f3e72b5SJason Gunthorpe 2700f3e72b5SJason Gunthorpe mutex_lock(&vfio.iommu_drivers_lock); 2710f3e72b5SJason Gunthorpe list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 2720f3e72b5SJason Gunthorpe if (driver->ops == ops) { 2730f3e72b5SJason Gunthorpe list_del(&driver->vfio_next); 2740f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2750f3e72b5SJason Gunthorpe kfree(driver); 2760f3e72b5SJason Gunthorpe return; 2770f3e72b5SJason Gunthorpe } 2780f3e72b5SJason Gunthorpe } 2790f3e72b5SJason Gunthorpe mutex_unlock(&vfio.iommu_drivers_lock); 2800f3e72b5SJason Gunthorpe } 2810f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); 2820f3e72b5SJason Gunthorpe 2830f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group); 2840f3e72b5SJason Gunthorpe 2850f3e72b5SJason Gunthorpe /* 2860f3e72b5SJason Gunthorpe * Container objects - containers are created when /dev/vfio/vfio is 2870f3e72b5SJason Gunthorpe * opened, but their lifecycle extends until the last user is 
done, so 2880f3e72b5SJason Gunthorpe * it's freed via kref. Must support container/group/device being 2890f3e72b5SJason Gunthorpe * closed in any order. 2900f3e72b5SJason Gunthorpe */ 2910f3e72b5SJason Gunthorpe static void vfio_container_get(struct vfio_container *container) 2920f3e72b5SJason Gunthorpe { 2930f3e72b5SJason Gunthorpe kref_get(&container->kref); 2940f3e72b5SJason Gunthorpe } 2950f3e72b5SJason Gunthorpe 2960f3e72b5SJason Gunthorpe static void vfio_container_release(struct kref *kref) 2970f3e72b5SJason Gunthorpe { 2980f3e72b5SJason Gunthorpe struct vfio_container *container; 2990f3e72b5SJason Gunthorpe container = container_of(kref, struct vfio_container, kref); 3000f3e72b5SJason Gunthorpe 3010f3e72b5SJason Gunthorpe kfree(container); 3020f3e72b5SJason Gunthorpe } 3030f3e72b5SJason Gunthorpe 3040f3e72b5SJason Gunthorpe static void vfio_container_put(struct vfio_container *container) 3050f3e72b5SJason Gunthorpe { 3060f3e72b5SJason Gunthorpe kref_put(&container->kref, vfio_container_release); 3070f3e72b5SJason Gunthorpe } 3080f3e72b5SJason Gunthorpe 3090f3e72b5SJason Gunthorpe /* 3100f3e72b5SJason Gunthorpe * Group objects - create, release, get, put, search 3110f3e72b5SJason Gunthorpe */ 3120f3e72b5SJason Gunthorpe static struct vfio_group * 3130f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group) 3140f3e72b5SJason Gunthorpe { 3150f3e72b5SJason Gunthorpe struct vfio_group *group; 3160f3e72b5SJason Gunthorpe 3170f3e72b5SJason Gunthorpe list_for_each_entry(group, &vfio.group_list, vfio_next) { 3180f3e72b5SJason Gunthorpe if (group->iommu_group == iommu_group) { 3190f3e72b5SJason Gunthorpe vfio_group_get(group); 3200f3e72b5SJason Gunthorpe return group; 3210f3e72b5SJason Gunthorpe } 3220f3e72b5SJason Gunthorpe } 3230f3e72b5SJason Gunthorpe return NULL; 3240f3e72b5SJason Gunthorpe } 3250f3e72b5SJason Gunthorpe 3260f3e72b5SJason Gunthorpe static struct vfio_group * 3270f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct 
iommu_group *iommu_group) 3280f3e72b5SJason Gunthorpe { 3290f3e72b5SJason Gunthorpe struct vfio_group *group; 3300f3e72b5SJason Gunthorpe 3310f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 3320f3e72b5SJason Gunthorpe group = __vfio_group_get_from_iommu(iommu_group); 3330f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 3340f3e72b5SJason Gunthorpe return group; 3350f3e72b5SJason Gunthorpe } 3360f3e72b5SJason Gunthorpe 3370f3e72b5SJason Gunthorpe static void vfio_group_release(struct device *dev) 3380f3e72b5SJason Gunthorpe { 3390f3e72b5SJason Gunthorpe struct vfio_group *group = container_of(dev, struct vfio_group, dev); 3400f3e72b5SJason Gunthorpe 3410f3e72b5SJason Gunthorpe mutex_destroy(&group->device_lock); 3420f3e72b5SJason Gunthorpe iommu_group_put(group->iommu_group); 3430f3e72b5SJason Gunthorpe ida_free(&vfio.group_ida, MINOR(group->dev.devt)); 3440f3e72b5SJason Gunthorpe kfree(group); 3450f3e72b5SJason Gunthorpe } 3460f3e72b5SJason Gunthorpe 3470f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, 3480f3e72b5SJason Gunthorpe enum vfio_group_type type) 3490f3e72b5SJason Gunthorpe { 3500f3e72b5SJason Gunthorpe struct vfio_group *group; 3510f3e72b5SJason Gunthorpe int minor; 3520f3e72b5SJason Gunthorpe 3530f3e72b5SJason Gunthorpe group = kzalloc(sizeof(*group), GFP_KERNEL); 3540f3e72b5SJason Gunthorpe if (!group) 3550f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 3560f3e72b5SJason Gunthorpe 3570f3e72b5SJason Gunthorpe minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); 3580f3e72b5SJason Gunthorpe if (minor < 0) { 3590f3e72b5SJason Gunthorpe kfree(group); 3600f3e72b5SJason Gunthorpe return ERR_PTR(minor); 3610f3e72b5SJason Gunthorpe } 3620f3e72b5SJason Gunthorpe 3630f3e72b5SJason Gunthorpe device_initialize(&group->dev); 3640f3e72b5SJason Gunthorpe group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); 3650f3e72b5SJason Gunthorpe group->dev.class = vfio.class; 3660f3e72b5SJason Gunthorpe 
group->dev.release = vfio_group_release; 3670f3e72b5SJason Gunthorpe cdev_init(&group->cdev, &vfio_group_fops); 3680f3e72b5SJason Gunthorpe group->cdev.owner = THIS_MODULE; 3690f3e72b5SJason Gunthorpe 3700f3e72b5SJason Gunthorpe refcount_set(&group->users, 1); 3710f3e72b5SJason Gunthorpe init_rwsem(&group->group_rwsem); 3720f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&group->device_list); 3730f3e72b5SJason Gunthorpe mutex_init(&group->device_lock); 3740f3e72b5SJason Gunthorpe group->iommu_group = iommu_group; 3750f3e72b5SJason Gunthorpe /* put in vfio_group_release() */ 3760f3e72b5SJason Gunthorpe iommu_group_ref_get(iommu_group); 3770f3e72b5SJason Gunthorpe group->type = type; 3780f3e72b5SJason Gunthorpe BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); 3790f3e72b5SJason Gunthorpe 3800f3e72b5SJason Gunthorpe return group; 3810f3e72b5SJason Gunthorpe } 3820f3e72b5SJason Gunthorpe 3830f3e72b5SJason Gunthorpe static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, 3840f3e72b5SJason Gunthorpe enum vfio_group_type type) 3850f3e72b5SJason Gunthorpe { 3860f3e72b5SJason Gunthorpe struct vfio_group *group; 3870f3e72b5SJason Gunthorpe struct vfio_group *ret; 3880f3e72b5SJason Gunthorpe int err; 3890f3e72b5SJason Gunthorpe 3900f3e72b5SJason Gunthorpe group = vfio_group_alloc(iommu_group, type); 3910f3e72b5SJason Gunthorpe if (IS_ERR(group)) 3920f3e72b5SJason Gunthorpe return group; 3930f3e72b5SJason Gunthorpe 3940f3e72b5SJason Gunthorpe err = dev_set_name(&group->dev, "%s%d", 3950f3e72b5SJason Gunthorpe group->type == VFIO_NO_IOMMU ? "noiommu-" : "", 3960f3e72b5SJason Gunthorpe iommu_group_id(iommu_group)); 3970f3e72b5SJason Gunthorpe if (err) { 3980f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 3990f3e72b5SJason Gunthorpe goto err_put; 4000f3e72b5SJason Gunthorpe } 4010f3e72b5SJason Gunthorpe 4020f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 4030f3e72b5SJason Gunthorpe 4040f3e72b5SJason Gunthorpe /* Did we race creating this group? 
*/ 4050f3e72b5SJason Gunthorpe ret = __vfio_group_get_from_iommu(iommu_group); 4060f3e72b5SJason Gunthorpe if (ret) 4070f3e72b5SJason Gunthorpe goto err_unlock; 4080f3e72b5SJason Gunthorpe 4090f3e72b5SJason Gunthorpe err = cdev_device_add(&group->cdev, &group->dev); 4100f3e72b5SJason Gunthorpe if (err) { 4110f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 4120f3e72b5SJason Gunthorpe goto err_unlock; 4130f3e72b5SJason Gunthorpe } 4140f3e72b5SJason Gunthorpe 4150f3e72b5SJason Gunthorpe list_add(&group->vfio_next, &vfio.group_list); 4160f3e72b5SJason Gunthorpe 4170f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4180f3e72b5SJason Gunthorpe return group; 4190f3e72b5SJason Gunthorpe 4200f3e72b5SJason Gunthorpe err_unlock: 4210f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4220f3e72b5SJason Gunthorpe err_put: 4230f3e72b5SJason Gunthorpe put_device(&group->dev); 4240f3e72b5SJason Gunthorpe return ret; 4250f3e72b5SJason Gunthorpe } 4260f3e72b5SJason Gunthorpe 4270f3e72b5SJason Gunthorpe static void vfio_group_put(struct vfio_group *group) 4280f3e72b5SJason Gunthorpe { 4290f3e72b5SJason Gunthorpe if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock)) 4300f3e72b5SJason Gunthorpe return; 4310f3e72b5SJason Gunthorpe 4320f3e72b5SJason Gunthorpe /* 4330f3e72b5SJason Gunthorpe * These data structures all have paired operations that can only be 4340f3e72b5SJason Gunthorpe * undone when the caller holds a live reference on the group. Since all 4350f3e72b5SJason Gunthorpe * pairs must be undone these WARN_ON's indicate some caller did not 4360f3e72b5SJason Gunthorpe * properly hold the group reference. 
4370f3e72b5SJason Gunthorpe */ 4380f3e72b5SJason Gunthorpe WARN_ON(!list_empty(&group->device_list)); 4390f3e72b5SJason Gunthorpe WARN_ON(group->container || group->container_users); 4400f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 4410f3e72b5SJason Gunthorpe 4420f3e72b5SJason Gunthorpe list_del(&group->vfio_next); 4430f3e72b5SJason Gunthorpe cdev_device_del(&group->cdev, &group->dev); 4440f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 4450f3e72b5SJason Gunthorpe 4460f3e72b5SJason Gunthorpe put_device(&group->dev); 4470f3e72b5SJason Gunthorpe } 4480f3e72b5SJason Gunthorpe 4490f3e72b5SJason Gunthorpe static void vfio_group_get(struct vfio_group *group) 4500f3e72b5SJason Gunthorpe { 4510f3e72b5SJason Gunthorpe refcount_inc(&group->users); 4520f3e72b5SJason Gunthorpe } 4530f3e72b5SJason Gunthorpe 4540f3e72b5SJason Gunthorpe /* 4550f3e72b5SJason Gunthorpe * Device objects - create, release, get, put, search 4560f3e72b5SJason Gunthorpe */ 4570f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */ 4584a725b8dSKevin Tian static void vfio_device_put_registration(struct vfio_device *device) 4590f3e72b5SJason Gunthorpe { 4600f3e72b5SJason Gunthorpe if (refcount_dec_and_test(&device->refcount)) 4610f3e72b5SJason Gunthorpe complete(&device->comp); 4620f3e72b5SJason Gunthorpe } 4630f3e72b5SJason Gunthorpe 4644a725b8dSKevin Tian static bool vfio_device_try_get_registration(struct vfio_device *device) 4650f3e72b5SJason Gunthorpe { 4660f3e72b5SJason Gunthorpe return refcount_inc_not_zero(&device->refcount); 4670f3e72b5SJason Gunthorpe } 4680f3e72b5SJason Gunthorpe 4690f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group, 4700f3e72b5SJason Gunthorpe struct device *dev) 4710f3e72b5SJason Gunthorpe { 4720f3e72b5SJason Gunthorpe struct vfio_device *device; 4730f3e72b5SJason Gunthorpe 4740f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 4750f3e72b5SJason Gunthorpe 
list_for_each_entry(device, &group->device_list, group_next) { 4764a725b8dSKevin Tian if (device->dev == dev && 4774a725b8dSKevin Tian vfio_device_try_get_registration(device)) { 4780f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 4790f3e72b5SJason Gunthorpe return device; 4800f3e72b5SJason Gunthorpe } 4810f3e72b5SJason Gunthorpe } 4820f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 4830f3e72b5SJason Gunthorpe return NULL; 4840f3e72b5SJason Gunthorpe } 4850f3e72b5SJason Gunthorpe 4860f3e72b5SJason Gunthorpe /* 4870f3e72b5SJason Gunthorpe * VFIO driver API 4880f3e72b5SJason Gunthorpe */ 489cb9ff3f3SKevin Tian /* Release helper called by vfio_put_device() */ 4903c28a761SYi Liu static void vfio_device_release(struct device *dev) 491cb9ff3f3SKevin Tian { 492cb9ff3f3SKevin Tian struct vfio_device *device = 4933c28a761SYi Liu container_of(dev, struct vfio_device, device); 494cb9ff3f3SKevin Tian 495ebb72b76SKevin Tian vfio_release_device_set(device); 4963c28a761SYi Liu ida_free(&vfio.device_ida, device->index); 497cb9ff3f3SKevin Tian 498cb9ff3f3SKevin Tian /* 499cb9ff3f3SKevin Tian * kvfree() cannot be done here due to a life cycle mess in 500cb9ff3f3SKevin Tian * vfio-ccw. Before the ccw part is fixed all drivers are 501cb9ff3f3SKevin Tian * required to support @release and call vfio_free_device() 502cb9ff3f3SKevin Tian * from there. 503cb9ff3f3SKevin Tian */ 504cb9ff3f3SKevin Tian device->ops->release(device); 505cb9ff3f3SKevin Tian } 506cb9ff3f3SKevin Tian 507cb9ff3f3SKevin Tian /* 508cb9ff3f3SKevin Tian * Allocate and initialize vfio_device so it can be registered to vfio 509cb9ff3f3SKevin Tian * core. 510cb9ff3f3SKevin Tian * 511cb9ff3f3SKevin Tian * Drivers should use the wrapper vfio_alloc_device() for allocation. 512cb9ff3f3SKevin Tian * @size is the size of the structure to be allocated, including any 513cb9ff3f3SKevin Tian * private data used by the driver. 
514cb9ff3f3SKevin Tian * 515cb9ff3f3SKevin Tian * Driver may provide an @init callback to cover device private data. 516cb9ff3f3SKevin Tian * 517cb9ff3f3SKevin Tian * Use vfio_put_device() to release the structure after success return. 518cb9ff3f3SKevin Tian */ 519cb9ff3f3SKevin Tian struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, 520cb9ff3f3SKevin Tian const struct vfio_device_ops *ops) 521cb9ff3f3SKevin Tian { 522cb9ff3f3SKevin Tian struct vfio_device *device; 523cb9ff3f3SKevin Tian int ret; 524cb9ff3f3SKevin Tian 525cb9ff3f3SKevin Tian if (WARN_ON(size < sizeof(struct vfio_device))) 526cb9ff3f3SKevin Tian return ERR_PTR(-EINVAL); 527cb9ff3f3SKevin Tian 528cb9ff3f3SKevin Tian device = kvzalloc(size, GFP_KERNEL); 529cb9ff3f3SKevin Tian if (!device) 530cb9ff3f3SKevin Tian return ERR_PTR(-ENOMEM); 531cb9ff3f3SKevin Tian 532cb9ff3f3SKevin Tian ret = vfio_init_device(device, dev, ops); 533cb9ff3f3SKevin Tian if (ret) 534cb9ff3f3SKevin Tian goto out_free; 535cb9ff3f3SKevin Tian return device; 536cb9ff3f3SKevin Tian 537cb9ff3f3SKevin Tian out_free: 538cb9ff3f3SKevin Tian kvfree(device); 539cb9ff3f3SKevin Tian return ERR_PTR(ret); 540cb9ff3f3SKevin Tian } 541cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(_vfio_alloc_device); 542cb9ff3f3SKevin Tian 543cb9ff3f3SKevin Tian /* 544cb9ff3f3SKevin Tian * Initialize a vfio_device so it can be registered to vfio core. 545cb9ff3f3SKevin Tian * 546cb9ff3f3SKevin Tian * Only vfio-ccw driver should call this interface. 
547cb9ff3f3SKevin Tian */ 548cb9ff3f3SKevin Tian int vfio_init_device(struct vfio_device *device, struct device *dev, 549cb9ff3f3SKevin Tian const struct vfio_device_ops *ops) 550cb9ff3f3SKevin Tian { 551cb9ff3f3SKevin Tian int ret; 552cb9ff3f3SKevin Tian 5533c28a761SYi Liu ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL); 5543c28a761SYi Liu if (ret < 0) { 5553c28a761SYi Liu dev_dbg(dev, "Error to alloc index\n"); 5563c28a761SYi Liu return ret; 5573c28a761SYi Liu } 5583c28a761SYi Liu 5593c28a761SYi Liu device->index = ret; 560ebb72b76SKevin Tian init_completion(&device->comp); 561ebb72b76SKevin Tian device->dev = dev; 562ebb72b76SKevin Tian device->ops = ops; 563cb9ff3f3SKevin Tian 564cb9ff3f3SKevin Tian if (ops->init) { 565cb9ff3f3SKevin Tian ret = ops->init(device); 566cb9ff3f3SKevin Tian if (ret) 567cb9ff3f3SKevin Tian goto out_uninit; 568cb9ff3f3SKevin Tian } 569cb9ff3f3SKevin Tian 5703c28a761SYi Liu device_initialize(&device->device); 5713c28a761SYi Liu device->device.release = vfio_device_release; 5723c28a761SYi Liu device->device.class = vfio.device_class; 5733c28a761SYi Liu device->device.parent = device->dev; 574cb9ff3f3SKevin Tian return 0; 575cb9ff3f3SKevin Tian 576cb9ff3f3SKevin Tian out_uninit: 577ebb72b76SKevin Tian vfio_release_device_set(device); 5783c28a761SYi Liu ida_free(&vfio.device_ida, device->index); 579cb9ff3f3SKevin Tian return ret; 580cb9ff3f3SKevin Tian } 581cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(vfio_init_device); 582cb9ff3f3SKevin Tian 583cb9ff3f3SKevin Tian /* 584cb9ff3f3SKevin Tian * The helper called by driver @release callback to free the device 585cb9ff3f3SKevin Tian * structure. Drivers which don't have private data to clean can 586cb9ff3f3SKevin Tian * simply use this helper as its @release. 
 */
void vfio_free_device(struct vfio_device *device)
{
	kvfree(device);
}
EXPORT_SYMBOL_GPL(vfio_free_device);

/*
 * Allocate a fake iommu_group for @dev (which has no real one), name it
 * "vfio-noiommu", add the device to it and wrap it in a new vfio_group of
 * @type.  The iommu_group reference is dropped on success because the
 * vfio_group created by vfio_create_group() holds its own reference.
 */
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

/*
 * Resolve @dev to its vfio_group: look up the existing group for the
 * device's iommu_group, creating the vfio_group (or, with noiommu, a fake
 * iommu_group as well) if none exists yet.  Returns the group with a
 * reference held, or ERR_PTR on failure.
 */
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}

/*
 * Common tail of device registration: attach @device to @group, publish its
 * struct device under the vfio device class ("vfio%d") and start its
 * registration refcount.  On any failure the group reference (and, for
 * noiommu/emulated groups, the fake iommu_group membership) is unwound.
 */
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;
	int ret;

	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put_registration(existing_device);
		ret = -EBUSY;
		goto err_out;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	ret = dev_set_name(&device->device, "vfio%d", device->index);
	if (ret)
		goto err_out;

	ret = device_add(&device->device);
	if (ret)
		goto err_out;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return 0;
err_out:
	if (group->type == VFIO_NO_IOMMU ||
	    group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);
	vfio_group_put(group);
	return ret;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);

/*
 * Register a virtual device without IOMMU backing. The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);

/*
 * Find a device in @group matching @buf, preferring the driver's ->match()
 * op over a plain dev_name() comparison.  On success a registration
 * reference is taken on the device; returns ERR_PTR(-ENODEV) when nothing
 * matches, or the error from ->match().
 */
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get_registration(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/*
		 * Nudge the driver's ->request() op (if any) each pass so it
		 * can ask its user to release the device; i counts attempts.
		 */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			/*
			 * After the first signal, fall back to an
			 * uninterruptible timed wait so unregister still
			 * completes once users go away.
			 */
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	/* Balances device_add in register path */
	device_del(&device->device);

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);

/*
 * VFIO base fd, /dev/vfio/vfio
 */
/*
 * VFIO_CHECK_EXTENSION handler for the container fd: returns the (positive)
 * result of the first iommu backend claiming support for @arg, or the bound
 * driver's answer once a backend has been set.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Detach the groups attached so far, in reverse order */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

/*
 * VFIO_SET_IOMMU handler: pick the first allowed iommu backend that reports
 * support for the requested extension magic in @arg, open it and attach all
 * of the container's groups to it.
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

/*
 * ioctl dispatch for the container fd; unrecognized ioctls are passed
 * through to the bound iommu backend, if any.
 */
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

/* open() of /dev/vfio/vfio: allocate an empty, unprivileged container */
static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

/* release() of the container fd: notify the backend, then drop our kref */
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
/*
 * Detach @group from its container.  Caller holds group->group_rwsem for
 * write and must have brought container_users down to exactly 1 (WARNed
 * otherwise).  Detaching the last group also releases the backend driver,
 * returning the container to the unset state.
 */
static void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
	int ret = 0;

	down_write(&group->group_rwsem);
	if (!group->container) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (group->container_users != 1) {
		ret = -EBUSY;
		goto out_unlock;
	}
	vfio_group_detach_container(group);

out_unlock:
	up_write(&group->group_rwsem);
	return ret;
}

/*
 * Map a user-supplied file back to its vfio_container, or NULL if the file
 * is not a /dev/vfio/vfio fd.
 */
static struct vfio_container *vfio_container_from_file(struct file *file)
{
	struct vfio_container *container;

	/* Sanity check, is this really our fd? */
	if (file->f_op != &vfio_fops)
		return NULL;

	container = file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */
	return container;
}

/*
 * Attach @group to @container (VFIO_GROUP_SET_CONTAINER).  Caller holds
 * group->group_rwsem for write.  For real (VFIO_IOMMU) groups this claims
 * DMA ownership of the iommu_group; noiommu groups require CAP_SYS_RAWIO
 * and may not share a container with real groups.
 */
static int vfio_container_attach_group(struct vfio_container *container,
				       struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held_write(&group->group_rwsem);

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}

/*
 * VFIO_GROUP_SET_CONTAINER handler: resolve the user's container fd and
 * attach this group to it.  Fails with -EINVAL if the group is already in a
 * container or the fd is not a vfio container.
 */
static int vfio_group_ioctl_set_container(struct vfio_group *group,
					  int __user *arg)
{
	struct vfio_container *container;
	struct fd f;
	int ret;
	int fd;

	if (get_user(fd, arg))
		return -EFAULT;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	down_write(&group->group_rwsem);
	if (group->container || WARN_ON(group->container_users)) {
		ret = -EINVAL;
		goto out_unlock;
	}
	container = vfio_container_from_file(f.file);
	ret = -EINVAL;
	if (container) {
		ret = vfio_container_attach_group(container, group);
		goto out_unlock;
	}

out_unlock:
	up_write(&group->group_rwsem);
	fdput(f);
	return ret;
}

static const struct file_operations vfio_device_fops;

/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}

/*
 * Take a container user reference on behalf of @device.  Caller holds
 * group->group_rwsem for write.  Requires the group to be attached to a
 * container with an iommu backend set; pins the group's opened file so the
 * group fd cannot fully close while the device is open.
 */
static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

/* Undo vfio_device_assign_container(): drop the user count and file pin */
static void vfio_device_unassign_container(struct vfio_device *device)
{
	down_write(&device->group->group_rwsem);
	WARN_ON(device->group->container_users <= 1);
	device->group->container_users--;
	fput(device->group->opened_file);
	up_write(&device->group->group_rwsem);
}

/*
 * Open @device: assign it to its group's container, call the driver's
 * open_device() on the first open, register it with the iommu backend and
 * return an anonymous file wrapping the device.  On success the caller's
 * registration reference on the device is owned by the returned file and
 * dropped in vfio_device_fops_release().
 */
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	/* Pin the device driver's module while the device is open */
	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	/* Drop the borrowed KVM pointer on the last failed open */
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}
13250f3e72b5SJason Gunthorpe 1326150ee2f9SJason Gunthorpe static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, 1327150ee2f9SJason Gunthorpe char __user *arg) 13280f3e72b5SJason Gunthorpe { 13290f3e72b5SJason Gunthorpe struct vfio_device *device; 13300f3e72b5SJason Gunthorpe struct file *filep; 1331150ee2f9SJason Gunthorpe char *buf; 13320f3e72b5SJason Gunthorpe int fdno; 13330f3e72b5SJason Gunthorpe int ret; 13340f3e72b5SJason Gunthorpe 1335150ee2f9SJason Gunthorpe buf = strndup_user(arg, PAGE_SIZE); 1336150ee2f9SJason Gunthorpe if (IS_ERR(buf)) 1337150ee2f9SJason Gunthorpe return PTR_ERR(buf); 1338150ee2f9SJason Gunthorpe 13390f3e72b5SJason Gunthorpe device = vfio_device_get_from_name(group, buf); 1340150ee2f9SJason Gunthorpe kfree(buf); 13410f3e72b5SJason Gunthorpe if (IS_ERR(device)) 13420f3e72b5SJason Gunthorpe return PTR_ERR(device); 13430f3e72b5SJason Gunthorpe 13440f3e72b5SJason Gunthorpe fdno = get_unused_fd_flags(O_CLOEXEC); 13450f3e72b5SJason Gunthorpe if (fdno < 0) { 13460f3e72b5SJason Gunthorpe ret = fdno; 13470f3e72b5SJason Gunthorpe goto err_put_device; 13480f3e72b5SJason Gunthorpe } 13490f3e72b5SJason Gunthorpe 13500f3e72b5SJason Gunthorpe filep = vfio_device_open(device); 13510f3e72b5SJason Gunthorpe if (IS_ERR(filep)) { 13520f3e72b5SJason Gunthorpe ret = PTR_ERR(filep); 13530f3e72b5SJason Gunthorpe goto err_put_fdno; 13540f3e72b5SJason Gunthorpe } 13550f3e72b5SJason Gunthorpe 13560f3e72b5SJason Gunthorpe fd_install(fdno, filep); 13570f3e72b5SJason Gunthorpe return fdno; 13580f3e72b5SJason Gunthorpe 13590f3e72b5SJason Gunthorpe err_put_fdno: 13600f3e72b5SJason Gunthorpe put_unused_fd(fdno); 13610f3e72b5SJason Gunthorpe err_put_device: 13624a725b8dSKevin Tian vfio_device_put_registration(device); 13630f3e72b5SJason Gunthorpe return ret; 13640f3e72b5SJason Gunthorpe } 13650f3e72b5SJason Gunthorpe 136699a27c08SJason Gunthorpe static int vfio_group_ioctl_get_status(struct vfio_group *group, 136799a27c08SJason Gunthorpe struct 
vfio_group_status __user *arg) 13680f3e72b5SJason Gunthorpe { 136999a27c08SJason Gunthorpe unsigned long minsz = offsetofend(struct vfio_group_status, flags); 13700f3e72b5SJason Gunthorpe struct vfio_group_status status; 13710f3e72b5SJason Gunthorpe 137299a27c08SJason Gunthorpe if (copy_from_user(&status, arg, minsz)) 13730f3e72b5SJason Gunthorpe return -EFAULT; 13740f3e72b5SJason Gunthorpe 13750f3e72b5SJason Gunthorpe if (status.argsz < minsz) 13760f3e72b5SJason Gunthorpe return -EINVAL; 13770f3e72b5SJason Gunthorpe 13780f3e72b5SJason Gunthorpe status.flags = 0; 13790f3e72b5SJason Gunthorpe 13800f3e72b5SJason Gunthorpe down_read(&group->group_rwsem); 13810f3e72b5SJason Gunthorpe if (group->container) 13820f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | 13830f3e72b5SJason Gunthorpe VFIO_GROUP_FLAGS_VIABLE; 13840f3e72b5SJason Gunthorpe else if (!iommu_group_dma_owner_claimed(group->iommu_group)) 13850f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_VIABLE; 13860f3e72b5SJason Gunthorpe up_read(&group->group_rwsem); 13870f3e72b5SJason Gunthorpe 138899a27c08SJason Gunthorpe if (copy_to_user(arg, &status, minsz)) 13890f3e72b5SJason Gunthorpe return -EFAULT; 139099a27c08SJason Gunthorpe return 0; 13910f3e72b5SJason Gunthorpe } 139299a27c08SJason Gunthorpe 139399a27c08SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep, 139499a27c08SJason Gunthorpe unsigned int cmd, unsigned long arg) 139599a27c08SJason Gunthorpe { 139699a27c08SJason Gunthorpe struct vfio_group *group = filep->private_data; 139799a27c08SJason Gunthorpe void __user *uarg = (void __user *)arg; 139899a27c08SJason Gunthorpe 139999a27c08SJason Gunthorpe switch (cmd) { 140099a27c08SJason Gunthorpe case VFIO_GROUP_GET_DEVICE_FD: 140199a27c08SJason Gunthorpe return vfio_group_ioctl_get_device_fd(group, uarg); 140299a27c08SJason Gunthorpe case VFIO_GROUP_GET_STATUS: 140399a27c08SJason Gunthorpe return vfio_group_ioctl_get_status(group, uarg); 
14040f3e72b5SJason Gunthorpe case VFIO_GROUP_SET_CONTAINER: 140567671f15SJason Gunthorpe return vfio_group_ioctl_set_container(group, uarg); 14060f3e72b5SJason Gunthorpe case VFIO_GROUP_UNSET_CONTAINER: 1407b3b43590SJason Gunthorpe return vfio_group_ioctl_unset_container(group); 140899a27c08SJason Gunthorpe default: 140999a27c08SJason Gunthorpe return -ENOTTY; 14100f3e72b5SJason Gunthorpe } 14110f3e72b5SJason Gunthorpe } 14120f3e72b5SJason Gunthorpe 14130f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep) 14140f3e72b5SJason Gunthorpe { 14150f3e72b5SJason Gunthorpe struct vfio_group *group = 14160f3e72b5SJason Gunthorpe container_of(inode->i_cdev, struct vfio_group, cdev); 14170f3e72b5SJason Gunthorpe int ret; 14180f3e72b5SJason Gunthorpe 14190f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 14200f3e72b5SJason Gunthorpe 14210f3e72b5SJason Gunthorpe /* users can be zero if this races with vfio_group_put() */ 14220f3e72b5SJason Gunthorpe if (!refcount_inc_not_zero(&group->users)) { 14230f3e72b5SJason Gunthorpe ret = -ENODEV; 14240f3e72b5SJason Gunthorpe goto err_unlock; 14250f3e72b5SJason Gunthorpe } 14260f3e72b5SJason Gunthorpe 14270f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { 14280f3e72b5SJason Gunthorpe ret = -EPERM; 14290f3e72b5SJason Gunthorpe goto err_put; 14300f3e72b5SJason Gunthorpe } 14310f3e72b5SJason Gunthorpe 14320f3e72b5SJason Gunthorpe /* 14330f3e72b5SJason Gunthorpe * Do we need multiple instances of the group open? Seems not. 
14340f3e72b5SJason Gunthorpe */ 14350f3e72b5SJason Gunthorpe if (group->opened_file) { 14360f3e72b5SJason Gunthorpe ret = -EBUSY; 14370f3e72b5SJason Gunthorpe goto err_put; 14380f3e72b5SJason Gunthorpe } 14390f3e72b5SJason Gunthorpe group->opened_file = filep; 14400f3e72b5SJason Gunthorpe filep->private_data = group; 14410f3e72b5SJason Gunthorpe 14420f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 14430f3e72b5SJason Gunthorpe return 0; 14440f3e72b5SJason Gunthorpe err_put: 14450f3e72b5SJason Gunthorpe vfio_group_put(group); 14460f3e72b5SJason Gunthorpe err_unlock: 14470f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 14480f3e72b5SJason Gunthorpe return ret; 14490f3e72b5SJason Gunthorpe } 14500f3e72b5SJason Gunthorpe 14510f3e72b5SJason Gunthorpe static int vfio_group_fops_release(struct inode *inode, struct file *filep) 14520f3e72b5SJason Gunthorpe { 14530f3e72b5SJason Gunthorpe struct vfio_group *group = filep->private_data; 14540f3e72b5SJason Gunthorpe 14550f3e72b5SJason Gunthorpe filep->private_data = NULL; 14560f3e72b5SJason Gunthorpe 14570f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 14580f3e72b5SJason Gunthorpe /* 14590f3e72b5SJason Gunthorpe * Device FDs hold a group file reference, therefore the group release 14600f3e72b5SJason Gunthorpe * is only called when there are no open devices. 
14610f3e72b5SJason Gunthorpe */ 14620f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 1463429a781cSJason Gunthorpe if (group->container) 1464429a781cSJason Gunthorpe vfio_group_detach_container(group); 14650f3e72b5SJason Gunthorpe group->opened_file = NULL; 14660f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 14670f3e72b5SJason Gunthorpe 14680f3e72b5SJason Gunthorpe vfio_group_put(group); 14690f3e72b5SJason Gunthorpe 14700f3e72b5SJason Gunthorpe return 0; 14710f3e72b5SJason Gunthorpe } 14720f3e72b5SJason Gunthorpe 14730f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops = { 14740f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 14750f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_group_fops_unl_ioctl, 14760f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 14770f3e72b5SJason Gunthorpe .open = vfio_group_fops_open, 14780f3e72b5SJason Gunthorpe .release = vfio_group_fops_release, 14790f3e72b5SJason Gunthorpe }; 14800f3e72b5SJason Gunthorpe 14810f3e72b5SJason Gunthorpe /* 14828e5c6995SAbhishek Sahu * Wrapper around pm_runtime_resume_and_get(). 14838e5c6995SAbhishek Sahu * Return error code on failure or 0 on success. 
 */
static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
{
	struct device *dev = device->dev;

	/* Only devices whose driver provides PM ops use runtime PM here. */
	if (dev->driver && dev->driver->pm) {
		int ret;

		ret = pm_runtime_resume_and_get(dev);
		if (ret) {
			dev_info_ratelimited(dev,
				"vfio: runtime resume failed %d\n", ret);
			return -EIO;
		}
	}

	return 0;
}

/*
 * Wrapper around pm_runtime_put().
 */
static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
{
	struct device *dev = device->dev;

	if (dev->driver && dev->driver->pm)
		pm_runtime_put(dev);
}

/*
 * VFIO Device fd
 */
/*
 * release() for a device fd: on the last close run the driver's
 * close_device(), clear the cached KVM pointer, and drop the module,
 * container and device-registration references taken in vfio_device_open().
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	/*
	 * NOTE(review): unlike register_device (first open only), this
	 * unregister_device callout is not gated on open_count == 1, so it
	 * runs on every release -- verify IOMMU drivers tolerate that.
	 */
	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	vfio_device_put_registration(device);

	return 0;
}

/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm.  The function may need to be called
 * multiple times before reaching new_fsm.
 *
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	/* vfio_from_fsm_table[cur][target] = next hop on the way to target. */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/* Feature flags a device must advertise for each state to be legal. */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);

/*
 * Convert the drivers's struct file into a FD number and return it to userspace
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	/* Copy the fd number out before publishing the file via fd_install. */
	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	/* On any failure the file reference passed in by the driver is dropped. */
	fput(filp);
	return ret;
}

/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: GET reports the current migration
 * state; SET asks the driver's mig_ops to move to a new state and may hand
 * a data fd back to userspace via vfio_ioct_mig_return_fd().  When no fd is
 * produced, data_fd is returned as -1.
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	/* A failed SET still reports data_fd = -1 before the error. */
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}

/*
 * VFIO_DEVICE_FEATURE_MIGRATION: report the device's supported migration
 * feature flags to userspace.
 */
static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

/* Ranges should fit into a single kernel page */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))

/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_START: validate the user-supplied IOVA
 * ranges (page alignment, no overflow, no overlap), build an interval tree
 * from them and pass it to the driver's log_start op.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bound user-controlled allocation size (see LOG_MAX_RANGES). */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/* log_start may update page_size (passed by pointer); report it back. */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}

static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
				       u32 flags, void __user *arg,
				       size_t argsz)
{
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET, 0);
	if (ret != 1)
		return ret;

	/* VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: stop device dirty tracking. */
	return device->log_ops->log_stop(device);
}

/* iova_bitmap_for_each() callback: forward to the driver's read-and-clear op. */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}

/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: walk the requested IOVA window
 * and fill the user-supplied bitmap with the dirty pages reported by the
 * driver's log_read_and_clear op.
 */
static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	/* Reporting granularity must be a power-of-two of at least 4K. */
	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}

/*
 * VFIO_DEVICE_FEATURE ioctl: validate the common feature header before
 * dispatching on the feature id.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

19610f3e72b5SJason Gunthorpe /* Check unknown flags */ 19620f3e72b5SJason Gunthorpe if (feature.flags & 19630f3e72b5SJason Gunthorpe ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET | 19640f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE)) 19650f3e72b5SJason Gunthorpe return -EINVAL; 19660f3e72b5SJason Gunthorpe 19670f3e72b5SJason Gunthorpe /* GET & SET are mutually exclusive except with PROBE */ 19680f3e72b5SJason Gunthorpe if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && 19690f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_SET) && 19700f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_GET)) 19710f3e72b5SJason Gunthorpe return -EINVAL; 19720f3e72b5SJason Gunthorpe 19730f3e72b5SJason Gunthorpe switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { 19740f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIGRATION: 19750f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_migration( 19760f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 19770f3e72b5SJason Gunthorpe feature.argsz - minsz); 19780f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: 19790f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_mig_device_state( 19800f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 19810f3e72b5SJason Gunthorpe feature.argsz - minsz); 198280c4b92aSYishai Hadas case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: 198380c4b92aSYishai Hadas return vfio_ioctl_device_feature_logging_start( 198480c4b92aSYishai Hadas device, feature.flags, arg->data, 198580c4b92aSYishai Hadas feature.argsz - minsz); 198680c4b92aSYishai Hadas case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: 198780c4b92aSYishai Hadas return vfio_ioctl_device_feature_logging_stop( 198880c4b92aSYishai Hadas device, feature.flags, arg->data, 198980c4b92aSYishai Hadas feature.argsz - minsz); 199080c4b92aSYishai Hadas case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: 199180c4b92aSYishai Hadas return vfio_ioctl_device_feature_logging_report( 
199280c4b92aSYishai Hadas device, feature.flags, arg->data, 199380c4b92aSYishai Hadas feature.argsz - minsz); 19940f3e72b5SJason Gunthorpe default: 19950f3e72b5SJason Gunthorpe if (unlikely(!device->ops->device_feature)) 19960f3e72b5SJason Gunthorpe return -EINVAL; 19970f3e72b5SJason Gunthorpe return device->ops->device_feature(device, feature.flags, 19980f3e72b5SJason Gunthorpe arg->data, 19990f3e72b5SJason Gunthorpe feature.argsz - minsz); 20000f3e72b5SJason Gunthorpe } 20010f3e72b5SJason Gunthorpe } 20020f3e72b5SJason Gunthorpe 20030f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep, 20040f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 20050f3e72b5SJason Gunthorpe { 20060f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 20078e5c6995SAbhishek Sahu int ret; 20088e5c6995SAbhishek Sahu 20098e5c6995SAbhishek Sahu ret = vfio_device_pm_runtime_get(device); 20108e5c6995SAbhishek Sahu if (ret) 20118e5c6995SAbhishek Sahu return ret; 20120f3e72b5SJason Gunthorpe 20130f3e72b5SJason Gunthorpe switch (cmd) { 20140f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE: 20158e5c6995SAbhishek Sahu ret = vfio_ioctl_device_feature(device, (void __user *)arg); 20168e5c6995SAbhishek Sahu break; 20178e5c6995SAbhishek Sahu 20180f3e72b5SJason Gunthorpe default: 20190f3e72b5SJason Gunthorpe if (unlikely(!device->ops->ioctl)) 20208e5c6995SAbhishek Sahu ret = -EINVAL; 20218e5c6995SAbhishek Sahu else 20228e5c6995SAbhishek Sahu ret = device->ops->ioctl(device, cmd, arg); 20238e5c6995SAbhishek Sahu break; 20240f3e72b5SJason Gunthorpe } 20258e5c6995SAbhishek Sahu 20268e5c6995SAbhishek Sahu vfio_device_pm_runtime_put(device); 20278e5c6995SAbhishek Sahu return ret; 20280f3e72b5SJason Gunthorpe } 20290f3e72b5SJason Gunthorpe 20300f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, 20310f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 20320f3e72b5SJason Gunthorpe { 20330f3e72b5SJason 
Gunthorpe struct vfio_device *device = filep->private_data; 20340f3e72b5SJason Gunthorpe 20350f3e72b5SJason Gunthorpe if (unlikely(!device->ops->read)) 20360f3e72b5SJason Gunthorpe return -EINVAL; 20370f3e72b5SJason Gunthorpe 20380f3e72b5SJason Gunthorpe return device->ops->read(device, buf, count, ppos); 20390f3e72b5SJason Gunthorpe } 20400f3e72b5SJason Gunthorpe 20410f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep, 20420f3e72b5SJason Gunthorpe const char __user *buf, 20430f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 20440f3e72b5SJason Gunthorpe { 20450f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 20460f3e72b5SJason Gunthorpe 20470f3e72b5SJason Gunthorpe if (unlikely(!device->ops->write)) 20480f3e72b5SJason Gunthorpe return -EINVAL; 20490f3e72b5SJason Gunthorpe 20500f3e72b5SJason Gunthorpe return device->ops->write(device, buf, count, ppos); 20510f3e72b5SJason Gunthorpe } 20520f3e72b5SJason Gunthorpe 20530f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 20540f3e72b5SJason Gunthorpe { 20550f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 20560f3e72b5SJason Gunthorpe 20570f3e72b5SJason Gunthorpe if (unlikely(!device->ops->mmap)) 20580f3e72b5SJason Gunthorpe return -EINVAL; 20590f3e72b5SJason Gunthorpe 20600f3e72b5SJason Gunthorpe return device->ops->mmap(device, vma); 20610f3e72b5SJason Gunthorpe } 20620f3e72b5SJason Gunthorpe 20630f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops = { 20640f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 20650f3e72b5SJason Gunthorpe .release = vfio_device_fops_release, 20660f3e72b5SJason Gunthorpe .read = vfio_device_fops_read, 20670f3e72b5SJason Gunthorpe .write = vfio_device_fops_write, 20680f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_device_fops_unl_ioctl, 20690f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 20700f3e72b5SJason Gunthorpe .mmap = 
vfio_device_fops_mmap, 20710f3e72b5SJason Gunthorpe }; 20720f3e72b5SJason Gunthorpe 20730f3e72b5SJason Gunthorpe /** 20740f3e72b5SJason Gunthorpe * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file 20750f3e72b5SJason Gunthorpe * @file: VFIO group file 20760f3e72b5SJason Gunthorpe * 20770f3e72b5SJason Gunthorpe * The returned iommu_group is valid as long as a ref is held on the file. 20780f3e72b5SJason Gunthorpe */ 20790f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file) 20800f3e72b5SJason Gunthorpe { 20810f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 20820f3e72b5SJason Gunthorpe 20830f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 20840f3e72b5SJason Gunthorpe return NULL; 20850f3e72b5SJason Gunthorpe return group->iommu_group; 20860f3e72b5SJason Gunthorpe } 20870f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group); 20880f3e72b5SJason Gunthorpe 20890f3e72b5SJason Gunthorpe /** 20900f3e72b5SJason Gunthorpe * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file 20910f3e72b5SJason Gunthorpe * is always CPU cache coherent 20920f3e72b5SJason Gunthorpe * @file: VFIO group file 20930f3e72b5SJason Gunthorpe * 20940f3e72b5SJason Gunthorpe * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop 20950f3e72b5SJason Gunthorpe * bit in DMA transactions. A return of false indicates that the user has 20960f3e72b5SJason Gunthorpe * rights to access additional instructions such as wbinvd on x86. 
20970f3e72b5SJason Gunthorpe */ 20980f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file) 20990f3e72b5SJason Gunthorpe { 21000f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 21010f3e72b5SJason Gunthorpe bool ret; 21020f3e72b5SJason Gunthorpe 21030f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 21040f3e72b5SJason Gunthorpe return true; 21050f3e72b5SJason Gunthorpe 21060f3e72b5SJason Gunthorpe down_read(&group->group_rwsem); 21070f3e72b5SJason Gunthorpe if (group->container) { 21080f3e72b5SJason Gunthorpe ret = vfio_ioctl_check_extension(group->container, 21090f3e72b5SJason Gunthorpe VFIO_DMA_CC_IOMMU); 21100f3e72b5SJason Gunthorpe } else { 21110f3e72b5SJason Gunthorpe /* 21120f3e72b5SJason Gunthorpe * Since the coherency state is determined only once a container 21130f3e72b5SJason Gunthorpe * is attached the user must do so before they can prove they 21140f3e72b5SJason Gunthorpe * have permission. 21150f3e72b5SJason Gunthorpe */ 21160f3e72b5SJason Gunthorpe ret = true; 21170f3e72b5SJason Gunthorpe } 21180f3e72b5SJason Gunthorpe up_read(&group->group_rwsem); 21190f3e72b5SJason Gunthorpe return ret; 21200f3e72b5SJason Gunthorpe } 21210f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); 21220f3e72b5SJason Gunthorpe 21230f3e72b5SJason Gunthorpe /** 21240f3e72b5SJason Gunthorpe * vfio_file_set_kvm - Link a kvm with VFIO drivers 21250f3e72b5SJason Gunthorpe * @file: VFIO group file 21260f3e72b5SJason Gunthorpe * @kvm: KVM to link 21270f3e72b5SJason Gunthorpe * 21280f3e72b5SJason Gunthorpe * When a VFIO device is first opened the KVM will be available in 21290f3e72b5SJason Gunthorpe * device->kvm if one was associated with the group. 
21300f3e72b5SJason Gunthorpe */ 21310f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm) 21320f3e72b5SJason Gunthorpe { 21330f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 21340f3e72b5SJason Gunthorpe 21350f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 21360f3e72b5SJason Gunthorpe return; 21370f3e72b5SJason Gunthorpe 21380f3e72b5SJason Gunthorpe down_write(&group->group_rwsem); 21390f3e72b5SJason Gunthorpe group->kvm = kvm; 21400f3e72b5SJason Gunthorpe up_write(&group->group_rwsem); 21410f3e72b5SJason Gunthorpe } 21420f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm); 21430f3e72b5SJason Gunthorpe 21440f3e72b5SJason Gunthorpe /** 21450f3e72b5SJason Gunthorpe * vfio_file_has_dev - True if the VFIO file is a handle for device 21460f3e72b5SJason Gunthorpe * @file: VFIO file to check 21470f3e72b5SJason Gunthorpe * @device: Device that must be part of the file 21480f3e72b5SJason Gunthorpe * 21490f3e72b5SJason Gunthorpe * Returns true if given file has permission to manipulate the given device. 
21500f3e72b5SJason Gunthorpe */ 21510f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device) 21520f3e72b5SJason Gunthorpe { 21530f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 21540f3e72b5SJason Gunthorpe 21550f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 21560f3e72b5SJason Gunthorpe return false; 21570f3e72b5SJason Gunthorpe 21580f3e72b5SJason Gunthorpe return group == device->group; 21590f3e72b5SJason Gunthorpe } 21600f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev); 21610f3e72b5SJason Gunthorpe 21620f3e72b5SJason Gunthorpe /* 21630f3e72b5SJason Gunthorpe * Sub-module support 21640f3e72b5SJason Gunthorpe */ 21650f3e72b5SJason Gunthorpe /* 21660f3e72b5SJason Gunthorpe * Helper for managing a buffer of info chain capabilities, allocate or 21670f3e72b5SJason Gunthorpe * reallocate a buffer with additional @size, filling in @id and @version 21680f3e72b5SJason Gunthorpe * of the capability. A pointer to the new capability is returned. 21690f3e72b5SJason Gunthorpe * 21700f3e72b5SJason Gunthorpe * NB. The chain is based at the head of the buffer, so new entries are 21710f3e72b5SJason Gunthorpe * added to the tail, vfio_info_cap_shift() should be called to fixup the 21720f3e72b5SJason Gunthorpe * next offsets prior to copying to the user buffer. 
21730f3e72b5SJason Gunthorpe */ 21740f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, 21750f3e72b5SJason Gunthorpe size_t size, u16 id, u16 version) 21760f3e72b5SJason Gunthorpe { 21770f3e72b5SJason Gunthorpe void *buf; 21780f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header, *tmp; 21790f3e72b5SJason Gunthorpe 21800f3e72b5SJason Gunthorpe buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); 21810f3e72b5SJason Gunthorpe if (!buf) { 21820f3e72b5SJason Gunthorpe kfree(caps->buf); 21830f3e72b5SJason Gunthorpe caps->buf = NULL; 21840f3e72b5SJason Gunthorpe caps->size = 0; 21850f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 21860f3e72b5SJason Gunthorpe } 21870f3e72b5SJason Gunthorpe 21880f3e72b5SJason Gunthorpe caps->buf = buf; 21890f3e72b5SJason Gunthorpe header = buf + caps->size; 21900f3e72b5SJason Gunthorpe 21910f3e72b5SJason Gunthorpe /* Eventually copied to user buffer, zero */ 21920f3e72b5SJason Gunthorpe memset(header, 0, size); 21930f3e72b5SJason Gunthorpe 21940f3e72b5SJason Gunthorpe header->id = id; 21950f3e72b5SJason Gunthorpe header->version = version; 21960f3e72b5SJason Gunthorpe 21970f3e72b5SJason Gunthorpe /* Add to the end of the capability chain */ 21980f3e72b5SJason Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next) 21990f3e72b5SJason Gunthorpe ; /* nothing */ 22000f3e72b5SJason Gunthorpe 22010f3e72b5SJason Gunthorpe tmp->next = caps->size; 22020f3e72b5SJason Gunthorpe caps->size += size; 22030f3e72b5SJason Gunthorpe 22040f3e72b5SJason Gunthorpe return header; 22050f3e72b5SJason Gunthorpe } 22060f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add); 22070f3e72b5SJason Gunthorpe 22080f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) 22090f3e72b5SJason Gunthorpe { 22100f3e72b5SJason Gunthorpe struct vfio_info_cap_header *tmp; 22110f3e72b5SJason Gunthorpe void *buf = (void *)caps->buf; 22120f3e72b5SJason Gunthorpe 22130f3e72b5SJason 
Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) 22140f3e72b5SJason Gunthorpe tmp->next += offset; 22150f3e72b5SJason Gunthorpe } 22160f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift); 22170f3e72b5SJason Gunthorpe 22180f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps, 22190f3e72b5SJason Gunthorpe struct vfio_info_cap_header *cap, size_t size) 22200f3e72b5SJason Gunthorpe { 22210f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header; 22220f3e72b5SJason Gunthorpe 22230f3e72b5SJason Gunthorpe header = vfio_info_cap_add(caps, size, cap->id, cap->version); 22240f3e72b5SJason Gunthorpe if (IS_ERR(header)) 22250f3e72b5SJason Gunthorpe return PTR_ERR(header); 22260f3e72b5SJason Gunthorpe 22270f3e72b5SJason Gunthorpe memcpy(header + 1, cap + 1, size - sizeof(*header)); 22280f3e72b5SJason Gunthorpe 22290f3e72b5SJason Gunthorpe return 0; 22300f3e72b5SJason Gunthorpe } 22310f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability); 22320f3e72b5SJason Gunthorpe 22330f3e72b5SJason Gunthorpe int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, 22340f3e72b5SJason Gunthorpe int max_irq_type, size_t *data_size) 22350f3e72b5SJason Gunthorpe { 22360f3e72b5SJason Gunthorpe unsigned long minsz; 22370f3e72b5SJason Gunthorpe size_t size; 22380f3e72b5SJason Gunthorpe 22390f3e72b5SJason Gunthorpe minsz = offsetofend(struct vfio_irq_set, count); 22400f3e72b5SJason Gunthorpe 22410f3e72b5SJason Gunthorpe if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || 22420f3e72b5SJason Gunthorpe (hdr->count >= (U32_MAX - hdr->start)) || 22430f3e72b5SJason Gunthorpe (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | 22440f3e72b5SJason Gunthorpe VFIO_IRQ_SET_ACTION_TYPE_MASK))) 22450f3e72b5SJason Gunthorpe return -EINVAL; 22460f3e72b5SJason Gunthorpe 22470f3e72b5SJason Gunthorpe if (data_size) 22480f3e72b5SJason Gunthorpe *data_size = 0; 22490f3e72b5SJason Gunthorpe 22500f3e72b5SJason Gunthorpe if 
(hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) 22510f3e72b5SJason Gunthorpe return -EINVAL; 22520f3e72b5SJason Gunthorpe 22530f3e72b5SJason Gunthorpe switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { 22540f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_NONE: 22550f3e72b5SJason Gunthorpe size = 0; 22560f3e72b5SJason Gunthorpe break; 22570f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_BOOL: 22580f3e72b5SJason Gunthorpe size = sizeof(uint8_t); 22590f3e72b5SJason Gunthorpe break; 22600f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_EVENTFD: 22610f3e72b5SJason Gunthorpe size = sizeof(int32_t); 22620f3e72b5SJason Gunthorpe break; 22630f3e72b5SJason Gunthorpe default: 22640f3e72b5SJason Gunthorpe return -EINVAL; 22650f3e72b5SJason Gunthorpe } 22660f3e72b5SJason Gunthorpe 22670f3e72b5SJason Gunthorpe if (size) { 22680f3e72b5SJason Gunthorpe if (hdr->argsz - minsz < hdr->count * size) 22690f3e72b5SJason Gunthorpe return -EINVAL; 22700f3e72b5SJason Gunthorpe 22710f3e72b5SJason Gunthorpe if (!data_size) 22720f3e72b5SJason Gunthorpe return -EINVAL; 22730f3e72b5SJason Gunthorpe 22740f3e72b5SJason Gunthorpe *data_size = hdr->count * size; 22750f3e72b5SJason Gunthorpe } 22760f3e72b5SJason Gunthorpe 22770f3e72b5SJason Gunthorpe return 0; 22780f3e72b5SJason Gunthorpe } 22790f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); 22800f3e72b5SJason Gunthorpe 22810f3e72b5SJason Gunthorpe /* 22820f3e72b5SJason Gunthorpe * Pin contiguous user pages and return their associated host pages for local 22830f3e72b5SJason Gunthorpe * domain only. 22840f3e72b5SJason Gunthorpe * @device [in] : device 22850f3e72b5SJason Gunthorpe * @iova [in] : starting IOVA of user pages to be pinned. 22860f3e72b5SJason Gunthorpe * @npage [in] : count of pages to be pinned. This count should not 22870f3e72b5SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 
22880f3e72b5SJason Gunthorpe * @prot [in] : protection flags 22890f3e72b5SJason Gunthorpe * @pages[out] : array of host pages 22900f3e72b5SJason Gunthorpe * Return error or number of pages pinned. 229121c13829SJason Gunthorpe * 229221c13829SJason Gunthorpe * A driver may only call this function if the vfio_device was created 229321c13829SJason Gunthorpe * by vfio_register_emulated_iommu_dev(). 22940f3e72b5SJason Gunthorpe */ 22950f3e72b5SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, 22960f3e72b5SJason Gunthorpe int npage, int prot, struct page **pages) 22970f3e72b5SJason Gunthorpe { 22980f3e72b5SJason Gunthorpe struct vfio_container *container; 22990f3e72b5SJason Gunthorpe struct vfio_group *group = device->group; 23000f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 23010f3e72b5SJason Gunthorpe int ret; 23020f3e72b5SJason Gunthorpe 23030f3e72b5SJason Gunthorpe if (!pages || !npage || !vfio_assert_device_open(device)) 23040f3e72b5SJason Gunthorpe return -EINVAL; 23050f3e72b5SJason Gunthorpe 23060f3e72b5SJason Gunthorpe if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 23070f3e72b5SJason Gunthorpe return -E2BIG; 23080f3e72b5SJason Gunthorpe 23090f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 23100f3e72b5SJason Gunthorpe container = group->container; 23110f3e72b5SJason Gunthorpe driver = container->iommu_driver; 23120f3e72b5SJason Gunthorpe if (likely(driver && driver->ops->pin_pages)) 23130f3e72b5SJason Gunthorpe ret = driver->ops->pin_pages(container->iommu_data, 23140f3e72b5SJason Gunthorpe group->iommu_group, iova, 23150f3e72b5SJason Gunthorpe npage, prot, pages); 23160f3e72b5SJason Gunthorpe else 23170f3e72b5SJason Gunthorpe ret = -ENOTTY; 23180f3e72b5SJason Gunthorpe 23190f3e72b5SJason Gunthorpe return ret; 23200f3e72b5SJason Gunthorpe } 23210f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages); 23220f3e72b5SJason Gunthorpe 23230f3e72b5SJason Gunthorpe /* 23240f3e72b5SJason Gunthorpe * 
Unpin contiguous host pages for local domain only. 23250f3e72b5SJason Gunthorpe * @device [in] : device 23260f3e72b5SJason Gunthorpe * @iova [in] : starting address of user pages to be unpinned. 23270f3e72b5SJason Gunthorpe * @npage [in] : count of pages to be unpinned. This count should not 23280f3e72b5SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 23290f3e72b5SJason Gunthorpe */ 23300f3e72b5SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) 23310f3e72b5SJason Gunthorpe { 23320f3e72b5SJason Gunthorpe struct vfio_container *container; 23330f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 23340f3e72b5SJason Gunthorpe 23350f3e72b5SJason Gunthorpe if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) 23360f3e72b5SJason Gunthorpe return; 23370f3e72b5SJason Gunthorpe 23380f3e72b5SJason Gunthorpe if (WARN_ON(!vfio_assert_device_open(device))) 23390f3e72b5SJason Gunthorpe return; 23400f3e72b5SJason Gunthorpe 23410f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 23420f3e72b5SJason Gunthorpe container = device->group->container; 23430f3e72b5SJason Gunthorpe driver = container->iommu_driver; 23440f3e72b5SJason Gunthorpe 23450f3e72b5SJason Gunthorpe driver->ops->unpin_pages(container->iommu_data, iova, npage); 23460f3e72b5SJason Gunthorpe } 23470f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages); 23480f3e72b5SJason Gunthorpe 23490f3e72b5SJason Gunthorpe /* 23500f3e72b5SJason Gunthorpe * This interface allows the CPUs to perform some sort of virtual DMA on 23510f3e72b5SJason Gunthorpe * behalf of the device. 23520f3e72b5SJason Gunthorpe * 23530f3e72b5SJason Gunthorpe * CPUs read/write from/into a range of IOVAs pointing to user space memory 23540f3e72b5SJason Gunthorpe * into/from a kernel buffer. 
23550f3e72b5SJason Gunthorpe * 23560f3e72b5SJason Gunthorpe * As the read/write of user space memory is conducted via the CPUs and is 23570f3e72b5SJason Gunthorpe * not a real device DMA, it is not necessary to pin the user space memory. 23580f3e72b5SJason Gunthorpe * 23590f3e72b5SJason Gunthorpe * @device [in] : VFIO device 23600f3e72b5SJason Gunthorpe * @iova [in] : base IOVA of a user space buffer 23610f3e72b5SJason Gunthorpe * @data [in] : pointer to kernel buffer 23620f3e72b5SJason Gunthorpe * @len [in] : kernel buffer length 23630f3e72b5SJason Gunthorpe * @write : indicate read or write 23640f3e72b5SJason Gunthorpe * Return error code on failure or 0 on success. 23650f3e72b5SJason Gunthorpe */ 23660f3e72b5SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, 23670f3e72b5SJason Gunthorpe size_t len, bool write) 23680f3e72b5SJason Gunthorpe { 23690f3e72b5SJason Gunthorpe struct vfio_container *container; 23700f3e72b5SJason Gunthorpe struct vfio_iommu_driver *driver; 23710f3e72b5SJason Gunthorpe int ret = 0; 23720f3e72b5SJason Gunthorpe 23730f3e72b5SJason Gunthorpe if (!data || len <= 0 || !vfio_assert_device_open(device)) 23740f3e72b5SJason Gunthorpe return -EINVAL; 23750f3e72b5SJason Gunthorpe 23760f3e72b5SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 23770f3e72b5SJason Gunthorpe container = device->group->container; 23780f3e72b5SJason Gunthorpe driver = container->iommu_driver; 23790f3e72b5SJason Gunthorpe 23800f3e72b5SJason Gunthorpe if (likely(driver && driver->ops->dma_rw)) 23810f3e72b5SJason Gunthorpe ret = driver->ops->dma_rw(container->iommu_data, 23820f3e72b5SJason Gunthorpe iova, data, len, write); 23830f3e72b5SJason Gunthorpe else 23840f3e72b5SJason Gunthorpe ret = -ENOTTY; 23850f3e72b5SJason Gunthorpe return ret; 23860f3e72b5SJason Gunthorpe } 23870f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw); 23880f3e72b5SJason Gunthorpe 23890f3e72b5SJason Gunthorpe /* 23900f3e72b5SJason 
Gunthorpe * Module/class support 23910f3e72b5SJason Gunthorpe */ 23920f3e72b5SJason Gunthorpe static char *vfio_devnode(struct device *dev, umode_t *mode) 23930f3e72b5SJason Gunthorpe { 23940f3e72b5SJason Gunthorpe return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); 23950f3e72b5SJason Gunthorpe } 23960f3e72b5SJason Gunthorpe 23970f3e72b5SJason Gunthorpe static struct miscdevice vfio_dev = { 23980f3e72b5SJason Gunthorpe .minor = VFIO_MINOR, 23990f3e72b5SJason Gunthorpe .name = "vfio", 24000f3e72b5SJason Gunthorpe .fops = &vfio_fops, 24010f3e72b5SJason Gunthorpe .nodename = "vfio/vfio", 24020f3e72b5SJason Gunthorpe .mode = S_IRUGO | S_IWUGO, 24030f3e72b5SJason Gunthorpe }; 24040f3e72b5SJason Gunthorpe 24050f3e72b5SJason Gunthorpe static int __init vfio_init(void) 24060f3e72b5SJason Gunthorpe { 24070f3e72b5SJason Gunthorpe int ret; 24080f3e72b5SJason Gunthorpe 24090f3e72b5SJason Gunthorpe ida_init(&vfio.group_ida); 24103c28a761SYi Liu ida_init(&vfio.device_ida); 24110f3e72b5SJason Gunthorpe mutex_init(&vfio.group_lock); 24120f3e72b5SJason Gunthorpe mutex_init(&vfio.iommu_drivers_lock); 24130f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&vfio.group_list); 24140f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&vfio.iommu_drivers_list); 24150f3e72b5SJason Gunthorpe 24160f3e72b5SJason Gunthorpe ret = misc_register(&vfio_dev); 24170f3e72b5SJason Gunthorpe if (ret) { 24180f3e72b5SJason Gunthorpe pr_err("vfio: misc device register failed\n"); 24190f3e72b5SJason Gunthorpe return ret; 24200f3e72b5SJason Gunthorpe } 24210f3e72b5SJason Gunthorpe 24220f3e72b5SJason Gunthorpe /* /dev/vfio/$GROUP */ 24230f3e72b5SJason Gunthorpe vfio.class = class_create(THIS_MODULE, "vfio"); 24240f3e72b5SJason Gunthorpe if (IS_ERR(vfio.class)) { 24250f3e72b5SJason Gunthorpe ret = PTR_ERR(vfio.class); 24263c28a761SYi Liu goto err_group_class; 24270f3e72b5SJason Gunthorpe } 24280f3e72b5SJason Gunthorpe 24290f3e72b5SJason Gunthorpe vfio.class->devnode = vfio_devnode; 24300f3e72b5SJason Gunthorpe 24313c28a761SYi 
Liu /* /sys/class/vfio-dev/vfioX */ 24323c28a761SYi Liu vfio.device_class = class_create(THIS_MODULE, "vfio-dev"); 24333c28a761SYi Liu if (IS_ERR(vfio.device_class)) { 24343c28a761SYi Liu ret = PTR_ERR(vfio.device_class); 24353c28a761SYi Liu goto err_dev_class; 24363c28a761SYi Liu } 24373c28a761SYi Liu 24380f3e72b5SJason Gunthorpe ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); 24390f3e72b5SJason Gunthorpe if (ret) 24400f3e72b5SJason Gunthorpe goto err_alloc_chrdev; 24410f3e72b5SJason Gunthorpe 24420f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 24430f3e72b5SJason Gunthorpe ret = vfio_register_iommu_driver(&vfio_noiommu_ops); 24440f3e72b5SJason Gunthorpe #endif 24450f3e72b5SJason Gunthorpe if (ret) 24460f3e72b5SJason Gunthorpe goto err_driver_register; 24470f3e72b5SJason Gunthorpe 24480f3e72b5SJason Gunthorpe pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 24490f3e72b5SJason Gunthorpe return 0; 24500f3e72b5SJason Gunthorpe 24510f3e72b5SJason Gunthorpe err_driver_register: 24520f3e72b5SJason Gunthorpe unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); 24530f3e72b5SJason Gunthorpe err_alloc_chrdev: 24543c28a761SYi Liu class_destroy(vfio.device_class); 24553c28a761SYi Liu vfio.device_class = NULL; 24563c28a761SYi Liu err_dev_class: 24570f3e72b5SJason Gunthorpe class_destroy(vfio.class); 24580f3e72b5SJason Gunthorpe vfio.class = NULL; 24593c28a761SYi Liu err_group_class: 24600f3e72b5SJason Gunthorpe misc_deregister(&vfio_dev); 24610f3e72b5SJason Gunthorpe return ret; 24620f3e72b5SJason Gunthorpe } 24630f3e72b5SJason Gunthorpe 24640f3e72b5SJason Gunthorpe static void __exit vfio_cleanup(void) 24650f3e72b5SJason Gunthorpe { 24660f3e72b5SJason Gunthorpe WARN_ON(!list_empty(&vfio.group_list)); 24670f3e72b5SJason Gunthorpe 24680f3e72b5SJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 24690f3e72b5SJason Gunthorpe vfio_unregister_iommu_driver(&vfio_noiommu_ops); 24700f3e72b5SJason Gunthorpe #endif 24713c28a761SYi Liu 
ida_destroy(&vfio.device_ida); 24720f3e72b5SJason Gunthorpe ida_destroy(&vfio.group_ida); 24730f3e72b5SJason Gunthorpe unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); 24743c28a761SYi Liu class_destroy(vfio.device_class); 24753c28a761SYi Liu vfio.device_class = NULL; 24760f3e72b5SJason Gunthorpe class_destroy(vfio.class); 24770f3e72b5SJason Gunthorpe vfio.class = NULL; 24780f3e72b5SJason Gunthorpe misc_deregister(&vfio_dev); 24790f3e72b5SJason Gunthorpe xa_destroy(&vfio_device_set_xa); 24800f3e72b5SJason Gunthorpe } 24810f3e72b5SJason Gunthorpe 24820f3e72b5SJason Gunthorpe module_init(vfio_init); 24830f3e72b5SJason Gunthorpe module_exit(vfio_cleanup); 24840f3e72b5SJason Gunthorpe 24850f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION); 24860f3e72b5SJason Gunthorpe MODULE_LICENSE("GPL v2"); 24870f3e72b5SJason Gunthorpe MODULE_AUTHOR(DRIVER_AUTHOR); 24880f3e72b5SJason Gunthorpe MODULE_DESCRIPTION(DRIVER_DESC); 24890f3e72b5SJason Gunthorpe MODULE_ALIAS_MISCDEV(VFIO_MINOR); 24900f3e72b5SJason Gunthorpe MODULE_ALIAS("devname:vfio/vfio"); 24910f3e72b5SJason Gunthorpe MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); 2492