// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *	Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include "vfio.h"

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;
	dev_t				group_devt;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_group {
	struct device			dev;
	struct cdev			cdev;
	refcount_t			users;
	unsigned int			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct list_head		vfio_next;
	struct list_head		container_next;
	enum vfio_group_type		type;
	unsigned int			dev_counter;
	struct rw_semaphore		group_rwsem;
	struct kvm			*kvm;
	struct file			*opened_file;
	struct blocking_notifier_head	notifier;
};
#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif

static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;

int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
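
/*
 * Usage sketch (illustrative, not part of this file): a driver whose
 * devices must be reset together can share one dev_set by passing the
 * same set_id pointer from each probe, the way vfio-pci style drivers
 * use a shared object such as the pci_slot.  "my_vdev"/"my" are
 * hypothetical names:
 *
 *	struct my_vdev {
 *		struct vfio_device vdev;
 *	};
 *
 *	// in probe, before vfio_register_group_dev():
 *	ret = vfio_assign_device_set(&my->vdev, pdev->slot);
 *	if (ret)
 *		return ret;
 *
 * All devices that pass the same pointer land in the same set and
 * serialize on dev_set->lock.
 */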
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
#else
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return true;
}
#endif /* CONFIG_VFIO_NOIOMMU */

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
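
/*
 * Usage sketch (illustrative): an IOMMU backend module typically
 * registers its ops at module load and unregisters them at unload, the
 * way vfio_iommu_type1 does.  "my_iommu_*" are hypothetical names:
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_iommu_open,
 *		.release	= my_iommu_release,
 *		.ioctl		= my_iommu_ioctl,
 *		.attach_group	= my_iommu_attach_group,
 *		.detach_group	= my_iommu_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_iommu_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 */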
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);

static void vfio_group_get(struct vfio_group *group);

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
/*
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *
__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			return group;
		}
	}
	return NULL;
}

static struct vfio_group *
vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = __vfio_group_get_from_iommu(iommu_group);
	mutex_unlock(&vfio.group_lock);
	return group;
}

static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	iommu_group_put(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}

static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->users, 1);
	init_rwsem(&group->group_rwsem);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	put_device(&group->dev);
	return ret;
}
static void vfio_group_put(struct vfio_group *group)
{
	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
		return;

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group. Since all
	 * pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);

	list_del(&group->vfio_next);
	cdev_device_del(&group->cdev, &group->dev);
	mutex_unlock(&vfio.group_lock);

	put_device(&group->dev);
}

static void vfio_group_get(struct vfio_group *group)
{
	refcount_inc(&group->users);
}

/*
 * Device objects - create, release, get, put, search
 */
/* Device reference always implies a group reference */
static void vfio_device_put(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}

static bool vfio_device_try_get(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev && vfio_device_try_get(device)) {
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}
/*
 * VFIO driver API
 */
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
			 const struct vfio_device_ops *ops)
{
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;
}
EXPORT_SYMBOL_GPL(vfio_init_group_dev);

void vfio_uninit_group_dev(struct vfio_device *device)
{
	vfio_release_device_set(device);
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
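
/*
 * Usage sketch (illustrative): drivers embed struct vfio_device in
 * their own per-device state and initialize it before registering.
 * "my_vdev" and "my_dev_ops" are hypothetical names:
 *
 *	struct my_vdev {
 *		struct vfio_device vdev;
 *		// driver-private fields follow
 *	};
 *
 *	struct my_vdev *my = kzalloc(sizeof(*my), GFP_KERNEL);
 *
 *	vfio_init_group_dev(&my->vdev, dev, &my_dev_ops);
 *	// ... driver-specific setup, then vfio_register_group_dev()
 *
 * vfio_uninit_group_dev() must be called before the memory holding the
 * vfio_device is freed.
 */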
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;

	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put(existing_device);
		if (group->type == VFIO_NO_IOMMU ||
		    group->type == VFIO_EMULATED_IOMMU)
			iommu_group_remove_device(device->dev);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	group->dev_counter++;
	mutex_unlock(&group->device_lock);

	return 0;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
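
/*
 * Usage sketch (illustrative): a typical probe path initializes the
 * embedded vfio_device and registers it, unwinding in reverse order on
 * failure.  "my" and "my_dev_ops" are hypothetical:
 *
 *	vfio_init_group_dev(&my->vdev, dev, &my_dev_ops);
 *	ret = vfio_register_group_dev(&my->vdev);
 *	if (ret) {
 *		vfio_uninit_group_dev(&my->vdev);
 *		return ret;
 *	}
 */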
/*
 * Register a virtual device without IOMMU backing.  The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
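
/*
 * Usage sketch (illustrative): mediated drivers whose devices have no
 * backing iommu_group (mdev-style drivers, for example) register this
 * way instead of vfio_register_group_dev():
 *
 *	vfio_init_group_dev(&my->vdev, dev, &my_dev_ops);
 *	ret = vfio_register_emulated_iommu_dev(&my->vdev);
 */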
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device hold references, so
 * removal blocks (and ops->request() is re-issued) until they are
 * released.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	vfio_device_put(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
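
/*
 * Usage sketch (illustrative): a driver's remove path mirrors probe,
 * unregistering before tearing down its private state ("my" is
 * hypothetical):
 *
 *	vfio_unregister_group_dev(&my->vdev);	// may block on open fds
 *	vfio_uninit_group_dev(&my->vdev);
 *	kfree(my);
 */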
/*
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
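
/*
 * Usage sketch (illustrative): userspace probes the container fd like
 * this; see Documentation/driver-api/vfio.rst for the full sequence:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		// unknown API version
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		// the type1 iommu backend is not available
 */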
/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
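
/*
 * Usage sketch (illustrative): because of the privilege model above,
 * userspace must attach a group before it may select an iommu backend:
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *
 * Calling VFIO_SET_IOMMU on an empty container fails with -EINVAL.
 */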
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}
/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container)
		return -EINVAL;
	if (group->container_users != 1)
		return -EBUSY;
	__vfio_group_unset_container(group);
	return 0;
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held_write(&group->group_rwsem);

	if (group->container || WARN_ON(group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto unlock_out;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto unlock_out;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}
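
/*
 * Usage sketch (illustrative): the userspace side of the above,
 * checking group viability before attaching it to a container (group
 * number 26 is an arbitrary example, as in the VFIO documentation):
 *
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		// not all devices in the group are bound to vfio drivers
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 */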
static const struct file_operations vfio_device_fops;

/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}

static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

static void vfio_device_unassign_container(struct vfio_device *device)
{
	down_write(&device->group->group_rwsem);
	WARN_ON(device->group->container_users <= 1);
	device->group->container_users--;
	fput(device->group->opened_file);
	up_write(&device->group->group_rwsem);
}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock. If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented. Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}
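
/*
 * Illustrative sketch (not part of this file): the open_device/close_device
 * pairing that vfio_device_open() drives, seen from a driver's side. The
 * "my_vfio_*" names are hypothetical; a real driver fills in the remaining
 * vfio_device_ops callbacks as needed.
 *
 *	static int my_vfio_open_device(struct vfio_device *vdev)
 *	{
 *		// Called only on the first open (open_count 0 -> 1);
 *		// vdev->kvm, if any, is already populated at this point.
 *		return 0;
 *	}
 *
 *	static void my_vfio_close_device(struct vfio_device *vdev)
 *	{
 *		// Called on the last close; release anything taken in
 *		// open_device, including any reference on vdev->kvm.
 *	}
 *
 *	static const struct vfio_device_ops my_vfio_ops = {
 *		.name		= "my-vfio-driver",
 *		.open_device	= my_vfio_open_device,
 *		.close_device	= my_vfio_close_device,
 *	};
 */
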
static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int fdno;
	int ret;

	device = vfio_device_get_from_name(group, buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put(device);
	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		down_read(&group->group_rwsem);
		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
					VFIO_GROUP_FLAGS_VIABLE;
		else if (!iommu_group_dma_owner_claimed(group->iommu_group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
		up_read(&group->group_rwsem);

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		down_write(&group->group_rwsem);
		ret = vfio_group_set_container(group, fd);
		up_write(&group->group_rwsem);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		down_write(&group->group_rwsem);
		ret = vfio_group_unset_container(group);
		up_write(&group->group_rwsem);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}
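
/*
 * Minimal userspace sketch of the ioctl flow these handlers implement
 * (error handling omitted; the group number and device name are examples):
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (status.flags & VFIO_GROUP_FLAGS_VIABLE) {
 *		ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *		ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *		int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD,
 *				   "0000:06:0d.0");
 *	}
 */
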
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	down_write(&group->group_rwsem);

	/* users can be zero if this races with vfio_group_put() */
	if (!refcount_inc_not_zero(&group->users)) {
		ret = -ENODEV;
		goto err_unlock;
	}

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto err_put;
	}

	/*
	 * Do we need multiple instances of the group open? Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto err_put;
	}
	group->opened_file = filep;
	filep->private_data = group;

	up_write(&group->group_rwsem);
	return 0;
err_put:
	vfio_group_put(group);
err_unlock:
	up_write(&group->group_rwsem);
	return ret;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	down_write(&group->group_rwsem);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container) {
		WARN_ON(group->container_users != 1);
		__vfio_group_unset_container(group);
	}
	group->opened_file = NULL;
	up_write(&group->group_rwsem);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/*
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	vfio_device_put(device);

	return 0;
}

/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
		state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
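
/*
 * Illustrative sketch (not part of this file): since the function above may
 * need to be called multiple times, a driver's migration_set_state()
 * implementation typically walks the FSM one arc at a time. "my_step_device"
 * is a hypothetical per-arc helper.
 *
 *	enum vfio_device_mig_state next_state;
 *	int ret;
 *
 *	while (cur_state != new_state) {
 *		ret = vfio_mig_get_next_state(device, cur_state, new_state,
 *					      &next_state);
 *		if (ret)
 *			return ERR_PTR(ret);
 *		ret = my_step_device(device, cur_state, next_state);
 *		if (ret)
 *			return ERR_PTR(ret);
 *		cur_state = next_state;
 *	}
 */
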
/*
 * Convert the driver's struct file into a FD number and return it to userspace
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}

static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}

static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
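
/*
 * Minimal userspace sketch (error handling omitted) of driving the handlers
 * above: the feature payload directly follows the common
 * struct vfio_device_feature header in one buffer.
 *
 *	struct {
 *		struct vfio_device_feature hdr;
 *		struct vfio_device_feature_mig_state mig;
 *	} req = {
 *		.hdr = {
 *			.argsz = sizeof(req),
 *			.flags = VFIO_DEVICE_FEATURE_SET |
 *				 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE,
 *		},
 *		.mig = { .device_state = VFIO_DEVICE_STATE_STOP_COPY },
 *	};
 *
 *	ioctl(device_fd, VFIO_DEVICE_FEATURE, &req);
 *	// On success req.mig.data_fd carries the migration data stream FD
 *	// (or -1 if the transition does not produce one).
 */
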
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
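
/*
 * Illustrative userspace sketch: probing whether a feature access is
 * supported before using it. PROBE may be combined with GET and/or SET to
 * check that the specific access is supported without performing it.
 *
 *	struct vfio_device_feature probe = {
 *		.argsz = sizeof(probe),
 *		.flags = VFIO_DEVICE_FEATURE_PROBE |
 *			 VFIO_DEVICE_FEATURE_SET |
 *			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE,
 *	};
 *
 *	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, &probe) == 0)
 *		;	// setting the migration state is supported
 */
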
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		return vfio_ioctl_device_feature(device, (void __user *)arg);
	default:
		if (unlikely(!device->ops->ioctl))
			return -EINVAL;
		return device->ops->ioctl(device, cmd, arg);
	}
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device, vma);
}

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};

/**
 * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
 * @file: VFIO group file
 *
 * The returned iommu_group is valid as long as a ref is held on the file.
 */
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return NULL;
	return group->iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);

/**
 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
 *        is always CPU cache coherent
 * @file: VFIO group file
 *
 * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
 * bit in DMA transactions. A return of false indicates that the user has
 * rights to access additional instructions such as wbinvd on x86.
 */
bool vfio_file_enforced_coherent(struct file *file)
{
	struct vfio_group *group = file->private_data;
	bool ret;

	if (file->f_op != &vfio_group_fops)
		return true;

	down_read(&group->group_rwsem);
	if (group->container) {
		ret = vfio_ioctl_check_extension(group->container,
						 VFIO_DMA_CC_IOMMU);
	} else {
		/*
		 * Since the coherency state is determined only once a
		 * container is attached the user must do so before they can
		 * prove they have permission.
		 */
		ret = true;
	}
	up_read(&group->group_rwsem);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);

/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the group.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return;

	down_write(&group->group_rwsem);
	group->kvm = kvm;
	up_write(&group->group_rwsem);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
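
/*
 * Illustrative sketch (not from this file): how a hypervisor-side consumer
 * such as KVM might use the vfio_file_* helpers when userspace hands it a
 * VFIO group file. "my_attach"/"my_detach" are hypothetical.
 *
 *	static void my_attach(struct kvm *kvm, struct file *filp)
 *	{
 *		if (!vfio_file_iommu_group(filp))
 *			return;		// not a VFIO group file
 *
 *		// Make the KVM pointer visible to devices opened later.
 *		vfio_file_set_kvm(filp, kvm);
 *
 *		if (!vfio_file_enforced_coherent(filp))
 *			;		// e.g. permit guest wbinvd emulation
 *	}
 *
 *	static void my_detach(struct file *filp)
 *	{
 *		vfio_file_set_kvm(filp, NULL);
 *	}
 */
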
/**
 * vfio_file_has_dev - True if the VFIO file is a handle for device
 * @file: VFIO file to check
 * @device: Device that must be part of the file
 *
 * Returns true if given file has permission to manipulate the given device.
 */
bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return false;

	return group == device->group;
}
EXPORT_SYMBOL_GPL(vfio_file_has_dev);

/*
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability. A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
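
/*
 * Illustrative sketch (not from this file) of the usual caller pattern for
 * the two helpers above, when building an info ioctl reply whose capability
 * chain follows a fixed-size "info" header. MY_CAP_ID and MY_INFO_FLAG_CAPS
 * are hypothetical.
 *
 *	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *	struct vfio_info_cap_header *header;
 *
 *	header = vfio_info_cap_add(&caps, sizeof(*header), MY_CAP_ID, 1);
 *	if (IS_ERR(header))
 *		return PTR_ERR(header);
 *
 *	if (caps.size) {
 *		info.flags |= MY_INFO_FLAG_CAPS;
 *		// next offsets were buffer-relative; rebase them past the
 *		// fixed header before copying to userspace
 *		vfio_info_cap_shift(&caps, sizeof(info));
 *		copy_to_user((void __user *)arg + sizeof(info),
 *			     caps.buf, caps.size);
 *		kfree(caps.buf);
 *	}
 */
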
int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);

int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
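
/*
 * Illustrative sketch (not from this file): a driver's VFIO_DEVICE_SET_IRQS
 * handler validating the header before copying in the variable-length
 * payload. MY_NUM_IRQS/MY_NUM_IRQ_TYPES are hypothetical per-driver limits.
 *
 *	struct vfio_irq_set hdr;
 *	size_t data_size = 0;
 *	u8 *data = NULL;
 *
 *	if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
 *		return -EFAULT;
 *
 *	ret = vfio_set_irqs_validate_and_prepare(&hdr, MY_NUM_IRQS,
 *						 MY_NUM_IRQ_TYPES, &data_size);
 *	if (ret)
 *		return ret;
 *
 *	if (data_size) {
 *		data = memdup_user((void __user *)(arg + sizeof(hdr)),
 *				   data_size);
 *		if (IS_ERR(data))
 *			return PTR_ERR(data);
 *	}
 */
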
/*
 * Pin contiguous user pages and return their associated host pages for local
 * domain only.
 * @device [in]  : device
 * @iova [in]    : starting IOVA of user pages to be pinned.
 * @npage [in]   : count of pages to be pinned. This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @pages[out]   : array of host pages
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
		   int npage, int prot, struct page **pages)
{
	struct vfio_container *container;
	struct vfio_group *group = device->group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!pages || !npage || !vfio_assert_device_open(device))
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (group->dev_counter > 1)
		return -EINVAL;

	/* group->container cannot change while a vfio device is open */
	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data,
					     group->iommu_group, iova,
					     npage, prot, pages);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);
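
/*
 * Illustrative sketch (not from this file): a mediated-device driver pinning
 * a guest page for access and unpinning it when done. IOMMU_READ/IOMMU_WRITE
 * come from <linux/iommu.h>; "iova" is an example guest address.
 *
 *	struct page *page;
 *	int ret;
 *
 *	ret = vfio_pin_pages(vdev, iova, 1, IOMMU_READ | IOMMU_WRITE, &page);
 *	if (ret != 1)
 *		return ret < 0 ? ret : -EFAULT;
 *
 *	// ... access the host page, e.g. via kmap_local_page(page) ...
 *
 *	vfio_unpin_pages(vdev, iova, 1);
 */
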
/*
 * Unpin contiguous host pages for local domain only.
 * @device [in]  : device
 * @iova [in]    : starting address of user pages to be unpinned.
 * @npage [in]   : count of pages to be unpinned. This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	if (WARN_ON(!vfio_assert_device_open(device)))
		return;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
EXPORT_SYMBOL(vfio_unpin_pages);

/*
 * This interface allows the CPUs to perform some sort of virtual DMA on
 * behalf of the device.
 *
 * CPUs read/write from/into a range of IOVAs pointing to user space memory
 * into/from a kernel buffer.
 *
 * As the read/write of user space memory is conducted via the CPUs and is
 * not a real device DMA, it is not necessary to pin the user space memory.
 *
 * @device [in]  : VFIO device
 * @iova [in]    : base IOVA of a user space buffer
 * @data [in]    : pointer to kernel buffer
 * @len [in]     : kernel buffer length
 * @write        : indicate read or write
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
		size_t len, bool write)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (!data || len <= 0 || !vfio_assert_device_open(device))
		return -EINVAL;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	if (likely(driver && driver->ops->dma_rw))
		ret = driver->ops->dma_rw(container->iommu_data,
					  iova, data, len, write);
	else
		ret = -ENOTTY;
	return ret;
}
EXPORT_SYMBOL(vfio_dma_rw);
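
/*
 * Illustrative sketch (not from this file): reading a guest-resident
 * descriptor into a kernel buffer and writing back a status word, without
 * pinning the user memory. "my_desc", MY_STATUS_DONE and the IOVAs are
 * hypothetical.
 *
 *	struct my_desc desc;
 *	u32 status = MY_STATUS_DONE;
 *
 *	ret = vfio_dma_rw(vdev, desc_iova, &desc, sizeof(desc), false);
 *	if (ret)
 *		return ret;
 *
 *	ret = vfio_dma_rw(vdev, status_iova, &status, sizeof(status), true);
 */
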
/*
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

#ifdef CONFIG_VFIO_NOIOMMU
	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	if (ret)
		goto err_driver_register;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_driver_register:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");