10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only 20f3e72b5SJason Gunthorpe /* 30f3e72b5SJason Gunthorpe * VFIO core 40f3e72b5SJason Gunthorpe * 50f3e72b5SJason Gunthorpe * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 60f3e72b5SJason Gunthorpe * Author: Alex Williamson <alex.williamson@redhat.com> 70f3e72b5SJason Gunthorpe * 80f3e72b5SJason Gunthorpe * Derived from original vfio: 90f3e72b5SJason Gunthorpe * Copyright 2010 Cisco Systems, Inc. All rights reserved. 100f3e72b5SJason Gunthorpe * Author: Tom Lyon, pugs@cisco.com 110f3e72b5SJason Gunthorpe */ 120f3e72b5SJason Gunthorpe 130f3e72b5SJason Gunthorpe #include <linux/cdev.h> 140f3e72b5SJason Gunthorpe #include <linux/compat.h> 150f3e72b5SJason Gunthorpe #include <linux/device.h> 160f3e72b5SJason Gunthorpe #include <linux/file.h> 170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h> 180f3e72b5SJason Gunthorpe #include <linux/fs.h> 190f3e72b5SJason Gunthorpe #include <linux/idr.h> 200f3e72b5SJason Gunthorpe #include <linux/iommu.h> 210f3e72b5SJason Gunthorpe #include <linux/list.h> 220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h> 230f3e72b5SJason Gunthorpe #include <linux/module.h> 240f3e72b5SJason Gunthorpe #include <linux/mutex.h> 250f3e72b5SJason Gunthorpe #include <linux/pci.h> 260f3e72b5SJason Gunthorpe #include <linux/rwsem.h> 270f3e72b5SJason Gunthorpe #include <linux/sched.h> 280f3e72b5SJason Gunthorpe #include <linux/slab.h> 290f3e72b5SJason Gunthorpe #include <linux/stat.h> 300f3e72b5SJason Gunthorpe #include <linux/string.h> 310f3e72b5SJason Gunthorpe #include <linux/uaccess.h> 320f3e72b5SJason Gunthorpe #include <linux/vfio.h> 330f3e72b5SJason Gunthorpe #include <linux/wait.h> 340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h> 358e5c6995SAbhishek Sahu #include <linux/pm_runtime.h> 3680c4b92aSYishai Hadas #include <linux/interval_tree.h> 3780c4b92aSYishai Hadas #include <linux/iova_bitmap.h> 380f3e72b5SJason Gunthorpe #include "vfio.h" 390f3e72b5SJason 
Gunthorpe 400f3e72b5SJason Gunthorpe #define DRIVER_VERSION "0.3" 410f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 420f3e72b5SJason Gunthorpe #define DRIVER_DESC "VFIO - User Level meta-driver" 430f3e72b5SJason Gunthorpe 440f3e72b5SJason Gunthorpe static struct vfio { 450f3e72b5SJason Gunthorpe struct class *class; 460f3e72b5SJason Gunthorpe struct list_head group_list; 470f3e72b5SJason Gunthorpe struct mutex group_lock; /* locks group_list */ 480f3e72b5SJason Gunthorpe struct ida group_ida; 490f3e72b5SJason Gunthorpe dev_t group_devt; 503c28a761SYi Liu struct class *device_class; 513c28a761SYi Liu struct ida device_ida; 520f3e72b5SJason Gunthorpe } vfio; 530f3e72b5SJason Gunthorpe 540f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa); 550f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops; 560f3e72b5SJason Gunthorpe 570f3e72b5SJason Gunthorpe int vfio_assign_device_set(struct vfio_device *device, void *set_id) 580f3e72b5SJason Gunthorpe { 590f3e72b5SJason Gunthorpe unsigned long idx = (unsigned long)set_id; 600f3e72b5SJason Gunthorpe struct vfio_device_set *new_dev_set; 610f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set; 620f3e72b5SJason Gunthorpe 630f3e72b5SJason Gunthorpe if (WARN_ON(!set_id)) 640f3e72b5SJason Gunthorpe return -EINVAL; 650f3e72b5SJason Gunthorpe 660f3e72b5SJason Gunthorpe /* 670f3e72b5SJason Gunthorpe * Atomically acquire a singleton object in the xarray for this set_id 680f3e72b5SJason Gunthorpe */ 690f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 700f3e72b5SJason Gunthorpe dev_set = xa_load(&vfio_device_set_xa, idx); 710f3e72b5SJason Gunthorpe if (dev_set) 720f3e72b5SJason Gunthorpe goto found_get_ref; 730f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 740f3e72b5SJason Gunthorpe 750f3e72b5SJason Gunthorpe new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); 760f3e72b5SJason Gunthorpe if (!new_dev_set) 770f3e72b5SJason Gunthorpe 
return -ENOMEM; 780f3e72b5SJason Gunthorpe mutex_init(&new_dev_set->lock); 790f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&new_dev_set->device_list); 800f3e72b5SJason Gunthorpe new_dev_set->set_id = set_id; 810f3e72b5SJason Gunthorpe 820f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 830f3e72b5SJason Gunthorpe dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, 840f3e72b5SJason Gunthorpe GFP_KERNEL); 850f3e72b5SJason Gunthorpe if (!dev_set) { 860f3e72b5SJason Gunthorpe dev_set = new_dev_set; 870f3e72b5SJason Gunthorpe goto found_get_ref; 880f3e72b5SJason Gunthorpe } 890f3e72b5SJason Gunthorpe 900f3e72b5SJason Gunthorpe kfree(new_dev_set); 910f3e72b5SJason Gunthorpe if (xa_is_err(dev_set)) { 920f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 930f3e72b5SJason Gunthorpe return xa_err(dev_set); 940f3e72b5SJason Gunthorpe } 950f3e72b5SJason Gunthorpe 960f3e72b5SJason Gunthorpe found_get_ref: 970f3e72b5SJason Gunthorpe dev_set->device_count++; 980f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 990f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1000f3e72b5SJason Gunthorpe device->dev_set = dev_set; 1010f3e72b5SJason Gunthorpe list_add_tail(&device->dev_set_list, &dev_set->device_list); 1020f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1030f3e72b5SJason Gunthorpe return 0; 1040f3e72b5SJason Gunthorpe } 1050f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_assign_device_set); 1060f3e72b5SJason Gunthorpe 1070f3e72b5SJason Gunthorpe static void vfio_release_device_set(struct vfio_device *device) 1080f3e72b5SJason Gunthorpe { 1090f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set = device->dev_set; 1100f3e72b5SJason Gunthorpe 1110f3e72b5SJason Gunthorpe if (!dev_set) 1120f3e72b5SJason Gunthorpe return; 1130f3e72b5SJason Gunthorpe 1140f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1150f3e72b5SJason Gunthorpe list_del(&device->dev_set_list); 1160f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1170f3e72b5SJason Gunthorpe 
1180f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1190f3e72b5SJason Gunthorpe if (!--dev_set->device_count) { 1200f3e72b5SJason Gunthorpe __xa_erase(&vfio_device_set_xa, 1210f3e72b5SJason Gunthorpe (unsigned long)dev_set->set_id); 1220f3e72b5SJason Gunthorpe mutex_destroy(&dev_set->lock); 1230f3e72b5SJason Gunthorpe kfree(dev_set); 1240f3e72b5SJason Gunthorpe } 1250f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1260f3e72b5SJason Gunthorpe } 1270f3e72b5SJason Gunthorpe 1280f3e72b5SJason Gunthorpe /* 1290f3e72b5SJason Gunthorpe * Group objects - create, release, get, put, search 1300f3e72b5SJason Gunthorpe */ 1310f3e72b5SJason Gunthorpe static struct vfio_group * 1320f3e72b5SJason Gunthorpe __vfio_group_get_from_iommu(struct iommu_group *iommu_group) 1330f3e72b5SJason Gunthorpe { 1340f3e72b5SJason Gunthorpe struct vfio_group *group; 1350f3e72b5SJason Gunthorpe 1363dd59a7dSJason Gunthorpe /* 1373dd59a7dSJason Gunthorpe * group->iommu_group from the vfio.group_list cannot be NULL 1383dd59a7dSJason Gunthorpe * under the vfio.group_lock. 
1393dd59a7dSJason Gunthorpe */ 1400f3e72b5SJason Gunthorpe list_for_each_entry(group, &vfio.group_list, vfio_next) { 1410f3e72b5SJason Gunthorpe if (group->iommu_group == iommu_group) { 142ca5f21b2SJason Gunthorpe refcount_inc(&group->drivers); 1430f3e72b5SJason Gunthorpe return group; 1440f3e72b5SJason Gunthorpe } 1450f3e72b5SJason Gunthorpe } 1460f3e72b5SJason Gunthorpe return NULL; 1470f3e72b5SJason Gunthorpe } 1480f3e72b5SJason Gunthorpe 1490f3e72b5SJason Gunthorpe static struct vfio_group * 1500f3e72b5SJason Gunthorpe vfio_group_get_from_iommu(struct iommu_group *iommu_group) 1510f3e72b5SJason Gunthorpe { 1520f3e72b5SJason Gunthorpe struct vfio_group *group; 1530f3e72b5SJason Gunthorpe 1540f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 1550f3e72b5SJason Gunthorpe group = __vfio_group_get_from_iommu(iommu_group); 1560f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 1570f3e72b5SJason Gunthorpe return group; 1580f3e72b5SJason Gunthorpe } 1590f3e72b5SJason Gunthorpe 1600f3e72b5SJason Gunthorpe static void vfio_group_release(struct device *dev) 1610f3e72b5SJason Gunthorpe { 1620f3e72b5SJason Gunthorpe struct vfio_group *group = container_of(dev, struct vfio_group, dev); 1630f3e72b5SJason Gunthorpe 1640f3e72b5SJason Gunthorpe mutex_destroy(&group->device_lock); 165c82e81abSJason Gunthorpe mutex_destroy(&group->group_lock); 1663dd59a7dSJason Gunthorpe WARN_ON(group->iommu_group); 1670f3e72b5SJason Gunthorpe ida_free(&vfio.group_ida, MINOR(group->dev.devt)); 1680f3e72b5SJason Gunthorpe kfree(group); 1690f3e72b5SJason Gunthorpe } 1700f3e72b5SJason Gunthorpe 1710f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, 1720f3e72b5SJason Gunthorpe enum vfio_group_type type) 1730f3e72b5SJason Gunthorpe { 1740f3e72b5SJason Gunthorpe struct vfio_group *group; 1750f3e72b5SJason Gunthorpe int minor; 1760f3e72b5SJason Gunthorpe 1770f3e72b5SJason Gunthorpe group = kzalloc(sizeof(*group), GFP_KERNEL); 1780f3e72b5SJason 
Gunthorpe if (!group) 1790f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 1800f3e72b5SJason Gunthorpe 1810f3e72b5SJason Gunthorpe minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); 1820f3e72b5SJason Gunthorpe if (minor < 0) { 1830f3e72b5SJason Gunthorpe kfree(group); 1840f3e72b5SJason Gunthorpe return ERR_PTR(minor); 1850f3e72b5SJason Gunthorpe } 1860f3e72b5SJason Gunthorpe 1870f3e72b5SJason Gunthorpe device_initialize(&group->dev); 1880f3e72b5SJason Gunthorpe group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); 1890f3e72b5SJason Gunthorpe group->dev.class = vfio.class; 1900f3e72b5SJason Gunthorpe group->dev.release = vfio_group_release; 1910f3e72b5SJason Gunthorpe cdev_init(&group->cdev, &vfio_group_fops); 1920f3e72b5SJason Gunthorpe group->cdev.owner = THIS_MODULE; 1930f3e72b5SJason Gunthorpe 194ca5f21b2SJason Gunthorpe refcount_set(&group->drivers, 1); 195c82e81abSJason Gunthorpe mutex_init(&group->group_lock); 1960f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&group->device_list); 1970f3e72b5SJason Gunthorpe mutex_init(&group->device_lock); 1980f3e72b5SJason Gunthorpe group->iommu_group = iommu_group; 1990f3e72b5SJason Gunthorpe /* put in vfio_group_release() */ 2000f3e72b5SJason Gunthorpe iommu_group_ref_get(iommu_group); 2010f3e72b5SJason Gunthorpe group->type = type; 2020f3e72b5SJason Gunthorpe BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); 2030f3e72b5SJason Gunthorpe 2040f3e72b5SJason Gunthorpe return group; 2050f3e72b5SJason Gunthorpe } 2060f3e72b5SJason Gunthorpe 2070f3e72b5SJason Gunthorpe static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, 2080f3e72b5SJason Gunthorpe enum vfio_group_type type) 2090f3e72b5SJason Gunthorpe { 2100f3e72b5SJason Gunthorpe struct vfio_group *group; 2110f3e72b5SJason Gunthorpe struct vfio_group *ret; 2120f3e72b5SJason Gunthorpe int err; 2130f3e72b5SJason Gunthorpe 2140f3e72b5SJason Gunthorpe group = vfio_group_alloc(iommu_group, type); 2150f3e72b5SJason Gunthorpe if (IS_ERR(group)) 
2160f3e72b5SJason Gunthorpe return group; 2170f3e72b5SJason Gunthorpe 2180f3e72b5SJason Gunthorpe err = dev_set_name(&group->dev, "%s%d", 2190f3e72b5SJason Gunthorpe group->type == VFIO_NO_IOMMU ? "noiommu-" : "", 2200f3e72b5SJason Gunthorpe iommu_group_id(iommu_group)); 2210f3e72b5SJason Gunthorpe if (err) { 2220f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 2230f3e72b5SJason Gunthorpe goto err_put; 2240f3e72b5SJason Gunthorpe } 2250f3e72b5SJason Gunthorpe 2260f3e72b5SJason Gunthorpe mutex_lock(&vfio.group_lock); 2270f3e72b5SJason Gunthorpe 2280f3e72b5SJason Gunthorpe /* Did we race creating this group? */ 2290f3e72b5SJason Gunthorpe ret = __vfio_group_get_from_iommu(iommu_group); 2300f3e72b5SJason Gunthorpe if (ret) 2310f3e72b5SJason Gunthorpe goto err_unlock; 2320f3e72b5SJason Gunthorpe 2330f3e72b5SJason Gunthorpe err = cdev_device_add(&group->cdev, &group->dev); 2340f3e72b5SJason Gunthorpe if (err) { 2350f3e72b5SJason Gunthorpe ret = ERR_PTR(err); 2360f3e72b5SJason Gunthorpe goto err_unlock; 2370f3e72b5SJason Gunthorpe } 2380f3e72b5SJason Gunthorpe 2390f3e72b5SJason Gunthorpe list_add(&group->vfio_next, &vfio.group_list); 2400f3e72b5SJason Gunthorpe 2410f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 2420f3e72b5SJason Gunthorpe return group; 2430f3e72b5SJason Gunthorpe 2440f3e72b5SJason Gunthorpe err_unlock: 2450f3e72b5SJason Gunthorpe mutex_unlock(&vfio.group_lock); 2460f3e72b5SJason Gunthorpe err_put: 2470f3e72b5SJason Gunthorpe put_device(&group->dev); 2480f3e72b5SJason Gunthorpe return ret; 2490f3e72b5SJason Gunthorpe } 2500f3e72b5SJason Gunthorpe 251ca5f21b2SJason Gunthorpe static void vfio_device_remove_group(struct vfio_device *device) 252ca5f21b2SJason Gunthorpe { 253ca5f21b2SJason Gunthorpe struct vfio_group *group = device->group; 2543dd59a7dSJason Gunthorpe struct iommu_group *iommu_group; 255ca5f21b2SJason Gunthorpe 256ca5f21b2SJason Gunthorpe if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) 257ca5f21b2SJason Gunthorpe 
iommu_group_remove_device(device->dev); 258ca5f21b2SJason Gunthorpe 259ca5f21b2SJason Gunthorpe /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */ 260ca5f21b2SJason Gunthorpe if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock)) 2610f3e72b5SJason Gunthorpe return; 262ca5f21b2SJason Gunthorpe list_del(&group->vfio_next); 263ca5f21b2SJason Gunthorpe 264ca5f21b2SJason Gunthorpe /* 265ca5f21b2SJason Gunthorpe * We could concurrently probe another driver in the group that might 266ca5f21b2SJason Gunthorpe * race vfio_device_remove_group() with vfio_get_group(), so we have to 267ca5f21b2SJason Gunthorpe * ensure that the sysfs is all cleaned up under lock otherwise the 268ca5f21b2SJason Gunthorpe * cdev_device_add() will fail due to the name aready existing. 269ca5f21b2SJason Gunthorpe */ 270ca5f21b2SJason Gunthorpe cdev_device_del(&group->cdev, &group->dev); 271ca5f21b2SJason Gunthorpe 2723dd59a7dSJason Gunthorpe mutex_lock(&group->group_lock); 2730f3e72b5SJason Gunthorpe /* 2740f3e72b5SJason Gunthorpe * These data structures all have paired operations that can only be 2753dd59a7dSJason Gunthorpe * undone when the caller holds a live reference on the device. Since 2763dd59a7dSJason Gunthorpe * all pairs must be undone these WARN_ON's indicate some caller did not 2770f3e72b5SJason Gunthorpe * properly hold the group reference. 2780f3e72b5SJason Gunthorpe */ 2790f3e72b5SJason Gunthorpe WARN_ON(!list_empty(&group->device_list)); 2800f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 2810f3e72b5SJason Gunthorpe 2823dd59a7dSJason Gunthorpe /* 2833dd59a7dSJason Gunthorpe * Revoke all users of group->iommu_group. At this point we know there 2843dd59a7dSJason Gunthorpe * are no devices active because we are unplugging the last one. Setting 2853dd59a7dSJason Gunthorpe * iommu_group to NULL blocks all new users. 
2863dd59a7dSJason Gunthorpe */ 2873dd59a7dSJason Gunthorpe if (group->container) 2883dd59a7dSJason Gunthorpe vfio_group_detach_container(group); 2893dd59a7dSJason Gunthorpe iommu_group = group->iommu_group; 2903dd59a7dSJason Gunthorpe group->iommu_group = NULL; 2913dd59a7dSJason Gunthorpe mutex_unlock(&group->group_lock); 2923dd59a7dSJason Gunthorpe mutex_unlock(&vfio.group_lock); 2933dd59a7dSJason Gunthorpe 2943dd59a7dSJason Gunthorpe iommu_group_put(iommu_group); 2950f3e72b5SJason Gunthorpe put_device(&group->dev); 2960f3e72b5SJason Gunthorpe } 2970f3e72b5SJason Gunthorpe 2980f3e72b5SJason Gunthorpe /* 2990f3e72b5SJason Gunthorpe * Device objects - create, release, get, put, search 3000f3e72b5SJason Gunthorpe */ 3010f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */ 3024a725b8dSKevin Tian static void vfio_device_put_registration(struct vfio_device *device) 3030f3e72b5SJason Gunthorpe { 3040f3e72b5SJason Gunthorpe if (refcount_dec_and_test(&device->refcount)) 3050f3e72b5SJason Gunthorpe complete(&device->comp); 3060f3e72b5SJason Gunthorpe } 3070f3e72b5SJason Gunthorpe 3084a725b8dSKevin Tian static bool vfio_device_try_get_registration(struct vfio_device *device) 3090f3e72b5SJason Gunthorpe { 3100f3e72b5SJason Gunthorpe return refcount_inc_not_zero(&device->refcount); 3110f3e72b5SJason Gunthorpe } 3120f3e72b5SJason Gunthorpe 3130f3e72b5SJason Gunthorpe static struct vfio_device *vfio_group_get_device(struct vfio_group *group, 3140f3e72b5SJason Gunthorpe struct device *dev) 3150f3e72b5SJason Gunthorpe { 3160f3e72b5SJason Gunthorpe struct vfio_device *device; 3170f3e72b5SJason Gunthorpe 3180f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 3190f3e72b5SJason Gunthorpe list_for_each_entry(device, &group->device_list, group_next) { 3204a725b8dSKevin Tian if (device->dev == dev && 3214a725b8dSKevin Tian vfio_device_try_get_registration(device)) { 3220f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 3230f3e72b5SJason Gunthorpe 
return device; 3240f3e72b5SJason Gunthorpe } 3250f3e72b5SJason Gunthorpe } 3260f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 3270f3e72b5SJason Gunthorpe return NULL; 3280f3e72b5SJason Gunthorpe } 3290f3e72b5SJason Gunthorpe 3300f3e72b5SJason Gunthorpe /* 3310f3e72b5SJason Gunthorpe * VFIO driver API 3320f3e72b5SJason Gunthorpe */ 333cb9ff3f3SKevin Tian /* Release helper called by vfio_put_device() */ 3343c28a761SYi Liu static void vfio_device_release(struct device *dev) 335cb9ff3f3SKevin Tian { 336cb9ff3f3SKevin Tian struct vfio_device *device = 3373c28a761SYi Liu container_of(dev, struct vfio_device, device); 338cb9ff3f3SKevin Tian 339ebb72b76SKevin Tian vfio_release_device_set(device); 3403c28a761SYi Liu ida_free(&vfio.device_ida, device->index); 341cb9ff3f3SKevin Tian 342cb9ff3f3SKevin Tian /* 343cb9ff3f3SKevin Tian * kvfree() cannot be done here due to a life cycle mess in 344cb9ff3f3SKevin Tian * vfio-ccw. Before the ccw part is fixed all drivers are 345cb9ff3f3SKevin Tian * required to support @release and call vfio_free_device() 346cb9ff3f3SKevin Tian * from there. 347cb9ff3f3SKevin Tian */ 348cb9ff3f3SKevin Tian device->ops->release(device); 349cb9ff3f3SKevin Tian } 350cb9ff3f3SKevin Tian 351cb9ff3f3SKevin Tian /* 352cb9ff3f3SKevin Tian * Allocate and initialize vfio_device so it can be registered to vfio 353cb9ff3f3SKevin Tian * core. 354cb9ff3f3SKevin Tian * 355cb9ff3f3SKevin Tian * Drivers should use the wrapper vfio_alloc_device() for allocation. 356cb9ff3f3SKevin Tian * @size is the size of the structure to be allocated, including any 357cb9ff3f3SKevin Tian * private data used by the driver. 358cb9ff3f3SKevin Tian * 359cb9ff3f3SKevin Tian * Driver may provide an @init callback to cover device private data. 360cb9ff3f3SKevin Tian * 361cb9ff3f3SKevin Tian * Use vfio_put_device() to release the structure after success return. 
362cb9ff3f3SKevin Tian */ 363cb9ff3f3SKevin Tian struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, 364cb9ff3f3SKevin Tian const struct vfio_device_ops *ops) 365cb9ff3f3SKevin Tian { 366cb9ff3f3SKevin Tian struct vfio_device *device; 367cb9ff3f3SKevin Tian int ret; 368cb9ff3f3SKevin Tian 369cb9ff3f3SKevin Tian if (WARN_ON(size < sizeof(struct vfio_device))) 370cb9ff3f3SKevin Tian return ERR_PTR(-EINVAL); 371cb9ff3f3SKevin Tian 372cb9ff3f3SKevin Tian device = kvzalloc(size, GFP_KERNEL); 373cb9ff3f3SKevin Tian if (!device) 374cb9ff3f3SKevin Tian return ERR_PTR(-ENOMEM); 375cb9ff3f3SKevin Tian 376cb9ff3f3SKevin Tian ret = vfio_init_device(device, dev, ops); 377cb9ff3f3SKevin Tian if (ret) 378cb9ff3f3SKevin Tian goto out_free; 379cb9ff3f3SKevin Tian return device; 380cb9ff3f3SKevin Tian 381cb9ff3f3SKevin Tian out_free: 382cb9ff3f3SKevin Tian kvfree(device); 383cb9ff3f3SKevin Tian return ERR_PTR(ret); 384cb9ff3f3SKevin Tian } 385cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(_vfio_alloc_device); 386cb9ff3f3SKevin Tian 387cb9ff3f3SKevin Tian /* 388cb9ff3f3SKevin Tian * Initialize a vfio_device so it can be registered to vfio core. 389cb9ff3f3SKevin Tian * 390cb9ff3f3SKevin Tian * Only vfio-ccw driver should call this interface. 
391cb9ff3f3SKevin Tian */ 392cb9ff3f3SKevin Tian int vfio_init_device(struct vfio_device *device, struct device *dev, 393cb9ff3f3SKevin Tian const struct vfio_device_ops *ops) 394cb9ff3f3SKevin Tian { 395cb9ff3f3SKevin Tian int ret; 396cb9ff3f3SKevin Tian 3973c28a761SYi Liu ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL); 3983c28a761SYi Liu if (ret < 0) { 3993c28a761SYi Liu dev_dbg(dev, "Error to alloc index\n"); 4003c28a761SYi Liu return ret; 4013c28a761SYi Liu } 4023c28a761SYi Liu 4033c28a761SYi Liu device->index = ret; 404ebb72b76SKevin Tian init_completion(&device->comp); 405ebb72b76SKevin Tian device->dev = dev; 406ebb72b76SKevin Tian device->ops = ops; 407cb9ff3f3SKevin Tian 408cb9ff3f3SKevin Tian if (ops->init) { 409cb9ff3f3SKevin Tian ret = ops->init(device); 410cb9ff3f3SKevin Tian if (ret) 411cb9ff3f3SKevin Tian goto out_uninit; 412cb9ff3f3SKevin Tian } 413cb9ff3f3SKevin Tian 4143c28a761SYi Liu device_initialize(&device->device); 4153c28a761SYi Liu device->device.release = vfio_device_release; 4163c28a761SYi Liu device->device.class = vfio.device_class; 4173c28a761SYi Liu device->device.parent = device->dev; 418cb9ff3f3SKevin Tian return 0; 419cb9ff3f3SKevin Tian 420cb9ff3f3SKevin Tian out_uninit: 421ebb72b76SKevin Tian vfio_release_device_set(device); 4223c28a761SYi Liu ida_free(&vfio.device_ida, device->index); 423cb9ff3f3SKevin Tian return ret; 424cb9ff3f3SKevin Tian } 425cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(vfio_init_device); 426cb9ff3f3SKevin Tian 427cb9ff3f3SKevin Tian /* 428cb9ff3f3SKevin Tian * The helper called by driver @release callback to free the device 429cb9ff3f3SKevin Tian * structure. Drivers which don't have private data to clean can 430cb9ff3f3SKevin Tian * simply use this helper as its @release. 
431cb9ff3f3SKevin Tian */ 432cb9ff3f3SKevin Tian void vfio_free_device(struct vfio_device *device) 433cb9ff3f3SKevin Tian { 434cb9ff3f3SKevin Tian kvfree(device); 435cb9ff3f3SKevin Tian } 436cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(vfio_free_device); 437cb9ff3f3SKevin Tian 4380f3e72b5SJason Gunthorpe static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, 4390f3e72b5SJason Gunthorpe enum vfio_group_type type) 4400f3e72b5SJason Gunthorpe { 4410f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 4420f3e72b5SJason Gunthorpe struct vfio_group *group; 4430f3e72b5SJason Gunthorpe int ret; 4440f3e72b5SJason Gunthorpe 4450f3e72b5SJason Gunthorpe iommu_group = iommu_group_alloc(); 4460f3e72b5SJason Gunthorpe if (IS_ERR(iommu_group)) 4470f3e72b5SJason Gunthorpe return ERR_CAST(iommu_group); 4480f3e72b5SJason Gunthorpe 4490f3e72b5SJason Gunthorpe ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); 4500f3e72b5SJason Gunthorpe if (ret) 4510f3e72b5SJason Gunthorpe goto out_put_group; 4520f3e72b5SJason Gunthorpe ret = iommu_group_add_device(iommu_group, dev); 4530f3e72b5SJason Gunthorpe if (ret) 4540f3e72b5SJason Gunthorpe goto out_put_group; 4550f3e72b5SJason Gunthorpe 4560f3e72b5SJason Gunthorpe group = vfio_create_group(iommu_group, type); 4570f3e72b5SJason Gunthorpe if (IS_ERR(group)) { 4580f3e72b5SJason Gunthorpe ret = PTR_ERR(group); 4590f3e72b5SJason Gunthorpe goto out_remove_device; 4600f3e72b5SJason Gunthorpe } 4610f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 4620f3e72b5SJason Gunthorpe return group; 4630f3e72b5SJason Gunthorpe 4640f3e72b5SJason Gunthorpe out_remove_device: 4650f3e72b5SJason Gunthorpe iommu_group_remove_device(dev); 4660f3e72b5SJason Gunthorpe out_put_group: 4670f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 4680f3e72b5SJason Gunthorpe return ERR_PTR(ret); 4690f3e72b5SJason Gunthorpe } 4700f3e72b5SJason Gunthorpe 4710f3e72b5SJason Gunthorpe static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) 
4720f3e72b5SJason Gunthorpe { 4730f3e72b5SJason Gunthorpe struct iommu_group *iommu_group; 4740f3e72b5SJason Gunthorpe struct vfio_group *group; 4750f3e72b5SJason Gunthorpe 4760f3e72b5SJason Gunthorpe iommu_group = iommu_group_get(dev); 477444d43ecSJason Gunthorpe if (!iommu_group && vfio_noiommu) { 4780f3e72b5SJason Gunthorpe /* 4790f3e72b5SJason Gunthorpe * With noiommu enabled, create an IOMMU group for devices that 4800f3e72b5SJason Gunthorpe * don't already have one, implying no IOMMU hardware/driver 4810f3e72b5SJason Gunthorpe * exists. Taint the kernel because we're about to give a DMA 4820f3e72b5SJason Gunthorpe * capable device to a user without IOMMU protection. 4830f3e72b5SJason Gunthorpe */ 4840f3e72b5SJason Gunthorpe group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); 4850f3e72b5SJason Gunthorpe if (!IS_ERR(group)) { 4860f3e72b5SJason Gunthorpe add_taint(TAINT_USER, LOCKDEP_STILL_OK); 4870f3e72b5SJason Gunthorpe dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); 4880f3e72b5SJason Gunthorpe } 4890f3e72b5SJason Gunthorpe return group; 4900f3e72b5SJason Gunthorpe } 491444d43ecSJason Gunthorpe 4920f3e72b5SJason Gunthorpe if (!iommu_group) 4930f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 4940f3e72b5SJason Gunthorpe 4950f3e72b5SJason Gunthorpe /* 4960f3e72b5SJason Gunthorpe * VFIO always sets IOMMU_CACHE because we offer no way for userspace to 4970f3e72b5SJason Gunthorpe * restore cache coherency. It has to be checked here because it is only 4980f3e72b5SJason Gunthorpe * valid for cases where we are using iommu groups. 
4990f3e72b5SJason Gunthorpe */ 5000f3e72b5SJason Gunthorpe if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { 5010f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5020f3e72b5SJason Gunthorpe return ERR_PTR(-EINVAL); 5030f3e72b5SJason Gunthorpe } 5040f3e72b5SJason Gunthorpe 5050f3e72b5SJason Gunthorpe group = vfio_group_get_from_iommu(iommu_group); 5060f3e72b5SJason Gunthorpe if (!group) 5070f3e72b5SJason Gunthorpe group = vfio_create_group(iommu_group, VFIO_IOMMU); 5080f3e72b5SJason Gunthorpe 5090f3e72b5SJason Gunthorpe /* The vfio_group holds a reference to the iommu_group */ 5100f3e72b5SJason Gunthorpe iommu_group_put(iommu_group); 5110f3e72b5SJason Gunthorpe return group; 5120f3e72b5SJason Gunthorpe } 5130f3e72b5SJason Gunthorpe 5140f3e72b5SJason Gunthorpe static int __vfio_register_dev(struct vfio_device *device, 5150f3e72b5SJason Gunthorpe struct vfio_group *group) 5160f3e72b5SJason Gunthorpe { 5170f3e72b5SJason Gunthorpe struct vfio_device *existing_device; 5183c28a761SYi Liu int ret; 5190f3e72b5SJason Gunthorpe 520ca5f21b2SJason Gunthorpe /* 521ca5f21b2SJason Gunthorpe * In all cases group is the output of one of the group allocation 522ca5f21b2SJason Gunthorpe * functions and we have group->drivers incremented for us. 523ca5f21b2SJason Gunthorpe */ 5240f3e72b5SJason Gunthorpe if (IS_ERR(group)) 5250f3e72b5SJason Gunthorpe return PTR_ERR(group); 5260f3e72b5SJason Gunthorpe 5270f3e72b5SJason Gunthorpe /* 5280f3e72b5SJason Gunthorpe * If the driver doesn't specify a set then the device is added to a 5290f3e72b5SJason Gunthorpe * singleton set just for itself. 
5300f3e72b5SJason Gunthorpe */ 5310f3e72b5SJason Gunthorpe if (!device->dev_set) 5320f3e72b5SJason Gunthorpe vfio_assign_device_set(device, device); 5330f3e72b5SJason Gunthorpe 5340f3e72b5SJason Gunthorpe existing_device = vfio_group_get_device(group, device->dev); 5350f3e72b5SJason Gunthorpe if (existing_device) { 5363dd59a7dSJason Gunthorpe /* 5373dd59a7dSJason Gunthorpe * group->iommu_group is non-NULL because we hold the drivers 5383dd59a7dSJason Gunthorpe * refcount. 5393dd59a7dSJason Gunthorpe */ 5400f3e72b5SJason Gunthorpe dev_WARN(device->dev, "Device already exists on group %d\n", 5410f3e72b5SJason Gunthorpe iommu_group_id(group->iommu_group)); 5424a725b8dSKevin Tian vfio_device_put_registration(existing_device); 5433c28a761SYi Liu ret = -EBUSY; 5443c28a761SYi Liu goto err_out; 5450f3e72b5SJason Gunthorpe } 5460f3e72b5SJason Gunthorpe 5470f3e72b5SJason Gunthorpe /* Our reference on group is moved to the device */ 5480f3e72b5SJason Gunthorpe device->group = group; 5490f3e72b5SJason Gunthorpe 5503c28a761SYi Liu ret = dev_set_name(&device->device, "vfio%d", device->index); 5513c28a761SYi Liu if (ret) 5523c28a761SYi Liu goto err_out; 5533c28a761SYi Liu 5543c28a761SYi Liu ret = device_add(&device->device); 5553c28a761SYi Liu if (ret) 5563c28a761SYi Liu goto err_out; 5573c28a761SYi Liu 5580f3e72b5SJason Gunthorpe /* Refcounting can't start until the driver calls register */ 5590f3e72b5SJason Gunthorpe refcount_set(&device->refcount, 1); 5600f3e72b5SJason Gunthorpe 5610f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 5620f3e72b5SJason Gunthorpe list_add(&device->group_next, &group->device_list); 5630f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 5640f3e72b5SJason Gunthorpe 5650f3e72b5SJason Gunthorpe return 0; 5663c28a761SYi Liu err_out: 567ca5f21b2SJason Gunthorpe vfio_device_remove_group(device); 5683c28a761SYi Liu return ret; 5690f3e72b5SJason Gunthorpe } 5700f3e72b5SJason Gunthorpe 5710f3e72b5SJason Gunthorpe int 
vfio_register_group_dev(struct vfio_device *device) 5720f3e72b5SJason Gunthorpe { 5730f3e72b5SJason Gunthorpe return __vfio_register_dev(device, 5740f3e72b5SJason Gunthorpe vfio_group_find_or_alloc(device->dev)); 5750f3e72b5SJason Gunthorpe } 5760f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev); 5770f3e72b5SJason Gunthorpe 5780f3e72b5SJason Gunthorpe /* 5790f3e72b5SJason Gunthorpe * Register a virtual device without IOMMU backing. The user of this 5800f3e72b5SJason Gunthorpe * device must not be able to directly trigger unmediated DMA. 5810f3e72b5SJason Gunthorpe */ 5820f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device) 5830f3e72b5SJason Gunthorpe { 5840f3e72b5SJason Gunthorpe return __vfio_register_dev(device, 5850f3e72b5SJason Gunthorpe vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU)); 5860f3e72b5SJason Gunthorpe } 5870f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); 5880f3e72b5SJason Gunthorpe 5890f3e72b5SJason Gunthorpe static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, 5900f3e72b5SJason Gunthorpe char *buf) 5910f3e72b5SJason Gunthorpe { 5920f3e72b5SJason Gunthorpe struct vfio_device *it, *device = ERR_PTR(-ENODEV); 5930f3e72b5SJason Gunthorpe 5940f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 5950f3e72b5SJason Gunthorpe list_for_each_entry(it, &group->device_list, group_next) { 5960f3e72b5SJason Gunthorpe int ret; 5970f3e72b5SJason Gunthorpe 5980f3e72b5SJason Gunthorpe if (it->ops->match) { 5990f3e72b5SJason Gunthorpe ret = it->ops->match(it, buf); 6000f3e72b5SJason Gunthorpe if (ret < 0) { 6010f3e72b5SJason Gunthorpe device = ERR_PTR(ret); 6020f3e72b5SJason Gunthorpe break; 6030f3e72b5SJason Gunthorpe } 6040f3e72b5SJason Gunthorpe } else { 6050f3e72b5SJason Gunthorpe ret = !strcmp(dev_name(it->dev), buf); 6060f3e72b5SJason Gunthorpe } 6070f3e72b5SJason Gunthorpe 6084a725b8dSKevin Tian if (ret && 
vfio_device_try_get_registration(it)) { 6090f3e72b5SJason Gunthorpe device = it; 6100f3e72b5SJason Gunthorpe break; 6110f3e72b5SJason Gunthorpe } 6120f3e72b5SJason Gunthorpe } 6130f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 6140f3e72b5SJason Gunthorpe 6150f3e72b5SJason Gunthorpe return device; 6160f3e72b5SJason Gunthorpe } 6170f3e72b5SJason Gunthorpe 6180f3e72b5SJason Gunthorpe /* 6190f3e72b5SJason Gunthorpe * Decrement the device reference count and wait for the device to be 6200f3e72b5SJason Gunthorpe * removed. Open file descriptors for the device... */ 6210f3e72b5SJason Gunthorpe void vfio_unregister_group_dev(struct vfio_device *device) 6220f3e72b5SJason Gunthorpe { 6230f3e72b5SJason Gunthorpe struct vfio_group *group = device->group; 6240f3e72b5SJason Gunthorpe unsigned int i = 0; 6250f3e72b5SJason Gunthorpe bool interrupted = false; 6260f3e72b5SJason Gunthorpe long rc; 6270f3e72b5SJason Gunthorpe 6284a725b8dSKevin Tian vfio_device_put_registration(device); 6290f3e72b5SJason Gunthorpe rc = try_wait_for_completion(&device->comp); 6300f3e72b5SJason Gunthorpe while (rc <= 0) { 6310f3e72b5SJason Gunthorpe if (device->ops->request) 6320f3e72b5SJason Gunthorpe device->ops->request(device, i++); 6330f3e72b5SJason Gunthorpe 6340f3e72b5SJason Gunthorpe if (interrupted) { 6350f3e72b5SJason Gunthorpe rc = wait_for_completion_timeout(&device->comp, 6360f3e72b5SJason Gunthorpe HZ * 10); 6370f3e72b5SJason Gunthorpe } else { 6380f3e72b5SJason Gunthorpe rc = wait_for_completion_interruptible_timeout( 6390f3e72b5SJason Gunthorpe &device->comp, HZ * 10); 6400f3e72b5SJason Gunthorpe if (rc < 0) { 6410f3e72b5SJason Gunthorpe interrupted = true; 6420f3e72b5SJason Gunthorpe dev_warn(device->dev, 6430f3e72b5SJason Gunthorpe "Device is currently in use, task" 6440f3e72b5SJason Gunthorpe " \"%s\" (%d) " 6450f3e72b5SJason Gunthorpe "blocked until device is released", 6460f3e72b5SJason Gunthorpe current->comm, task_pid_nr(current)); 6470f3e72b5SJason Gunthorpe } 
6480f3e72b5SJason Gunthorpe } 6490f3e72b5SJason Gunthorpe } 6500f3e72b5SJason Gunthorpe 6510f3e72b5SJason Gunthorpe mutex_lock(&group->device_lock); 6520f3e72b5SJason Gunthorpe list_del(&device->group_next); 6530f3e72b5SJason Gunthorpe mutex_unlock(&group->device_lock); 6540f3e72b5SJason Gunthorpe 6553c28a761SYi Liu /* Balances device_add in register path */ 6563c28a761SYi Liu device_del(&device->device); 6573c28a761SYi Liu 658ca5f21b2SJason Gunthorpe vfio_device_remove_group(device); 6590f3e72b5SJason Gunthorpe } 6600f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); 6610f3e72b5SJason Gunthorpe 6620f3e72b5SJason Gunthorpe /* 6630f3e72b5SJason Gunthorpe * VFIO Group fd, /dev/vfio/$GROUP 6640f3e72b5SJason Gunthorpe */ 6650f3e72b5SJason Gunthorpe /* 6660f3e72b5SJason Gunthorpe * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or 6670f3e72b5SJason Gunthorpe * if there was no container to unset. Since the ioctl is called on 6680f3e72b5SJason Gunthorpe * the group, we know that still exists, therefore the only valid 6690f3e72b5SJason Gunthorpe * transition here is 1->0. 
6700f3e72b5SJason Gunthorpe */ 671b3b43590SJason Gunthorpe static int vfio_group_ioctl_unset_container(struct vfio_group *group) 6720f3e72b5SJason Gunthorpe { 673b3b43590SJason Gunthorpe int ret = 0; 6740f3e72b5SJason Gunthorpe 675c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 676b3b43590SJason Gunthorpe if (!group->container) { 677b3b43590SJason Gunthorpe ret = -EINVAL; 678b3b43590SJason Gunthorpe goto out_unlock; 679b3b43590SJason Gunthorpe } 680b3b43590SJason Gunthorpe if (group->container_users != 1) { 681b3b43590SJason Gunthorpe ret = -EBUSY; 682b3b43590SJason Gunthorpe goto out_unlock; 683b3b43590SJason Gunthorpe } 684429a781cSJason Gunthorpe vfio_group_detach_container(group); 685b3b43590SJason Gunthorpe 686b3b43590SJason Gunthorpe out_unlock: 687c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 688b3b43590SJason Gunthorpe return ret; 6890f3e72b5SJason Gunthorpe } 6900f3e72b5SJason Gunthorpe 69103e650f6SJason Gunthorpe static int vfio_group_ioctl_set_container(struct vfio_group *group, 69203e650f6SJason Gunthorpe int __user *arg) 69303e650f6SJason Gunthorpe { 69403e650f6SJason Gunthorpe struct vfio_container *container; 69503e650f6SJason Gunthorpe struct fd f; 69603e650f6SJason Gunthorpe int ret; 69703e650f6SJason Gunthorpe int fd; 69803e650f6SJason Gunthorpe 69903e650f6SJason Gunthorpe if (get_user(fd, arg)) 70003e650f6SJason Gunthorpe return -EFAULT; 70103e650f6SJason Gunthorpe 70203e650f6SJason Gunthorpe f = fdget(fd); 70303e650f6SJason Gunthorpe if (!f.file) 70403e650f6SJason Gunthorpe return -EBADF; 70503e650f6SJason Gunthorpe 706c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 70703e650f6SJason Gunthorpe if (group->container || WARN_ON(group->container_users)) { 70803e650f6SJason Gunthorpe ret = -EINVAL; 70903e650f6SJason Gunthorpe goto out_unlock; 71003e650f6SJason Gunthorpe } 7113dd59a7dSJason Gunthorpe if (!group->iommu_group) { 7123dd59a7dSJason Gunthorpe ret = -ENODEV; 7133dd59a7dSJason Gunthorpe goto out_unlock; 
7143dd59a7dSJason Gunthorpe } 7153dd59a7dSJason Gunthorpe 71603e650f6SJason Gunthorpe container = vfio_container_from_file(f.file); 71703e650f6SJason Gunthorpe ret = -EINVAL; 71803e650f6SJason Gunthorpe if (container) { 71903e650f6SJason Gunthorpe ret = vfio_container_attach_group(container, group); 72003e650f6SJason Gunthorpe goto out_unlock; 72103e650f6SJason Gunthorpe } 72203e650f6SJason Gunthorpe 72303e650f6SJason Gunthorpe out_unlock: 724c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 7250f3e72b5SJason Gunthorpe fdput(f); 7260f3e72b5SJason Gunthorpe return ret; 7270f3e72b5SJason Gunthorpe } 7280f3e72b5SJason Gunthorpe 7290f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops; 7300f3e72b5SJason Gunthorpe 7310f3e72b5SJason Gunthorpe /* true if the vfio_device has open_device() called but not close_device() */ 732cdc71fe4SJason Gunthorpe bool vfio_assert_device_open(struct vfio_device *device) 7330f3e72b5SJason Gunthorpe { 7340f3e72b5SJason Gunthorpe return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); 7350f3e72b5SJason Gunthorpe } 7360f3e72b5SJason Gunthorpe 7370f3e72b5SJason Gunthorpe static struct file *vfio_device_open(struct vfio_device *device) 7380f3e72b5SJason Gunthorpe { 7390f3e72b5SJason Gunthorpe struct file *filep; 7400f3e72b5SJason Gunthorpe int ret; 7410f3e72b5SJason Gunthorpe 742c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 7430f3e72b5SJason Gunthorpe ret = vfio_device_assign_container(device); 744c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 7450f3e72b5SJason Gunthorpe if (ret) 7460f3e72b5SJason Gunthorpe return ERR_PTR(ret); 7470f3e72b5SJason Gunthorpe 7480f3e72b5SJason Gunthorpe if (!try_module_get(device->dev->driver->owner)) { 7490f3e72b5SJason Gunthorpe ret = -ENODEV; 7500f3e72b5SJason Gunthorpe goto err_unassign_container; 7510f3e72b5SJason Gunthorpe } 7520f3e72b5SJason Gunthorpe 7530f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 7540f3e72b5SJason 
Gunthorpe device->open_count++; 7550f3e72b5SJason Gunthorpe if (device->open_count == 1) { 7560f3e72b5SJason Gunthorpe /* 7570f3e72b5SJason Gunthorpe * Here we pass the KVM pointer with the group under the read 7580f3e72b5SJason Gunthorpe * lock. If the device driver will use it, it must obtain a 7590f3e72b5SJason Gunthorpe * reference and release it during close_device. 7600f3e72b5SJason Gunthorpe */ 761c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 7620f3e72b5SJason Gunthorpe device->kvm = device->group->kvm; 7630f3e72b5SJason Gunthorpe 7640f3e72b5SJason Gunthorpe if (device->ops->open_device) { 7650f3e72b5SJason Gunthorpe ret = device->ops->open_device(device); 7660f3e72b5SJason Gunthorpe if (ret) 7670f3e72b5SJason Gunthorpe goto err_undo_count; 7680f3e72b5SJason Gunthorpe } 7699446162eSJason Gunthorpe vfio_device_container_register(device); 770c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 7710f3e72b5SJason Gunthorpe } 7720f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 7730f3e72b5SJason Gunthorpe 7740f3e72b5SJason Gunthorpe /* 7750f3e72b5SJason Gunthorpe * We can't use anon_inode_getfd() because we need to modify 7760f3e72b5SJason Gunthorpe * the f_mode flags directly to allow more than just ioctls 7770f3e72b5SJason Gunthorpe */ 7780f3e72b5SJason Gunthorpe filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, 7790f3e72b5SJason Gunthorpe device, O_RDWR); 7800f3e72b5SJason Gunthorpe if (IS_ERR(filep)) { 7810f3e72b5SJason Gunthorpe ret = PTR_ERR(filep); 7820f3e72b5SJason Gunthorpe goto err_close_device; 7830f3e72b5SJason Gunthorpe } 7840f3e72b5SJason Gunthorpe 7850f3e72b5SJason Gunthorpe /* 7860f3e72b5SJason Gunthorpe * TODO: add an anon_inode interface to do this. 7870f3e72b5SJason Gunthorpe * Appears to be missing by lack of need rather than 7880f3e72b5SJason Gunthorpe * explicitly prevented. Now there's need. 
7890f3e72b5SJason Gunthorpe */ 7900f3e72b5SJason Gunthorpe filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); 7910f3e72b5SJason Gunthorpe 7920f3e72b5SJason Gunthorpe if (device->group->type == VFIO_NO_IOMMU) 7930f3e72b5SJason Gunthorpe dev_warn(device->dev, "vfio-noiommu device opened by user " 7940f3e72b5SJason Gunthorpe "(%s:%d)\n", current->comm, task_pid_nr(current)); 7950f3e72b5SJason Gunthorpe /* 7960f3e72b5SJason Gunthorpe * On success the ref of device is moved to the file and 7970f3e72b5SJason Gunthorpe * put in vfio_device_fops_release() 7980f3e72b5SJason Gunthorpe */ 7990f3e72b5SJason Gunthorpe return filep; 8000f3e72b5SJason Gunthorpe 8010f3e72b5SJason Gunthorpe err_close_device: 8020f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 803c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 8040f3e72b5SJason Gunthorpe if (device->open_count == 1 && device->ops->close_device) { 8050f3e72b5SJason Gunthorpe device->ops->close_device(device); 8060f3e72b5SJason Gunthorpe 8079446162eSJason Gunthorpe vfio_device_container_unregister(device); 8080f3e72b5SJason Gunthorpe } 8090f3e72b5SJason Gunthorpe err_undo_count: 810c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 8110f3e72b5SJason Gunthorpe device->open_count--; 8120f3e72b5SJason Gunthorpe if (device->open_count == 0 && device->kvm) 8130f3e72b5SJason Gunthorpe device->kvm = NULL; 8140f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 8150f3e72b5SJason Gunthorpe module_put(device->dev->driver->owner); 8160f3e72b5SJason Gunthorpe err_unassign_container: 8170f3e72b5SJason Gunthorpe vfio_device_unassign_container(device); 8180f3e72b5SJason Gunthorpe return ERR_PTR(ret); 8190f3e72b5SJason Gunthorpe } 8200f3e72b5SJason Gunthorpe 821150ee2f9SJason Gunthorpe static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, 822150ee2f9SJason Gunthorpe char __user *arg) 8230f3e72b5SJason Gunthorpe { 8240f3e72b5SJason Gunthorpe struct vfio_device *device; 8250f3e72b5SJason 
Gunthorpe struct file *filep; 826150ee2f9SJason Gunthorpe char *buf; 8270f3e72b5SJason Gunthorpe int fdno; 8280f3e72b5SJason Gunthorpe int ret; 8290f3e72b5SJason Gunthorpe 830150ee2f9SJason Gunthorpe buf = strndup_user(arg, PAGE_SIZE); 831150ee2f9SJason Gunthorpe if (IS_ERR(buf)) 832150ee2f9SJason Gunthorpe return PTR_ERR(buf); 833150ee2f9SJason Gunthorpe 8340f3e72b5SJason Gunthorpe device = vfio_device_get_from_name(group, buf); 835150ee2f9SJason Gunthorpe kfree(buf); 8360f3e72b5SJason Gunthorpe if (IS_ERR(device)) 8370f3e72b5SJason Gunthorpe return PTR_ERR(device); 8380f3e72b5SJason Gunthorpe 8390f3e72b5SJason Gunthorpe fdno = get_unused_fd_flags(O_CLOEXEC); 8400f3e72b5SJason Gunthorpe if (fdno < 0) { 8410f3e72b5SJason Gunthorpe ret = fdno; 8420f3e72b5SJason Gunthorpe goto err_put_device; 8430f3e72b5SJason Gunthorpe } 8440f3e72b5SJason Gunthorpe 8450f3e72b5SJason Gunthorpe filep = vfio_device_open(device); 8460f3e72b5SJason Gunthorpe if (IS_ERR(filep)) { 8470f3e72b5SJason Gunthorpe ret = PTR_ERR(filep); 8480f3e72b5SJason Gunthorpe goto err_put_fdno; 8490f3e72b5SJason Gunthorpe } 8500f3e72b5SJason Gunthorpe 8510f3e72b5SJason Gunthorpe fd_install(fdno, filep); 8520f3e72b5SJason Gunthorpe return fdno; 8530f3e72b5SJason Gunthorpe 8540f3e72b5SJason Gunthorpe err_put_fdno: 8550f3e72b5SJason Gunthorpe put_unused_fd(fdno); 8560f3e72b5SJason Gunthorpe err_put_device: 8574a725b8dSKevin Tian vfio_device_put_registration(device); 8580f3e72b5SJason Gunthorpe return ret; 8590f3e72b5SJason Gunthorpe } 8600f3e72b5SJason Gunthorpe 86199a27c08SJason Gunthorpe static int vfio_group_ioctl_get_status(struct vfio_group *group, 86299a27c08SJason Gunthorpe struct vfio_group_status __user *arg) 8630f3e72b5SJason Gunthorpe { 86499a27c08SJason Gunthorpe unsigned long minsz = offsetofend(struct vfio_group_status, flags); 8650f3e72b5SJason Gunthorpe struct vfio_group_status status; 8660f3e72b5SJason Gunthorpe 86799a27c08SJason Gunthorpe if (copy_from_user(&status, arg, minsz)) 
8680f3e72b5SJason Gunthorpe return -EFAULT; 8690f3e72b5SJason Gunthorpe 8700f3e72b5SJason Gunthorpe if (status.argsz < minsz) 8710f3e72b5SJason Gunthorpe return -EINVAL; 8720f3e72b5SJason Gunthorpe 8730f3e72b5SJason Gunthorpe status.flags = 0; 8740f3e72b5SJason Gunthorpe 875c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 8763dd59a7dSJason Gunthorpe if (!group->iommu_group) { 8773dd59a7dSJason Gunthorpe mutex_unlock(&group->group_lock); 8783dd59a7dSJason Gunthorpe return -ENODEV; 8793dd59a7dSJason Gunthorpe } 8803dd59a7dSJason Gunthorpe 8810f3e72b5SJason Gunthorpe if (group->container) 8820f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | 8830f3e72b5SJason Gunthorpe VFIO_GROUP_FLAGS_VIABLE; 8840f3e72b5SJason Gunthorpe else if (!iommu_group_dma_owner_claimed(group->iommu_group)) 8850f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_VIABLE; 886c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 8870f3e72b5SJason Gunthorpe 88899a27c08SJason Gunthorpe if (copy_to_user(arg, &status, minsz)) 8890f3e72b5SJason Gunthorpe return -EFAULT; 89099a27c08SJason Gunthorpe return 0; 8910f3e72b5SJason Gunthorpe } 89299a27c08SJason Gunthorpe 89399a27c08SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep, 89499a27c08SJason Gunthorpe unsigned int cmd, unsigned long arg) 89599a27c08SJason Gunthorpe { 89699a27c08SJason Gunthorpe struct vfio_group *group = filep->private_data; 89799a27c08SJason Gunthorpe void __user *uarg = (void __user *)arg; 89899a27c08SJason Gunthorpe 89999a27c08SJason Gunthorpe switch (cmd) { 90099a27c08SJason Gunthorpe case VFIO_GROUP_GET_DEVICE_FD: 90199a27c08SJason Gunthorpe return vfio_group_ioctl_get_device_fd(group, uarg); 90299a27c08SJason Gunthorpe case VFIO_GROUP_GET_STATUS: 90399a27c08SJason Gunthorpe return vfio_group_ioctl_get_status(group, uarg); 9040f3e72b5SJason Gunthorpe case VFIO_GROUP_SET_CONTAINER: 90567671f15SJason Gunthorpe return vfio_group_ioctl_set_container(group, uarg); 
9060f3e72b5SJason Gunthorpe case VFIO_GROUP_UNSET_CONTAINER: 907b3b43590SJason Gunthorpe return vfio_group_ioctl_unset_container(group); 90899a27c08SJason Gunthorpe default: 90999a27c08SJason Gunthorpe return -ENOTTY; 9100f3e72b5SJason Gunthorpe } 9110f3e72b5SJason Gunthorpe } 9120f3e72b5SJason Gunthorpe 9130f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep) 9140f3e72b5SJason Gunthorpe { 9150f3e72b5SJason Gunthorpe struct vfio_group *group = 9160f3e72b5SJason Gunthorpe container_of(inode->i_cdev, struct vfio_group, cdev); 9170f3e72b5SJason Gunthorpe int ret; 9180f3e72b5SJason Gunthorpe 919c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 9200f3e72b5SJason Gunthorpe 921912b74d2SJason Gunthorpe /* 922912b74d2SJason Gunthorpe * drivers can be zero if this races with vfio_device_remove_group(), it 923912b74d2SJason Gunthorpe * will be stable at 0 under the group rwsem 924912b74d2SJason Gunthorpe */ 925912b74d2SJason Gunthorpe if (refcount_read(&group->drivers) == 0) { 9260f3e72b5SJason Gunthorpe ret = -ENODEV; 927912b74d2SJason Gunthorpe goto out_unlock; 9280f3e72b5SJason Gunthorpe } 9290f3e72b5SJason Gunthorpe 9300f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { 9310f3e72b5SJason Gunthorpe ret = -EPERM; 932912b74d2SJason Gunthorpe goto out_unlock; 9330f3e72b5SJason Gunthorpe } 9340f3e72b5SJason Gunthorpe 9350f3e72b5SJason Gunthorpe /* 9360f3e72b5SJason Gunthorpe * Do we need multiple instances of the group open? Seems not. 
9370f3e72b5SJason Gunthorpe */ 9380f3e72b5SJason Gunthorpe if (group->opened_file) { 9390f3e72b5SJason Gunthorpe ret = -EBUSY; 940912b74d2SJason Gunthorpe goto out_unlock; 9410f3e72b5SJason Gunthorpe } 9420f3e72b5SJason Gunthorpe group->opened_file = filep; 9430f3e72b5SJason Gunthorpe filep->private_data = group; 944912b74d2SJason Gunthorpe ret = 0; 945912b74d2SJason Gunthorpe out_unlock: 946c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 9470f3e72b5SJason Gunthorpe return ret; 9480f3e72b5SJason Gunthorpe } 9490f3e72b5SJason Gunthorpe 9500f3e72b5SJason Gunthorpe static int vfio_group_fops_release(struct inode *inode, struct file *filep) 9510f3e72b5SJason Gunthorpe { 9520f3e72b5SJason Gunthorpe struct vfio_group *group = filep->private_data; 9530f3e72b5SJason Gunthorpe 9540f3e72b5SJason Gunthorpe filep->private_data = NULL; 9550f3e72b5SJason Gunthorpe 956c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 9570f3e72b5SJason Gunthorpe /* 9580f3e72b5SJason Gunthorpe * Device FDs hold a group file reference, therefore the group release 9590f3e72b5SJason Gunthorpe * is only called when there are no open devices. 
9600f3e72b5SJason Gunthorpe */ 9610f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 962429a781cSJason Gunthorpe if (group->container) 963429a781cSJason Gunthorpe vfio_group_detach_container(group); 9640f3e72b5SJason Gunthorpe group->opened_file = NULL; 965c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 9660f3e72b5SJason Gunthorpe return 0; 9670f3e72b5SJason Gunthorpe } 9680f3e72b5SJason Gunthorpe 9690f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops = { 9700f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 9710f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_group_fops_unl_ioctl, 9720f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 9730f3e72b5SJason Gunthorpe .open = vfio_group_fops_open, 9740f3e72b5SJason Gunthorpe .release = vfio_group_fops_release, 9750f3e72b5SJason Gunthorpe }; 9760f3e72b5SJason Gunthorpe 9770f3e72b5SJason Gunthorpe /* 9788e5c6995SAbhishek Sahu * Wrapper around pm_runtime_resume_and_get(). 9798e5c6995SAbhishek Sahu * Return error code on failure or 0 on success. 9808e5c6995SAbhishek Sahu */ 9818e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device) 9828e5c6995SAbhishek Sahu { 9838e5c6995SAbhishek Sahu struct device *dev = device->dev; 9848e5c6995SAbhishek Sahu 9858e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) { 9868e5c6995SAbhishek Sahu int ret; 9878e5c6995SAbhishek Sahu 9888e5c6995SAbhishek Sahu ret = pm_runtime_resume_and_get(dev); 9898e5c6995SAbhishek Sahu if (ret) { 9908e5c6995SAbhishek Sahu dev_info_ratelimited(dev, 9918e5c6995SAbhishek Sahu "vfio: runtime resume failed %d\n", ret); 9928e5c6995SAbhishek Sahu return -EIO; 9938e5c6995SAbhishek Sahu } 9948e5c6995SAbhishek Sahu } 9958e5c6995SAbhishek Sahu 9968e5c6995SAbhishek Sahu return 0; 9978e5c6995SAbhishek Sahu } 9988e5c6995SAbhishek Sahu 9998e5c6995SAbhishek Sahu /* 10008e5c6995SAbhishek Sahu * Wrapper around pm_runtime_put(). 
10018e5c6995SAbhishek Sahu */ 10028e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device) 10038e5c6995SAbhishek Sahu { 10048e5c6995SAbhishek Sahu struct device *dev = device->dev; 10058e5c6995SAbhishek Sahu 10068e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) 10078e5c6995SAbhishek Sahu pm_runtime_put(dev); 10088e5c6995SAbhishek Sahu } 10098e5c6995SAbhishek Sahu 10108e5c6995SAbhishek Sahu /* 10110f3e72b5SJason Gunthorpe * VFIO Device fd 10120f3e72b5SJason Gunthorpe */ 10130f3e72b5SJason Gunthorpe static int vfio_device_fops_release(struct inode *inode, struct file *filep) 10140f3e72b5SJason Gunthorpe { 10150f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 10160f3e72b5SJason Gunthorpe 10170f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 10180f3e72b5SJason Gunthorpe vfio_assert_device_open(device); 1019c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 10200f3e72b5SJason Gunthorpe if (device->open_count == 1 && device->ops->close_device) 10210f3e72b5SJason Gunthorpe device->ops->close_device(device); 10220f3e72b5SJason Gunthorpe 10239446162eSJason Gunthorpe vfio_device_container_unregister(device); 1024c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 10250f3e72b5SJason Gunthorpe device->open_count--; 10260f3e72b5SJason Gunthorpe if (device->open_count == 0) 10270f3e72b5SJason Gunthorpe device->kvm = NULL; 10280f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 10290f3e72b5SJason Gunthorpe 10300f3e72b5SJason Gunthorpe module_put(device->dev->driver->owner); 10310f3e72b5SJason Gunthorpe 10320f3e72b5SJason Gunthorpe vfio_device_unassign_container(device); 10330f3e72b5SJason Gunthorpe 10344a725b8dSKevin Tian vfio_device_put_registration(device); 10350f3e72b5SJason Gunthorpe 10360f3e72b5SJason Gunthorpe return 0; 10370f3e72b5SJason Gunthorpe } 10380f3e72b5SJason Gunthorpe 10390f3e72b5SJason Gunthorpe /* 10400f3e72b5SJason Gunthorpe * 
vfio_mig_get_next_state - Compute the next step in the FSM 10410f3e72b5SJason Gunthorpe * @cur_fsm - The current state the device is in 10420f3e72b5SJason Gunthorpe * @new_fsm - The target state to reach 10430f3e72b5SJason Gunthorpe * @next_fsm - Pointer to the next step to get to new_fsm 10440f3e72b5SJason Gunthorpe * 10450f3e72b5SJason Gunthorpe * Return 0 upon success, otherwise -errno 10460f3e72b5SJason Gunthorpe * Upon success the next step in the state progression between cur_fsm and 10470f3e72b5SJason Gunthorpe * new_fsm will be set in next_fsm. 10480f3e72b5SJason Gunthorpe * 10490f3e72b5SJason Gunthorpe * This breaks down requests for combination transitions into smaller steps and 10500f3e72b5SJason Gunthorpe * returns the next step to get to new_fsm. The function may need to be called 10510f3e72b5SJason Gunthorpe * multiple times before reaching new_fsm. 10520f3e72b5SJason Gunthorpe * 10530f3e72b5SJason Gunthorpe */ 10540f3e72b5SJason Gunthorpe int vfio_mig_get_next_state(struct vfio_device *device, 10550f3e72b5SJason Gunthorpe enum vfio_device_mig_state cur_fsm, 10560f3e72b5SJason Gunthorpe enum vfio_device_mig_state new_fsm, 10570f3e72b5SJason Gunthorpe enum vfio_device_mig_state *next_fsm) 10580f3e72b5SJason Gunthorpe { 10590f3e72b5SJason Gunthorpe enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; 10600f3e72b5SJason Gunthorpe /* 10610f3e72b5SJason Gunthorpe * The coding in this table requires the driver to implement the 10620f3e72b5SJason Gunthorpe * following FSM arcs: 10630f3e72b5SJason Gunthorpe * RESUMING -> STOP 10640f3e72b5SJason Gunthorpe * STOP -> RESUMING 10650f3e72b5SJason Gunthorpe * STOP -> STOP_COPY 10660f3e72b5SJason Gunthorpe * STOP_COPY -> STOP 10670f3e72b5SJason Gunthorpe * 10680f3e72b5SJason Gunthorpe * If P2P is supported then the driver must also implement these FSM 10690f3e72b5SJason Gunthorpe * arcs: 10700f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P 10710f3e72b5SJason Gunthorpe * RUNNING_P2P -> RUNNING 
10720f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP 10730f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P 10740f3e72b5SJason Gunthorpe * Without P2P the driver must implement: 10750f3e72b5SJason Gunthorpe * RUNNING -> STOP 10760f3e72b5SJason Gunthorpe * STOP -> RUNNING 10770f3e72b5SJason Gunthorpe * 10780f3e72b5SJason Gunthorpe * The coding will step through multiple states for some combination 10790f3e72b5SJason Gunthorpe * transitions; if all optional features are supported, this means the 10800f3e72b5SJason Gunthorpe * following ones: 10810f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P 10820f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 10830f3e72b5SJason Gunthorpe * RESUMING -> STOP -> STOP_COPY 10840f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP 10850f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 10860f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 10870f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> RESUMING 10880f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> STOP_COPY 10890f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P -> RUNNING 10900f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RESUMING 10910f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P 10920f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 10930f3e72b5SJason Gunthorpe */ 10940f3e72b5SJason Gunthorpe static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 10950f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = { 10960f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 10970f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 10980f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 10990f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 11000f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 
11010f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11020f3e72b5SJason Gunthorpe }, 11030f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = { 11040f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 11050f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 11060f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 11070f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 11080f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 11090f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11100f3e72b5SJason Gunthorpe }, 11110f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = { 11120f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 11130f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 11140f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 11150f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 11160f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 11170f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11180f3e72b5SJason Gunthorpe }, 11190f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = { 11200f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 11210f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 11220f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 11230f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 11240f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 11250f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11260f3e72b5SJason Gunthorpe }, 11270f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] 
= { 11280f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 11290f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 11300f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 11310f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 11320f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 11330f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11340f3e72b5SJason Gunthorpe }, 11350f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = { 11360f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, 11370f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, 11380f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, 11390f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, 11400f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, 11410f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11420f3e72b5SJason Gunthorpe }, 11430f3e72b5SJason Gunthorpe }; 11440f3e72b5SJason Gunthorpe 11450f3e72b5SJason Gunthorpe static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { 11460f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, 11470f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, 11480f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, 11490f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, 11500f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = 11510f3e72b5SJason Gunthorpe VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, 11520f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = ~0U, 11530f3e72b5SJason Gunthorpe }; 11540f3e72b5SJason Gunthorpe 11550f3e72b5SJason Gunthorpe if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 
11560f3e72b5SJason Gunthorpe (state_flags_table[cur_fsm] & device->migration_flags) != 11570f3e72b5SJason Gunthorpe state_flags_table[cur_fsm])) 11580f3e72b5SJason Gunthorpe return -EINVAL; 11590f3e72b5SJason Gunthorpe 11600f3e72b5SJason Gunthorpe if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 11610f3e72b5SJason Gunthorpe (state_flags_table[new_fsm] & device->migration_flags) != 11620f3e72b5SJason Gunthorpe state_flags_table[new_fsm]) 11630f3e72b5SJason Gunthorpe return -EINVAL; 11640f3e72b5SJason Gunthorpe 11650f3e72b5SJason Gunthorpe /* 11660f3e72b5SJason Gunthorpe * Arcs touching optional and unsupported states are skipped over. The 11670f3e72b5SJason Gunthorpe * driver will instead see an arc from the original state to the next 11680f3e72b5SJason Gunthorpe * logical state, as per the above comment. 11690f3e72b5SJason Gunthorpe */ 11700f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; 11710f3e72b5SJason Gunthorpe while ((state_flags_table[*next_fsm] & device->migration_flags) != 11720f3e72b5SJason Gunthorpe state_flags_table[*next_fsm]) 11730f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; 11740f3e72b5SJason Gunthorpe 11750f3e72b5SJason Gunthorpe return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 
0 : -EINVAL; 11760f3e72b5SJason Gunthorpe } 11770f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_mig_get_next_state); 11780f3e72b5SJason Gunthorpe 11790f3e72b5SJason Gunthorpe /* 11800f3e72b5SJason Gunthorpe * Convert the drivers's struct file into a FD number and return it to userspace 11810f3e72b5SJason Gunthorpe */ 11820f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, 11830f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state *mig) 11840f3e72b5SJason Gunthorpe { 11850f3e72b5SJason Gunthorpe int ret; 11860f3e72b5SJason Gunthorpe int fd; 11870f3e72b5SJason Gunthorpe 11880f3e72b5SJason Gunthorpe fd = get_unused_fd_flags(O_CLOEXEC); 11890f3e72b5SJason Gunthorpe if (fd < 0) { 11900f3e72b5SJason Gunthorpe ret = fd; 11910f3e72b5SJason Gunthorpe goto out_fput; 11920f3e72b5SJason Gunthorpe } 11930f3e72b5SJason Gunthorpe 11940f3e72b5SJason Gunthorpe mig->data_fd = fd; 11950f3e72b5SJason Gunthorpe if (copy_to_user(arg, mig, sizeof(*mig))) { 11960f3e72b5SJason Gunthorpe ret = -EFAULT; 11970f3e72b5SJason Gunthorpe goto out_put_unused; 11980f3e72b5SJason Gunthorpe } 11990f3e72b5SJason Gunthorpe fd_install(fd, filp); 12000f3e72b5SJason Gunthorpe return 0; 12010f3e72b5SJason Gunthorpe 12020f3e72b5SJason Gunthorpe out_put_unused: 12030f3e72b5SJason Gunthorpe put_unused_fd(fd); 12040f3e72b5SJason Gunthorpe out_fput: 12050f3e72b5SJason Gunthorpe fput(filp); 12060f3e72b5SJason Gunthorpe return ret; 12070f3e72b5SJason Gunthorpe } 12080f3e72b5SJason Gunthorpe 12090f3e72b5SJason Gunthorpe static int 12100f3e72b5SJason Gunthorpe vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, 12110f3e72b5SJason Gunthorpe u32 flags, void __user *arg, 12120f3e72b5SJason Gunthorpe size_t argsz) 12130f3e72b5SJason Gunthorpe { 12140f3e72b5SJason Gunthorpe size_t minsz = 12150f3e72b5SJason Gunthorpe offsetofend(struct vfio_device_feature_mig_state, data_fd); 12160f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state mig; 
12170f3e72b5SJason Gunthorpe struct file *filp = NULL; 12180f3e72b5SJason Gunthorpe int ret; 12190f3e72b5SJason Gunthorpe 12200f3e72b5SJason Gunthorpe if (!device->mig_ops) 12210f3e72b5SJason Gunthorpe return -ENOTTY; 12220f3e72b5SJason Gunthorpe 12230f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, 12240f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_SET | 12250f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET, 12260f3e72b5SJason Gunthorpe sizeof(mig)); 12270f3e72b5SJason Gunthorpe if (ret != 1) 12280f3e72b5SJason Gunthorpe return ret; 12290f3e72b5SJason Gunthorpe 12300f3e72b5SJason Gunthorpe if (copy_from_user(&mig, arg, minsz)) 12310f3e72b5SJason Gunthorpe return -EFAULT; 12320f3e72b5SJason Gunthorpe 12330f3e72b5SJason Gunthorpe if (flags & VFIO_DEVICE_FEATURE_GET) { 12340f3e72b5SJason Gunthorpe enum vfio_device_mig_state curr_state; 12350f3e72b5SJason Gunthorpe 12360f3e72b5SJason Gunthorpe ret = device->mig_ops->migration_get_state(device, 12370f3e72b5SJason Gunthorpe &curr_state); 12380f3e72b5SJason Gunthorpe if (ret) 12390f3e72b5SJason Gunthorpe return ret; 12400f3e72b5SJason Gunthorpe mig.device_state = curr_state; 12410f3e72b5SJason Gunthorpe goto out_copy; 12420f3e72b5SJason Gunthorpe } 12430f3e72b5SJason Gunthorpe 12440f3e72b5SJason Gunthorpe /* Handle the VFIO_DEVICE_FEATURE_SET */ 12450f3e72b5SJason Gunthorpe filp = device->mig_ops->migration_set_state(device, mig.device_state); 12460f3e72b5SJason Gunthorpe if (IS_ERR(filp) || !filp) 12470f3e72b5SJason Gunthorpe goto out_copy; 12480f3e72b5SJason Gunthorpe 12490f3e72b5SJason Gunthorpe return vfio_ioct_mig_return_fd(filp, arg, &mig); 12500f3e72b5SJason Gunthorpe out_copy: 12510f3e72b5SJason Gunthorpe mig.data_fd = -1; 12520f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 12530f3e72b5SJason Gunthorpe return -EFAULT; 12540f3e72b5SJason Gunthorpe if (IS_ERR(filp)) 12550f3e72b5SJason Gunthorpe return PTR_ERR(filp); 12560f3e72b5SJason Gunthorpe return 0; 12570f3e72b5SJason Gunthorpe } 
/*
 * VFIO_DEVICE_FEATURE_MIGRATION: report the device's supported migration
 * flags (read-only GET). -ENOTTY if the driver has no migration ops.
 */
static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

/* Ranges should fit into a single kernel page */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))

/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_START: validate the user-supplied IOVA
 * ranges (alignment, overflow, no overlaps), build an interval tree from
 * them and hand it to the driver's log_start op. The (possibly updated)
 * control struct is copied back to userspace; on copy failure the started
 * logging session is torn down again via log_stop.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bound the allocation below to one page worth of ranges */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		/* iova + length must not wrap and the end must fit a ulong */
		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/* Report the page_size the driver actually selected back to user */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}

/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: end a dirty-tracking session
 * previously started via logging_start. Takes no payload (size 0).
 */
static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
				       u32 flags, void __user *arg,
				       size_t argsz)
{
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET, 0);
	if (ret != 1)
		return ret;

	return device->log_ops->log_stop(device);
}

/*
 * iova_bitmap_for_each() callback: forward each chunk of the iteration to
 * the driver's log_read_and_clear op. @opaque carries the vfio_device.
 */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}

/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: read-and-clear the dirty bitmap
 * for the requested IOVA range into the user-provided bitmap, iterating
 * via an iova_bitmap so arbitrarily large ranges are handled in chunks.
 */
static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	/* Reporting granularity is a power-of-two page size, at least 4K */
	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}

/*
 * Top-level VFIO_DEVICE_FEATURE ioctl handler: validate the common header
 * and flag combinations, then dispatch to the core feature handlers above
 * or fall through to the driver's own device_feature op.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}

/*
 * Device file ioctl entry point. Resumes the device via runtime PM for
 * the duration of the ioctl, handles VFIO_DEVICE_FEATURE in the core and
 * forwards everything else to the driver's ioctl op.
 */
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;
	int ret;

	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
		break;

	default:
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}

	vfio_device_pm_runtime_put(device);
	return ret;
}

/* Device file read: delegated entirely to the driver's read op. */
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device, buf, count, ppos);
}

/* Device file write: delegated entirely to the driver's write op. */
static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device, buf, count, ppos);
}

/* Device file mmap: delegated entirely to the driver's mmap op. */
static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device, vma);
}

/* file_operations for the per-device cdev handed out by the group fd */
static const struct file_operations vfio_device_fops = {
	.owner = THIS_MODULE,
	.release = vfio_device_fops_release,
	.read = vfio_device_fops_read,
	.write = vfio_device_fops_write,
	.unlocked_ioctl = vfio_device_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.mmap = vfio_device_fops_mmap,
};

/**
 * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
 * @file: VFIO group file
 *
 * The returned iommu_group is valid as long as a ref is held on the file. This
 * returns a reference on the group. This function is deprecated, only the SPAPR
 * path in kvm should call it.
 */
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
	struct vfio_group *group = file->private_data;
	struct iommu_group *iommu_group = NULL;

	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
		return NULL;

	if (!vfio_file_is_group(file))
		return NULL;

	/* group->iommu_group may be cleared concurrently; read under lock */
	mutex_lock(&group->group_lock);
	if (group->iommu_group) {
		iommu_group = group->iommu_group;
		iommu_group_ref_get(iommu_group);
	}
	mutex_unlock(&group->group_lock);
	return iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);

/**
 * vfio_file_is_group - True if the file is usable with VFIO APIs
 * @file: VFIO group file
 */
bool vfio_file_is_group(struct file *file)
{
	return file->f_op == &vfio_group_fops;
}
EXPORT_SYMBOL_GPL(vfio_file_is_group);

/**
 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
 *	is always CPU cache coherent
 * @file: VFIO group file
 *
 * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
 * bit in DMA transactions. A return of false indicates that the user has
 * rights to access additional instructions such as wbinvd on x86.
 */
bool vfio_file_enforced_coherent(struct file *file)
{
	struct vfio_group *group = file->private_data;
	bool ret;

	if (!vfio_file_is_group(file))
		return true;

	mutex_lock(&group->group_lock);
	if (group->container) {
		ret = vfio_container_ioctl_check_extension(group->container,
							   VFIO_DMA_CC_IOMMU);
	} else {
		/*
		 * Since the coherency state is determined only once a container
		 * is attached the user must do so before they can prove they
		 * have permission.
		 */
		ret = true;
	}
	mutex_unlock(&group->group_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);

/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the group.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = file->private_data;

	if (!vfio_file_is_group(file))
		return;

	mutex_lock(&group->group_lock);
	group->kvm = kvm;
	mutex_unlock(&group->group_lock);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);

/**
 * vfio_file_has_dev - True if the VFIO file is a handle for device
 * @file: VFIO file to check
 * @device: Device that must be part of the file
 *
 * Returns true if given file has permission to manipulate the given device.
 */
bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
	struct vfio_group *group = file->private_data;

	if (!vfio_file_is_group(file))
		return false;

	return group == device->group;
}
EXPORT_SYMBOL_GPL(vfio_file_has_dev);

/*
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability. A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	/* On realloc failure the whole chain is freed, not just the tail */
	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

/*
 * Adjust all 'next' offsets in the capability chain by @offset so they are
 * valid relative to the final user buffer rather than the local buffer.
 */
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	/* 'next' has already been shifted, so subtract offset to walk */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);

/*
 * Copy a caller-built capability (header + payload) into the caps chain.
 * Returns 0 on success or a negative errno from vfio_info_cap_add().
 */
int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	/* Payload follows the header in both source and destination */
	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);

/*
 * Validate a VFIO_DEVICE_SET_IRQS header against the device's IRQ layout
 * and compute the size of the trailing data array (0 for DATA_NONE).
 * Returns 0 on success, -EINVAL on any malformed or out-of-range field.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/* start + count is checked against U32_MAX to prevent wrap */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);

/*
 * Module/class support
 */
/* devtmpfs name callback: place group nodes under /dev/vfio/ */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

/*
 * Module init: set up the group/device IDAs and lists, the container
 * layer, both device classes and the group chrdev region. Unwinds in
 * reverse order on failure.
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	ida_init(&vfio.device_ida);
	mutex_init(&vfio.group_lock);
	INIT_LIST_HEAD(&vfio.group_list);

	ret = vfio_container_init();
	if (ret)
		return ret;

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_group_class;
	}

	vfio.class->devnode = vfio_devnode;

	/* /sys/class/vfio-dev/vfioX */
	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
	if (IS_ERR(vfio.device_class)) {
		ret = PTR_ERR(vfio.device_class);
		goto err_dev_class;
	}

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_alloc_chrdev:
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
err_dev_class:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_group_class:
	vfio_container_cleanup();
	return ret;
}

/* Module exit: tear down everything vfio_init() created, in reverse. */
static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

	ida_destroy(&vfio.device_ida);
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
	class_destroy(vfio.class);
	vfio_container_cleanup();
	vfio.class = NULL;
	xa_destroy(&vfio_device_set_xa);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");