10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only 20f3e72b5SJason Gunthorpe /* 30f3e72b5SJason Gunthorpe * VFIO core 40f3e72b5SJason Gunthorpe * 50f3e72b5SJason Gunthorpe * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 60f3e72b5SJason Gunthorpe * Author: Alex Williamson <alex.williamson@redhat.com> 70f3e72b5SJason Gunthorpe * 80f3e72b5SJason Gunthorpe * Derived from original vfio: 90f3e72b5SJason Gunthorpe * Copyright 2010 Cisco Systems, Inc. All rights reserved. 100f3e72b5SJason Gunthorpe * Author: Tom Lyon, pugs@cisco.com 110f3e72b5SJason Gunthorpe */ 120f3e72b5SJason Gunthorpe 130f3e72b5SJason Gunthorpe #include <linux/cdev.h> 140f3e72b5SJason Gunthorpe #include <linux/compat.h> 150f3e72b5SJason Gunthorpe #include <linux/device.h> 160f3e72b5SJason Gunthorpe #include <linux/file.h> 170f3e72b5SJason Gunthorpe #include <linux/anon_inodes.h> 180f3e72b5SJason Gunthorpe #include <linux/fs.h> 190f3e72b5SJason Gunthorpe #include <linux/idr.h> 200f3e72b5SJason Gunthorpe #include <linux/iommu.h> 210f3e72b5SJason Gunthorpe #include <linux/list.h> 220f3e72b5SJason Gunthorpe #include <linux/miscdevice.h> 230f3e72b5SJason Gunthorpe #include <linux/module.h> 240f3e72b5SJason Gunthorpe #include <linux/mutex.h> 250f3e72b5SJason Gunthorpe #include <linux/pci.h> 260f3e72b5SJason Gunthorpe #include <linux/rwsem.h> 270f3e72b5SJason Gunthorpe #include <linux/sched.h> 280f3e72b5SJason Gunthorpe #include <linux/slab.h> 290f3e72b5SJason Gunthorpe #include <linux/stat.h> 300f3e72b5SJason Gunthorpe #include <linux/string.h> 310f3e72b5SJason Gunthorpe #include <linux/uaccess.h> 320f3e72b5SJason Gunthorpe #include <linux/vfio.h> 330f3e72b5SJason Gunthorpe #include <linux/wait.h> 340f3e72b5SJason Gunthorpe #include <linux/sched/signal.h> 358e5c6995SAbhishek Sahu #include <linux/pm_runtime.h> 3680c4b92aSYishai Hadas #include <linux/interval_tree.h> 3780c4b92aSYishai Hadas #include <linux/iova_bitmap.h> 380f3e72b5SJason Gunthorpe #include "vfio.h" 390f3e72b5SJason 
#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

/* Module-global state: group chardev class/list and the device class. */
static struct vfio {
	struct class			*class;
	struct list_head		group_list;
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;
	dev_t				group_devt;
	struct class			*device_class;
	struct ida			device_ida;
} vfio;

/* Maps set_id (cast to unsigned long) -> struct vfio_device_set */
static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;

/*
 * Join @device to the device set identified by @set_id, creating the set on
 * first use. Devices sharing a set_id share one vfio_device_set. Returns 0
 * on success or a negative errno.
 */
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	/* Allocate outside the xa lock, then race to install below. */
	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	/* Lost the install race (or got an xa error); drop our copy. */
	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	/* device_count is the set's refcount, protected by the xa lock. */
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);

/*
 * Undo vfio_assign_device_set(): unlink @device from its set and free the
 * set when the last member leaves. Safe to call when no set was assigned.
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}

/*
 * Group objects - create, release, get, put, search
 */
/* Caller must hold vfio.group_lock. Elevates group->drivers on success. */
static struct vfio_group *
__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			refcount_inc(&group->drivers);
			return group;
		}
	}
	return NULL;
}

/* Locked wrapper around __vfio_group_get_from_iommu(). */
static struct vfio_group *
vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = __vfio_group_get_from_iommu(iommu_group);
	mutex_unlock(&vfio.group_lock);
	return group;
}

/* struct device release callback, runs on the last put_device(&group->dev). */
static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	mutex_destroy(&group->group_lock);
	iommu_group_put(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}

/*
 * Allocate and initialize a vfio_group for @iommu_group, including its
 * chardev setup. Does not register the cdev; see vfio_create_group().
 */
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->drivers, 1);
	mutex_init(&group->group_lock);
	init_swait_queue_head(&group->opened_file_wait);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}

/*
 * Create and publish a vfio_group for @iommu_group: name it, add the cdev
 * and sysfs entry, and link it on vfio.group_list. If another thread won
 * the creation race, the existing group is returned instead.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	/* Drops the device_initialize() ref; frees via vfio_group_release() */
	put_device(&group->dev);
	return ret;
}

/*
 * Drop @device's driver reference on its group. When the last driver leaves,
 * tear down the group's cdev/sysfs, wait for any open group file to close,
 * sanitize the group, and drop the final device reference.
 */
static void vfio_device_remove_group(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
		return;
	list_del(&group->vfio_next);

	/*
	 * We could concurrently probe another driver in the group that might
	 * race vfio_device_remove_group() with vfio_get_group(), so we have to
	 * ensure that the sysfs is all cleaned up under lock otherwise the
	 * cdev_device_add() will fail due to the name already existing.
	 */
	cdev_device_del(&group->cdev, &group->dev);

	/*
	 * Before we allow the last driver in the group to be unplugged the
	 * group must be sanitized so nothing else is or can reference it. This
	 * is because the group->iommu_group pointer should only be used so long
	 * as a device driver is attached to a device in the group.
	 */
	while (group->opened_file) {
		mutex_unlock(&vfio.group_lock);
		swait_event_idle_exclusive(group->opened_file_wait,
					   !group->opened_file);
		mutex_lock(&vfio.group_lock);
	}
	mutex_unlock(&vfio.group_lock);

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group. Since all
	 * pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);
	group->iommu_group = NULL;

	put_device(&group->dev);
}

/*
 * Device objects - create, release, get, put, search
 */
/* Device reference always implies a group reference */
static void vfio_device_put_registration(struct vfio_device *device)
{
	/* Wakes vfio_unregister_group_dev() when the last ref drops. */
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}

static bool vfio_device_try_get_registration(struct vfio_device *device)
{
	/* Fails once unregistration has dropped the initial reference. */
	return refcount_inc_not_zero(&device->refcount);
}

/*
 * Find the vfio_device in @group backing @dev, taking a registration
 * reference on it. Returns NULL if absent or already unregistering.
 */
static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev &&
		    vfio_device_try_get_registration(device)) {
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * VFIO driver API
 */
/* Release helper called by vfio_put_device() */
static void vfio_device_release(struct device *dev)
{
	struct vfio_device *device =
			container_of(dev, struct vfio_device, device);

	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);

	/*
	 * kvfree() cannot be done here due to a life cycle mess in
	 * vfio-ccw. Before the ccw part is fixed all drivers are
	 * required to support @release and call vfio_free_device()
	 * from there.
	 */
	device->ops->release(device);
}

/*
 * Allocate and initialize vfio_device so it can be registered to vfio
 * core.
 *
 * Drivers should use the wrapper vfio_alloc_device() for allocation.
 * @size is the size of the structure to be allocated, including any
 * private data used by the driver.
 *
 * Driver may provide an @init callback to cover device private data.
 *
 * Use vfio_put_device() to release the structure after success return.
 */
struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
				       const struct vfio_device_ops *ops)
{
	struct vfio_device *device;
	int ret;

	/* @size must at least cover the embedded struct vfio_device. */
	if (WARN_ON(size < sizeof(struct vfio_device)))
		return ERR_PTR(-EINVAL);

	device = kvzalloc(size, GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	ret = vfio_init_device(device, dev, ops);
	if (ret)
		goto out_free;
	return device;

out_free:
	kvfree(device);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);

/*
 * Initialize a vfio_device so it can be registered to vfio core.
 *
 * Only vfio-ccw driver should call this interface.
 */
int vfio_init_device(struct vfio_device *device, struct device *dev,
		     const struct vfio_device_ops *ops)
{
	int ret;

	/* Allocate the index used to name the chardev ("vfio%d"). */
	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
	if (ret < 0) {
		dev_dbg(dev, "Error to alloc index\n");
		return ret;
	}

	device->index = ret;
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;

	/* Driver-private initialization, if the driver provides one. */
	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	device_initialize(&device->device);
	device->device.release = vfio_device_release;
	device->device.class = vfio.device_class;
	device->device.parent = device->dev;
	return 0;

out_uninit:
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_init_device);

/*
 * The helper called by driver @release callback to free the device
 * structure. Drivers which don't have private data to clean can
 * simply use this helper as its @release.
 */
void vfio_free_device(struct vfio_device *device)
{
	kvfree(device);
}
EXPORT_SYMBOL_GPL(vfio_free_device);

/*
 * Create a fake iommu_group for @dev (used for no-iommu and emulated-iommu
 * devices) and build a vfio_group of the given @type on top of it.
 */
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	/* vfio_group holds its own iommu_group reference; drop ours. */
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

/*
 * Resolve the vfio_group for @dev: use its existing iommu_group if one
 * exists (creating the vfio_group on first use), otherwise fall back to a
 * tainting no-iommu group when vfio_noiommu is enabled.
 */
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group && vfio_noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists. Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}

	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}

/*
 * Common tail of device registration: attach @device to @group, name and
 * add its chardev, and start registration refcounting. Consumes the
 * group->drivers reference the group-allocation helpers took for us.
 */
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;
	int ret;

	/*
	 * In all cases group is the output of one of the group allocation
	 * functions and we have group->drivers incremented for us.
	 */
	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put_registration(existing_device);
		ret = -EBUSY;
		goto err_out;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	ret = dev_set_name(&device->device, "vfio%d", device->index);
	if (ret)
		goto err_out;

	ret = device_add(&device->device);
	if (ret)
		goto err_out;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return 0;
err_out:
	vfio_device_remove_group(device);
	return ret;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);

/*
 * Register a virtual device without IOMMU backing. The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);

/*
 * Look up a device in @group by name, using the driver's @match callback
 * when provided (negative return aborts the search as an error), otherwise
 * exact dev_name() comparison. Takes a registration reference on a match;
 * returns ERR_PTR(-ENODEV) if nothing matches.
 */
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get_registration(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed. Open file descriptors for the device keep the registration
 * alive; the driver's @request callback is invoked every 10s to ask users
 * to release the device until the last reference is dropped.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/* Drop the initial registration ref taken in __vfio_register_dev(). */
	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			/* First signal switches to uninterruptible waits. */
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
mutex_unlock(&group->device_lock); 6480f3e72b5SJason Gunthorpe 6493c28a761SYi Liu /* Balances device_add in register path */ 6503c28a761SYi Liu device_del(&device->device); 6513c28a761SYi Liu 652ca5f21b2SJason Gunthorpe vfio_device_remove_group(device); 6530f3e72b5SJason Gunthorpe } 6540f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); 6550f3e72b5SJason Gunthorpe 6560f3e72b5SJason Gunthorpe /* 6570f3e72b5SJason Gunthorpe * VFIO Group fd, /dev/vfio/$GROUP 6580f3e72b5SJason Gunthorpe */ 6590f3e72b5SJason Gunthorpe /* 6600f3e72b5SJason Gunthorpe * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or 6610f3e72b5SJason Gunthorpe * if there was no container to unset. Since the ioctl is called on 6620f3e72b5SJason Gunthorpe * the group, we know that still exists, therefore the only valid 6630f3e72b5SJason Gunthorpe * transition here is 1->0. 6640f3e72b5SJason Gunthorpe */ 665b3b43590SJason Gunthorpe static int vfio_group_ioctl_unset_container(struct vfio_group *group) 6660f3e72b5SJason Gunthorpe { 667b3b43590SJason Gunthorpe int ret = 0; 6680f3e72b5SJason Gunthorpe 669c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 670b3b43590SJason Gunthorpe if (!group->container) { 671b3b43590SJason Gunthorpe ret = -EINVAL; 672b3b43590SJason Gunthorpe goto out_unlock; 673b3b43590SJason Gunthorpe } 674b3b43590SJason Gunthorpe if (group->container_users != 1) { 675b3b43590SJason Gunthorpe ret = -EBUSY; 676b3b43590SJason Gunthorpe goto out_unlock; 677b3b43590SJason Gunthorpe } 678429a781cSJason Gunthorpe vfio_group_detach_container(group); 679b3b43590SJason Gunthorpe 680b3b43590SJason Gunthorpe out_unlock: 681c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 682b3b43590SJason Gunthorpe return ret; 6830f3e72b5SJason Gunthorpe } 6840f3e72b5SJason Gunthorpe 68503e650f6SJason Gunthorpe static int vfio_group_ioctl_set_container(struct vfio_group *group, 68603e650f6SJason Gunthorpe int __user *arg) 68703e650f6SJason Gunthorpe { 
68803e650f6SJason Gunthorpe struct vfio_container *container; 68903e650f6SJason Gunthorpe struct fd f; 69003e650f6SJason Gunthorpe int ret; 69103e650f6SJason Gunthorpe int fd; 69203e650f6SJason Gunthorpe 69303e650f6SJason Gunthorpe if (get_user(fd, arg)) 69403e650f6SJason Gunthorpe return -EFAULT; 69503e650f6SJason Gunthorpe 69603e650f6SJason Gunthorpe f = fdget(fd); 69703e650f6SJason Gunthorpe if (!f.file) 69803e650f6SJason Gunthorpe return -EBADF; 69903e650f6SJason Gunthorpe 700c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 70103e650f6SJason Gunthorpe if (group->container || WARN_ON(group->container_users)) { 70203e650f6SJason Gunthorpe ret = -EINVAL; 70303e650f6SJason Gunthorpe goto out_unlock; 70403e650f6SJason Gunthorpe } 70503e650f6SJason Gunthorpe container = vfio_container_from_file(f.file); 70603e650f6SJason Gunthorpe ret = -EINVAL; 70703e650f6SJason Gunthorpe if (container) { 70803e650f6SJason Gunthorpe ret = vfio_container_attach_group(container, group); 70903e650f6SJason Gunthorpe goto out_unlock; 71003e650f6SJason Gunthorpe } 71103e650f6SJason Gunthorpe 71203e650f6SJason Gunthorpe out_unlock: 713c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 7140f3e72b5SJason Gunthorpe fdput(f); 7150f3e72b5SJason Gunthorpe return ret; 7160f3e72b5SJason Gunthorpe } 7170f3e72b5SJason Gunthorpe 7180f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops; 7190f3e72b5SJason Gunthorpe 7200f3e72b5SJason Gunthorpe /* true if the vfio_device has open_device() called but not close_device() */ 721cdc71fe4SJason Gunthorpe bool vfio_assert_device_open(struct vfio_device *device) 7220f3e72b5SJason Gunthorpe { 7230f3e72b5SJason Gunthorpe return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); 7240f3e72b5SJason Gunthorpe } 7250f3e72b5SJason Gunthorpe 7260f3e72b5SJason Gunthorpe static struct file *vfio_device_open(struct vfio_device *device) 7270f3e72b5SJason Gunthorpe { 7280f3e72b5SJason Gunthorpe struct file *filep; 7290f3e72b5SJason 
Gunthorpe int ret; 7300f3e72b5SJason Gunthorpe 731c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 7320f3e72b5SJason Gunthorpe ret = vfio_device_assign_container(device); 733c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 7340f3e72b5SJason Gunthorpe if (ret) 7350f3e72b5SJason Gunthorpe return ERR_PTR(ret); 7360f3e72b5SJason Gunthorpe 7370f3e72b5SJason Gunthorpe if (!try_module_get(device->dev->driver->owner)) { 7380f3e72b5SJason Gunthorpe ret = -ENODEV; 7390f3e72b5SJason Gunthorpe goto err_unassign_container; 7400f3e72b5SJason Gunthorpe } 7410f3e72b5SJason Gunthorpe 7420f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 7430f3e72b5SJason Gunthorpe device->open_count++; 7440f3e72b5SJason Gunthorpe if (device->open_count == 1) { 7450f3e72b5SJason Gunthorpe /* 7460f3e72b5SJason Gunthorpe * Here we pass the KVM pointer with the group under the read 7470f3e72b5SJason Gunthorpe * lock. If the device driver will use it, it must obtain a 7480f3e72b5SJason Gunthorpe * reference and release it during close_device. 
7490f3e72b5SJason Gunthorpe */ 750c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 7510f3e72b5SJason Gunthorpe device->kvm = device->group->kvm; 7520f3e72b5SJason Gunthorpe 7530f3e72b5SJason Gunthorpe if (device->ops->open_device) { 7540f3e72b5SJason Gunthorpe ret = device->ops->open_device(device); 7550f3e72b5SJason Gunthorpe if (ret) 7560f3e72b5SJason Gunthorpe goto err_undo_count; 7570f3e72b5SJason Gunthorpe } 7589446162eSJason Gunthorpe vfio_device_container_register(device); 759c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 7600f3e72b5SJason Gunthorpe } 7610f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 7620f3e72b5SJason Gunthorpe 7630f3e72b5SJason Gunthorpe /* 7640f3e72b5SJason Gunthorpe * We can't use anon_inode_getfd() because we need to modify 7650f3e72b5SJason Gunthorpe * the f_mode flags directly to allow more than just ioctls 7660f3e72b5SJason Gunthorpe */ 7670f3e72b5SJason Gunthorpe filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, 7680f3e72b5SJason Gunthorpe device, O_RDWR); 7690f3e72b5SJason Gunthorpe if (IS_ERR(filep)) { 7700f3e72b5SJason Gunthorpe ret = PTR_ERR(filep); 7710f3e72b5SJason Gunthorpe goto err_close_device; 7720f3e72b5SJason Gunthorpe } 7730f3e72b5SJason Gunthorpe 7740f3e72b5SJason Gunthorpe /* 7750f3e72b5SJason Gunthorpe * TODO: add an anon_inode interface to do this. 7760f3e72b5SJason Gunthorpe * Appears to be missing by lack of need rather than 7770f3e72b5SJason Gunthorpe * explicitly prevented. Now there's need. 
7780f3e72b5SJason Gunthorpe */ 7790f3e72b5SJason Gunthorpe filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); 7800f3e72b5SJason Gunthorpe 7810f3e72b5SJason Gunthorpe if (device->group->type == VFIO_NO_IOMMU) 7820f3e72b5SJason Gunthorpe dev_warn(device->dev, "vfio-noiommu device opened by user " 7830f3e72b5SJason Gunthorpe "(%s:%d)\n", current->comm, task_pid_nr(current)); 7840f3e72b5SJason Gunthorpe /* 7850f3e72b5SJason Gunthorpe * On success the ref of device is moved to the file and 7860f3e72b5SJason Gunthorpe * put in vfio_device_fops_release() 7870f3e72b5SJason Gunthorpe */ 7880f3e72b5SJason Gunthorpe return filep; 7890f3e72b5SJason Gunthorpe 7900f3e72b5SJason Gunthorpe err_close_device: 7910f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 792c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 7930f3e72b5SJason Gunthorpe if (device->open_count == 1 && device->ops->close_device) { 7940f3e72b5SJason Gunthorpe device->ops->close_device(device); 7950f3e72b5SJason Gunthorpe 7969446162eSJason Gunthorpe vfio_device_container_unregister(device); 7970f3e72b5SJason Gunthorpe } 7980f3e72b5SJason Gunthorpe err_undo_count: 799c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 8000f3e72b5SJason Gunthorpe device->open_count--; 8010f3e72b5SJason Gunthorpe if (device->open_count == 0 && device->kvm) 8020f3e72b5SJason Gunthorpe device->kvm = NULL; 8030f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 8040f3e72b5SJason Gunthorpe module_put(device->dev->driver->owner); 8050f3e72b5SJason Gunthorpe err_unassign_container: 8060f3e72b5SJason Gunthorpe vfio_device_unassign_container(device); 8070f3e72b5SJason Gunthorpe return ERR_PTR(ret); 8080f3e72b5SJason Gunthorpe } 8090f3e72b5SJason Gunthorpe 810150ee2f9SJason Gunthorpe static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, 811150ee2f9SJason Gunthorpe char __user *arg) 8120f3e72b5SJason Gunthorpe { 8130f3e72b5SJason Gunthorpe struct vfio_device *device; 8140f3e72b5SJason 
Gunthorpe struct file *filep; 815150ee2f9SJason Gunthorpe char *buf; 8160f3e72b5SJason Gunthorpe int fdno; 8170f3e72b5SJason Gunthorpe int ret; 8180f3e72b5SJason Gunthorpe 819150ee2f9SJason Gunthorpe buf = strndup_user(arg, PAGE_SIZE); 820150ee2f9SJason Gunthorpe if (IS_ERR(buf)) 821150ee2f9SJason Gunthorpe return PTR_ERR(buf); 822150ee2f9SJason Gunthorpe 8230f3e72b5SJason Gunthorpe device = vfio_device_get_from_name(group, buf); 824150ee2f9SJason Gunthorpe kfree(buf); 8250f3e72b5SJason Gunthorpe if (IS_ERR(device)) 8260f3e72b5SJason Gunthorpe return PTR_ERR(device); 8270f3e72b5SJason Gunthorpe 8280f3e72b5SJason Gunthorpe fdno = get_unused_fd_flags(O_CLOEXEC); 8290f3e72b5SJason Gunthorpe if (fdno < 0) { 8300f3e72b5SJason Gunthorpe ret = fdno; 8310f3e72b5SJason Gunthorpe goto err_put_device; 8320f3e72b5SJason Gunthorpe } 8330f3e72b5SJason Gunthorpe 8340f3e72b5SJason Gunthorpe filep = vfio_device_open(device); 8350f3e72b5SJason Gunthorpe if (IS_ERR(filep)) { 8360f3e72b5SJason Gunthorpe ret = PTR_ERR(filep); 8370f3e72b5SJason Gunthorpe goto err_put_fdno; 8380f3e72b5SJason Gunthorpe } 8390f3e72b5SJason Gunthorpe 8400f3e72b5SJason Gunthorpe fd_install(fdno, filep); 8410f3e72b5SJason Gunthorpe return fdno; 8420f3e72b5SJason Gunthorpe 8430f3e72b5SJason Gunthorpe err_put_fdno: 8440f3e72b5SJason Gunthorpe put_unused_fd(fdno); 8450f3e72b5SJason Gunthorpe err_put_device: 8464a725b8dSKevin Tian vfio_device_put_registration(device); 8470f3e72b5SJason Gunthorpe return ret; 8480f3e72b5SJason Gunthorpe } 8490f3e72b5SJason Gunthorpe 85099a27c08SJason Gunthorpe static int vfio_group_ioctl_get_status(struct vfio_group *group, 85199a27c08SJason Gunthorpe struct vfio_group_status __user *arg) 8520f3e72b5SJason Gunthorpe { 85399a27c08SJason Gunthorpe unsigned long minsz = offsetofend(struct vfio_group_status, flags); 8540f3e72b5SJason Gunthorpe struct vfio_group_status status; 8550f3e72b5SJason Gunthorpe 85699a27c08SJason Gunthorpe if (copy_from_user(&status, arg, minsz)) 
8570f3e72b5SJason Gunthorpe return -EFAULT; 8580f3e72b5SJason Gunthorpe 8590f3e72b5SJason Gunthorpe if (status.argsz < minsz) 8600f3e72b5SJason Gunthorpe return -EINVAL; 8610f3e72b5SJason Gunthorpe 8620f3e72b5SJason Gunthorpe status.flags = 0; 8630f3e72b5SJason Gunthorpe 864c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 8650f3e72b5SJason Gunthorpe if (group->container) 8660f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | 8670f3e72b5SJason Gunthorpe VFIO_GROUP_FLAGS_VIABLE; 8680f3e72b5SJason Gunthorpe else if (!iommu_group_dma_owner_claimed(group->iommu_group)) 8690f3e72b5SJason Gunthorpe status.flags |= VFIO_GROUP_FLAGS_VIABLE; 870c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 8710f3e72b5SJason Gunthorpe 87299a27c08SJason Gunthorpe if (copy_to_user(arg, &status, minsz)) 8730f3e72b5SJason Gunthorpe return -EFAULT; 87499a27c08SJason Gunthorpe return 0; 8750f3e72b5SJason Gunthorpe } 87699a27c08SJason Gunthorpe 87799a27c08SJason Gunthorpe static long vfio_group_fops_unl_ioctl(struct file *filep, 87899a27c08SJason Gunthorpe unsigned int cmd, unsigned long arg) 87999a27c08SJason Gunthorpe { 88099a27c08SJason Gunthorpe struct vfio_group *group = filep->private_data; 88199a27c08SJason Gunthorpe void __user *uarg = (void __user *)arg; 88299a27c08SJason Gunthorpe 88399a27c08SJason Gunthorpe switch (cmd) { 88499a27c08SJason Gunthorpe case VFIO_GROUP_GET_DEVICE_FD: 88599a27c08SJason Gunthorpe return vfio_group_ioctl_get_device_fd(group, uarg); 88699a27c08SJason Gunthorpe case VFIO_GROUP_GET_STATUS: 88799a27c08SJason Gunthorpe return vfio_group_ioctl_get_status(group, uarg); 8880f3e72b5SJason Gunthorpe case VFIO_GROUP_SET_CONTAINER: 88967671f15SJason Gunthorpe return vfio_group_ioctl_set_container(group, uarg); 8900f3e72b5SJason Gunthorpe case VFIO_GROUP_UNSET_CONTAINER: 891b3b43590SJason Gunthorpe return vfio_group_ioctl_unset_container(group); 89299a27c08SJason Gunthorpe default: 89399a27c08SJason Gunthorpe return -ENOTTY; 
8940f3e72b5SJason Gunthorpe } 8950f3e72b5SJason Gunthorpe } 8960f3e72b5SJason Gunthorpe 8970f3e72b5SJason Gunthorpe static int vfio_group_fops_open(struct inode *inode, struct file *filep) 8980f3e72b5SJason Gunthorpe { 8990f3e72b5SJason Gunthorpe struct vfio_group *group = 9000f3e72b5SJason Gunthorpe container_of(inode->i_cdev, struct vfio_group, cdev); 9010f3e72b5SJason Gunthorpe int ret; 9020f3e72b5SJason Gunthorpe 903c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 9040f3e72b5SJason Gunthorpe 905912b74d2SJason Gunthorpe /* 906912b74d2SJason Gunthorpe * drivers can be zero if this races with vfio_device_remove_group(), it 907912b74d2SJason Gunthorpe * will be stable at 0 under the group rwsem 908912b74d2SJason Gunthorpe */ 909912b74d2SJason Gunthorpe if (refcount_read(&group->drivers) == 0) { 9100f3e72b5SJason Gunthorpe ret = -ENODEV; 911912b74d2SJason Gunthorpe goto out_unlock; 9120f3e72b5SJason Gunthorpe } 9130f3e72b5SJason Gunthorpe 9140f3e72b5SJason Gunthorpe if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { 9150f3e72b5SJason Gunthorpe ret = -EPERM; 916912b74d2SJason Gunthorpe goto out_unlock; 9170f3e72b5SJason Gunthorpe } 9180f3e72b5SJason Gunthorpe 9190f3e72b5SJason Gunthorpe /* 9200f3e72b5SJason Gunthorpe * Do we need multiple instances of the group open? Seems not. 
9210f3e72b5SJason Gunthorpe */ 9220f3e72b5SJason Gunthorpe if (group->opened_file) { 9230f3e72b5SJason Gunthorpe ret = -EBUSY; 924912b74d2SJason Gunthorpe goto out_unlock; 9250f3e72b5SJason Gunthorpe } 9260f3e72b5SJason Gunthorpe group->opened_file = filep; 9270f3e72b5SJason Gunthorpe filep->private_data = group; 928912b74d2SJason Gunthorpe ret = 0; 929912b74d2SJason Gunthorpe out_unlock: 930c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 9310f3e72b5SJason Gunthorpe return ret; 9320f3e72b5SJason Gunthorpe } 9330f3e72b5SJason Gunthorpe 9340f3e72b5SJason Gunthorpe static int vfio_group_fops_release(struct inode *inode, struct file *filep) 9350f3e72b5SJason Gunthorpe { 9360f3e72b5SJason Gunthorpe struct vfio_group *group = filep->private_data; 9370f3e72b5SJason Gunthorpe 9380f3e72b5SJason Gunthorpe filep->private_data = NULL; 9390f3e72b5SJason Gunthorpe 940c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 9410f3e72b5SJason Gunthorpe /* 9420f3e72b5SJason Gunthorpe * Device FDs hold a group file reference, therefore the group release 9430f3e72b5SJason Gunthorpe * is only called when there are no open devices. 
9440f3e72b5SJason Gunthorpe */ 9450f3e72b5SJason Gunthorpe WARN_ON(group->notifier.head); 946429a781cSJason Gunthorpe if (group->container) 947429a781cSJason Gunthorpe vfio_group_detach_container(group); 9480f3e72b5SJason Gunthorpe group->opened_file = NULL; 949c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 950912b74d2SJason Gunthorpe swake_up_one(&group->opened_file_wait); 9510f3e72b5SJason Gunthorpe 9520f3e72b5SJason Gunthorpe return 0; 9530f3e72b5SJason Gunthorpe } 9540f3e72b5SJason Gunthorpe 9550f3e72b5SJason Gunthorpe static const struct file_operations vfio_group_fops = { 9560f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 9570f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_group_fops_unl_ioctl, 9580f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 9590f3e72b5SJason Gunthorpe .open = vfio_group_fops_open, 9600f3e72b5SJason Gunthorpe .release = vfio_group_fops_release, 9610f3e72b5SJason Gunthorpe }; 9620f3e72b5SJason Gunthorpe 9630f3e72b5SJason Gunthorpe /* 9648e5c6995SAbhishek Sahu * Wrapper around pm_runtime_resume_and_get(). 9658e5c6995SAbhishek Sahu * Return error code on failure or 0 on success. 
9668e5c6995SAbhishek Sahu */ 9678e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device) 9688e5c6995SAbhishek Sahu { 9698e5c6995SAbhishek Sahu struct device *dev = device->dev; 9708e5c6995SAbhishek Sahu 9718e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) { 9728e5c6995SAbhishek Sahu int ret; 9738e5c6995SAbhishek Sahu 9748e5c6995SAbhishek Sahu ret = pm_runtime_resume_and_get(dev); 9758e5c6995SAbhishek Sahu if (ret) { 9768e5c6995SAbhishek Sahu dev_info_ratelimited(dev, 9778e5c6995SAbhishek Sahu "vfio: runtime resume failed %d\n", ret); 9788e5c6995SAbhishek Sahu return -EIO; 9798e5c6995SAbhishek Sahu } 9808e5c6995SAbhishek Sahu } 9818e5c6995SAbhishek Sahu 9828e5c6995SAbhishek Sahu return 0; 9838e5c6995SAbhishek Sahu } 9848e5c6995SAbhishek Sahu 9858e5c6995SAbhishek Sahu /* 9868e5c6995SAbhishek Sahu * Wrapper around pm_runtime_put(). 9878e5c6995SAbhishek Sahu */ 9888e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device) 9898e5c6995SAbhishek Sahu { 9908e5c6995SAbhishek Sahu struct device *dev = device->dev; 9918e5c6995SAbhishek Sahu 9928e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) 9938e5c6995SAbhishek Sahu pm_runtime_put(dev); 9948e5c6995SAbhishek Sahu } 9958e5c6995SAbhishek Sahu 9968e5c6995SAbhishek Sahu /* 9970f3e72b5SJason Gunthorpe * VFIO Device fd 9980f3e72b5SJason Gunthorpe */ 9990f3e72b5SJason Gunthorpe static int vfio_device_fops_release(struct inode *inode, struct file *filep) 10000f3e72b5SJason Gunthorpe { 10010f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 10020f3e72b5SJason Gunthorpe 10030f3e72b5SJason Gunthorpe mutex_lock(&device->dev_set->lock); 10040f3e72b5SJason Gunthorpe vfio_assert_device_open(device); 1005c82e81abSJason Gunthorpe mutex_lock(&device->group->group_lock); 10060f3e72b5SJason Gunthorpe if (device->open_count == 1 && device->ops->close_device) 10070f3e72b5SJason Gunthorpe device->ops->close_device(device); 
10080f3e72b5SJason Gunthorpe 10099446162eSJason Gunthorpe vfio_device_container_unregister(device); 1010c82e81abSJason Gunthorpe mutex_unlock(&device->group->group_lock); 10110f3e72b5SJason Gunthorpe device->open_count--; 10120f3e72b5SJason Gunthorpe if (device->open_count == 0) 10130f3e72b5SJason Gunthorpe device->kvm = NULL; 10140f3e72b5SJason Gunthorpe mutex_unlock(&device->dev_set->lock); 10150f3e72b5SJason Gunthorpe 10160f3e72b5SJason Gunthorpe module_put(device->dev->driver->owner); 10170f3e72b5SJason Gunthorpe 10180f3e72b5SJason Gunthorpe vfio_device_unassign_container(device); 10190f3e72b5SJason Gunthorpe 10204a725b8dSKevin Tian vfio_device_put_registration(device); 10210f3e72b5SJason Gunthorpe 10220f3e72b5SJason Gunthorpe return 0; 10230f3e72b5SJason Gunthorpe } 10240f3e72b5SJason Gunthorpe 10250f3e72b5SJason Gunthorpe /* 10260f3e72b5SJason Gunthorpe * vfio_mig_get_next_state - Compute the next step in the FSM 10270f3e72b5SJason Gunthorpe * @cur_fsm - The current state the device is in 10280f3e72b5SJason Gunthorpe * @new_fsm - The target state to reach 10290f3e72b5SJason Gunthorpe * @next_fsm - Pointer to the next step to get to new_fsm 10300f3e72b5SJason Gunthorpe * 10310f3e72b5SJason Gunthorpe * Return 0 upon success, otherwise -errno 10320f3e72b5SJason Gunthorpe * Upon success the next step in the state progression between cur_fsm and 10330f3e72b5SJason Gunthorpe * new_fsm will be set in next_fsm. 10340f3e72b5SJason Gunthorpe * 10350f3e72b5SJason Gunthorpe * This breaks down requests for combination transitions into smaller steps and 10360f3e72b5SJason Gunthorpe * returns the next step to get to new_fsm. The function may need to be called 10370f3e72b5SJason Gunthorpe * multiple times before reaching new_fsm. 
10380f3e72b5SJason Gunthorpe * 10390f3e72b5SJason Gunthorpe */ 10400f3e72b5SJason Gunthorpe int vfio_mig_get_next_state(struct vfio_device *device, 10410f3e72b5SJason Gunthorpe enum vfio_device_mig_state cur_fsm, 10420f3e72b5SJason Gunthorpe enum vfio_device_mig_state new_fsm, 10430f3e72b5SJason Gunthorpe enum vfio_device_mig_state *next_fsm) 10440f3e72b5SJason Gunthorpe { 10450f3e72b5SJason Gunthorpe enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; 10460f3e72b5SJason Gunthorpe /* 10470f3e72b5SJason Gunthorpe * The coding in this table requires the driver to implement the 10480f3e72b5SJason Gunthorpe * following FSM arcs: 10490f3e72b5SJason Gunthorpe * RESUMING -> STOP 10500f3e72b5SJason Gunthorpe * STOP -> RESUMING 10510f3e72b5SJason Gunthorpe * STOP -> STOP_COPY 10520f3e72b5SJason Gunthorpe * STOP_COPY -> STOP 10530f3e72b5SJason Gunthorpe * 10540f3e72b5SJason Gunthorpe * If P2P is supported then the driver must also implement these FSM 10550f3e72b5SJason Gunthorpe * arcs: 10560f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P 10570f3e72b5SJason Gunthorpe * RUNNING_P2P -> RUNNING 10580f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP 10590f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P 10600f3e72b5SJason Gunthorpe * Without P2P the driver must implement: 10610f3e72b5SJason Gunthorpe * RUNNING -> STOP 10620f3e72b5SJason Gunthorpe * STOP -> RUNNING 10630f3e72b5SJason Gunthorpe * 10640f3e72b5SJason Gunthorpe * The coding will step through multiple states for some combination 10650f3e72b5SJason Gunthorpe * transitions; if all optional features are supported, this means the 10660f3e72b5SJason Gunthorpe * following ones: 10670f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P 10680f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 10690f3e72b5SJason Gunthorpe * RESUMING -> STOP -> STOP_COPY 10700f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP 10710f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 
10720f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 10730f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> RESUMING 10740f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> STOP_COPY 10750f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P -> RUNNING 10760f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RESUMING 10770f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P 10780f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 10790f3e72b5SJason Gunthorpe */ 10800f3e72b5SJason Gunthorpe static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 10810f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = { 10820f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 10830f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 10840f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 10850f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 10860f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 10870f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 10880f3e72b5SJason Gunthorpe }, 10890f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = { 10900f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 10910f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 10920f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 10930f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 10940f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 10950f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 10960f3e72b5SJason Gunthorpe }, 10970f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = { 10980f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 
10990f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 11000f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 11010f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 11020f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 11030f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11040f3e72b5SJason Gunthorpe }, 11050f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = { 11060f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 11070f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 11080f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 11090f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 11100f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 11110f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11120f3e72b5SJason Gunthorpe }, 11130f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = { 11140f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 11150f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 11160f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 11170f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 11180f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 11190f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11200f3e72b5SJason Gunthorpe }, 11210f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = { 11220f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, 11230f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, 11240f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, 11250f3e72b5SJason 
Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, 11260f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, 11270f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 11280f3e72b5SJason Gunthorpe }, 11290f3e72b5SJason Gunthorpe }; 11300f3e72b5SJason Gunthorpe 11310f3e72b5SJason Gunthorpe static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { 11320f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, 11330f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, 11340f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, 11350f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, 11360f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = 11370f3e72b5SJason Gunthorpe VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, 11380f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = ~0U, 11390f3e72b5SJason Gunthorpe }; 11400f3e72b5SJason Gunthorpe 11410f3e72b5SJason Gunthorpe if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 11420f3e72b5SJason Gunthorpe (state_flags_table[cur_fsm] & device->migration_flags) != 11430f3e72b5SJason Gunthorpe state_flags_table[cur_fsm])) 11440f3e72b5SJason Gunthorpe return -EINVAL; 11450f3e72b5SJason Gunthorpe 11460f3e72b5SJason Gunthorpe if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 11470f3e72b5SJason Gunthorpe (state_flags_table[new_fsm] & device->migration_flags) != 11480f3e72b5SJason Gunthorpe state_flags_table[new_fsm]) 11490f3e72b5SJason Gunthorpe return -EINVAL; 11500f3e72b5SJason Gunthorpe 11510f3e72b5SJason Gunthorpe /* 11520f3e72b5SJason Gunthorpe * Arcs touching optional and unsupported states are skipped over. The 11530f3e72b5SJason Gunthorpe * driver will instead see an arc from the original state to the next 11540f3e72b5SJason Gunthorpe * logical state, as per the above comment. 
11550f3e72b5SJason Gunthorpe */ 11560f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; 11570f3e72b5SJason Gunthorpe while ((state_flags_table[*next_fsm] & device->migration_flags) != 11580f3e72b5SJason Gunthorpe state_flags_table[*next_fsm]) 11590f3e72b5SJason Gunthorpe *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; 11600f3e72b5SJason Gunthorpe 11610f3e72b5SJason Gunthorpe return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL; 11620f3e72b5SJason Gunthorpe } 11630f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_mig_get_next_state); 11640f3e72b5SJason Gunthorpe 11650f3e72b5SJason Gunthorpe /* 11660f3e72b5SJason Gunthorpe * Convert the drivers's struct file into a FD number and return it to userspace 11670f3e72b5SJason Gunthorpe */ 11680f3e72b5SJason Gunthorpe static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, 11690f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state *mig) 11700f3e72b5SJason Gunthorpe { 11710f3e72b5SJason Gunthorpe int ret; 11720f3e72b5SJason Gunthorpe int fd; 11730f3e72b5SJason Gunthorpe 11740f3e72b5SJason Gunthorpe fd = get_unused_fd_flags(O_CLOEXEC); 11750f3e72b5SJason Gunthorpe if (fd < 0) { 11760f3e72b5SJason Gunthorpe ret = fd; 11770f3e72b5SJason Gunthorpe goto out_fput; 11780f3e72b5SJason Gunthorpe } 11790f3e72b5SJason Gunthorpe 11800f3e72b5SJason Gunthorpe mig->data_fd = fd; 11810f3e72b5SJason Gunthorpe if (copy_to_user(arg, mig, sizeof(*mig))) { 11820f3e72b5SJason Gunthorpe ret = -EFAULT; 11830f3e72b5SJason Gunthorpe goto out_put_unused; 11840f3e72b5SJason Gunthorpe } 11850f3e72b5SJason Gunthorpe fd_install(fd, filp); 11860f3e72b5SJason Gunthorpe return 0; 11870f3e72b5SJason Gunthorpe 11880f3e72b5SJason Gunthorpe out_put_unused: 11890f3e72b5SJason Gunthorpe put_unused_fd(fd); 11900f3e72b5SJason Gunthorpe out_fput: 11910f3e72b5SJason Gunthorpe fput(filp); 11920f3e72b5SJason Gunthorpe return ret; 11930f3e72b5SJason Gunthorpe } 11940f3e72b5SJason Gunthorpe 11950f3e72b5SJason 
Gunthorpe static int 11960f3e72b5SJason Gunthorpe vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, 11970f3e72b5SJason Gunthorpe u32 flags, void __user *arg, 11980f3e72b5SJason Gunthorpe size_t argsz) 11990f3e72b5SJason Gunthorpe { 12000f3e72b5SJason Gunthorpe size_t minsz = 12010f3e72b5SJason Gunthorpe offsetofend(struct vfio_device_feature_mig_state, data_fd); 12020f3e72b5SJason Gunthorpe struct vfio_device_feature_mig_state mig; 12030f3e72b5SJason Gunthorpe struct file *filp = NULL; 12040f3e72b5SJason Gunthorpe int ret; 12050f3e72b5SJason Gunthorpe 12060f3e72b5SJason Gunthorpe if (!device->mig_ops) 12070f3e72b5SJason Gunthorpe return -ENOTTY; 12080f3e72b5SJason Gunthorpe 12090f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, 12100f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_SET | 12110f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET, 12120f3e72b5SJason Gunthorpe sizeof(mig)); 12130f3e72b5SJason Gunthorpe if (ret != 1) 12140f3e72b5SJason Gunthorpe return ret; 12150f3e72b5SJason Gunthorpe 12160f3e72b5SJason Gunthorpe if (copy_from_user(&mig, arg, minsz)) 12170f3e72b5SJason Gunthorpe return -EFAULT; 12180f3e72b5SJason Gunthorpe 12190f3e72b5SJason Gunthorpe if (flags & VFIO_DEVICE_FEATURE_GET) { 12200f3e72b5SJason Gunthorpe enum vfio_device_mig_state curr_state; 12210f3e72b5SJason Gunthorpe 12220f3e72b5SJason Gunthorpe ret = device->mig_ops->migration_get_state(device, 12230f3e72b5SJason Gunthorpe &curr_state); 12240f3e72b5SJason Gunthorpe if (ret) 12250f3e72b5SJason Gunthorpe return ret; 12260f3e72b5SJason Gunthorpe mig.device_state = curr_state; 12270f3e72b5SJason Gunthorpe goto out_copy; 12280f3e72b5SJason Gunthorpe } 12290f3e72b5SJason Gunthorpe 12300f3e72b5SJason Gunthorpe /* Handle the VFIO_DEVICE_FEATURE_SET */ 12310f3e72b5SJason Gunthorpe filp = device->mig_ops->migration_set_state(device, mig.device_state); 12320f3e72b5SJason Gunthorpe if (IS_ERR(filp) || !filp) 12330f3e72b5SJason Gunthorpe goto out_copy; 12340f3e72b5SJason 
Gunthorpe 12350f3e72b5SJason Gunthorpe return vfio_ioct_mig_return_fd(filp, arg, &mig); 12360f3e72b5SJason Gunthorpe out_copy: 12370f3e72b5SJason Gunthorpe mig.data_fd = -1; 12380f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 12390f3e72b5SJason Gunthorpe return -EFAULT; 12400f3e72b5SJason Gunthorpe if (IS_ERR(filp)) 12410f3e72b5SJason Gunthorpe return PTR_ERR(filp); 12420f3e72b5SJason Gunthorpe return 0; 12430f3e72b5SJason Gunthorpe } 12440f3e72b5SJason Gunthorpe 12450f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature_migration(struct vfio_device *device, 12460f3e72b5SJason Gunthorpe u32 flags, void __user *arg, 12470f3e72b5SJason Gunthorpe size_t argsz) 12480f3e72b5SJason Gunthorpe { 12490f3e72b5SJason Gunthorpe struct vfio_device_feature_migration mig = { 12500f3e72b5SJason Gunthorpe .flags = device->migration_flags, 12510f3e72b5SJason Gunthorpe }; 12520f3e72b5SJason Gunthorpe int ret; 12530f3e72b5SJason Gunthorpe 12540f3e72b5SJason Gunthorpe if (!device->mig_ops) 12550f3e72b5SJason Gunthorpe return -ENOTTY; 12560f3e72b5SJason Gunthorpe 12570f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, 12580f3e72b5SJason Gunthorpe sizeof(mig)); 12590f3e72b5SJason Gunthorpe if (ret != 1) 12600f3e72b5SJason Gunthorpe return ret; 12610f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 12620f3e72b5SJason Gunthorpe return -EFAULT; 12630f3e72b5SJason Gunthorpe return 0; 12640f3e72b5SJason Gunthorpe } 12650f3e72b5SJason Gunthorpe 126680c4b92aSYishai Hadas /* Ranges should fit into a single kernel page */ 126780c4b92aSYishai Hadas #define LOG_MAX_RANGES \ 126880c4b92aSYishai Hadas (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range)) 126980c4b92aSYishai Hadas 127080c4b92aSYishai Hadas static int 127180c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_start(struct vfio_device *device, 127280c4b92aSYishai Hadas u32 flags, void __user *arg, 127380c4b92aSYishai Hadas size_t argsz) 
127480c4b92aSYishai Hadas { 127580c4b92aSYishai Hadas size_t minsz = 127680c4b92aSYishai Hadas offsetofend(struct vfio_device_feature_dma_logging_control, 127780c4b92aSYishai Hadas ranges); 127880c4b92aSYishai Hadas struct vfio_device_feature_dma_logging_range __user *ranges; 127980c4b92aSYishai Hadas struct vfio_device_feature_dma_logging_control control; 128080c4b92aSYishai Hadas struct vfio_device_feature_dma_logging_range range; 128180c4b92aSYishai Hadas struct rb_root_cached root = RB_ROOT_CACHED; 128280c4b92aSYishai Hadas struct interval_tree_node *nodes; 128380c4b92aSYishai Hadas u64 iova_end; 128480c4b92aSYishai Hadas u32 nnodes; 128580c4b92aSYishai Hadas int i, ret; 128680c4b92aSYishai Hadas 128780c4b92aSYishai Hadas if (!device->log_ops) 128880c4b92aSYishai Hadas return -ENOTTY; 128980c4b92aSYishai Hadas 129080c4b92aSYishai Hadas ret = vfio_check_feature(flags, argsz, 129180c4b92aSYishai Hadas VFIO_DEVICE_FEATURE_SET, 129280c4b92aSYishai Hadas sizeof(control)); 129380c4b92aSYishai Hadas if (ret != 1) 129480c4b92aSYishai Hadas return ret; 129580c4b92aSYishai Hadas 129680c4b92aSYishai Hadas if (copy_from_user(&control, arg, minsz)) 129780c4b92aSYishai Hadas return -EFAULT; 129880c4b92aSYishai Hadas 129980c4b92aSYishai Hadas nnodes = control.num_ranges; 130080c4b92aSYishai Hadas if (!nnodes) 130180c4b92aSYishai Hadas return -EINVAL; 130280c4b92aSYishai Hadas 130380c4b92aSYishai Hadas if (nnodes > LOG_MAX_RANGES) 130480c4b92aSYishai Hadas return -E2BIG; 130580c4b92aSYishai Hadas 130680c4b92aSYishai Hadas ranges = u64_to_user_ptr(control.ranges); 130780c4b92aSYishai Hadas nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), 130880c4b92aSYishai Hadas GFP_KERNEL); 130980c4b92aSYishai Hadas if (!nodes) 131080c4b92aSYishai Hadas return -ENOMEM; 131180c4b92aSYishai Hadas 131280c4b92aSYishai Hadas for (i = 0; i < nnodes; i++) { 131380c4b92aSYishai Hadas if (copy_from_user(&range, &ranges[i], sizeof(range))) { 131480c4b92aSYishai Hadas ret = -EFAULT; 
131580c4b92aSYishai Hadas goto end; 131680c4b92aSYishai Hadas } 131780c4b92aSYishai Hadas if (!IS_ALIGNED(range.iova, control.page_size) || 131880c4b92aSYishai Hadas !IS_ALIGNED(range.length, control.page_size)) { 131980c4b92aSYishai Hadas ret = -EINVAL; 132080c4b92aSYishai Hadas goto end; 132180c4b92aSYishai Hadas } 132280c4b92aSYishai Hadas 132380c4b92aSYishai Hadas if (check_add_overflow(range.iova, range.length, &iova_end) || 132480c4b92aSYishai Hadas iova_end > ULONG_MAX) { 132580c4b92aSYishai Hadas ret = -EOVERFLOW; 132680c4b92aSYishai Hadas goto end; 132780c4b92aSYishai Hadas } 132880c4b92aSYishai Hadas 132980c4b92aSYishai Hadas nodes[i].start = range.iova; 133080c4b92aSYishai Hadas nodes[i].last = range.iova + range.length - 1; 133180c4b92aSYishai Hadas if (interval_tree_iter_first(&root, nodes[i].start, 133280c4b92aSYishai Hadas nodes[i].last)) { 133380c4b92aSYishai Hadas /* Range overlapping */ 133480c4b92aSYishai Hadas ret = -EINVAL; 133580c4b92aSYishai Hadas goto end; 133680c4b92aSYishai Hadas } 133780c4b92aSYishai Hadas interval_tree_insert(nodes + i, &root); 133880c4b92aSYishai Hadas } 133980c4b92aSYishai Hadas 134080c4b92aSYishai Hadas ret = device->log_ops->log_start(device, &root, nnodes, 134180c4b92aSYishai Hadas &control.page_size); 134280c4b92aSYishai Hadas if (ret) 134380c4b92aSYishai Hadas goto end; 134480c4b92aSYishai Hadas 134580c4b92aSYishai Hadas if (copy_to_user(arg, &control, sizeof(control))) { 134680c4b92aSYishai Hadas ret = -EFAULT; 134780c4b92aSYishai Hadas device->log_ops->log_stop(device); 134880c4b92aSYishai Hadas } 134980c4b92aSYishai Hadas 135080c4b92aSYishai Hadas end: 135180c4b92aSYishai Hadas kfree(nodes); 135280c4b92aSYishai Hadas return ret; 135380c4b92aSYishai Hadas } 135480c4b92aSYishai Hadas 135580c4b92aSYishai Hadas static int 135680c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_stop(struct vfio_device *device, 135780c4b92aSYishai Hadas u32 flags, void __user *arg, 135880c4b92aSYishai Hadas size_t argsz) 
135980c4b92aSYishai Hadas { 136080c4b92aSYishai Hadas int ret; 136180c4b92aSYishai Hadas 136280c4b92aSYishai Hadas if (!device->log_ops) 136380c4b92aSYishai Hadas return -ENOTTY; 136480c4b92aSYishai Hadas 136580c4b92aSYishai Hadas ret = vfio_check_feature(flags, argsz, 136680c4b92aSYishai Hadas VFIO_DEVICE_FEATURE_SET, 0); 136780c4b92aSYishai Hadas if (ret != 1) 136880c4b92aSYishai Hadas return ret; 136980c4b92aSYishai Hadas 137080c4b92aSYishai Hadas return device->log_ops->log_stop(device); 137180c4b92aSYishai Hadas } 137280c4b92aSYishai Hadas 137380c4b92aSYishai Hadas static int vfio_device_log_read_and_clear(struct iova_bitmap *iter, 137480c4b92aSYishai Hadas unsigned long iova, size_t length, 137580c4b92aSYishai Hadas void *opaque) 137680c4b92aSYishai Hadas { 137780c4b92aSYishai Hadas struct vfio_device *device = opaque; 137880c4b92aSYishai Hadas 137980c4b92aSYishai Hadas return device->log_ops->log_read_and_clear(device, iova, length, iter); 138080c4b92aSYishai Hadas } 138180c4b92aSYishai Hadas 138280c4b92aSYishai Hadas static int 138380c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_report(struct vfio_device *device, 138480c4b92aSYishai Hadas u32 flags, void __user *arg, 138580c4b92aSYishai Hadas size_t argsz) 138680c4b92aSYishai Hadas { 138780c4b92aSYishai Hadas size_t minsz = 138880c4b92aSYishai Hadas offsetofend(struct vfio_device_feature_dma_logging_report, 138980c4b92aSYishai Hadas bitmap); 139080c4b92aSYishai Hadas struct vfio_device_feature_dma_logging_report report; 139180c4b92aSYishai Hadas struct iova_bitmap *iter; 139280c4b92aSYishai Hadas u64 iova_end; 139380c4b92aSYishai Hadas int ret; 139480c4b92aSYishai Hadas 139580c4b92aSYishai Hadas if (!device->log_ops) 139680c4b92aSYishai Hadas return -ENOTTY; 139780c4b92aSYishai Hadas 139880c4b92aSYishai Hadas ret = vfio_check_feature(flags, argsz, 139980c4b92aSYishai Hadas VFIO_DEVICE_FEATURE_GET, 140080c4b92aSYishai Hadas sizeof(report)); 140180c4b92aSYishai Hadas if (ret != 1) 140280c4b92aSYishai 
Hadas return ret; 140380c4b92aSYishai Hadas 140480c4b92aSYishai Hadas if (copy_from_user(&report, arg, minsz)) 140580c4b92aSYishai Hadas return -EFAULT; 140680c4b92aSYishai Hadas 140780c4b92aSYishai Hadas if (report.page_size < SZ_4K || !is_power_of_2(report.page_size)) 140880c4b92aSYishai Hadas return -EINVAL; 140980c4b92aSYishai Hadas 141080c4b92aSYishai Hadas if (check_add_overflow(report.iova, report.length, &iova_end) || 141180c4b92aSYishai Hadas iova_end > ULONG_MAX) 141280c4b92aSYishai Hadas return -EOVERFLOW; 141380c4b92aSYishai Hadas 141480c4b92aSYishai Hadas iter = iova_bitmap_alloc(report.iova, report.length, 141580c4b92aSYishai Hadas report.page_size, 141680c4b92aSYishai Hadas u64_to_user_ptr(report.bitmap)); 141780c4b92aSYishai Hadas if (IS_ERR(iter)) 141880c4b92aSYishai Hadas return PTR_ERR(iter); 141980c4b92aSYishai Hadas 142080c4b92aSYishai Hadas ret = iova_bitmap_for_each(iter, device, 142180c4b92aSYishai Hadas vfio_device_log_read_and_clear); 142280c4b92aSYishai Hadas 142380c4b92aSYishai Hadas iova_bitmap_free(iter); 142480c4b92aSYishai Hadas return ret; 142580c4b92aSYishai Hadas } 142680c4b92aSYishai Hadas 14270f3e72b5SJason Gunthorpe static int vfio_ioctl_device_feature(struct vfio_device *device, 14280f3e72b5SJason Gunthorpe struct vfio_device_feature __user *arg) 14290f3e72b5SJason Gunthorpe { 14300f3e72b5SJason Gunthorpe size_t minsz = offsetofend(struct vfio_device_feature, flags); 14310f3e72b5SJason Gunthorpe struct vfio_device_feature feature; 14320f3e72b5SJason Gunthorpe 14330f3e72b5SJason Gunthorpe if (copy_from_user(&feature, arg, minsz)) 14340f3e72b5SJason Gunthorpe return -EFAULT; 14350f3e72b5SJason Gunthorpe 14360f3e72b5SJason Gunthorpe if (feature.argsz < minsz) 14370f3e72b5SJason Gunthorpe return -EINVAL; 14380f3e72b5SJason Gunthorpe 14390f3e72b5SJason Gunthorpe /* Check unknown flags */ 14400f3e72b5SJason Gunthorpe if (feature.flags & 14410f3e72b5SJason Gunthorpe ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET | 
14420f3e72b5SJason Gunthorpe VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE)) 14430f3e72b5SJason Gunthorpe return -EINVAL; 14440f3e72b5SJason Gunthorpe 14450f3e72b5SJason Gunthorpe /* GET & SET are mutually exclusive except with PROBE */ 14460f3e72b5SJason Gunthorpe if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && 14470f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_SET) && 14480f3e72b5SJason Gunthorpe (feature.flags & VFIO_DEVICE_FEATURE_GET)) 14490f3e72b5SJason Gunthorpe return -EINVAL; 14500f3e72b5SJason Gunthorpe 14510f3e72b5SJason Gunthorpe switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { 14520f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIGRATION: 14530f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_migration( 14540f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 14550f3e72b5SJason Gunthorpe feature.argsz - minsz); 14560f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: 14570f3e72b5SJason Gunthorpe return vfio_ioctl_device_feature_mig_device_state( 14580f3e72b5SJason Gunthorpe device, feature.flags, arg->data, 14590f3e72b5SJason Gunthorpe feature.argsz - minsz); 146080c4b92aSYishai Hadas case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: 146180c4b92aSYishai Hadas return vfio_ioctl_device_feature_logging_start( 146280c4b92aSYishai Hadas device, feature.flags, arg->data, 146380c4b92aSYishai Hadas feature.argsz - minsz); 146480c4b92aSYishai Hadas case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: 146580c4b92aSYishai Hadas return vfio_ioctl_device_feature_logging_stop( 146680c4b92aSYishai Hadas device, feature.flags, arg->data, 146780c4b92aSYishai Hadas feature.argsz - minsz); 146880c4b92aSYishai Hadas case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: 146980c4b92aSYishai Hadas return vfio_ioctl_device_feature_logging_report( 147080c4b92aSYishai Hadas device, feature.flags, arg->data, 147180c4b92aSYishai Hadas feature.argsz - minsz); 14720f3e72b5SJason Gunthorpe default: 14730f3e72b5SJason Gunthorpe if 
(unlikely(!device->ops->device_feature)) 14740f3e72b5SJason Gunthorpe return -EINVAL; 14750f3e72b5SJason Gunthorpe return device->ops->device_feature(device, feature.flags, 14760f3e72b5SJason Gunthorpe arg->data, 14770f3e72b5SJason Gunthorpe feature.argsz - minsz); 14780f3e72b5SJason Gunthorpe } 14790f3e72b5SJason Gunthorpe } 14800f3e72b5SJason Gunthorpe 14810f3e72b5SJason Gunthorpe static long vfio_device_fops_unl_ioctl(struct file *filep, 14820f3e72b5SJason Gunthorpe unsigned int cmd, unsigned long arg) 14830f3e72b5SJason Gunthorpe { 14840f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 14858e5c6995SAbhishek Sahu int ret; 14868e5c6995SAbhishek Sahu 14878e5c6995SAbhishek Sahu ret = vfio_device_pm_runtime_get(device); 14888e5c6995SAbhishek Sahu if (ret) 14898e5c6995SAbhishek Sahu return ret; 14900f3e72b5SJason Gunthorpe 14910f3e72b5SJason Gunthorpe switch (cmd) { 14920f3e72b5SJason Gunthorpe case VFIO_DEVICE_FEATURE: 14938e5c6995SAbhishek Sahu ret = vfio_ioctl_device_feature(device, (void __user *)arg); 14948e5c6995SAbhishek Sahu break; 14958e5c6995SAbhishek Sahu 14960f3e72b5SJason Gunthorpe default: 14970f3e72b5SJason Gunthorpe if (unlikely(!device->ops->ioctl)) 14988e5c6995SAbhishek Sahu ret = -EINVAL; 14998e5c6995SAbhishek Sahu else 15008e5c6995SAbhishek Sahu ret = device->ops->ioctl(device, cmd, arg); 15018e5c6995SAbhishek Sahu break; 15020f3e72b5SJason Gunthorpe } 15038e5c6995SAbhishek Sahu 15048e5c6995SAbhishek Sahu vfio_device_pm_runtime_put(device); 15058e5c6995SAbhishek Sahu return ret; 15060f3e72b5SJason Gunthorpe } 15070f3e72b5SJason Gunthorpe 15080f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, 15090f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 15100f3e72b5SJason Gunthorpe { 15110f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 15120f3e72b5SJason Gunthorpe 15130f3e72b5SJason Gunthorpe if (unlikely(!device->ops->read)) 15140f3e72b5SJason Gunthorpe 
return -EINVAL; 15150f3e72b5SJason Gunthorpe 15160f3e72b5SJason Gunthorpe return device->ops->read(device, buf, count, ppos); 15170f3e72b5SJason Gunthorpe } 15180f3e72b5SJason Gunthorpe 15190f3e72b5SJason Gunthorpe static ssize_t vfio_device_fops_write(struct file *filep, 15200f3e72b5SJason Gunthorpe const char __user *buf, 15210f3e72b5SJason Gunthorpe size_t count, loff_t *ppos) 15220f3e72b5SJason Gunthorpe { 15230f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 15240f3e72b5SJason Gunthorpe 15250f3e72b5SJason Gunthorpe if (unlikely(!device->ops->write)) 15260f3e72b5SJason Gunthorpe return -EINVAL; 15270f3e72b5SJason Gunthorpe 15280f3e72b5SJason Gunthorpe return device->ops->write(device, buf, count, ppos); 15290f3e72b5SJason Gunthorpe } 15300f3e72b5SJason Gunthorpe 15310f3e72b5SJason Gunthorpe static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 15320f3e72b5SJason Gunthorpe { 15330f3e72b5SJason Gunthorpe struct vfio_device *device = filep->private_data; 15340f3e72b5SJason Gunthorpe 15350f3e72b5SJason Gunthorpe if (unlikely(!device->ops->mmap)) 15360f3e72b5SJason Gunthorpe return -EINVAL; 15370f3e72b5SJason Gunthorpe 15380f3e72b5SJason Gunthorpe return device->ops->mmap(device, vma); 15390f3e72b5SJason Gunthorpe } 15400f3e72b5SJason Gunthorpe 15410f3e72b5SJason Gunthorpe static const struct file_operations vfio_device_fops = { 15420f3e72b5SJason Gunthorpe .owner = THIS_MODULE, 15430f3e72b5SJason Gunthorpe .release = vfio_device_fops_release, 15440f3e72b5SJason Gunthorpe .read = vfio_device_fops_read, 15450f3e72b5SJason Gunthorpe .write = vfio_device_fops_write, 15460f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_device_fops_unl_ioctl, 15470f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 15480f3e72b5SJason Gunthorpe .mmap = vfio_device_fops_mmap, 15490f3e72b5SJason Gunthorpe }; 15500f3e72b5SJason Gunthorpe 15510f3e72b5SJason Gunthorpe /** 15520f3e72b5SJason Gunthorpe * vfio_file_iommu_group - Return the 
struct iommu_group for the vfio group file 15530f3e72b5SJason Gunthorpe * @file: VFIO group file 15540f3e72b5SJason Gunthorpe * 1555*819da99aSJason Gunthorpe * The returned iommu_group is valid as long as a ref is held on the file. This 1556*819da99aSJason Gunthorpe * returns a reference on the group. This function is deprecated, only the SPAPR 1557*819da99aSJason Gunthorpe * path in kvm should call it. 15580f3e72b5SJason Gunthorpe */ 15590f3e72b5SJason Gunthorpe struct iommu_group *vfio_file_iommu_group(struct file *file) 15600f3e72b5SJason Gunthorpe { 15610f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 15620f3e72b5SJason Gunthorpe 15634b22ef04SJason Gunthorpe if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) 15644b22ef04SJason Gunthorpe return NULL; 15654b22ef04SJason Gunthorpe 15664b22ef04SJason Gunthorpe if (!vfio_file_is_group(file)) 15670f3e72b5SJason Gunthorpe return NULL; 1568*819da99aSJason Gunthorpe iommu_group_ref_get(group->iommu_group); 15690f3e72b5SJason Gunthorpe return group->iommu_group; 15700f3e72b5SJason Gunthorpe } 15710f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_iommu_group); 15720f3e72b5SJason Gunthorpe 15730f3e72b5SJason Gunthorpe /** 15744b22ef04SJason Gunthorpe * vfio_file_is_group - True if the file is usable with VFIO aPIS 15754b22ef04SJason Gunthorpe * @file: VFIO group file 15764b22ef04SJason Gunthorpe */ 15774b22ef04SJason Gunthorpe bool vfio_file_is_group(struct file *file) 15784b22ef04SJason Gunthorpe { 15794b22ef04SJason Gunthorpe return file->f_op == &vfio_group_fops; 15804b22ef04SJason Gunthorpe } 15814b22ef04SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_is_group); 15824b22ef04SJason Gunthorpe 15834b22ef04SJason Gunthorpe /** 15840f3e72b5SJason Gunthorpe * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file 15850f3e72b5SJason Gunthorpe * is always CPU cache coherent 15860f3e72b5SJason Gunthorpe * @file: VFIO group file 15870f3e72b5SJason Gunthorpe * 15880f3e72b5SJason Gunthorpe * 
Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop 15890f3e72b5SJason Gunthorpe * bit in DMA transactions. A return of false indicates that the user has 15900f3e72b5SJason Gunthorpe * rights to access additional instructions such as wbinvd on x86. 15910f3e72b5SJason Gunthorpe */ 15920f3e72b5SJason Gunthorpe bool vfio_file_enforced_coherent(struct file *file) 15930f3e72b5SJason Gunthorpe { 15940f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 15950f3e72b5SJason Gunthorpe bool ret; 15960f3e72b5SJason Gunthorpe 15970f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 15980f3e72b5SJason Gunthorpe return true; 15990f3e72b5SJason Gunthorpe 1600c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 16010f3e72b5SJason Gunthorpe if (group->container) { 16021408640dSJason Gunthorpe ret = vfio_container_ioctl_check_extension(group->container, 16030f3e72b5SJason Gunthorpe VFIO_DMA_CC_IOMMU); 16040f3e72b5SJason Gunthorpe } else { 16050f3e72b5SJason Gunthorpe /* 16060f3e72b5SJason Gunthorpe * Since the coherency state is determined only once a container 16070f3e72b5SJason Gunthorpe * is attached the user must do so before they can prove they 16080f3e72b5SJason Gunthorpe * have permission. 
16090f3e72b5SJason Gunthorpe */ 16100f3e72b5SJason Gunthorpe ret = true; 16110f3e72b5SJason Gunthorpe } 1612c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 16130f3e72b5SJason Gunthorpe return ret; 16140f3e72b5SJason Gunthorpe } 16150f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); 16160f3e72b5SJason Gunthorpe 16170f3e72b5SJason Gunthorpe /** 16180f3e72b5SJason Gunthorpe * vfio_file_set_kvm - Link a kvm with VFIO drivers 16190f3e72b5SJason Gunthorpe * @file: VFIO group file 16200f3e72b5SJason Gunthorpe * @kvm: KVM to link 16210f3e72b5SJason Gunthorpe * 16220f3e72b5SJason Gunthorpe * When a VFIO device is first opened the KVM will be available in 16230f3e72b5SJason Gunthorpe * device->kvm if one was associated with the group. 16240f3e72b5SJason Gunthorpe */ 16250f3e72b5SJason Gunthorpe void vfio_file_set_kvm(struct file *file, struct kvm *kvm) 16260f3e72b5SJason Gunthorpe { 16270f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 16280f3e72b5SJason Gunthorpe 16290f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 16300f3e72b5SJason Gunthorpe return; 16310f3e72b5SJason Gunthorpe 1632c82e81abSJason Gunthorpe mutex_lock(&group->group_lock); 16330f3e72b5SJason Gunthorpe group->kvm = kvm; 1634c82e81abSJason Gunthorpe mutex_unlock(&group->group_lock); 16350f3e72b5SJason Gunthorpe } 16360f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_set_kvm); 16370f3e72b5SJason Gunthorpe 16380f3e72b5SJason Gunthorpe /** 16390f3e72b5SJason Gunthorpe * vfio_file_has_dev - True if the VFIO file is a handle for device 16400f3e72b5SJason Gunthorpe * @file: VFIO file to check 16410f3e72b5SJason Gunthorpe * @device: Device that must be part of the file 16420f3e72b5SJason Gunthorpe * 16430f3e72b5SJason Gunthorpe * Returns true if given file has permission to manipulate the given device. 
16440f3e72b5SJason Gunthorpe */ 16450f3e72b5SJason Gunthorpe bool vfio_file_has_dev(struct file *file, struct vfio_device *device) 16460f3e72b5SJason Gunthorpe { 16470f3e72b5SJason Gunthorpe struct vfio_group *group = file->private_data; 16480f3e72b5SJason Gunthorpe 16490f3e72b5SJason Gunthorpe if (file->f_op != &vfio_group_fops) 16500f3e72b5SJason Gunthorpe return false; 16510f3e72b5SJason Gunthorpe 16520f3e72b5SJason Gunthorpe return group == device->group; 16530f3e72b5SJason Gunthorpe } 16540f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_file_has_dev); 16550f3e72b5SJason Gunthorpe 16560f3e72b5SJason Gunthorpe /* 16570f3e72b5SJason Gunthorpe * Sub-module support 16580f3e72b5SJason Gunthorpe */ 16590f3e72b5SJason Gunthorpe /* 16600f3e72b5SJason Gunthorpe * Helper for managing a buffer of info chain capabilities, allocate or 16610f3e72b5SJason Gunthorpe * reallocate a buffer with additional @size, filling in @id and @version 16620f3e72b5SJason Gunthorpe * of the capability. A pointer to the new capability is returned. 16630f3e72b5SJason Gunthorpe * 16640f3e72b5SJason Gunthorpe * NB. The chain is based at the head of the buffer, so new entries are 16650f3e72b5SJason Gunthorpe * added to the tail, vfio_info_cap_shift() should be called to fixup the 16660f3e72b5SJason Gunthorpe * next offsets prior to copying to the user buffer. 
16670f3e72b5SJason Gunthorpe */ 16680f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, 16690f3e72b5SJason Gunthorpe size_t size, u16 id, u16 version) 16700f3e72b5SJason Gunthorpe { 16710f3e72b5SJason Gunthorpe void *buf; 16720f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header, *tmp; 16730f3e72b5SJason Gunthorpe 16740f3e72b5SJason Gunthorpe buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); 16750f3e72b5SJason Gunthorpe if (!buf) { 16760f3e72b5SJason Gunthorpe kfree(caps->buf); 16770f3e72b5SJason Gunthorpe caps->buf = NULL; 16780f3e72b5SJason Gunthorpe caps->size = 0; 16790f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 16800f3e72b5SJason Gunthorpe } 16810f3e72b5SJason Gunthorpe 16820f3e72b5SJason Gunthorpe caps->buf = buf; 16830f3e72b5SJason Gunthorpe header = buf + caps->size; 16840f3e72b5SJason Gunthorpe 16850f3e72b5SJason Gunthorpe /* Eventually copied to user buffer, zero */ 16860f3e72b5SJason Gunthorpe memset(header, 0, size); 16870f3e72b5SJason Gunthorpe 16880f3e72b5SJason Gunthorpe header->id = id; 16890f3e72b5SJason Gunthorpe header->version = version; 16900f3e72b5SJason Gunthorpe 16910f3e72b5SJason Gunthorpe /* Add to the end of the capability chain */ 16920f3e72b5SJason Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next) 16930f3e72b5SJason Gunthorpe ; /* nothing */ 16940f3e72b5SJason Gunthorpe 16950f3e72b5SJason Gunthorpe tmp->next = caps->size; 16960f3e72b5SJason Gunthorpe caps->size += size; 16970f3e72b5SJason Gunthorpe 16980f3e72b5SJason Gunthorpe return header; 16990f3e72b5SJason Gunthorpe } 17000f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add); 17010f3e72b5SJason Gunthorpe 17020f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) 17030f3e72b5SJason Gunthorpe { 17040f3e72b5SJason Gunthorpe struct vfio_info_cap_header *tmp; 17050f3e72b5SJason Gunthorpe void *buf = (void *)caps->buf; 17060f3e72b5SJason Gunthorpe 17070f3e72b5SJason 
Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) 17080f3e72b5SJason Gunthorpe tmp->next += offset; 17090f3e72b5SJason Gunthorpe } 17100f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift); 17110f3e72b5SJason Gunthorpe 17120f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps, 17130f3e72b5SJason Gunthorpe struct vfio_info_cap_header *cap, size_t size) 17140f3e72b5SJason Gunthorpe { 17150f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header; 17160f3e72b5SJason Gunthorpe 17170f3e72b5SJason Gunthorpe header = vfio_info_cap_add(caps, size, cap->id, cap->version); 17180f3e72b5SJason Gunthorpe if (IS_ERR(header)) 17190f3e72b5SJason Gunthorpe return PTR_ERR(header); 17200f3e72b5SJason Gunthorpe 17210f3e72b5SJason Gunthorpe memcpy(header + 1, cap + 1, size - sizeof(*header)); 17220f3e72b5SJason Gunthorpe 17230f3e72b5SJason Gunthorpe return 0; 17240f3e72b5SJason Gunthorpe } 17250f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability); 17260f3e72b5SJason Gunthorpe 17270f3e72b5SJason Gunthorpe int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, 17280f3e72b5SJason Gunthorpe int max_irq_type, size_t *data_size) 17290f3e72b5SJason Gunthorpe { 17300f3e72b5SJason Gunthorpe unsigned long minsz; 17310f3e72b5SJason Gunthorpe size_t size; 17320f3e72b5SJason Gunthorpe 17330f3e72b5SJason Gunthorpe minsz = offsetofend(struct vfio_irq_set, count); 17340f3e72b5SJason Gunthorpe 17350f3e72b5SJason Gunthorpe if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || 17360f3e72b5SJason Gunthorpe (hdr->count >= (U32_MAX - hdr->start)) || 17370f3e72b5SJason Gunthorpe (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | 17380f3e72b5SJason Gunthorpe VFIO_IRQ_SET_ACTION_TYPE_MASK))) 17390f3e72b5SJason Gunthorpe return -EINVAL; 17400f3e72b5SJason Gunthorpe 17410f3e72b5SJason Gunthorpe if (data_size) 17420f3e72b5SJason Gunthorpe *data_size = 0; 17430f3e72b5SJason Gunthorpe 17440f3e72b5SJason Gunthorpe if 
(hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) 17450f3e72b5SJason Gunthorpe return -EINVAL; 17460f3e72b5SJason Gunthorpe 17470f3e72b5SJason Gunthorpe switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { 17480f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_NONE: 17490f3e72b5SJason Gunthorpe size = 0; 17500f3e72b5SJason Gunthorpe break; 17510f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_BOOL: 17520f3e72b5SJason Gunthorpe size = sizeof(uint8_t); 17530f3e72b5SJason Gunthorpe break; 17540f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_EVENTFD: 17550f3e72b5SJason Gunthorpe size = sizeof(int32_t); 17560f3e72b5SJason Gunthorpe break; 17570f3e72b5SJason Gunthorpe default: 17580f3e72b5SJason Gunthorpe return -EINVAL; 17590f3e72b5SJason Gunthorpe } 17600f3e72b5SJason Gunthorpe 17610f3e72b5SJason Gunthorpe if (size) { 17620f3e72b5SJason Gunthorpe if (hdr->argsz - minsz < hdr->count * size) 17630f3e72b5SJason Gunthorpe return -EINVAL; 17640f3e72b5SJason Gunthorpe 17650f3e72b5SJason Gunthorpe if (!data_size) 17660f3e72b5SJason Gunthorpe return -EINVAL; 17670f3e72b5SJason Gunthorpe 17680f3e72b5SJason Gunthorpe *data_size = hdr->count * size; 17690f3e72b5SJason Gunthorpe } 17700f3e72b5SJason Gunthorpe 17710f3e72b5SJason Gunthorpe return 0; 17720f3e72b5SJason Gunthorpe } 17730f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); 17740f3e72b5SJason Gunthorpe 17750f3e72b5SJason Gunthorpe /* 17760f3e72b5SJason Gunthorpe * Module/class support 17770f3e72b5SJason Gunthorpe */ 17780f3e72b5SJason Gunthorpe static char *vfio_devnode(struct device *dev, umode_t *mode) 17790f3e72b5SJason Gunthorpe { 17800f3e72b5SJason Gunthorpe return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); 17810f3e72b5SJason Gunthorpe } 17820f3e72b5SJason Gunthorpe 1783c41da462SJason Gunthorpe static int __init vfio_init(void) 1784c41da462SJason Gunthorpe { 1785c41da462SJason Gunthorpe int ret; 1786c41da462SJason Gunthorpe 1787c41da462SJason Gunthorpe ida_init(&vfio.group_ida); 
1788c41da462SJason Gunthorpe ida_init(&vfio.device_ida); 1789c41da462SJason Gunthorpe mutex_init(&vfio.group_lock); 1790c41da462SJason Gunthorpe INIT_LIST_HEAD(&vfio.group_list); 1791c41da462SJason Gunthorpe 1792c41da462SJason Gunthorpe ret = vfio_container_init(); 1793c41da462SJason Gunthorpe if (ret) 1794c41da462SJason Gunthorpe return ret; 1795c41da462SJason Gunthorpe 17960f3e72b5SJason Gunthorpe /* /dev/vfio/$GROUP */ 17970f3e72b5SJason Gunthorpe vfio.class = class_create(THIS_MODULE, "vfio"); 17980f3e72b5SJason Gunthorpe if (IS_ERR(vfio.class)) { 17990f3e72b5SJason Gunthorpe ret = PTR_ERR(vfio.class); 18003c28a761SYi Liu goto err_group_class; 18010f3e72b5SJason Gunthorpe } 18020f3e72b5SJason Gunthorpe 18030f3e72b5SJason Gunthorpe vfio.class->devnode = vfio_devnode; 18040f3e72b5SJason Gunthorpe 18053c28a761SYi Liu /* /sys/class/vfio-dev/vfioX */ 18063c28a761SYi Liu vfio.device_class = class_create(THIS_MODULE, "vfio-dev"); 18073c28a761SYi Liu if (IS_ERR(vfio.device_class)) { 18083c28a761SYi Liu ret = PTR_ERR(vfio.device_class); 18093c28a761SYi Liu goto err_dev_class; 18103c28a761SYi Liu } 18113c28a761SYi Liu 18120f3e72b5SJason Gunthorpe ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); 18130f3e72b5SJason Gunthorpe if (ret) 18140f3e72b5SJason Gunthorpe goto err_alloc_chrdev; 18150f3e72b5SJason Gunthorpe 18160f3e72b5SJason Gunthorpe pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 18170f3e72b5SJason Gunthorpe return 0; 18180f3e72b5SJason Gunthorpe 18190f3e72b5SJason Gunthorpe err_alloc_chrdev: 18203c28a761SYi Liu class_destroy(vfio.device_class); 18213c28a761SYi Liu vfio.device_class = NULL; 18223c28a761SYi Liu err_dev_class: 18230f3e72b5SJason Gunthorpe class_destroy(vfio.class); 18240f3e72b5SJason Gunthorpe vfio.class = NULL; 18253c28a761SYi Liu err_group_class: 1826c41da462SJason Gunthorpe vfio_container_cleanup(); 18270f3e72b5SJason Gunthorpe return ret; 18280f3e72b5SJason Gunthorpe } 18290f3e72b5SJason Gunthorpe 18300f3e72b5SJason 
Gunthorpe static void __exit vfio_cleanup(void) 18310f3e72b5SJason Gunthorpe { 18320f3e72b5SJason Gunthorpe WARN_ON(!list_empty(&vfio.group_list)); 18330f3e72b5SJason Gunthorpe 18343c28a761SYi Liu ida_destroy(&vfio.device_ida); 18350f3e72b5SJason Gunthorpe ida_destroy(&vfio.group_ida); 18360f3e72b5SJason Gunthorpe unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); 18373c28a761SYi Liu class_destroy(vfio.device_class); 18383c28a761SYi Liu vfio.device_class = NULL; 18390f3e72b5SJason Gunthorpe class_destroy(vfio.class); 1840c41da462SJason Gunthorpe vfio_container_cleanup(); 18410f3e72b5SJason Gunthorpe vfio.class = NULL; 18420f3e72b5SJason Gunthorpe xa_destroy(&vfio_device_set_xa); 18430f3e72b5SJason Gunthorpe } 18440f3e72b5SJason Gunthorpe 18450f3e72b5SJason Gunthorpe module_init(vfio_init); 18460f3e72b5SJason Gunthorpe module_exit(vfio_cleanup); 18470f3e72b5SJason Gunthorpe 18480f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION); 18490f3e72b5SJason Gunthorpe MODULE_LICENSE("GPL v2"); 18500f3e72b5SJason Gunthorpe MODULE_AUTHOR(DRIVER_AUTHOR); 18510f3e72b5SJason Gunthorpe MODULE_DESCRIPTION(DRIVER_DESC); 18520f3e72b5SJason Gunthorpe MODULE_ALIAS_MISCDEV(VFIO_MINOR); 18530f3e72b5SJason Gunthorpe MODULE_ALIAS("devname:vfio/vfio"); 18540f3e72b5SJason Gunthorpe MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); 1855