10f3e72b5SJason Gunthorpe // SPDX-License-Identifier: GPL-2.0-only 20f3e72b5SJason Gunthorpe /* 30f3e72b5SJason Gunthorpe * VFIO core 40f3e72b5SJason Gunthorpe * 50f3e72b5SJason Gunthorpe * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 60f3e72b5SJason Gunthorpe * Author: Alex Williamson <alex.williamson@redhat.com> 70f3e72b5SJason Gunthorpe * 80f3e72b5SJason Gunthorpe * Derived from original vfio: 90f3e72b5SJason Gunthorpe * Copyright 2010 Cisco Systems, Inc. All rights reserved. 100f3e72b5SJason Gunthorpe * Author: Tom Lyon, pugs@cisco.com 110f3e72b5SJason Gunthorpe */ 120f3e72b5SJason Gunthorpe 130f3e72b5SJason Gunthorpe #include <linux/cdev.h> 140f3e72b5SJason Gunthorpe #include <linux/compat.h> 150f3e72b5SJason Gunthorpe #include <linux/device.h> 160f3e72b5SJason Gunthorpe #include <linux/fs.h> 170f3e72b5SJason Gunthorpe #include <linux/idr.h> 180f3e72b5SJason Gunthorpe #include <linux/iommu.h> 192b48f52fSMatthew Rosato #ifdef CONFIG_HAVE_KVM 202b48f52fSMatthew Rosato #include <linux/kvm_host.h> 212b48f52fSMatthew Rosato #endif 220f3e72b5SJason Gunthorpe #include <linux/list.h> 230f3e72b5SJason Gunthorpe #include <linux/miscdevice.h> 240f3e72b5SJason Gunthorpe #include <linux/module.h> 250f3e72b5SJason Gunthorpe #include <linux/mutex.h> 260f3e72b5SJason Gunthorpe #include <linux/pci.h> 270f3e72b5SJason Gunthorpe #include <linux/rwsem.h> 280f3e72b5SJason Gunthorpe #include <linux/sched.h> 290f3e72b5SJason Gunthorpe #include <linux/slab.h> 300f3e72b5SJason Gunthorpe #include <linux/stat.h> 310f3e72b5SJason Gunthorpe #include <linux/string.h> 320f3e72b5SJason Gunthorpe #include <linux/uaccess.h> 330f3e72b5SJason Gunthorpe #include <linux/vfio.h> 340f3e72b5SJason Gunthorpe #include <linux/wait.h> 350f3e72b5SJason Gunthorpe #include <linux/sched/signal.h> 368e5c6995SAbhishek Sahu #include <linux/pm_runtime.h> 3780c4b92aSYishai Hadas #include <linux/interval_tree.h> 3880c4b92aSYishai Hadas #include <linux/iova_bitmap.h> 392a3dab19SJason Gunthorpe #include 
<linux/iommufd.h> 400f3e72b5SJason Gunthorpe #include "vfio.h" 410f3e72b5SJason Gunthorpe 420f3e72b5SJason Gunthorpe #define DRIVER_VERSION "0.3" 430f3e72b5SJason Gunthorpe #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 440f3e72b5SJason Gunthorpe #define DRIVER_DESC "VFIO - User Level meta-driver" 450f3e72b5SJason Gunthorpe 460f3e72b5SJason Gunthorpe static struct vfio { 473c28a761SYi Liu struct class *device_class; 483c28a761SYi Liu struct ida device_ida; 490f3e72b5SJason Gunthorpe } vfio; 500f3e72b5SJason Gunthorpe 51c9a397ceSJason Gunthorpe #ifdef CONFIG_VFIO_NOIOMMU 52c9a397ceSJason Gunthorpe bool vfio_noiommu __read_mostly; 53c9a397ceSJason Gunthorpe module_param_named(enable_unsafe_noiommu_mode, 54c9a397ceSJason Gunthorpe vfio_noiommu, bool, S_IRUGO | S_IWUSR); 55c9a397ceSJason Gunthorpe MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. 
(default: false)"); 56c9a397ceSJason Gunthorpe #endif 57c9a397ceSJason Gunthorpe 580f3e72b5SJason Gunthorpe static DEFINE_XARRAY(vfio_device_set_xa); 590f3e72b5SJason Gunthorpe 600f3e72b5SJason Gunthorpe int vfio_assign_device_set(struct vfio_device *device, void *set_id) 610f3e72b5SJason Gunthorpe { 620f3e72b5SJason Gunthorpe unsigned long idx = (unsigned long)set_id; 630f3e72b5SJason Gunthorpe struct vfio_device_set *new_dev_set; 640f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set; 650f3e72b5SJason Gunthorpe 660f3e72b5SJason Gunthorpe if (WARN_ON(!set_id)) 670f3e72b5SJason Gunthorpe return -EINVAL; 680f3e72b5SJason Gunthorpe 690f3e72b5SJason Gunthorpe /* 700f3e72b5SJason Gunthorpe * Atomically acquire a singleton object in the xarray for this set_id 710f3e72b5SJason Gunthorpe */ 720f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 730f3e72b5SJason Gunthorpe dev_set = xa_load(&vfio_device_set_xa, idx); 740f3e72b5SJason Gunthorpe if (dev_set) 750f3e72b5SJason Gunthorpe goto found_get_ref; 760f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 770f3e72b5SJason Gunthorpe 780f3e72b5SJason Gunthorpe new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); 790f3e72b5SJason Gunthorpe if (!new_dev_set) 800f3e72b5SJason Gunthorpe return -ENOMEM; 810f3e72b5SJason Gunthorpe mutex_init(&new_dev_set->lock); 820f3e72b5SJason Gunthorpe INIT_LIST_HEAD(&new_dev_set->device_list); 830f3e72b5SJason Gunthorpe new_dev_set->set_id = set_id; 840f3e72b5SJason Gunthorpe 850f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 860f3e72b5SJason Gunthorpe dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, 870f3e72b5SJason Gunthorpe GFP_KERNEL); 880f3e72b5SJason Gunthorpe if (!dev_set) { 890f3e72b5SJason Gunthorpe dev_set = new_dev_set; 900f3e72b5SJason Gunthorpe goto found_get_ref; 910f3e72b5SJason Gunthorpe } 920f3e72b5SJason Gunthorpe 930f3e72b5SJason Gunthorpe kfree(new_dev_set); 940f3e72b5SJason Gunthorpe if (xa_is_err(dev_set)) { 950f3e72b5SJason 
Gunthorpe xa_unlock(&vfio_device_set_xa); 960f3e72b5SJason Gunthorpe return xa_err(dev_set); 970f3e72b5SJason Gunthorpe } 980f3e72b5SJason Gunthorpe 990f3e72b5SJason Gunthorpe found_get_ref: 1000f3e72b5SJason Gunthorpe dev_set->device_count++; 1010f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1020f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1030f3e72b5SJason Gunthorpe device->dev_set = dev_set; 1040f3e72b5SJason Gunthorpe list_add_tail(&device->dev_set_list, &dev_set->device_list); 1050f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1060f3e72b5SJason Gunthorpe return 0; 1070f3e72b5SJason Gunthorpe } 1080f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_assign_device_set); 1090f3e72b5SJason Gunthorpe 1100f3e72b5SJason Gunthorpe static void vfio_release_device_set(struct vfio_device *device) 1110f3e72b5SJason Gunthorpe { 1120f3e72b5SJason Gunthorpe struct vfio_device_set *dev_set = device->dev_set; 1130f3e72b5SJason Gunthorpe 1140f3e72b5SJason Gunthorpe if (!dev_set) 1150f3e72b5SJason Gunthorpe return; 1160f3e72b5SJason Gunthorpe 1170f3e72b5SJason Gunthorpe mutex_lock(&dev_set->lock); 1180f3e72b5SJason Gunthorpe list_del(&device->dev_set_list); 1190f3e72b5SJason Gunthorpe mutex_unlock(&dev_set->lock); 1200f3e72b5SJason Gunthorpe 1210f3e72b5SJason Gunthorpe xa_lock(&vfio_device_set_xa); 1220f3e72b5SJason Gunthorpe if (!--dev_set->device_count) { 1230f3e72b5SJason Gunthorpe __xa_erase(&vfio_device_set_xa, 1240f3e72b5SJason Gunthorpe (unsigned long)dev_set->set_id); 1250f3e72b5SJason Gunthorpe mutex_destroy(&dev_set->lock); 1260f3e72b5SJason Gunthorpe kfree(dev_set); 1270f3e72b5SJason Gunthorpe } 1280f3e72b5SJason Gunthorpe xa_unlock(&vfio_device_set_xa); 1290f3e72b5SJason Gunthorpe } 1300f3e72b5SJason Gunthorpe 1315cd189e4SAnthony DeRossi unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) 1325cd189e4SAnthony DeRossi { 1335cd189e4SAnthony DeRossi struct vfio_device *cur; 1345cd189e4SAnthony DeRossi unsigned int open_count = 0; 
1355cd189e4SAnthony DeRossi 1365cd189e4SAnthony DeRossi lockdep_assert_held(&dev_set->lock); 1375cd189e4SAnthony DeRossi 1385cd189e4SAnthony DeRossi list_for_each_entry(cur, &dev_set->device_list, dev_set_list) 1395cd189e4SAnthony DeRossi open_count += cur->open_count; 1405cd189e4SAnthony DeRossi return open_count; 1415cd189e4SAnthony DeRossi } 1425cd189e4SAnthony DeRossi EXPORT_SYMBOL_GPL(vfio_device_set_open_count); 1435cd189e4SAnthony DeRossi 144a80e1de9SYi Liu struct vfio_device * 145a80e1de9SYi Liu vfio_find_device_in_devset(struct vfio_device_set *dev_set, 146a80e1de9SYi Liu struct device *dev) 147a80e1de9SYi Liu { 148a80e1de9SYi Liu struct vfio_device *cur; 149a80e1de9SYi Liu 150a80e1de9SYi Liu lockdep_assert_held(&dev_set->lock); 151a80e1de9SYi Liu 152a80e1de9SYi Liu list_for_each_entry(cur, &dev_set->device_list, dev_set_list) 153a80e1de9SYi Liu if (cur->dev == dev) 154a80e1de9SYi Liu return cur; 155a80e1de9SYi Liu return NULL; 156a80e1de9SYi Liu } 157a80e1de9SYi Liu EXPORT_SYMBOL_GPL(vfio_find_device_in_devset); 158a80e1de9SYi Liu 1590f3e72b5SJason Gunthorpe /* 1600f3e72b5SJason Gunthorpe * Device objects - create, release, get, put, search 1610f3e72b5SJason Gunthorpe */ 1620f3e72b5SJason Gunthorpe /* Device reference always implies a group reference */ 1639eefba80SYi Liu void vfio_device_put_registration(struct vfio_device *device) 1640f3e72b5SJason Gunthorpe { 1650f3e72b5SJason Gunthorpe if (refcount_dec_and_test(&device->refcount)) 1660f3e72b5SJason Gunthorpe complete(&device->comp); 1670f3e72b5SJason Gunthorpe } 1680f3e72b5SJason Gunthorpe 1699eefba80SYi Liu bool vfio_device_try_get_registration(struct vfio_device *device) 1700f3e72b5SJason Gunthorpe { 1710f3e72b5SJason Gunthorpe return refcount_inc_not_zero(&device->refcount); 1720f3e72b5SJason Gunthorpe } 1730f3e72b5SJason Gunthorpe 1740f3e72b5SJason Gunthorpe /* 1750f3e72b5SJason Gunthorpe * VFIO driver API 1760f3e72b5SJason Gunthorpe */ 177cb9ff3f3SKevin Tian /* Release helper called by 
vfio_put_device() */ 1783c28a761SYi Liu static void vfio_device_release(struct device *dev) 179cb9ff3f3SKevin Tian { 180cb9ff3f3SKevin Tian struct vfio_device *device = 1813c28a761SYi Liu container_of(dev, struct vfio_device, device); 182cb9ff3f3SKevin Tian 183ebb72b76SKevin Tian vfio_release_device_set(device); 1843c28a761SYi Liu ida_free(&vfio.device_ida, device->index); 185cb9ff3f3SKevin Tian 186913447d0SEric Farman if (device->ops->release) 187cb9ff3f3SKevin Tian device->ops->release(device); 188913447d0SEric Farman 189913447d0SEric Farman kvfree(device); 190cb9ff3f3SKevin Tian } 191cb9ff3f3SKevin Tian 192d1104f93SEric Farman static int vfio_init_device(struct vfio_device *device, struct device *dev, 193d1104f93SEric Farman const struct vfio_device_ops *ops); 194d1104f93SEric Farman 195cb9ff3f3SKevin Tian /* 196cb9ff3f3SKevin Tian * Allocate and initialize vfio_device so it can be registered to vfio 197cb9ff3f3SKevin Tian * core. 198cb9ff3f3SKevin Tian * 199cb9ff3f3SKevin Tian * Drivers should use the wrapper vfio_alloc_device() for allocation. 200cb9ff3f3SKevin Tian * @size is the size of the structure to be allocated, including any 201cb9ff3f3SKevin Tian * private data used by the driver. 202cb9ff3f3SKevin Tian * 203cb9ff3f3SKevin Tian * Driver may provide an @init callback to cover device private data. 204cb9ff3f3SKevin Tian * 205cb9ff3f3SKevin Tian * Use vfio_put_device() to release the structure after success return. 
206cb9ff3f3SKevin Tian */ 207cb9ff3f3SKevin Tian struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, 208cb9ff3f3SKevin Tian const struct vfio_device_ops *ops) 209cb9ff3f3SKevin Tian { 210cb9ff3f3SKevin Tian struct vfio_device *device; 211cb9ff3f3SKevin Tian int ret; 212cb9ff3f3SKevin Tian 213cb9ff3f3SKevin Tian if (WARN_ON(size < sizeof(struct vfio_device))) 214cb9ff3f3SKevin Tian return ERR_PTR(-EINVAL); 215cb9ff3f3SKevin Tian 216cb9ff3f3SKevin Tian device = kvzalloc(size, GFP_KERNEL); 217cb9ff3f3SKevin Tian if (!device) 218cb9ff3f3SKevin Tian return ERR_PTR(-ENOMEM); 219cb9ff3f3SKevin Tian 220cb9ff3f3SKevin Tian ret = vfio_init_device(device, dev, ops); 221cb9ff3f3SKevin Tian if (ret) 222cb9ff3f3SKevin Tian goto out_free; 223cb9ff3f3SKevin Tian return device; 224cb9ff3f3SKevin Tian 225cb9ff3f3SKevin Tian out_free: 226cb9ff3f3SKevin Tian kvfree(device); 227cb9ff3f3SKevin Tian return ERR_PTR(ret); 228cb9ff3f3SKevin Tian } 229cb9ff3f3SKevin Tian EXPORT_SYMBOL_GPL(_vfio_alloc_device); 230cb9ff3f3SKevin Tian 231cb9ff3f3SKevin Tian /* 232cb9ff3f3SKevin Tian * Initialize a vfio_device so it can be registered to vfio core. 
233cb9ff3f3SKevin Tian */ 234d1104f93SEric Farman static int vfio_init_device(struct vfio_device *device, struct device *dev, 235cb9ff3f3SKevin Tian const struct vfio_device_ops *ops) 236cb9ff3f3SKevin Tian { 237cb9ff3f3SKevin Tian int ret; 238cb9ff3f3SKevin Tian 2393c28a761SYi Liu ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL); 2403c28a761SYi Liu if (ret < 0) { 2413c28a761SYi Liu dev_dbg(dev, "Error to alloc index\n"); 2423c28a761SYi Liu return ret; 2433c28a761SYi Liu } 2443c28a761SYi Liu 2453c28a761SYi Liu device->index = ret; 246ebb72b76SKevin Tian init_completion(&device->comp); 247ebb72b76SKevin Tian device->dev = dev; 248ebb72b76SKevin Tian device->ops = ops; 249cb9ff3f3SKevin Tian 250cb9ff3f3SKevin Tian if (ops->init) { 251cb9ff3f3SKevin Tian ret = ops->init(device); 252cb9ff3f3SKevin Tian if (ret) 253cb9ff3f3SKevin Tian goto out_uninit; 254cb9ff3f3SKevin Tian } 255cb9ff3f3SKevin Tian 2563c28a761SYi Liu device_initialize(&device->device); 2573c28a761SYi Liu device->device.release = vfio_device_release; 2583c28a761SYi Liu device->device.class = vfio.device_class; 2593c28a761SYi Liu device->device.parent = device->dev; 260cb9ff3f3SKevin Tian return 0; 261cb9ff3f3SKevin Tian 262cb9ff3f3SKevin Tian out_uninit: 263ebb72b76SKevin Tian vfio_release_device_set(device); 2643c28a761SYi Liu ida_free(&vfio.device_ida, device->index); 265cb9ff3f3SKevin Tian return ret; 266cb9ff3f3SKevin Tian } 267cb9ff3f3SKevin Tian 26849ea02d3SYi Liu static int __vfio_register_dev(struct vfio_device *device, 26949ea02d3SYi Liu enum vfio_group_type type) 27049ea02d3SYi Liu { 27149ea02d3SYi Liu int ret; 27249ea02d3SYi Liu 2737d12578cSYi Liu if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) && 2747d12578cSYi Liu (!device->ops->bind_iommufd || 2757d12578cSYi Liu !device->ops->unbind_iommufd || 2769048c734SYi Liu !device->ops->attach_ioas || 2779048c734SYi Liu !device->ops->detach_ioas))) 278a4d1f91dSJason Gunthorpe return -EINVAL; 279a4d1f91dSJason Gunthorpe 2800f3e72b5SJason Gunthorpe 
/* 2810f3e72b5SJason Gunthorpe * If the driver doesn't specify a set then the device is added to a 2820f3e72b5SJason Gunthorpe * singleton set just for itself. 2830f3e72b5SJason Gunthorpe */ 2840f3e72b5SJason Gunthorpe if (!device->dev_set) 2850f3e72b5SJason Gunthorpe vfio_assign_device_set(device, device); 2860f3e72b5SJason Gunthorpe 2873c28a761SYi Liu ret = dev_set_name(&device->device, "vfio%d", device->index); 2883c28a761SYi Liu if (ret) 28949ea02d3SYi Liu return ret; 29049ea02d3SYi Liu 29149ea02d3SYi Liu ret = vfio_device_set_group(device, type); 29249ea02d3SYi Liu if (ret) 29349ea02d3SYi Liu return ret; 2943c28a761SYi Liu 2953c28a761SYi Liu ret = device_add(&device->device); 2963c28a761SYi Liu if (ret) 2973c28a761SYi Liu goto err_out; 2983c28a761SYi Liu 2990f3e72b5SJason Gunthorpe /* Refcounting can't start until the driver calls register */ 3000f3e72b5SJason Gunthorpe refcount_set(&device->refcount, 1); 3010f3e72b5SJason Gunthorpe 30232e09228SYi Liu vfio_device_group_register(device); 3030f3e72b5SJason Gunthorpe 3040f3e72b5SJason Gunthorpe return 0; 3053c28a761SYi Liu err_out: 306ca5f21b2SJason Gunthorpe vfio_device_remove_group(device); 3073c28a761SYi Liu return ret; 3080f3e72b5SJason Gunthorpe } 3090f3e72b5SJason Gunthorpe 3100f3e72b5SJason Gunthorpe int vfio_register_group_dev(struct vfio_device *device) 3110f3e72b5SJason Gunthorpe { 31249ea02d3SYi Liu return __vfio_register_dev(device, VFIO_IOMMU); 3130f3e72b5SJason Gunthorpe } 3140f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_group_dev); 3150f3e72b5SJason Gunthorpe 3160f3e72b5SJason Gunthorpe /* 3170f3e72b5SJason Gunthorpe * Register a virtual device without IOMMU backing. The user of this 3180f3e72b5SJason Gunthorpe * device must not be able to directly trigger unmediated DMA. 
3190f3e72b5SJason Gunthorpe */ 3200f3e72b5SJason Gunthorpe int vfio_register_emulated_iommu_dev(struct vfio_device *device) 3210f3e72b5SJason Gunthorpe { 32249ea02d3SYi Liu return __vfio_register_dev(device, VFIO_EMULATED_IOMMU); 3230f3e72b5SJason Gunthorpe } 3240f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); 3250f3e72b5SJason Gunthorpe 3260f3e72b5SJason Gunthorpe /* 3270f3e72b5SJason Gunthorpe * Decrement the device reference count and wait for the device to be 3280f3e72b5SJason Gunthorpe * removed. Open file descriptors for the device... */ 3290f3e72b5SJason Gunthorpe void vfio_unregister_group_dev(struct vfio_device *device) 3300f3e72b5SJason Gunthorpe { 3310f3e72b5SJason Gunthorpe unsigned int i = 0; 3320f3e72b5SJason Gunthorpe bool interrupted = false; 3330f3e72b5SJason Gunthorpe long rc; 3340f3e72b5SJason Gunthorpe 335291872a5SYi Liu /* 336291872a5SYi Liu * Prevent new device opened by userspace via the 337291872a5SYi Liu * VFIO_GROUP_GET_DEVICE_FD in the group path. 
338291872a5SYi Liu */ 339291872a5SYi Liu vfio_device_group_unregister(device); 340291872a5SYi Liu 341*38c24544SYi Liu /* Balances device_add in register path */ 342*38c24544SYi Liu device_del(&device->device); 343*38c24544SYi Liu 3444a725b8dSKevin Tian vfio_device_put_registration(device); 3450f3e72b5SJason Gunthorpe rc = try_wait_for_completion(&device->comp); 3460f3e72b5SJason Gunthorpe while (rc <= 0) { 3470f3e72b5SJason Gunthorpe if (device->ops->request) 3480f3e72b5SJason Gunthorpe device->ops->request(device, i++); 3490f3e72b5SJason Gunthorpe 3500f3e72b5SJason Gunthorpe if (interrupted) { 3510f3e72b5SJason Gunthorpe rc = wait_for_completion_timeout(&device->comp, 3520f3e72b5SJason Gunthorpe HZ * 10); 3530f3e72b5SJason Gunthorpe } else { 3540f3e72b5SJason Gunthorpe rc = wait_for_completion_interruptible_timeout( 3550f3e72b5SJason Gunthorpe &device->comp, HZ * 10); 3560f3e72b5SJason Gunthorpe if (rc < 0) { 3570f3e72b5SJason Gunthorpe interrupted = true; 3580f3e72b5SJason Gunthorpe dev_warn(device->dev, 3590f3e72b5SJason Gunthorpe "Device is currently in use, task" 3600f3e72b5SJason Gunthorpe " \"%s\" (%d) " 3610f3e72b5SJason Gunthorpe "blocked until device is released", 3620f3e72b5SJason Gunthorpe current->comm, task_pid_nr(current)); 3630f3e72b5SJason Gunthorpe } 3640f3e72b5SJason Gunthorpe } 3650f3e72b5SJason Gunthorpe } 3660f3e72b5SJason Gunthorpe 36749ea02d3SYi Liu /* Balances vfio_device_set_group in register path */ 368ca5f21b2SJason Gunthorpe vfio_device_remove_group(device); 3690f3e72b5SJason Gunthorpe } 3700f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); 3710f3e72b5SJason Gunthorpe 3722b48f52fSMatthew Rosato #ifdef CONFIG_HAVE_KVM 3732b48f52fSMatthew Rosato void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) 3742b48f52fSMatthew Rosato { 3752b48f52fSMatthew Rosato void (*pfn)(struct kvm *kvm); 3762b48f52fSMatthew Rosato bool (*fn)(struct kvm *kvm); 3772b48f52fSMatthew Rosato bool ret; 3782b48f52fSMatthew 
Rosato 3792b48f52fSMatthew Rosato lockdep_assert_held(&device->dev_set->lock); 3802b48f52fSMatthew Rosato 3812b48f52fSMatthew Rosato pfn = symbol_get(kvm_put_kvm); 3822b48f52fSMatthew Rosato if (WARN_ON(!pfn)) 3832b48f52fSMatthew Rosato return; 3842b48f52fSMatthew Rosato 3852b48f52fSMatthew Rosato fn = symbol_get(kvm_get_kvm_safe); 3862b48f52fSMatthew Rosato if (WARN_ON(!fn)) { 3872b48f52fSMatthew Rosato symbol_put(kvm_put_kvm); 3882b48f52fSMatthew Rosato return; 3892b48f52fSMatthew Rosato } 3902b48f52fSMatthew Rosato 3912b48f52fSMatthew Rosato ret = fn(kvm); 3922b48f52fSMatthew Rosato symbol_put(kvm_get_kvm_safe); 3932b48f52fSMatthew Rosato if (!ret) { 3942b48f52fSMatthew Rosato symbol_put(kvm_put_kvm); 3952b48f52fSMatthew Rosato return; 3962b48f52fSMatthew Rosato } 3972b48f52fSMatthew Rosato 3982b48f52fSMatthew Rosato device->put_kvm = pfn; 3992b48f52fSMatthew Rosato device->kvm = kvm; 4002b48f52fSMatthew Rosato } 4012b48f52fSMatthew Rosato 4022b48f52fSMatthew Rosato void vfio_device_put_kvm(struct vfio_device *device) 4032b48f52fSMatthew Rosato { 4042b48f52fSMatthew Rosato lockdep_assert_held(&device->dev_set->lock); 4052b48f52fSMatthew Rosato 4062b48f52fSMatthew Rosato if (!device->kvm) 4072b48f52fSMatthew Rosato return; 4082b48f52fSMatthew Rosato 4092b48f52fSMatthew Rosato if (WARN_ON(!device->put_kvm)) 4102b48f52fSMatthew Rosato goto clear; 4112b48f52fSMatthew Rosato 4122b48f52fSMatthew Rosato device->put_kvm(device->kvm); 4132b48f52fSMatthew Rosato device->put_kvm = NULL; 4142b48f52fSMatthew Rosato symbol_put(kvm_put_kvm); 4152b48f52fSMatthew Rosato 4162b48f52fSMatthew Rosato clear: 4172b48f52fSMatthew Rosato device->kvm = NULL; 4182b48f52fSMatthew Rosato } 4192b48f52fSMatthew Rosato #endif 4202b48f52fSMatthew Rosato 4210f3e72b5SJason Gunthorpe /* true if the vfio_device has open_device() called but not close_device() */ 4224741f2e9SJason Gunthorpe static bool vfio_assert_device_open(struct vfio_device *device) 4230f3e72b5SJason Gunthorpe { 4240f3e72b5SJason 
Gunthorpe return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); 4250f3e72b5SJason Gunthorpe } 4260f3e72b5SJason Gunthorpe 427b1a3b5c6SYi Liu struct vfio_device_file * 428b1a3b5c6SYi Liu vfio_allocate_device_file(struct vfio_device *device) 429b1a3b5c6SYi Liu { 430b1a3b5c6SYi Liu struct vfio_device_file *df; 431b1a3b5c6SYi Liu 432b1a3b5c6SYi Liu df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT); 433b1a3b5c6SYi Liu if (!df) 434b1a3b5c6SYi Liu return ERR_PTR(-ENOMEM); 435b1a3b5c6SYi Liu 436b1a3b5c6SYi Liu df->device = device; 43734aeeecdSYi Liu spin_lock_init(&df->kvm_ref_lock); 438b1a3b5c6SYi Liu 439b1a3b5c6SYi Liu return df; 440b1a3b5c6SYi Liu } 441b1a3b5c6SYi Liu 44205f37e1cSYi Liu static int vfio_df_device_first_open(struct vfio_device_file *df) 443294aaccbSJason Gunthorpe { 44405f37e1cSYi Liu struct vfio_device *device = df->device; 44505f37e1cSYi Liu struct iommufd_ctx *iommufd = df->iommufd; 446294aaccbSJason Gunthorpe int ret; 447294aaccbSJason Gunthorpe 448294aaccbSJason Gunthorpe lockdep_assert_held(&device->dev_set->lock); 449294aaccbSJason Gunthorpe 450294aaccbSJason Gunthorpe if (!try_module_get(device->dev->driver->owner)) 451294aaccbSJason Gunthorpe return -ENODEV; 452294aaccbSJason Gunthorpe 4535c8d3d93SYi Liu if (iommufd) 45431014aefSYi Liu ret = vfio_df_iommufd_bind(df); 4555c8d3d93SYi Liu else 4565c8d3d93SYi Liu ret = vfio_device_group_use_iommu(device); 457bab6fabcSJason Gunthorpe if (ret) 458bab6fabcSJason Gunthorpe goto err_module_put; 459bab6fabcSJason Gunthorpe 460294aaccbSJason Gunthorpe if (device->ops->open_device) { 461294aaccbSJason Gunthorpe ret = device->ops->open_device(device); 462294aaccbSJason Gunthorpe if (ret) 4635c8d3d93SYi Liu goto err_unuse_iommu; 464294aaccbSJason Gunthorpe } 465294aaccbSJason Gunthorpe return 0; 466294aaccbSJason Gunthorpe 4675c8d3d93SYi Liu err_unuse_iommu: 4685c8d3d93SYi Liu if (iommufd) 46931014aefSYi Liu vfio_df_iommufd_unbind(df); 4705c8d3d93SYi Liu else 4715c8d3d93SYi Liu 
vfio_device_group_unuse_iommu(device); 472bab6fabcSJason Gunthorpe err_module_put: 473294aaccbSJason Gunthorpe module_put(device->dev->driver->owner); 474294aaccbSJason Gunthorpe return ret; 475294aaccbSJason Gunthorpe } 476294aaccbSJason Gunthorpe 47705f37e1cSYi Liu static void vfio_df_device_last_close(struct vfio_device_file *df) 478294aaccbSJason Gunthorpe { 47905f37e1cSYi Liu struct vfio_device *device = df->device; 48005f37e1cSYi Liu struct iommufd_ctx *iommufd = df->iommufd; 48105f37e1cSYi Liu 482294aaccbSJason Gunthorpe lockdep_assert_held(&device->dev_set->lock); 483294aaccbSJason Gunthorpe 484294aaccbSJason Gunthorpe if (device->ops->close_device) 485294aaccbSJason Gunthorpe device->ops->close_device(device); 4865c8d3d93SYi Liu if (iommufd) 48731014aefSYi Liu vfio_df_iommufd_unbind(df); 4885c8d3d93SYi Liu else 4895c8d3d93SYi Liu vfio_device_group_unuse_iommu(device); 490294aaccbSJason Gunthorpe module_put(device->dev->driver->owner); 491294aaccbSJason Gunthorpe } 492294aaccbSJason Gunthorpe 49305f37e1cSYi Liu int vfio_df_open(struct vfio_device_file *df) 4940f3e72b5SJason Gunthorpe { 49505f37e1cSYi Liu struct vfio_device *device = df->device; 4965cfff077SYi Liu int ret = 0; 4970f3e72b5SJason Gunthorpe 4982b48f52fSMatthew Rosato lockdep_assert_held(&device->dev_set->lock); 4992b48f52fSMatthew Rosato 500839e692fSYi Liu /* 501839e692fSYi Liu * Only the group path allows the device to be opened multiple 502839e692fSYi Liu * times. The device cdev path doesn't have a secure way for it. 
503839e692fSYi Liu */ 504839e692fSYi Liu if (device->open_count != 0 && !df->group) 505839e692fSYi Liu return -EINVAL; 506839e692fSYi Liu 5070f3e72b5SJason Gunthorpe device->open_count++; 5080f3e72b5SJason Gunthorpe if (device->open_count == 1) { 50905f37e1cSYi Liu ret = vfio_df_device_first_open(df); 5100f3e72b5SJason Gunthorpe if (ret) 5115cfff077SYi Liu device->open_count--; 5120f3e72b5SJason Gunthorpe } 5130f3e72b5SJason Gunthorpe 5145cfff077SYi Liu return ret; 5155cfff077SYi Liu } 5165cfff077SYi Liu 51705f37e1cSYi Liu void vfio_df_close(struct vfio_device_file *df) 5185cfff077SYi Liu { 51905f37e1cSYi Liu struct vfio_device *device = df->device; 52005f37e1cSYi Liu 5212b48f52fSMatthew Rosato lockdep_assert_held(&device->dev_set->lock); 5222b48f52fSMatthew Rosato 5235cfff077SYi Liu vfio_assert_device_open(device); 5245cfff077SYi Liu if (device->open_count == 1) 52505f37e1cSYi Liu vfio_df_device_last_close(df); 5265cfff077SYi Liu device->open_count--; 5275cfff077SYi Liu } 5285cfff077SYi Liu 5290f3e72b5SJason Gunthorpe /* 5308e5c6995SAbhishek Sahu * Wrapper around pm_runtime_resume_and_get(). 5318e5c6995SAbhishek Sahu * Return error code on failure or 0 on success. 
5328e5c6995SAbhishek Sahu */ 5338e5c6995SAbhishek Sahu static inline int vfio_device_pm_runtime_get(struct vfio_device *device) 5348e5c6995SAbhishek Sahu { 5358e5c6995SAbhishek Sahu struct device *dev = device->dev; 5368e5c6995SAbhishek Sahu 5378e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) { 5388e5c6995SAbhishek Sahu int ret; 5398e5c6995SAbhishek Sahu 5408e5c6995SAbhishek Sahu ret = pm_runtime_resume_and_get(dev); 5418e5c6995SAbhishek Sahu if (ret) { 5428e5c6995SAbhishek Sahu dev_info_ratelimited(dev, 5438e5c6995SAbhishek Sahu "vfio: runtime resume failed %d\n", ret); 5448e5c6995SAbhishek Sahu return -EIO; 5458e5c6995SAbhishek Sahu } 5468e5c6995SAbhishek Sahu } 5478e5c6995SAbhishek Sahu 5488e5c6995SAbhishek Sahu return 0; 5498e5c6995SAbhishek Sahu } 5508e5c6995SAbhishek Sahu 5518e5c6995SAbhishek Sahu /* 5528e5c6995SAbhishek Sahu * Wrapper around pm_runtime_put(). 5538e5c6995SAbhishek Sahu */ 5548e5c6995SAbhishek Sahu static inline void vfio_device_pm_runtime_put(struct vfio_device *device) 5558e5c6995SAbhishek Sahu { 5568e5c6995SAbhishek Sahu struct device *dev = device->dev; 5578e5c6995SAbhishek Sahu 5588e5c6995SAbhishek Sahu if (dev->driver && dev->driver->pm) 5598e5c6995SAbhishek Sahu pm_runtime_put(dev); 5608e5c6995SAbhishek Sahu } 5618e5c6995SAbhishek Sahu 5628e5c6995SAbhishek Sahu /* 5630f3e72b5SJason Gunthorpe * VFIO Device fd 5640f3e72b5SJason Gunthorpe */ 5650f3e72b5SJason Gunthorpe static int vfio_device_fops_release(struct inode *inode, struct file *filep) 5660f3e72b5SJason Gunthorpe { 567b1a3b5c6SYi Liu struct vfio_device_file *df = filep->private_data; 568b1a3b5c6SYi Liu struct vfio_device *device = df->device; 5690f3e72b5SJason Gunthorpe 57005f37e1cSYi Liu vfio_df_group_close(df); 5710f3e72b5SJason Gunthorpe 5724a725b8dSKevin Tian vfio_device_put_registration(device); 5730f3e72b5SJason Gunthorpe 574b1a3b5c6SYi Liu kfree(df); 575b1a3b5c6SYi Liu 5760f3e72b5SJason Gunthorpe return 0; 5770f3e72b5SJason Gunthorpe } 5780f3e72b5SJason Gunthorpe 
5790f3e72b5SJason Gunthorpe /* 5800f3e72b5SJason Gunthorpe * vfio_mig_get_next_state - Compute the next step in the FSM 5810f3e72b5SJason Gunthorpe * @cur_fsm - The current state the device is in 5820f3e72b5SJason Gunthorpe * @new_fsm - The target state to reach 5830f3e72b5SJason Gunthorpe * @next_fsm - Pointer to the next step to get to new_fsm 5840f3e72b5SJason Gunthorpe * 5850f3e72b5SJason Gunthorpe * Return 0 upon success, otherwise -errno 5860f3e72b5SJason Gunthorpe * Upon success the next step in the state progression between cur_fsm and 5870f3e72b5SJason Gunthorpe * new_fsm will be set in next_fsm. 5880f3e72b5SJason Gunthorpe * 5890f3e72b5SJason Gunthorpe * This breaks down requests for combination transitions into smaller steps and 5900f3e72b5SJason Gunthorpe * returns the next step to get to new_fsm. The function may need to be called 5910f3e72b5SJason Gunthorpe * multiple times before reaching new_fsm. 5920f3e72b5SJason Gunthorpe * 5930f3e72b5SJason Gunthorpe */ 5940f3e72b5SJason Gunthorpe int vfio_mig_get_next_state(struct vfio_device *device, 5950f3e72b5SJason Gunthorpe enum vfio_device_mig_state cur_fsm, 5960f3e72b5SJason Gunthorpe enum vfio_device_mig_state new_fsm, 5970f3e72b5SJason Gunthorpe enum vfio_device_mig_state *next_fsm) 5980f3e72b5SJason Gunthorpe { 5994db52602SJason Gunthorpe enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 }; 6000f3e72b5SJason Gunthorpe /* 6010f3e72b5SJason Gunthorpe * The coding in this table requires the driver to implement the 6020f3e72b5SJason Gunthorpe * following FSM arcs: 6030f3e72b5SJason Gunthorpe * RESUMING -> STOP 6040f3e72b5SJason Gunthorpe * STOP -> RESUMING 6050f3e72b5SJason Gunthorpe * STOP -> STOP_COPY 6060f3e72b5SJason Gunthorpe * STOP_COPY -> STOP 6070f3e72b5SJason Gunthorpe * 6080f3e72b5SJason Gunthorpe * If P2P is supported then the driver must also implement these FSM 6090f3e72b5SJason Gunthorpe * arcs: 6100f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P 6110f3e72b5SJason Gunthorpe 
* RUNNING_P2P -> RUNNING 6120f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP 6130f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P 6144db52602SJason Gunthorpe * 6154db52602SJason Gunthorpe * If precopy is supported then the driver must support these additional 6164db52602SJason Gunthorpe * FSM arcs: 6174db52602SJason Gunthorpe * RUNNING -> PRE_COPY 6184db52602SJason Gunthorpe * PRE_COPY -> RUNNING 6194db52602SJason Gunthorpe * PRE_COPY -> STOP_COPY 6204db52602SJason Gunthorpe * However, if precopy and P2P are supported together then the driver 6214db52602SJason Gunthorpe * must support these additional arcs beyond the P2P arcs above: 6224db52602SJason Gunthorpe * PRE_COPY -> RUNNING 6234db52602SJason Gunthorpe * PRE_COPY -> PRE_COPY_P2P 6244db52602SJason Gunthorpe * PRE_COPY_P2P -> PRE_COPY 6254db52602SJason Gunthorpe * PRE_COPY_P2P -> RUNNING_P2P 6264db52602SJason Gunthorpe * PRE_COPY_P2P -> STOP_COPY 6274db52602SJason Gunthorpe * RUNNING -> PRE_COPY 6284db52602SJason Gunthorpe * RUNNING_P2P -> PRE_COPY_P2P 6294db52602SJason Gunthorpe * 6304db52602SJason Gunthorpe * Without P2P and precopy the driver must implement: 6310f3e72b5SJason Gunthorpe * RUNNING -> STOP 6320f3e72b5SJason Gunthorpe * STOP -> RUNNING 6330f3e72b5SJason Gunthorpe * 6340f3e72b5SJason Gunthorpe * The coding will step through multiple states for some combination 6350f3e72b5SJason Gunthorpe * transitions; if all optional features are supported, this means the 6360f3e72b5SJason Gunthorpe * following ones: 6374db52602SJason Gunthorpe * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY 6384db52602SJason Gunthorpe * PRE_COPY -> RUNNING -> RUNNING_P2P 6394db52602SJason Gunthorpe * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP 6404db52602SJason Gunthorpe * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING 6414db52602SJason Gunthorpe * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING 6424db52602SJason Gunthorpe * PRE_COPY_P2P -> RUNNING_P2P -> STOP 6434db52602SJason Gunthorpe * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING 
6440f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P 6454db52602SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P 6460f3e72b5SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 6474db52602SJason Gunthorpe * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY 6480f3e72b5SJason Gunthorpe * RESUMING -> STOP -> STOP_COPY 6494db52602SJason Gunthorpe * RUNNING -> RUNNING_P2P -> PRE_COPY_P2P 6500f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP 6510f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 6520f3e72b5SJason Gunthorpe * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 6534db52602SJason Gunthorpe * RUNNING_P2P -> RUNNING -> PRE_COPY 6540f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> RESUMING 6550f3e72b5SJason Gunthorpe * RUNNING_P2P -> STOP -> STOP_COPY 6564db52602SJason Gunthorpe * STOP -> RUNNING_P2P -> PRE_COPY_P2P 6570f3e72b5SJason Gunthorpe * STOP -> RUNNING_P2P -> RUNNING 6584db52602SJason Gunthorpe * STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY 6590f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RESUMING 6600f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P 6610f3e72b5SJason Gunthorpe * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 6624db52602SJason Gunthorpe * 6634db52602SJason Gunthorpe * The following transitions are blocked: 6644db52602SJason Gunthorpe * STOP_COPY -> PRE_COPY 6654db52602SJason Gunthorpe * STOP_COPY -> PRE_COPY_P2P 6660f3e72b5SJason Gunthorpe */ 6670f3e72b5SJason Gunthorpe static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 6680f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = { 6690f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 6700f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 6714db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 6724db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 
6730f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 6740f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 6750f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 6760f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 6770f3e72b5SJason Gunthorpe }, 6780f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = { 6790f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 6800f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 6814db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, 6824db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 6830f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 6840f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 6850f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 6860f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 6870f3e72b5SJason Gunthorpe }, 6884db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = { 6894db52602SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING, 6904db52602SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 6914db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, 6924db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 6934db52602SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 6944db52602SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING, 6954db52602SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING, 6964db52602SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 6974db52602SJason Gunthorpe }, 6984db52602SJason Gunthorpe 
[VFIO_DEVICE_STATE_PRE_COPY_P2P] = { 6994db52602SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 7004db52602SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 7014db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, 7024db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 7034db52602SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 7044db52602SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 7054db52602SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 7064db52602SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 7074db52602SJason Gunthorpe }, 7080f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = { 7090f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 7100f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 7114db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, 7124db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, 7130f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 7140f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 7150f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 7160f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 7170f3e72b5SJason Gunthorpe }, 7180f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = { 7190f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 7200f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 7214db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP, 7224db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP, 7230f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = 
VFIO_DEVICE_STATE_STOP, 7240f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 7250f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 7260f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 7270f3e72b5SJason Gunthorpe }, 7280f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = { 7290f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 7300f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 7314db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING, 7324db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 7330f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 7340f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 7350f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 7360f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 7370f3e72b5SJason Gunthorpe }, 7380f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = { 7390f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, 7400f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, 7414db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, 7424db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, 7430f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, 7440f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, 7450f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, 7460f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 7470f3e72b5SJason Gunthorpe }, 7480f3e72b5SJason Gunthorpe }; 7490f3e72b5SJason Gunthorpe 7500f3e72b5SJason Gunthorpe static const unsigned int 
state_flags_table[VFIO_DEVICE_NUM_STATES] = { 7510f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, 7520f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, 7534db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY] = 7544db52602SJason Gunthorpe VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY, 7554db52602SJason Gunthorpe [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY | 7564db52602SJason Gunthorpe VFIO_MIGRATION_P2P | 7574db52602SJason Gunthorpe VFIO_MIGRATION_PRE_COPY, 7580f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, 7590f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, 7600f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_RUNNING_P2P] = 7610f3e72b5SJason Gunthorpe VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, 7620f3e72b5SJason Gunthorpe [VFIO_DEVICE_STATE_ERROR] = ~0U, 7630f3e72b5SJason Gunthorpe }; 7640f3e72b5SJason Gunthorpe 7650f3e72b5SJason Gunthorpe if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 7660f3e72b5SJason Gunthorpe (state_flags_table[cur_fsm] & device->migration_flags) != 7670f3e72b5SJason Gunthorpe state_flags_table[cur_fsm])) 7680f3e72b5SJason Gunthorpe return -EINVAL; 7690f3e72b5SJason Gunthorpe 7700f3e72b5SJason Gunthorpe if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 7710f3e72b5SJason Gunthorpe (state_flags_table[new_fsm] & device->migration_flags) != 7720f3e72b5SJason Gunthorpe state_flags_table[new_fsm]) 7730f3e72b5SJason Gunthorpe return -EINVAL; 7740f3e72b5SJason Gunthorpe 7750f3e72b5SJason Gunthorpe /* 7760f3e72b5SJason Gunthorpe * Arcs touching optional and unsupported states are skipped over. The 7770f3e72b5SJason Gunthorpe * driver will instead see an arc from the original state to the next 7780f3e72b5SJason Gunthorpe * logical state, as per the above comment. 
/*
 * Convert the driver's struct file into a FD number and return it to userspace.
 *
 * On success the new fd owns the reference to @filp, and a copy of @mig with
 * data_fd filled in has been written to @arg.  On any failure the @filp
 * reference is dropped here, so the caller must not touch @filp afterwards.
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	mig->data_fd = fd;
	/*
	 * Copy out before fd_install(): once installed the fd is visible to
	 * userspace and can no longer be unwound with put_unused_fd().
	 */
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: query (GET) or change (SET) the
 * device's migration state via the driver's mig_ops.  A SET that produces a
 * data-transfer file returns its fd through vfio_ioct_mig_return_fd();
 * otherwise data_fd is reported as -1.
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	/* Anything other than 1 (execute) is passed straight back. */
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	/*
	 * No data-transfer fd for this arc (or the driver failed); still copy
	 * the struct out, then surface any driver error from filp.
	 */
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}

/*
 * VFIO_DEVICE_FEATURE_MIG_DATA_SIZE: report the driver's estimate of the
 * STOP_COPY data size so userspace can pre-size migration buffers.
 */
static int
vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
					      u32 flags, void __user *arg,
					      size_t argsz)
{
	struct vfio_device_feature_mig_data_size data_size = {};
	unsigned long stop_copy_length;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(data_size));
	if (ret != 1)
		return ret;

	ret = device->mig_ops->migration_get_data_size(device,
						       &stop_copy_length);
	if (ret)
		return ret;

	data_size.stop_copy_length = stop_copy_length;
	if (copy_to_user(arg, &data_size, sizeof(data_size)))
		return -EFAULT;

	return 0;
}
9020f3e72b5SJason Gunthorpe .flags = device->migration_flags, 9030f3e72b5SJason Gunthorpe }; 9040f3e72b5SJason Gunthorpe int ret; 9050f3e72b5SJason Gunthorpe 9060f3e72b5SJason Gunthorpe if (!device->mig_ops) 9070f3e72b5SJason Gunthorpe return -ENOTTY; 9080f3e72b5SJason Gunthorpe 9090f3e72b5SJason Gunthorpe ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, 9100f3e72b5SJason Gunthorpe sizeof(mig)); 9110f3e72b5SJason Gunthorpe if (ret != 1) 9120f3e72b5SJason Gunthorpe return ret; 9130f3e72b5SJason Gunthorpe if (copy_to_user(arg, &mig, sizeof(mig))) 9140f3e72b5SJason Gunthorpe return -EFAULT; 9150f3e72b5SJason Gunthorpe return 0; 9160f3e72b5SJason Gunthorpe } 9170f3e72b5SJason Gunthorpe 91880c4b92aSYishai Hadas /* Ranges should fit into a single kernel page */ 91980c4b92aSYishai Hadas #define LOG_MAX_RANGES \ 92080c4b92aSYishai Hadas (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range)) 92180c4b92aSYishai Hadas 92280c4b92aSYishai Hadas static int 92380c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_start(struct vfio_device *device, 92480c4b92aSYishai Hadas u32 flags, void __user *arg, 92580c4b92aSYishai Hadas size_t argsz) 92680c4b92aSYishai Hadas { 92780c4b92aSYishai Hadas size_t minsz = 92880c4b92aSYishai Hadas offsetofend(struct vfio_device_feature_dma_logging_control, 92980c4b92aSYishai Hadas ranges); 93080c4b92aSYishai Hadas struct vfio_device_feature_dma_logging_range __user *ranges; 93180c4b92aSYishai Hadas struct vfio_device_feature_dma_logging_control control; 93280c4b92aSYishai Hadas struct vfio_device_feature_dma_logging_range range; 93380c4b92aSYishai Hadas struct rb_root_cached root = RB_ROOT_CACHED; 93480c4b92aSYishai Hadas struct interval_tree_node *nodes; 93580c4b92aSYishai Hadas u64 iova_end; 93680c4b92aSYishai Hadas u32 nnodes; 93780c4b92aSYishai Hadas int i, ret; 93880c4b92aSYishai Hadas 93980c4b92aSYishai Hadas if (!device->log_ops) 94080c4b92aSYishai Hadas return -ENOTTY; 94180c4b92aSYishai Hadas 94280c4b92aSYishai 
Hadas ret = vfio_check_feature(flags, argsz, 94380c4b92aSYishai Hadas VFIO_DEVICE_FEATURE_SET, 94480c4b92aSYishai Hadas sizeof(control)); 94580c4b92aSYishai Hadas if (ret != 1) 94680c4b92aSYishai Hadas return ret; 94780c4b92aSYishai Hadas 94880c4b92aSYishai Hadas if (copy_from_user(&control, arg, minsz)) 94980c4b92aSYishai Hadas return -EFAULT; 95080c4b92aSYishai Hadas 95180c4b92aSYishai Hadas nnodes = control.num_ranges; 95280c4b92aSYishai Hadas if (!nnodes) 95380c4b92aSYishai Hadas return -EINVAL; 95480c4b92aSYishai Hadas 95580c4b92aSYishai Hadas if (nnodes > LOG_MAX_RANGES) 95680c4b92aSYishai Hadas return -E2BIG; 95780c4b92aSYishai Hadas 95880c4b92aSYishai Hadas ranges = u64_to_user_ptr(control.ranges); 95980c4b92aSYishai Hadas nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), 96080c4b92aSYishai Hadas GFP_KERNEL); 96180c4b92aSYishai Hadas if (!nodes) 96280c4b92aSYishai Hadas return -ENOMEM; 96380c4b92aSYishai Hadas 96480c4b92aSYishai Hadas for (i = 0; i < nnodes; i++) { 96580c4b92aSYishai Hadas if (copy_from_user(&range, &ranges[i], sizeof(range))) { 96680c4b92aSYishai Hadas ret = -EFAULT; 96780c4b92aSYishai Hadas goto end; 96880c4b92aSYishai Hadas } 96980c4b92aSYishai Hadas if (!IS_ALIGNED(range.iova, control.page_size) || 97080c4b92aSYishai Hadas !IS_ALIGNED(range.length, control.page_size)) { 97180c4b92aSYishai Hadas ret = -EINVAL; 97280c4b92aSYishai Hadas goto end; 97380c4b92aSYishai Hadas } 97480c4b92aSYishai Hadas 97580c4b92aSYishai Hadas if (check_add_overflow(range.iova, range.length, &iova_end) || 97680c4b92aSYishai Hadas iova_end > ULONG_MAX) { 97780c4b92aSYishai Hadas ret = -EOVERFLOW; 97880c4b92aSYishai Hadas goto end; 97980c4b92aSYishai Hadas } 98080c4b92aSYishai Hadas 98180c4b92aSYishai Hadas nodes[i].start = range.iova; 98280c4b92aSYishai Hadas nodes[i].last = range.iova + range.length - 1; 98380c4b92aSYishai Hadas if (interval_tree_iter_first(&root, nodes[i].start, 98480c4b92aSYishai Hadas nodes[i].last)) { 98580c4b92aSYishai Hadas 
/* Range overlapping */ 98680c4b92aSYishai Hadas ret = -EINVAL; 98780c4b92aSYishai Hadas goto end; 98880c4b92aSYishai Hadas } 98980c4b92aSYishai Hadas interval_tree_insert(nodes + i, &root); 99080c4b92aSYishai Hadas } 99180c4b92aSYishai Hadas 99280c4b92aSYishai Hadas ret = device->log_ops->log_start(device, &root, nnodes, 99380c4b92aSYishai Hadas &control.page_size); 99480c4b92aSYishai Hadas if (ret) 99580c4b92aSYishai Hadas goto end; 99680c4b92aSYishai Hadas 99780c4b92aSYishai Hadas if (copy_to_user(arg, &control, sizeof(control))) { 99880c4b92aSYishai Hadas ret = -EFAULT; 99980c4b92aSYishai Hadas device->log_ops->log_stop(device); 100080c4b92aSYishai Hadas } 100180c4b92aSYishai Hadas 100280c4b92aSYishai Hadas end: 100380c4b92aSYishai Hadas kfree(nodes); 100480c4b92aSYishai Hadas return ret; 100580c4b92aSYishai Hadas } 100680c4b92aSYishai Hadas 100780c4b92aSYishai Hadas static int 100880c4b92aSYishai Hadas vfio_ioctl_device_feature_logging_stop(struct vfio_device *device, 100980c4b92aSYishai Hadas u32 flags, void __user *arg, 101080c4b92aSYishai Hadas size_t argsz) 101180c4b92aSYishai Hadas { 101280c4b92aSYishai Hadas int ret; 101380c4b92aSYishai Hadas 101480c4b92aSYishai Hadas if (!device->log_ops) 101580c4b92aSYishai Hadas return -ENOTTY; 101680c4b92aSYishai Hadas 101780c4b92aSYishai Hadas ret = vfio_check_feature(flags, argsz, 101880c4b92aSYishai Hadas VFIO_DEVICE_FEATURE_SET, 0); 101980c4b92aSYishai Hadas if (ret != 1) 102080c4b92aSYishai Hadas return ret; 102180c4b92aSYishai Hadas 102280c4b92aSYishai Hadas return device->log_ops->log_stop(device); 102380c4b92aSYishai Hadas } 102480c4b92aSYishai Hadas 102580c4b92aSYishai Hadas static int vfio_device_log_read_and_clear(struct iova_bitmap *iter, 102680c4b92aSYishai Hadas unsigned long iova, size_t length, 102780c4b92aSYishai Hadas void *opaque) 102880c4b92aSYishai Hadas { 102980c4b92aSYishai Hadas struct vfio_device *device = opaque; 103080c4b92aSYishai Hadas 103180c4b92aSYishai Hadas return 
/*
 * Callback for iova_bitmap_for_each(): have the driver report and clear its
 * dirty bits for [iova, iova + length - 1] into the bitmap iterator.
 */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}

/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: copy the dirty-page bitmap for the
 * requested IOVA range out to the user-provided bitmap, clearing the
 * device's tracker state as it goes.
 */
static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	/* Only power-of-two granules of at least 4K are accepted. */
	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	/* Reject ranges that wrap or exceed the addressable IOVA space. */
	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}
/*
 * VFIO_DEVICE_FEATURE ioctl dispatcher: validate the common feature header
 * (argsz, flag bits, GET/SET exclusivity) and route to the handler for the
 * selected feature, falling back to the driver's device_feature op for
 * features the core does not implement.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	/* Handlers receive only the feature payload (argsz minus header). */
	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
		return vfio_ioctl_device_feature_migration_data_size(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
/*
 * ioctl entry point for a vfio device file.  Holds a pm-runtime reference
 * across the operation so the device stays resumed while the ioctl runs.
 */
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;
	int ret;

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
		break;

	default:
		/* Everything else is delegated to the driver's ioctl op. */
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}

	vfio_device_pm_runtime_put(device);
	return ret;
}

/* read() on a vfio device file: delegate to the driver's read op, if any. */
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device, buf, count, ppos);
}
/* write() on a vfio device file: delegate to the driver's write op, if any. */
static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device, buf, count, ppos);
}

/* mmap() on a vfio device file: delegate to the driver's mmap op, if any. */
static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device, vma);
}

/*
 * File operations for vfio device files.  Also used as an identity check:
 * vfio_device_from_file() tests file->f_op against this table.
 */
const struct file_operations vfio_device_fops = {
	.owner = THIS_MODULE,
	.release = vfio_device_fops_release,
	.read = vfio_device_fops_read,
	.write = vfio_device_fops_write,
	.unlocked_ioctl = vfio_device_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.mmap = vfio_device_fops_mmap,
};
vfio_device_fops_write, 12220f3e72b5SJason Gunthorpe .unlocked_ioctl = vfio_device_fops_unl_ioctl, 12230f3e72b5SJason Gunthorpe .compat_ioctl = compat_ptr_ioctl, 12240f3e72b5SJason Gunthorpe .mmap = vfio_device_fops_mmap, 12250f3e72b5SJason Gunthorpe }; 12260f3e72b5SJason Gunthorpe 122734aeeecdSYi Liu static struct vfio_device *vfio_device_from_file(struct file *file) 122834aeeecdSYi Liu { 122934aeeecdSYi Liu struct vfio_device_file *df = file->private_data; 123034aeeecdSYi Liu 123134aeeecdSYi Liu if (file->f_op != &vfio_device_fops) 123234aeeecdSYi Liu return NULL; 123334aeeecdSYi Liu return df->device; 123434aeeecdSYi Liu } 123534aeeecdSYi Liu 1236b1a59be8SYi Liu /** 1237b1a59be8SYi Liu * vfio_file_is_valid - True if the file is valid vfio file 1238b1a59be8SYi Liu * @file: VFIO group file or VFIO device file 1239b1a59be8SYi Liu */ 1240b1a59be8SYi Liu bool vfio_file_is_valid(struct file *file) 1241b1a59be8SYi Liu { 124234aeeecdSYi Liu return vfio_group_from_file(file) || 124334aeeecdSYi Liu vfio_device_from_file(file); 1244b1a59be8SYi Liu } 1245b1a59be8SYi Liu EXPORT_SYMBOL_GPL(vfio_file_is_valid); 1246b1a59be8SYi Liu 1247b1a59be8SYi Liu /** 1248b1a59be8SYi Liu * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file 1249b1a59be8SYi Liu * is always CPU cache coherent 1250b1a59be8SYi Liu * @file: VFIO group file or VFIO device file 1251b1a59be8SYi Liu * 1252b1a59be8SYi Liu * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop 1253b1a59be8SYi Liu * bit in DMA transactions. A return of false indicates that the user has 1254b1a59be8SYi Liu * rights to access additional instructions such as wbinvd on x86. 
1255b1a59be8SYi Liu */ 1256b1a59be8SYi Liu bool vfio_file_enforced_coherent(struct file *file) 1257b1a59be8SYi Liu { 125834aeeecdSYi Liu struct vfio_device *device; 1259b1a59be8SYi Liu struct vfio_group *group; 1260b1a59be8SYi Liu 1261b1a59be8SYi Liu group = vfio_group_from_file(file); 1262b1a59be8SYi Liu if (group) 1263b1a59be8SYi Liu return vfio_group_enforced_coherent(group); 1264b1a59be8SYi Liu 126534aeeecdSYi Liu device = vfio_device_from_file(file); 126634aeeecdSYi Liu if (device) 126734aeeecdSYi Liu return device_iommu_capable(device->dev, 126834aeeecdSYi Liu IOMMU_CAP_ENFORCE_CACHE_COHERENCY); 126934aeeecdSYi Liu 1270b1a59be8SYi Liu return true; 1271b1a59be8SYi Liu } 1272b1a59be8SYi Liu EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); 1273b1a59be8SYi Liu 127434aeeecdSYi Liu static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm) 127534aeeecdSYi Liu { 127634aeeecdSYi Liu struct vfio_device_file *df = file->private_data; 127734aeeecdSYi Liu 127834aeeecdSYi Liu /* 127934aeeecdSYi Liu * The kvm is first recorded in the vfio_device_file, and will 128034aeeecdSYi Liu * be propagated to vfio_device::kvm when the file is bound to 128134aeeecdSYi Liu * iommufd successfully in the vfio device cdev path. 128234aeeecdSYi Liu */ 128334aeeecdSYi Liu spin_lock(&df->kvm_ref_lock); 128434aeeecdSYi Liu df->kvm = kvm; 128534aeeecdSYi Liu spin_unlock(&df->kvm_ref_lock); 128634aeeecdSYi Liu } 128734aeeecdSYi Liu 1288b1a59be8SYi Liu /** 1289b1a59be8SYi Liu * vfio_file_set_kvm - Link a kvm with VFIO drivers 1290b1a59be8SYi Liu * @file: VFIO group file or VFIO device file 1291b1a59be8SYi Liu * @kvm: KVM to link 1292b1a59be8SYi Liu * 1293b1a59be8SYi Liu * When a VFIO device is first opened the KVM will be available in 1294b1a59be8SYi Liu * device->kvm if one was associated with the file. 
1295b1a59be8SYi Liu */ 1296b1a59be8SYi Liu void vfio_file_set_kvm(struct file *file, struct kvm *kvm) 1297b1a59be8SYi Liu { 1298b1a59be8SYi Liu struct vfio_group *group; 1299b1a59be8SYi Liu 1300b1a59be8SYi Liu group = vfio_group_from_file(file); 1301b1a59be8SYi Liu if (group) 1302b1a59be8SYi Liu vfio_group_set_kvm(group, kvm); 130334aeeecdSYi Liu 130434aeeecdSYi Liu if (vfio_device_from_file(file)) 130534aeeecdSYi Liu vfio_device_file_set_kvm(file, kvm); 1306b1a59be8SYi Liu } 1307b1a59be8SYi Liu EXPORT_SYMBOL_GPL(vfio_file_set_kvm); 1308b1a59be8SYi Liu 13090f3e72b5SJason Gunthorpe /* 13100f3e72b5SJason Gunthorpe * Sub-module support 13110f3e72b5SJason Gunthorpe */ 13120f3e72b5SJason Gunthorpe /* 13130f3e72b5SJason Gunthorpe * Helper for managing a buffer of info chain capabilities, allocate or 13140f3e72b5SJason Gunthorpe * reallocate a buffer with additional @size, filling in @id and @version 13150f3e72b5SJason Gunthorpe * of the capability. A pointer to the new capability is returned. 13160f3e72b5SJason Gunthorpe * 13170f3e72b5SJason Gunthorpe * NB. The chain is based at the head of the buffer, so new entries are 13180f3e72b5SJason Gunthorpe * added to the tail, vfio_info_cap_shift() should be called to fixup the 13190f3e72b5SJason Gunthorpe * next offsets prior to copying to the user buffer. 
13200f3e72b5SJason Gunthorpe */ 13210f3e72b5SJason Gunthorpe struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, 13220f3e72b5SJason Gunthorpe size_t size, u16 id, u16 version) 13230f3e72b5SJason Gunthorpe { 13240f3e72b5SJason Gunthorpe void *buf; 13250f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header, *tmp; 13260f3e72b5SJason Gunthorpe 13270f3e72b5SJason Gunthorpe buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); 13280f3e72b5SJason Gunthorpe if (!buf) { 13290f3e72b5SJason Gunthorpe kfree(caps->buf); 13300f3e72b5SJason Gunthorpe caps->buf = NULL; 13310f3e72b5SJason Gunthorpe caps->size = 0; 13320f3e72b5SJason Gunthorpe return ERR_PTR(-ENOMEM); 13330f3e72b5SJason Gunthorpe } 13340f3e72b5SJason Gunthorpe 13350f3e72b5SJason Gunthorpe caps->buf = buf; 13360f3e72b5SJason Gunthorpe header = buf + caps->size; 13370f3e72b5SJason Gunthorpe 13380f3e72b5SJason Gunthorpe /* Eventually copied to user buffer, zero */ 13390f3e72b5SJason Gunthorpe memset(header, 0, size); 13400f3e72b5SJason Gunthorpe 13410f3e72b5SJason Gunthorpe header->id = id; 13420f3e72b5SJason Gunthorpe header->version = version; 13430f3e72b5SJason Gunthorpe 13440f3e72b5SJason Gunthorpe /* Add to the end of the capability chain */ 13450f3e72b5SJason Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next) 13460f3e72b5SJason Gunthorpe ; /* nothing */ 13470f3e72b5SJason Gunthorpe 13480f3e72b5SJason Gunthorpe tmp->next = caps->size; 13490f3e72b5SJason Gunthorpe caps->size += size; 13500f3e72b5SJason Gunthorpe 13510f3e72b5SJason Gunthorpe return header; 13520f3e72b5SJason Gunthorpe } 13530f3e72b5SJason Gunthorpe EXPORT_SYMBOL_GPL(vfio_info_cap_add); 13540f3e72b5SJason Gunthorpe 13550f3e72b5SJason Gunthorpe void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) 13560f3e72b5SJason Gunthorpe { 13570f3e72b5SJason Gunthorpe struct vfio_info_cap_header *tmp; 13580f3e72b5SJason Gunthorpe void *buf = (void *)caps->buf; 13590f3e72b5SJason Gunthorpe 13600f3e72b5SJason 
Gunthorpe for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) 13610f3e72b5SJason Gunthorpe tmp->next += offset; 13620f3e72b5SJason Gunthorpe } 13630f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_cap_shift); 13640f3e72b5SJason Gunthorpe 13650f3e72b5SJason Gunthorpe int vfio_info_add_capability(struct vfio_info_cap *caps, 13660f3e72b5SJason Gunthorpe struct vfio_info_cap_header *cap, size_t size) 13670f3e72b5SJason Gunthorpe { 13680f3e72b5SJason Gunthorpe struct vfio_info_cap_header *header; 13690f3e72b5SJason Gunthorpe 13700f3e72b5SJason Gunthorpe header = vfio_info_cap_add(caps, size, cap->id, cap->version); 13710f3e72b5SJason Gunthorpe if (IS_ERR(header)) 13720f3e72b5SJason Gunthorpe return PTR_ERR(header); 13730f3e72b5SJason Gunthorpe 13740f3e72b5SJason Gunthorpe memcpy(header + 1, cap + 1, size - sizeof(*header)); 13750f3e72b5SJason Gunthorpe 13760f3e72b5SJason Gunthorpe return 0; 13770f3e72b5SJason Gunthorpe } 13780f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_info_add_capability); 13790f3e72b5SJason Gunthorpe 13800f3e72b5SJason Gunthorpe int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, 13810f3e72b5SJason Gunthorpe int max_irq_type, size_t *data_size) 13820f3e72b5SJason Gunthorpe { 13830f3e72b5SJason Gunthorpe unsigned long minsz; 13840f3e72b5SJason Gunthorpe size_t size; 13850f3e72b5SJason Gunthorpe 13860f3e72b5SJason Gunthorpe minsz = offsetofend(struct vfio_irq_set, count); 13870f3e72b5SJason Gunthorpe 13880f3e72b5SJason Gunthorpe if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || 13890f3e72b5SJason Gunthorpe (hdr->count >= (U32_MAX - hdr->start)) || 13900f3e72b5SJason Gunthorpe (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | 13910f3e72b5SJason Gunthorpe VFIO_IRQ_SET_ACTION_TYPE_MASK))) 13920f3e72b5SJason Gunthorpe return -EINVAL; 13930f3e72b5SJason Gunthorpe 13940f3e72b5SJason Gunthorpe if (data_size) 13950f3e72b5SJason Gunthorpe *data_size = 0; 13960f3e72b5SJason Gunthorpe 13970f3e72b5SJason Gunthorpe if 
(hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) 13980f3e72b5SJason Gunthorpe return -EINVAL; 13990f3e72b5SJason Gunthorpe 14000f3e72b5SJason Gunthorpe switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { 14010f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_NONE: 14020f3e72b5SJason Gunthorpe size = 0; 14030f3e72b5SJason Gunthorpe break; 14040f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_BOOL: 14050f3e72b5SJason Gunthorpe size = sizeof(uint8_t); 14060f3e72b5SJason Gunthorpe break; 14070f3e72b5SJason Gunthorpe case VFIO_IRQ_SET_DATA_EVENTFD: 14080f3e72b5SJason Gunthorpe size = sizeof(int32_t); 14090f3e72b5SJason Gunthorpe break; 14100f3e72b5SJason Gunthorpe default: 14110f3e72b5SJason Gunthorpe return -EINVAL; 14120f3e72b5SJason Gunthorpe } 14130f3e72b5SJason Gunthorpe 14140f3e72b5SJason Gunthorpe if (size) { 14150f3e72b5SJason Gunthorpe if (hdr->argsz - minsz < hdr->count * size) 14160f3e72b5SJason Gunthorpe return -EINVAL; 14170f3e72b5SJason Gunthorpe 14180f3e72b5SJason Gunthorpe if (!data_size) 14190f3e72b5SJason Gunthorpe return -EINVAL; 14200f3e72b5SJason Gunthorpe 14210f3e72b5SJason Gunthorpe *data_size = hdr->count * size; 14220f3e72b5SJason Gunthorpe } 14230f3e72b5SJason Gunthorpe 14240f3e72b5SJason Gunthorpe return 0; 14250f3e72b5SJason Gunthorpe } 14260f3e72b5SJason Gunthorpe EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); 14270f3e72b5SJason Gunthorpe 14280f3e72b5SJason Gunthorpe /* 14294741f2e9SJason Gunthorpe * Pin contiguous user pages and return their associated host pages for local 14304741f2e9SJason Gunthorpe * domain only. 14314741f2e9SJason Gunthorpe * @device [in] : device 14324741f2e9SJason Gunthorpe * @iova [in] : starting IOVA of user pages to be pinned. 14334741f2e9SJason Gunthorpe * @npage [in] : count of pages to be pinned. This count should not 14344741f2e9SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 
14354741f2e9SJason Gunthorpe * @prot [in] : protection flags 14364741f2e9SJason Gunthorpe * @pages[out] : array of host pages 14374741f2e9SJason Gunthorpe * Return error or number of pages pinned. 14384741f2e9SJason Gunthorpe * 14394741f2e9SJason Gunthorpe * A driver may only call this function if the vfio_device was created 14408da7a0e7SYi Liu * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages(). 14414741f2e9SJason Gunthorpe */ 14424741f2e9SJason Gunthorpe int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, 14434741f2e9SJason Gunthorpe int npage, int prot, struct page **pages) 14444741f2e9SJason Gunthorpe { 14454741f2e9SJason Gunthorpe /* group->container cannot change while a vfio device is open */ 14464741f2e9SJason Gunthorpe if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) 14474741f2e9SJason Gunthorpe return -EINVAL; 14488da7a0e7SYi Liu if (vfio_device_has_container(device)) 14498da7a0e7SYi Liu return vfio_device_container_pin_pages(device, iova, 14508da7a0e7SYi Liu npage, prot, pages); 14514741f2e9SJason Gunthorpe if (device->iommufd_access) { 14524741f2e9SJason Gunthorpe int ret; 14534741f2e9SJason Gunthorpe 14544741f2e9SJason Gunthorpe if (iova > ULONG_MAX) 14554741f2e9SJason Gunthorpe return -EINVAL; 14564741f2e9SJason Gunthorpe /* 14574741f2e9SJason Gunthorpe * VFIO ignores the sub page offset, npages is from the start of 14584741f2e9SJason Gunthorpe * a PAGE_SIZE chunk of IOVA. The caller is expected to recover 14594741f2e9SJason Gunthorpe * the sub page offset by doing: 14604741f2e9SJason Gunthorpe * pages[0] + (iova % PAGE_SIZE) 14614741f2e9SJason Gunthorpe */ 14624741f2e9SJason Gunthorpe ret = iommufd_access_pin_pages( 14634741f2e9SJason Gunthorpe device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), 14644741f2e9SJason Gunthorpe npage * PAGE_SIZE, pages, 14654741f2e9SJason Gunthorpe (prot & IOMMU_WRITE) ? 
IOMMUFD_ACCESS_RW_WRITE : 0); 14664741f2e9SJason Gunthorpe if (ret) 14674741f2e9SJason Gunthorpe return ret; 14684741f2e9SJason Gunthorpe return npage; 14694741f2e9SJason Gunthorpe } 14704741f2e9SJason Gunthorpe return -EINVAL; 14714741f2e9SJason Gunthorpe } 14724741f2e9SJason Gunthorpe EXPORT_SYMBOL(vfio_pin_pages); 14734741f2e9SJason Gunthorpe 14744741f2e9SJason Gunthorpe /* 14754741f2e9SJason Gunthorpe * Unpin contiguous host pages for local domain only. 14764741f2e9SJason Gunthorpe * @device [in] : device 14774741f2e9SJason Gunthorpe * @iova [in] : starting address of user pages to be unpinned. 14784741f2e9SJason Gunthorpe * @npage [in] : count of pages to be unpinned. This count should not 14794741f2e9SJason Gunthorpe * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 14804741f2e9SJason Gunthorpe */ 14814741f2e9SJason Gunthorpe void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) 14824741f2e9SJason Gunthorpe { 14834741f2e9SJason Gunthorpe if (WARN_ON(!vfio_assert_device_open(device))) 14844741f2e9SJason Gunthorpe return; 14854741f2e9SJason Gunthorpe 14868da7a0e7SYi Liu if (vfio_device_has_container(device)) { 14878da7a0e7SYi Liu vfio_device_container_unpin_pages(device, iova, npage); 14884741f2e9SJason Gunthorpe return; 14894741f2e9SJason Gunthorpe } 14904741f2e9SJason Gunthorpe if (device->iommufd_access) { 14914741f2e9SJason Gunthorpe if (WARN_ON(iova > ULONG_MAX)) 14924741f2e9SJason Gunthorpe return; 14934741f2e9SJason Gunthorpe iommufd_access_unpin_pages(device->iommufd_access, 14944741f2e9SJason Gunthorpe ALIGN_DOWN(iova, PAGE_SIZE), 14954741f2e9SJason Gunthorpe npage * PAGE_SIZE); 14964741f2e9SJason Gunthorpe return; 14974741f2e9SJason Gunthorpe } 14984741f2e9SJason Gunthorpe } 14994741f2e9SJason Gunthorpe EXPORT_SYMBOL(vfio_unpin_pages); 15004741f2e9SJason Gunthorpe 15014741f2e9SJason Gunthorpe /* 15024741f2e9SJason Gunthorpe * This interface allows the CPUs to perform some sort of virtual DMA on 15034741f2e9SJason Gunthorpe * behalf 
of the device. 15044741f2e9SJason Gunthorpe * 15054741f2e9SJason Gunthorpe * CPUs read/write from/into a range of IOVAs pointing to user space memory 15064741f2e9SJason Gunthorpe * into/from a kernel buffer. 15074741f2e9SJason Gunthorpe * 15084741f2e9SJason Gunthorpe * As the read/write of user space memory is conducted via the CPUs and is 15094741f2e9SJason Gunthorpe * not a real device DMA, it is not necessary to pin the user space memory. 15104741f2e9SJason Gunthorpe * 15114741f2e9SJason Gunthorpe * @device [in] : VFIO device 15124741f2e9SJason Gunthorpe * @iova [in] : base IOVA of a user space buffer 15134741f2e9SJason Gunthorpe * @data [in] : pointer to kernel buffer 15144741f2e9SJason Gunthorpe * @len [in] : kernel buffer length 15154741f2e9SJason Gunthorpe * @write : indicate read or write 15164741f2e9SJason Gunthorpe * Return error code on failure or 0 on success. 15174741f2e9SJason Gunthorpe */ 15184741f2e9SJason Gunthorpe int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, 15194741f2e9SJason Gunthorpe size_t len, bool write) 15204741f2e9SJason Gunthorpe { 15214741f2e9SJason Gunthorpe if (!data || len <= 0 || !vfio_assert_device_open(device)) 15224741f2e9SJason Gunthorpe return -EINVAL; 15234741f2e9SJason Gunthorpe 15248da7a0e7SYi Liu if (vfio_device_has_container(device)) 15258da7a0e7SYi Liu return vfio_device_container_dma_rw(device, iova, 15264741f2e9SJason Gunthorpe data, len, write); 15274741f2e9SJason Gunthorpe 15284741f2e9SJason Gunthorpe if (device->iommufd_access) { 15294741f2e9SJason Gunthorpe unsigned int flags = 0; 15304741f2e9SJason Gunthorpe 15314741f2e9SJason Gunthorpe if (iova > ULONG_MAX) 15324741f2e9SJason Gunthorpe return -EINVAL; 15334741f2e9SJason Gunthorpe 15344741f2e9SJason Gunthorpe /* VFIO historically tries to auto-detect a kthread */ 15354741f2e9SJason Gunthorpe if (!current->mm) 15364741f2e9SJason Gunthorpe flags |= IOMMUFD_ACCESS_RW_KTHREAD; 15374741f2e9SJason Gunthorpe if (write) 15384741f2e9SJason 
Gunthorpe flags |= IOMMUFD_ACCESS_RW_WRITE; 15394741f2e9SJason Gunthorpe return iommufd_access_rw(device->iommufd_access, iova, data, 15404741f2e9SJason Gunthorpe len, flags); 15414741f2e9SJason Gunthorpe } 15424741f2e9SJason Gunthorpe return -EINVAL; 15434741f2e9SJason Gunthorpe } 15444741f2e9SJason Gunthorpe EXPORT_SYMBOL(vfio_dma_rw); 15454741f2e9SJason Gunthorpe 15464741f2e9SJason Gunthorpe /* 15470f3e72b5SJason Gunthorpe * Module/class support 15480f3e72b5SJason Gunthorpe */ 15491334e47eSYi Liu static int __init vfio_init(void) 15501334e47eSYi Liu { 15511334e47eSYi Liu int ret; 15521334e47eSYi Liu 15531334e47eSYi Liu ida_init(&vfio.device_ida); 15541334e47eSYi Liu 15551334e47eSYi Liu ret = vfio_group_init(); 15561334e47eSYi Liu if (ret) 15571334e47eSYi Liu return ret; 15581334e47eSYi Liu 1559e2d55709SJason Gunthorpe ret = vfio_virqfd_init(); 1560e2d55709SJason Gunthorpe if (ret) 1561e2d55709SJason Gunthorpe goto err_virqfd; 1562e2d55709SJason Gunthorpe 15631334e47eSYi Liu /* /sys/class/vfio-dev/vfioX */ 15641aaba11dSGreg Kroah-Hartman vfio.device_class = class_create("vfio-dev"); 15651334e47eSYi Liu if (IS_ERR(vfio.device_class)) { 15661334e47eSYi Liu ret = PTR_ERR(vfio.device_class); 15671334e47eSYi Liu goto err_dev_class; 15681334e47eSYi Liu } 15691334e47eSYi Liu 15701334e47eSYi Liu pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 15711334e47eSYi Liu return 0; 15721334e47eSYi Liu 15731334e47eSYi Liu err_dev_class: 1574e2d55709SJason Gunthorpe vfio_virqfd_exit(); 1575e2d55709SJason Gunthorpe err_virqfd: 15761334e47eSYi Liu vfio_group_cleanup(); 15771334e47eSYi Liu return ret; 15781334e47eSYi Liu } 15791334e47eSYi Liu 15801334e47eSYi Liu static void __exit vfio_cleanup(void) 15811334e47eSYi Liu { 15821334e47eSYi Liu ida_destroy(&vfio.device_ida); 15833c28a761SYi Liu class_destroy(vfio.device_class); 15843c28a761SYi Liu vfio.device_class = NULL; 1585e2d55709SJason Gunthorpe vfio_virqfd_exit(); 15861334e47eSYi Liu vfio_group_cleanup(); 15870f3e72b5SJason 
Gunthorpe xa_destroy(&vfio_device_set_xa); 15880f3e72b5SJason Gunthorpe } 15890f3e72b5SJason Gunthorpe 15900f3e72b5SJason Gunthorpe module_init(vfio_init); 15910f3e72b5SJason Gunthorpe module_exit(vfio_cleanup); 15920f3e72b5SJason Gunthorpe 15930f3e72b5SJason Gunthorpe MODULE_VERSION(DRIVER_VERSION); 15940f3e72b5SJason Gunthorpe MODULE_LICENSE("GPL v2"); 15950f3e72b5SJason Gunthorpe MODULE_AUTHOR(DRIVER_AUTHOR); 15960f3e72b5SJason Gunthorpe MODULE_DESCRIPTION(DRIVER_DESC); 15970f3e72b5SJason Gunthorpe MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); 1598