// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include <linux/pm_runtime.h>
#include <linux/interval_tree.h>
#include <linux/iova_bitmap.h>
#include <linux/iommufd.h>
#include "vfio.h"

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*device_class;
	struct ida			device_ida;
} vfio;

#ifdef CONFIG_VFIO_NOIOMMU
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

static DEFINE_XARRAY(vfio_device_set_xa);
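/*
 * Example (illustrative sketch only, not part of this core code): devices
 * that can only be reset together should share one vfio_device_set.  A
 * driver would typically call vfio_assign_device_set() from its
 * vfio_device_ops->init callback, using any pointer that is stable and
 * shared by all affected devices as the set_id.  The names foo_device,
 * foo_init and foo->bridge below are assumptions made for illustration.
 *
 *	static int foo_init(struct vfio_device *vdev)
 *	{
 *		struct foo_device *foo =
 *			container_of(vdev, struct foo_device, vdev);
 *
 *		// All functions behind the same bridge reset together, so
 *		// key the set on the shared (hypothetical) bridge object.
 *		return vfio_assign_device_set(vdev, foo->bridge);
 *	}
 *
 * A driver that never calls vfio_assign_device_set() gets a singleton set
 * created for it at registration time (see __vfio_register_dev()).
 */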
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);

static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}

unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
{
	struct vfio_device *cur;
	unsigned int open_count = 0;

	lockdep_assert_held(&dev_set->lock);

	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
		open_count += cur->open_count;
	return open_count;
}
EXPORT_SYMBOL_GPL(vfio_device_set_open_count);

/*
 * Device objects - create, release, get, put, search
 */
/* Device reference always implies a group reference */
void vfio_device_put_registration(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}

bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}

/*
 * VFIO driver API
 */
/* Release helper called by vfio_put_device() */
static void vfio_device_release(struct device *dev)
{
	struct vfio_device *device =
			container_of(dev, struct vfio_device, device);

	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);

	if (device->ops->release)
		device->ops->release(device);

	kvfree(device);
}

static int vfio_init_device(struct vfio_device *device, struct device *dev,
			    const struct vfio_device_ops *ops);
/*
 * Allocate and initialize vfio_device so it can be registered to vfio
 * core.
 *
 * Drivers should use the wrapper vfio_alloc_device() for allocation.
 * @size is the size of the structure to be allocated, including any
 * private data used by the driver.
 *
 * Drivers may provide an @init callback to initialize their private data.
 *
 * Use vfio_put_device() to release the structure after a successful return.
 */
struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
				       const struct vfio_device_ops *ops)
{
	struct vfio_device *device;
	int ret;

	if (WARN_ON(size < sizeof(struct vfio_device)))
		return ERR_PTR(-EINVAL);

	device = kvzalloc(size, GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	ret = vfio_init_device(device, dev, ops);
	if (ret)
		goto out_free;
	return device;

out_free:
	kvfree(device);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);

/*
 * Initialize a vfio_device so it can be registered to vfio core.
 */
static int vfio_init_device(struct vfio_device *device, struct device *dev,
			    const struct vfio_device_ops *ops)
{
	int ret;

	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
	if (ret < 0) {
		dev_dbg(dev, "Error to alloc index\n");
		return ret;
	}

	device->index = ret;
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;

	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	device_initialize(&device->device);
	device->device.release = vfio_device_release;
	device->device.class = vfio.device_class;
	device->device.parent = device->dev;
	return 0;

out_uninit:
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);
	return ret;
}

static int __vfio_register_dev(struct vfio_device *device,
			       enum vfio_group_type type)
{
	int ret;

	if (WARN_ON(device->ops->bind_iommufd &&
		    (!device->ops->unbind_iommufd ||
		     !device->ops->attach_ioas)))
		return -EINVAL;

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	ret = dev_set_name(&device->device, "vfio%d", device->index);
	if (ret)
		return ret;

	ret = vfio_device_set_group(device, type);
	if (ret)
		return ret;

	ret = device_add(&device->device);
	if (ret)
		goto err_out;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	vfio_device_group_register(device);

	return 0;
err_out:
	vfio_device_remove_group(device);
	return ret;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device, VFIO_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);

/*
 * Register a virtual device without IOMMU backing.  The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
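/*
 * Example (illustrative sketch, not part of this file): a minimal probe
 * path for a hypothetical "foo" driver using the allocation and
 * registration helpers above.  struct foo_device, foo_ops and foo_probe
 * are names assumed purely for illustration.
 *
 *	struct foo_device {
 *		struct vfio_device vdev;	// embedded in the driver structure
 *		void __iomem *regs;		// driver private data
 *	};
 *
 *	static int foo_probe(struct device *dev)
 *	{
 *		struct foo_device *foo;
 *		int ret;
 *
 *		foo = vfio_alloc_device(foo_device, vdev, dev, &foo_ops);
 *		if (IS_ERR(foo))
 *			return PTR_ERR(foo);
 *
 *		ret = vfio_register_group_dev(&foo->vdev);
 *		if (ret)
 *			vfio_put_device(&foo->vdev);	// drops the allocation reference
 *		return ret;
 *	}
 */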
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	vfio_device_group_unregister(device);

	/* Balances device_add in register path */
	device_del(&device->device);

	/* Balances vfio_device_set_group in register path */
	vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);

/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}

static int vfio_device_first_open(struct vfio_device *device,
				  struct iommufd_ctx *iommufd, struct kvm *kvm)
{
	int ret;

	lockdep_assert_held(&device->dev_set->lock);

	if (!try_module_get(device->dev->driver->owner))
		return -ENODEV;

	if (iommufd)
		ret = vfio_iommufd_bind(device, iommufd);
	else
		ret = vfio_device_group_use_iommu(device);
	if (ret)
		goto err_module_put;

	device->kvm = kvm;
	if (device->ops->open_device) {
		ret = device->ops->open_device(device);
		if (ret)
			goto err_unuse_iommu;
	}
	return 0;

err_unuse_iommu:
	device->kvm = NULL;
	if (iommufd)
		vfio_iommufd_unbind(device);
	else
		vfio_device_group_unuse_iommu(device);
err_module_put:
	module_put(device->dev->driver->owner);
	return ret;
}

static void vfio_device_last_close(struct vfio_device *device,
				   struct iommufd_ctx *iommufd)
{
	lockdep_assert_held(&device->dev_set->lock);

	if (device->ops->close_device)
		device->ops->close_device(device);
	device->kvm = NULL;
	if (iommufd)
		vfio_iommufd_unbind(device);
	else
		vfio_device_group_unuse_iommu(device);
	module_put(device->dev->driver->owner);
}

int vfio_device_open(struct vfio_device *device,
		     struct iommufd_ctx *iommufd, struct kvm *kvm)
{
	int ret = 0;

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		ret = vfio_device_first_open(device, iommufd, kvm);
		if (ret)
			device->open_count--;
	}
	mutex_unlock(&device->dev_set->lock);

	return ret;
}

void vfio_device_close(struct vfio_device *device,
		       struct iommufd_ctx *iommufd)
{
	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	if (device->open_count == 1)
		vfio_device_last_close(device, iommufd);
	device->open_count--;
	mutex_unlock(&device->dev_set->lock);
}
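/*
 * Example (illustrative sketch): the remove path matching the foo_probe()
 * sketch earlier in this file.  foo_remove() and the drvdata usage are
 * assumptions for illustration; the point is that
 * vfio_unregister_group_dev() waits for outstanding registration
 * references (such as open file descriptors) before returning, and
 * vfio_put_device() then drops the final reference.
 *
 *	static void foo_remove(struct device *dev)
 *	{
 *		struct foo_device *foo = dev_get_drvdata(dev);
 *
 *		vfio_unregister_group_dev(&foo->vdev);
 *		vfio_put_device(&foo->vdev);
 *	}
 */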
436 */ 437 static inline int vfio_device_pm_runtime_get(struct vfio_device *device) 438 { 439 struct device *dev = device->dev; 440 441 if (dev->driver && dev->driver->pm) { 442 int ret; 443 444 ret = pm_runtime_resume_and_get(dev); 445 if (ret) { 446 dev_info_ratelimited(dev, 447 "vfio: runtime resume failed %d\n", ret); 448 return -EIO; 449 } 450 } 451 452 return 0; 453 } 454 455 /* 456 * Wrapper around pm_runtime_put(). 457 */ 458 static inline void vfio_device_pm_runtime_put(struct vfio_device *device) 459 { 460 struct device *dev = device->dev; 461 462 if (dev->driver && dev->driver->pm) 463 pm_runtime_put(dev); 464 } 465 466 /* 467 * VFIO Device fd 468 */ 469 static int vfio_device_fops_release(struct inode *inode, struct file *filep) 470 { 471 struct vfio_device *device = filep->private_data; 472 473 vfio_device_group_close(device); 474 475 vfio_device_put_registration(device); 476 477 return 0; 478 } 479 480 /* 481 * vfio_mig_get_next_state - Compute the next step in the FSM 482 * @cur_fsm - The current state the device is in 483 * @new_fsm - The target state to reach 484 * @next_fsm - Pointer to the next step to get to new_fsm 485 * 486 * Return 0 upon success, otherwise -errno 487 * Upon success the next step in the state progression between cur_fsm and 488 * new_fsm will be set in next_fsm. 489 * 490 * This breaks down requests for combination transitions into smaller steps and 491 * returns the next step to get to new_fsm. The function may need to be called 492 * multiple times before reaching new_fsm. 493 * 494 */ 495 int vfio_mig_get_next_state(struct vfio_device *device, 496 enum vfio_device_mig_state cur_fsm, 497 enum vfio_device_mig_state new_fsm, 498 enum vfio_device_mig_state *next_fsm) 499 { 500 enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 }; 501 /* 502 * The coding in this table requires the driver to implement the 503 * following FSM arcs: 504 * RESUMING -> STOP 505 * STOP -> RESUMING 506 * STOP -> STOP_COPY 507 * STOP_COPY -> STOP 508 * 509 * If P2P is supported then the driver must also implement these FSM 510 * arcs: 511 * RUNNING -> RUNNING_P2P 512 * RUNNING_P2P -> RUNNING 513 * RUNNING_P2P -> STOP 514 * STOP -> RUNNING_P2P 515 * 516 * If precopy is supported then the driver must support these additional 517 * FSM arcs: 518 * RUNNING -> PRE_COPY 519 * PRE_COPY -> RUNNING 520 * PRE_COPY -> STOP_COPY 521 * However, if precopy and P2P are supported together then the driver 522 * must support these additional arcs beyond the P2P arcs above: 523 * PRE_COPY -> RUNNING 524 * PRE_COPY -> PRE_COPY_P2P 525 * PRE_COPY_P2P -> PRE_COPY 526 * PRE_COPY_P2P -> RUNNING_P2P 527 * PRE_COPY_P2P -> STOP_COPY 528 * RUNNING -> PRE_COPY 529 * RUNNING_P2P -> PRE_COPY_P2P 530 * 531 * Without P2P and precopy the driver must implement: 532 * RUNNING -> STOP 533 * STOP -> RUNNING 534 * 535 * The coding will step through multiple states for some combination 536 * transitions; if all optional features are supported, this means the 537 * following ones: 538 * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY 539 * PRE_COPY -> RUNNING -> RUNNING_P2P 540 * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP 541 * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING 542 * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING 543 * PRE_COPY_P2P -> RUNNING_P2P -> STOP 544 * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING 545 * RESUMING -> STOP -> RUNNING_P2P 546 * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P 547 * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 548 * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> 
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> PRE_COPY_P2P
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> PRE_COPY_P2P
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 *
	 * The following transitions are blocked:
	 *         STOP_COPY -> PRE_COPY
	 *         STOP_COPY -> PRE_COPY_P2P
	 */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_PRE_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_PRE_COPY] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY,
		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY |
						   VFIO_MIGRATION_P2P |
						   VFIO_MIGRATION_PRE_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
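/*
 * Example (illustrative sketch): a migration driver's migration_set_state()
 * callback normally loops over vfio_mig_get_next_state() so that it only
 * ever has to execute the single-step arcs documented above.  foo,
 * foo->mig_state and foo_step_state() are hypothetical names; existing
 * migration drivers follow the same pattern.
 *
 *	static struct file *
 *	foo_set_state(struct vfio_device *vdev, enum vfio_device_mig_state new_state)
 *	{
 *		struct foo_device *foo = container_of(vdev, struct foo_device, vdev);
 *		enum vfio_device_mig_state next_state;
 *		struct file *res = NULL;
 *		int ret;
 *
 *		while (foo->mig_state != new_state) {
 *			ret = vfio_mig_get_next_state(vdev, foo->mig_state,
 *						      new_state, &next_state);
 *			if (ret)
 *				return ERR_PTR(ret);
 *			// Performs exactly one arc; arcs that create a
 *			// migration file only occur as the final step.
 *			res = foo_step_state(foo, next_state);
 *			if (IS_ERR(res))
 *				return res;
 *			foo->mig_state = next_state;
 *		}
 *		return res;
 *	}
 */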
/*
 * Convert the driver's struct file into an FD number and return it to userspace
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}

static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}

static int
vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
					      u32 flags, void __user *arg,
					      size_t argsz)
{
	struct vfio_device_feature_mig_data_size data_size = {};
	unsigned long stop_copy_length;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(data_size));
	if (ret != 1)
		return ret;

	ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length);
	if (ret)
		return ret;

	data_size.stop_copy_length = stop_copy_length;
	if (copy_to_user(arg, &data_size, sizeof(data_size)))
		return -EFAULT;

	return 0;
}

static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
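/*
 * Example (illustrative sketch of the userspace side): VFIO_DEVICE_FEATURE
 * carries a feature-specific payload immediately after struct
 * vfio_device_feature.  Moving a device to STOP_COPY might look roughly
 * like this (error handling omitted; device_fd is an assumed open vfio
 * device file descriptor):
 *
 *	uint8_t buf[sizeof(struct vfio_device_feature) +
 *		    sizeof(struct vfio_device_feature_mig_state)] = {};
 *	struct vfio_device_feature *feature = (void *)buf;
 *	struct vfio_device_feature_mig_state *mig = (void *)feature->data;
 *
 *	feature->argsz = sizeof(buf);
 *	feature->flags = VFIO_DEVICE_FEATURE_SET |
 *			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
 *	mig->device_state = VFIO_DEVICE_STATE_STOP_COPY;
 *	ioctl(device_fd, VFIO_DEVICE_FEATURE, feature);
 *	// On success mig->data_fd refers to the migration data stream.
 */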
/* Ranges should fit into a single kernel page */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))

static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}

static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
				       u32 flags, void __user *arg,
				       size_t argsz)
{
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET, 0);
	if (ret != 1)
		return ret;

	return device->log_ops->log_stop(device);
}

static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}

static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}
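/*
 * Example (illustrative sketch): a driver's log_read_and_clear() callback
 * reports dirty pages by marking them in the iova_bitmap handed in by the
 * report path above.  foo_fetch_and_clear_dirty() is a hypothetical
 * device-specific query; iova_bitmap_set() is the helper used to record a
 * dirty range.
 *
 *	static int foo_log_read_and_clear(struct vfio_device *vdev,
 *					  unsigned long iova, size_t length,
 *					  struct iova_bitmap *dirty)
 *	{
 *		unsigned long dirty_iova;
 *		size_t dirty_len;
 *
 *		// Hypothetical hardware query returning one dirty extent at a
 *		// time within [iova, iova + length).
 *		while (foo_fetch_and_clear_dirty(vdev, iova, length,
 *						 &dirty_iova, &dirty_len))
 *			iova_bitmap_set(dirty, dirty_iova, dirty_len);
 *
 *		return 0;
 *	}
 */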
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
		return vfio_ioctl_device_feature_migration_data_size(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;
	int ret;

	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
		break;

	default:
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}

	vfio_device_pm_runtime_put(device);
	return ret;
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device, buf, count, ppos);
}
static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device, vma);
}

const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};

/*
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);

int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
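/*
 * Example (illustrative sketch): a driver's VFIO_DEVICE_SET_IRQS handler
 * validates the header with vfio_set_irqs_validate_and_prepare() before
 * copying the variable-length payload.  FOO_NUM_IRQS, FOO_NUM_IRQ_TYPES
 * and the surrounding ioctl plumbing are assumptions for illustration;
 * vfio-pci uses the same pattern.
 *
 *	struct vfio_irq_set hdr;
 *	size_t minsz = offsetofend(struct vfio_irq_set, count);
 *	size_t data_size = 0;
 *	u8 *data = NULL;
 *	int ret;
 *
 *	if (copy_from_user(&hdr, (void __user *)arg, minsz))
 *		return -EFAULT;
 *
 *	ret = vfio_set_irqs_validate_and_prepare(&hdr, FOO_NUM_IRQS,
 *						 FOO_NUM_IRQ_TYPES, &data_size);
 *	if (ret)
 *		return ret;
 *
 *	if (data_size) {
 *		data = memdup_user((void __user *)(arg + minsz), data_size);
 *		if (IS_ERR(data))
 *			return PTR_ERR(data);
 *	}
 *	// ... program the device interrupts from hdr and data, then kfree(data)
 */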
/*
 * Pin contiguous user pages and return their associated host pages for local
 * domain only.
 * @device [in]  : device
 * @iova [in]    : starting IOVA of user pages to be pinned.
 * @npage [in]   : count of pages to be pinned.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @pages[out]   : array of host pages
 * Return error or number of pages pinned.
 *
 * A driver may only call this function if the vfio_device was created
 * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages().
 */
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
		   int npage, int prot, struct page **pages)
{
	/* group->container cannot change while a vfio device is open */
	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
		return -EINVAL;
	if (vfio_device_has_container(device))
		return vfio_device_container_pin_pages(device, iova,
						       npage, prot, pages);
	if (device->iommufd_access) {
		int ret;

		if (iova > ULONG_MAX)
			return -EINVAL;
		/*
		 * VFIO ignores the sub page offset, npages is from the start of
		 * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
		 * the sub page offset by doing:
		 *     pages[0] + (iova % PAGE_SIZE)
		 */
		ret = iommufd_access_pin_pages(
			device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
			npage * PAGE_SIZE, pages,
			(prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
		if (ret)
			return ret;
		return npage;
	}
	return -EINVAL;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin contiguous host pages for local domain only.
 * @device [in]  : device
 * @iova [in]    : starting address of user pages to be unpinned.
 * @npage [in]   : count of pages to be unpinned.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	if (WARN_ON(!vfio_assert_device_open(device)))
		return;

	if (vfio_device_has_container(device)) {
		vfio_device_container_unpin_pages(device, iova, npage);
		return;
	}
	if (device->iommufd_access) {
		if (WARN_ON(iova > ULONG_MAX))
			return;
		iommufd_access_unpin_pages(device->iommufd_access,
					   ALIGN_DOWN(iova, PAGE_SIZE),
					   npage * PAGE_SIZE);
		return;
	}
}
EXPORT_SYMBOL(vfio_unpin_pages);
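/*
 * Example (illustrative sketch): an emulated-IOMMU (mdev-style) driver
 * accessing one page of guest memory through the pinning API above.  The
 * offset recovery follows the comment in vfio_pin_pages(); vdev and iova
 * are assumed to come from the caller, and error handling is trimmed.
 *
 *	struct page *page;
 *	void *va;
 *	int ret;
 *
 *	ret = vfio_pin_pages(vdev, iova, 1, IOMMU_READ | IOMMU_WRITE, &page);
 *	if (ret != 1)
 *		return ret < 0 ? ret : -EFAULT;
 *
 *	va = kmap_local_page(page) + (iova % PAGE_SIZE);
 *	// ... read or write the guest page through va ...
 *	kunmap_local(va);
 *	vfio_unpin_pages(vdev, iova, 1);
 *
 * For small or infrequent accesses, vfio_dma_rw() below copies through the
 * CPU instead and avoids pinning altogether.
 */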
/*
 * This interface allows the CPUs to perform some sort of virtual DMA on
 * behalf of the device.
 *
 * CPUs read/write from/into a range of IOVAs pointing to user space memory
 * into/from a kernel buffer.
 *
 * As the read/write of user space memory is conducted via the CPUs and is
 * not a real device DMA, it is not necessary to pin the user space memory.
 *
 * @device [in]		: VFIO device
 * @iova [in]		: base IOVA of a user space buffer
 * @data [in]		: pointer to kernel buffer
 * @len [in]		: kernel buffer length
 * @write		: indicate read or write
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
		size_t len, bool write)
{
	if (!data || len <= 0 || !vfio_assert_device_open(device))
		return -EINVAL;

	if (vfio_device_has_container(device))
		return vfio_device_container_dma_rw(device, iova,
						    data, len, write);

	if (device->iommufd_access) {
		unsigned int flags = 0;

		if (iova > ULONG_MAX)
			return -EINVAL;

		/* VFIO historically tries to auto-detect a kthread */
		if (!current->mm)
			flags |= IOMMUFD_ACCESS_RW_KTHREAD;
		if (write)
			flags |= IOMMUFD_ACCESS_RW_WRITE;
		return iommufd_access_rw(device->iommufd_access, iova, data,
					 len, flags);
	}
	return -EINVAL;
}
EXPORT_SYMBOL(vfio_dma_rw);

/*
 * Module/class support
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.device_ida);

	ret = vfio_group_init();
	if (ret)
		return ret;

	ret = vfio_virqfd_init();
	if (ret)
		goto err_virqfd;

	/* /sys/class/vfio-dev/vfioX */
	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
	if (IS_ERR(vfio.device_class)) {
		ret = PTR_ERR(vfio.device_class);
		goto err_dev_class;
	}

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_dev_class:
	vfio_virqfd_exit();
err_virqfd:
	vfio_group_cleanup();
	return ret;
}

static void __exit vfio_cleanup(void)
{
	ida_destroy(&vfio.device_ida);
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
	vfio_virqfd_exit();
	vfio_group_cleanup();
	xa_destroy(&vfio_device_set_xa);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");