// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *
 * VFIO container (/dev/vfio/vfio)
 *
 * A container is the userspace handle to an IOMMU context: it is created
 * when /dev/vfio/vfio is opened, groups are attached to it, and an IOMMU
 * backend driver is bound with VFIO_SET_IOMMU.  Lifetime is kref-managed
 * so that container, group, and device fds may be closed in any order.
 */
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/iommu.h>
#include <linux/miscdevice.h>
#include <linux/vfio.h>
#include <uapi/linux/vfio.h>

#include "vfio.h"

struct vfio_container {
	struct kref kref;			/* released via vfio_container_release() */
	struct list_head group_list;		/* vfio_groups attached to this container */
	struct rw_semaphore group_lock;		/* protects group_list, iommu_driver, iommu_data */
	struct vfio_iommu_driver *iommu_driver;	/* bound by VFIO_SET_IOMMU; NULL until then */
	void *iommu_data;			/* opaque state returned by driver ->open() */
	bool noiommu;				/* true if container holds no-iommu groups */
};

/* Module-wide registry of available IOMMU backend drivers. */
static struct vfio {
	struct list_head iommu_drivers_list;
	struct mutex iommu_drivers_lock;	/* protects iommu_drivers_list */
} vfio;

/*
 * ->open() for the built-in no-iommu backend.  No per-container state is
 * needed, so NULL iommu_data is returned on success.  Requires
 * CAP_SYS_RAWIO since no-iommu mode offers no DMA isolation.
 */
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

/* No state to tear down; see vfio_noiommu_open(). */
static void vfio_noiommu_release(void *iommu_data)
{
}

/*
 * Only VFIO_CHECK_EXTENSION is supported, and only for the
 * VFIO_NOIOMMU_IOMMU extension.  vfio_noiommu is an external enable
 * flag (declared outside this file, in vfio.h — presumably the noiommu
 * module parameter; confirm against the rest of the driver).
 */
static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

/* Attach is a no-op: no-iommu performs no IOMMU programming. */
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}

/* Detach is likewise a no-op. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};

/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static bool vfio_iommu_driver_allowed(struct vfio_container *container,
				      const struct vfio_iommu_driver *driver)
{
	/* Without CONFIG_VFIO_NOIOMMU no noiommu driver is ever registered,
	 * so any pairing is allowed. */
	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		return true;
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	/* register_device/unregister_device must be provided as a pair
	 * (both set or both NULL); reject inconsistent ops. */
	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

/*
 * Remove a previously registered backend.  Silently does nothing if the
 * ops were never registered.
 */
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

/*
 * Notify the container's IOMMU backend of a new device, if the backend
 * implements the optional register_device hook.  NOTE(review): reads
 * device->group->container without taking group_lock — presumably the
 * caller guarantees the container binding is stable here; confirm
 * against the callers in the rest of the driver.
 */
void vfio_device_container_register(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->register_device)
		iommu_driver->ops->register_device(
			device->group->container->iommu_data, device);
}

/* Counterpart of vfio_device_container_register(). */
void vfio_device_container_unregister(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
}

/*
 * VFIO_CHECK_EXTENSION handler for the container fd.  Returns >0 if the
 * extension is supported, 0 if not (ret is initialized to 0 and only the
 * default case runs until base extensions exist).
 */
static long
vfio_container_ioctl_check_extension(struct vfio_container *container,
				     unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/*
				 * Once groups are attached, the container's
				 * noiommu flag is decided, so skip drivers
				 * that could never be bound to it.
				 */
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				/* Pin the backend module across the callback. */
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	/* -ENODEV if group_list is empty and the loop never runs. */
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Detach only the groups attached before the failure, in reverse. */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

/*
 * VFIO_SET_IOMMU handler: bind the first registered backend that accepts
 * the requested IOMMU type and whose attach of all current groups
 * succeeds.  On success the backend's module reference (taken via
 * try_module_get) is retained until vfio_group_detach_container() drops
 * the last group.
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

/*
 * ioctl dispatcher for the container fd.  Anything not handled here is
 * passed through to the bound IOMMU backend (e.g. VFIO_IOMMU_MAP_DMA).
 */
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_container_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

/* Opening /dev/vfio/vfio allocates a fresh, empty container. */
static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

/*
 * fd release: notify the backend (if any) that the container fd is
 * closing, then drop the fd's kref.  The container itself survives until
 * the last group reference is gone.
 */
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};

/*
 * Resolve a userspace-supplied file to its container, or NULL if the
 * file is not a VFIO container fd.
 */
struct vfio_container *vfio_container_from_file(struct file *file)
{
	struct vfio_container *container;

	/* Sanity check, is this really our fd? */
	if (file->f_op != &vfio_fops)
		return NULL;

	container = file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */
	return container;
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

/*
 * Attach a group to a container (VFIO_GROUP_SET_CONTAINER path).  Takes
 * a container kref on success; caller holds group->group_lock.
 */
int vfio_container_attach_group(struct vfio_container *container,
				struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			/* Undo the DMA ownership claim on failure. */
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	/* First group decides the container's noiommu character. */
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}

/*
 * Detach a group from its container; reverse of
 * vfio_container_attach_group().  Caller holds group->group_lock and
 * must be the sole remaining container user of the group.
 */
void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held(&group->group_lock);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		/* Drops the module ref taken in vfio_ioctl_set_iommu(). */
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * Take an additional user reference on the group's container, pinning
 * the container fd via its struct file.  Caller holds group->group_lock.
 */
int vfio_group_use_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	/*
	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
	 * VFIO_SET_IOMMU hasn't been done yet.
	 */
	if (!group->container->iommu_driver)
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

/* Drop a reference taken by vfio_group_use_container(). */
void vfio_group_unuse_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	/* The initial attach reference must remain; only extra users drop. */
	WARN_ON(group->container_users <= 1);
	group->container_users--;
	fput(group->opened_file);
}

/*
 * Pin npage pages at iova through the container's IOMMU backend.
 * Returns the backend's result, -E2BIG for oversized requests, or
 * -ENOTTY if no backend (or no pin support) is bound.
 */
int vfio_device_container_pin_pages(struct vfio_device *device,
				    dma_addr_t iova, int npage,
				    int prot, struct page **pages)
{
	struct vfio_container *container = device->group->container;
	struct iommu_group *iommu_group = device->group->iommu_group;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (unlikely(!driver || !driver->ops->pin_pages))
		return -ENOTTY;
	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
				      npage, prot, pages);
}

/*
 * Unpin pages previously pinned via vfio_device_container_pin_pages().
 * NOTE(review): unlike pin, this dereferences iommu_driver/unpin_pages
 * unconditionally — presumably a successful pin guarantees both exist;
 * confirm against callers.
 */
void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
						  npage);
}

/*
 * Read from (write=false) or write to (write=true) len bytes of the
 * device's IOVA space via the backend's dma_rw hook; -ENOTTY if the
 * backend doesn't support it.
 */
int vfio_device_container_dma_rw(struct vfio_device *device,
				 dma_addr_t iova, void *data,
				 size_t len, bool write)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->dma_rw))
		return -ENOTTY;
	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
				   write);
}

/*
 * Module init for the container core: register the /dev/vfio/vfio misc
 * device and, when configured, the built-in no-iommu backend.
 */
int __init vfio_container_init(void)
{
	int ret;

	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
		if (ret)
			goto err_misc;
	}
	return 0;

err_misc:
	misc_deregister(&vfio_dev);
	return ret;
}

/* Teardown in reverse order of vfio_container_init(). */
void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
	misc_deregister(&vfio_dev);
	mutex_destroy(&vfio.iommu_drivers_lock);
}

MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");