1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES 3 */ 4 #include <linux/iommufd.h> 5 #include <linux/slab.h> 6 #include <linux/iommu.h> 7 #include <uapi/linux/iommufd.h> 8 #include "../iommu-priv.h" 9 10 #include "io_pagetable.h" 11 #include "iommufd_private.h" 12 13 static bool allow_unsafe_interrupts; 14 module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR); 15 MODULE_PARM_DESC( 16 allow_unsafe_interrupts, 17 "Allow IOMMUFD to bind to devices even if the platform cannot isolate " 18 "the MSI interrupt window. Enabling this is a security weakness."); 19 20 static void iommufd_group_release(struct kref *kref) 21 { 22 struct iommufd_group *igroup = 23 container_of(kref, struct iommufd_group, ref); 24 25 WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); 26 27 xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, 28 NULL, GFP_KERNEL); 29 iommu_group_put(igroup->group); 30 mutex_destroy(&igroup->lock); 31 kfree(igroup); 32 } 33 34 static void iommufd_put_group(struct iommufd_group *group) 35 { 36 kref_put(&group->ref, iommufd_group_release); 37 } 38 39 static bool iommufd_group_try_get(struct iommufd_group *igroup, 40 struct iommu_group *group) 41 { 42 if (!igroup) 43 return false; 44 /* 45 * group ID's cannot be re-used until the group is put back which does 46 * not happen if we could get an igroup pointer under the xa_lock. 47 */ 48 if (WARN_ON(igroup->group != group)) 49 return false; 50 return kref_get_unless_zero(&igroup->ref); 51 } 52 53 /* 54 * iommufd needs to store some more data for each iommu_group, we keep a 55 * parallel xarray indexed by iommu_group id to hold this instead of putting it 56 * in the core structure. To keep things simple the iommufd_group memory is 57 * unique within the iommufd_ctx. This makes it easy to check there are no 58 * memory leaks. 59 */ 60 static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, 61 struct device *dev) 62 { 63 struct iommufd_group *new_igroup; 64 struct iommufd_group *cur_igroup; 65 struct iommufd_group *igroup; 66 struct iommu_group *group; 67 unsigned int id; 68 69 group = iommu_group_get(dev); 70 if (!group) 71 return ERR_PTR(-ENODEV); 72 73 id = iommu_group_id(group); 74 75 xa_lock(&ictx->groups); 76 igroup = xa_load(&ictx->groups, id); 77 if (iommufd_group_try_get(igroup, group)) { 78 xa_unlock(&ictx->groups); 79 iommu_group_put(group); 80 return igroup; 81 } 82 xa_unlock(&ictx->groups); 83 84 new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL); 85 if (!new_igroup) { 86 iommu_group_put(group); 87 return ERR_PTR(-ENOMEM); 88 } 89 90 kref_init(&new_igroup->ref); 91 mutex_init(&new_igroup->lock); 92 INIT_LIST_HEAD(&new_igroup->device_list); 93 new_igroup->sw_msi_start = PHYS_ADDR_MAX; 94 /* group reference moves into new_igroup */ 95 new_igroup->group = group; 96 97 /* 98 * The ictx is not additionally refcounted here becase all objects using 99 * an igroup must put it before their destroy completes. 100 */ 101 new_igroup->ictx = ictx; 102 103 /* 104 * We dropped the lock so igroup is invalid. NULL is a safe and likely 105 * value to assume for the xa_cmpxchg algorithm. 106 */ 107 cur_igroup = NULL; 108 xa_lock(&ictx->groups); 109 while (true) { 110 igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup, 111 GFP_KERNEL); 112 if (xa_is_err(igroup)) { 113 xa_unlock(&ictx->groups); 114 iommufd_put_group(new_igroup); 115 return ERR_PTR(xa_err(igroup)); 116 } 117 118 /* new_group was successfully installed */ 119 if (cur_igroup == igroup) { 120 xa_unlock(&ictx->groups); 121 return new_igroup; 122 } 123 124 /* Check again if the current group is any good */ 125 if (iommufd_group_try_get(igroup, group)) { 126 xa_unlock(&ictx->groups); 127 iommufd_put_group(new_igroup); 128 return igroup; 129 } 130 cur_igroup = igroup; 131 } 132 } 133 134 void iommufd_device_destroy(struct iommufd_object *obj) 135 { 136 struct iommufd_device *idev = 137 container_of(obj, struct iommufd_device, obj); 138 139 iommu_device_release_dma_owner(idev->dev); 140 iommufd_put_group(idev->igroup); 141 if (!iommufd_selftest_is_mock_dev(idev->dev)) 142 iommufd_ctx_put(idev->ictx); 143 } 144 145 /** 146 * iommufd_device_bind - Bind a physical device to an iommu fd 147 * @ictx: iommufd file descriptor 148 * @dev: Pointer to a physical device struct 149 * @id: Output ID number to return to userspace for this device 150 * 151 * A successful bind establishes an ownership over the device and returns 152 * struct iommufd_device pointer, otherwise returns error pointer. 153 * 154 * A driver using this API must set driver_managed_dma and must not touch 155 * the device until this routine succeeds and establishes ownership. 156 * 157 * Binding a PCI device places the entire RID under iommufd control. 158 * 159 * The caller must undo this with iommufd_device_unbind() 160 */ 161 struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, 162 struct device *dev, u32 *id) 163 { 164 struct iommufd_device *idev; 165 struct iommufd_group *igroup; 166 int rc; 167 168 /* 169 * iommufd always sets IOMMU_CACHE because we offer no way for userspace 170 * to restore cache coherency. 171 */ 172 if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) 173 return ERR_PTR(-EINVAL); 174 175 igroup = iommufd_get_group(ictx, dev); 176 if (IS_ERR(igroup)) 177 return ERR_CAST(igroup); 178 179 /* 180 * For historical compat with VFIO the insecure interrupt path is 181 * allowed if the module parameter is set. Secure/Isolated means that a 182 * MemWr operation from the device (eg a simple DMA) cannot trigger an 183 * interrupt outside this iommufd context. 184 */ 185 if (!iommufd_selftest_is_mock_dev(dev) && 186 !iommu_group_has_isolated_msi(igroup->group)) { 187 if (!allow_unsafe_interrupts) { 188 rc = -EPERM; 189 goto out_group_put; 190 } 191 192 dev_warn( 193 dev, 194 "MSI interrupts are not secure, they cannot be isolated by the platform. " 195 "Check that platform features like interrupt remapping are enabled. " 196 "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); 197 } 198 199 rc = iommu_device_claim_dma_owner(dev, ictx); 200 if (rc) 201 goto out_group_put; 202 203 idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE); 204 if (IS_ERR(idev)) { 205 rc = PTR_ERR(idev); 206 goto out_release_owner; 207 } 208 idev->ictx = ictx; 209 if (!iommufd_selftest_is_mock_dev(dev)) 210 iommufd_ctx_get(ictx); 211 idev->dev = dev; 212 idev->enforce_cache_coherency = 213 device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); 214 /* The calling driver is a user until iommufd_device_unbind() */ 215 refcount_inc(&idev->obj.users); 216 /* igroup refcount moves into iommufd_device */ 217 idev->igroup = igroup; 218 219 /* 220 * If the caller fails after this success it must call 221 * iommufd_unbind_device() which is safe since we hold this refcount. 222 * This also means the device is a leaf in the graph and no other object 223 * can take a reference on it. 224 */ 225 iommufd_object_finalize(ictx, &idev->obj); 226 *id = idev->obj.id; 227 return idev; 228 229 out_release_owner: 230 iommu_device_release_dma_owner(dev); 231 out_group_put: 232 iommufd_put_group(igroup); 233 return ERR_PTR(rc); 234 } 235 EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); 236 237 /** 238 * iommufd_ctx_has_group - True if any device within the group is bound 239 * to the ictx 240 * @ictx: iommufd file descriptor 241 * @group: Pointer to a physical iommu_group struct 242 * 243 * True if any device within the group has been bound to this ictx, ex. via 244 * iommufd_device_bind(), therefore implying ictx ownership of the group. 245 */ 246 bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) 247 { 248 struct iommufd_object *obj; 249 unsigned long index; 250 251 if (!ictx || !group) 252 return false; 253 254 xa_lock(&ictx->objects); 255 xa_for_each(&ictx->objects, index, obj) { 256 if (obj->type == IOMMUFD_OBJ_DEVICE && 257 container_of(obj, struct iommufd_device, obj) 258 ->igroup->group == group) { 259 xa_unlock(&ictx->objects); 260 return true; 261 } 262 } 263 xa_unlock(&ictx->objects); 264 return false; 265 } 266 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); 267 268 /** 269 * iommufd_device_unbind - Undo iommufd_device_bind() 270 * @idev: Device returned by iommufd_device_bind() 271 * 272 * Release the device from iommufd control. The DMA ownership will return back 273 * to unowned with DMA controlled by the DMA API. This invalidates the 274 * iommufd_device pointer, other APIs that consume it must not be called 275 * concurrently. 276 */ 277 void iommufd_device_unbind(struct iommufd_device *idev) 278 { 279 iommufd_object_destroy_user(idev->ictx, &idev->obj); 280 } 281 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); 282 283 struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) 284 { 285 return idev->ictx; 286 } 287 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); 288 289 u32 iommufd_device_to_id(struct iommufd_device *idev) 290 { 291 return idev->obj.id; 292 } 293 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); 294 295 static int iommufd_group_setup_msi(struct iommufd_group *igroup, 296 struct iommufd_hw_pagetable *hwpt) 297 { 298 phys_addr_t sw_msi_start = igroup->sw_msi_start; 299 int rc; 300 301 /* 302 * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to 303 * call iommu_get_msi_cookie() on its behalf. This is necessary to setup 304 * the MSI window so iommu_dma_prepare_msi() can install pages into our 305 * domain after request_irq(). If it is not done interrupts will not 306 * work on this domain. 307 * 308 * FIXME: This is conceptually broken for iommufd since we want to allow 309 * userspace to change the domains, eg switch from an identity IOAS to a 310 * DMA IOAS. There is currently no way to create a MSI window that 311 * matches what the IRQ layer actually expects in a newly created 312 * domain. 313 */ 314 if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) { 315 rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); 316 if (rc) 317 return rc; 318 319 /* 320 * iommu_get_msi_cookie() can only be called once per domain, 321 * it returns -EBUSY on later calls. 322 */ 323 hwpt->msi_cookie = true; 324 } 325 return 0; 326 } 327 328 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, 329 struct iommufd_device *idev) 330 { 331 int rc; 332 333 mutex_lock(&idev->igroup->lock); 334 335 if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { 336 rc = -EINVAL; 337 goto err_unlock; 338 } 339 340 /* Try to upgrade the domain we have */ 341 if (idev->enforce_cache_coherency) { 342 rc = iommufd_hw_pagetable_enforce_cc(hwpt); 343 if (rc) 344 goto err_unlock; 345 } 346 347 rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev, 348 &idev->igroup->sw_msi_start); 349 if (rc) 350 goto err_unlock; 351 352 /* 353 * Only attach to the group once for the first device that is in the 354 * group. All the other devices will follow this attachment. The user 355 * should attach every device individually to the hwpt as the per-device 356 * reserved regions are only updated during individual device 357 * attachment. 358 */ 359 if (list_empty(&idev->igroup->device_list)) { 360 rc = iommufd_group_setup_msi(idev->igroup, hwpt); 361 if (rc) 362 goto err_unresv; 363 364 rc = iommu_attach_group(hwpt->domain, idev->igroup->group); 365 if (rc) 366 goto err_unresv; 367 idev->igroup->hwpt = hwpt; 368 } 369 refcount_inc(&hwpt->obj.users); 370 list_add_tail(&idev->group_item, &idev->igroup->device_list); 371 mutex_unlock(&idev->igroup->lock); 372 return 0; 373 err_unresv: 374 iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); 375 err_unlock: 376 mutex_unlock(&idev->igroup->lock); 377 return rc; 378 } 379 380 struct iommufd_hw_pagetable * 381 iommufd_hw_pagetable_detach(struct iommufd_device *idev) 382 { 383 struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; 384 385 mutex_lock(&idev->igroup->lock); 386 list_del(&idev->group_item); 387 if (list_empty(&idev->igroup->device_list)) { 388 iommu_detach_group(hwpt->domain, idev->igroup->group); 389 idev->igroup->hwpt = NULL; 390 } 391 iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); 392 mutex_unlock(&idev->igroup->lock); 393 394 /* Caller must destroy hwpt */ 395 return hwpt; 396 } 397 398 static struct iommufd_hw_pagetable * 399 iommufd_device_do_attach(struct iommufd_device *idev, 400 struct iommufd_hw_pagetable *hwpt) 401 { 402 int rc; 403 404 rc = iommufd_hw_pagetable_attach(hwpt, idev); 405 if (rc) 406 return ERR_PTR(rc); 407 return NULL; 408 } 409 410 static struct iommufd_hw_pagetable * 411 iommufd_device_do_replace(struct iommufd_device *idev, 412 struct iommufd_hw_pagetable *hwpt) 413 { 414 struct iommufd_group *igroup = idev->igroup; 415 struct iommufd_hw_pagetable *old_hwpt; 416 unsigned int num_devices = 0; 417 struct iommufd_device *cur; 418 int rc; 419 420 mutex_lock(&idev->igroup->lock); 421 422 if (igroup->hwpt == NULL) { 423 rc = -EINVAL; 424 goto err_unlock; 425 } 426 427 if (hwpt == igroup->hwpt) { 428 mutex_unlock(&idev->igroup->lock); 429 return NULL; 430 } 431 432 /* Try to upgrade the domain we have */ 433 list_for_each_entry(cur, &igroup->device_list, group_item) { 434 num_devices++; 435 if (cur->enforce_cache_coherency) { 436 rc = iommufd_hw_pagetable_enforce_cc(hwpt); 437 if (rc) 438 goto err_unlock; 439 } 440 } 441 442 old_hwpt = igroup->hwpt; 443 if (hwpt->ioas != old_hwpt->ioas) { 444 list_for_each_entry(cur, &igroup->device_list, group_item) { 445 rc = iopt_table_enforce_dev_resv_regions( 446 &hwpt->ioas->iopt, cur->dev, NULL); 447 if (rc) 448 goto err_unresv; 449 } 450 } 451 452 rc = iommufd_group_setup_msi(idev->igroup, hwpt); 453 if (rc) 454 goto err_unresv; 455 456 rc = iommu_group_replace_domain(igroup->group, hwpt->domain); 457 if (rc) 458 goto err_unresv; 459 460 if (hwpt->ioas != old_hwpt->ioas) { 461 list_for_each_entry(cur, &igroup->device_list, group_item) 462 iopt_remove_reserved_iova(&old_hwpt->ioas->iopt, 463 cur->dev); 464 } 465 466 igroup->hwpt = hwpt; 467 468 /* 469 * Move the refcounts held by the device_list to the new hwpt. Retain a 470 * refcount for this thread as the caller will free it. 471 */ 472 refcount_add(num_devices, &hwpt->obj.users); 473 if (num_devices > 1) 474 WARN_ON(refcount_sub_and_test(num_devices - 1, 475 &old_hwpt->obj.users)); 476 mutex_unlock(&idev->igroup->lock); 477 478 /* Caller must destroy old_hwpt */ 479 return old_hwpt; 480 err_unresv: 481 list_for_each_entry(cur, &igroup->device_list, group_item) 482 iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev); 483 err_unlock: 484 mutex_unlock(&idev->igroup->lock); 485 return ERR_PTR(rc); 486 } 487 488 typedef struct iommufd_hw_pagetable *(*attach_fn)( 489 struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); 490 491 /* 492 * When automatically managing the domains we search for a compatible domain in 493 * the iopt and if one is found use it, otherwise create a new domain. 494 * Automatic domain selection will never pick a manually created domain. 495 */ 496 static struct iommufd_hw_pagetable * 497 iommufd_device_auto_get_domain(struct iommufd_device *idev, 498 struct iommufd_ioas *ioas, u32 *pt_id, 499 attach_fn do_attach) 500 { 501 /* 502 * iommufd_hw_pagetable_attach() is called by 503 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as 504 * iommufd_device_do_attach(). So if we are in this mode then we prefer 505 * to use the immediate_attach path as it supports drivers that can't 506 * directly allocate a domain. 507 */ 508 bool immediate_attach = do_attach == iommufd_device_do_attach; 509 struct iommufd_hw_pagetable *destroy_hwpt; 510 struct iommufd_hw_pagetable *hwpt; 511 512 /* 513 * There is no differentiation when domains are allocated, so any domain 514 * that is willing to attach to the device is interchangeable with any 515 * other. 516 */ 517 mutex_lock(&ioas->mutex); 518 list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { 519 if (!hwpt->auto_domain) 520 continue; 521 522 if (!iommufd_lock_obj(&hwpt->obj)) 523 continue; 524 destroy_hwpt = (*do_attach)(idev, hwpt); 525 if (IS_ERR(destroy_hwpt)) { 526 iommufd_put_object(&hwpt->obj); 527 /* 528 * -EINVAL means the domain is incompatible with the 529 * device. Other error codes should propagate to 530 * userspace as failure. Success means the domain is 531 * attached. 532 */ 533 if (PTR_ERR(destroy_hwpt) == -EINVAL) 534 continue; 535 goto out_unlock; 536 } 537 *pt_id = hwpt->obj.id; 538 iommufd_put_object(&hwpt->obj); 539 goto out_unlock; 540 } 541 542 hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, 543 immediate_attach); 544 if (IS_ERR(hwpt)) { 545 destroy_hwpt = ERR_CAST(hwpt); 546 goto out_unlock; 547 } 548 549 if (!immediate_attach) { 550 destroy_hwpt = (*do_attach)(idev, hwpt); 551 if (IS_ERR(destroy_hwpt)) 552 goto out_abort; 553 } else { 554 destroy_hwpt = NULL; 555 } 556 557 hwpt->auto_domain = true; 558 *pt_id = hwpt->obj.id; 559 560 iommufd_object_finalize(idev->ictx, &hwpt->obj); 561 mutex_unlock(&ioas->mutex); 562 return destroy_hwpt; 563 564 out_abort: 565 iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); 566 out_unlock: 567 mutex_unlock(&ioas->mutex); 568 return destroy_hwpt; 569 } 570 571 static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, 572 attach_fn do_attach) 573 { 574 struct iommufd_hw_pagetable *destroy_hwpt; 575 struct iommufd_object *pt_obj; 576 577 pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); 578 if (IS_ERR(pt_obj)) 579 return PTR_ERR(pt_obj); 580 581 switch (pt_obj->type) { 582 case IOMMUFD_OBJ_HW_PAGETABLE: { 583 struct iommufd_hw_pagetable *hwpt = 584 container_of(pt_obj, struct iommufd_hw_pagetable, obj); 585 586 destroy_hwpt = (*do_attach)(idev, hwpt); 587 if (IS_ERR(destroy_hwpt)) 588 goto out_put_pt_obj; 589 break; 590 } 591 case IOMMUFD_OBJ_IOAS: { 592 struct iommufd_ioas *ioas = 593 container_of(pt_obj, struct iommufd_ioas, obj); 594 595 destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, 596 do_attach); 597 if (IS_ERR(destroy_hwpt)) 598 goto out_put_pt_obj; 599 break; 600 } 601 default: 602 destroy_hwpt = ERR_PTR(-EINVAL); 603 goto out_put_pt_obj; 604 } 605 iommufd_put_object(pt_obj); 606 607 /* This destruction has to be after we unlock everything */ 608 if (destroy_hwpt) 609 iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt); 610 return 0; 611 612 out_put_pt_obj: 613 iommufd_put_object(pt_obj); 614 return PTR_ERR(destroy_hwpt); 615 } 616 617 /** 618 * iommufd_device_attach - Connect a device to an iommu_domain 619 * @idev: device to attach 620 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE 621 * Output the IOMMUFD_OBJ_HW_PAGETABLE ID 622 * 623 * This connects the device to an iommu_domain, either automatically or manually 624 * selected. Once this completes the device could do DMA. 625 * 626 * The caller should return the resulting pt_id back to userspace. 627 * This function is undone by calling iommufd_device_detach(). 628 */ 629 int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) 630 { 631 int rc; 632 633 rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); 634 if (rc) 635 return rc; 636 637 /* 638 * Pairs with iommufd_device_detach() - catches caller bugs attempting 639 * to destroy a device with an attachment. 640 */ 641 refcount_inc(&idev->obj.users); 642 return 0; 643 } 644 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); 645 646 /** 647 * iommufd_device_replace - Change the device's iommu_domain 648 * @idev: device to change 649 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE 650 * Output the IOMMUFD_OBJ_HW_PAGETABLE ID 651 * 652 * This is the same as:: 653 * 654 * iommufd_device_detach(); 655 * iommufd_device_attach(); 656 * 657 * If it fails then no change is made to the attachment. The iommu driver may 658 * implement this so there is no disruption in translation. This can only be 659 * called if iommufd_device_attach() has already succeeded. 660 */ 661 int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) 662 { 663 return iommufd_device_change_pt(idev, pt_id, 664 &iommufd_device_do_replace); 665 } 666 EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); 667 668 /** 669 * iommufd_device_detach - Disconnect a device to an iommu_domain 670 * @idev: device to detach 671 * 672 * Undo iommufd_device_attach(). This disconnects the idev from the previously 673 * attached pt_id. The device returns back to a blocked DMA translation. 674 */ 675 void iommufd_device_detach(struct iommufd_device *idev) 676 { 677 struct iommufd_hw_pagetable *hwpt; 678 679 hwpt = iommufd_hw_pagetable_detach(idev); 680 iommufd_hw_pagetable_put(idev->ictx, hwpt); 681 refcount_dec(&idev->obj.users); 682 } 683 EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); 684 685 /* 686 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at 687 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should 688 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas. 689 */ 690 static int iommufd_access_change_ioas(struct iommufd_access *access, 691 struct iommufd_ioas *new_ioas) 692 { 693 u32 iopt_access_list_id = access->iopt_access_list_id; 694 struct iommufd_ioas *cur_ioas = access->ioas; 695 int rc; 696 697 lockdep_assert_held(&access->ioas_lock); 698 699 /* We are racing with a concurrent detach, bail */ 700 if (cur_ioas != access->ioas_unpin) 701 return -EBUSY; 702 703 if (cur_ioas == new_ioas) 704 return 0; 705 706 /* 707 * Set ioas to NULL to block any further iommufd_access_pin_pages(). 708 * iommufd_access_unpin_pages() can continue using access->ioas_unpin. 709 */ 710 access->ioas = NULL; 711 712 if (new_ioas) { 713 rc = iopt_add_access(&new_ioas->iopt, access); 714 if (rc) { 715 access->ioas = cur_ioas; 716 return rc; 717 } 718 refcount_inc(&new_ioas->obj.users); 719 } 720 721 if (cur_ioas) { 722 if (access->ops->unmap) { 723 mutex_unlock(&access->ioas_lock); 724 access->ops->unmap(access->data, 0, ULONG_MAX); 725 mutex_lock(&access->ioas_lock); 726 } 727 iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id); 728 refcount_dec(&cur_ioas->obj.users); 729 } 730 731 access->ioas = new_ioas; 732 access->ioas_unpin = new_ioas; 733 734 return 0; 735 } 736 737 static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) 738 { 739 struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id); 740 int rc; 741 742 if (IS_ERR(ioas)) 743 return PTR_ERR(ioas); 744 rc = iommufd_access_change_ioas(access, ioas); 745 iommufd_put_object(&ioas->obj); 746 return rc; 747 } 748 749 void iommufd_access_destroy_object(struct iommufd_object *obj) 750 { 751 struct iommufd_access *access = 752 container_of(obj, struct iommufd_access, obj); 753 754 mutex_lock(&access->ioas_lock); 755 if (access->ioas) 756 WARN_ON(iommufd_access_change_ioas(access, NULL)); 757 mutex_unlock(&access->ioas_lock); 758 iommufd_ctx_put(access->ictx); 759 } 760 761 /** 762 * iommufd_access_create - Create an iommufd_access 763 * @ictx: iommufd file descriptor 764 * @ops: Driver's ops to associate with the access 765 * @data: Opaque data to pass into ops functions 766 * @id: Output ID number to return to userspace for this access 767 * 768 * An iommufd_access allows a driver to read/write to the IOAS without using 769 * DMA. The underlying CPU memory can be accessed using the 770 * iommufd_access_pin_pages() or iommufd_access_rw() functions. 771 * 772 * The provided ops are required to use iommufd_access_pin_pages(). 773 */ 774 struct iommufd_access * 775 iommufd_access_create(struct iommufd_ctx *ictx, 776 const struct iommufd_access_ops *ops, void *data, u32 *id) 777 { 778 struct iommufd_access *access; 779 780 /* 781 * There is no uAPI for the access object, but to keep things symmetric 782 * use the object infrastructure anyhow. 783 */ 784 access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); 785 if (IS_ERR(access)) 786 return access; 787 788 access->data = data; 789 access->ops = ops; 790 791 if (ops->needs_pin_pages) 792 access->iova_alignment = PAGE_SIZE; 793 else 794 access->iova_alignment = 1; 795 796 /* The calling driver is a user until iommufd_access_destroy() */ 797 refcount_inc(&access->obj.users); 798 access->ictx = ictx; 799 iommufd_ctx_get(ictx); 800 iommufd_object_finalize(ictx, &access->obj); 801 *id = access->obj.id; 802 mutex_init(&access->ioas_lock); 803 return access; 804 } 805 EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); 806 807 /** 808 * iommufd_access_destroy - Destroy an iommufd_access 809 * @access: The access to destroy 810 * 811 * The caller must stop using the access before destroying it. 812 */ 813 void iommufd_access_destroy(struct iommufd_access *access) 814 { 815 iommufd_object_destroy_user(access->ictx, &access->obj); 816 } 817 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); 818 819 void iommufd_access_detach(struct iommufd_access *access) 820 { 821 mutex_lock(&access->ioas_lock); 822 if (WARN_ON(!access->ioas)) { 823 mutex_unlock(&access->ioas_lock); 824 return; 825 } 826 WARN_ON(iommufd_access_change_ioas(access, NULL)); 827 mutex_unlock(&access->ioas_lock); 828 } 829 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); 830 831 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) 832 { 833 int rc; 834 835 mutex_lock(&access->ioas_lock); 836 if (WARN_ON(access->ioas)) { 837 mutex_unlock(&access->ioas_lock); 838 return -EINVAL; 839 } 840 841 rc = iommufd_access_change_ioas_id(access, ioas_id); 842 mutex_unlock(&access->ioas_lock); 843 return rc; 844 } 845 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); 846 847 int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) 848 { 849 int rc; 850 851 mutex_lock(&access->ioas_lock); 852 if (!access->ioas) { 853 mutex_unlock(&access->ioas_lock); 854 return -ENOENT; 855 } 856 rc = iommufd_access_change_ioas_id(access, ioas_id); 857 mutex_unlock(&access->ioas_lock); 858 return rc; 859 } 860 EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); 861 862 /** 863 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it 864 * @iopt: iopt to work on 865 * @iova: Starting iova in the iopt 866 * @length: Number of bytes 867 * 868 * After this function returns there should be no users attached to the pages 869 * linked to this iopt that intersect with iova,length. Anyone that has attached 870 * a user through iopt_access_pages() needs to detach it through 871 * iommufd_access_unpin_pages() before this function returns. 872 * 873 * iommufd_access_destroy() will wait for any outstanding unmap callback to 874 * complete. Once iommufd_access_destroy() no unmap ops are running or will 875 * run in the future. Due to this a driver must not create locking that prevents 876 * unmap to complete while iommufd_access_destroy() is running. 877 */ 878 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, 879 unsigned long length) 880 { 881 struct iommufd_ioas *ioas = 882 container_of(iopt, struct iommufd_ioas, iopt); 883 struct iommufd_access *access; 884 unsigned long index; 885 886 xa_lock(&ioas->iopt.access_list); 887 xa_for_each(&ioas->iopt.access_list, index, access) { 888 if (!iommufd_lock_obj(&access->obj)) 889 continue; 890 xa_unlock(&ioas->iopt.access_list); 891 892 access->ops->unmap(access->data, iova, length); 893 894 iommufd_put_object(&access->obj); 895 xa_lock(&ioas->iopt.access_list); 896 } 897 xa_unlock(&ioas->iopt.access_list); 898 } 899 900 /** 901 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages 902 * @access: IOAS access to act on 903 * @iova: Starting IOVA 904 * @length: Number of bytes to access 905 * 906 * Return the struct page's. The caller must stop accessing them before calling 907 * this. The iova/length must exactly match the one provided to access_pages. 908 */ 909 void iommufd_access_unpin_pages(struct iommufd_access *access, 910 unsigned long iova, unsigned long length) 911 { 912 struct iopt_area_contig_iter iter; 913 struct io_pagetable *iopt; 914 unsigned long last_iova; 915 struct iopt_area *area; 916 917 if (WARN_ON(!length) || 918 WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) 919 return; 920 921 mutex_lock(&access->ioas_lock); 922 /* 923 * The driver must be doing something wrong if it calls this before an 924 * iommufd_access_attach() or after an iommufd_access_detach(). 925 */ 926 if (WARN_ON(!access->ioas_unpin)) { 927 mutex_unlock(&access->ioas_lock); 928 return; 929 } 930 iopt = &access->ioas_unpin->iopt; 931 932 down_read(&iopt->iova_rwsem); 933 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) 934 iopt_area_remove_access( 935 area, iopt_area_iova_to_index(area, iter.cur_iova), 936 iopt_area_iova_to_index( 937 area, 938 min(last_iova, iopt_area_last_iova(area)))); 939 WARN_ON(!iopt_area_contig_done(&iter)); 940 up_read(&iopt->iova_rwsem); 941 mutex_unlock(&access->ioas_lock); 942 } 943 EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); 944 945 static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter) 946 { 947 if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE) 948 return false; 949 950 if (!iopt_area_contig_done(iter) && 951 (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) % 952 PAGE_SIZE) != (PAGE_SIZE - 1)) 953 return false; 954 return true; 955 } 956 957 static bool check_area_prot(struct iopt_area *area, unsigned int flags) 958 { 959 if (flags & IOMMUFD_ACCESS_RW_WRITE) 960 return area->iommu_prot & IOMMU_WRITE; 961 return area->iommu_prot & IOMMU_READ; 962 } 963 964 /** 965 * iommufd_access_pin_pages() - Return a list of pages under the iova 966 * @access: IOAS access to act on 967 * @iova: Starting IOVA 968 * @length: Number of bytes to access 969 * @out_pages: Output page list 970 * @flags: IOPMMUFD_ACCESS_RW_* flags 971 * 972 * Reads @length bytes starting at iova and returns the struct page * pointers. 973 * These can be kmap'd by the caller for CPU access. 974 * 975 * The caller must perform iommufd_access_unpin_pages() when done to balance 976 * this. 977 * 978 * This API always requires a page aligned iova. This happens naturally if the 979 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However 980 * smaller alignments have corner cases where this API can fail on otherwise 981 * aligned iova. 982 */ 983 int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, 984 unsigned long length, struct page **out_pages, 985 unsigned int flags) 986 { 987 struct iopt_area_contig_iter iter; 988 struct io_pagetable *iopt; 989 unsigned long last_iova; 990 struct iopt_area *area; 991 int rc; 992 993 /* Driver's ops don't support pin_pages */ 994 if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && 995 WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap)) 996 return -EINVAL; 997 998 if (!length) 999 return -EINVAL; 1000 if (check_add_overflow(iova, length - 1, &last_iova)) 1001 return -EOVERFLOW; 1002 1003 mutex_lock(&access->ioas_lock); 1004 if (!access->ioas) { 1005 mutex_unlock(&access->ioas_lock); 1006 return -ENOENT; 1007 } 1008 iopt = &access->ioas->iopt; 1009 1010 down_read(&iopt->iova_rwsem); 1011 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { 1012 unsigned long last = min(last_iova, iopt_area_last_iova(area)); 1013 unsigned long last_index = iopt_area_iova_to_index(area, last); 1014 unsigned long index = 1015 iopt_area_iova_to_index(area, iter.cur_iova); 1016 1017 if (area->prevent_access || 1018 !iopt_area_contig_is_aligned(&iter)) { 1019 rc = -EINVAL; 1020 goto err_remove; 1021 } 1022 1023 if (!check_area_prot(area, flags)) { 1024 rc = -EPERM; 1025 goto err_remove; 1026 } 1027 1028 rc = iopt_area_add_access(area, index, last_index, out_pages, 1029 flags); 1030 if (rc) 1031 goto err_remove; 1032 out_pages += last_index - index + 1; 1033 } 1034 if (!iopt_area_contig_done(&iter)) { 1035 rc = -ENOENT; 1036 goto err_remove; 1037 } 1038 1039 up_read(&iopt->iova_rwsem); 1040 mutex_unlock(&access->ioas_lock); 1041 return 0; 1042 1043 err_remove: 1044 if (iova < iter.cur_iova) { 1045 last_iova = iter.cur_iova - 1; 1046 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) 1047 iopt_area_remove_access( 1048 area, 1049 iopt_area_iova_to_index(area, iter.cur_iova), 1050 iopt_area_iova_to_index( 1051 area, min(last_iova, 1052 iopt_area_last_iova(area)))); 1053 } 1054 up_read(&iopt->iova_rwsem); 1055 mutex_unlock(&access->ioas_lock); 1056 return rc; 1057 } 1058 EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); 1059 1060 /** 1061 * iommufd_access_rw - Read or write data under the iova 1062 * @access: IOAS access to act on 1063 * @iova: Starting IOVA 1064 * @data: Kernel buffer to copy to/from 1065 * @length: Number of bytes to access 1066 * @flags: IOMMUFD_ACCESS_RW_* flags 1067 * 1068 * Copy kernel to/from data into the range given by IOVA/length. If flags 1069 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized 1070 * by changing it into copy_to/from_user(). 1071 */ 1072 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, 1073 void *data, size_t length, unsigned int flags) 1074 { 1075 struct iopt_area_contig_iter iter; 1076 struct io_pagetable *iopt; 1077 struct iopt_area *area; 1078 unsigned long last_iova; 1079 int rc; 1080 1081 if (!length) 1082 return -EINVAL; 1083 if (check_add_overflow(iova, length - 1, &last_iova)) 1084 return -EOVERFLOW; 1085 1086 mutex_lock(&access->ioas_lock); 1087 if (!access->ioas) { 1088 mutex_unlock(&access->ioas_lock); 1089 return -ENOENT; 1090 } 1091 iopt = &access->ioas->iopt; 1092 1093 down_read(&iopt->iova_rwsem); 1094 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { 1095 unsigned long last = min(last_iova, iopt_area_last_iova(area)); 1096 unsigned long bytes = (last - iter.cur_iova) + 1; 1097 1098 if (area->prevent_access) { 1099 rc = -EINVAL; 1100 goto err_out; 1101 } 1102 1103 if (!check_area_prot(area, flags)) { 1104 rc = -EPERM; 1105 goto err_out; 1106 } 1107 1108 rc = iopt_pages_rw_access( 1109 area->pages, iopt_area_start_byte(area, iter.cur_iova), 1110 data, bytes, flags); 1111 if (rc) 1112 goto err_out; 1113 data += bytes; 1114 } 1115 if (!iopt_area_contig_done(&iter)) 1116 rc = -ENOENT; 1117 err_out: 1118 up_read(&iopt->iova_rwsem); 1119 mutex_unlock(&access->ioas_lock); 1120 return rc; 1121 } 1122 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); 1123 1124 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) 1125 { 1126 struct iommu_hw_info *cmd = ucmd->cmd; 1127 void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr); 1128 const struct iommu_ops *ops; 1129 struct iommufd_device *idev; 1130 unsigned int data_len; 1131 unsigned int copy_len; 1132 void *data; 1133 int rc; 1134 1135 if (cmd->flags || cmd->__reserved) 1136 return -EOPNOTSUPP; 1137 1138 idev = iommufd_get_device(ucmd, cmd->dev_id); 1139 if (IS_ERR(idev)) 1140 return PTR_ERR(idev); 1141 1142 ops = dev_iommu_ops(idev->dev); 1143 if (ops->hw_info) { 1144 data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type); 1145 if (IS_ERR(data)) { 1146 rc = PTR_ERR(data); 1147 goto out_put; 1148 } 1149 1150 /* 1151 * drivers that have hw_info callback should have a unique 1152 * iommu_hw_info_type. 1153 */ 1154 if (WARN_ON_ONCE(cmd->out_data_type == 1155 IOMMU_HW_INFO_TYPE_NONE)) { 1156 rc = -ENODEV; 1157 goto out_free; 1158 } 1159 } else { 1160 cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE; 1161 data_len = 0; 1162 data = NULL; 1163 } 1164 1165 copy_len = min(cmd->data_len, data_len); 1166 if (copy_to_user(user_ptr, data, copy_len)) { 1167 rc = -EFAULT; 1168 goto out_free; 1169 } 1170 1171 /* 1172 * Zero the trailing bytes if the user buffer is bigger than the 1173 * data size kernel actually has. 1174 */ 1175 if (copy_len < cmd->data_len) { 1176 if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) { 1177 rc = -EFAULT; 1178 goto out_free; 1179 } 1180 } 1181 1182 /* 1183 * We return the length the kernel supports so userspace may know what 1184 * the kernel capability is. It could be larger than the input buffer. 1185 */ 1186 cmd->data_len = data_len; 1187 1188 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 1189 out_free: 1190 kfree(data); 1191 out_put: 1192 iommufd_put_object(&idev->obj); 1193 return rc; 1194 } 1195