1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7 #define pr_fmt(fmt) "iommu: " fmt 8 9 #include <linux/amba/bus.h> 10 #include <linux/device.h> 11 #include <linux/kernel.h> 12 #include <linux/bits.h> 13 #include <linux/bug.h> 14 #include <linux/types.h> 15 #include <linux/init.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/errno.h> 19 #include <linux/host1x_context_bus.h> 20 #include <linux/iommu.h> 21 #include <linux/idr.h> 22 #include <linux/err.h> 23 #include <linux/pci.h> 24 #include <linux/pci-ats.h> 25 #include <linux/bitops.h> 26 #include <linux/platform_device.h> 27 #include <linux/property.h> 28 #include <linux/fsl/mc.h> 29 #include <linux/module.h> 30 #include <linux/cc_platform.h> 31 #include <linux/cdx/cdx_bus.h> 32 #include <trace/events/iommu.h> 33 #include <linux/sched/mm.h> 34 #include <linux/msi.h> 35 36 #include "dma-iommu.h" 37 38 #include "iommu-sva.h" 39 40 static struct kset *iommu_group_kset; 41 static DEFINE_IDA(iommu_group_ida); 42 43 static unsigned int iommu_def_domain_type __read_mostly; 44 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 45 static u32 iommu_cmd_line __read_mostly; 46 47 struct iommu_group { 48 struct kobject kobj; 49 struct kobject *devices_kobj; 50 struct list_head devices; 51 struct xarray pasid_array; 52 struct mutex mutex; 53 void *iommu_data; 54 void (*iommu_data_release)(void *iommu_data); 55 char *name; 56 int id; 57 struct iommu_domain *default_domain; 58 struct iommu_domain *blocking_domain; 59 struct iommu_domain *domain; 60 struct list_head entry; 61 unsigned int owner_cnt; 62 void *owner; 63 }; 64 65 struct group_device { 66 struct list_head list; 67 struct device *dev; 68 char *name; 69 }; 70 71 /* Iterate over each struct group_device in a struct iommu_group */ 72 #define for_each_group_device(group, pos) \ 73 list_for_each_entry(pos, &(group)->devices, list) 74 75 struct iommu_group_attribute { 76 struct attribute attr; 77 ssize_t (*show)(struct iommu_group *group, char *buf); 78 ssize_t (*store)(struct iommu_group *group, 79 const char *buf, size_t count); 80 }; 81 82 static const char * const iommu_group_resv_type_string[] = { 83 [IOMMU_RESV_DIRECT] = "direct", 84 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 85 [IOMMU_RESV_RESERVED] = "reserved", 86 [IOMMU_RESV_MSI] = "msi", 87 [IOMMU_RESV_SW_MSI] = "msi", 88 }; 89 90 #define IOMMU_CMD_LINE_DMA_API BIT(0) 91 #define IOMMU_CMD_LINE_STRICT BIT(1) 92 93 static int iommu_bus_notifier(struct notifier_block *nb, 94 unsigned long action, void *data); 95 static void iommu_release_device(struct device *dev); 96 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, 97 unsigned type); 98 static int __iommu_attach_device(struct iommu_domain *domain, 99 struct device *dev); 100 static int __iommu_attach_group(struct iommu_domain *domain, 101 struct iommu_group *group); 102 103 enum { 104 IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0, 105 }; 106 107 static int __iommu_device_set_domain(struct iommu_group *group, 108 struct device *dev, 109 struct iommu_domain *new_domain, 110 unsigned int flags); 111 static int __iommu_group_set_domain_internal(struct iommu_group *group, 112 struct iommu_domain *new_domain, 113 unsigned int flags); 114 static int __iommu_group_set_domain(struct iommu_group *group, 115 struct iommu_domain *new_domain) 116 { 117 return __iommu_group_set_domain_internal(group, new_domain, 0); 118 } 119 static void __iommu_group_set_domain_nofail(struct iommu_group *group, 120 struct iommu_domain *new_domain) 121 { 122 WARN_ON(__iommu_group_set_domain_internal( 123 group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); 124 } 125 126 static int iommu_setup_default_domain(struct iommu_group *group, 127 int target_type); 128 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 129 struct device *dev); 130 static ssize_t iommu_group_store_type(struct iommu_group *group, 131 const char *buf, size_t count); 132 133 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 134 struct iommu_group_attribute iommu_group_attr_##_name = \ 135 __ATTR(_name, _mode, _show, _store) 136 137 #define to_iommu_group_attr(_attr) \ 138 container_of(_attr, struct iommu_group_attribute, attr) 139 #define to_iommu_group(_kobj) \ 140 container_of(_kobj, struct iommu_group, kobj) 141 142 static LIST_HEAD(iommu_device_list); 143 static DEFINE_SPINLOCK(iommu_device_lock); 144 145 static struct bus_type * const iommu_buses[] = { 146 &platform_bus_type, 147 #ifdef CONFIG_PCI 148 &pci_bus_type, 149 #endif 150 #ifdef CONFIG_ARM_AMBA 151 &amba_bustype, 152 #endif 153 #ifdef CONFIG_FSL_MC_BUS 154 &fsl_mc_bus_type, 155 #endif 156 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 157 &host1x_context_device_bus_type, 158 #endif 159 #ifdef CONFIG_CDX_BUS 160 &cdx_bus_type, 161 #endif 162 }; 163 164 /* 165 * Use a function instead of an array here because the domain-type is a 166 * bit-field, so an array would waste memory. 167 */ 168 static const char *iommu_domain_type_str(unsigned int t) 169 { 170 switch (t) { 171 case IOMMU_DOMAIN_BLOCKED: 172 return "Blocked"; 173 case IOMMU_DOMAIN_IDENTITY: 174 return "Passthrough"; 175 case IOMMU_DOMAIN_UNMANAGED: 176 return "Unmanaged"; 177 case IOMMU_DOMAIN_DMA: 178 case IOMMU_DOMAIN_DMA_FQ: 179 return "Translated"; 180 default: 181 return "Unknown"; 182 } 183 } 184 185 static int __init iommu_subsys_init(void) 186 { 187 struct notifier_block *nb; 188 189 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 190 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 191 iommu_set_default_passthrough(false); 192 else 193 iommu_set_default_translated(false); 194 195 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 196 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 197 iommu_set_default_translated(false); 198 } 199 } 200 201 if (!iommu_default_passthrough() && !iommu_dma_strict) 202 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 203 204 pr_info("Default domain type: %s%s\n", 205 iommu_domain_type_str(iommu_def_domain_type), 206 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 207 " (set via kernel command line)" : ""); 208 209 if (!iommu_default_passthrough()) 210 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 211 iommu_dma_strict ? "strict" : "lazy", 212 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 213 " (set via kernel command line)" : ""); 214 215 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 216 if (!nb) 217 return -ENOMEM; 218 219 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 220 nb[i].notifier_call = iommu_bus_notifier; 221 bus_register_notifier(iommu_buses[i], &nb[i]); 222 } 223 224 return 0; 225 } 226 subsys_initcall(iommu_subsys_init); 227 228 static int remove_iommu_group(struct device *dev, void *data) 229 { 230 if (dev->iommu && dev->iommu->iommu_dev == data) 231 iommu_release_device(dev); 232 233 return 0; 234 } 235 236 /** 237 * iommu_device_register() - Register an IOMMU hardware instance 238 * @iommu: IOMMU handle for the instance 239 * @ops: IOMMU ops to associate with the instance 240 * @hwdev: (optional) actual instance device, used for fwnode lookup 241 * 242 * Return: 0 on success, or an error. 243 */ 244 int iommu_device_register(struct iommu_device *iommu, 245 const struct iommu_ops *ops, struct device *hwdev) 246 { 247 int err = 0; 248 249 /* We need to be able to take module references appropriately */ 250 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 251 return -EINVAL; 252 /* 253 * Temporarily enforce global restriction to a single driver. This was 254 * already the de-facto behaviour, since any possible combination of 255 * existing drivers would compete for at least the PCI or platform bus. 256 */ 257 if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) 258 return -EBUSY; 259 260 iommu->ops = ops; 261 if (hwdev) 262 iommu->fwnode = dev_fwnode(hwdev); 263 264 spin_lock(&iommu_device_lock); 265 list_add_tail(&iommu->list, &iommu_device_list); 266 spin_unlock(&iommu_device_lock); 267 268 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { 269 iommu_buses[i]->iommu_ops = ops; 270 err = bus_iommu_probe(iommu_buses[i]); 271 } 272 if (err) 273 iommu_device_unregister(iommu); 274 return err; 275 } 276 EXPORT_SYMBOL_GPL(iommu_device_register); 277 278 void iommu_device_unregister(struct iommu_device *iommu) 279 { 280 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 281 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 282 283 spin_lock(&iommu_device_lock); 284 list_del(&iommu->list); 285 spin_unlock(&iommu_device_lock); 286 } 287 EXPORT_SYMBOL_GPL(iommu_device_unregister); 288 289 static struct dev_iommu *dev_iommu_get(struct device *dev) 290 { 291 struct dev_iommu *param = dev->iommu; 292 293 if (param) 294 return param; 295 296 param = kzalloc(sizeof(*param), GFP_KERNEL); 297 if (!param) 298 return NULL; 299 300 mutex_init(¶m->lock); 301 dev->iommu = param; 302 return param; 303 } 304 305 static void dev_iommu_free(struct device *dev) 306 { 307 struct dev_iommu *param = dev->iommu; 308 309 dev->iommu = NULL; 310 if (param->fwspec) { 311 fwnode_handle_put(param->fwspec->iommu_fwnode); 312 kfree(param->fwspec); 313 } 314 kfree(param); 315 } 316 317 static u32 dev_iommu_get_max_pasids(struct device *dev) 318 { 319 u32 max_pasids = 0, bits = 0; 320 int ret; 321 322 if (dev_is_pci(dev)) { 323 ret = pci_max_pasids(to_pci_dev(dev)); 324 if (ret > 0) 325 max_pasids = ret; 326 } else { 327 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 328 if (!ret) 329 max_pasids = 1UL << bits; 330 } 331 332 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 333 } 334 335 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 336 { 337 const struct iommu_ops *ops = dev->bus->iommu_ops; 338 struct iommu_device *iommu_dev; 339 struct iommu_group *group; 340 static DEFINE_MUTEX(iommu_probe_device_lock); 341 int ret; 342 343 if (!ops) 344 return -ENODEV; 345 /* 346 * Serialise to avoid races between IOMMU drivers registering in 347 * parallel and/or the "replay" calls from ACPI/OF code via client 348 * driver probe. Once the latter have been cleaned up we should 349 * probably be able to use device_lock() here to minimise the scope, 350 * but for now enforcing a simple global ordering is fine. 351 */ 352 mutex_lock(&iommu_probe_device_lock); 353 354 /* Device is probed already if in a group */ 355 if (dev->iommu_group) { 356 ret = 0; 357 goto out_unlock; 358 } 359 360 if (!dev_iommu_get(dev)) { 361 ret = -ENOMEM; 362 goto out_unlock; 363 } 364 365 if (!try_module_get(ops->owner)) { 366 ret = -EINVAL; 367 goto err_free; 368 } 369 370 iommu_dev = ops->probe_device(dev); 371 if (IS_ERR(iommu_dev)) { 372 ret = PTR_ERR(iommu_dev); 373 goto out_module_put; 374 } 375 376 dev->iommu->iommu_dev = iommu_dev; 377 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 378 if (ops->is_attach_deferred) 379 dev->iommu->attach_deferred = ops->is_attach_deferred(dev); 380 381 group = ops->device_group(dev); 382 if (WARN_ON_ONCE(group == NULL)) 383 group = ERR_PTR(-EINVAL); 384 if (IS_ERR(group)) { 385 ret = PTR_ERR(group); 386 goto out_release; 387 } 388 389 ret = iommu_group_add_device(group, dev); 390 if (ret) 391 goto err_put_group; 392 393 mutex_lock(&group->mutex); 394 if (group_list && !group->default_domain && list_empty(&group->entry)) 395 list_add_tail(&group->entry, group_list); 396 mutex_unlock(&group->mutex); 397 iommu_group_put(group); 398 399 mutex_unlock(&iommu_probe_device_lock); 400 iommu_device_link(iommu_dev, dev); 401 402 return 0; 403 404 err_put_group: 405 iommu_group_put(group); 406 out_release: 407 if (ops->release_device) 408 ops->release_device(dev); 409 410 out_module_put: 411 module_put(ops->owner); 412 413 err_free: 414 dev_iommu_free(dev); 415 416 out_unlock: 417 mutex_unlock(&iommu_probe_device_lock); 418 419 return ret; 420 } 421 422 int iommu_probe_device(struct device *dev) 423 { 424 const struct iommu_ops *ops; 425 struct iommu_group *group; 426 int ret; 427 428 ret = __iommu_probe_device(dev, NULL); 429 if (ret) 430 goto err_out; 431 432 group = iommu_group_get(dev); 433 if (!group) { 434 ret = -ENODEV; 435 goto err_release; 436 } 437 438 mutex_lock(&group->mutex); 439 440 if (group->default_domain) 441 iommu_create_device_direct_mappings(group->default_domain, dev); 442 443 if (group->domain) { 444 ret = __iommu_device_set_domain(group, dev, group->domain, 0); 445 if (ret) 446 goto err_unlock; 447 } else if (!group->default_domain) { 448 ret = iommu_setup_default_domain(group, 0); 449 if (ret) 450 goto err_unlock; 451 } 452 453 mutex_unlock(&group->mutex); 454 iommu_group_put(group); 455 456 ops = dev_iommu_ops(dev); 457 if (ops->probe_finalize) 458 ops->probe_finalize(dev); 459 460 return 0; 461 462 err_unlock: 463 mutex_unlock(&group->mutex); 464 iommu_group_put(group); 465 err_release: 466 iommu_release_device(dev); 467 468 err_out: 469 return ret; 470 471 } 472 473 static void __iommu_group_free_device(struct iommu_group *group, 474 struct group_device *grp_dev) 475 { 476 struct device *dev = grp_dev->dev; 477 478 sysfs_remove_link(group->devices_kobj, grp_dev->name); 479 sysfs_remove_link(&dev->kobj, "iommu_group"); 480 481 trace_remove_device_from_group(group->id, dev); 482 483 /* 484 * If the group has become empty then ownership must have been 485 * released, and the current domain must be set back to NULL or 486 * the default domain. 487 */ 488 if (list_empty(&group->devices)) 489 WARN_ON(group->owner_cnt || 490 group->domain != group->default_domain); 491 492 kfree(grp_dev->name); 493 kfree(grp_dev); 494 dev->iommu_group = NULL; 495 } 496 497 /* 498 * Remove the iommu_group from the struct device. The attached group must be put 499 * by the caller after releaseing the group->mutex. 500 */ 501 static void __iommu_group_remove_device(struct device *dev) 502 { 503 struct iommu_group *group = dev->iommu_group; 504 struct group_device *device; 505 506 lockdep_assert_held(&group->mutex); 507 for_each_group_device(group, device) { 508 if (device->dev != dev) 509 continue; 510 511 list_del(&device->list); 512 __iommu_group_free_device(group, device); 513 /* Caller must put iommu_group */ 514 return; 515 } 516 WARN(true, "Corrupted iommu_group device_list"); 517 } 518 519 static void iommu_release_device(struct device *dev) 520 { 521 struct iommu_group *group = dev->iommu_group; 522 const struct iommu_ops *ops; 523 524 if (!dev->iommu || !group) 525 return; 526 527 iommu_device_unlink(dev->iommu->iommu_dev, dev); 528 529 mutex_lock(&group->mutex); 530 __iommu_group_remove_device(dev); 531 532 /* 533 * release_device() must stop using any attached domain on the device. 534 * If there are still other devices in the group they are not effected 535 * by this callback. 536 * 537 * The IOMMU driver must set the device to either an identity or 538 * blocking translation and stop using any domain pointer, as it is 539 * going to be freed. 540 */ 541 ops = dev_iommu_ops(dev); 542 if (ops->release_device) 543 ops->release_device(dev); 544 mutex_unlock(&group->mutex); 545 546 /* Pairs with the get in iommu_group_add_device() */ 547 iommu_group_put(group); 548 549 module_put(ops->owner); 550 dev_iommu_free(dev); 551 } 552 553 static int __init iommu_set_def_domain_type(char *str) 554 { 555 bool pt; 556 int ret; 557 558 ret = kstrtobool(str, &pt); 559 if (ret) 560 return ret; 561 562 if (pt) 563 iommu_set_default_passthrough(true); 564 else 565 iommu_set_default_translated(true); 566 567 return 0; 568 } 569 early_param("iommu.passthrough", iommu_set_def_domain_type); 570 571 static int __init iommu_dma_setup(char *str) 572 { 573 int ret = kstrtobool(str, &iommu_dma_strict); 574 575 if (!ret) 576 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 577 return ret; 578 } 579 early_param("iommu.strict", iommu_dma_setup); 580 581 void iommu_set_dma_strict(void) 582 { 583 iommu_dma_strict = true; 584 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 585 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 586 } 587 588 static ssize_t iommu_group_attr_show(struct kobject *kobj, 589 struct attribute *__attr, char *buf) 590 { 591 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 592 struct iommu_group *group = to_iommu_group(kobj); 593 ssize_t ret = -EIO; 594 595 if (attr->show) 596 ret = attr->show(group, buf); 597 return ret; 598 } 599 600 static ssize_t iommu_group_attr_store(struct kobject *kobj, 601 struct attribute *__attr, 602 const char *buf, size_t count) 603 { 604 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 605 struct iommu_group *group = to_iommu_group(kobj); 606 ssize_t ret = -EIO; 607 608 if (attr->store) 609 ret = attr->store(group, buf, count); 610 return ret; 611 } 612 613 static const struct sysfs_ops iommu_group_sysfs_ops = { 614 .show = iommu_group_attr_show, 615 .store = iommu_group_attr_store, 616 }; 617 618 static int iommu_group_create_file(struct iommu_group *group, 619 struct iommu_group_attribute *attr) 620 { 621 return sysfs_create_file(&group->kobj, &attr->attr); 622 } 623 624 static void iommu_group_remove_file(struct iommu_group *group, 625 struct iommu_group_attribute *attr) 626 { 627 sysfs_remove_file(&group->kobj, &attr->attr); 628 } 629 630 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 631 { 632 return sysfs_emit(buf, "%s\n", group->name); 633 } 634 635 /** 636 * iommu_insert_resv_region - Insert a new region in the 637 * list of reserved regions. 638 * @new: new region to insert 639 * @regions: list of regions 640 * 641 * Elements are sorted by start address and overlapping segments 642 * of the same type are merged. 643 */ 644 static int iommu_insert_resv_region(struct iommu_resv_region *new, 645 struct list_head *regions) 646 { 647 struct iommu_resv_region *iter, *tmp, *nr, *top; 648 LIST_HEAD(stack); 649 650 nr = iommu_alloc_resv_region(new->start, new->length, 651 new->prot, new->type, GFP_KERNEL); 652 if (!nr) 653 return -ENOMEM; 654 655 /* First add the new element based on start address sorting */ 656 list_for_each_entry(iter, regions, list) { 657 if (nr->start < iter->start || 658 (nr->start == iter->start && nr->type <= iter->type)) 659 break; 660 } 661 list_add_tail(&nr->list, &iter->list); 662 663 /* Merge overlapping segments of type nr->type in @regions, if any */ 664 list_for_each_entry_safe(iter, tmp, regions, list) { 665 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 666 667 /* no merge needed on elements of different types than @new */ 668 if (iter->type != new->type) { 669 list_move_tail(&iter->list, &stack); 670 continue; 671 } 672 673 /* look for the last stack element of same type as @iter */ 674 list_for_each_entry_reverse(top, &stack, list) 675 if (top->type == iter->type) 676 goto check_overlap; 677 678 list_move_tail(&iter->list, &stack); 679 continue; 680 681 check_overlap: 682 top_end = top->start + top->length - 1; 683 684 if (iter->start > top_end + 1) { 685 list_move_tail(&iter->list, &stack); 686 } else { 687 top->length = max(top_end, iter_end) - top->start + 1; 688 list_del(&iter->list); 689 kfree(iter); 690 } 691 } 692 list_splice(&stack, regions); 693 return 0; 694 } 695 696 static int 697 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 698 struct list_head *group_resv_regions) 699 { 700 struct iommu_resv_region *entry; 701 int ret = 0; 702 703 list_for_each_entry(entry, dev_resv_regions, list) { 704 ret = iommu_insert_resv_region(entry, group_resv_regions); 705 if (ret) 706 break; 707 } 708 return ret; 709 } 710 711 int iommu_get_group_resv_regions(struct iommu_group *group, 712 struct list_head *head) 713 { 714 struct group_device *device; 715 int ret = 0; 716 717 mutex_lock(&group->mutex); 718 for_each_group_device(group, device) { 719 struct list_head dev_resv_regions; 720 721 /* 722 * Non-API groups still expose reserved_regions in sysfs, 723 * so filter out calls that get here that way. 724 */ 725 if (!device->dev->iommu) 726 break; 727 728 INIT_LIST_HEAD(&dev_resv_regions); 729 iommu_get_resv_regions(device->dev, &dev_resv_regions); 730 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 731 iommu_put_resv_regions(device->dev, &dev_resv_regions); 732 if (ret) 733 break; 734 } 735 mutex_unlock(&group->mutex); 736 return ret; 737 } 738 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 739 740 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 741 char *buf) 742 { 743 struct iommu_resv_region *region, *next; 744 struct list_head group_resv_regions; 745 int offset = 0; 746 747 INIT_LIST_HEAD(&group_resv_regions); 748 iommu_get_group_resv_regions(group, &group_resv_regions); 749 750 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 751 offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", 752 (long long)region->start, 753 (long long)(region->start + 754 region->length - 1), 755 iommu_group_resv_type_string[region->type]); 756 kfree(region); 757 } 758 759 return offset; 760 } 761 762 static ssize_t iommu_group_show_type(struct iommu_group *group, 763 char *buf) 764 { 765 char *type = "unknown"; 766 767 mutex_lock(&group->mutex); 768 if (group->default_domain) { 769 switch (group->default_domain->type) { 770 case IOMMU_DOMAIN_BLOCKED: 771 type = "blocked"; 772 break; 773 case IOMMU_DOMAIN_IDENTITY: 774 type = "identity"; 775 break; 776 case IOMMU_DOMAIN_UNMANAGED: 777 type = "unmanaged"; 778 break; 779 case IOMMU_DOMAIN_DMA: 780 type = "DMA"; 781 break; 782 case IOMMU_DOMAIN_DMA_FQ: 783 type = "DMA-FQ"; 784 break; 785 } 786 } 787 mutex_unlock(&group->mutex); 788 789 return sysfs_emit(buf, "%s\n", type); 790 } 791 792 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 793 794 static IOMMU_GROUP_ATTR(reserved_regions, 0444, 795 iommu_group_show_resv_regions, NULL); 796 797 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 798 iommu_group_store_type); 799 800 static void iommu_group_release(struct kobject *kobj) 801 { 802 struct iommu_group *group = to_iommu_group(kobj); 803 804 pr_debug("Releasing group %d\n", group->id); 805 806 if (group->iommu_data_release) 807 group->iommu_data_release(group->iommu_data); 808 809 ida_free(&iommu_group_ida, group->id); 810 811 if (group->default_domain) 812 iommu_domain_free(group->default_domain); 813 if (group->blocking_domain) 814 iommu_domain_free(group->blocking_domain); 815 816 kfree(group->name); 817 kfree(group); 818 } 819 820 static const struct kobj_type iommu_group_ktype = { 821 .sysfs_ops = &iommu_group_sysfs_ops, 822 .release = iommu_group_release, 823 }; 824 825 /** 826 * iommu_group_alloc - Allocate a new group 827 * 828 * This function is called by an iommu driver to allocate a new iommu 829 * group. The iommu group represents the minimum granularity of the iommu. 830 * Upon successful return, the caller holds a reference to the supplied 831 * group in order to hold the group until devices are added. Use 832 * iommu_group_put() to release this extra reference count, allowing the 833 * group to be automatically reclaimed once it has no devices or external 834 * references. 835 */ 836 struct iommu_group *iommu_group_alloc(void) 837 { 838 struct iommu_group *group; 839 int ret; 840 841 group = kzalloc(sizeof(*group), GFP_KERNEL); 842 if (!group) 843 return ERR_PTR(-ENOMEM); 844 845 group->kobj.kset = iommu_group_kset; 846 mutex_init(&group->mutex); 847 INIT_LIST_HEAD(&group->devices); 848 INIT_LIST_HEAD(&group->entry); 849 xa_init(&group->pasid_array); 850 851 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 852 if (ret < 0) { 853 kfree(group); 854 return ERR_PTR(ret); 855 } 856 group->id = ret; 857 858 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 859 NULL, "%d", group->id); 860 if (ret) { 861 kobject_put(&group->kobj); 862 return ERR_PTR(ret); 863 } 864 865 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 866 if (!group->devices_kobj) { 867 kobject_put(&group->kobj); /* triggers .release & free */ 868 return ERR_PTR(-ENOMEM); 869 } 870 871 /* 872 * The devices_kobj holds a reference on the group kobject, so 873 * as long as that exists so will the group. We can therefore 874 * use the devices_kobj for reference counting. 875 */ 876 kobject_put(&group->kobj); 877 878 ret = iommu_group_create_file(group, 879 &iommu_group_attr_reserved_regions); 880 if (ret) { 881 kobject_put(group->devices_kobj); 882 return ERR_PTR(ret); 883 } 884 885 ret = iommu_group_create_file(group, &iommu_group_attr_type); 886 if (ret) { 887 kobject_put(group->devices_kobj); 888 return ERR_PTR(ret); 889 } 890 891 pr_debug("Allocated group %d\n", group->id); 892 893 return group; 894 } 895 EXPORT_SYMBOL_GPL(iommu_group_alloc); 896 897 /** 898 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 899 * @group: the group 900 * 901 * iommu drivers can store data in the group for use when doing iommu 902 * operations. This function provides a way to retrieve it. Caller 903 * should hold a group reference. 904 */ 905 void *iommu_group_get_iommudata(struct iommu_group *group) 906 { 907 return group->iommu_data; 908 } 909 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 910 911 /** 912 * iommu_group_set_iommudata - set iommu_data for a group 913 * @group: the group 914 * @iommu_data: new data 915 * @release: release function for iommu_data 916 * 917 * iommu drivers can store data in the group for use when doing iommu 918 * operations. This function provides a way to set the data after 919 * the group has been allocated. Caller should hold a group reference. 920 */ 921 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 922 void (*release)(void *iommu_data)) 923 { 924 group->iommu_data = iommu_data; 925 group->iommu_data_release = release; 926 } 927 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 928 929 /** 930 * iommu_group_set_name - set name for a group 931 * @group: the group 932 * @name: name 933 * 934 * Allow iommu driver to set a name for a group. When set it will 935 * appear in a name attribute file under the group in sysfs. 936 */ 937 int iommu_group_set_name(struct iommu_group *group, const char *name) 938 { 939 int ret; 940 941 if (group->name) { 942 iommu_group_remove_file(group, &iommu_group_attr_name); 943 kfree(group->name); 944 group->name = NULL; 945 if (!name) 946 return 0; 947 } 948 949 group->name = kstrdup(name, GFP_KERNEL); 950 if (!group->name) 951 return -ENOMEM; 952 953 ret = iommu_group_create_file(group, &iommu_group_attr_name); 954 if (ret) { 955 kfree(group->name); 956 group->name = NULL; 957 return ret; 958 } 959 960 return 0; 961 } 962 EXPORT_SYMBOL_GPL(iommu_group_set_name); 963 964 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 965 struct device *dev) 966 { 967 struct iommu_resv_region *entry; 968 struct list_head mappings; 969 unsigned long pg_size; 970 int ret = 0; 971 972 if (!iommu_is_dma_domain(domain)) 973 return 0; 974 975 BUG_ON(!domain->pgsize_bitmap); 976 977 pg_size = 1UL << __ffs(domain->pgsize_bitmap); 978 INIT_LIST_HEAD(&mappings); 979 980 iommu_get_resv_regions(dev, &mappings); 981 982 /* We need to consider overlapping regions for different devices */ 983 list_for_each_entry(entry, &mappings, list) { 984 dma_addr_t start, end, addr; 985 size_t map_size = 0; 986 987 start = ALIGN(entry->start, pg_size); 988 end = ALIGN(entry->start + entry->length, pg_size); 989 990 if (entry->type != IOMMU_RESV_DIRECT && 991 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) 992 continue; 993 994 for (addr = start; addr <= end; addr += pg_size) { 995 phys_addr_t phys_addr; 996 997 if (addr == end) 998 goto map_end; 999 1000 phys_addr = iommu_iova_to_phys(domain, addr); 1001 if (!phys_addr) { 1002 map_size += pg_size; 1003 continue; 1004 } 1005 1006 map_end: 1007 if (map_size) { 1008 ret = iommu_map(domain, addr - map_size, 1009 addr - map_size, map_size, 1010 entry->prot, GFP_KERNEL); 1011 if (ret) 1012 goto out; 1013 map_size = 0; 1014 } 1015 } 1016 1017 } 1018 1019 iommu_flush_iotlb_all(domain); 1020 1021 out: 1022 iommu_put_resv_regions(dev, &mappings); 1023 1024 return ret; 1025 } 1026 1027 /** 1028 * iommu_group_add_device - add a device to an iommu group 1029 * @group: the group into which to add the device (reference should be held) 1030 * @dev: the device 1031 * 1032 * This function is called by an iommu driver to add a device into a 1033 * group. Adding a device increments the group reference count. 1034 */ 1035 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1036 { 1037 int ret, i = 0; 1038 struct group_device *device; 1039 1040 device = kzalloc(sizeof(*device), GFP_KERNEL); 1041 if (!device) 1042 return -ENOMEM; 1043 1044 device->dev = dev; 1045 1046 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1047 if (ret) 1048 goto err_free_device; 1049 1050 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1051 rename: 1052 if (!device->name) { 1053 ret = -ENOMEM; 1054 goto err_remove_link; 1055 } 1056 1057 ret = sysfs_create_link_nowarn(group->devices_kobj, 1058 &dev->kobj, device->name); 1059 if (ret) { 1060 if (ret == -EEXIST && i >= 0) { 1061 /* 1062 * Account for the slim chance of collision 1063 * and append an instance to the name. 1064 */ 1065 kfree(device->name); 1066 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1067 kobject_name(&dev->kobj), i++); 1068 goto rename; 1069 } 1070 goto err_free_name; 1071 } 1072 1073 iommu_group_ref_get(group); 1074 dev->iommu_group = group; 1075 1076 mutex_lock(&group->mutex); 1077 list_add_tail(&device->list, &group->devices); 1078 mutex_unlock(&group->mutex); 1079 trace_add_device_to_group(group->id, dev); 1080 1081 dev_info(dev, "Adding to iommu group %d\n", group->id); 1082 1083 return 0; 1084 1085 err_free_name: 1086 kfree(device->name); 1087 err_remove_link: 1088 sysfs_remove_link(&dev->kobj, "iommu_group"); 1089 err_free_device: 1090 kfree(device); 1091 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1092 return ret; 1093 } 1094 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1095 1096 /** 1097 * iommu_group_remove_device - remove a device from it's current group 1098 * @dev: device to be removed 1099 * 1100 * This function is called by an iommu driver to remove the device from 1101 * it's current group. This decrements the iommu group reference count. 1102 */ 1103 void iommu_group_remove_device(struct device *dev) 1104 { 1105 struct iommu_group *group = dev->iommu_group; 1106 1107 if (!group) 1108 return; 1109 1110 dev_info(dev, "Removing from iommu group %d\n", group->id); 1111 1112 mutex_lock(&group->mutex); 1113 __iommu_group_remove_device(dev); 1114 mutex_unlock(&group->mutex); 1115 1116 /* Pairs with the get in iommu_group_add_device() */ 1117 iommu_group_put(group); 1118 } 1119 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1120 1121 /** 1122 * iommu_group_for_each_dev - iterate over each device in the group 1123 * @group: the group 1124 * @data: caller opaque data to be passed to callback function 1125 * @fn: caller supplied callback function 1126 * 1127 * This function is called by group users to iterate over group devices. 1128 * Callers should hold a reference count to the group during callback. 1129 * The group->mutex is held across callbacks, which will block calls to 1130 * iommu_group_add/remove_device. 1131 */ 1132 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1133 int (*fn)(struct device *, void *)) 1134 { 1135 struct group_device *device; 1136 int ret = 0; 1137 1138 mutex_lock(&group->mutex); 1139 for_each_group_device(group, device) { 1140 ret = fn(device->dev, data); 1141 if (ret) 1142 break; 1143 } 1144 mutex_unlock(&group->mutex); 1145 1146 return ret; 1147 } 1148 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1149 1150 /** 1151 * iommu_group_get - Return the group for a device and increment reference 1152 * @dev: get the group that this device belongs to 1153 * 1154 * This function is called by iommu drivers and users to get the group 1155 * for the specified device. If found, the group is returned and the group 1156 * reference in incremented, else NULL. 1157 */ 1158 struct iommu_group *iommu_group_get(struct device *dev) 1159 { 1160 struct iommu_group *group = dev->iommu_group; 1161 1162 if (group) 1163 kobject_get(group->devices_kobj); 1164 1165 return group; 1166 } 1167 EXPORT_SYMBOL_GPL(iommu_group_get); 1168 1169 /** 1170 * iommu_group_ref_get - Increment reference on a group 1171 * @group: the group to use, must not be NULL 1172 * 1173 * This function is called by iommu drivers to take additional references on an 1174 * existing group. Returns the given group for convenience. 1175 */ 1176 struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1177 { 1178 kobject_get(group->devices_kobj); 1179 return group; 1180 } 1181 EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1182 1183 /** 1184 * iommu_group_put - Decrement group reference 1185 * @group: the group to use 1186 * 1187 * This function is called by iommu drivers and users to release the 1188 * iommu group. Once the reference count is zero, the group is released. 1189 */ 1190 void iommu_group_put(struct iommu_group *group) 1191 { 1192 if (group) 1193 kobject_put(group->devices_kobj); 1194 } 1195 EXPORT_SYMBOL_GPL(iommu_group_put); 1196 1197 /** 1198 * iommu_register_device_fault_handler() - Register a device fault handler 1199 * @dev: the device 1200 * @handler: the fault handler 1201 * @data: private data passed as argument to the handler 1202 * 1203 * When an IOMMU fault event is received, this handler gets called with the 1204 * fault event and data as argument. The handler should return 0 on success. If 1205 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also 1206 * complete the fault by calling iommu_page_response() with one of the following 1207 * response code: 1208 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation 1209 * - IOMMU_PAGE_RESP_INVALID: terminate the fault 1210 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting 1211 * page faults if possible. 1212 * 1213 * Return 0 if the fault handler was installed successfully, or an error. 1214 */ 1215 int iommu_register_device_fault_handler(struct device *dev, 1216 iommu_dev_fault_handler_t handler, 1217 void *data) 1218 { 1219 struct dev_iommu *param = dev->iommu; 1220 int ret = 0; 1221 1222 if (!param) 1223 return -EINVAL; 1224 1225 mutex_lock(¶m->lock); 1226 /* Only allow one fault handler registered for each device */ 1227 if (param->fault_param) { 1228 ret = -EBUSY; 1229 goto done_unlock; 1230 } 1231 1232 get_device(dev); 1233 param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); 1234 if (!param->fault_param) { 1235 put_device(dev); 1236 ret = -ENOMEM; 1237 goto done_unlock; 1238 } 1239 param->fault_param->handler = handler; 1240 param->fault_param->data = data; 1241 mutex_init(¶m->fault_param->lock); 1242 INIT_LIST_HEAD(¶m->fault_param->faults); 1243 1244 done_unlock: 1245 mutex_unlock(¶m->lock); 1246 1247 return ret; 1248 } 1249 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); 1250 1251 /** 1252 * iommu_unregister_device_fault_handler() - Unregister the device fault handler 1253 * @dev: the device 1254 * 1255 * Remove the device fault handler installed with 1256 * iommu_register_device_fault_handler(). 1257 * 1258 * Return 0 on success, or an error. 1259 */ 1260 int iommu_unregister_device_fault_handler(struct device *dev) 1261 { 1262 struct dev_iommu *param = dev->iommu; 1263 int ret = 0; 1264 1265 if (!param) 1266 return -EINVAL; 1267 1268 mutex_lock(¶m->lock); 1269 1270 if (!param->fault_param) 1271 goto unlock; 1272 1273 /* we cannot unregister handler if there are pending faults */ 1274 if (!list_empty(¶m->fault_param->faults)) { 1275 ret = -EBUSY; 1276 goto unlock; 1277 } 1278 1279 kfree(param->fault_param); 1280 param->fault_param = NULL; 1281 put_device(dev); 1282 unlock: 1283 mutex_unlock(¶m->lock); 1284 1285 return ret; 1286 } 1287 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); 1288 1289 /** 1290 * iommu_report_device_fault() - Report fault event to device driver 1291 * @dev: the device 1292 * @evt: fault event data 1293 * 1294 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ 1295 * handler. When this function fails and the fault is recoverable, it is the 1296 * caller's responsibility to complete the fault. 1297 * 1298 * Return 0 on success, or an error. 1299 */ 1300 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) 1301 { 1302 struct dev_iommu *param = dev->iommu; 1303 struct iommu_fault_event *evt_pending = NULL; 1304 struct iommu_fault_param *fparam; 1305 int ret = 0; 1306 1307 if (!param || !evt) 1308 return -EINVAL; 1309 1310 /* we only report device fault if there is a handler registered */ 1311 mutex_lock(¶m->lock); 1312 fparam = param->fault_param; 1313 if (!fparam || !fparam->handler) { 1314 ret = -EINVAL; 1315 goto done_unlock; 1316 } 1317 1318 if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && 1319 (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { 1320 evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), 1321 GFP_KERNEL); 1322 if (!evt_pending) { 1323 ret = -ENOMEM; 1324 goto done_unlock; 1325 } 1326 mutex_lock(&fparam->lock); 1327 list_add_tail(&evt_pending->list, &fparam->faults); 1328 mutex_unlock(&fparam->lock); 1329 } 1330 1331 ret = fparam->handler(&evt->fault, fparam->data); 1332 if (ret && evt_pending) { 1333 mutex_lock(&fparam->lock); 1334 list_del(&evt_pending->list); 1335 mutex_unlock(&fparam->lock); 1336 kfree(evt_pending); 1337 } 1338 done_unlock: 1339 mutex_unlock(¶m->lock); 1340 return ret; 1341 } 1342 EXPORT_SYMBOL_GPL(iommu_report_device_fault); 1343 1344 int iommu_page_response(struct device *dev, 1345 struct iommu_page_response *msg) 1346 { 1347 bool needs_pasid; 1348 int ret = -EINVAL; 1349 struct iommu_fault_event *evt; 1350 struct iommu_fault_page_request *prm; 1351 struct dev_iommu *param = dev->iommu; 1352 const struct iommu_ops *ops = dev_iommu_ops(dev); 1353 bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; 1354 1355 if (!ops->page_response) 1356 return -ENODEV; 1357 1358 if (!param || !param->fault_param) 1359 return -EINVAL; 1360 1361 if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || 1362 msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) 1363 return -EINVAL; 1364 1365 /* Only send response if there is a fault report pending */ 1366 mutex_lock(¶m->fault_param->lock); 1367 if (list_empty(¶m->fault_param->faults)) { 1368 dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); 1369 goto done_unlock; 1370 } 1371 /* 1372 * Check if we have a matching page request pending to respond, 1373 * otherwise return -EINVAL 1374 */ 1375 list_for_each_entry(evt, ¶m->fault_param->faults, list) { 1376 prm = &evt->fault.prm; 1377 if (prm->grpid != msg->grpid) 1378 continue; 1379 1380 /* 1381 * If the PASID is required, the corresponding request is 1382 * matched using the group ID, the PASID valid bit and the PASID 1383 * value. Otherwise only the group ID matches request and 1384 * response. 1385 */ 1386 needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; 1387 if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) 1388 continue; 1389 1390 if (!needs_pasid && has_pasid) { 1391 /* No big deal, just clear it. */ 1392 msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; 1393 msg->pasid = 0; 1394 } 1395 1396 ret = ops->page_response(dev, evt, msg); 1397 list_del(&evt->list); 1398 kfree(evt); 1399 break; 1400 } 1401 1402 done_unlock: 1403 mutex_unlock(¶m->fault_param->lock); 1404 return ret; 1405 } 1406 EXPORT_SYMBOL_GPL(iommu_page_response); 1407 1408 /** 1409 * iommu_group_id - Return ID for a group 1410 * @group: the group to ID 1411 * 1412 * Return the unique ID for the group matching the sysfs group number. 1413 */ 1414 int iommu_group_id(struct iommu_group *group) 1415 { 1416 return group->id; 1417 } 1418 EXPORT_SYMBOL_GPL(iommu_group_id); 1419 1420 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1421 unsigned long *devfns); 1422 1423 /* 1424 * To consider a PCI device isolated, we require ACS to support Source 1425 * Validation, Request Redirection, Completer Redirection, and Upstream 1426 * Forwarding. This effectively means that devices cannot spoof their 1427 * requester ID, requests and completions cannot be redirected, and all 1428 * transactions are forwarded upstream, even as it passes through a 1429 * bridge where the target device is downstream. 1430 */ 1431 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1432 1433 /* 1434 * For multifunction devices which are not isolated from each other, find 1435 * all the other non-isolated functions and look for existing groups. For 1436 * each function, we also need to look for aliases to or from other devices 1437 * that may already have a group. 1438 */ 1439 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1440 unsigned long *devfns) 1441 { 1442 struct pci_dev *tmp = NULL; 1443 struct iommu_group *group; 1444 1445 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1446 return NULL; 1447 1448 for_each_pci_dev(tmp) { 1449 if (tmp == pdev || tmp->bus != pdev->bus || 1450 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1451 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1452 continue; 1453 1454 group = get_pci_alias_group(tmp, devfns); 1455 if (group) { 1456 pci_dev_put(tmp); 1457 return group; 1458 } 1459 } 1460 1461 return NULL; 1462 } 1463 1464 /* 1465 * Look for aliases to or from the given device for existing groups. DMA 1466 * aliases are only supported on the same bus, therefore the search 1467 * space is quite small (especially since we're really only looking at pcie 1468 * device, and therefore only expect multiple slots on the root complex or 1469 * downstream switch ports). It's conceivable though that a pair of 1470 * multifunction devices could have aliases between them that would cause a 1471 * loop. To prevent this, we use a bitmap to track where we've been. 1472 */ 1473 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1474 unsigned long *devfns) 1475 { 1476 struct pci_dev *tmp = NULL; 1477 struct iommu_group *group; 1478 1479 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1480 return NULL; 1481 1482 group = iommu_group_get(&pdev->dev); 1483 if (group) 1484 return group; 1485 1486 for_each_pci_dev(tmp) { 1487 if (tmp == pdev || tmp->bus != pdev->bus) 1488 continue; 1489 1490 /* We alias them or they alias us */ 1491 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1492 group = get_pci_alias_group(tmp, devfns); 1493 if (group) { 1494 pci_dev_put(tmp); 1495 return group; 1496 } 1497 1498 group = get_pci_function_alias_group(tmp, devfns); 1499 if (group) { 1500 pci_dev_put(tmp); 1501 return group; 1502 } 1503 } 1504 } 1505 1506 return NULL; 1507 } 1508 1509 struct group_for_pci_data { 1510 struct pci_dev *pdev; 1511 struct iommu_group *group; 1512 }; 1513 1514 /* 1515 * DMA alias iterator callback, return the last seen device. Stop and return 1516 * the IOMMU group if we find one along the way. 1517 */ 1518 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1519 { 1520 struct group_for_pci_data *data = opaque; 1521 1522 data->pdev = pdev; 1523 data->group = iommu_group_get(&pdev->dev); 1524 1525 return data->group != NULL; 1526 } 1527 1528 /* 1529 * Generic device_group call-back function. It just allocates one 1530 * iommu-group per device. 1531 */ 1532 struct iommu_group *generic_device_group(struct device *dev) 1533 { 1534 return iommu_group_alloc(); 1535 } 1536 EXPORT_SYMBOL_GPL(generic_device_group); 1537 1538 /* 1539 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1540 * to find or create an IOMMU group for a device. 1541 */ 1542 struct iommu_group *pci_device_group(struct device *dev) 1543 { 1544 struct pci_dev *pdev = to_pci_dev(dev); 1545 struct group_for_pci_data data; 1546 struct pci_bus *bus; 1547 struct iommu_group *group = NULL; 1548 u64 devfns[4] = { 0 }; 1549 1550 if (WARN_ON(!dev_is_pci(dev))) 1551 return ERR_PTR(-EINVAL); 1552 1553 /* 1554 * Find the upstream DMA alias for the device. A device must not 1555 * be aliased due to topology in order to have its own IOMMU group. 1556 * If we find an alias along the way that already belongs to a 1557 * group, use it. 1558 */ 1559 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1560 return data.group; 1561 1562 pdev = data.pdev; 1563 1564 /* 1565 * Continue upstream from the point of minimum IOMMU granularity 1566 * due to aliases to the point where devices are protected from 1567 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1568 * group, use it. 1569 */ 1570 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1571 if (!bus->self) 1572 continue; 1573 1574 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1575 break; 1576 1577 pdev = bus->self; 1578 1579 group = iommu_group_get(&pdev->dev); 1580 if (group) 1581 return group; 1582 } 1583 1584 /* 1585 * Look for existing groups on device aliases. If we alias another 1586 * device or another device aliases us, use the same group. 1587 */ 1588 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1589 if (group) 1590 return group; 1591 1592 /* 1593 * Look for existing groups on non-isolated functions on the same 1594 * slot and aliases of those funcions, if any. No need to clear 1595 * the search bitmap, the tested devfns are still valid. 1596 */ 1597 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1598 if (group) 1599 return group; 1600 1601 /* No shared group found, allocate new */ 1602 return iommu_group_alloc(); 1603 } 1604 EXPORT_SYMBOL_GPL(pci_device_group); 1605 1606 /* Get the IOMMU group for device on fsl-mc bus */ 1607 struct iommu_group *fsl_mc_device_group(struct device *dev) 1608 { 1609 struct device *cont_dev = fsl_mc_cont_dev(dev); 1610 struct iommu_group *group; 1611 1612 group = iommu_group_get(cont_dev); 1613 if (!group) 1614 group = iommu_group_alloc(); 1615 return group; 1616 } 1617 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1618 1619 static int iommu_get_def_domain_type(struct device *dev) 1620 { 1621 const struct iommu_ops *ops = dev_iommu_ops(dev); 1622 1623 if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) 1624 return IOMMU_DOMAIN_DMA; 1625 1626 if (ops->def_domain_type) 1627 return ops->def_domain_type(dev); 1628 1629 return 0; 1630 } 1631 1632 static struct iommu_domain * 1633 __iommu_group_alloc_default_domain(const struct bus_type *bus, 1634 struct iommu_group *group, int req_type) 1635 { 1636 if (group->default_domain && group->default_domain->type == req_type) 1637 return group->default_domain; 1638 return __iommu_domain_alloc(bus, req_type); 1639 } 1640 1641 /* 1642 * req_type of 0 means "auto" which means to select a domain based on 1643 * iommu_def_domain_type or what the driver actually supports. 1644 */ 1645 static struct iommu_domain * 1646 iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1647 { 1648 const struct bus_type *bus = 1649 list_first_entry(&group->devices, struct group_device, list) 1650 ->dev->bus; 1651 struct iommu_domain *dom; 1652 1653 lockdep_assert_held(&group->mutex); 1654 1655 if (req_type) 1656 return __iommu_group_alloc_default_domain(bus, group, req_type); 1657 1658 /* The driver gave no guidance on what type to use, try the default */ 1659 dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type); 1660 if (dom) 1661 return dom; 1662 1663 /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ 1664 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) 1665 return NULL; 1666 dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA); 1667 if (!dom) 1668 return NULL; 1669 1670 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1671 iommu_def_domain_type, group->name); 1672 return dom; 1673 } 1674 1675 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1676 { 1677 return group->default_domain; 1678 } 1679 1680 static int probe_iommu_group(struct device *dev, void *data) 1681 { 1682 struct list_head *group_list = data; 1683 int ret; 1684 1685 ret = __iommu_probe_device(dev, group_list); 1686 if (ret == -ENODEV) 1687 ret = 0; 1688 1689 return ret; 1690 } 1691 1692 static int iommu_bus_notifier(struct notifier_block *nb, 1693 unsigned long action, void *data) 1694 { 1695 struct device *dev = data; 1696 1697 if (action == BUS_NOTIFY_ADD_DEVICE) { 1698 int ret; 1699 1700 ret = iommu_probe_device(dev); 1701 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1702 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1703 iommu_release_device(dev); 1704 return NOTIFY_OK; 1705 } 1706 1707 return 0; 1708 } 1709 1710 /* A target_type of 0 will select the best domain type and cannot fail */ 1711 static int iommu_get_default_domain_type(struct iommu_group *group, 1712 int target_type) 1713 { 1714 int best_type = target_type; 1715 struct group_device *gdev; 1716 struct device *last_dev; 1717 1718 lockdep_assert_held(&group->mutex); 1719 1720 for_each_group_device(group, gdev) { 1721 unsigned int type = iommu_get_def_domain_type(gdev->dev); 1722 1723 if (best_type && type && best_type != type) { 1724 if (target_type) { 1725 dev_err_ratelimited( 1726 gdev->dev, 1727 "Device cannot be in %s domain\n", 1728 iommu_domain_type_str(target_type)); 1729 return -1; 1730 } 1731 1732 dev_warn( 1733 gdev->dev, 1734 "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", 1735 iommu_domain_type_str(type), dev_name(last_dev), 1736 iommu_domain_type_str(best_type)); 1737 return 0; 1738 } 1739 if (!best_type) 1740 best_type = type; 1741 last_dev = gdev->dev; 1742 } 1743 return best_type; 1744 } 1745 1746 static void iommu_group_do_probe_finalize(struct device *dev) 1747 { 1748 const struct iommu_ops *ops = dev_iommu_ops(dev); 1749 1750 if (ops->probe_finalize) 1751 ops->probe_finalize(dev); 1752 } 1753 1754 int bus_iommu_probe(const struct bus_type *bus) 1755 { 1756 struct iommu_group *group, *next; 1757 LIST_HEAD(group_list); 1758 int ret; 1759 1760 /* 1761 * This code-path does not allocate the default domain when 1762 * creating the iommu group, so do it after the groups are 1763 * created. 1764 */ 1765 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1766 if (ret) 1767 return ret; 1768 1769 list_for_each_entry_safe(group, next, &group_list, entry) { 1770 struct group_device *gdev; 1771 1772 mutex_lock(&group->mutex); 1773 1774 /* Remove item from the list */ 1775 list_del_init(&group->entry); 1776 1777 ret = iommu_setup_default_domain(group, 0); 1778 if (ret) { 1779 mutex_unlock(&group->mutex); 1780 return ret; 1781 } 1782 mutex_unlock(&group->mutex); 1783 1784 /* 1785 * FIXME: Mis-locked because the ops->probe_finalize() call-back 1786 * of some IOMMU drivers calls arm_iommu_attach_device() which 1787 * in-turn might call back into IOMMU core code, where it tries 1788 * to take group->mutex, resulting in a deadlock. 1789 */ 1790 for_each_group_device(group, gdev) 1791 iommu_group_do_probe_finalize(gdev->dev); 1792 } 1793 1794 return 0; 1795 } 1796 1797 bool iommu_present(const struct bus_type *bus) 1798 { 1799 return bus->iommu_ops != NULL; 1800 } 1801 EXPORT_SYMBOL_GPL(iommu_present); 1802 1803 /** 1804 * device_iommu_capable() - check for a general IOMMU capability 1805 * @dev: device to which the capability would be relevant, if available 1806 * @cap: IOMMU capability 1807 * 1808 * Return: true if an IOMMU is present and supports the given capability 1809 * for the given device, otherwise false. 1810 */ 1811 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1812 { 1813 const struct iommu_ops *ops; 1814 1815 if (!dev->iommu || !dev->iommu->iommu_dev) 1816 return false; 1817 1818 ops = dev_iommu_ops(dev); 1819 if (!ops->capable) 1820 return false; 1821 1822 return ops->capable(dev, cap); 1823 } 1824 EXPORT_SYMBOL_GPL(device_iommu_capable); 1825 1826 /** 1827 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1828 * for a group 1829 * @group: Group to query 1830 * 1831 * IOMMU groups should not have differing values of 1832 * msi_device_has_isolated_msi() for devices in a group. However nothing 1833 * directly prevents this, so ensure mistakes don't result in isolation failures 1834 * by checking that all the devices are the same. 1835 */ 1836 bool iommu_group_has_isolated_msi(struct iommu_group *group) 1837 { 1838 struct group_device *group_dev; 1839 bool ret = true; 1840 1841 mutex_lock(&group->mutex); 1842 for_each_group_device(group, group_dev) 1843 ret &= msi_device_has_isolated_msi(group_dev->dev); 1844 mutex_unlock(&group->mutex); 1845 return ret; 1846 } 1847 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 1848 1849 /** 1850 * iommu_set_fault_handler() - set a fault handler for an iommu domain 1851 * @domain: iommu domain 1852 * @handler: fault handler 1853 * @token: user data, will be passed back to the fault handler 1854 * 1855 * This function should be used by IOMMU users which want to be notified 1856 * whenever an IOMMU fault happens. 1857 * 1858 * The fault handler itself should return 0 on success, and an appropriate 1859 * error code otherwise. 1860 */ 1861 void iommu_set_fault_handler(struct iommu_domain *domain, 1862 iommu_fault_handler_t handler, 1863 void *token) 1864 { 1865 BUG_ON(!domain); 1866 1867 domain->handler = handler; 1868 domain->handler_token = token; 1869 } 1870 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1871 1872 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, 1873 unsigned type) 1874 { 1875 struct iommu_domain *domain; 1876 unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; 1877 1878 if (bus == NULL || bus->iommu_ops == NULL) 1879 return NULL; 1880 1881 domain = bus->iommu_ops->domain_alloc(alloc_type); 1882 if (!domain) 1883 return NULL; 1884 1885 domain->type = type; 1886 /* 1887 * If not already set, assume all sizes by default; the driver 1888 * may override this later 1889 */ 1890 if (!domain->pgsize_bitmap) 1891 domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 1892 1893 if (!domain->ops) 1894 domain->ops = bus->iommu_ops->default_domain_ops; 1895 1896 if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { 1897 iommu_domain_free(domain); 1898 domain = NULL; 1899 } 1900 return domain; 1901 } 1902 1903 struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) 1904 { 1905 return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); 1906 } 1907 EXPORT_SYMBOL_GPL(iommu_domain_alloc); 1908 1909 void iommu_domain_free(struct iommu_domain *domain) 1910 { 1911 if (domain->type == IOMMU_DOMAIN_SVA) 1912 mmdrop(domain->mm); 1913 iommu_put_dma_cookie(domain); 1914 domain->ops->free(domain); 1915 } 1916 EXPORT_SYMBOL_GPL(iommu_domain_free); 1917 1918 /* 1919 * Put the group's domain back to the appropriate core-owned domain - either the 1920 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 1921 */ 1922 static void __iommu_group_set_core_domain(struct iommu_group *group) 1923 { 1924 struct iommu_domain *new_domain; 1925 1926 if (group->owner) 1927 new_domain = group->blocking_domain; 1928 else 1929 new_domain = group->default_domain; 1930 1931 __iommu_group_set_domain_nofail(group, new_domain); 1932 } 1933 1934 static int __iommu_attach_device(struct iommu_domain *domain, 1935 struct device *dev) 1936 { 1937 int ret; 1938 1939 if (unlikely(domain->ops->attach_dev == NULL)) 1940 return -ENODEV; 1941 1942 ret = domain->ops->attach_dev(domain, dev); 1943 if (ret) 1944 return ret; 1945 dev->iommu->attach_deferred = 0; 1946 trace_attach_device_to_domain(dev); 1947 return 0; 1948 } 1949 1950 /** 1951 * iommu_attach_device - Attach an IOMMU domain to a device 1952 * @domain: IOMMU domain to attach 1953 * @dev: Device that will be attached 1954 * 1955 * Returns 0 on success and error code on failure 1956 * 1957 * Note that EINVAL can be treated as a soft failure, indicating 1958 * that certain configuration of the domain is incompatible with 1959 * the device. In this case attaching a different domain to the 1960 * device may succeed. 1961 */ 1962 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 1963 { 1964 struct iommu_group *group; 1965 int ret; 1966 1967 group = iommu_group_get(dev); 1968 if (!group) 1969 return -ENODEV; 1970 1971 /* 1972 * Lock the group to make sure the device-count doesn't 1973 * change while we are attaching 1974 */ 1975 mutex_lock(&group->mutex); 1976 ret = -EINVAL; 1977 if (list_count_nodes(&group->devices) != 1) 1978 goto out_unlock; 1979 1980 ret = __iommu_attach_group(domain, group); 1981 1982 out_unlock: 1983 mutex_unlock(&group->mutex); 1984 iommu_group_put(group); 1985 1986 return ret; 1987 } 1988 EXPORT_SYMBOL_GPL(iommu_attach_device); 1989 1990 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 1991 { 1992 if (dev->iommu && dev->iommu->attach_deferred) 1993 return __iommu_attach_device(domain, dev); 1994 1995 return 0; 1996 } 1997 1998 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 1999 { 2000 struct iommu_group *group; 2001 2002 group = iommu_group_get(dev); 2003 if (!group) 2004 return; 2005 2006 mutex_lock(&group->mutex); 2007 if (WARN_ON(domain != group->domain) || 2008 WARN_ON(list_count_nodes(&group->devices) != 1)) 2009 goto out_unlock; 2010 __iommu_group_set_core_domain(group); 2011 2012 out_unlock: 2013 mutex_unlock(&group->mutex); 2014 iommu_group_put(group); 2015 } 2016 EXPORT_SYMBOL_GPL(iommu_detach_device); 2017 2018 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2019 { 2020 struct iommu_domain *domain; 2021 struct iommu_group *group; 2022 2023 group = iommu_group_get(dev); 2024 if (!group) 2025 return NULL; 2026 2027 domain = group->domain; 2028 2029 iommu_group_put(group); 2030 2031 return domain; 2032 } 2033 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2034 2035 /* 2036 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2037 * guarantees that the group and its default domain are valid and correct. 2038 */ 2039 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2040 { 2041 return dev->iommu_group->default_domain; 2042 } 2043 2044 static int __iommu_attach_group(struct iommu_domain *domain, 2045 struct iommu_group *group) 2046 { 2047 if (group->domain && group->domain != group->default_domain && 2048 group->domain != group->blocking_domain) 2049 return -EBUSY; 2050 2051 return __iommu_group_set_domain(group, domain); 2052 } 2053 2054 /** 2055 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2056 * @domain: IOMMU domain to attach 2057 * @group: IOMMU group that will be attached 2058 * 2059 * Returns 0 on success and error code on failure 2060 * 2061 * Note that EINVAL can be treated as a soft failure, indicating 2062 * that certain configuration of the domain is incompatible with 2063 * the group. In this case attaching a different domain to the 2064 * group may succeed. 2065 */ 2066 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2067 { 2068 int ret; 2069 2070 mutex_lock(&group->mutex); 2071 ret = __iommu_attach_group(domain, group); 2072 mutex_unlock(&group->mutex); 2073 2074 return ret; 2075 } 2076 EXPORT_SYMBOL_GPL(iommu_attach_group); 2077 2078 static int __iommu_device_set_domain(struct iommu_group *group, 2079 struct device *dev, 2080 struct iommu_domain *new_domain, 2081 unsigned int flags) 2082 { 2083 int ret; 2084 2085 if (dev->iommu->attach_deferred) { 2086 if (new_domain == group->default_domain) 2087 return 0; 2088 dev->iommu->attach_deferred = 0; 2089 } 2090 2091 ret = __iommu_attach_device(new_domain, dev); 2092 if (ret) { 2093 /* 2094 * If we have a blocking domain then try to attach that in hopes 2095 * of avoiding a UAF. Modern drivers should implement blocking 2096 * domains as global statics that cannot fail. 2097 */ 2098 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2099 group->blocking_domain && 2100 group->blocking_domain != new_domain) 2101 __iommu_attach_device(group->blocking_domain, dev); 2102 return ret; 2103 } 2104 return 0; 2105 } 2106 2107 /* 2108 * If 0 is returned the group's domain is new_domain. If an error is returned 2109 * then the group's domain will be set back to the existing domain unless 2110 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's 2111 * domains is left inconsistent. This is a driver bug to fail attach with a 2112 * previously good domain. We try to avoid a kernel UAF because of this. 2113 * 2114 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2115 * API works on domains and devices. Bridge that gap by iterating over the 2116 * devices in a group. Ideally we'd have a single device which represents the 2117 * requestor ID of the group, but we also allow IOMMU drivers to create policy 2118 * defined minimum sets, where the physical hardware may be able to distiguish 2119 * members, but we wish to group them at a higher level (ex. untrusted 2120 * multi-function PCI devices). Thus we attach each device. 2121 */ 2122 static int __iommu_group_set_domain_internal(struct iommu_group *group, 2123 struct iommu_domain *new_domain, 2124 unsigned int flags) 2125 { 2126 struct group_device *last_gdev; 2127 struct group_device *gdev; 2128 int result; 2129 int ret; 2130 2131 lockdep_assert_held(&group->mutex); 2132 2133 if (group->domain == new_domain) 2134 return 0; 2135 2136 /* 2137 * New drivers should support default domains, so set_platform_dma() 2138 * op will never be called. Otherwise the NULL domain represents some 2139 * platform specific behavior. 2140 */ 2141 if (!new_domain) { 2142 for_each_group_device(group, gdev) { 2143 const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); 2144 2145 if (!WARN_ON(!ops->set_platform_dma_ops)) 2146 ops->set_platform_dma_ops(gdev->dev); 2147 } 2148 group->domain = NULL; 2149 return 0; 2150 } 2151 2152 /* 2153 * Changing the domain is done by calling attach_dev() on the new 2154 * domain. This switch does not have to be atomic and DMA can be 2155 * discarded during the transition. DMA must only be able to access 2156 * either new_domain or group->domain, never something else. 2157 */ 2158 result = 0; 2159 for_each_group_device(group, gdev) { 2160 ret = __iommu_device_set_domain(group, gdev->dev, new_domain, 2161 flags); 2162 if (ret) { 2163 result = ret; 2164 /* 2165 * Keep trying the other devices in the group. If a 2166 * driver fails attach to an otherwise good domain, and 2167 * does not support blocking domains, it should at least 2168 * drop its reference on the current domain so we don't 2169 * UAF. 2170 */ 2171 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2172 continue; 2173 goto err_revert; 2174 } 2175 } 2176 group->domain = new_domain; 2177 return result; 2178 2179 err_revert: 2180 /* 2181 * This is called in error unwind paths. A well behaved driver should 2182 * always allow us to attach to a domain that was already attached. 2183 */ 2184 last_gdev = gdev; 2185 for_each_group_device(group, gdev) { 2186 const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); 2187 2188 /* 2189 * If set_platform_dma_ops is not present a NULL domain can 2190 * happen only for first probe, in which case we leave 2191 * group->domain as NULL and let release clean everything up. 2192 */ 2193 if (group->domain) 2194 WARN_ON(__iommu_device_set_domain( 2195 group, gdev->dev, group->domain, 2196 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2197 else if (ops->set_platform_dma_ops) 2198 ops->set_platform_dma_ops(gdev->dev); 2199 if (gdev == last_gdev) 2200 break; 2201 } 2202 return ret; 2203 } 2204 2205 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2206 { 2207 mutex_lock(&group->mutex); 2208 __iommu_group_set_core_domain(group); 2209 mutex_unlock(&group->mutex); 2210 } 2211 EXPORT_SYMBOL_GPL(iommu_detach_group); 2212 2213 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2214 { 2215 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2216 return iova; 2217 2218 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2219 return 0; 2220 2221 return domain->ops->iova_to_phys(domain, iova); 2222 } 2223 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2224 2225 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2226 phys_addr_t paddr, size_t size, size_t *count) 2227 { 2228 unsigned int pgsize_idx, pgsize_idx_next; 2229 unsigned long pgsizes; 2230 size_t offset, pgsize, pgsize_next; 2231 unsigned long addr_merge = paddr | iova; 2232 2233 /* Page sizes supported by the hardware and small enough for @size */ 2234 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2235 2236 /* Constrain the page sizes further based on the maximum alignment */ 2237 if (likely(addr_merge)) 2238 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2239 2240 /* Make sure we have at least one suitable page size */ 2241 BUG_ON(!pgsizes); 2242 2243 /* Pick the biggest page size remaining */ 2244 pgsize_idx = __fls(pgsizes); 2245 pgsize = BIT(pgsize_idx); 2246 if (!count) 2247 return pgsize; 2248 2249 /* Find the next biggest support page size, if it exists */ 2250 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2251 if (!pgsizes) 2252 goto out_set_count; 2253 2254 pgsize_idx_next = __ffs(pgsizes); 2255 pgsize_next = BIT(pgsize_idx_next); 2256 2257 /* 2258 * There's no point trying a bigger page size unless the virtual 2259 * and physical addresses are similarly offset within the larger page. 2260 */ 2261 if ((iova ^ paddr) & (pgsize_next - 1)) 2262 goto out_set_count; 2263 2264 /* Calculate the offset to the next page size alignment boundary */ 2265 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2266 2267 /* 2268 * If size is big enough to accommodate the larger page, reduce 2269 * the number of smaller pages. 2270 */ 2271 if (offset + pgsize_next <= size) 2272 size = offset; 2273 2274 out_set_count: 2275 *count = size >> pgsize_idx; 2276 return pgsize; 2277 } 2278 2279 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, 2280 phys_addr_t paddr, size_t size, int prot, 2281 gfp_t gfp, size_t *mapped) 2282 { 2283 const struct iommu_domain_ops *ops = domain->ops; 2284 size_t pgsize, count; 2285 int ret; 2286 2287 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2288 2289 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2290 iova, &paddr, pgsize, count); 2291 2292 if (ops->map_pages) { 2293 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2294 gfp, mapped); 2295 } else { 2296 ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); 2297 *mapped = ret ? 0 : pgsize; 2298 } 2299 2300 return ret; 2301 } 2302 2303 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2304 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2305 { 2306 const struct iommu_domain_ops *ops = domain->ops; 2307 unsigned long orig_iova = iova; 2308 unsigned int min_pagesz; 2309 size_t orig_size = size; 2310 phys_addr_t orig_paddr = paddr; 2311 int ret = 0; 2312 2313 if (unlikely(!(ops->map || ops->map_pages) || 2314 domain->pgsize_bitmap == 0UL)) 2315 return -ENODEV; 2316 2317 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2318 return -EINVAL; 2319 2320 /* find out the minimum page size supported */ 2321 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2322 2323 /* 2324 * both the virtual address and the physical one, as well as 2325 * the size of the mapping, must be aligned (at least) to the 2326 * size of the smallest page supported by the hardware 2327 */ 2328 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2329 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2330 iova, &paddr, size, min_pagesz); 2331 return -EINVAL; 2332 } 2333 2334 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2335 2336 while (size) { 2337 size_t mapped = 0; 2338 2339 ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, 2340 &mapped); 2341 /* 2342 * Some pages may have been mapped, even if an error occurred, 2343 * so we should account for those so they can be unmapped. 2344 */ 2345 size -= mapped; 2346 2347 if (ret) 2348 break; 2349 2350 iova += mapped; 2351 paddr += mapped; 2352 } 2353 2354 /* unroll mapping in case something went wrong */ 2355 if (ret) 2356 iommu_unmap(domain, orig_iova, orig_size - size); 2357 else 2358 trace_map(orig_iova, orig_paddr, orig_size); 2359 2360 return ret; 2361 } 2362 2363 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2364 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2365 { 2366 const struct iommu_domain_ops *ops = domain->ops; 2367 int ret; 2368 2369 might_sleep_if(gfpflags_allow_blocking(gfp)); 2370 2371 /* Discourage passing strange GFP flags */ 2372 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2373 __GFP_HIGHMEM))) 2374 return -EINVAL; 2375 2376 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2377 if (ret == 0 && ops->iotlb_sync_map) 2378 ops->iotlb_sync_map(domain, iova, size); 2379 2380 return ret; 2381 } 2382 EXPORT_SYMBOL_GPL(iommu_map); 2383 2384 static size_t __iommu_unmap_pages(struct iommu_domain *domain, 2385 unsigned long iova, size_t size, 2386 struct iommu_iotlb_gather *iotlb_gather) 2387 { 2388 const struct iommu_domain_ops *ops = domain->ops; 2389 size_t pgsize, count; 2390 2391 pgsize = iommu_pgsize(domain, iova, iova, size, &count); 2392 return ops->unmap_pages ? 2393 ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : 2394 ops->unmap(domain, iova, pgsize, iotlb_gather); 2395 } 2396 2397 static size_t __iommu_unmap(struct iommu_domain *domain, 2398 unsigned long iova, size_t size, 2399 struct iommu_iotlb_gather *iotlb_gather) 2400 { 2401 const struct iommu_domain_ops *ops = domain->ops; 2402 size_t unmapped_page, unmapped = 0; 2403 unsigned long orig_iova = iova; 2404 unsigned int min_pagesz; 2405 2406 if (unlikely(!(ops->unmap || ops->unmap_pages) || 2407 domain->pgsize_bitmap == 0UL)) 2408 return 0; 2409 2410 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2411 return 0; 2412 2413 /* find out the minimum page size supported */ 2414 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2415 2416 /* 2417 * The virtual address, as well as the size of the mapping, must be 2418 * aligned (at least) to the size of the smallest page supported 2419 * by the hardware 2420 */ 2421 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2422 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2423 iova, size, min_pagesz); 2424 return 0; 2425 } 2426 2427 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2428 2429 /* 2430 * Keep iterating until we either unmap 'size' bytes (or more) 2431 * or we hit an area that isn't mapped. 2432 */ 2433 while (unmapped < size) { 2434 unmapped_page = __iommu_unmap_pages(domain, iova, 2435 size - unmapped, 2436 iotlb_gather); 2437 if (!unmapped_page) 2438 break; 2439 2440 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2441 iova, unmapped_page); 2442 2443 iova += unmapped_page; 2444 unmapped += unmapped_page; 2445 } 2446 2447 trace_unmap(orig_iova, size, unmapped); 2448 return unmapped; 2449 } 2450 2451 size_t iommu_unmap(struct iommu_domain *domain, 2452 unsigned long iova, size_t size) 2453 { 2454 struct iommu_iotlb_gather iotlb_gather; 2455 size_t ret; 2456 2457 iommu_iotlb_gather_init(&iotlb_gather); 2458 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2459 iommu_iotlb_sync(domain, &iotlb_gather); 2460 2461 return ret; 2462 } 2463 EXPORT_SYMBOL_GPL(iommu_unmap); 2464 2465 size_t iommu_unmap_fast(struct iommu_domain *domain, 2466 unsigned long iova, size_t size, 2467 struct iommu_iotlb_gather *iotlb_gather) 2468 { 2469 return __iommu_unmap(domain, iova, size, iotlb_gather); 2470 } 2471 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2472 2473 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2474 struct scatterlist *sg, unsigned int nents, int prot, 2475 gfp_t gfp) 2476 { 2477 const struct iommu_domain_ops *ops = domain->ops; 2478 size_t len = 0, mapped = 0; 2479 phys_addr_t start; 2480 unsigned int i = 0; 2481 int ret; 2482 2483 might_sleep_if(gfpflags_allow_blocking(gfp)); 2484 2485 /* Discourage passing strange GFP flags */ 2486 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2487 __GFP_HIGHMEM))) 2488 return -EINVAL; 2489 2490 while (i <= nents) { 2491 phys_addr_t s_phys = sg_phys(sg); 2492 2493 if (len && s_phys != start + len) { 2494 ret = __iommu_map(domain, iova + mapped, start, 2495 len, prot, gfp); 2496 2497 if (ret) 2498 goto out_err; 2499 2500 mapped += len; 2501 len = 0; 2502 } 2503 2504 if (sg_dma_is_bus_address(sg)) 2505 goto next; 2506 2507 if (len) { 2508 len += sg->length; 2509 } else { 2510 len = sg->length; 2511 start = s_phys; 2512 } 2513 2514 next: 2515 if (++i < nents) 2516 sg = sg_next(sg); 2517 } 2518 2519 if (ops->iotlb_sync_map) 2520 ops->iotlb_sync_map(domain, iova, mapped); 2521 return mapped; 2522 2523 out_err: 2524 /* undo mappings already done */ 2525 iommu_unmap(domain, iova, mapped); 2526 2527 return ret; 2528 } 2529 EXPORT_SYMBOL_GPL(iommu_map_sg); 2530 2531 /** 2532 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2533 * @domain: the iommu domain where the fault has happened 2534 * @dev: the device where the fault has happened 2535 * @iova: the faulting address 2536 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2537 * 2538 * This function should be called by the low-level IOMMU implementations 2539 * whenever IOMMU faults happen, to allow high-level users, that are 2540 * interested in such events, to know about them. 2541 * 2542 * This event may be useful for several possible use cases: 2543 * - mere logging of the event 2544 * - dynamic TLB/PTE loading 2545 * - if restarting of the faulting device is required 2546 * 2547 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2548 * PTE/TLB loading will one day be supported, implementations will be able 2549 * to tell whether it succeeded or not according to this return value). 2550 * 2551 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2552 * (though fault handlers can also return -ENOSYS, in case they want to 2553 * elicit the default behavior of the IOMMU drivers). 2554 */ 2555 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2556 unsigned long iova, int flags) 2557 { 2558 int ret = -ENOSYS; 2559 2560 /* 2561 * if upper layers showed interest and installed a fault handler, 2562 * invoke it. 2563 */ 2564 if (domain->handler) 2565 ret = domain->handler(domain, dev, iova, flags, 2566 domain->handler_token); 2567 2568 trace_io_page_fault(dev, iova, flags); 2569 return ret; 2570 } 2571 EXPORT_SYMBOL_GPL(report_iommu_fault); 2572 2573 static int __init iommu_init(void) 2574 { 2575 iommu_group_kset = kset_create_and_add("iommu_groups", 2576 NULL, kernel_kobj); 2577 BUG_ON(!iommu_group_kset); 2578 2579 iommu_debugfs_setup(); 2580 2581 return 0; 2582 } 2583 core_initcall(iommu_init); 2584 2585 int iommu_enable_nesting(struct iommu_domain *domain) 2586 { 2587 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2588 return -EINVAL; 2589 if (!domain->ops->enable_nesting) 2590 return -EINVAL; 2591 return domain->ops->enable_nesting(domain); 2592 } 2593 EXPORT_SYMBOL_GPL(iommu_enable_nesting); 2594 2595 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2596 unsigned long quirk) 2597 { 2598 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2599 return -EINVAL; 2600 if (!domain->ops->set_pgtable_quirks) 2601 return -EINVAL; 2602 return domain->ops->set_pgtable_quirks(domain, quirk); 2603 } 2604 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2605 2606 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2607 { 2608 const struct iommu_ops *ops = dev_iommu_ops(dev); 2609 2610 if (ops->get_resv_regions) 2611 ops->get_resv_regions(dev, list); 2612 } 2613 2614 /** 2615 * iommu_put_resv_regions - release resered regions 2616 * @dev: device for which to free reserved regions 2617 * @list: reserved region list for device 2618 * 2619 * This releases a reserved region list acquired by iommu_get_resv_regions(). 2620 */ 2621 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2622 { 2623 struct iommu_resv_region *entry, *next; 2624 2625 list_for_each_entry_safe(entry, next, list, list) { 2626 if (entry->free) 2627 entry->free(dev, entry); 2628 else 2629 kfree(entry); 2630 } 2631 } 2632 EXPORT_SYMBOL(iommu_put_resv_regions); 2633 2634 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2635 size_t length, int prot, 2636 enum iommu_resv_type type, 2637 gfp_t gfp) 2638 { 2639 struct iommu_resv_region *region; 2640 2641 region = kzalloc(sizeof(*region), gfp); 2642 if (!region) 2643 return NULL; 2644 2645 INIT_LIST_HEAD(®ion->list); 2646 region->start = start; 2647 region->length = length; 2648 region->prot = prot; 2649 region->type = type; 2650 return region; 2651 } 2652 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2653 2654 void iommu_set_default_passthrough(bool cmd_line) 2655 { 2656 if (cmd_line) 2657 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2658 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2659 } 2660 2661 void iommu_set_default_translated(bool cmd_line) 2662 { 2663 if (cmd_line) 2664 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2665 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2666 } 2667 2668 bool iommu_default_passthrough(void) 2669 { 2670 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2671 } 2672 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2673 2674 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 2675 { 2676 const struct iommu_ops *ops = NULL; 2677 struct iommu_device *iommu; 2678 2679 spin_lock(&iommu_device_lock); 2680 list_for_each_entry(iommu, &iommu_device_list, list) 2681 if (iommu->fwnode == fwnode) { 2682 ops = iommu->ops; 2683 break; 2684 } 2685 spin_unlock(&iommu_device_lock); 2686 return ops; 2687 } 2688 2689 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 2690 const struct iommu_ops *ops) 2691 { 2692 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2693 2694 if (fwspec) 2695 return ops == fwspec->ops ? 0 : -EINVAL; 2696 2697 if (!dev_iommu_get(dev)) 2698 return -ENOMEM; 2699 2700 /* Preallocate for the overwhelmingly common case of 1 ID */ 2701 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2702 if (!fwspec) 2703 return -ENOMEM; 2704 2705 of_node_get(to_of_node(iommu_fwnode)); 2706 fwspec->iommu_fwnode = iommu_fwnode; 2707 fwspec->ops = ops; 2708 dev_iommu_fwspec_set(dev, fwspec); 2709 return 0; 2710 } 2711 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2712 2713 void iommu_fwspec_free(struct device *dev) 2714 { 2715 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2716 2717 if (fwspec) { 2718 fwnode_handle_put(fwspec->iommu_fwnode); 2719 kfree(fwspec); 2720 dev_iommu_fwspec_set(dev, NULL); 2721 } 2722 } 2723 EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2724 2725 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2726 { 2727 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2728 int i, new_num; 2729 2730 if (!fwspec) 2731 return -EINVAL; 2732 2733 new_num = fwspec->num_ids + num_ids; 2734 if (new_num > 1) { 2735 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2736 GFP_KERNEL); 2737 if (!fwspec) 2738 return -ENOMEM; 2739 2740 dev_iommu_fwspec_set(dev, fwspec); 2741 } 2742 2743 for (i = 0; i < num_ids; i++) 2744 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2745 2746 fwspec->num_ids = new_num; 2747 return 0; 2748 } 2749 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2750 2751 /* 2752 * Per device IOMMU features. 2753 */ 2754 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2755 { 2756 if (dev->iommu && dev->iommu->iommu_dev) { 2757 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2758 2759 if (ops->dev_enable_feat) 2760 return ops->dev_enable_feat(dev, feat); 2761 } 2762 2763 return -ENODEV; 2764 } 2765 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2766 2767 /* 2768 * The device drivers should do the necessary cleanups before calling this. 2769 */ 2770 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2771 { 2772 if (dev->iommu && dev->iommu->iommu_dev) { 2773 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2774 2775 if (ops->dev_disable_feat) 2776 return ops->dev_disable_feat(dev, feat); 2777 } 2778 2779 return -EBUSY; 2780 } 2781 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2782 2783 /** 2784 * iommu_setup_default_domain - Set the default_domain for the group 2785 * @group: Group to change 2786 * @target_type: Domain type to set as the default_domain 2787 * 2788 * Allocate a default domain and set it as the current domain on the group. If 2789 * the group already has a default domain it will be changed to the target_type. 2790 * When target_type is 0 the default domain is selected based on driver and 2791 * system preferences. 2792 */ 2793 static int iommu_setup_default_domain(struct iommu_group *group, 2794 int target_type) 2795 { 2796 struct iommu_domain *old_dom = group->default_domain; 2797 struct group_device *gdev; 2798 struct iommu_domain *dom; 2799 bool direct_failed; 2800 int req_type; 2801 int ret; 2802 2803 lockdep_assert_held(&group->mutex); 2804 2805 req_type = iommu_get_default_domain_type(group, target_type); 2806 if (req_type < 0) 2807 return -EINVAL; 2808 2809 /* 2810 * There are still some drivers which don't support default domains, so 2811 * we ignore the failure and leave group->default_domain NULL. 2812 * 2813 * We assume that the iommu driver starts up the device in 2814 * 'set_platform_dma_ops' mode if it does not support default domains. 2815 */ 2816 dom = iommu_group_alloc_default_domain(group, req_type); 2817 if (!dom) { 2818 /* Once in default_domain mode we never leave */ 2819 if (group->default_domain) 2820 return -ENODEV; 2821 group->default_domain = NULL; 2822 return 0; 2823 } 2824 2825 if (group->default_domain == dom) 2826 return 0; 2827 2828 /* 2829 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be 2830 * mapped before their device is attached, in order to guarantee 2831 * continuity with any FW activity 2832 */ 2833 direct_failed = false; 2834 for_each_group_device(group, gdev) { 2835 if (iommu_create_device_direct_mappings(dom, gdev->dev)) { 2836 direct_failed = true; 2837 dev_warn_once( 2838 gdev->dev->iommu->iommu_dev->dev, 2839 "IOMMU driver was not able to establish FW requested direct mapping."); 2840 } 2841 } 2842 2843 /* We must set default_domain early for __iommu_device_set_domain */ 2844 group->default_domain = dom; 2845 if (!group->domain) { 2846 /* 2847 * Drivers are not allowed to fail the first domain attach. 2848 * The only way to recover from this is to fail attaching the 2849 * iommu driver and call ops->release_device. Put the domain 2850 * in group->default_domain so it is freed after. 2851 */ 2852 ret = __iommu_group_set_domain_internal( 2853 group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 2854 if (WARN_ON(ret)) 2855 goto out_free; 2856 } else { 2857 ret = __iommu_group_set_domain(group, dom); 2858 if (ret) { 2859 iommu_domain_free(dom); 2860 group->default_domain = old_dom; 2861 return ret; 2862 } 2863 } 2864 2865 /* 2866 * Drivers are supposed to allow mappings to be installed in a domain 2867 * before device attachment, but some don't. Hack around this defect by 2868 * trying again after attaching. If this happens it means the device 2869 * will not continuously have the IOMMU_RESV_DIRECT map. 2870 */ 2871 if (direct_failed) { 2872 for_each_group_device(group, gdev) { 2873 ret = iommu_create_device_direct_mappings(dom, gdev->dev); 2874 if (ret) 2875 goto err_restore; 2876 } 2877 } 2878 2879 err_restore: 2880 if (old_dom) { 2881 __iommu_group_set_domain_internal( 2882 group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 2883 iommu_domain_free(dom); 2884 old_dom = NULL; 2885 } 2886 out_free: 2887 if (old_dom) 2888 iommu_domain_free(old_dom); 2889 return ret; 2890 } 2891 2892 /* 2893 * Changing the default domain through sysfs requires the users to unbind the 2894 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2895 * transition. Return failure if this isn't met. 2896 * 2897 * We need to consider the race between this and the device release path. 2898 * group->mutex is used here to guarantee that the device release path 2899 * will not be entered at the same time. 2900 */ 2901 static ssize_t iommu_group_store_type(struct iommu_group *group, 2902 const char *buf, size_t count) 2903 { 2904 struct group_device *gdev; 2905 int ret, req_type; 2906 2907 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2908 return -EACCES; 2909 2910 if (WARN_ON(!group) || !group->default_domain) 2911 return -EINVAL; 2912 2913 if (sysfs_streq(buf, "identity")) 2914 req_type = IOMMU_DOMAIN_IDENTITY; 2915 else if (sysfs_streq(buf, "DMA")) 2916 req_type = IOMMU_DOMAIN_DMA; 2917 else if (sysfs_streq(buf, "DMA-FQ")) 2918 req_type = IOMMU_DOMAIN_DMA_FQ; 2919 else if (sysfs_streq(buf, "auto")) 2920 req_type = 0; 2921 else 2922 return -EINVAL; 2923 2924 mutex_lock(&group->mutex); 2925 /* We can bring up a flush queue without tearing down the domain. */ 2926 if (req_type == IOMMU_DOMAIN_DMA_FQ && 2927 group->default_domain->type == IOMMU_DOMAIN_DMA) { 2928 ret = iommu_dma_init_fq(group->default_domain); 2929 if (ret) 2930 goto out_unlock; 2931 2932 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 2933 ret = count; 2934 goto out_unlock; 2935 } 2936 2937 /* Otherwise, ensure that device exists and no driver is bound. */ 2938 if (list_empty(&group->devices) || group->owner_cnt) { 2939 ret = -EPERM; 2940 goto out_unlock; 2941 } 2942 2943 ret = iommu_setup_default_domain(group, req_type); 2944 if (ret) 2945 goto out_unlock; 2946 2947 /* 2948 * Release the mutex here because ops->probe_finalize() call-back of 2949 * some vendor IOMMU drivers calls arm_iommu_attach_device() which 2950 * in-turn might call back into IOMMU core code, where it tries to take 2951 * group->mutex, resulting in a deadlock. 2952 */ 2953 mutex_unlock(&group->mutex); 2954 2955 /* Make sure dma_ops is appropriatley set */ 2956 for_each_group_device(group, gdev) 2957 iommu_group_do_probe_finalize(gdev->dev); 2958 return count; 2959 2960 out_unlock: 2961 mutex_unlock(&group->mutex); 2962 return ret ?: count; 2963 } 2964 2965 static bool iommu_is_default_domain(struct iommu_group *group) 2966 { 2967 if (group->domain == group->default_domain) 2968 return true; 2969 2970 /* 2971 * If the default domain was set to identity and it is still an identity 2972 * domain then we consider this a pass. This happens because of 2973 * amd_iommu_init_device() replacing the default idenytity domain with an 2974 * identity domain that has a different configuration for AMDGPU. 2975 */ 2976 if (group->default_domain && 2977 group->default_domain->type == IOMMU_DOMAIN_IDENTITY && 2978 group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) 2979 return true; 2980 return false; 2981 } 2982 2983 /** 2984 * iommu_device_use_default_domain() - Device driver wants to handle device 2985 * DMA through the kernel DMA API. 2986 * @dev: The device. 2987 * 2988 * The device driver about to bind @dev wants to do DMA through the kernel 2989 * DMA API. Return 0 if it is allowed, otherwise an error. 2990 */ 2991 int iommu_device_use_default_domain(struct device *dev) 2992 { 2993 struct iommu_group *group = iommu_group_get(dev); 2994 int ret = 0; 2995 2996 if (!group) 2997 return 0; 2998 2999 mutex_lock(&group->mutex); 3000 if (group->owner_cnt) { 3001 if (group->owner || !iommu_is_default_domain(group) || 3002 !xa_empty(&group->pasid_array)) { 3003 ret = -EBUSY; 3004 goto unlock_out; 3005 } 3006 } 3007 3008 group->owner_cnt++; 3009 3010 unlock_out: 3011 mutex_unlock(&group->mutex); 3012 iommu_group_put(group); 3013 3014 return ret; 3015 } 3016 3017 /** 3018 * iommu_device_unuse_default_domain() - Device driver stops handling device 3019 * DMA through the kernel DMA API. 3020 * @dev: The device. 3021 * 3022 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3023 * It must be called after iommu_device_use_default_domain(). 3024 */ 3025 void iommu_device_unuse_default_domain(struct device *dev) 3026 { 3027 struct iommu_group *group = iommu_group_get(dev); 3028 3029 if (!group) 3030 return; 3031 3032 mutex_lock(&group->mutex); 3033 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3034 group->owner_cnt--; 3035 3036 mutex_unlock(&group->mutex); 3037 iommu_group_put(group); 3038 } 3039 3040 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3041 { 3042 struct group_device *dev = 3043 list_first_entry(&group->devices, struct group_device, list); 3044 3045 if (group->blocking_domain) 3046 return 0; 3047 3048 group->blocking_domain = 3049 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3050 if (!group->blocking_domain) { 3051 /* 3052 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3053 * create an empty domain instead. 3054 */ 3055 group->blocking_domain = __iommu_domain_alloc( 3056 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3057 if (!group->blocking_domain) 3058 return -EINVAL; 3059 } 3060 return 0; 3061 } 3062 3063 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3064 { 3065 int ret; 3066 3067 if ((group->domain && group->domain != group->default_domain) || 3068 !xa_empty(&group->pasid_array)) 3069 return -EBUSY; 3070 3071 ret = __iommu_group_alloc_blocking_domain(group); 3072 if (ret) 3073 return ret; 3074 ret = __iommu_group_set_domain(group, group->blocking_domain); 3075 if (ret) 3076 return ret; 3077 3078 group->owner = owner; 3079 group->owner_cnt++; 3080 return 0; 3081 } 3082 3083 /** 3084 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3085 * @group: The group. 3086 * @owner: Caller specified pointer. Used for exclusive ownership. 3087 * 3088 * This is to support backward compatibility for vfio which manages the dma 3089 * ownership in iommu_group level. New invocations on this interface should be 3090 * prohibited. Only a single owner may exist for a group. 3091 */ 3092 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3093 { 3094 int ret = 0; 3095 3096 if (WARN_ON(!owner)) 3097 return -EINVAL; 3098 3099 mutex_lock(&group->mutex); 3100 if (group->owner_cnt) { 3101 ret = -EPERM; 3102 goto unlock_out; 3103 } 3104 3105 ret = __iommu_take_dma_ownership(group, owner); 3106 unlock_out: 3107 mutex_unlock(&group->mutex); 3108 3109 return ret; 3110 } 3111 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3112 3113 /** 3114 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3115 * @dev: The device. 3116 * @owner: Caller specified pointer. Used for exclusive ownership. 3117 * 3118 * Claim the DMA ownership of a device. Multiple devices in the same group may 3119 * concurrently claim ownership if they present the same owner value. Returns 0 3120 * on success and error code on failure 3121 */ 3122 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3123 { 3124 struct iommu_group *group; 3125 int ret = 0; 3126 3127 if (WARN_ON(!owner)) 3128 return -EINVAL; 3129 3130 group = iommu_group_get(dev); 3131 if (!group) 3132 return -ENODEV; 3133 3134 mutex_lock(&group->mutex); 3135 if (group->owner_cnt) { 3136 if (group->owner != owner) { 3137 ret = -EPERM; 3138 goto unlock_out; 3139 } 3140 group->owner_cnt++; 3141 goto unlock_out; 3142 } 3143 3144 ret = __iommu_take_dma_ownership(group, owner); 3145 unlock_out: 3146 mutex_unlock(&group->mutex); 3147 iommu_group_put(group); 3148 3149 return ret; 3150 } 3151 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3152 3153 static void __iommu_release_dma_ownership(struct iommu_group *group) 3154 { 3155 if (WARN_ON(!group->owner_cnt || !group->owner || 3156 !xa_empty(&group->pasid_array))) 3157 return; 3158 3159 group->owner_cnt = 0; 3160 group->owner = NULL; 3161 __iommu_group_set_domain_nofail(group, group->default_domain); 3162 } 3163 3164 /** 3165 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3166 * @dev: The device 3167 * 3168 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3169 */ 3170 void iommu_group_release_dma_owner(struct iommu_group *group) 3171 { 3172 mutex_lock(&group->mutex); 3173 __iommu_release_dma_ownership(group); 3174 mutex_unlock(&group->mutex); 3175 } 3176 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3177 3178 /** 3179 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3180 * @group: The device. 3181 * 3182 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3183 */ 3184 void iommu_device_release_dma_owner(struct device *dev) 3185 { 3186 struct iommu_group *group = iommu_group_get(dev); 3187 3188 mutex_lock(&group->mutex); 3189 if (group->owner_cnt > 1) 3190 group->owner_cnt--; 3191 else 3192 __iommu_release_dma_ownership(group); 3193 mutex_unlock(&group->mutex); 3194 iommu_group_put(group); 3195 } 3196 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3197 3198 /** 3199 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3200 * @group: The group. 3201 * 3202 * This provides status query on a given group. It is racy and only for 3203 * non-binding status reporting. 3204 */ 3205 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3206 { 3207 unsigned int user; 3208 3209 mutex_lock(&group->mutex); 3210 user = group->owner_cnt; 3211 mutex_unlock(&group->mutex); 3212 3213 return user; 3214 } 3215 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3216 3217 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3218 struct iommu_group *group, ioasid_t pasid) 3219 { 3220 struct group_device *device; 3221 int ret = 0; 3222 3223 for_each_group_device(group, device) { 3224 ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); 3225 if (ret) 3226 break; 3227 } 3228 3229 return ret; 3230 } 3231 3232 static void __iommu_remove_group_pasid(struct iommu_group *group, 3233 ioasid_t pasid) 3234 { 3235 struct group_device *device; 3236 const struct iommu_ops *ops; 3237 3238 for_each_group_device(group, device) { 3239 ops = dev_iommu_ops(device->dev); 3240 ops->remove_dev_pasid(device->dev, pasid); 3241 } 3242 } 3243 3244 /* 3245 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3246 * @domain: the iommu domain. 3247 * @dev: the attached device. 3248 * @pasid: the pasid of the device. 3249 * 3250 * Return: 0 on success, or an error. 3251 */ 3252 int iommu_attach_device_pasid(struct iommu_domain *domain, 3253 struct device *dev, ioasid_t pasid) 3254 { 3255 struct iommu_group *group; 3256 void *curr; 3257 int ret; 3258 3259 if (!domain->ops->set_dev_pasid) 3260 return -EOPNOTSUPP; 3261 3262 group = iommu_group_get(dev); 3263 if (!group) 3264 return -ENODEV; 3265 3266 mutex_lock(&group->mutex); 3267 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); 3268 if (curr) { 3269 ret = xa_err(curr) ? : -EBUSY; 3270 goto out_unlock; 3271 } 3272 3273 ret = __iommu_set_group_pasid(domain, group, pasid); 3274 if (ret) { 3275 __iommu_remove_group_pasid(group, pasid); 3276 xa_erase(&group->pasid_array, pasid); 3277 } 3278 out_unlock: 3279 mutex_unlock(&group->mutex); 3280 iommu_group_put(group); 3281 3282 return ret; 3283 } 3284 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3285 3286 /* 3287 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3288 * @domain: the iommu domain. 3289 * @dev: the attached device. 3290 * @pasid: the pasid of the device. 3291 * 3292 * The @domain must have been attached to @pasid of the @dev with 3293 * iommu_attach_device_pasid(). 3294 */ 3295 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3296 ioasid_t pasid) 3297 { 3298 struct iommu_group *group = iommu_group_get(dev); 3299 3300 mutex_lock(&group->mutex); 3301 __iommu_remove_group_pasid(group, pasid); 3302 WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); 3303 mutex_unlock(&group->mutex); 3304 3305 iommu_group_put(group); 3306 } 3307 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3308 3309 /* 3310 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev 3311 * @dev: the queried device 3312 * @pasid: the pasid of the device 3313 * @type: matched domain type, 0 for any match 3314 * 3315 * This is a variant of iommu_get_domain_for_dev(). It returns the existing 3316 * domain attached to pasid of a device. Callers must hold a lock around this 3317 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of 3318 * type is being manipulated. This API does not internally resolve races with 3319 * attach/detach. 3320 * 3321 * Return: attached domain on success, NULL otherwise. 3322 */ 3323 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, 3324 ioasid_t pasid, 3325 unsigned int type) 3326 { 3327 struct iommu_domain *domain; 3328 struct iommu_group *group; 3329 3330 group = iommu_group_get(dev); 3331 if (!group) 3332 return NULL; 3333 3334 xa_lock(&group->pasid_array); 3335 domain = xa_load(&group->pasid_array, pasid); 3336 if (type && domain && domain->type != type) 3337 domain = ERR_PTR(-EBUSY); 3338 xa_unlock(&group->pasid_array); 3339 iommu_group_put(group); 3340 3341 return domain; 3342 } 3343 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); 3344 3345 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, 3346 struct mm_struct *mm) 3347 { 3348 const struct iommu_ops *ops = dev_iommu_ops(dev); 3349 struct iommu_domain *domain; 3350 3351 domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); 3352 if (!domain) 3353 return NULL; 3354 3355 domain->type = IOMMU_DOMAIN_SVA; 3356 mmgrab(mm); 3357 domain->mm = mm; 3358 domain->iopf_handler = iommu_sva_handle_iopf; 3359 domain->fault_data = mm; 3360 3361 return domain; 3362 } 3363