1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7 #define pr_fmt(fmt) "iommu: " fmt 8 9 #include <linux/amba/bus.h> 10 #include <linux/device.h> 11 #include <linux/kernel.h> 12 #include <linux/bits.h> 13 #include <linux/bug.h> 14 #include <linux/types.h> 15 #include <linux/init.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/errno.h> 19 #include <linux/host1x_context_bus.h> 20 #include <linux/iommu.h> 21 #include <linux/idr.h> 22 #include <linux/err.h> 23 #include <linux/pci.h> 24 #include <linux/pci-ats.h> 25 #include <linux/bitops.h> 26 #include <linux/platform_device.h> 27 #include <linux/property.h> 28 #include <linux/fsl/mc.h> 29 #include <linux/module.h> 30 #include <linux/cc_platform.h> 31 #include <trace/events/iommu.h> 32 #include <linux/sched/mm.h> 33 #include <linux/msi.h> 34 35 #include "dma-iommu.h" 36 37 #include "iommu-sva.h" 38 39 static struct kset *iommu_group_kset; 40 static DEFINE_IDA(iommu_group_ida); 41 42 static unsigned int iommu_def_domain_type __read_mostly; 43 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 44 static u32 iommu_cmd_line __read_mostly; 45 46 struct iommu_group { 47 struct kobject kobj; 48 struct kobject *devices_kobj; 49 struct list_head devices; 50 struct xarray pasid_array; 51 struct mutex mutex; 52 void *iommu_data; 53 void (*iommu_data_release)(void *iommu_data); 54 char *name; 55 int id; 56 struct iommu_domain *default_domain; 57 struct iommu_domain *blocking_domain; 58 struct iommu_domain *domain; 59 struct list_head entry; 60 unsigned int owner_cnt; 61 void *owner; 62 }; 63 64 struct group_device { 65 struct list_head list; 66 struct device *dev; 67 char *name; 68 }; 69 70 struct iommu_group_attribute { 71 struct attribute attr; 72 ssize_t (*show)(struct iommu_group *group, char *buf); 73 ssize_t (*store)(struct iommu_group *group, 74 const char *buf, size_t count); 75 }; 76 77 static const char * const iommu_group_resv_type_string[] = { 78 [IOMMU_RESV_DIRECT] = "direct", 79 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 80 [IOMMU_RESV_RESERVED] = "reserved", 81 [IOMMU_RESV_MSI] = "msi", 82 [IOMMU_RESV_SW_MSI] = "msi", 83 }; 84 85 #define IOMMU_CMD_LINE_DMA_API BIT(0) 86 #define IOMMU_CMD_LINE_STRICT BIT(1) 87 88 static int iommu_bus_notifier(struct notifier_block *nb, 89 unsigned long action, void *data); 90 static int iommu_alloc_default_domain(struct iommu_group *group, 91 struct device *dev); 92 static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, 93 unsigned type); 94 static int __iommu_attach_device(struct iommu_domain *domain, 95 struct device *dev); 96 static int __iommu_attach_group(struct iommu_domain *domain, 97 struct iommu_group *group); 98 static int __iommu_group_set_domain(struct iommu_group *group, 99 struct iommu_domain *new_domain); 100 static int iommu_create_device_direct_mappings(struct iommu_group *group, 101 struct device *dev); 102 static struct iommu_group *iommu_group_get_for_dev(struct device *dev); 103 static ssize_t iommu_group_store_type(struct iommu_group *group, 104 const char *buf, size_t count); 105 106 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 107 struct iommu_group_attribute iommu_group_attr_##_name = \ 108 __ATTR(_name, _mode, _show, _store) 109 110 #define to_iommu_group_attr(_attr) \ 111 container_of(_attr, struct iommu_group_attribute, attr) 112 #define to_iommu_group(_kobj) \ 113 container_of(_kobj, struct iommu_group, kobj) 114 115 static LIST_HEAD(iommu_device_list); 116 static DEFINE_SPINLOCK(iommu_device_lock); 117 118 static struct bus_type * const iommu_buses[] = { 119 &platform_bus_type, 120 #ifdef CONFIG_PCI 121 &pci_bus_type, 122 #endif 123 #ifdef CONFIG_ARM_AMBA 124 &amba_bustype, 125 #endif 126 #ifdef CONFIG_FSL_MC_BUS 127 &fsl_mc_bus_type, 128 #endif 129 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 130 &host1x_context_device_bus_type, 131 #endif 132 }; 133 134 /* 135 * Use a function instead of an array here because the domain-type is a 136 * bit-field, so an array would waste memory. 137 */ 138 static const char *iommu_domain_type_str(unsigned int t) 139 { 140 switch (t) { 141 case IOMMU_DOMAIN_BLOCKED: 142 return "Blocked"; 143 case IOMMU_DOMAIN_IDENTITY: 144 return "Passthrough"; 145 case IOMMU_DOMAIN_UNMANAGED: 146 return "Unmanaged"; 147 case IOMMU_DOMAIN_DMA: 148 case IOMMU_DOMAIN_DMA_FQ: 149 return "Translated"; 150 default: 151 return "Unknown"; 152 } 153 } 154 155 static int __init iommu_subsys_init(void) 156 { 157 struct notifier_block *nb; 158 159 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 160 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 161 iommu_set_default_passthrough(false); 162 else 163 iommu_set_default_translated(false); 164 165 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 166 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 167 iommu_set_default_translated(false); 168 } 169 } 170 171 if (!iommu_default_passthrough() && !iommu_dma_strict) 172 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 173 174 pr_info("Default domain type: %s %s\n", 175 iommu_domain_type_str(iommu_def_domain_type), 176 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 177 "(set via kernel command line)" : ""); 178 179 if (!iommu_default_passthrough()) 180 pr_info("DMA domain TLB invalidation policy: %s mode %s\n", 181 iommu_dma_strict ? "strict" : "lazy", 182 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 183 "(set via kernel command line)" : ""); 184 185 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 186 if (!nb) 187 return -ENOMEM; 188 189 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 190 nb[i].notifier_call = iommu_bus_notifier; 191 bus_register_notifier(iommu_buses[i], &nb[i]); 192 } 193 194 return 0; 195 } 196 subsys_initcall(iommu_subsys_init); 197 198 static int remove_iommu_group(struct device *dev, void *data) 199 { 200 if (dev->iommu && dev->iommu->iommu_dev == data) 201 iommu_release_device(dev); 202 203 return 0; 204 } 205 206 /** 207 * iommu_device_register() - Register an IOMMU hardware instance 208 * @iommu: IOMMU handle for the instance 209 * @ops: IOMMU ops to associate with the instance 210 * @hwdev: (optional) actual instance device, used for fwnode lookup 211 * 212 * Return: 0 on success, or an error. 213 */ 214 int iommu_device_register(struct iommu_device *iommu, 215 const struct iommu_ops *ops, struct device *hwdev) 216 { 217 int err = 0; 218 219 /* We need to be able to take module references appropriately */ 220 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 221 return -EINVAL; 222 /* 223 * Temporarily enforce global restriction to a single driver. This was 224 * already the de-facto behaviour, since any possible combination of 225 * existing drivers would compete for at least the PCI or platform bus. 226 */ 227 if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) 228 return -EBUSY; 229 230 iommu->ops = ops; 231 if (hwdev) 232 iommu->fwnode = dev_fwnode(hwdev); 233 234 spin_lock(&iommu_device_lock); 235 list_add_tail(&iommu->list, &iommu_device_list); 236 spin_unlock(&iommu_device_lock); 237 238 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { 239 iommu_buses[i]->iommu_ops = ops; 240 err = bus_iommu_probe(iommu_buses[i]); 241 } 242 if (err) 243 iommu_device_unregister(iommu); 244 return err; 245 } 246 EXPORT_SYMBOL_GPL(iommu_device_register); 247 248 void iommu_device_unregister(struct iommu_device *iommu) 249 { 250 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 251 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 252 253 spin_lock(&iommu_device_lock); 254 list_del(&iommu->list); 255 spin_unlock(&iommu_device_lock); 256 } 257 EXPORT_SYMBOL_GPL(iommu_device_unregister); 258 259 static struct dev_iommu *dev_iommu_get(struct device *dev) 260 { 261 struct dev_iommu *param = dev->iommu; 262 263 if (param) 264 return param; 265 266 param = kzalloc(sizeof(*param), GFP_KERNEL); 267 if (!param) 268 return NULL; 269 270 mutex_init(¶m->lock); 271 dev->iommu = param; 272 return param; 273 } 274 275 static void dev_iommu_free(struct device *dev) 276 { 277 struct dev_iommu *param = dev->iommu; 278 279 dev->iommu = NULL; 280 if (param->fwspec) { 281 fwnode_handle_put(param->fwspec->iommu_fwnode); 282 kfree(param->fwspec); 283 } 284 kfree(param); 285 } 286 287 static u32 dev_iommu_get_max_pasids(struct device *dev) 288 { 289 u32 max_pasids = 0, bits = 0; 290 int ret; 291 292 if (dev_is_pci(dev)) { 293 ret = pci_max_pasids(to_pci_dev(dev)); 294 if (ret > 0) 295 max_pasids = ret; 296 } else { 297 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 298 if (!ret) 299 max_pasids = 1UL << bits; 300 } 301 302 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 303 } 304 305 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 306 { 307 const struct iommu_ops *ops = dev->bus->iommu_ops; 308 struct iommu_device *iommu_dev; 309 struct iommu_group *group; 310 static DEFINE_MUTEX(iommu_probe_device_lock); 311 int ret; 312 313 if (!ops) 314 return -ENODEV; 315 /* 316 * Serialise to avoid races between IOMMU drivers registering in 317 * parallel and/or the "replay" calls from ACPI/OF code via client 318 * driver probe. Once the latter have been cleaned up we should 319 * probably be able to use device_lock() here to minimise the scope, 320 * but for now enforcing a simple global ordering is fine. 321 */ 322 mutex_lock(&iommu_probe_device_lock); 323 if (!dev_iommu_get(dev)) { 324 ret = -ENOMEM; 325 goto err_unlock; 326 } 327 328 if (!try_module_get(ops->owner)) { 329 ret = -EINVAL; 330 goto err_free; 331 } 332 333 iommu_dev = ops->probe_device(dev); 334 if (IS_ERR(iommu_dev)) { 335 ret = PTR_ERR(iommu_dev); 336 goto out_module_put; 337 } 338 339 dev->iommu->iommu_dev = iommu_dev; 340 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 341 342 group = iommu_group_get_for_dev(dev); 343 if (IS_ERR(group)) { 344 ret = PTR_ERR(group); 345 goto out_release; 346 } 347 348 mutex_lock(&group->mutex); 349 if (group_list && !group->default_domain && list_empty(&group->entry)) 350 list_add_tail(&group->entry, group_list); 351 mutex_unlock(&group->mutex); 352 iommu_group_put(group); 353 354 mutex_unlock(&iommu_probe_device_lock); 355 iommu_device_link(iommu_dev, dev); 356 357 return 0; 358 359 out_release: 360 if (ops->release_device) 361 ops->release_device(dev); 362 363 out_module_put: 364 module_put(ops->owner); 365 366 err_free: 367 dev_iommu_free(dev); 368 369 err_unlock: 370 mutex_unlock(&iommu_probe_device_lock); 371 372 return ret; 373 } 374 375 static bool iommu_is_attach_deferred(struct device *dev) 376 { 377 const struct iommu_ops *ops = dev_iommu_ops(dev); 378 379 if (ops->is_attach_deferred) 380 return ops->is_attach_deferred(dev); 381 382 return false; 383 } 384 385 static int iommu_group_do_dma_first_attach(struct device *dev, void *data) 386 { 387 struct iommu_domain *domain = data; 388 389 lockdep_assert_held(&dev->iommu_group->mutex); 390 391 if (iommu_is_attach_deferred(dev)) { 392 dev->iommu->attach_deferred = 1; 393 return 0; 394 } 395 396 return __iommu_attach_device(domain, dev); 397 } 398 399 int iommu_probe_device(struct device *dev) 400 { 401 const struct iommu_ops *ops; 402 struct iommu_group *group; 403 int ret; 404 405 ret = __iommu_probe_device(dev, NULL); 406 if (ret) 407 goto err_out; 408 409 group = iommu_group_get(dev); 410 if (!group) { 411 ret = -ENODEV; 412 goto err_release; 413 } 414 415 /* 416 * Try to allocate a default domain - needs support from the 417 * IOMMU driver. There are still some drivers which don't 418 * support default domains, so the return value is not yet 419 * checked. 420 */ 421 mutex_lock(&group->mutex); 422 iommu_alloc_default_domain(group, dev); 423 424 /* 425 * If device joined an existing group which has been claimed, don't 426 * attach the default domain. 427 */ 428 if (group->default_domain && !group->owner) { 429 ret = iommu_group_do_dma_first_attach(dev, group->default_domain); 430 if (ret) { 431 mutex_unlock(&group->mutex); 432 iommu_group_put(group); 433 goto err_release; 434 } 435 } 436 437 iommu_create_device_direct_mappings(group, dev); 438 439 mutex_unlock(&group->mutex); 440 iommu_group_put(group); 441 442 ops = dev_iommu_ops(dev); 443 if (ops->probe_finalize) 444 ops->probe_finalize(dev); 445 446 return 0; 447 448 err_release: 449 iommu_release_device(dev); 450 451 err_out: 452 return ret; 453 454 } 455 456 /* 457 * Remove a device from a group's device list and return the group device 458 * if successful. 459 */ 460 static struct group_device * 461 __iommu_group_remove_device(struct iommu_group *group, struct device *dev) 462 { 463 struct group_device *device; 464 465 lockdep_assert_held(&group->mutex); 466 list_for_each_entry(device, &group->devices, list) { 467 if (device->dev == dev) { 468 list_del(&device->list); 469 return device; 470 } 471 } 472 473 return NULL; 474 } 475 476 /* 477 * Release a device from its group and decrements the iommu group reference 478 * count. 479 */ 480 static void __iommu_group_release_device(struct iommu_group *group, 481 struct group_device *grp_dev) 482 { 483 struct device *dev = grp_dev->dev; 484 485 sysfs_remove_link(group->devices_kobj, grp_dev->name); 486 sysfs_remove_link(&dev->kobj, "iommu_group"); 487 488 trace_remove_device_from_group(group->id, dev); 489 490 kfree(grp_dev->name); 491 kfree(grp_dev); 492 dev->iommu_group = NULL; 493 kobject_put(group->devices_kobj); 494 } 495 496 void iommu_release_device(struct device *dev) 497 { 498 const struct iommu_ops *ops; 499 500 if (!dev->iommu) 501 return; 502 503 iommu_device_unlink(dev->iommu->iommu_dev, dev); 504 505 ops = dev_iommu_ops(dev); 506 if (ops->release_device) 507 ops->release_device(dev); 508 509 iommu_group_remove_device(dev); 510 module_put(ops->owner); 511 dev_iommu_free(dev); 512 } 513 514 static int __init iommu_set_def_domain_type(char *str) 515 { 516 bool pt; 517 int ret; 518 519 ret = kstrtobool(str, &pt); 520 if (ret) 521 return ret; 522 523 if (pt) 524 iommu_set_default_passthrough(true); 525 else 526 iommu_set_default_translated(true); 527 528 return 0; 529 } 530 early_param("iommu.passthrough", iommu_set_def_domain_type); 531 532 static int __init iommu_dma_setup(char *str) 533 { 534 int ret = kstrtobool(str, &iommu_dma_strict); 535 536 if (!ret) 537 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 538 return ret; 539 } 540 early_param("iommu.strict", iommu_dma_setup); 541 542 void iommu_set_dma_strict(void) 543 { 544 iommu_dma_strict = true; 545 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 546 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 547 } 548 549 static ssize_t iommu_group_attr_show(struct kobject *kobj, 550 struct attribute *__attr, char *buf) 551 { 552 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 553 struct iommu_group *group = to_iommu_group(kobj); 554 ssize_t ret = -EIO; 555 556 if (attr->show) 557 ret = attr->show(group, buf); 558 return ret; 559 } 560 561 static ssize_t iommu_group_attr_store(struct kobject *kobj, 562 struct attribute *__attr, 563 const char *buf, size_t count) 564 { 565 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 566 struct iommu_group *group = to_iommu_group(kobj); 567 ssize_t ret = -EIO; 568 569 if (attr->store) 570 ret = attr->store(group, buf, count); 571 return ret; 572 } 573 574 static const struct sysfs_ops iommu_group_sysfs_ops = { 575 .show = iommu_group_attr_show, 576 .store = iommu_group_attr_store, 577 }; 578 579 static int iommu_group_create_file(struct iommu_group *group, 580 struct iommu_group_attribute *attr) 581 { 582 return sysfs_create_file(&group->kobj, &attr->attr); 583 } 584 585 static void iommu_group_remove_file(struct iommu_group *group, 586 struct iommu_group_attribute *attr) 587 { 588 sysfs_remove_file(&group->kobj, &attr->attr); 589 } 590 591 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 592 { 593 return sprintf(buf, "%s\n", group->name); 594 } 595 596 /** 597 * iommu_insert_resv_region - Insert a new region in the 598 * list of reserved regions. 599 * @new: new region to insert 600 * @regions: list of regions 601 * 602 * Elements are sorted by start address and overlapping segments 603 * of the same type are merged. 604 */ 605 static int iommu_insert_resv_region(struct iommu_resv_region *new, 606 struct list_head *regions) 607 { 608 struct iommu_resv_region *iter, *tmp, *nr, *top; 609 LIST_HEAD(stack); 610 611 nr = iommu_alloc_resv_region(new->start, new->length, 612 new->prot, new->type, GFP_KERNEL); 613 if (!nr) 614 return -ENOMEM; 615 616 /* First add the new element based on start address sorting */ 617 list_for_each_entry(iter, regions, list) { 618 if (nr->start < iter->start || 619 (nr->start == iter->start && nr->type <= iter->type)) 620 break; 621 } 622 list_add_tail(&nr->list, &iter->list); 623 624 /* Merge overlapping segments of type nr->type in @regions, if any */ 625 list_for_each_entry_safe(iter, tmp, regions, list) { 626 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 627 628 /* no merge needed on elements of different types than @new */ 629 if (iter->type != new->type) { 630 list_move_tail(&iter->list, &stack); 631 continue; 632 } 633 634 /* look for the last stack element of same type as @iter */ 635 list_for_each_entry_reverse(top, &stack, list) 636 if (top->type == iter->type) 637 goto check_overlap; 638 639 list_move_tail(&iter->list, &stack); 640 continue; 641 642 check_overlap: 643 top_end = top->start + top->length - 1; 644 645 if (iter->start > top_end + 1) { 646 list_move_tail(&iter->list, &stack); 647 } else { 648 top->length = max(top_end, iter_end) - top->start + 1; 649 list_del(&iter->list); 650 kfree(iter); 651 } 652 } 653 list_splice(&stack, regions); 654 return 0; 655 } 656 657 static int 658 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 659 struct list_head *group_resv_regions) 660 { 661 struct iommu_resv_region *entry; 662 int ret = 0; 663 664 list_for_each_entry(entry, dev_resv_regions, list) { 665 ret = iommu_insert_resv_region(entry, group_resv_regions); 666 if (ret) 667 break; 668 } 669 return ret; 670 } 671 672 int iommu_get_group_resv_regions(struct iommu_group *group, 673 struct list_head *head) 674 { 675 struct group_device *device; 676 int ret = 0; 677 678 mutex_lock(&group->mutex); 679 list_for_each_entry(device, &group->devices, list) { 680 struct list_head dev_resv_regions; 681 682 /* 683 * Non-API groups still expose reserved_regions in sysfs, 684 * so filter out calls that get here that way. 685 */ 686 if (!device->dev->iommu) 687 break; 688 689 INIT_LIST_HEAD(&dev_resv_regions); 690 iommu_get_resv_regions(device->dev, &dev_resv_regions); 691 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 692 iommu_put_resv_regions(device->dev, &dev_resv_regions); 693 if (ret) 694 break; 695 } 696 mutex_unlock(&group->mutex); 697 return ret; 698 } 699 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 700 701 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 702 char *buf) 703 { 704 struct iommu_resv_region *region, *next; 705 struct list_head group_resv_regions; 706 char *str = buf; 707 708 INIT_LIST_HEAD(&group_resv_regions); 709 iommu_get_group_resv_regions(group, &group_resv_regions); 710 711 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 712 str += sprintf(str, "0x%016llx 0x%016llx %s\n", 713 (long long int)region->start, 714 (long long int)(region->start + 715 region->length - 1), 716 iommu_group_resv_type_string[region->type]); 717 kfree(region); 718 } 719 720 return (str - buf); 721 } 722 723 static ssize_t iommu_group_show_type(struct iommu_group *group, 724 char *buf) 725 { 726 char *type = "unknown\n"; 727 728 mutex_lock(&group->mutex); 729 if (group->default_domain) { 730 switch (group->default_domain->type) { 731 case IOMMU_DOMAIN_BLOCKED: 732 type = "blocked\n"; 733 break; 734 case IOMMU_DOMAIN_IDENTITY: 735 type = "identity\n"; 736 break; 737 case IOMMU_DOMAIN_UNMANAGED: 738 type = "unmanaged\n"; 739 break; 740 case IOMMU_DOMAIN_DMA: 741 type = "DMA\n"; 742 break; 743 case IOMMU_DOMAIN_DMA_FQ: 744 type = "DMA-FQ\n"; 745 break; 746 } 747 } 748 mutex_unlock(&group->mutex); 749 strcpy(buf, type); 750 751 return strlen(type); 752 } 753 754 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 755 756 static IOMMU_GROUP_ATTR(reserved_regions, 0444, 757 iommu_group_show_resv_regions, NULL); 758 759 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 760 iommu_group_store_type); 761 762 static void iommu_group_release(struct kobject *kobj) 763 { 764 struct iommu_group *group = to_iommu_group(kobj); 765 766 pr_debug("Releasing group %d\n", group->id); 767 768 if (group->iommu_data_release) 769 group->iommu_data_release(group->iommu_data); 770 771 ida_free(&iommu_group_ida, group->id); 772 773 if (group->default_domain) 774 iommu_domain_free(group->default_domain); 775 if (group->blocking_domain) 776 iommu_domain_free(group->blocking_domain); 777 778 kfree(group->name); 779 kfree(group); 780 } 781 782 static const struct kobj_type iommu_group_ktype = { 783 .sysfs_ops = &iommu_group_sysfs_ops, 784 .release = iommu_group_release, 785 }; 786 787 /** 788 * iommu_group_alloc - Allocate a new group 789 * 790 * This function is called by an iommu driver to allocate a new iommu 791 * group. The iommu group represents the minimum granularity of the iommu. 792 * Upon successful return, the caller holds a reference to the supplied 793 * group in order to hold the group until devices are added. Use 794 * iommu_group_put() to release this extra reference count, allowing the 795 * group to be automatically reclaimed once it has no devices or external 796 * references. 797 */ 798 struct iommu_group *iommu_group_alloc(void) 799 { 800 struct iommu_group *group; 801 int ret; 802 803 group = kzalloc(sizeof(*group), GFP_KERNEL); 804 if (!group) 805 return ERR_PTR(-ENOMEM); 806 807 group->kobj.kset = iommu_group_kset; 808 mutex_init(&group->mutex); 809 INIT_LIST_HEAD(&group->devices); 810 INIT_LIST_HEAD(&group->entry); 811 xa_init(&group->pasid_array); 812 813 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 814 if (ret < 0) { 815 kfree(group); 816 return ERR_PTR(ret); 817 } 818 group->id = ret; 819 820 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 821 NULL, "%d", group->id); 822 if (ret) { 823 kobject_put(&group->kobj); 824 return ERR_PTR(ret); 825 } 826 827 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 828 if (!group->devices_kobj) { 829 kobject_put(&group->kobj); /* triggers .release & free */ 830 return ERR_PTR(-ENOMEM); 831 } 832 833 /* 834 * The devices_kobj holds a reference on the group kobject, so 835 * as long as that exists so will the group. We can therefore 836 * use the devices_kobj for reference counting. 837 */ 838 kobject_put(&group->kobj); 839 840 ret = iommu_group_create_file(group, 841 &iommu_group_attr_reserved_regions); 842 if (ret) { 843 kobject_put(group->devices_kobj); 844 return ERR_PTR(ret); 845 } 846 847 ret = iommu_group_create_file(group, &iommu_group_attr_type); 848 if (ret) { 849 kobject_put(group->devices_kobj); 850 return ERR_PTR(ret); 851 } 852 853 pr_debug("Allocated group %d\n", group->id); 854 855 return group; 856 } 857 EXPORT_SYMBOL_GPL(iommu_group_alloc); 858 859 struct iommu_group *iommu_group_get_by_id(int id) 860 { 861 struct kobject *group_kobj; 862 struct iommu_group *group; 863 const char *name; 864 865 if (!iommu_group_kset) 866 return NULL; 867 868 name = kasprintf(GFP_KERNEL, "%d", id); 869 if (!name) 870 return NULL; 871 872 group_kobj = kset_find_obj(iommu_group_kset, name); 873 kfree(name); 874 875 if (!group_kobj) 876 return NULL; 877 878 group = container_of(group_kobj, struct iommu_group, kobj); 879 BUG_ON(group->id != id); 880 881 kobject_get(group->devices_kobj); 882 kobject_put(&group->kobj); 883 884 return group; 885 } 886 EXPORT_SYMBOL_GPL(iommu_group_get_by_id); 887 888 /** 889 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 890 * @group: the group 891 * 892 * iommu drivers can store data in the group for use when doing iommu 893 * operations. This function provides a way to retrieve it. Caller 894 * should hold a group reference. 895 */ 896 void *iommu_group_get_iommudata(struct iommu_group *group) 897 { 898 return group->iommu_data; 899 } 900 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 901 902 /** 903 * iommu_group_set_iommudata - set iommu_data for a group 904 * @group: the group 905 * @iommu_data: new data 906 * @release: release function for iommu_data 907 * 908 * iommu drivers can store data in the group for use when doing iommu 909 * operations. This function provides a way to set the data after 910 * the group has been allocated. Caller should hold a group reference. 911 */ 912 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 913 void (*release)(void *iommu_data)) 914 { 915 group->iommu_data = iommu_data; 916 group->iommu_data_release = release; 917 } 918 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 919 920 /** 921 * iommu_group_set_name - set name for a group 922 * @group: the group 923 * @name: name 924 * 925 * Allow iommu driver to set a name for a group. When set it will 926 * appear in a name attribute file under the group in sysfs. 927 */ 928 int iommu_group_set_name(struct iommu_group *group, const char *name) 929 { 930 int ret; 931 932 if (group->name) { 933 iommu_group_remove_file(group, &iommu_group_attr_name); 934 kfree(group->name); 935 group->name = NULL; 936 if (!name) 937 return 0; 938 } 939 940 group->name = kstrdup(name, GFP_KERNEL); 941 if (!group->name) 942 return -ENOMEM; 943 944 ret = iommu_group_create_file(group, &iommu_group_attr_name); 945 if (ret) { 946 kfree(group->name); 947 group->name = NULL; 948 return ret; 949 } 950 951 return 0; 952 } 953 EXPORT_SYMBOL_GPL(iommu_group_set_name); 954 955 static int iommu_create_device_direct_mappings(struct iommu_group *group, 956 struct device *dev) 957 { 958 struct iommu_domain *domain = group->default_domain; 959 struct iommu_resv_region *entry; 960 struct list_head mappings; 961 unsigned long pg_size; 962 int ret = 0; 963 964 if (!domain || !iommu_is_dma_domain(domain)) 965 return 0; 966 967 BUG_ON(!domain->pgsize_bitmap); 968 969 pg_size = 1UL << __ffs(domain->pgsize_bitmap); 970 INIT_LIST_HEAD(&mappings); 971 972 iommu_get_resv_regions(dev, &mappings); 973 974 /* We need to consider overlapping regions for different devices */ 975 list_for_each_entry(entry, &mappings, list) { 976 dma_addr_t start, end, addr; 977 size_t map_size = 0; 978 979 start = ALIGN(entry->start, pg_size); 980 end = ALIGN(entry->start + entry->length, pg_size); 981 982 if (entry->type != IOMMU_RESV_DIRECT && 983 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) 984 continue; 985 986 for (addr = start; addr <= end; addr += pg_size) { 987 phys_addr_t phys_addr; 988 989 if (addr == end) 990 goto map_end; 991 992 phys_addr = iommu_iova_to_phys(domain, addr); 993 if (!phys_addr) { 994 map_size += pg_size; 995 continue; 996 } 997 998 map_end: 999 if (map_size) { 1000 ret = iommu_map(domain, addr - map_size, 1001 addr - map_size, map_size, 1002 entry->prot, GFP_KERNEL); 1003 if (ret) 1004 goto out; 1005 map_size = 0; 1006 } 1007 } 1008 1009 } 1010 1011 iommu_flush_iotlb_all(domain); 1012 1013 out: 1014 iommu_put_resv_regions(dev, &mappings); 1015 1016 return ret; 1017 } 1018 1019 /** 1020 * iommu_group_add_device - add a device to an iommu group 1021 * @group: the group into which to add the device (reference should be held) 1022 * @dev: the device 1023 * 1024 * This function is called by an iommu driver to add a device into a 1025 * group. Adding a device increments the group reference count. 1026 */ 1027 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1028 { 1029 int ret, i = 0; 1030 struct group_device *device; 1031 1032 device = kzalloc(sizeof(*device), GFP_KERNEL); 1033 if (!device) 1034 return -ENOMEM; 1035 1036 device->dev = dev; 1037 1038 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1039 if (ret) 1040 goto err_free_device; 1041 1042 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1043 rename: 1044 if (!device->name) { 1045 ret = -ENOMEM; 1046 goto err_remove_link; 1047 } 1048 1049 ret = sysfs_create_link_nowarn(group->devices_kobj, 1050 &dev->kobj, device->name); 1051 if (ret) { 1052 if (ret == -EEXIST && i >= 0) { 1053 /* 1054 * Account for the slim chance of collision 1055 * and append an instance to the name. 1056 */ 1057 kfree(device->name); 1058 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1059 kobject_name(&dev->kobj), i++); 1060 goto rename; 1061 } 1062 goto err_free_name; 1063 } 1064 1065 kobject_get(group->devices_kobj); 1066 1067 dev->iommu_group = group; 1068 1069 mutex_lock(&group->mutex); 1070 list_add_tail(&device->list, &group->devices); 1071 if (group->domain) 1072 ret = iommu_group_do_dma_first_attach(dev, group->domain); 1073 mutex_unlock(&group->mutex); 1074 if (ret) 1075 goto err_put_group; 1076 1077 trace_add_device_to_group(group->id, dev); 1078 1079 dev_info(dev, "Adding to iommu group %d\n", group->id); 1080 1081 return 0; 1082 1083 err_put_group: 1084 mutex_lock(&group->mutex); 1085 list_del(&device->list); 1086 mutex_unlock(&group->mutex); 1087 dev->iommu_group = NULL; 1088 kobject_put(group->devices_kobj); 1089 sysfs_remove_link(group->devices_kobj, device->name); 1090 err_free_name: 1091 kfree(device->name); 1092 err_remove_link: 1093 sysfs_remove_link(&dev->kobj, "iommu_group"); 1094 err_free_device: 1095 kfree(device); 1096 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1097 return ret; 1098 } 1099 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1100 1101 /** 1102 * iommu_group_remove_device - remove a device from it's current group 1103 * @dev: device to be removed 1104 * 1105 * This function is called by an iommu driver to remove the device from 1106 * it's current group. This decrements the iommu group reference count. 1107 */ 1108 void iommu_group_remove_device(struct device *dev) 1109 { 1110 struct iommu_group *group = dev->iommu_group; 1111 struct group_device *device; 1112 1113 if (!group) 1114 return; 1115 1116 dev_info(dev, "Removing from iommu group %d\n", group->id); 1117 1118 mutex_lock(&group->mutex); 1119 device = __iommu_group_remove_device(group, dev); 1120 mutex_unlock(&group->mutex); 1121 1122 if (device) 1123 __iommu_group_release_device(group, device); 1124 } 1125 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1126 1127 static int iommu_group_device_count(struct iommu_group *group) 1128 { 1129 struct group_device *entry; 1130 int ret = 0; 1131 1132 list_for_each_entry(entry, &group->devices, list) 1133 ret++; 1134 1135 return ret; 1136 } 1137 1138 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, 1139 int (*fn)(struct device *, void *)) 1140 { 1141 struct group_device *device; 1142 int ret = 0; 1143 1144 list_for_each_entry(device, &group->devices, list) { 1145 ret = fn(device->dev, data); 1146 if (ret) 1147 break; 1148 } 1149 return ret; 1150 } 1151 1152 /** 1153 * iommu_group_for_each_dev - iterate over each device in the group 1154 * @group: the group 1155 * @data: caller opaque data to be passed to callback function 1156 * @fn: caller supplied callback function 1157 * 1158 * This function is called by group users to iterate over group devices. 1159 * Callers should hold a reference count to the group during callback. 1160 * The group->mutex is held across callbacks, which will block calls to 1161 * iommu_group_add/remove_device. 1162 */ 1163 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1164 int (*fn)(struct device *, void *)) 1165 { 1166 int ret; 1167 1168 mutex_lock(&group->mutex); 1169 ret = __iommu_group_for_each_dev(group, data, fn); 1170 mutex_unlock(&group->mutex); 1171 1172 return ret; 1173 } 1174 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1175 1176 /** 1177 * iommu_group_get - Return the group for a device and increment reference 1178 * @dev: get the group that this device belongs to 1179 * 1180 * This function is called by iommu drivers and users to get the group 1181 * for the specified device. If found, the group is returned and the group 1182 * reference in incremented, else NULL. 1183 */ 1184 struct iommu_group *iommu_group_get(struct device *dev) 1185 { 1186 struct iommu_group *group = dev->iommu_group; 1187 1188 if (group) 1189 kobject_get(group->devices_kobj); 1190 1191 return group; 1192 } 1193 EXPORT_SYMBOL_GPL(iommu_group_get); 1194 1195 /** 1196 * iommu_group_ref_get - Increment reference on a group 1197 * @group: the group to use, must not be NULL 1198 * 1199 * This function is called by iommu drivers to take additional references on an 1200 * existing group. Returns the given group for convenience. 1201 */ 1202 struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1203 { 1204 kobject_get(group->devices_kobj); 1205 return group; 1206 } 1207 EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1208 1209 /** 1210 * iommu_group_put - Decrement group reference 1211 * @group: the group to use 1212 * 1213 * This function is called by iommu drivers and users to release the 1214 * iommu group. Once the reference count is zero, the group is released. 1215 */ 1216 void iommu_group_put(struct iommu_group *group) 1217 { 1218 if (group) 1219 kobject_put(group->devices_kobj); 1220 } 1221 EXPORT_SYMBOL_GPL(iommu_group_put); 1222 1223 /** 1224 * iommu_register_device_fault_handler() - Register a device fault handler 1225 * @dev: the device 1226 * @handler: the fault handler 1227 * @data: private data passed as argument to the handler 1228 * 1229 * When an IOMMU fault event is received, this handler gets called with the 1230 * fault event and data as argument. The handler should return 0 on success. If 1231 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also 1232 * complete the fault by calling iommu_page_response() with one of the following 1233 * response code: 1234 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation 1235 * - IOMMU_PAGE_RESP_INVALID: terminate the fault 1236 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting 1237 * page faults if possible. 1238 * 1239 * Return 0 if the fault handler was installed successfully, or an error. 1240 */ 1241 int iommu_register_device_fault_handler(struct device *dev, 1242 iommu_dev_fault_handler_t handler, 1243 void *data) 1244 { 1245 struct dev_iommu *param = dev->iommu; 1246 int ret = 0; 1247 1248 if (!param) 1249 return -EINVAL; 1250 1251 mutex_lock(¶m->lock); 1252 /* Only allow one fault handler registered for each device */ 1253 if (param->fault_param) { 1254 ret = -EBUSY; 1255 goto done_unlock; 1256 } 1257 1258 get_device(dev); 1259 param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); 1260 if (!param->fault_param) { 1261 put_device(dev); 1262 ret = -ENOMEM; 1263 goto done_unlock; 1264 } 1265 param->fault_param->handler = handler; 1266 param->fault_param->data = data; 1267 mutex_init(¶m->fault_param->lock); 1268 INIT_LIST_HEAD(¶m->fault_param->faults); 1269 1270 done_unlock: 1271 mutex_unlock(¶m->lock); 1272 1273 return ret; 1274 } 1275 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); 1276 1277 /** 1278 * iommu_unregister_device_fault_handler() - Unregister the device fault handler 1279 * @dev: the device 1280 * 1281 * Remove the device fault handler installed with 1282 * iommu_register_device_fault_handler(). 1283 * 1284 * Return 0 on success, or an error. 1285 */ 1286 int iommu_unregister_device_fault_handler(struct device *dev) 1287 { 1288 struct dev_iommu *param = dev->iommu; 1289 int ret = 0; 1290 1291 if (!param) 1292 return -EINVAL; 1293 1294 mutex_lock(¶m->lock); 1295 1296 if (!param->fault_param) 1297 goto unlock; 1298 1299 /* we cannot unregister handler if there are pending faults */ 1300 if (!list_empty(¶m->fault_param->faults)) { 1301 ret = -EBUSY; 1302 goto unlock; 1303 } 1304 1305 kfree(param->fault_param); 1306 param->fault_param = NULL; 1307 put_device(dev); 1308 unlock: 1309 mutex_unlock(¶m->lock); 1310 1311 return ret; 1312 } 1313 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); 1314 1315 /** 1316 * iommu_report_device_fault() - Report fault event to device driver 1317 * @dev: the device 1318 * @evt: fault event data 1319 * 1320 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ 1321 * handler. When this function fails and the fault is recoverable, it is the 1322 * caller's responsibility to complete the fault. 1323 * 1324 * Return 0 on success, or an error. 1325 */ 1326 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) 1327 { 1328 struct dev_iommu *param = dev->iommu; 1329 struct iommu_fault_event *evt_pending = NULL; 1330 struct iommu_fault_param *fparam; 1331 int ret = 0; 1332 1333 if (!param || !evt) 1334 return -EINVAL; 1335 1336 /* we only report device fault if there is a handler registered */ 1337 mutex_lock(¶m->lock); 1338 fparam = param->fault_param; 1339 if (!fparam || !fparam->handler) { 1340 ret = -EINVAL; 1341 goto done_unlock; 1342 } 1343 1344 if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && 1345 (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { 1346 evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), 1347 GFP_KERNEL); 1348 if (!evt_pending) { 1349 ret = -ENOMEM; 1350 goto done_unlock; 1351 } 1352 mutex_lock(&fparam->lock); 1353 list_add_tail(&evt_pending->list, &fparam->faults); 1354 mutex_unlock(&fparam->lock); 1355 } 1356 1357 ret = fparam->handler(&evt->fault, fparam->data); 1358 if (ret && evt_pending) { 1359 mutex_lock(&fparam->lock); 1360 list_del(&evt_pending->list); 1361 mutex_unlock(&fparam->lock); 1362 kfree(evt_pending); 1363 } 1364 done_unlock: 1365 mutex_unlock(¶m->lock); 1366 return ret; 1367 } 1368 EXPORT_SYMBOL_GPL(iommu_report_device_fault); 1369 1370 int iommu_page_response(struct device *dev, 1371 struct iommu_page_response *msg) 1372 { 1373 bool needs_pasid; 1374 int ret = -EINVAL; 1375 struct iommu_fault_event *evt; 1376 struct iommu_fault_page_request *prm; 1377 struct dev_iommu *param = dev->iommu; 1378 const struct iommu_ops *ops = dev_iommu_ops(dev); 1379 bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; 1380 1381 if (!ops->page_response) 1382 return -ENODEV; 1383 1384 if (!param || !param->fault_param) 1385 return -EINVAL; 1386 1387 if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || 1388 msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) 1389 return -EINVAL; 1390 1391 /* Only send response if there is a fault report pending */ 1392 mutex_lock(¶m->fault_param->lock); 1393 if (list_empty(¶m->fault_param->faults)) { 1394 dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); 1395 goto done_unlock; 1396 } 1397 /* 1398 * Check if we have a matching page request pending to respond, 1399 * otherwise return -EINVAL 1400 */ 1401 list_for_each_entry(evt, ¶m->fault_param->faults, list) { 1402 prm = &evt->fault.prm; 1403 if (prm->grpid != msg->grpid) 1404 continue; 1405 1406 /* 1407 * If the PASID is required, the corresponding request is 1408 * matched using the group ID, the PASID valid bit and the PASID 1409 * value. Otherwise only the group ID matches request and 1410 * response. 1411 */ 1412 needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; 1413 if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) 1414 continue; 1415 1416 if (!needs_pasid && has_pasid) { 1417 /* No big deal, just clear it. */ 1418 msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; 1419 msg->pasid = 0; 1420 } 1421 1422 ret = ops->page_response(dev, evt, msg); 1423 list_del(&evt->list); 1424 kfree(evt); 1425 break; 1426 } 1427 1428 done_unlock: 1429 mutex_unlock(¶m->fault_param->lock); 1430 return ret; 1431 } 1432 EXPORT_SYMBOL_GPL(iommu_page_response); 1433 1434 /** 1435 * iommu_group_id - Return ID for a group 1436 * @group: the group to ID 1437 * 1438 * Return the unique ID for the group matching the sysfs group number. 1439 */ 1440 int iommu_group_id(struct iommu_group *group) 1441 { 1442 return group->id; 1443 } 1444 EXPORT_SYMBOL_GPL(iommu_group_id); 1445 1446 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1447 unsigned long *devfns); 1448 1449 /* 1450 * To consider a PCI device isolated, we require ACS to support Source 1451 * Validation, Request Redirection, Completer Redirection, and Upstream 1452 * Forwarding. This effectively means that devices cannot spoof their 1453 * requester ID, requests and completions cannot be redirected, and all 1454 * transactions are forwarded upstream, even as it passes through a 1455 * bridge where the target device is downstream. 1456 */ 1457 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1458 1459 /* 1460 * For multifunction devices which are not isolated from each other, find 1461 * all the other non-isolated functions and look for existing groups. For 1462 * each function, we also need to look for aliases to or from other devices 1463 * that may already have a group. 1464 */ 1465 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1466 unsigned long *devfns) 1467 { 1468 struct pci_dev *tmp = NULL; 1469 struct iommu_group *group; 1470 1471 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1472 return NULL; 1473 1474 for_each_pci_dev(tmp) { 1475 if (tmp == pdev || tmp->bus != pdev->bus || 1476 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1477 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1478 continue; 1479 1480 group = get_pci_alias_group(tmp, devfns); 1481 if (group) { 1482 pci_dev_put(tmp); 1483 return group; 1484 } 1485 } 1486 1487 return NULL; 1488 } 1489 1490 /* 1491 * Look for aliases to or from the given device for existing groups. DMA 1492 * aliases are only supported on the same bus, therefore the search 1493 * space is quite small (especially since we're really only looking at pcie 1494 * device, and therefore only expect multiple slots on the root complex or 1495 * downstream switch ports). It's conceivable though that a pair of 1496 * multifunction devices could have aliases between them that would cause a 1497 * loop. To prevent this, we use a bitmap to track where we've been. 1498 */ 1499 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1500 unsigned long *devfns) 1501 { 1502 struct pci_dev *tmp = NULL; 1503 struct iommu_group *group; 1504 1505 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1506 return NULL; 1507 1508 group = iommu_group_get(&pdev->dev); 1509 if (group) 1510 return group; 1511 1512 for_each_pci_dev(tmp) { 1513 if (tmp == pdev || tmp->bus != pdev->bus) 1514 continue; 1515 1516 /* We alias them or they alias us */ 1517 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1518 group = get_pci_alias_group(tmp, devfns); 1519 if (group) { 1520 pci_dev_put(tmp); 1521 return group; 1522 } 1523 1524 group = get_pci_function_alias_group(tmp, devfns); 1525 if (group) { 1526 pci_dev_put(tmp); 1527 return group; 1528 } 1529 } 1530 } 1531 1532 return NULL; 1533 } 1534 1535 struct group_for_pci_data { 1536 struct pci_dev *pdev; 1537 struct iommu_group *group; 1538 }; 1539 1540 /* 1541 * DMA alias iterator callback, return the last seen device. Stop and return 1542 * the IOMMU group if we find one along the way. 1543 */ 1544 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1545 { 1546 struct group_for_pci_data *data = opaque; 1547 1548 data->pdev = pdev; 1549 data->group = iommu_group_get(&pdev->dev); 1550 1551 return data->group != NULL; 1552 } 1553 1554 /* 1555 * Generic device_group call-back function. It just allocates one 1556 * iommu-group per device. 1557 */ 1558 struct iommu_group *generic_device_group(struct device *dev) 1559 { 1560 return iommu_group_alloc(); 1561 } 1562 EXPORT_SYMBOL_GPL(generic_device_group); 1563 1564 /* 1565 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1566 * to find or create an IOMMU group for a device. 1567 */ 1568 struct iommu_group *pci_device_group(struct device *dev) 1569 { 1570 struct pci_dev *pdev = to_pci_dev(dev); 1571 struct group_for_pci_data data; 1572 struct pci_bus *bus; 1573 struct iommu_group *group = NULL; 1574 u64 devfns[4] = { 0 }; 1575 1576 if (WARN_ON(!dev_is_pci(dev))) 1577 return ERR_PTR(-EINVAL); 1578 1579 /* 1580 * Find the upstream DMA alias for the device. A device must not 1581 * be aliased due to topology in order to have its own IOMMU group. 1582 * If we find an alias along the way that already belongs to a 1583 * group, use it. 1584 */ 1585 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1586 return data.group; 1587 1588 pdev = data.pdev; 1589 1590 /* 1591 * Continue upstream from the point of minimum IOMMU granularity 1592 * due to aliases to the point where devices are protected from 1593 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1594 * group, use it. 1595 */ 1596 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1597 if (!bus->self) 1598 continue; 1599 1600 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1601 break; 1602 1603 pdev = bus->self; 1604 1605 group = iommu_group_get(&pdev->dev); 1606 if (group) 1607 return group; 1608 } 1609 1610 /* 1611 * Look for existing groups on device aliases. If we alias another 1612 * device or another device aliases us, use the same group. 1613 */ 1614 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1615 if (group) 1616 return group; 1617 1618 /* 1619 * Look for existing groups on non-isolated functions on the same 1620 * slot and aliases of those funcions, if any. No need to clear 1621 * the search bitmap, the tested devfns are still valid. 1622 */ 1623 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1624 if (group) 1625 return group; 1626 1627 /* No shared group found, allocate new */ 1628 return iommu_group_alloc(); 1629 } 1630 EXPORT_SYMBOL_GPL(pci_device_group); 1631 1632 /* Get the IOMMU group for device on fsl-mc bus */ 1633 struct iommu_group *fsl_mc_device_group(struct device *dev) 1634 { 1635 struct device *cont_dev = fsl_mc_cont_dev(dev); 1636 struct iommu_group *group; 1637 1638 group = iommu_group_get(cont_dev); 1639 if (!group) 1640 group = iommu_group_alloc(); 1641 return group; 1642 } 1643 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1644 1645 static int iommu_get_def_domain_type(struct device *dev) 1646 { 1647 const struct iommu_ops *ops = dev_iommu_ops(dev); 1648 1649 if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) 1650 return IOMMU_DOMAIN_DMA; 1651 1652 if (ops->def_domain_type) 1653 return ops->def_domain_type(dev); 1654 1655 return 0; 1656 } 1657 1658 static int iommu_group_alloc_default_domain(struct bus_type *bus, 1659 struct iommu_group *group, 1660 unsigned int type) 1661 { 1662 struct iommu_domain *dom; 1663 1664 dom = __iommu_domain_alloc(bus, type); 1665 if (!dom && type != IOMMU_DOMAIN_DMA) { 1666 dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); 1667 if (dom) 1668 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1669 type, group->name); 1670 } 1671 1672 if (!dom) 1673 return -ENOMEM; 1674 1675 group->default_domain = dom; 1676 if (!group->domain) 1677 group->domain = dom; 1678 return 0; 1679 } 1680 1681 static int iommu_alloc_default_domain(struct iommu_group *group, 1682 struct device *dev) 1683 { 1684 unsigned int type; 1685 1686 if (group->default_domain) 1687 return 0; 1688 1689 type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type; 1690 1691 return iommu_group_alloc_default_domain(dev->bus, group, type); 1692 } 1693 1694 /** 1695 * iommu_group_get_for_dev - Find or create the IOMMU group for a device 1696 * @dev: target device 1697 * 1698 * This function is intended to be called by IOMMU drivers and extended to 1699 * support common, bus-defined algorithms when determining or creating the 1700 * IOMMU group for a device. On success, the caller will hold a reference 1701 * to the returned IOMMU group, which will already include the provided 1702 * device. The reference should be released with iommu_group_put(). 1703 */ 1704 static struct iommu_group *iommu_group_get_for_dev(struct device *dev) 1705 { 1706 const struct iommu_ops *ops = dev_iommu_ops(dev); 1707 struct iommu_group *group; 1708 int ret; 1709 1710 group = iommu_group_get(dev); 1711 if (group) 1712 return group; 1713 1714 group = ops->device_group(dev); 1715 if (WARN_ON_ONCE(group == NULL)) 1716 return ERR_PTR(-EINVAL); 1717 1718 if (IS_ERR(group)) 1719 return group; 1720 1721 ret = iommu_group_add_device(group, dev); 1722 if (ret) 1723 goto out_put_group; 1724 1725 return group; 1726 1727 out_put_group: 1728 iommu_group_put(group); 1729 1730 return ERR_PTR(ret); 1731 } 1732 1733 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1734 { 1735 return group->default_domain; 1736 } 1737 1738 static int probe_iommu_group(struct device *dev, void *data) 1739 { 1740 struct list_head *group_list = data; 1741 struct iommu_group *group; 1742 int ret; 1743 1744 /* Device is probed already if in a group */ 1745 group = iommu_group_get(dev); 1746 if (group) { 1747 iommu_group_put(group); 1748 return 0; 1749 } 1750 1751 ret = __iommu_probe_device(dev, group_list); 1752 if (ret == -ENODEV) 1753 ret = 0; 1754 1755 return ret; 1756 } 1757 1758 static int iommu_bus_notifier(struct notifier_block *nb, 1759 unsigned long action, void *data) 1760 { 1761 struct device *dev = data; 1762 1763 if (action == BUS_NOTIFY_ADD_DEVICE) { 1764 int ret; 1765 1766 ret = iommu_probe_device(dev); 1767 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1768 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1769 iommu_release_device(dev); 1770 return NOTIFY_OK; 1771 } 1772 1773 return 0; 1774 } 1775 1776 struct __group_domain_type { 1777 struct device *dev; 1778 unsigned int type; 1779 }; 1780 1781 static int probe_get_default_domain_type(struct device *dev, void *data) 1782 { 1783 struct __group_domain_type *gtype = data; 1784 unsigned int type = iommu_get_def_domain_type(dev); 1785 1786 if (type) { 1787 if (gtype->type && gtype->type != type) { 1788 dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", 1789 iommu_domain_type_str(type), 1790 dev_name(gtype->dev), 1791 iommu_domain_type_str(gtype->type)); 1792 gtype->type = 0; 1793 } 1794 1795 if (!gtype->dev) { 1796 gtype->dev = dev; 1797 gtype->type = type; 1798 } 1799 } 1800 1801 return 0; 1802 } 1803 1804 static void probe_alloc_default_domain(struct bus_type *bus, 1805 struct iommu_group *group) 1806 { 1807 struct __group_domain_type gtype; 1808 1809 memset(>ype, 0, sizeof(gtype)); 1810 1811 /* Ask for default domain requirements of all devices in the group */ 1812 __iommu_group_for_each_dev(group, >ype, 1813 probe_get_default_domain_type); 1814 1815 if (!gtype.type) 1816 gtype.type = iommu_def_domain_type; 1817 1818 iommu_group_alloc_default_domain(bus, group, gtype.type); 1819 1820 } 1821 1822 static int __iommu_group_dma_first_attach(struct iommu_group *group) 1823 { 1824 return __iommu_group_for_each_dev(group, group->default_domain, 1825 iommu_group_do_dma_first_attach); 1826 } 1827 1828 static int iommu_group_do_probe_finalize(struct device *dev, void *data) 1829 { 1830 const struct iommu_ops *ops = dev_iommu_ops(dev); 1831 1832 if (ops->probe_finalize) 1833 ops->probe_finalize(dev); 1834 1835 return 0; 1836 } 1837 1838 static void __iommu_group_dma_finalize(struct iommu_group *group) 1839 { 1840 __iommu_group_for_each_dev(group, group->default_domain, 1841 iommu_group_do_probe_finalize); 1842 } 1843 1844 static int iommu_do_create_direct_mappings(struct device *dev, void *data) 1845 { 1846 struct iommu_group *group = data; 1847 1848 iommu_create_device_direct_mappings(group, dev); 1849 1850 return 0; 1851 } 1852 1853 static int iommu_group_create_direct_mappings(struct iommu_group *group) 1854 { 1855 return __iommu_group_for_each_dev(group, group, 1856 iommu_do_create_direct_mappings); 1857 } 1858 1859 int bus_iommu_probe(struct bus_type *bus) 1860 { 1861 struct iommu_group *group, *next; 1862 LIST_HEAD(group_list); 1863 int ret; 1864 1865 /* 1866 * This code-path does not allocate the default domain when 1867 * creating the iommu group, so do it after the groups are 1868 * created. 1869 */ 1870 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1871 if (ret) 1872 return ret; 1873 1874 list_for_each_entry_safe(group, next, &group_list, entry) { 1875 mutex_lock(&group->mutex); 1876 1877 /* Remove item from the list */ 1878 list_del_init(&group->entry); 1879 1880 /* Try to allocate default domain */ 1881 probe_alloc_default_domain(bus, group); 1882 1883 if (!group->default_domain) { 1884 mutex_unlock(&group->mutex); 1885 continue; 1886 } 1887 1888 iommu_group_create_direct_mappings(group); 1889 1890 ret = __iommu_group_dma_first_attach(group); 1891 1892 mutex_unlock(&group->mutex); 1893 1894 if (ret) 1895 break; 1896 1897 __iommu_group_dma_finalize(group); 1898 } 1899 1900 return ret; 1901 } 1902 1903 bool iommu_present(struct bus_type *bus) 1904 { 1905 return bus->iommu_ops != NULL; 1906 } 1907 EXPORT_SYMBOL_GPL(iommu_present); 1908 1909 /** 1910 * device_iommu_capable() - check for a general IOMMU capability 1911 * @dev: device to which the capability would be relevant, if available 1912 * @cap: IOMMU capability 1913 * 1914 * Return: true if an IOMMU is present and supports the given capability 1915 * for the given device, otherwise false. 1916 */ 1917 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1918 { 1919 const struct iommu_ops *ops; 1920 1921 if (!dev->iommu || !dev->iommu->iommu_dev) 1922 return false; 1923 1924 ops = dev_iommu_ops(dev); 1925 if (!ops->capable) 1926 return false; 1927 1928 return ops->capable(dev, cap); 1929 } 1930 EXPORT_SYMBOL_GPL(device_iommu_capable); 1931 1932 /** 1933 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1934 * for a group 1935 * @group: Group to query 1936 * 1937 * IOMMU groups should not have differing values of 1938 * msi_device_has_isolated_msi() for devices in a group. However nothing 1939 * directly prevents this, so ensure mistakes don't result in isolation failures 1940 * by checking that all the devices are the same. 1941 */ 1942 bool iommu_group_has_isolated_msi(struct iommu_group *group) 1943 { 1944 struct group_device *group_dev; 1945 bool ret = true; 1946 1947 mutex_lock(&group->mutex); 1948 list_for_each_entry(group_dev, &group->devices, list) 1949 ret &= msi_device_has_isolated_msi(group_dev->dev); 1950 mutex_unlock(&group->mutex); 1951 return ret; 1952 } 1953 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 1954 1955 /** 1956 * iommu_set_fault_handler() - set a fault handler for an iommu domain 1957 * @domain: iommu domain 1958 * @handler: fault handler 1959 * @token: user data, will be passed back to the fault handler 1960 * 1961 * This function should be used by IOMMU users which want to be notified 1962 * whenever an IOMMU fault happens. 1963 * 1964 * The fault handler itself should return 0 on success, and an appropriate 1965 * error code otherwise. 1966 */ 1967 void iommu_set_fault_handler(struct iommu_domain *domain, 1968 iommu_fault_handler_t handler, 1969 void *token) 1970 { 1971 BUG_ON(!domain); 1972 1973 domain->handler = handler; 1974 domain->handler_token = token; 1975 } 1976 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1977 1978 static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, 1979 unsigned type) 1980 { 1981 struct iommu_domain *domain; 1982 1983 if (bus == NULL || bus->iommu_ops == NULL) 1984 return NULL; 1985 1986 domain = bus->iommu_ops->domain_alloc(type); 1987 if (!domain) 1988 return NULL; 1989 1990 domain->type = type; 1991 /* Assume all sizes by default; the driver may override this later */ 1992 domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 1993 if (!domain->ops) 1994 domain->ops = bus->iommu_ops->default_domain_ops; 1995 1996 if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { 1997 iommu_domain_free(domain); 1998 domain = NULL; 1999 } 2000 return domain; 2001 } 2002 2003 struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) 2004 { 2005 return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); 2006 } 2007 EXPORT_SYMBOL_GPL(iommu_domain_alloc); 2008 2009 void iommu_domain_free(struct iommu_domain *domain) 2010 { 2011 if (domain->type == IOMMU_DOMAIN_SVA) 2012 mmdrop(domain->mm); 2013 iommu_put_dma_cookie(domain); 2014 domain->ops->free(domain); 2015 } 2016 EXPORT_SYMBOL_GPL(iommu_domain_free); 2017 2018 /* 2019 * Put the group's domain back to the appropriate core-owned domain - either the 2020 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 2021 */ 2022 static void __iommu_group_set_core_domain(struct iommu_group *group) 2023 { 2024 struct iommu_domain *new_domain; 2025 int ret; 2026 2027 if (group->owner) 2028 new_domain = group->blocking_domain; 2029 else 2030 new_domain = group->default_domain; 2031 2032 ret = __iommu_group_set_domain(group, new_domain); 2033 WARN(ret, "iommu driver failed to attach the default/blocking domain"); 2034 } 2035 2036 static int __iommu_attach_device(struct iommu_domain *domain, 2037 struct device *dev) 2038 { 2039 int ret; 2040 2041 if (unlikely(domain->ops->attach_dev == NULL)) 2042 return -ENODEV; 2043 2044 ret = domain->ops->attach_dev(domain, dev); 2045 if (ret) 2046 return ret; 2047 dev->iommu->attach_deferred = 0; 2048 trace_attach_device_to_domain(dev); 2049 return 0; 2050 } 2051 2052 /** 2053 * iommu_attach_device - Attach an IOMMU domain to a device 2054 * @domain: IOMMU domain to attach 2055 * @dev: Device that will be attached 2056 * 2057 * Returns 0 on success and error code on failure 2058 * 2059 * Note that EINVAL can be treated as a soft failure, indicating 2060 * that certain configuration of the domain is incompatible with 2061 * the device. In this case attaching a different domain to the 2062 * device may succeed. 2063 */ 2064 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2065 { 2066 struct iommu_group *group; 2067 int ret; 2068 2069 group = iommu_group_get(dev); 2070 if (!group) 2071 return -ENODEV; 2072 2073 /* 2074 * Lock the group to make sure the device-count doesn't 2075 * change while we are attaching 2076 */ 2077 mutex_lock(&group->mutex); 2078 ret = -EINVAL; 2079 if (iommu_group_device_count(group) != 1) 2080 goto out_unlock; 2081 2082 ret = __iommu_attach_group(domain, group); 2083 2084 out_unlock: 2085 mutex_unlock(&group->mutex); 2086 iommu_group_put(group); 2087 2088 return ret; 2089 } 2090 EXPORT_SYMBOL_GPL(iommu_attach_device); 2091 2092 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2093 { 2094 if (dev->iommu && dev->iommu->attach_deferred) 2095 return __iommu_attach_device(domain, dev); 2096 2097 return 0; 2098 } 2099 2100 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2101 { 2102 struct iommu_group *group; 2103 2104 group = iommu_group_get(dev); 2105 if (!group) 2106 return; 2107 2108 mutex_lock(&group->mutex); 2109 if (WARN_ON(domain != group->domain) || 2110 WARN_ON(iommu_group_device_count(group) != 1)) 2111 goto out_unlock; 2112 __iommu_group_set_core_domain(group); 2113 2114 out_unlock: 2115 mutex_unlock(&group->mutex); 2116 iommu_group_put(group); 2117 } 2118 EXPORT_SYMBOL_GPL(iommu_detach_device); 2119 2120 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2121 { 2122 struct iommu_domain *domain; 2123 struct iommu_group *group; 2124 2125 group = iommu_group_get(dev); 2126 if (!group) 2127 return NULL; 2128 2129 domain = group->domain; 2130 2131 iommu_group_put(group); 2132 2133 return domain; 2134 } 2135 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2136 2137 /* 2138 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2139 * guarantees that the group and its default domain are valid and correct. 2140 */ 2141 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2142 { 2143 return dev->iommu_group->default_domain; 2144 } 2145 2146 /* 2147 * IOMMU groups are really the natural working unit of the IOMMU, but 2148 * the IOMMU API works on domains and devices. Bridge that gap by 2149 * iterating over the devices in a group. Ideally we'd have a single 2150 * device which represents the requestor ID of the group, but we also 2151 * allow IOMMU drivers to create policy defined minimum sets, where 2152 * the physical hardware may be able to distiguish members, but we 2153 * wish to group them at a higher level (ex. untrusted multi-function 2154 * PCI devices). Thus we attach each device. 2155 */ 2156 static int iommu_group_do_attach_device(struct device *dev, void *data) 2157 { 2158 struct iommu_domain *domain = data; 2159 2160 return __iommu_attach_device(domain, dev); 2161 } 2162 2163 static int __iommu_attach_group(struct iommu_domain *domain, 2164 struct iommu_group *group) 2165 { 2166 int ret; 2167 2168 if (group->domain && group->domain != group->default_domain && 2169 group->domain != group->blocking_domain) 2170 return -EBUSY; 2171 2172 ret = __iommu_group_for_each_dev(group, domain, 2173 iommu_group_do_attach_device); 2174 if (ret == 0) { 2175 group->domain = domain; 2176 } else { 2177 /* 2178 * To recover from the case when certain device within the 2179 * group fails to attach to the new domain, we need force 2180 * attaching all devices back to the old domain. The old 2181 * domain is compatible for all devices in the group, 2182 * hence the iommu driver should always return success. 2183 */ 2184 struct iommu_domain *old_domain = group->domain; 2185 2186 group->domain = NULL; 2187 WARN(__iommu_group_set_domain(group, old_domain), 2188 "iommu driver failed to attach a compatible domain"); 2189 } 2190 2191 return ret; 2192 } 2193 2194 /** 2195 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2196 * @domain: IOMMU domain to attach 2197 * @group: IOMMU group that will be attached 2198 * 2199 * Returns 0 on success and error code on failure 2200 * 2201 * Note that EINVAL can be treated as a soft failure, indicating 2202 * that certain configuration of the domain is incompatible with 2203 * the group. In this case attaching a different domain to the 2204 * group may succeed. 2205 */ 2206 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2207 { 2208 int ret; 2209 2210 mutex_lock(&group->mutex); 2211 ret = __iommu_attach_group(domain, group); 2212 mutex_unlock(&group->mutex); 2213 2214 return ret; 2215 } 2216 EXPORT_SYMBOL_GPL(iommu_attach_group); 2217 2218 static int iommu_group_do_set_platform_dma(struct device *dev, void *data) 2219 { 2220 const struct iommu_ops *ops = dev_iommu_ops(dev); 2221 2222 if (!WARN_ON(!ops->set_platform_dma_ops)) 2223 ops->set_platform_dma_ops(dev); 2224 2225 return 0; 2226 } 2227 2228 static int __iommu_group_set_domain(struct iommu_group *group, 2229 struct iommu_domain *new_domain) 2230 { 2231 int ret; 2232 2233 if (group->domain == new_domain) 2234 return 0; 2235 2236 /* 2237 * New drivers should support default domains, so set_platform_dma() 2238 * op will never be called. Otherwise the NULL domain represents some 2239 * platform specific behavior. 2240 */ 2241 if (!new_domain) { 2242 __iommu_group_for_each_dev(group, NULL, 2243 iommu_group_do_set_platform_dma); 2244 group->domain = NULL; 2245 return 0; 2246 } 2247 2248 /* 2249 * Changing the domain is done by calling attach_dev() on the new 2250 * domain. This switch does not have to be atomic and DMA can be 2251 * discarded during the transition. DMA must only be able to access 2252 * either new_domain or group->domain, never something else. 2253 * 2254 * Note that this is called in error unwind paths, attaching to a 2255 * domain that has already been attached cannot fail. 2256 */ 2257 ret = __iommu_group_for_each_dev(group, new_domain, 2258 iommu_group_do_attach_device); 2259 if (ret) 2260 return ret; 2261 group->domain = new_domain; 2262 return 0; 2263 } 2264 2265 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2266 { 2267 mutex_lock(&group->mutex); 2268 __iommu_group_set_core_domain(group); 2269 mutex_unlock(&group->mutex); 2270 } 2271 EXPORT_SYMBOL_GPL(iommu_detach_group); 2272 2273 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2274 { 2275 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2276 return iova; 2277 2278 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2279 return 0; 2280 2281 return domain->ops->iova_to_phys(domain, iova); 2282 } 2283 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2284 2285 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2286 phys_addr_t paddr, size_t size, size_t *count) 2287 { 2288 unsigned int pgsize_idx, pgsize_idx_next; 2289 unsigned long pgsizes; 2290 size_t offset, pgsize, pgsize_next; 2291 unsigned long addr_merge = paddr | iova; 2292 2293 /* Page sizes supported by the hardware and small enough for @size */ 2294 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2295 2296 /* Constrain the page sizes further based on the maximum alignment */ 2297 if (likely(addr_merge)) 2298 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2299 2300 /* Make sure we have at least one suitable page size */ 2301 BUG_ON(!pgsizes); 2302 2303 /* Pick the biggest page size remaining */ 2304 pgsize_idx = __fls(pgsizes); 2305 pgsize = BIT(pgsize_idx); 2306 if (!count) 2307 return pgsize; 2308 2309 /* Find the next biggest support page size, if it exists */ 2310 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2311 if (!pgsizes) 2312 goto out_set_count; 2313 2314 pgsize_idx_next = __ffs(pgsizes); 2315 pgsize_next = BIT(pgsize_idx_next); 2316 2317 /* 2318 * There's no point trying a bigger page size unless the virtual 2319 * and physical addresses are similarly offset within the larger page. 2320 */ 2321 if ((iova ^ paddr) & (pgsize_next - 1)) 2322 goto out_set_count; 2323 2324 /* Calculate the offset to the next page size alignment boundary */ 2325 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2326 2327 /* 2328 * If size is big enough to accommodate the larger page, reduce 2329 * the number of smaller pages. 2330 */ 2331 if (offset + pgsize_next <= size) 2332 size = offset; 2333 2334 out_set_count: 2335 *count = size >> pgsize_idx; 2336 return pgsize; 2337 } 2338 2339 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, 2340 phys_addr_t paddr, size_t size, int prot, 2341 gfp_t gfp, size_t *mapped) 2342 { 2343 const struct iommu_domain_ops *ops = domain->ops; 2344 size_t pgsize, count; 2345 int ret; 2346 2347 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2348 2349 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2350 iova, &paddr, pgsize, count); 2351 2352 if (ops->map_pages) { 2353 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2354 gfp, mapped); 2355 } else { 2356 ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); 2357 *mapped = ret ? 0 : pgsize; 2358 } 2359 2360 return ret; 2361 } 2362 2363 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2364 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2365 { 2366 const struct iommu_domain_ops *ops = domain->ops; 2367 unsigned long orig_iova = iova; 2368 unsigned int min_pagesz; 2369 size_t orig_size = size; 2370 phys_addr_t orig_paddr = paddr; 2371 int ret = 0; 2372 2373 if (unlikely(!(ops->map || ops->map_pages) || 2374 domain->pgsize_bitmap == 0UL)) 2375 return -ENODEV; 2376 2377 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2378 return -EINVAL; 2379 2380 /* find out the minimum page size supported */ 2381 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2382 2383 /* 2384 * both the virtual address and the physical one, as well as 2385 * the size of the mapping, must be aligned (at least) to the 2386 * size of the smallest page supported by the hardware 2387 */ 2388 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2389 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2390 iova, &paddr, size, min_pagesz); 2391 return -EINVAL; 2392 } 2393 2394 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2395 2396 while (size) { 2397 size_t mapped = 0; 2398 2399 ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, 2400 &mapped); 2401 /* 2402 * Some pages may have been mapped, even if an error occurred, 2403 * so we should account for those so they can be unmapped. 2404 */ 2405 size -= mapped; 2406 2407 if (ret) 2408 break; 2409 2410 iova += mapped; 2411 paddr += mapped; 2412 } 2413 2414 /* unroll mapping in case something went wrong */ 2415 if (ret) 2416 iommu_unmap(domain, orig_iova, orig_size - size); 2417 else 2418 trace_map(orig_iova, orig_paddr, orig_size); 2419 2420 return ret; 2421 } 2422 2423 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2424 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2425 { 2426 const struct iommu_domain_ops *ops = domain->ops; 2427 int ret; 2428 2429 might_sleep_if(gfpflags_allow_blocking(gfp)); 2430 2431 /* Discourage passing strange GFP flags */ 2432 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2433 __GFP_HIGHMEM))) 2434 return -EINVAL; 2435 2436 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2437 if (ret == 0 && ops->iotlb_sync_map) 2438 ops->iotlb_sync_map(domain, iova, size); 2439 2440 return ret; 2441 } 2442 EXPORT_SYMBOL_GPL(iommu_map); 2443 2444 static size_t __iommu_unmap_pages(struct iommu_domain *domain, 2445 unsigned long iova, size_t size, 2446 struct iommu_iotlb_gather *iotlb_gather) 2447 { 2448 const struct iommu_domain_ops *ops = domain->ops; 2449 size_t pgsize, count; 2450 2451 pgsize = iommu_pgsize(domain, iova, iova, size, &count); 2452 return ops->unmap_pages ? 2453 ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : 2454 ops->unmap(domain, iova, pgsize, iotlb_gather); 2455 } 2456 2457 static size_t __iommu_unmap(struct iommu_domain *domain, 2458 unsigned long iova, size_t size, 2459 struct iommu_iotlb_gather *iotlb_gather) 2460 { 2461 const struct iommu_domain_ops *ops = domain->ops; 2462 size_t unmapped_page, unmapped = 0; 2463 unsigned long orig_iova = iova; 2464 unsigned int min_pagesz; 2465 2466 if (unlikely(!(ops->unmap || ops->unmap_pages) || 2467 domain->pgsize_bitmap == 0UL)) 2468 return 0; 2469 2470 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2471 return 0; 2472 2473 /* find out the minimum page size supported */ 2474 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2475 2476 /* 2477 * The virtual address, as well as the size of the mapping, must be 2478 * aligned (at least) to the size of the smallest page supported 2479 * by the hardware 2480 */ 2481 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2482 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2483 iova, size, min_pagesz); 2484 return 0; 2485 } 2486 2487 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2488 2489 /* 2490 * Keep iterating until we either unmap 'size' bytes (or more) 2491 * or we hit an area that isn't mapped. 2492 */ 2493 while (unmapped < size) { 2494 unmapped_page = __iommu_unmap_pages(domain, iova, 2495 size - unmapped, 2496 iotlb_gather); 2497 if (!unmapped_page) 2498 break; 2499 2500 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2501 iova, unmapped_page); 2502 2503 iova += unmapped_page; 2504 unmapped += unmapped_page; 2505 } 2506 2507 trace_unmap(orig_iova, size, unmapped); 2508 return unmapped; 2509 } 2510 2511 size_t iommu_unmap(struct iommu_domain *domain, 2512 unsigned long iova, size_t size) 2513 { 2514 struct iommu_iotlb_gather iotlb_gather; 2515 size_t ret; 2516 2517 iommu_iotlb_gather_init(&iotlb_gather); 2518 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2519 iommu_iotlb_sync(domain, &iotlb_gather); 2520 2521 return ret; 2522 } 2523 EXPORT_SYMBOL_GPL(iommu_unmap); 2524 2525 size_t iommu_unmap_fast(struct iommu_domain *domain, 2526 unsigned long iova, size_t size, 2527 struct iommu_iotlb_gather *iotlb_gather) 2528 { 2529 return __iommu_unmap(domain, iova, size, iotlb_gather); 2530 } 2531 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2532 2533 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2534 struct scatterlist *sg, unsigned int nents, int prot, 2535 gfp_t gfp) 2536 { 2537 const struct iommu_domain_ops *ops = domain->ops; 2538 size_t len = 0, mapped = 0; 2539 phys_addr_t start; 2540 unsigned int i = 0; 2541 int ret; 2542 2543 might_sleep_if(gfpflags_allow_blocking(gfp)); 2544 2545 /* Discourage passing strange GFP flags */ 2546 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2547 __GFP_HIGHMEM))) 2548 return -EINVAL; 2549 2550 while (i <= nents) { 2551 phys_addr_t s_phys = sg_phys(sg); 2552 2553 if (len && s_phys != start + len) { 2554 ret = __iommu_map(domain, iova + mapped, start, 2555 len, prot, gfp); 2556 2557 if (ret) 2558 goto out_err; 2559 2560 mapped += len; 2561 len = 0; 2562 } 2563 2564 if (sg_is_dma_bus_address(sg)) 2565 goto next; 2566 2567 if (len) { 2568 len += sg->length; 2569 } else { 2570 len = sg->length; 2571 start = s_phys; 2572 } 2573 2574 next: 2575 if (++i < nents) 2576 sg = sg_next(sg); 2577 } 2578 2579 if (ops->iotlb_sync_map) 2580 ops->iotlb_sync_map(domain, iova, mapped); 2581 return mapped; 2582 2583 out_err: 2584 /* undo mappings already done */ 2585 iommu_unmap(domain, iova, mapped); 2586 2587 return ret; 2588 } 2589 EXPORT_SYMBOL_GPL(iommu_map_sg); 2590 2591 /** 2592 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2593 * @domain: the iommu domain where the fault has happened 2594 * @dev: the device where the fault has happened 2595 * @iova: the faulting address 2596 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2597 * 2598 * This function should be called by the low-level IOMMU implementations 2599 * whenever IOMMU faults happen, to allow high-level users, that are 2600 * interested in such events, to know about them. 2601 * 2602 * This event may be useful for several possible use cases: 2603 * - mere logging of the event 2604 * - dynamic TLB/PTE loading 2605 * - if restarting of the faulting device is required 2606 * 2607 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2608 * PTE/TLB loading will one day be supported, implementations will be able 2609 * to tell whether it succeeded or not according to this return value). 2610 * 2611 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2612 * (though fault handlers can also return -ENOSYS, in case they want to 2613 * elicit the default behavior of the IOMMU drivers). 2614 */ 2615 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2616 unsigned long iova, int flags) 2617 { 2618 int ret = -ENOSYS; 2619 2620 /* 2621 * if upper layers showed interest and installed a fault handler, 2622 * invoke it. 2623 */ 2624 if (domain->handler) 2625 ret = domain->handler(domain, dev, iova, flags, 2626 domain->handler_token); 2627 2628 trace_io_page_fault(dev, iova, flags); 2629 return ret; 2630 } 2631 EXPORT_SYMBOL_GPL(report_iommu_fault); 2632 2633 static int __init iommu_init(void) 2634 { 2635 iommu_group_kset = kset_create_and_add("iommu_groups", 2636 NULL, kernel_kobj); 2637 BUG_ON(!iommu_group_kset); 2638 2639 iommu_debugfs_setup(); 2640 2641 return 0; 2642 } 2643 core_initcall(iommu_init); 2644 2645 int iommu_enable_nesting(struct iommu_domain *domain) 2646 { 2647 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2648 return -EINVAL; 2649 if (!domain->ops->enable_nesting) 2650 return -EINVAL; 2651 return domain->ops->enable_nesting(domain); 2652 } 2653 EXPORT_SYMBOL_GPL(iommu_enable_nesting); 2654 2655 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2656 unsigned long quirk) 2657 { 2658 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2659 return -EINVAL; 2660 if (!domain->ops->set_pgtable_quirks) 2661 return -EINVAL; 2662 return domain->ops->set_pgtable_quirks(domain, quirk); 2663 } 2664 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2665 2666 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2667 { 2668 const struct iommu_ops *ops = dev_iommu_ops(dev); 2669 2670 if (ops->get_resv_regions) 2671 ops->get_resv_regions(dev, list); 2672 } 2673 2674 /** 2675 * iommu_put_resv_regions - release resered regions 2676 * @dev: device for which to free reserved regions 2677 * @list: reserved region list for device 2678 * 2679 * This releases a reserved region list acquired by iommu_get_resv_regions(). 2680 */ 2681 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2682 { 2683 struct iommu_resv_region *entry, *next; 2684 2685 list_for_each_entry_safe(entry, next, list, list) { 2686 if (entry->free) 2687 entry->free(dev, entry); 2688 else 2689 kfree(entry); 2690 } 2691 } 2692 EXPORT_SYMBOL(iommu_put_resv_regions); 2693 2694 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2695 size_t length, int prot, 2696 enum iommu_resv_type type, 2697 gfp_t gfp) 2698 { 2699 struct iommu_resv_region *region; 2700 2701 region = kzalloc(sizeof(*region), gfp); 2702 if (!region) 2703 return NULL; 2704 2705 INIT_LIST_HEAD(®ion->list); 2706 region->start = start; 2707 region->length = length; 2708 region->prot = prot; 2709 region->type = type; 2710 return region; 2711 } 2712 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2713 2714 void iommu_set_default_passthrough(bool cmd_line) 2715 { 2716 if (cmd_line) 2717 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2718 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2719 } 2720 2721 void iommu_set_default_translated(bool cmd_line) 2722 { 2723 if (cmd_line) 2724 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2725 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2726 } 2727 2728 bool iommu_default_passthrough(void) 2729 { 2730 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2731 } 2732 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2733 2734 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 2735 { 2736 const struct iommu_ops *ops = NULL; 2737 struct iommu_device *iommu; 2738 2739 spin_lock(&iommu_device_lock); 2740 list_for_each_entry(iommu, &iommu_device_list, list) 2741 if (iommu->fwnode == fwnode) { 2742 ops = iommu->ops; 2743 break; 2744 } 2745 spin_unlock(&iommu_device_lock); 2746 return ops; 2747 } 2748 2749 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 2750 const struct iommu_ops *ops) 2751 { 2752 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2753 2754 if (fwspec) 2755 return ops == fwspec->ops ? 0 : -EINVAL; 2756 2757 if (!dev_iommu_get(dev)) 2758 return -ENOMEM; 2759 2760 /* Preallocate for the overwhelmingly common case of 1 ID */ 2761 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2762 if (!fwspec) 2763 return -ENOMEM; 2764 2765 of_node_get(to_of_node(iommu_fwnode)); 2766 fwspec->iommu_fwnode = iommu_fwnode; 2767 fwspec->ops = ops; 2768 dev_iommu_fwspec_set(dev, fwspec); 2769 return 0; 2770 } 2771 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2772 2773 void iommu_fwspec_free(struct device *dev) 2774 { 2775 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2776 2777 if (fwspec) { 2778 fwnode_handle_put(fwspec->iommu_fwnode); 2779 kfree(fwspec); 2780 dev_iommu_fwspec_set(dev, NULL); 2781 } 2782 } 2783 EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2784 2785 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2786 { 2787 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2788 int i, new_num; 2789 2790 if (!fwspec) 2791 return -EINVAL; 2792 2793 new_num = fwspec->num_ids + num_ids; 2794 if (new_num > 1) { 2795 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2796 GFP_KERNEL); 2797 if (!fwspec) 2798 return -ENOMEM; 2799 2800 dev_iommu_fwspec_set(dev, fwspec); 2801 } 2802 2803 for (i = 0; i < num_ids; i++) 2804 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2805 2806 fwspec->num_ids = new_num; 2807 return 0; 2808 } 2809 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2810 2811 /* 2812 * Per device IOMMU features. 2813 */ 2814 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2815 { 2816 if (dev->iommu && dev->iommu->iommu_dev) { 2817 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2818 2819 if (ops->dev_enable_feat) 2820 return ops->dev_enable_feat(dev, feat); 2821 } 2822 2823 return -ENODEV; 2824 } 2825 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2826 2827 /* 2828 * The device drivers should do the necessary cleanups before calling this. 2829 */ 2830 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2831 { 2832 if (dev->iommu && dev->iommu->iommu_dev) { 2833 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2834 2835 if (ops->dev_disable_feat) 2836 return ops->dev_disable_feat(dev, feat); 2837 } 2838 2839 return -EBUSY; 2840 } 2841 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2842 2843 /* 2844 * Changes the default domain of an iommu group that has *only* one device 2845 * 2846 * @group: The group for which the default domain should be changed 2847 * @prev_dev: The device in the group (this is used to make sure that the device 2848 * hasn't changed after the caller has called this function) 2849 * @type: The type of the new default domain that gets associated with the group 2850 * 2851 * Returns 0 on success and error code on failure 2852 * 2853 * Note: 2854 * 1. Presently, this function is called only when user requests to change the 2855 * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type 2856 * Please take a closer look if intended to use for other purposes. 2857 */ 2858 static int iommu_change_dev_def_domain(struct iommu_group *group, 2859 struct device *prev_dev, int type) 2860 { 2861 struct iommu_domain *prev_dom; 2862 struct group_device *grp_dev; 2863 int ret, dev_def_dom; 2864 struct device *dev; 2865 2866 mutex_lock(&group->mutex); 2867 2868 if (group->default_domain != group->domain) { 2869 dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); 2870 ret = -EBUSY; 2871 goto out; 2872 } 2873 2874 /* 2875 * iommu group wasn't locked while acquiring device lock in 2876 * iommu_group_store_type(). So, make sure that the device count hasn't 2877 * changed while acquiring device lock. 2878 * 2879 * Changing default domain of an iommu group with two or more devices 2880 * isn't supported because there could be a potential deadlock. Consider 2881 * the following scenario. T1 is trying to acquire device locks of all 2882 * the devices in the group and before it could acquire all of them, 2883 * there could be another thread T2 (from different sub-system and use 2884 * case) that has already acquired some of the device locks and might be 2885 * waiting for T1 to release other device locks. 2886 */ 2887 if (iommu_group_device_count(group) != 1) { 2888 dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n"); 2889 ret = -EINVAL; 2890 goto out; 2891 } 2892 2893 /* Since group has only one device */ 2894 grp_dev = list_first_entry(&group->devices, struct group_device, list); 2895 dev = grp_dev->dev; 2896 2897 if (prev_dev != dev) { 2898 dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n"); 2899 ret = -EBUSY; 2900 goto out; 2901 } 2902 2903 prev_dom = group->default_domain; 2904 if (!prev_dom) { 2905 ret = -EINVAL; 2906 goto out; 2907 } 2908 2909 dev_def_dom = iommu_get_def_domain_type(dev); 2910 if (!type) { 2911 /* 2912 * If the user hasn't requested any specific type of domain and 2913 * if the device supports both the domains, then default to the 2914 * domain the device was booted with 2915 */ 2916 type = dev_def_dom ? : iommu_def_domain_type; 2917 } else if (dev_def_dom && type != dev_def_dom) { 2918 dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n", 2919 iommu_domain_type_str(type)); 2920 ret = -EINVAL; 2921 goto out; 2922 } 2923 2924 /* 2925 * Switch to a new domain only if the requested domain type is different 2926 * from the existing default domain type 2927 */ 2928 if (prev_dom->type == type) { 2929 ret = 0; 2930 goto out; 2931 } 2932 2933 /* We can bring up a flush queue without tearing down the domain */ 2934 if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { 2935 ret = iommu_dma_init_fq(prev_dom); 2936 if (!ret) 2937 prev_dom->type = IOMMU_DOMAIN_DMA_FQ; 2938 goto out; 2939 } 2940 2941 /* Sets group->default_domain to the newly allocated domain */ 2942 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2943 if (ret) 2944 goto out; 2945 2946 ret = iommu_create_device_direct_mappings(group, dev); 2947 if (ret) 2948 goto free_new_domain; 2949 2950 ret = __iommu_attach_device(group->default_domain, dev); 2951 if (ret) 2952 goto free_new_domain; 2953 2954 group->domain = group->default_domain; 2955 2956 /* 2957 * Release the mutex here because ops->probe_finalize() call-back of 2958 * some vendor IOMMU drivers calls arm_iommu_attach_device() which 2959 * in-turn might call back into IOMMU core code, where it tries to take 2960 * group->mutex, resulting in a deadlock. 2961 */ 2962 mutex_unlock(&group->mutex); 2963 2964 /* Make sure dma_ops is appropriatley set */ 2965 iommu_group_do_probe_finalize(dev, group->default_domain); 2966 iommu_domain_free(prev_dom); 2967 return 0; 2968 2969 free_new_domain: 2970 iommu_domain_free(group->default_domain); 2971 group->default_domain = prev_dom; 2972 group->domain = prev_dom; 2973 2974 out: 2975 mutex_unlock(&group->mutex); 2976 2977 return ret; 2978 } 2979 2980 /* 2981 * Changing the default domain through sysfs requires the users to unbind the 2982 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2983 * transition. Return failure if this isn't met. 2984 * 2985 * We need to consider the race between this and the device release path. 2986 * device_lock(dev) is used here to guarantee that the device release path 2987 * will not be entered at the same time. 2988 */ 2989 static ssize_t iommu_group_store_type(struct iommu_group *group, 2990 const char *buf, size_t count) 2991 { 2992 struct group_device *grp_dev; 2993 struct device *dev; 2994 int ret, req_type; 2995 2996 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2997 return -EACCES; 2998 2999 if (WARN_ON(!group) || !group->default_domain) 3000 return -EINVAL; 3001 3002 if (sysfs_streq(buf, "identity")) 3003 req_type = IOMMU_DOMAIN_IDENTITY; 3004 else if (sysfs_streq(buf, "DMA")) 3005 req_type = IOMMU_DOMAIN_DMA; 3006 else if (sysfs_streq(buf, "DMA-FQ")) 3007 req_type = IOMMU_DOMAIN_DMA_FQ; 3008 else if (sysfs_streq(buf, "auto")) 3009 req_type = 0; 3010 else 3011 return -EINVAL; 3012 3013 /* 3014 * Lock/Unlock the group mutex here before device lock to 3015 * 1. Make sure that the iommu group has only one device (this is a 3016 * prerequisite for step 2) 3017 * 2. Get struct *dev which is needed to lock device 3018 */ 3019 mutex_lock(&group->mutex); 3020 if (iommu_group_device_count(group) != 1) { 3021 mutex_unlock(&group->mutex); 3022 pr_err_ratelimited("Cannot change default domain: Group has more than one device\n"); 3023 return -EINVAL; 3024 } 3025 3026 /* Since group has only one device */ 3027 grp_dev = list_first_entry(&group->devices, struct group_device, list); 3028 dev = grp_dev->dev; 3029 get_device(dev); 3030 3031 /* 3032 * Don't hold the group mutex because taking group mutex first and then 3033 * the device lock could potentially cause a deadlock as below. Assume 3034 * two threads T1 and T2. T1 is trying to change default domain of an 3035 * iommu group and T2 is trying to hot unplug a device or release [1] VF 3036 * of a PCIe device which is in the same iommu group. T1 takes group 3037 * mutex and before it could take device lock assume T2 has taken device 3038 * lock and is yet to take group mutex. Now, both the threads will be 3039 * waiting for the other thread to release lock. Below, lock order was 3040 * suggested. 3041 * device_lock(dev); 3042 * mutex_lock(&group->mutex); 3043 * iommu_change_dev_def_domain(); 3044 * mutex_unlock(&group->mutex); 3045 * device_unlock(dev); 3046 * 3047 * [1] Typical device release path 3048 * device_lock() from device/driver core code 3049 * -> bus_notifier() 3050 * -> iommu_bus_notifier() 3051 * -> iommu_release_device() 3052 * -> ops->release_device() vendor driver calls back iommu core code 3053 * -> mutex_lock() from iommu core code 3054 */ 3055 mutex_unlock(&group->mutex); 3056 3057 /* Check if the device in the group still has a driver bound to it */ 3058 device_lock(dev); 3059 if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && 3060 group->default_domain->type == IOMMU_DOMAIN_DMA)) { 3061 pr_err_ratelimited("Device is still bound to driver\n"); 3062 ret = -EBUSY; 3063 goto out; 3064 } 3065 3066 ret = iommu_change_dev_def_domain(group, dev, req_type); 3067 ret = ret ?: count; 3068 3069 out: 3070 device_unlock(dev); 3071 put_device(dev); 3072 3073 return ret; 3074 } 3075 3076 static bool iommu_is_default_domain(struct iommu_group *group) 3077 { 3078 if (group->domain == group->default_domain) 3079 return true; 3080 3081 /* 3082 * If the default domain was set to identity and it is still an identity 3083 * domain then we consider this a pass. This happens because of 3084 * amd_iommu_init_device() replacing the default idenytity domain with an 3085 * identity domain that has a different configuration for AMDGPU. 3086 */ 3087 if (group->default_domain && 3088 group->default_domain->type == IOMMU_DOMAIN_IDENTITY && 3089 group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) 3090 return true; 3091 return false; 3092 } 3093 3094 /** 3095 * iommu_device_use_default_domain() - Device driver wants to handle device 3096 * DMA through the kernel DMA API. 3097 * @dev: The device. 3098 * 3099 * The device driver about to bind @dev wants to do DMA through the kernel 3100 * DMA API. Return 0 if it is allowed, otherwise an error. 3101 */ 3102 int iommu_device_use_default_domain(struct device *dev) 3103 { 3104 struct iommu_group *group = iommu_group_get(dev); 3105 int ret = 0; 3106 3107 if (!group) 3108 return 0; 3109 3110 mutex_lock(&group->mutex); 3111 if (group->owner_cnt) { 3112 if (group->owner || !iommu_is_default_domain(group) || 3113 !xa_empty(&group->pasid_array)) { 3114 ret = -EBUSY; 3115 goto unlock_out; 3116 } 3117 } 3118 3119 group->owner_cnt++; 3120 3121 unlock_out: 3122 mutex_unlock(&group->mutex); 3123 iommu_group_put(group); 3124 3125 return ret; 3126 } 3127 3128 /** 3129 * iommu_device_unuse_default_domain() - Device driver stops handling device 3130 * DMA through the kernel DMA API. 3131 * @dev: The device. 3132 * 3133 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3134 * It must be called after iommu_device_use_default_domain(). 3135 */ 3136 void iommu_device_unuse_default_domain(struct device *dev) 3137 { 3138 struct iommu_group *group = iommu_group_get(dev); 3139 3140 if (!group) 3141 return; 3142 3143 mutex_lock(&group->mutex); 3144 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3145 group->owner_cnt--; 3146 3147 mutex_unlock(&group->mutex); 3148 iommu_group_put(group); 3149 } 3150 3151 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3152 { 3153 struct group_device *dev = 3154 list_first_entry(&group->devices, struct group_device, list); 3155 3156 if (group->blocking_domain) 3157 return 0; 3158 3159 group->blocking_domain = 3160 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3161 if (!group->blocking_domain) { 3162 /* 3163 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3164 * create an empty domain instead. 3165 */ 3166 group->blocking_domain = __iommu_domain_alloc( 3167 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3168 if (!group->blocking_domain) 3169 return -EINVAL; 3170 } 3171 return 0; 3172 } 3173 3174 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3175 { 3176 int ret; 3177 3178 if ((group->domain && group->domain != group->default_domain) || 3179 !xa_empty(&group->pasid_array)) 3180 return -EBUSY; 3181 3182 ret = __iommu_group_alloc_blocking_domain(group); 3183 if (ret) 3184 return ret; 3185 ret = __iommu_group_set_domain(group, group->blocking_domain); 3186 if (ret) 3187 return ret; 3188 3189 group->owner = owner; 3190 group->owner_cnt++; 3191 return 0; 3192 } 3193 3194 /** 3195 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3196 * @group: The group. 3197 * @owner: Caller specified pointer. Used for exclusive ownership. 3198 * 3199 * This is to support backward compatibility for vfio which manages the dma 3200 * ownership in iommu_group level. New invocations on this interface should be 3201 * prohibited. Only a single owner may exist for a group. 3202 */ 3203 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3204 { 3205 int ret = 0; 3206 3207 if (WARN_ON(!owner)) 3208 return -EINVAL; 3209 3210 mutex_lock(&group->mutex); 3211 if (group->owner_cnt) { 3212 ret = -EPERM; 3213 goto unlock_out; 3214 } 3215 3216 ret = __iommu_take_dma_ownership(group, owner); 3217 unlock_out: 3218 mutex_unlock(&group->mutex); 3219 3220 return ret; 3221 } 3222 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3223 3224 /** 3225 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3226 * @dev: The device. 3227 * @owner: Caller specified pointer. Used for exclusive ownership. 3228 * 3229 * Claim the DMA ownership of a device. Multiple devices in the same group may 3230 * concurrently claim ownership if they present the same owner value. Returns 0 3231 * on success and error code on failure 3232 */ 3233 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3234 { 3235 struct iommu_group *group; 3236 int ret = 0; 3237 3238 if (WARN_ON(!owner)) 3239 return -EINVAL; 3240 3241 group = iommu_group_get(dev); 3242 if (!group) 3243 return -ENODEV; 3244 3245 mutex_lock(&group->mutex); 3246 if (group->owner_cnt) { 3247 if (group->owner != owner) { 3248 ret = -EPERM; 3249 goto unlock_out; 3250 } 3251 group->owner_cnt++; 3252 goto unlock_out; 3253 } 3254 3255 ret = __iommu_take_dma_ownership(group, owner); 3256 unlock_out: 3257 mutex_unlock(&group->mutex); 3258 iommu_group_put(group); 3259 3260 return ret; 3261 } 3262 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3263 3264 static void __iommu_release_dma_ownership(struct iommu_group *group) 3265 { 3266 int ret; 3267 3268 if (WARN_ON(!group->owner_cnt || !group->owner || 3269 !xa_empty(&group->pasid_array))) 3270 return; 3271 3272 group->owner_cnt = 0; 3273 group->owner = NULL; 3274 ret = __iommu_group_set_domain(group, group->default_domain); 3275 WARN(ret, "iommu driver failed to attach the default domain"); 3276 } 3277 3278 /** 3279 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3280 * @dev: The device 3281 * 3282 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3283 */ 3284 void iommu_group_release_dma_owner(struct iommu_group *group) 3285 { 3286 mutex_lock(&group->mutex); 3287 __iommu_release_dma_ownership(group); 3288 mutex_unlock(&group->mutex); 3289 } 3290 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3291 3292 /** 3293 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3294 * @group: The device. 3295 * 3296 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3297 */ 3298 void iommu_device_release_dma_owner(struct device *dev) 3299 { 3300 struct iommu_group *group = iommu_group_get(dev); 3301 3302 mutex_lock(&group->mutex); 3303 if (group->owner_cnt > 1) 3304 group->owner_cnt--; 3305 else 3306 __iommu_release_dma_ownership(group); 3307 mutex_unlock(&group->mutex); 3308 iommu_group_put(group); 3309 } 3310 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3311 3312 /** 3313 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3314 * @group: The group. 3315 * 3316 * This provides status query on a given group. It is racy and only for 3317 * non-binding status reporting. 3318 */ 3319 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3320 { 3321 unsigned int user; 3322 3323 mutex_lock(&group->mutex); 3324 user = group->owner_cnt; 3325 mutex_unlock(&group->mutex); 3326 3327 return user; 3328 } 3329 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3330 3331 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3332 struct iommu_group *group, ioasid_t pasid) 3333 { 3334 struct group_device *device; 3335 int ret = 0; 3336 3337 list_for_each_entry(device, &group->devices, list) { 3338 ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); 3339 if (ret) 3340 break; 3341 } 3342 3343 return ret; 3344 } 3345 3346 static void __iommu_remove_group_pasid(struct iommu_group *group, 3347 ioasid_t pasid) 3348 { 3349 struct group_device *device; 3350 const struct iommu_ops *ops; 3351 3352 list_for_each_entry(device, &group->devices, list) { 3353 ops = dev_iommu_ops(device->dev); 3354 ops->remove_dev_pasid(device->dev, pasid); 3355 } 3356 } 3357 3358 /* 3359 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3360 * @domain: the iommu domain. 3361 * @dev: the attached device. 3362 * @pasid: the pasid of the device. 3363 * 3364 * Return: 0 on success, or an error. 3365 */ 3366 int iommu_attach_device_pasid(struct iommu_domain *domain, 3367 struct device *dev, ioasid_t pasid) 3368 { 3369 struct iommu_group *group; 3370 void *curr; 3371 int ret; 3372 3373 if (!domain->ops->set_dev_pasid) 3374 return -EOPNOTSUPP; 3375 3376 group = iommu_group_get(dev); 3377 if (!group) 3378 return -ENODEV; 3379 3380 mutex_lock(&group->mutex); 3381 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); 3382 if (curr) { 3383 ret = xa_err(curr) ? : -EBUSY; 3384 goto out_unlock; 3385 } 3386 3387 ret = __iommu_set_group_pasid(domain, group, pasid); 3388 if (ret) { 3389 __iommu_remove_group_pasid(group, pasid); 3390 xa_erase(&group->pasid_array, pasid); 3391 } 3392 out_unlock: 3393 mutex_unlock(&group->mutex); 3394 iommu_group_put(group); 3395 3396 return ret; 3397 } 3398 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3399 3400 /* 3401 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3402 * @domain: the iommu domain. 3403 * @dev: the attached device. 3404 * @pasid: the pasid of the device. 3405 * 3406 * The @domain must have been attached to @pasid of the @dev with 3407 * iommu_attach_device_pasid(). 3408 */ 3409 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3410 ioasid_t pasid) 3411 { 3412 struct iommu_group *group = iommu_group_get(dev); 3413 3414 mutex_lock(&group->mutex); 3415 __iommu_remove_group_pasid(group, pasid); 3416 WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); 3417 mutex_unlock(&group->mutex); 3418 3419 iommu_group_put(group); 3420 } 3421 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3422 3423 /* 3424 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev 3425 * @dev: the queried device 3426 * @pasid: the pasid of the device 3427 * @type: matched domain type, 0 for any match 3428 * 3429 * This is a variant of iommu_get_domain_for_dev(). It returns the existing 3430 * domain attached to pasid of a device. Callers must hold a lock around this 3431 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of 3432 * type is being manipulated. This API does not internally resolve races with 3433 * attach/detach. 3434 * 3435 * Return: attached domain on success, NULL otherwise. 3436 */ 3437 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, 3438 ioasid_t pasid, 3439 unsigned int type) 3440 { 3441 struct iommu_domain *domain; 3442 struct iommu_group *group; 3443 3444 group = iommu_group_get(dev); 3445 if (!group) 3446 return NULL; 3447 3448 xa_lock(&group->pasid_array); 3449 domain = xa_load(&group->pasid_array, pasid); 3450 if (type && domain && domain->type != type) 3451 domain = ERR_PTR(-EBUSY); 3452 xa_unlock(&group->pasid_array); 3453 iommu_group_put(group); 3454 3455 return domain; 3456 } 3457 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); 3458 3459 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, 3460 struct mm_struct *mm) 3461 { 3462 const struct iommu_ops *ops = dev_iommu_ops(dev); 3463 struct iommu_domain *domain; 3464 3465 domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); 3466 if (!domain) 3467 return NULL; 3468 3469 domain->type = IOMMU_DOMAIN_SVA; 3470 mmgrab(mm); 3471 domain->mm = mm; 3472 domain->iopf_handler = iommu_sva_handle_iopf; 3473 domain->fault_data = mm; 3474 3475 return domain; 3476 } 3477