1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7 #define pr_fmt(fmt) "iommu: " fmt 8 9 #include <linux/amba/bus.h> 10 #include <linux/device.h> 11 #include <linux/kernel.h> 12 #include <linux/bits.h> 13 #include <linux/bug.h> 14 #include <linux/types.h> 15 #include <linux/init.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/errno.h> 19 #include <linux/host1x_context_bus.h> 20 #include <linux/iommu.h> 21 #include <linux/idr.h> 22 #include <linux/err.h> 23 #include <linux/pci.h> 24 #include <linux/bitops.h> 25 #include <linux/platform_device.h> 26 #include <linux/property.h> 27 #include <linux/fsl/mc.h> 28 #include <linux/module.h> 29 #include <linux/cc_platform.h> 30 #include <trace/events/iommu.h> 31 32 #include "dma-iommu.h" 33 34 static struct kset *iommu_group_kset; 35 static DEFINE_IDA(iommu_group_ida); 36 37 static unsigned int iommu_def_domain_type __read_mostly; 38 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 39 static u32 iommu_cmd_line __read_mostly; 40 41 struct iommu_group { 42 struct kobject kobj; 43 struct kobject *devices_kobj; 44 struct list_head devices; 45 struct mutex mutex; 46 void *iommu_data; 47 void (*iommu_data_release)(void *iommu_data); 48 char *name; 49 int id; 50 struct iommu_domain *default_domain; 51 struct iommu_domain *blocking_domain; 52 struct iommu_domain *domain; 53 struct list_head entry; 54 unsigned int owner_cnt; 55 void *owner; 56 }; 57 58 struct group_device { 59 struct list_head list; 60 struct device *dev; 61 char *name; 62 }; 63 64 struct iommu_group_attribute { 65 struct attribute attr; 66 ssize_t (*show)(struct iommu_group *group, char *buf); 67 ssize_t (*store)(struct iommu_group *group, 68 const char *buf, size_t count); 69 }; 70 71 static const char * const iommu_group_resv_type_string[] = { 72 [IOMMU_RESV_DIRECT] = "direct", 73 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 74 [IOMMU_RESV_RESERVED] = "reserved", 75 [IOMMU_RESV_MSI] = "msi", 76 [IOMMU_RESV_SW_MSI] = "msi", 77 }; 78 79 #define IOMMU_CMD_LINE_DMA_API BIT(0) 80 #define IOMMU_CMD_LINE_STRICT BIT(1) 81 82 static int iommu_bus_notifier(struct notifier_block *nb, 83 unsigned long action, void *data); 84 static int iommu_alloc_default_domain(struct iommu_group *group, 85 struct device *dev); 86 static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, 87 unsigned type); 88 static int __iommu_attach_device(struct iommu_domain *domain, 89 struct device *dev); 90 static int __iommu_attach_group(struct iommu_domain *domain, 91 struct iommu_group *group); 92 static int __iommu_group_set_domain(struct iommu_group *group, 93 struct iommu_domain *new_domain); 94 static int iommu_create_device_direct_mappings(struct iommu_group *group, 95 struct device *dev); 96 static struct iommu_group *iommu_group_get_for_dev(struct device *dev); 97 static ssize_t iommu_group_store_type(struct iommu_group *group, 98 const char *buf, size_t count); 99 100 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 101 struct iommu_group_attribute iommu_group_attr_##_name = \ 102 __ATTR(_name, _mode, _show, _store) 103 104 #define to_iommu_group_attr(_attr) \ 105 container_of(_attr, struct iommu_group_attribute, attr) 106 #define to_iommu_group(_kobj) \ 107 container_of(_kobj, struct iommu_group, kobj) 108 109 static LIST_HEAD(iommu_device_list); 110 static DEFINE_SPINLOCK(iommu_device_lock); 111 112 static struct bus_type * const iommu_buses[] = { 113 &platform_bus_type, 114 #ifdef CONFIG_PCI 115 &pci_bus_type, 116 #endif 117 #ifdef CONFIG_ARM_AMBA 118 &amba_bustype, 119 #endif 120 #ifdef CONFIG_FSL_MC_BUS 121 &fsl_mc_bus_type, 122 #endif 123 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 124 &host1x_context_device_bus_type, 125 #endif 126 }; 127 128 /* 129 * Use a function instead of an array here because the domain-type is a 130 * bit-field, so an array would waste memory. 131 */ 132 static const char *iommu_domain_type_str(unsigned int t) 133 { 134 switch (t) { 135 case IOMMU_DOMAIN_BLOCKED: 136 return "Blocked"; 137 case IOMMU_DOMAIN_IDENTITY: 138 return "Passthrough"; 139 case IOMMU_DOMAIN_UNMANAGED: 140 return "Unmanaged"; 141 case IOMMU_DOMAIN_DMA: 142 case IOMMU_DOMAIN_DMA_FQ: 143 return "Translated"; 144 default: 145 return "Unknown"; 146 } 147 } 148 149 static int __init iommu_subsys_init(void) 150 { 151 struct notifier_block *nb; 152 153 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 154 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 155 iommu_set_default_passthrough(false); 156 else 157 iommu_set_default_translated(false); 158 159 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 160 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 161 iommu_set_default_translated(false); 162 } 163 } 164 165 if (!iommu_default_passthrough() && !iommu_dma_strict) 166 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 167 168 pr_info("Default domain type: %s %s\n", 169 iommu_domain_type_str(iommu_def_domain_type), 170 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 171 "(set via kernel command line)" : ""); 172 173 if (!iommu_default_passthrough()) 174 pr_info("DMA domain TLB invalidation policy: %s mode %s\n", 175 iommu_dma_strict ? "strict" : "lazy", 176 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 177 "(set via kernel command line)" : ""); 178 179 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 180 if (!nb) 181 return -ENOMEM; 182 183 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 184 nb[i].notifier_call = iommu_bus_notifier; 185 bus_register_notifier(iommu_buses[i], &nb[i]); 186 } 187 188 return 0; 189 } 190 subsys_initcall(iommu_subsys_init); 191 192 static int remove_iommu_group(struct device *dev, void *data) 193 { 194 if (dev->iommu && dev->iommu->iommu_dev == data) 195 iommu_release_device(dev); 196 197 return 0; 198 } 199 200 /** 201 * iommu_device_register() - Register an IOMMU hardware instance 202 * @iommu: IOMMU handle for the instance 203 * @ops: IOMMU ops to associate with the instance 204 * @hwdev: (optional) actual instance device, used for fwnode lookup 205 * 206 * Return: 0 on success, or an error. 207 */ 208 int iommu_device_register(struct iommu_device *iommu, 209 const struct iommu_ops *ops, struct device *hwdev) 210 { 211 int err = 0; 212 213 /* We need to be able to take module references appropriately */ 214 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 215 return -EINVAL; 216 /* 217 * Temporarily enforce global restriction to a single driver. This was 218 * already the de-facto behaviour, since any possible combination of 219 * existing drivers would compete for at least the PCI or platform bus. 220 */ 221 if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) 222 return -EBUSY; 223 224 iommu->ops = ops; 225 if (hwdev) 226 iommu->fwnode = dev_fwnode(hwdev); 227 228 spin_lock(&iommu_device_lock); 229 list_add_tail(&iommu->list, &iommu_device_list); 230 spin_unlock(&iommu_device_lock); 231 232 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { 233 iommu_buses[i]->iommu_ops = ops; 234 err = bus_iommu_probe(iommu_buses[i]); 235 } 236 if (err) 237 iommu_device_unregister(iommu); 238 return err; 239 } 240 EXPORT_SYMBOL_GPL(iommu_device_register); 241 242 void iommu_device_unregister(struct iommu_device *iommu) 243 { 244 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 245 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 246 247 spin_lock(&iommu_device_lock); 248 list_del(&iommu->list); 249 spin_unlock(&iommu_device_lock); 250 } 251 EXPORT_SYMBOL_GPL(iommu_device_unregister); 252 253 static struct dev_iommu *dev_iommu_get(struct device *dev) 254 { 255 struct dev_iommu *param = dev->iommu; 256 257 if (param) 258 return param; 259 260 param = kzalloc(sizeof(*param), GFP_KERNEL); 261 if (!param) 262 return NULL; 263 264 mutex_init(¶m->lock); 265 dev->iommu = param; 266 return param; 267 } 268 269 static void dev_iommu_free(struct device *dev) 270 { 271 struct dev_iommu *param = dev->iommu; 272 273 dev->iommu = NULL; 274 if (param->fwspec) { 275 fwnode_handle_put(param->fwspec->iommu_fwnode); 276 kfree(param->fwspec); 277 } 278 kfree(param); 279 } 280 281 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 282 { 283 const struct iommu_ops *ops = dev->bus->iommu_ops; 284 struct iommu_device *iommu_dev; 285 struct iommu_group *group; 286 int ret; 287 288 if (!ops) 289 return -ENODEV; 290 291 if (!dev_iommu_get(dev)) 292 return -ENOMEM; 293 294 if (!try_module_get(ops->owner)) { 295 ret = -EINVAL; 296 goto err_free; 297 } 298 299 iommu_dev = ops->probe_device(dev); 300 if (IS_ERR(iommu_dev)) { 301 ret = PTR_ERR(iommu_dev); 302 goto out_module_put; 303 } 304 305 dev->iommu->iommu_dev = iommu_dev; 306 307 group = iommu_group_get_for_dev(dev); 308 if (IS_ERR(group)) { 309 ret = PTR_ERR(group); 310 goto out_release; 311 } 312 iommu_group_put(group); 313 314 if (group_list && !group->default_domain && list_empty(&group->entry)) 315 list_add_tail(&group->entry, group_list); 316 317 iommu_device_link(iommu_dev, dev); 318 319 return 0; 320 321 out_release: 322 if (ops->release_device) 323 ops->release_device(dev); 324 325 out_module_put: 326 module_put(ops->owner); 327 328 err_free: 329 dev_iommu_free(dev); 330 331 return ret; 332 } 333 334 int iommu_probe_device(struct device *dev) 335 { 336 const struct iommu_ops *ops; 337 struct iommu_group *group; 338 int ret; 339 340 ret = __iommu_probe_device(dev, NULL); 341 if (ret) 342 goto err_out; 343 344 group = iommu_group_get(dev); 345 if (!group) { 346 ret = -ENODEV; 347 goto err_release; 348 } 349 350 /* 351 * Try to allocate a default domain - needs support from the 352 * IOMMU driver. There are still some drivers which don't 353 * support default domains, so the return value is not yet 354 * checked. 355 */ 356 mutex_lock(&group->mutex); 357 iommu_alloc_default_domain(group, dev); 358 359 /* 360 * If device joined an existing group which has been claimed, don't 361 * attach the default domain. 362 */ 363 if (group->default_domain && !group->owner) { 364 ret = __iommu_attach_device(group->default_domain, dev); 365 if (ret) { 366 mutex_unlock(&group->mutex); 367 iommu_group_put(group); 368 goto err_release; 369 } 370 } 371 372 iommu_create_device_direct_mappings(group, dev); 373 374 mutex_unlock(&group->mutex); 375 iommu_group_put(group); 376 377 ops = dev_iommu_ops(dev); 378 if (ops->probe_finalize) 379 ops->probe_finalize(dev); 380 381 return 0; 382 383 err_release: 384 iommu_release_device(dev); 385 386 err_out: 387 return ret; 388 389 } 390 391 void iommu_release_device(struct device *dev) 392 { 393 const struct iommu_ops *ops; 394 395 if (!dev->iommu) 396 return; 397 398 iommu_device_unlink(dev->iommu->iommu_dev, dev); 399 400 ops = dev_iommu_ops(dev); 401 if (ops->release_device) 402 ops->release_device(dev); 403 404 iommu_group_remove_device(dev); 405 module_put(ops->owner); 406 dev_iommu_free(dev); 407 } 408 409 static int __init iommu_set_def_domain_type(char *str) 410 { 411 bool pt; 412 int ret; 413 414 ret = kstrtobool(str, &pt); 415 if (ret) 416 return ret; 417 418 if (pt) 419 iommu_set_default_passthrough(true); 420 else 421 iommu_set_default_translated(true); 422 423 return 0; 424 } 425 early_param("iommu.passthrough", iommu_set_def_domain_type); 426 427 static int __init iommu_dma_setup(char *str) 428 { 429 int ret = kstrtobool(str, &iommu_dma_strict); 430 431 if (!ret) 432 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 433 return ret; 434 } 435 early_param("iommu.strict", iommu_dma_setup); 436 437 void iommu_set_dma_strict(void) 438 { 439 iommu_dma_strict = true; 440 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 441 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 442 } 443 444 static ssize_t iommu_group_attr_show(struct kobject *kobj, 445 struct attribute *__attr, char *buf) 446 { 447 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 448 struct iommu_group *group = to_iommu_group(kobj); 449 ssize_t ret = -EIO; 450 451 if (attr->show) 452 ret = attr->show(group, buf); 453 return ret; 454 } 455 456 static ssize_t iommu_group_attr_store(struct kobject *kobj, 457 struct attribute *__attr, 458 const char *buf, size_t count) 459 { 460 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 461 struct iommu_group *group = to_iommu_group(kobj); 462 ssize_t ret = -EIO; 463 464 if (attr->store) 465 ret = attr->store(group, buf, count); 466 return ret; 467 } 468 469 static const struct sysfs_ops iommu_group_sysfs_ops = { 470 .show = iommu_group_attr_show, 471 .store = iommu_group_attr_store, 472 }; 473 474 static int iommu_group_create_file(struct iommu_group *group, 475 struct iommu_group_attribute *attr) 476 { 477 return sysfs_create_file(&group->kobj, &attr->attr); 478 } 479 480 static void iommu_group_remove_file(struct iommu_group *group, 481 struct iommu_group_attribute *attr) 482 { 483 sysfs_remove_file(&group->kobj, &attr->attr); 484 } 485 486 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 487 { 488 return sprintf(buf, "%s\n", group->name); 489 } 490 491 /** 492 * iommu_insert_resv_region - Insert a new region in the 493 * list of reserved regions. 494 * @new: new region to insert 495 * @regions: list of regions 496 * 497 * Elements are sorted by start address and overlapping segments 498 * of the same type are merged. 499 */ 500 static int iommu_insert_resv_region(struct iommu_resv_region *new, 501 struct list_head *regions) 502 { 503 struct iommu_resv_region *iter, *tmp, *nr, *top; 504 LIST_HEAD(stack); 505 506 nr = iommu_alloc_resv_region(new->start, new->length, 507 new->prot, new->type); 508 if (!nr) 509 return -ENOMEM; 510 511 /* First add the new element based on start address sorting */ 512 list_for_each_entry(iter, regions, list) { 513 if (nr->start < iter->start || 514 (nr->start == iter->start && nr->type <= iter->type)) 515 break; 516 } 517 list_add_tail(&nr->list, &iter->list); 518 519 /* Merge overlapping segments of type nr->type in @regions, if any */ 520 list_for_each_entry_safe(iter, tmp, regions, list) { 521 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 522 523 /* no merge needed on elements of different types than @new */ 524 if (iter->type != new->type) { 525 list_move_tail(&iter->list, &stack); 526 continue; 527 } 528 529 /* look for the last stack element of same type as @iter */ 530 list_for_each_entry_reverse(top, &stack, list) 531 if (top->type == iter->type) 532 goto check_overlap; 533 534 list_move_tail(&iter->list, &stack); 535 continue; 536 537 check_overlap: 538 top_end = top->start + top->length - 1; 539 540 if (iter->start > top_end + 1) { 541 list_move_tail(&iter->list, &stack); 542 } else { 543 top->length = max(top_end, iter_end) - top->start + 1; 544 list_del(&iter->list); 545 kfree(iter); 546 } 547 } 548 list_splice(&stack, regions); 549 return 0; 550 } 551 552 static int 553 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 554 struct list_head *group_resv_regions) 555 { 556 struct iommu_resv_region *entry; 557 int ret = 0; 558 559 list_for_each_entry(entry, dev_resv_regions, list) { 560 ret = iommu_insert_resv_region(entry, group_resv_regions); 561 if (ret) 562 break; 563 } 564 return ret; 565 } 566 567 int iommu_get_group_resv_regions(struct iommu_group *group, 568 struct list_head *head) 569 { 570 struct group_device *device; 571 int ret = 0; 572 573 mutex_lock(&group->mutex); 574 list_for_each_entry(device, &group->devices, list) { 575 struct list_head dev_resv_regions; 576 577 /* 578 * Non-API groups still expose reserved_regions in sysfs, 579 * so filter out calls that get here that way. 580 */ 581 if (!device->dev->iommu) 582 break; 583 584 INIT_LIST_HEAD(&dev_resv_regions); 585 iommu_get_resv_regions(device->dev, &dev_resv_regions); 586 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 587 iommu_put_resv_regions(device->dev, &dev_resv_regions); 588 if (ret) 589 break; 590 } 591 mutex_unlock(&group->mutex); 592 return ret; 593 } 594 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 595 596 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 597 char *buf) 598 { 599 struct iommu_resv_region *region, *next; 600 struct list_head group_resv_regions; 601 char *str = buf; 602 603 INIT_LIST_HEAD(&group_resv_regions); 604 iommu_get_group_resv_regions(group, &group_resv_regions); 605 606 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 607 str += sprintf(str, "0x%016llx 0x%016llx %s\n", 608 (long long int)region->start, 609 (long long int)(region->start + 610 region->length - 1), 611 iommu_group_resv_type_string[region->type]); 612 kfree(region); 613 } 614 615 return (str - buf); 616 } 617 618 static ssize_t iommu_group_show_type(struct iommu_group *group, 619 char *buf) 620 { 621 char *type = "unknown\n"; 622 623 mutex_lock(&group->mutex); 624 if (group->default_domain) { 625 switch (group->default_domain->type) { 626 case IOMMU_DOMAIN_BLOCKED: 627 type = "blocked\n"; 628 break; 629 case IOMMU_DOMAIN_IDENTITY: 630 type = "identity\n"; 631 break; 632 case IOMMU_DOMAIN_UNMANAGED: 633 type = "unmanaged\n"; 634 break; 635 case IOMMU_DOMAIN_DMA: 636 type = "DMA\n"; 637 break; 638 case IOMMU_DOMAIN_DMA_FQ: 639 type = "DMA-FQ\n"; 640 break; 641 } 642 } 643 mutex_unlock(&group->mutex); 644 strcpy(buf, type); 645 646 return strlen(type); 647 } 648 649 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 650 651 static IOMMU_GROUP_ATTR(reserved_regions, 0444, 652 iommu_group_show_resv_regions, NULL); 653 654 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 655 iommu_group_store_type); 656 657 static void iommu_group_release(struct kobject *kobj) 658 { 659 struct iommu_group *group = to_iommu_group(kobj); 660 661 pr_debug("Releasing group %d\n", group->id); 662 663 if (group->iommu_data_release) 664 group->iommu_data_release(group->iommu_data); 665 666 ida_free(&iommu_group_ida, group->id); 667 668 if (group->default_domain) 669 iommu_domain_free(group->default_domain); 670 if (group->blocking_domain) 671 iommu_domain_free(group->blocking_domain); 672 673 kfree(group->name); 674 kfree(group); 675 } 676 677 static struct kobj_type iommu_group_ktype = { 678 .sysfs_ops = &iommu_group_sysfs_ops, 679 .release = iommu_group_release, 680 }; 681 682 /** 683 * iommu_group_alloc - Allocate a new group 684 * 685 * This function is called by an iommu driver to allocate a new iommu 686 * group. The iommu group represents the minimum granularity of the iommu. 687 * Upon successful return, the caller holds a reference to the supplied 688 * group in order to hold the group until devices are added. Use 689 * iommu_group_put() to release this extra reference count, allowing the 690 * group to be automatically reclaimed once it has no devices or external 691 * references. 692 */ 693 struct iommu_group *iommu_group_alloc(void) 694 { 695 struct iommu_group *group; 696 int ret; 697 698 group = kzalloc(sizeof(*group), GFP_KERNEL); 699 if (!group) 700 return ERR_PTR(-ENOMEM); 701 702 group->kobj.kset = iommu_group_kset; 703 mutex_init(&group->mutex); 704 INIT_LIST_HEAD(&group->devices); 705 INIT_LIST_HEAD(&group->entry); 706 707 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 708 if (ret < 0) { 709 kfree(group); 710 return ERR_PTR(ret); 711 } 712 group->id = ret; 713 714 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 715 NULL, "%d", group->id); 716 if (ret) { 717 kobject_put(&group->kobj); 718 return ERR_PTR(ret); 719 } 720 721 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 722 if (!group->devices_kobj) { 723 kobject_put(&group->kobj); /* triggers .release & free */ 724 return ERR_PTR(-ENOMEM); 725 } 726 727 /* 728 * The devices_kobj holds a reference on the group kobject, so 729 * as long as that exists so will the group. We can therefore 730 * use the devices_kobj for reference counting. 731 */ 732 kobject_put(&group->kobj); 733 734 ret = iommu_group_create_file(group, 735 &iommu_group_attr_reserved_regions); 736 if (ret) 737 return ERR_PTR(ret); 738 739 ret = iommu_group_create_file(group, &iommu_group_attr_type); 740 if (ret) 741 return ERR_PTR(ret); 742 743 pr_debug("Allocated group %d\n", group->id); 744 745 return group; 746 } 747 EXPORT_SYMBOL_GPL(iommu_group_alloc); 748 749 struct iommu_group *iommu_group_get_by_id(int id) 750 { 751 struct kobject *group_kobj; 752 struct iommu_group *group; 753 const char *name; 754 755 if (!iommu_group_kset) 756 return NULL; 757 758 name = kasprintf(GFP_KERNEL, "%d", id); 759 if (!name) 760 return NULL; 761 762 group_kobj = kset_find_obj(iommu_group_kset, name); 763 kfree(name); 764 765 if (!group_kobj) 766 return NULL; 767 768 group = container_of(group_kobj, struct iommu_group, kobj); 769 BUG_ON(group->id != id); 770 771 kobject_get(group->devices_kobj); 772 kobject_put(&group->kobj); 773 774 return group; 775 } 776 EXPORT_SYMBOL_GPL(iommu_group_get_by_id); 777 778 /** 779 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 780 * @group: the group 781 * 782 * iommu drivers can store data in the group for use when doing iommu 783 * operations. This function provides a way to retrieve it. Caller 784 * should hold a group reference. 785 */ 786 void *iommu_group_get_iommudata(struct iommu_group *group) 787 { 788 return group->iommu_data; 789 } 790 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 791 792 /** 793 * iommu_group_set_iommudata - set iommu_data for a group 794 * @group: the group 795 * @iommu_data: new data 796 * @release: release function for iommu_data 797 * 798 * iommu drivers can store data in the group for use when doing iommu 799 * operations. This function provides a way to set the data after 800 * the group has been allocated. Caller should hold a group reference. 801 */ 802 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 803 void (*release)(void *iommu_data)) 804 { 805 group->iommu_data = iommu_data; 806 group->iommu_data_release = release; 807 } 808 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 809 810 /** 811 * iommu_group_set_name - set name for a group 812 * @group: the group 813 * @name: name 814 * 815 * Allow iommu driver to set a name for a group. When set it will 816 * appear in a name attribute file under the group in sysfs. 817 */ 818 int iommu_group_set_name(struct iommu_group *group, const char *name) 819 { 820 int ret; 821 822 if (group->name) { 823 iommu_group_remove_file(group, &iommu_group_attr_name); 824 kfree(group->name); 825 group->name = NULL; 826 if (!name) 827 return 0; 828 } 829 830 group->name = kstrdup(name, GFP_KERNEL); 831 if (!group->name) 832 return -ENOMEM; 833 834 ret = iommu_group_create_file(group, &iommu_group_attr_name); 835 if (ret) { 836 kfree(group->name); 837 group->name = NULL; 838 return ret; 839 } 840 841 return 0; 842 } 843 EXPORT_SYMBOL_GPL(iommu_group_set_name); 844 845 static int iommu_create_device_direct_mappings(struct iommu_group *group, 846 struct device *dev) 847 { 848 struct iommu_domain *domain = group->default_domain; 849 struct iommu_resv_region *entry; 850 struct list_head mappings; 851 unsigned long pg_size; 852 int ret = 0; 853 854 if (!domain || !iommu_is_dma_domain(domain)) 855 return 0; 856 857 BUG_ON(!domain->pgsize_bitmap); 858 859 pg_size = 1UL << __ffs(domain->pgsize_bitmap); 860 INIT_LIST_HEAD(&mappings); 861 862 iommu_get_resv_regions(dev, &mappings); 863 864 /* We need to consider overlapping regions for different devices */ 865 list_for_each_entry(entry, &mappings, list) { 866 dma_addr_t start, end, addr; 867 size_t map_size = 0; 868 869 start = ALIGN(entry->start, pg_size); 870 end = ALIGN(entry->start + entry->length, pg_size); 871 872 if (entry->type != IOMMU_RESV_DIRECT && 873 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) 874 continue; 875 876 for (addr = start; addr <= end; addr += pg_size) { 877 phys_addr_t phys_addr; 878 879 if (addr == end) 880 goto map_end; 881 882 phys_addr = iommu_iova_to_phys(domain, addr); 883 if (!phys_addr) { 884 map_size += pg_size; 885 continue; 886 } 887 888 map_end: 889 if (map_size) { 890 ret = iommu_map(domain, addr - map_size, 891 addr - map_size, map_size, 892 entry->prot); 893 if (ret) 894 goto out; 895 map_size = 0; 896 } 897 } 898 899 } 900 901 iommu_flush_iotlb_all(domain); 902 903 out: 904 iommu_put_resv_regions(dev, &mappings); 905 906 return ret; 907 } 908 909 static bool iommu_is_attach_deferred(struct device *dev) 910 { 911 const struct iommu_ops *ops = dev_iommu_ops(dev); 912 913 if (ops->is_attach_deferred) 914 return ops->is_attach_deferred(dev); 915 916 return false; 917 } 918 919 /** 920 * iommu_group_add_device - add a device to an iommu group 921 * @group: the group into which to add the device (reference should be held) 922 * @dev: the device 923 * 924 * This function is called by an iommu driver to add a device into a 925 * group. Adding a device increments the group reference count. 926 */ 927 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 928 { 929 int ret, i = 0; 930 struct group_device *device; 931 932 device = kzalloc(sizeof(*device), GFP_KERNEL); 933 if (!device) 934 return -ENOMEM; 935 936 device->dev = dev; 937 938 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 939 if (ret) 940 goto err_free_device; 941 942 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 943 rename: 944 if (!device->name) { 945 ret = -ENOMEM; 946 goto err_remove_link; 947 } 948 949 ret = sysfs_create_link_nowarn(group->devices_kobj, 950 &dev->kobj, device->name); 951 if (ret) { 952 if (ret == -EEXIST && i >= 0) { 953 /* 954 * Account for the slim chance of collision 955 * and append an instance to the name. 956 */ 957 kfree(device->name); 958 device->name = kasprintf(GFP_KERNEL, "%s.%d", 959 kobject_name(&dev->kobj), i++); 960 goto rename; 961 } 962 goto err_free_name; 963 } 964 965 kobject_get(group->devices_kobj); 966 967 dev->iommu_group = group; 968 969 mutex_lock(&group->mutex); 970 list_add_tail(&device->list, &group->devices); 971 if (group->domain && !iommu_is_attach_deferred(dev)) 972 ret = __iommu_attach_device(group->domain, dev); 973 mutex_unlock(&group->mutex); 974 if (ret) 975 goto err_put_group; 976 977 trace_add_device_to_group(group->id, dev); 978 979 dev_info(dev, "Adding to iommu group %d\n", group->id); 980 981 return 0; 982 983 err_put_group: 984 mutex_lock(&group->mutex); 985 list_del(&device->list); 986 mutex_unlock(&group->mutex); 987 dev->iommu_group = NULL; 988 kobject_put(group->devices_kobj); 989 sysfs_remove_link(group->devices_kobj, device->name); 990 err_free_name: 991 kfree(device->name); 992 err_remove_link: 993 sysfs_remove_link(&dev->kobj, "iommu_group"); 994 err_free_device: 995 kfree(device); 996 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 997 return ret; 998 } 999 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1000 1001 /** 1002 * iommu_group_remove_device - remove a device from it's current group 1003 * @dev: device to be removed 1004 * 1005 * This function is called by an iommu driver to remove the device from 1006 * it's current group. This decrements the iommu group reference count. 1007 */ 1008 void iommu_group_remove_device(struct device *dev) 1009 { 1010 struct iommu_group *group = dev->iommu_group; 1011 struct group_device *tmp_device, *device = NULL; 1012 1013 if (!group) 1014 return; 1015 1016 dev_info(dev, "Removing from iommu group %d\n", group->id); 1017 1018 mutex_lock(&group->mutex); 1019 list_for_each_entry(tmp_device, &group->devices, list) { 1020 if (tmp_device->dev == dev) { 1021 device = tmp_device; 1022 list_del(&device->list); 1023 break; 1024 } 1025 } 1026 mutex_unlock(&group->mutex); 1027 1028 if (!device) 1029 return; 1030 1031 sysfs_remove_link(group->devices_kobj, device->name); 1032 sysfs_remove_link(&dev->kobj, "iommu_group"); 1033 1034 trace_remove_device_from_group(group->id, dev); 1035 1036 kfree(device->name); 1037 kfree(device); 1038 dev->iommu_group = NULL; 1039 kobject_put(group->devices_kobj); 1040 } 1041 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1042 1043 static int iommu_group_device_count(struct iommu_group *group) 1044 { 1045 struct group_device *entry; 1046 int ret = 0; 1047 1048 list_for_each_entry(entry, &group->devices, list) 1049 ret++; 1050 1051 return ret; 1052 } 1053 1054 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, 1055 int (*fn)(struct device *, void *)) 1056 { 1057 struct group_device *device; 1058 int ret = 0; 1059 1060 list_for_each_entry(device, &group->devices, list) { 1061 ret = fn(device->dev, data); 1062 if (ret) 1063 break; 1064 } 1065 return ret; 1066 } 1067 1068 /** 1069 * iommu_group_for_each_dev - iterate over each device in the group 1070 * @group: the group 1071 * @data: caller opaque data to be passed to callback function 1072 * @fn: caller supplied callback function 1073 * 1074 * This function is called by group users to iterate over group devices. 1075 * Callers should hold a reference count to the group during callback. 1076 * The group->mutex is held across callbacks, which will block calls to 1077 * iommu_group_add/remove_device. 1078 */ 1079 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1080 int (*fn)(struct device *, void *)) 1081 { 1082 int ret; 1083 1084 mutex_lock(&group->mutex); 1085 ret = __iommu_group_for_each_dev(group, data, fn); 1086 mutex_unlock(&group->mutex); 1087 1088 return ret; 1089 } 1090 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1091 1092 /** 1093 * iommu_group_get - Return the group for a device and increment reference 1094 * @dev: get the group that this device belongs to 1095 * 1096 * This function is called by iommu drivers and users to get the group 1097 * for the specified device. If found, the group is returned and the group 1098 * reference in incremented, else NULL. 1099 */ 1100 struct iommu_group *iommu_group_get(struct device *dev) 1101 { 1102 struct iommu_group *group = dev->iommu_group; 1103 1104 if (group) 1105 kobject_get(group->devices_kobj); 1106 1107 return group; 1108 } 1109 EXPORT_SYMBOL_GPL(iommu_group_get); 1110 1111 /** 1112 * iommu_group_ref_get - Increment reference on a group 1113 * @group: the group to use, must not be NULL 1114 * 1115 * This function is called by iommu drivers to take additional references on an 1116 * existing group. Returns the given group for convenience. 1117 */ 1118 struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1119 { 1120 kobject_get(group->devices_kobj); 1121 return group; 1122 } 1123 EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1124 1125 /** 1126 * iommu_group_put - Decrement group reference 1127 * @group: the group to use 1128 * 1129 * This function is called by iommu drivers and users to release the 1130 * iommu group. Once the reference count is zero, the group is released. 1131 */ 1132 void iommu_group_put(struct iommu_group *group) 1133 { 1134 if (group) 1135 kobject_put(group->devices_kobj); 1136 } 1137 EXPORT_SYMBOL_GPL(iommu_group_put); 1138 1139 /** 1140 * iommu_register_device_fault_handler() - Register a device fault handler 1141 * @dev: the device 1142 * @handler: the fault handler 1143 * @data: private data passed as argument to the handler 1144 * 1145 * When an IOMMU fault event is received, this handler gets called with the 1146 * fault event and data as argument. The handler should return 0 on success. If 1147 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also 1148 * complete the fault by calling iommu_page_response() with one of the following 1149 * response code: 1150 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation 1151 * - IOMMU_PAGE_RESP_INVALID: terminate the fault 1152 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting 1153 * page faults if possible. 1154 * 1155 * Return 0 if the fault handler was installed successfully, or an error. 1156 */ 1157 int iommu_register_device_fault_handler(struct device *dev, 1158 iommu_dev_fault_handler_t handler, 1159 void *data) 1160 { 1161 struct dev_iommu *param = dev->iommu; 1162 int ret = 0; 1163 1164 if (!param) 1165 return -EINVAL; 1166 1167 mutex_lock(¶m->lock); 1168 /* Only allow one fault handler registered for each device */ 1169 if (param->fault_param) { 1170 ret = -EBUSY; 1171 goto done_unlock; 1172 } 1173 1174 get_device(dev); 1175 param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); 1176 if (!param->fault_param) { 1177 put_device(dev); 1178 ret = -ENOMEM; 1179 goto done_unlock; 1180 } 1181 param->fault_param->handler = handler; 1182 param->fault_param->data = data; 1183 mutex_init(¶m->fault_param->lock); 1184 INIT_LIST_HEAD(¶m->fault_param->faults); 1185 1186 done_unlock: 1187 mutex_unlock(¶m->lock); 1188 1189 return ret; 1190 } 1191 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); 1192 1193 /** 1194 * iommu_unregister_device_fault_handler() - Unregister the device fault handler 1195 * @dev: the device 1196 * 1197 * Remove the device fault handler installed with 1198 * iommu_register_device_fault_handler(). 1199 * 1200 * Return 0 on success, or an error. 1201 */ 1202 int iommu_unregister_device_fault_handler(struct device *dev) 1203 { 1204 struct dev_iommu *param = dev->iommu; 1205 int ret = 0; 1206 1207 if (!param) 1208 return -EINVAL; 1209 1210 mutex_lock(¶m->lock); 1211 1212 if (!param->fault_param) 1213 goto unlock; 1214 1215 /* we cannot unregister handler if there are pending faults */ 1216 if (!list_empty(¶m->fault_param->faults)) { 1217 ret = -EBUSY; 1218 goto unlock; 1219 } 1220 1221 kfree(param->fault_param); 1222 param->fault_param = NULL; 1223 put_device(dev); 1224 unlock: 1225 mutex_unlock(¶m->lock); 1226 1227 return ret; 1228 } 1229 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); 1230 1231 /** 1232 * iommu_report_device_fault() - Report fault event to device driver 1233 * @dev: the device 1234 * @evt: fault event data 1235 * 1236 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ 1237 * handler. When this function fails and the fault is recoverable, it is the 1238 * caller's responsibility to complete the fault. 1239 * 1240 * Return 0 on success, or an error. 1241 */ 1242 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) 1243 { 1244 struct dev_iommu *param = dev->iommu; 1245 struct iommu_fault_event *evt_pending = NULL; 1246 struct iommu_fault_param *fparam; 1247 int ret = 0; 1248 1249 if (!param || !evt) 1250 return -EINVAL; 1251 1252 /* we only report device fault if there is a handler registered */ 1253 mutex_lock(¶m->lock); 1254 fparam = param->fault_param; 1255 if (!fparam || !fparam->handler) { 1256 ret = -EINVAL; 1257 goto done_unlock; 1258 } 1259 1260 if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && 1261 (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { 1262 evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), 1263 GFP_KERNEL); 1264 if (!evt_pending) { 1265 ret = -ENOMEM; 1266 goto done_unlock; 1267 } 1268 mutex_lock(&fparam->lock); 1269 list_add_tail(&evt_pending->list, &fparam->faults); 1270 mutex_unlock(&fparam->lock); 1271 } 1272 1273 ret = fparam->handler(&evt->fault, fparam->data); 1274 if (ret && evt_pending) { 1275 mutex_lock(&fparam->lock); 1276 list_del(&evt_pending->list); 1277 mutex_unlock(&fparam->lock); 1278 kfree(evt_pending); 1279 } 1280 done_unlock: 1281 mutex_unlock(¶m->lock); 1282 return ret; 1283 } 1284 EXPORT_SYMBOL_GPL(iommu_report_device_fault); 1285 1286 int iommu_page_response(struct device *dev, 1287 struct iommu_page_response *msg) 1288 { 1289 bool needs_pasid; 1290 int ret = -EINVAL; 1291 struct iommu_fault_event *evt; 1292 struct iommu_fault_page_request *prm; 1293 struct dev_iommu *param = dev->iommu; 1294 const struct iommu_ops *ops = dev_iommu_ops(dev); 1295 bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; 1296 1297 if (!ops->page_response) 1298 return -ENODEV; 1299 1300 if (!param || !param->fault_param) 1301 return -EINVAL; 1302 1303 if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || 1304 msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) 1305 return -EINVAL; 1306 1307 /* Only send response if there is a fault report pending */ 1308 mutex_lock(¶m->fault_param->lock); 1309 if (list_empty(¶m->fault_param->faults)) { 1310 dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); 1311 goto done_unlock; 1312 } 1313 /* 1314 * Check if we have a matching page request pending to respond, 1315 * otherwise return -EINVAL 1316 */ 1317 list_for_each_entry(evt, ¶m->fault_param->faults, list) { 1318 prm = &evt->fault.prm; 1319 if (prm->grpid != msg->grpid) 1320 continue; 1321 1322 /* 1323 * If the PASID is required, the corresponding request is 1324 * matched using the group ID, the PASID valid bit and the PASID 1325 * value. Otherwise only the group ID matches request and 1326 * response. 1327 */ 1328 needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; 1329 if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) 1330 continue; 1331 1332 if (!needs_pasid && has_pasid) { 1333 /* No big deal, just clear it. */ 1334 msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; 1335 msg->pasid = 0; 1336 } 1337 1338 ret = ops->page_response(dev, evt, msg); 1339 list_del(&evt->list); 1340 kfree(evt); 1341 break; 1342 } 1343 1344 done_unlock: 1345 mutex_unlock(¶m->fault_param->lock); 1346 return ret; 1347 } 1348 EXPORT_SYMBOL_GPL(iommu_page_response); 1349 1350 /** 1351 * iommu_group_id - Return ID for a group 1352 * @group: the group to ID 1353 * 1354 * Return the unique ID for the group matching the sysfs group number. 1355 */ 1356 int iommu_group_id(struct iommu_group *group) 1357 { 1358 return group->id; 1359 } 1360 EXPORT_SYMBOL_GPL(iommu_group_id); 1361 1362 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1363 unsigned long *devfns); 1364 1365 /* 1366 * To consider a PCI device isolated, we require ACS to support Source 1367 * Validation, Request Redirection, Completer Redirection, and Upstream 1368 * Forwarding. This effectively means that devices cannot spoof their 1369 * requester ID, requests and completions cannot be redirected, and all 1370 * transactions are forwarded upstream, even as it passes through a 1371 * bridge where the target device is downstream. 1372 */ 1373 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1374 1375 /* 1376 * For multifunction devices which are not isolated from each other, find 1377 * all the other non-isolated functions and look for existing groups. For 1378 * each function, we also need to look for aliases to or from other devices 1379 * that may already have a group. 1380 */ 1381 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1382 unsigned long *devfns) 1383 { 1384 struct pci_dev *tmp = NULL; 1385 struct iommu_group *group; 1386 1387 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1388 return NULL; 1389 1390 for_each_pci_dev(tmp) { 1391 if (tmp == pdev || tmp->bus != pdev->bus || 1392 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1393 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1394 continue; 1395 1396 group = get_pci_alias_group(tmp, devfns); 1397 if (group) { 1398 pci_dev_put(tmp); 1399 return group; 1400 } 1401 } 1402 1403 return NULL; 1404 } 1405 1406 /* 1407 * Look for aliases to or from the given device for existing groups. DMA 1408 * aliases are only supported on the same bus, therefore the search 1409 * space is quite small (especially since we're really only looking at pcie 1410 * device, and therefore only expect multiple slots on the root complex or 1411 * downstream switch ports). It's conceivable though that a pair of 1412 * multifunction devices could have aliases between them that would cause a 1413 * loop. To prevent this, we use a bitmap to track where we've been. 1414 */ 1415 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1416 unsigned long *devfns) 1417 { 1418 struct pci_dev *tmp = NULL; 1419 struct iommu_group *group; 1420 1421 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1422 return NULL; 1423 1424 group = iommu_group_get(&pdev->dev); 1425 if (group) 1426 return group; 1427 1428 for_each_pci_dev(tmp) { 1429 if (tmp == pdev || tmp->bus != pdev->bus) 1430 continue; 1431 1432 /* We alias them or they alias us */ 1433 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1434 group = get_pci_alias_group(tmp, devfns); 1435 if (group) { 1436 pci_dev_put(tmp); 1437 return group; 1438 } 1439 1440 group = get_pci_function_alias_group(tmp, devfns); 1441 if (group) { 1442 pci_dev_put(tmp); 1443 return group; 1444 } 1445 } 1446 } 1447 1448 return NULL; 1449 } 1450 1451 struct group_for_pci_data { 1452 struct pci_dev *pdev; 1453 struct iommu_group *group; 1454 }; 1455 1456 /* 1457 * DMA alias iterator callback, return the last seen device. Stop and return 1458 * the IOMMU group if we find one along the way. 1459 */ 1460 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1461 { 1462 struct group_for_pci_data *data = opaque; 1463 1464 data->pdev = pdev; 1465 data->group = iommu_group_get(&pdev->dev); 1466 1467 return data->group != NULL; 1468 } 1469 1470 /* 1471 * Generic device_group call-back function. It just allocates one 1472 * iommu-group per device. 1473 */ 1474 struct iommu_group *generic_device_group(struct device *dev) 1475 { 1476 return iommu_group_alloc(); 1477 } 1478 EXPORT_SYMBOL_GPL(generic_device_group); 1479 1480 /* 1481 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1482 * to find or create an IOMMU group for a device. 1483 */ 1484 struct iommu_group *pci_device_group(struct device *dev) 1485 { 1486 struct pci_dev *pdev = to_pci_dev(dev); 1487 struct group_for_pci_data data; 1488 struct pci_bus *bus; 1489 struct iommu_group *group = NULL; 1490 u64 devfns[4] = { 0 }; 1491 1492 if (WARN_ON(!dev_is_pci(dev))) 1493 return ERR_PTR(-EINVAL); 1494 1495 /* 1496 * Find the upstream DMA alias for the device. A device must not 1497 * be aliased due to topology in order to have its own IOMMU group. 1498 * If we find an alias along the way that already belongs to a 1499 * group, use it. 1500 */ 1501 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1502 return data.group; 1503 1504 pdev = data.pdev; 1505 1506 /* 1507 * Continue upstream from the point of minimum IOMMU granularity 1508 * due to aliases to the point where devices are protected from 1509 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1510 * group, use it. 1511 */ 1512 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1513 if (!bus->self) 1514 continue; 1515 1516 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1517 break; 1518 1519 pdev = bus->self; 1520 1521 group = iommu_group_get(&pdev->dev); 1522 if (group) 1523 return group; 1524 } 1525 1526 /* 1527 * Look for existing groups on device aliases. If we alias another 1528 * device or another device aliases us, use the same group. 1529 */ 1530 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1531 if (group) 1532 return group; 1533 1534 /* 1535 * Look for existing groups on non-isolated functions on the same 1536 * slot and aliases of those funcions, if any. No need to clear 1537 * the search bitmap, the tested devfns are still valid. 1538 */ 1539 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1540 if (group) 1541 return group; 1542 1543 /* No shared group found, allocate new */ 1544 return iommu_group_alloc(); 1545 } 1546 EXPORT_SYMBOL_GPL(pci_device_group); 1547 1548 /* Get the IOMMU group for device on fsl-mc bus */ 1549 struct iommu_group *fsl_mc_device_group(struct device *dev) 1550 { 1551 struct device *cont_dev = fsl_mc_cont_dev(dev); 1552 struct iommu_group *group; 1553 1554 group = iommu_group_get(cont_dev); 1555 if (!group) 1556 group = iommu_group_alloc(); 1557 return group; 1558 } 1559 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1560 1561 static int iommu_get_def_domain_type(struct device *dev) 1562 { 1563 const struct iommu_ops *ops = dev_iommu_ops(dev); 1564 1565 if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) 1566 return IOMMU_DOMAIN_DMA; 1567 1568 if (ops->def_domain_type) 1569 return ops->def_domain_type(dev); 1570 1571 return 0; 1572 } 1573 1574 static int iommu_group_alloc_default_domain(struct bus_type *bus, 1575 struct iommu_group *group, 1576 unsigned int type) 1577 { 1578 struct iommu_domain *dom; 1579 1580 dom = __iommu_domain_alloc(bus, type); 1581 if (!dom && type != IOMMU_DOMAIN_DMA) { 1582 dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); 1583 if (dom) 1584 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1585 type, group->name); 1586 } 1587 1588 if (!dom) 1589 return -ENOMEM; 1590 1591 group->default_domain = dom; 1592 if (!group->domain) 1593 group->domain = dom; 1594 return 0; 1595 } 1596 1597 static int iommu_alloc_default_domain(struct iommu_group *group, 1598 struct device *dev) 1599 { 1600 unsigned int type; 1601 1602 if (group->default_domain) 1603 return 0; 1604 1605 type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type; 1606 1607 return iommu_group_alloc_default_domain(dev->bus, group, type); 1608 } 1609 1610 /** 1611 * iommu_group_get_for_dev - Find or create the IOMMU group for a device 1612 * @dev: target device 1613 * 1614 * This function is intended to be called by IOMMU drivers and extended to 1615 * support common, bus-defined algorithms when determining or creating the 1616 * IOMMU group for a device. On success, the caller will hold a reference 1617 * to the returned IOMMU group, which will already include the provided 1618 * device. The reference should be released with iommu_group_put(). 1619 */ 1620 static struct iommu_group *iommu_group_get_for_dev(struct device *dev) 1621 { 1622 const struct iommu_ops *ops = dev_iommu_ops(dev); 1623 struct iommu_group *group; 1624 int ret; 1625 1626 group = iommu_group_get(dev); 1627 if (group) 1628 return group; 1629 1630 group = ops->device_group(dev); 1631 if (WARN_ON_ONCE(group == NULL)) 1632 return ERR_PTR(-EINVAL); 1633 1634 if (IS_ERR(group)) 1635 return group; 1636 1637 ret = iommu_group_add_device(group, dev); 1638 if (ret) 1639 goto out_put_group; 1640 1641 return group; 1642 1643 out_put_group: 1644 iommu_group_put(group); 1645 1646 return ERR_PTR(ret); 1647 } 1648 1649 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1650 { 1651 return group->default_domain; 1652 } 1653 1654 static int probe_iommu_group(struct device *dev, void *data) 1655 { 1656 struct list_head *group_list = data; 1657 struct iommu_group *group; 1658 int ret; 1659 1660 /* Device is probed already if in a group */ 1661 group = iommu_group_get(dev); 1662 if (group) { 1663 iommu_group_put(group); 1664 return 0; 1665 } 1666 1667 ret = __iommu_probe_device(dev, group_list); 1668 if (ret == -ENODEV) 1669 ret = 0; 1670 1671 return ret; 1672 } 1673 1674 static int iommu_bus_notifier(struct notifier_block *nb, 1675 unsigned long action, void *data) 1676 { 1677 struct device *dev = data; 1678 1679 if (action == BUS_NOTIFY_ADD_DEVICE) { 1680 int ret; 1681 1682 ret = iommu_probe_device(dev); 1683 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1684 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1685 iommu_release_device(dev); 1686 return NOTIFY_OK; 1687 } 1688 1689 return 0; 1690 } 1691 1692 struct __group_domain_type { 1693 struct device *dev; 1694 unsigned int type; 1695 }; 1696 1697 static int probe_get_default_domain_type(struct device *dev, void *data) 1698 { 1699 struct __group_domain_type *gtype = data; 1700 unsigned int type = iommu_get_def_domain_type(dev); 1701 1702 if (type) { 1703 if (gtype->type && gtype->type != type) { 1704 dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", 1705 iommu_domain_type_str(type), 1706 dev_name(gtype->dev), 1707 iommu_domain_type_str(gtype->type)); 1708 gtype->type = 0; 1709 } 1710 1711 if (!gtype->dev) { 1712 gtype->dev = dev; 1713 gtype->type = type; 1714 } 1715 } 1716 1717 return 0; 1718 } 1719 1720 static void probe_alloc_default_domain(struct bus_type *bus, 1721 struct iommu_group *group) 1722 { 1723 struct __group_domain_type gtype; 1724 1725 memset(>ype, 0, sizeof(gtype)); 1726 1727 /* Ask for default domain requirements of all devices in the group */ 1728 __iommu_group_for_each_dev(group, >ype, 1729 probe_get_default_domain_type); 1730 1731 if (!gtype.type) 1732 gtype.type = iommu_def_domain_type; 1733 1734 iommu_group_alloc_default_domain(bus, group, gtype.type); 1735 1736 } 1737 1738 static int iommu_group_do_dma_attach(struct device *dev, void *data) 1739 { 1740 struct iommu_domain *domain = data; 1741 int ret = 0; 1742 1743 if (!iommu_is_attach_deferred(dev)) 1744 ret = __iommu_attach_device(domain, dev); 1745 1746 return ret; 1747 } 1748 1749 static int __iommu_group_dma_attach(struct iommu_group *group) 1750 { 1751 return __iommu_group_for_each_dev(group, group->default_domain, 1752 iommu_group_do_dma_attach); 1753 } 1754 1755 static int iommu_group_do_probe_finalize(struct device *dev, void *data) 1756 { 1757 const struct iommu_ops *ops = dev_iommu_ops(dev); 1758 1759 if (ops->probe_finalize) 1760 ops->probe_finalize(dev); 1761 1762 return 0; 1763 } 1764 1765 static void __iommu_group_dma_finalize(struct iommu_group *group) 1766 { 1767 __iommu_group_for_each_dev(group, group->default_domain, 1768 iommu_group_do_probe_finalize); 1769 } 1770 1771 static int iommu_do_create_direct_mappings(struct device *dev, void *data) 1772 { 1773 struct iommu_group *group = data; 1774 1775 iommu_create_device_direct_mappings(group, dev); 1776 1777 return 0; 1778 } 1779 1780 static int iommu_group_create_direct_mappings(struct iommu_group *group) 1781 { 1782 return __iommu_group_for_each_dev(group, group, 1783 iommu_do_create_direct_mappings); 1784 } 1785 1786 int bus_iommu_probe(struct bus_type *bus) 1787 { 1788 struct iommu_group *group, *next; 1789 LIST_HEAD(group_list); 1790 int ret; 1791 1792 /* 1793 * This code-path does not allocate the default domain when 1794 * creating the iommu group, so do it after the groups are 1795 * created. 1796 */ 1797 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1798 if (ret) 1799 return ret; 1800 1801 list_for_each_entry_safe(group, next, &group_list, entry) { 1802 /* Remove item from the list */ 1803 list_del_init(&group->entry); 1804 1805 mutex_lock(&group->mutex); 1806 1807 /* Try to allocate default domain */ 1808 probe_alloc_default_domain(bus, group); 1809 1810 if (!group->default_domain) { 1811 mutex_unlock(&group->mutex); 1812 continue; 1813 } 1814 1815 iommu_group_create_direct_mappings(group); 1816 1817 ret = __iommu_group_dma_attach(group); 1818 1819 mutex_unlock(&group->mutex); 1820 1821 if (ret) 1822 break; 1823 1824 __iommu_group_dma_finalize(group); 1825 } 1826 1827 return ret; 1828 } 1829 1830 bool iommu_present(struct bus_type *bus) 1831 { 1832 return bus->iommu_ops != NULL; 1833 } 1834 EXPORT_SYMBOL_GPL(iommu_present); 1835 1836 /** 1837 * device_iommu_capable() - check for a general IOMMU capability 1838 * @dev: device to which the capability would be relevant, if available 1839 * @cap: IOMMU capability 1840 * 1841 * Return: true if an IOMMU is present and supports the given capability 1842 * for the given device, otherwise false. 1843 */ 1844 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1845 { 1846 const struct iommu_ops *ops; 1847 1848 if (!dev->iommu || !dev->iommu->iommu_dev) 1849 return false; 1850 1851 ops = dev_iommu_ops(dev); 1852 if (!ops->capable) 1853 return false; 1854 1855 return ops->capable(dev, cap); 1856 } 1857 EXPORT_SYMBOL_GPL(device_iommu_capable); 1858 1859 /** 1860 * iommu_set_fault_handler() - set a fault handler for an iommu domain 1861 * @domain: iommu domain 1862 * @handler: fault handler 1863 * @token: user data, will be passed back to the fault handler 1864 * 1865 * This function should be used by IOMMU users which want to be notified 1866 * whenever an IOMMU fault happens. 1867 * 1868 * The fault handler itself should return 0 on success, and an appropriate 1869 * error code otherwise. 1870 */ 1871 void iommu_set_fault_handler(struct iommu_domain *domain, 1872 iommu_fault_handler_t handler, 1873 void *token) 1874 { 1875 BUG_ON(!domain); 1876 1877 domain->handler = handler; 1878 domain->handler_token = token; 1879 } 1880 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1881 1882 static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, 1883 unsigned type) 1884 { 1885 struct iommu_domain *domain; 1886 1887 if (bus == NULL || bus->iommu_ops == NULL) 1888 return NULL; 1889 1890 domain = bus->iommu_ops->domain_alloc(type); 1891 if (!domain) 1892 return NULL; 1893 1894 domain->type = type; 1895 /* Assume all sizes by default; the driver may override this later */ 1896 domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 1897 if (!domain->ops) 1898 domain->ops = bus->iommu_ops->default_domain_ops; 1899 1900 if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { 1901 iommu_domain_free(domain); 1902 domain = NULL; 1903 } 1904 return domain; 1905 } 1906 1907 struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) 1908 { 1909 return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); 1910 } 1911 EXPORT_SYMBOL_GPL(iommu_domain_alloc); 1912 1913 void iommu_domain_free(struct iommu_domain *domain) 1914 { 1915 iommu_put_dma_cookie(domain); 1916 domain->ops->free(domain); 1917 } 1918 EXPORT_SYMBOL_GPL(iommu_domain_free); 1919 1920 /* 1921 * Put the group's domain back to the appropriate core-owned domain - either the 1922 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 1923 */ 1924 static void __iommu_group_set_core_domain(struct iommu_group *group) 1925 { 1926 struct iommu_domain *new_domain; 1927 int ret; 1928 1929 if (group->owner) 1930 new_domain = group->blocking_domain; 1931 else 1932 new_domain = group->default_domain; 1933 1934 ret = __iommu_group_set_domain(group, new_domain); 1935 WARN(ret, "iommu driver failed to attach the default/blocking domain"); 1936 } 1937 1938 static int __iommu_attach_device(struct iommu_domain *domain, 1939 struct device *dev) 1940 { 1941 int ret; 1942 1943 if (unlikely(domain->ops->attach_dev == NULL)) 1944 return -ENODEV; 1945 1946 ret = domain->ops->attach_dev(domain, dev); 1947 if (!ret) 1948 trace_attach_device_to_domain(dev); 1949 return ret; 1950 } 1951 1952 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 1953 { 1954 struct iommu_group *group; 1955 int ret; 1956 1957 group = iommu_group_get(dev); 1958 if (!group) 1959 return -ENODEV; 1960 1961 /* 1962 * Lock the group to make sure the device-count doesn't 1963 * change while we are attaching 1964 */ 1965 mutex_lock(&group->mutex); 1966 ret = -EINVAL; 1967 if (iommu_group_device_count(group) != 1) 1968 goto out_unlock; 1969 1970 ret = __iommu_attach_group(domain, group); 1971 1972 out_unlock: 1973 mutex_unlock(&group->mutex); 1974 iommu_group_put(group); 1975 1976 return ret; 1977 } 1978 EXPORT_SYMBOL_GPL(iommu_attach_device); 1979 1980 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 1981 { 1982 if (iommu_is_attach_deferred(dev)) 1983 return __iommu_attach_device(domain, dev); 1984 1985 return 0; 1986 } 1987 1988 static void __iommu_detach_device(struct iommu_domain *domain, 1989 struct device *dev) 1990 { 1991 if (iommu_is_attach_deferred(dev)) 1992 return; 1993 1994 domain->ops->detach_dev(domain, dev); 1995 trace_detach_device_from_domain(dev); 1996 } 1997 1998 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 1999 { 2000 struct iommu_group *group; 2001 2002 group = iommu_group_get(dev); 2003 if (!group) 2004 return; 2005 2006 mutex_lock(&group->mutex); 2007 if (WARN_ON(domain != group->domain) || 2008 WARN_ON(iommu_group_device_count(group) != 1)) 2009 goto out_unlock; 2010 __iommu_group_set_core_domain(group); 2011 2012 out_unlock: 2013 mutex_unlock(&group->mutex); 2014 iommu_group_put(group); 2015 } 2016 EXPORT_SYMBOL_GPL(iommu_detach_device); 2017 2018 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2019 { 2020 struct iommu_domain *domain; 2021 struct iommu_group *group; 2022 2023 group = iommu_group_get(dev); 2024 if (!group) 2025 return NULL; 2026 2027 domain = group->domain; 2028 2029 iommu_group_put(group); 2030 2031 return domain; 2032 } 2033 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2034 2035 /* 2036 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2037 * guarantees that the group and its default domain are valid and correct. 2038 */ 2039 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2040 { 2041 return dev->iommu_group->default_domain; 2042 } 2043 2044 /* 2045 * IOMMU groups are really the natural working unit of the IOMMU, but 2046 * the IOMMU API works on domains and devices. Bridge that gap by 2047 * iterating over the devices in a group. Ideally we'd have a single 2048 * device which represents the requestor ID of the group, but we also 2049 * allow IOMMU drivers to create policy defined minimum sets, where 2050 * the physical hardware may be able to distiguish members, but we 2051 * wish to group them at a higher level (ex. untrusted multi-function 2052 * PCI devices). Thus we attach each device. 2053 */ 2054 static int iommu_group_do_attach_device(struct device *dev, void *data) 2055 { 2056 struct iommu_domain *domain = data; 2057 2058 return __iommu_attach_device(domain, dev); 2059 } 2060 2061 static int __iommu_attach_group(struct iommu_domain *domain, 2062 struct iommu_group *group) 2063 { 2064 int ret; 2065 2066 if (group->domain && group->domain != group->default_domain && 2067 group->domain != group->blocking_domain) 2068 return -EBUSY; 2069 2070 ret = __iommu_group_for_each_dev(group, domain, 2071 iommu_group_do_attach_device); 2072 if (ret == 0) 2073 group->domain = domain; 2074 2075 return ret; 2076 } 2077 2078 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2079 { 2080 int ret; 2081 2082 mutex_lock(&group->mutex); 2083 ret = __iommu_attach_group(domain, group); 2084 mutex_unlock(&group->mutex); 2085 2086 return ret; 2087 } 2088 EXPORT_SYMBOL_GPL(iommu_attach_group); 2089 2090 static int iommu_group_do_detach_device(struct device *dev, void *data) 2091 { 2092 struct iommu_domain *domain = data; 2093 2094 __iommu_detach_device(domain, dev); 2095 2096 return 0; 2097 } 2098 2099 static int __iommu_group_set_domain(struct iommu_group *group, 2100 struct iommu_domain *new_domain) 2101 { 2102 int ret; 2103 2104 if (group->domain == new_domain) 2105 return 0; 2106 2107 /* 2108 * New drivers should support default domains and so the detach_dev() op 2109 * will never be called. Otherwise the NULL domain represents some 2110 * platform specific behavior. 2111 */ 2112 if (!new_domain) { 2113 if (WARN_ON(!group->domain->ops->detach_dev)) 2114 return -EINVAL; 2115 __iommu_group_for_each_dev(group, group->domain, 2116 iommu_group_do_detach_device); 2117 group->domain = NULL; 2118 return 0; 2119 } 2120 2121 /* 2122 * Changing the domain is done by calling attach_dev() on the new 2123 * domain. This switch does not have to be atomic and DMA can be 2124 * discarded during the transition. DMA must only be able to access 2125 * either new_domain or group->domain, never something else. 2126 * 2127 * Note that this is called in error unwind paths, attaching to a 2128 * domain that has already been attached cannot fail. 2129 */ 2130 ret = __iommu_group_for_each_dev(group, new_domain, 2131 iommu_group_do_attach_device); 2132 if (ret) 2133 return ret; 2134 group->domain = new_domain; 2135 return 0; 2136 } 2137 2138 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2139 { 2140 mutex_lock(&group->mutex); 2141 __iommu_group_set_core_domain(group); 2142 mutex_unlock(&group->mutex); 2143 } 2144 EXPORT_SYMBOL_GPL(iommu_detach_group); 2145 2146 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2147 { 2148 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2149 return iova; 2150 2151 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2152 return 0; 2153 2154 return domain->ops->iova_to_phys(domain, iova); 2155 } 2156 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2157 2158 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2159 phys_addr_t paddr, size_t size, size_t *count) 2160 { 2161 unsigned int pgsize_idx, pgsize_idx_next; 2162 unsigned long pgsizes; 2163 size_t offset, pgsize, pgsize_next; 2164 unsigned long addr_merge = paddr | iova; 2165 2166 /* Page sizes supported by the hardware and small enough for @size */ 2167 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2168 2169 /* Constrain the page sizes further based on the maximum alignment */ 2170 if (likely(addr_merge)) 2171 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2172 2173 /* Make sure we have at least one suitable page size */ 2174 BUG_ON(!pgsizes); 2175 2176 /* Pick the biggest page size remaining */ 2177 pgsize_idx = __fls(pgsizes); 2178 pgsize = BIT(pgsize_idx); 2179 if (!count) 2180 return pgsize; 2181 2182 /* Find the next biggest support page size, if it exists */ 2183 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2184 if (!pgsizes) 2185 goto out_set_count; 2186 2187 pgsize_idx_next = __ffs(pgsizes); 2188 pgsize_next = BIT(pgsize_idx_next); 2189 2190 /* 2191 * There's no point trying a bigger page size unless the virtual 2192 * and physical addresses are similarly offset within the larger page. 2193 */ 2194 if ((iova ^ paddr) & (pgsize_next - 1)) 2195 goto out_set_count; 2196 2197 /* Calculate the offset to the next page size alignment boundary */ 2198 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2199 2200 /* 2201 * If size is big enough to accommodate the larger page, reduce 2202 * the number of smaller pages. 2203 */ 2204 if (offset + pgsize_next <= size) 2205 size = offset; 2206 2207 out_set_count: 2208 *count = size >> pgsize_idx; 2209 return pgsize; 2210 } 2211 2212 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, 2213 phys_addr_t paddr, size_t size, int prot, 2214 gfp_t gfp, size_t *mapped) 2215 { 2216 const struct iommu_domain_ops *ops = domain->ops; 2217 size_t pgsize, count; 2218 int ret; 2219 2220 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2221 2222 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2223 iova, &paddr, pgsize, count); 2224 2225 if (ops->map_pages) { 2226 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2227 gfp, mapped); 2228 } else { 2229 ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); 2230 *mapped = ret ? 0 : pgsize; 2231 } 2232 2233 return ret; 2234 } 2235 2236 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2237 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2238 { 2239 const struct iommu_domain_ops *ops = domain->ops; 2240 unsigned long orig_iova = iova; 2241 unsigned int min_pagesz; 2242 size_t orig_size = size; 2243 phys_addr_t orig_paddr = paddr; 2244 int ret = 0; 2245 2246 if (unlikely(!(ops->map || ops->map_pages) || 2247 domain->pgsize_bitmap == 0UL)) 2248 return -ENODEV; 2249 2250 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2251 return -EINVAL; 2252 2253 /* find out the minimum page size supported */ 2254 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2255 2256 /* 2257 * both the virtual address and the physical one, as well as 2258 * the size of the mapping, must be aligned (at least) to the 2259 * size of the smallest page supported by the hardware 2260 */ 2261 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2262 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2263 iova, &paddr, size, min_pagesz); 2264 return -EINVAL; 2265 } 2266 2267 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2268 2269 while (size) { 2270 size_t mapped = 0; 2271 2272 ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, 2273 &mapped); 2274 /* 2275 * Some pages may have been mapped, even if an error occurred, 2276 * so we should account for those so they can be unmapped. 2277 */ 2278 size -= mapped; 2279 2280 if (ret) 2281 break; 2282 2283 iova += mapped; 2284 paddr += mapped; 2285 } 2286 2287 /* unroll mapping in case something went wrong */ 2288 if (ret) 2289 iommu_unmap(domain, orig_iova, orig_size - size); 2290 else 2291 trace_map(orig_iova, orig_paddr, orig_size); 2292 2293 return ret; 2294 } 2295 2296 static int _iommu_map(struct iommu_domain *domain, unsigned long iova, 2297 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2298 { 2299 const struct iommu_domain_ops *ops = domain->ops; 2300 int ret; 2301 2302 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2303 if (ret == 0 && ops->iotlb_sync_map) 2304 ops->iotlb_sync_map(domain, iova, size); 2305 2306 return ret; 2307 } 2308 2309 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2310 phys_addr_t paddr, size_t size, int prot) 2311 { 2312 might_sleep(); 2313 return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); 2314 } 2315 EXPORT_SYMBOL_GPL(iommu_map); 2316 2317 int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, 2318 phys_addr_t paddr, size_t size, int prot) 2319 { 2320 return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); 2321 } 2322 EXPORT_SYMBOL_GPL(iommu_map_atomic); 2323 2324 static size_t __iommu_unmap_pages(struct iommu_domain *domain, 2325 unsigned long iova, size_t size, 2326 struct iommu_iotlb_gather *iotlb_gather) 2327 { 2328 const struct iommu_domain_ops *ops = domain->ops; 2329 size_t pgsize, count; 2330 2331 pgsize = iommu_pgsize(domain, iova, iova, size, &count); 2332 return ops->unmap_pages ? 2333 ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : 2334 ops->unmap(domain, iova, pgsize, iotlb_gather); 2335 } 2336 2337 static size_t __iommu_unmap(struct iommu_domain *domain, 2338 unsigned long iova, size_t size, 2339 struct iommu_iotlb_gather *iotlb_gather) 2340 { 2341 const struct iommu_domain_ops *ops = domain->ops; 2342 size_t unmapped_page, unmapped = 0; 2343 unsigned long orig_iova = iova; 2344 unsigned int min_pagesz; 2345 2346 if (unlikely(!(ops->unmap || ops->unmap_pages) || 2347 domain->pgsize_bitmap == 0UL)) 2348 return 0; 2349 2350 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2351 return 0; 2352 2353 /* find out the minimum page size supported */ 2354 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2355 2356 /* 2357 * The virtual address, as well as the size of the mapping, must be 2358 * aligned (at least) to the size of the smallest page supported 2359 * by the hardware 2360 */ 2361 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2362 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2363 iova, size, min_pagesz); 2364 return 0; 2365 } 2366 2367 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2368 2369 /* 2370 * Keep iterating until we either unmap 'size' bytes (or more) 2371 * or we hit an area that isn't mapped. 2372 */ 2373 while (unmapped < size) { 2374 unmapped_page = __iommu_unmap_pages(domain, iova, 2375 size - unmapped, 2376 iotlb_gather); 2377 if (!unmapped_page) 2378 break; 2379 2380 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2381 iova, unmapped_page); 2382 2383 iova += unmapped_page; 2384 unmapped += unmapped_page; 2385 } 2386 2387 trace_unmap(orig_iova, size, unmapped); 2388 return unmapped; 2389 } 2390 2391 size_t iommu_unmap(struct iommu_domain *domain, 2392 unsigned long iova, size_t size) 2393 { 2394 struct iommu_iotlb_gather iotlb_gather; 2395 size_t ret; 2396 2397 iommu_iotlb_gather_init(&iotlb_gather); 2398 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2399 iommu_iotlb_sync(domain, &iotlb_gather); 2400 2401 return ret; 2402 } 2403 EXPORT_SYMBOL_GPL(iommu_unmap); 2404 2405 size_t iommu_unmap_fast(struct iommu_domain *domain, 2406 unsigned long iova, size_t size, 2407 struct iommu_iotlb_gather *iotlb_gather) 2408 { 2409 return __iommu_unmap(domain, iova, size, iotlb_gather); 2410 } 2411 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2412 2413 static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2414 struct scatterlist *sg, unsigned int nents, int prot, 2415 gfp_t gfp) 2416 { 2417 const struct iommu_domain_ops *ops = domain->ops; 2418 size_t len = 0, mapped = 0; 2419 phys_addr_t start; 2420 unsigned int i = 0; 2421 int ret; 2422 2423 while (i <= nents) { 2424 phys_addr_t s_phys = sg_phys(sg); 2425 2426 if (len && s_phys != start + len) { 2427 ret = __iommu_map(domain, iova + mapped, start, 2428 len, prot, gfp); 2429 2430 if (ret) 2431 goto out_err; 2432 2433 mapped += len; 2434 len = 0; 2435 } 2436 2437 if (sg_is_dma_bus_address(sg)) 2438 goto next; 2439 2440 if (len) { 2441 len += sg->length; 2442 } else { 2443 len = sg->length; 2444 start = s_phys; 2445 } 2446 2447 next: 2448 if (++i < nents) 2449 sg = sg_next(sg); 2450 } 2451 2452 if (ops->iotlb_sync_map) 2453 ops->iotlb_sync_map(domain, iova, mapped); 2454 return mapped; 2455 2456 out_err: 2457 /* undo mappings already done */ 2458 iommu_unmap(domain, iova, mapped); 2459 2460 return ret; 2461 } 2462 2463 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2464 struct scatterlist *sg, unsigned int nents, int prot) 2465 { 2466 might_sleep(); 2467 return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL); 2468 } 2469 EXPORT_SYMBOL_GPL(iommu_map_sg); 2470 2471 ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, 2472 struct scatterlist *sg, unsigned int nents, int prot) 2473 { 2474 return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); 2475 } 2476 2477 /** 2478 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2479 * @domain: the iommu domain where the fault has happened 2480 * @dev: the device where the fault has happened 2481 * @iova: the faulting address 2482 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2483 * 2484 * This function should be called by the low-level IOMMU implementations 2485 * whenever IOMMU faults happen, to allow high-level users, that are 2486 * interested in such events, to know about them. 2487 * 2488 * This event may be useful for several possible use cases: 2489 * - mere logging of the event 2490 * - dynamic TLB/PTE loading 2491 * - if restarting of the faulting device is required 2492 * 2493 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2494 * PTE/TLB loading will one day be supported, implementations will be able 2495 * to tell whether it succeeded or not according to this return value). 2496 * 2497 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2498 * (though fault handlers can also return -ENOSYS, in case they want to 2499 * elicit the default behavior of the IOMMU drivers). 2500 */ 2501 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2502 unsigned long iova, int flags) 2503 { 2504 int ret = -ENOSYS; 2505 2506 /* 2507 * if upper layers showed interest and installed a fault handler, 2508 * invoke it. 2509 */ 2510 if (domain->handler) 2511 ret = domain->handler(domain, dev, iova, flags, 2512 domain->handler_token); 2513 2514 trace_io_page_fault(dev, iova, flags); 2515 return ret; 2516 } 2517 EXPORT_SYMBOL_GPL(report_iommu_fault); 2518 2519 static int __init iommu_init(void) 2520 { 2521 iommu_group_kset = kset_create_and_add("iommu_groups", 2522 NULL, kernel_kobj); 2523 BUG_ON(!iommu_group_kset); 2524 2525 iommu_debugfs_setup(); 2526 2527 return 0; 2528 } 2529 core_initcall(iommu_init); 2530 2531 int iommu_enable_nesting(struct iommu_domain *domain) 2532 { 2533 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2534 return -EINVAL; 2535 if (!domain->ops->enable_nesting) 2536 return -EINVAL; 2537 return domain->ops->enable_nesting(domain); 2538 } 2539 EXPORT_SYMBOL_GPL(iommu_enable_nesting); 2540 2541 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2542 unsigned long quirk) 2543 { 2544 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2545 return -EINVAL; 2546 if (!domain->ops->set_pgtable_quirks) 2547 return -EINVAL; 2548 return domain->ops->set_pgtable_quirks(domain, quirk); 2549 } 2550 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2551 2552 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2553 { 2554 const struct iommu_ops *ops = dev_iommu_ops(dev); 2555 2556 if (ops->get_resv_regions) 2557 ops->get_resv_regions(dev, list); 2558 } 2559 2560 /** 2561 * iommu_put_resv_regions - release resered regions 2562 * @dev: device for which to free reserved regions 2563 * @list: reserved region list for device 2564 * 2565 * This releases a reserved region list acquired by iommu_get_resv_regions(). 2566 */ 2567 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2568 { 2569 struct iommu_resv_region *entry, *next; 2570 2571 list_for_each_entry_safe(entry, next, list, list) { 2572 if (entry->free) 2573 entry->free(dev, entry); 2574 else 2575 kfree(entry); 2576 } 2577 } 2578 EXPORT_SYMBOL(iommu_put_resv_regions); 2579 2580 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2581 size_t length, int prot, 2582 enum iommu_resv_type type) 2583 { 2584 struct iommu_resv_region *region; 2585 2586 region = kzalloc(sizeof(*region), GFP_KERNEL); 2587 if (!region) 2588 return NULL; 2589 2590 INIT_LIST_HEAD(®ion->list); 2591 region->start = start; 2592 region->length = length; 2593 region->prot = prot; 2594 region->type = type; 2595 return region; 2596 } 2597 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2598 2599 void iommu_set_default_passthrough(bool cmd_line) 2600 { 2601 if (cmd_line) 2602 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2603 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2604 } 2605 2606 void iommu_set_default_translated(bool cmd_line) 2607 { 2608 if (cmd_line) 2609 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2610 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2611 } 2612 2613 bool iommu_default_passthrough(void) 2614 { 2615 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2616 } 2617 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2618 2619 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 2620 { 2621 const struct iommu_ops *ops = NULL; 2622 struct iommu_device *iommu; 2623 2624 spin_lock(&iommu_device_lock); 2625 list_for_each_entry(iommu, &iommu_device_list, list) 2626 if (iommu->fwnode == fwnode) { 2627 ops = iommu->ops; 2628 break; 2629 } 2630 spin_unlock(&iommu_device_lock); 2631 return ops; 2632 } 2633 2634 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 2635 const struct iommu_ops *ops) 2636 { 2637 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2638 2639 if (fwspec) 2640 return ops == fwspec->ops ? 0 : -EINVAL; 2641 2642 if (!dev_iommu_get(dev)) 2643 return -ENOMEM; 2644 2645 /* Preallocate for the overwhelmingly common case of 1 ID */ 2646 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2647 if (!fwspec) 2648 return -ENOMEM; 2649 2650 of_node_get(to_of_node(iommu_fwnode)); 2651 fwspec->iommu_fwnode = iommu_fwnode; 2652 fwspec->ops = ops; 2653 dev_iommu_fwspec_set(dev, fwspec); 2654 return 0; 2655 } 2656 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2657 2658 void iommu_fwspec_free(struct device *dev) 2659 { 2660 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2661 2662 if (fwspec) { 2663 fwnode_handle_put(fwspec->iommu_fwnode); 2664 kfree(fwspec); 2665 dev_iommu_fwspec_set(dev, NULL); 2666 } 2667 } 2668 EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2669 2670 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2671 { 2672 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2673 int i, new_num; 2674 2675 if (!fwspec) 2676 return -EINVAL; 2677 2678 new_num = fwspec->num_ids + num_ids; 2679 if (new_num > 1) { 2680 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2681 GFP_KERNEL); 2682 if (!fwspec) 2683 return -ENOMEM; 2684 2685 dev_iommu_fwspec_set(dev, fwspec); 2686 } 2687 2688 for (i = 0; i < num_ids; i++) 2689 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2690 2691 fwspec->num_ids = new_num; 2692 return 0; 2693 } 2694 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2695 2696 /* 2697 * Per device IOMMU features. 2698 */ 2699 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2700 { 2701 if (dev->iommu && dev->iommu->iommu_dev) { 2702 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2703 2704 if (ops->dev_enable_feat) 2705 return ops->dev_enable_feat(dev, feat); 2706 } 2707 2708 return -ENODEV; 2709 } 2710 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2711 2712 /* 2713 * The device drivers should do the necessary cleanups before calling this. 2714 */ 2715 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2716 { 2717 if (dev->iommu && dev->iommu->iommu_dev) { 2718 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2719 2720 if (ops->dev_disable_feat) 2721 return ops->dev_disable_feat(dev, feat); 2722 } 2723 2724 return -EBUSY; 2725 } 2726 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2727 2728 /** 2729 * iommu_sva_bind_device() - Bind a process address space to a device 2730 * @dev: the device 2731 * @mm: the mm to bind, caller must hold a reference to it 2732 * @drvdata: opaque data pointer to pass to bind callback 2733 * 2734 * Create a bond between device and address space, allowing the device to access 2735 * the mm using the returned PASID. If a bond already exists between @device and 2736 * @mm, it is returned and an additional reference is taken. Caller must call 2737 * iommu_sva_unbind_device() to release each reference. 2738 * 2739 * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to 2740 * initialize the required SVA features. 2741 * 2742 * On error, returns an ERR_PTR value. 2743 */ 2744 struct iommu_sva * 2745 iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) 2746 { 2747 struct iommu_group *group; 2748 struct iommu_sva *handle = ERR_PTR(-EINVAL); 2749 const struct iommu_ops *ops = dev_iommu_ops(dev); 2750 2751 if (!ops->sva_bind) 2752 return ERR_PTR(-ENODEV); 2753 2754 group = iommu_group_get(dev); 2755 if (!group) 2756 return ERR_PTR(-ENODEV); 2757 2758 /* Ensure device count and domain don't change while we're binding */ 2759 mutex_lock(&group->mutex); 2760 2761 /* 2762 * To keep things simple, SVA currently doesn't support IOMMU groups 2763 * with more than one device. Existing SVA-capable systems are not 2764 * affected by the problems that required IOMMU groups (lack of ACS 2765 * isolation, device ID aliasing and other hardware issues). 2766 */ 2767 if (iommu_group_device_count(group) != 1) 2768 goto out_unlock; 2769 2770 handle = ops->sva_bind(dev, mm, drvdata); 2771 2772 out_unlock: 2773 mutex_unlock(&group->mutex); 2774 iommu_group_put(group); 2775 2776 return handle; 2777 } 2778 EXPORT_SYMBOL_GPL(iommu_sva_bind_device); 2779 2780 /** 2781 * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device 2782 * @handle: the handle returned by iommu_sva_bind_device() 2783 * 2784 * Put reference to a bond between device and address space. The device should 2785 * not be issuing any more transaction for this PASID. All outstanding page 2786 * requests for this PASID must have been flushed to the IOMMU. 2787 */ 2788 void iommu_sva_unbind_device(struct iommu_sva *handle) 2789 { 2790 struct iommu_group *group; 2791 struct device *dev = handle->dev; 2792 const struct iommu_ops *ops = dev_iommu_ops(dev); 2793 2794 if (!ops->sva_unbind) 2795 return; 2796 2797 group = iommu_group_get(dev); 2798 if (!group) 2799 return; 2800 2801 mutex_lock(&group->mutex); 2802 ops->sva_unbind(handle); 2803 mutex_unlock(&group->mutex); 2804 2805 iommu_group_put(group); 2806 } 2807 EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); 2808 2809 u32 iommu_sva_get_pasid(struct iommu_sva *handle) 2810 { 2811 const struct iommu_ops *ops = dev_iommu_ops(handle->dev); 2812 2813 if (!ops->sva_get_pasid) 2814 return IOMMU_PASID_INVALID; 2815 2816 return ops->sva_get_pasid(handle); 2817 } 2818 EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); 2819 2820 /* 2821 * Changes the default domain of an iommu group that has *only* one device 2822 * 2823 * @group: The group for which the default domain should be changed 2824 * @prev_dev: The device in the group (this is used to make sure that the device 2825 * hasn't changed after the caller has called this function) 2826 * @type: The type of the new default domain that gets associated with the group 2827 * 2828 * Returns 0 on success and error code on failure 2829 * 2830 * Note: 2831 * 1. Presently, this function is called only when user requests to change the 2832 * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type 2833 * Please take a closer look if intended to use for other purposes. 2834 */ 2835 static int iommu_change_dev_def_domain(struct iommu_group *group, 2836 struct device *prev_dev, int type) 2837 { 2838 struct iommu_domain *prev_dom; 2839 struct group_device *grp_dev; 2840 int ret, dev_def_dom; 2841 struct device *dev; 2842 2843 mutex_lock(&group->mutex); 2844 2845 if (group->default_domain != group->domain) { 2846 dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); 2847 ret = -EBUSY; 2848 goto out; 2849 } 2850 2851 /* 2852 * iommu group wasn't locked while acquiring device lock in 2853 * iommu_group_store_type(). So, make sure that the device count hasn't 2854 * changed while acquiring device lock. 2855 * 2856 * Changing default domain of an iommu group with two or more devices 2857 * isn't supported because there could be a potential deadlock. Consider 2858 * the following scenario. T1 is trying to acquire device locks of all 2859 * the devices in the group and before it could acquire all of them, 2860 * there could be another thread T2 (from different sub-system and use 2861 * case) that has already acquired some of the device locks and might be 2862 * waiting for T1 to release other device locks. 2863 */ 2864 if (iommu_group_device_count(group) != 1) { 2865 dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n"); 2866 ret = -EINVAL; 2867 goto out; 2868 } 2869 2870 /* Since group has only one device */ 2871 grp_dev = list_first_entry(&group->devices, struct group_device, list); 2872 dev = grp_dev->dev; 2873 2874 if (prev_dev != dev) { 2875 dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n"); 2876 ret = -EBUSY; 2877 goto out; 2878 } 2879 2880 prev_dom = group->default_domain; 2881 if (!prev_dom) { 2882 ret = -EINVAL; 2883 goto out; 2884 } 2885 2886 dev_def_dom = iommu_get_def_domain_type(dev); 2887 if (!type) { 2888 /* 2889 * If the user hasn't requested any specific type of domain and 2890 * if the device supports both the domains, then default to the 2891 * domain the device was booted with 2892 */ 2893 type = dev_def_dom ? : iommu_def_domain_type; 2894 } else if (dev_def_dom && type != dev_def_dom) { 2895 dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n", 2896 iommu_domain_type_str(type)); 2897 ret = -EINVAL; 2898 goto out; 2899 } 2900 2901 /* 2902 * Switch to a new domain only if the requested domain type is different 2903 * from the existing default domain type 2904 */ 2905 if (prev_dom->type == type) { 2906 ret = 0; 2907 goto out; 2908 } 2909 2910 /* We can bring up a flush queue without tearing down the domain */ 2911 if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { 2912 ret = iommu_dma_init_fq(prev_dom); 2913 if (!ret) 2914 prev_dom->type = IOMMU_DOMAIN_DMA_FQ; 2915 goto out; 2916 } 2917 2918 /* Sets group->default_domain to the newly allocated domain */ 2919 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2920 if (ret) 2921 goto out; 2922 2923 ret = iommu_create_device_direct_mappings(group, dev); 2924 if (ret) 2925 goto free_new_domain; 2926 2927 ret = __iommu_attach_device(group->default_domain, dev); 2928 if (ret) 2929 goto free_new_domain; 2930 2931 group->domain = group->default_domain; 2932 2933 /* 2934 * Release the mutex here because ops->probe_finalize() call-back of 2935 * some vendor IOMMU drivers calls arm_iommu_attach_device() which 2936 * in-turn might call back into IOMMU core code, where it tries to take 2937 * group->mutex, resulting in a deadlock. 2938 */ 2939 mutex_unlock(&group->mutex); 2940 2941 /* Make sure dma_ops is appropriatley set */ 2942 iommu_group_do_probe_finalize(dev, group->default_domain); 2943 iommu_domain_free(prev_dom); 2944 return 0; 2945 2946 free_new_domain: 2947 iommu_domain_free(group->default_domain); 2948 group->default_domain = prev_dom; 2949 group->domain = prev_dom; 2950 2951 out: 2952 mutex_unlock(&group->mutex); 2953 2954 return ret; 2955 } 2956 2957 /* 2958 * Changing the default domain through sysfs requires the users to unbind the 2959 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2960 * transition. Return failure if this isn't met. 2961 * 2962 * We need to consider the race between this and the device release path. 2963 * device_lock(dev) is used here to guarantee that the device release path 2964 * will not be entered at the same time. 2965 */ 2966 static ssize_t iommu_group_store_type(struct iommu_group *group, 2967 const char *buf, size_t count) 2968 { 2969 struct group_device *grp_dev; 2970 struct device *dev; 2971 int ret, req_type; 2972 2973 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2974 return -EACCES; 2975 2976 if (WARN_ON(!group) || !group->default_domain) 2977 return -EINVAL; 2978 2979 if (sysfs_streq(buf, "identity")) 2980 req_type = IOMMU_DOMAIN_IDENTITY; 2981 else if (sysfs_streq(buf, "DMA")) 2982 req_type = IOMMU_DOMAIN_DMA; 2983 else if (sysfs_streq(buf, "DMA-FQ")) 2984 req_type = IOMMU_DOMAIN_DMA_FQ; 2985 else if (sysfs_streq(buf, "auto")) 2986 req_type = 0; 2987 else 2988 return -EINVAL; 2989 2990 /* 2991 * Lock/Unlock the group mutex here before device lock to 2992 * 1. Make sure that the iommu group has only one device (this is a 2993 * prerequisite for step 2) 2994 * 2. Get struct *dev which is needed to lock device 2995 */ 2996 mutex_lock(&group->mutex); 2997 if (iommu_group_device_count(group) != 1) { 2998 mutex_unlock(&group->mutex); 2999 pr_err_ratelimited("Cannot change default domain: Group has more than one device\n"); 3000 return -EINVAL; 3001 } 3002 3003 /* Since group has only one device */ 3004 grp_dev = list_first_entry(&group->devices, struct group_device, list); 3005 dev = grp_dev->dev; 3006 get_device(dev); 3007 3008 /* 3009 * Don't hold the group mutex because taking group mutex first and then 3010 * the device lock could potentially cause a deadlock as below. Assume 3011 * two threads T1 and T2. T1 is trying to change default domain of an 3012 * iommu group and T2 is trying to hot unplug a device or release [1] VF 3013 * of a PCIe device which is in the same iommu group. T1 takes group 3014 * mutex and before it could take device lock assume T2 has taken device 3015 * lock and is yet to take group mutex. Now, both the threads will be 3016 * waiting for the other thread to release lock. Below, lock order was 3017 * suggested. 3018 * device_lock(dev); 3019 * mutex_lock(&group->mutex); 3020 * iommu_change_dev_def_domain(); 3021 * mutex_unlock(&group->mutex); 3022 * device_unlock(dev); 3023 * 3024 * [1] Typical device release path 3025 * device_lock() from device/driver core code 3026 * -> bus_notifier() 3027 * -> iommu_bus_notifier() 3028 * -> iommu_release_device() 3029 * -> ops->release_device() vendor driver calls back iommu core code 3030 * -> mutex_lock() from iommu core code 3031 */ 3032 mutex_unlock(&group->mutex); 3033 3034 /* Check if the device in the group still has a driver bound to it */ 3035 device_lock(dev); 3036 if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && 3037 group->default_domain->type == IOMMU_DOMAIN_DMA)) { 3038 pr_err_ratelimited("Device is still bound to driver\n"); 3039 ret = -EBUSY; 3040 goto out; 3041 } 3042 3043 ret = iommu_change_dev_def_domain(group, dev, req_type); 3044 ret = ret ?: count; 3045 3046 out: 3047 device_unlock(dev); 3048 put_device(dev); 3049 3050 return ret; 3051 } 3052 3053 static bool iommu_is_default_domain(struct iommu_group *group) 3054 { 3055 if (group->domain == group->default_domain) 3056 return true; 3057 3058 /* 3059 * If the default domain was set to identity and it is still an identity 3060 * domain then we consider this a pass. This happens because of 3061 * amd_iommu_init_device() replacing the default idenytity domain with an 3062 * identity domain that has a different configuration for AMDGPU. 3063 */ 3064 if (group->default_domain && 3065 group->default_domain->type == IOMMU_DOMAIN_IDENTITY && 3066 group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) 3067 return true; 3068 return false; 3069 } 3070 3071 /** 3072 * iommu_device_use_default_domain() - Device driver wants to handle device 3073 * DMA through the kernel DMA API. 3074 * @dev: The device. 3075 * 3076 * The device driver about to bind @dev wants to do DMA through the kernel 3077 * DMA API. Return 0 if it is allowed, otherwise an error. 3078 */ 3079 int iommu_device_use_default_domain(struct device *dev) 3080 { 3081 struct iommu_group *group = iommu_group_get(dev); 3082 int ret = 0; 3083 3084 if (!group) 3085 return 0; 3086 3087 mutex_lock(&group->mutex); 3088 if (group->owner_cnt) { 3089 if (group->owner || !iommu_is_default_domain(group)) { 3090 ret = -EBUSY; 3091 goto unlock_out; 3092 } 3093 } 3094 3095 group->owner_cnt++; 3096 3097 unlock_out: 3098 mutex_unlock(&group->mutex); 3099 iommu_group_put(group); 3100 3101 return ret; 3102 } 3103 3104 /** 3105 * iommu_device_unuse_default_domain() - Device driver stops handling device 3106 * DMA through the kernel DMA API. 3107 * @dev: The device. 3108 * 3109 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3110 * It must be called after iommu_device_use_default_domain(). 3111 */ 3112 void iommu_device_unuse_default_domain(struct device *dev) 3113 { 3114 struct iommu_group *group = iommu_group_get(dev); 3115 3116 if (!group) 3117 return; 3118 3119 mutex_lock(&group->mutex); 3120 if (!WARN_ON(!group->owner_cnt)) 3121 group->owner_cnt--; 3122 3123 mutex_unlock(&group->mutex); 3124 iommu_group_put(group); 3125 } 3126 3127 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3128 { 3129 struct group_device *dev = 3130 list_first_entry(&group->devices, struct group_device, list); 3131 3132 if (group->blocking_domain) 3133 return 0; 3134 3135 group->blocking_domain = 3136 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3137 if (!group->blocking_domain) { 3138 /* 3139 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3140 * create an empty domain instead. 3141 */ 3142 group->blocking_domain = __iommu_domain_alloc( 3143 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3144 if (!group->blocking_domain) 3145 return -EINVAL; 3146 } 3147 return 0; 3148 } 3149 3150 /** 3151 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3152 * @group: The group. 3153 * @owner: Caller specified pointer. Used for exclusive ownership. 3154 * 3155 * This is to support backward compatibility for vfio which manages 3156 * the dma ownership in iommu_group level. New invocations on this 3157 * interface should be prohibited. 3158 */ 3159 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3160 { 3161 int ret = 0; 3162 3163 mutex_lock(&group->mutex); 3164 if (group->owner_cnt) { 3165 ret = -EPERM; 3166 goto unlock_out; 3167 } else { 3168 if (group->domain && group->domain != group->default_domain) { 3169 ret = -EBUSY; 3170 goto unlock_out; 3171 } 3172 3173 ret = __iommu_group_alloc_blocking_domain(group); 3174 if (ret) 3175 goto unlock_out; 3176 3177 ret = __iommu_group_set_domain(group, group->blocking_domain); 3178 if (ret) 3179 goto unlock_out; 3180 group->owner = owner; 3181 } 3182 3183 group->owner_cnt++; 3184 unlock_out: 3185 mutex_unlock(&group->mutex); 3186 3187 return ret; 3188 } 3189 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3190 3191 /** 3192 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3193 * @group: The group. 3194 * 3195 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3196 */ 3197 void iommu_group_release_dma_owner(struct iommu_group *group) 3198 { 3199 int ret; 3200 3201 mutex_lock(&group->mutex); 3202 if (WARN_ON(!group->owner_cnt || !group->owner)) 3203 goto unlock_out; 3204 3205 group->owner_cnt = 0; 3206 group->owner = NULL; 3207 ret = __iommu_group_set_domain(group, group->default_domain); 3208 WARN(ret, "iommu driver failed to attach the default domain"); 3209 3210 unlock_out: 3211 mutex_unlock(&group->mutex); 3212 } 3213 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3214 3215 /** 3216 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3217 * @group: The group. 3218 * 3219 * This provides status query on a given group. It is racy and only for 3220 * non-binding status reporting. 3221 */ 3222 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3223 { 3224 unsigned int user; 3225 3226 mutex_lock(&group->mutex); 3227 user = group->owner_cnt; 3228 mutex_unlock(&group->mutex); 3229 3230 return user; 3231 } 3232 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3233