// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)    "iommu: " fmt

#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitops.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>

#include "dma-iommu.h"

#include "iommu-sva.h"

static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);

static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;

struct iommu_group {
	struct kobject kobj;
	struct kobject *devices_kobj;
	struct list_head devices;
	struct xarray pasid_array;
	struct mutex mutex;
	void *iommu_data;
	void (*iommu_data_release)(void *iommu_data);
	char *name;
	int id;
	struct iommu_domain *default_domain;
	struct iommu_domain *blocking_domain;
	struct iommu_domain *domain;
	struct list_head entry;
	unsigned int owner_cnt;
	void *owner;
};

struct group_device {
	struct list_head list;
	struct device *dev;
	char *name;
};

struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};

static const char * const iommu_group_resv_type_string[] = {
	[IOMMU_RESV_DIRECT]		= "direct",
	[IOMMU_RESV_DIRECT_RELAXABLE]	= "direct-relaxable",
	[IOMMU_RESV_RESERVED]		= "reserved",
	[IOMMU_RESV_MSI]		= "msi",
	[IOMMU_RESV_SW_MSI]		= "msi",
};

#define IOMMU_CMD_LINE_DMA_API		BIT(0)
#define IOMMU_CMD_LINE_STRICT		BIT(1)

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data);
static int iommu_alloc_default_domain(struct iommu_group *group,
				      struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
						 unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
				struct iommu_group *group);
static int __iommu_group_set_domain(struct iommu_group *group,
				    struct iommu_domain *new_domain);
static int iommu_create_device_direct_mappings(struct iommu_group *group,
					       struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count);

#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =	\
	__ATTR(_name, _mode, _show, _store)

#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)

static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);

static struct bus_type * const iommu_buses[] = {
	&platform_bus_type,
#ifdef CONFIG_PCI
	&pci_bus_type,
#endif
#ifdef CONFIG_ARM_AMBA
	&amba_bustype,
#endif
#ifdef CONFIG_FSL_MC_BUS
	&fsl_mc_bus_type,
#endif
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
	&host1x_context_device_bus_type,
#endif
#ifdef CONFIG_CDX_BUS
	&cdx_bus_type,
#endif
};

/*
 * Use a function instead of an array here because the domain-type is a
 * bit-field, so an array would waste memory.
 */
static const char *iommu_domain_type_str(unsigned int t)
{
	switch (t) {
	case IOMMU_DOMAIN_BLOCKED:
		return "Blocked";
	case IOMMU_DOMAIN_IDENTITY:
		return "Passthrough";
	case IOMMU_DOMAIN_UNMANAGED:
		return "Unmanaged";
	case IOMMU_DOMAIN_DMA:
	case IOMMU_DOMAIN_DMA_FQ:
		return "Translated";
	default:
		return "Unknown";
	}
}

static int __init iommu_subsys_init(void)
{
	struct notifier_block *nb;

	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
			iommu_set_default_passthrough(false);
		else
			iommu_set_default_translated(false);

		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
			iommu_set_default_translated(false);
		}
	}

	if (!iommu_default_passthrough() && !iommu_dma_strict)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;

	pr_info("Default domain type: %s %s\n",
		iommu_domain_type_str(iommu_def_domain_type),
		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
			"(set via kernel command line)" : "");

	if (!iommu_default_passthrough())
		pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
			iommu_dma_strict ? "strict" : "lazy",
			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
				"(set via kernel command line)" : "");

	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
	if (!nb)
		return -ENOMEM;

	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
		nb[i].notifier_call = iommu_bus_notifier;
		bus_register_notifier(iommu_buses[i], &nb[i]);
	}

	return 0;
}
subsys_initcall(iommu_subsys_init);

static int remove_iommu_group(struct device *dev, void *data)
{
	if (dev->iommu && dev->iommu->iommu_dev == data)
		iommu_release_device(dev);

	return 0;
}

/**
 * iommu_device_register() - Register an IOMMU hardware instance
 * @iommu: IOMMU handle for the instance
 * @ops: IOMMU ops to associate with the instance
 * @hwdev: (optional) actual instance device, used for fwnode lookup
 *
 * Return: 0 on success, or an error.
 */
int iommu_device_register(struct iommu_device *iommu,
			  const struct iommu_ops *ops, struct device *hwdev)
{
	int err = 0;

	/* We need to be able to take module references appropriately */
	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
		return -EINVAL;
	/*
	 * Temporarily enforce global restriction to a single driver. This was
	 * already the de-facto behaviour, since any possible combination of
	 * existing drivers would compete for at least the PCI or platform bus.
	 */
	if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops)
		return -EBUSY;

	iommu->ops = ops;
	if (hwdev)
		iommu->fwnode = dev_fwnode(hwdev);

	spin_lock(&iommu_device_lock);
	list_add_tail(&iommu->list, &iommu_device_list);
	spin_unlock(&iommu_device_lock);

	for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
		iommu_buses[i]->iommu_ops = ops;
		err = bus_iommu_probe(iommu_buses[i]);
	}
	if (err)
		iommu_device_unregister(iommu);
	return err;
}
EXPORT_SYMBOL_GPL(iommu_device_register);

void iommu_device_unregister(struct iommu_device *iommu)
{
	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
		bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);

	spin_lock(&iommu_device_lock);
	list_del(&iommu->list);
	spin_unlock(&iommu_device_lock);
}
EXPORT_SYMBOL_GPL(iommu_device_unregister);
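
/*
 * Example (illustrative sketch, not part of this file): an IOMMU driver
 * typically calls iommu_device_register() from its own probe path and
 * undoes it with iommu_device_unregister() on removal. The names
 * "my_iommu", "my_iommu_ops" and "my_iommu_probe" below are hypothetical
 * and used only for illustration.
 *
 *	static const struct iommu_ops my_iommu_ops = {
 *		.owner		= THIS_MODULE,
 *		.probe_device	= my_iommu_probe_device,
 *		.device_group	= generic_device_group,
 *	};
 *
 *	static int my_iommu_probe(struct platform_device *pdev)
 *	{
 *		struct my_iommu *iommu = platform_get_drvdata(pdev);
 *
 *		return iommu_device_register(&iommu->iommu, &my_iommu_ops,
 *					     &pdev->dev);
 *	}
 *
 * Registration triggers bus_iommu_probe() on all supported buses, so the
 * driver's probe_device() callback may run before this returns.
 */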

static struct dev_iommu *dev_iommu_get(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;

	if (param)
		return param;

	param = kzalloc(sizeof(*param), GFP_KERNEL);
	if (!param)
		return NULL;

	mutex_init(&param->lock);
	dev->iommu = param;
	return param;
}

static void dev_iommu_free(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;

	dev->iommu = NULL;
	if (param->fwspec) {
		fwnode_handle_put(param->fwspec->iommu_fwnode);
		kfree(param->fwspec);
	}
	kfree(param);
}

static u32 dev_iommu_get_max_pasids(struct device *dev)
{
	u32 max_pasids = 0, bits = 0;
	int ret;

	if (dev_is_pci(dev)) {
		ret = pci_max_pasids(to_pci_dev(dev));
		if (ret > 0)
			max_pasids = ret;
	} else {
		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
		if (!ret)
			max_pasids = 1UL << bits;
	}

	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
}

static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
	const struct iommu_ops *ops = dev->bus->iommu_ops;
	struct iommu_device *iommu_dev;
	struct iommu_group *group;
	static DEFINE_MUTEX(iommu_probe_device_lock);
	int ret;

	if (!ops)
		return -ENODEV;
	/*
	 * Serialise to avoid races between IOMMU drivers registering in
	 * parallel and/or the "replay" calls from ACPI/OF code via client
	 * driver probe. Once the latter have been cleaned up we should
	 * probably be able to use device_lock() here to minimise the scope,
	 * but for now enforcing a simple global ordering is fine.
	 */
	mutex_lock(&iommu_probe_device_lock);
	if (!dev_iommu_get(dev)) {
		ret = -ENOMEM;
		goto err_unlock;
	}

	if (!try_module_get(ops->owner)) {
		ret = -EINVAL;
		goto err_free;
	}

	iommu_dev = ops->probe_device(dev);
	if (IS_ERR(iommu_dev)) {
		ret = PTR_ERR(iommu_dev);
		goto out_module_put;
	}

	dev->iommu->iommu_dev = iommu_dev;
	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_release;
	}

	mutex_lock(&group->mutex);
	if (group_list && !group->default_domain && list_empty(&group->entry))
		list_add_tail(&group->entry, group_list);
	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	mutex_unlock(&iommu_probe_device_lock);
	iommu_device_link(iommu_dev, dev);

	return 0;

out_release:
	if (ops->release_device)
		ops->release_device(dev);

out_module_put:
	module_put(ops->owner);

err_free:
	dev_iommu_free(dev);

err_unlock:
	mutex_unlock(&iommu_probe_device_lock);

	return ret;
}

static bool iommu_is_attach_deferred(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->is_attach_deferred)
		return ops->is_attach_deferred(dev);

	return false;
}

static int iommu_group_do_dma_first_attach(struct device *dev, void *data)
{
	struct iommu_domain *domain = data;

	lockdep_assert_held(&dev->iommu_group->mutex);

	if (iommu_is_attach_deferred(dev)) {
		dev->iommu->attach_deferred = 1;
		return 0;
	}

	return __iommu_attach_device(domain, dev);
}

int iommu_probe_device(struct device *dev)
{
	const struct iommu_ops *ops;
	struct iommu_group *group;
	int ret;

	ret = __iommu_probe_device(dev, NULL);
	if (ret)
		goto err_out;

	group = iommu_group_get(dev);
	if (!group) {
		ret = -ENODEV;
		goto err_release;
	}

	/*
	 * Try to allocate a default domain - needs support from the
	 * IOMMU driver. There are still some drivers which don't
	 * support default domains, so the return value is not yet
	 * checked.
	 */
	mutex_lock(&group->mutex);
	iommu_alloc_default_domain(group, dev);

	/*
	 * If device joined an existing group which has been claimed, don't
	 * attach the default domain.
	 */
	if (group->default_domain && !group->owner) {
		ret = iommu_group_do_dma_first_attach(dev, group->default_domain);
		if (ret) {
			mutex_unlock(&group->mutex);
			iommu_group_put(group);
			goto err_release;
		}
	}

	iommu_create_device_direct_mappings(group, dev);

	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	ops = dev_iommu_ops(dev);
	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;

err_release:
	iommu_release_device(dev);

err_out:
	return ret;

}

void iommu_release_device(struct device *dev)
{
	const struct iommu_ops *ops;

	if (!dev->iommu)
		return;

	iommu_device_unlink(dev->iommu->iommu_dev, dev);

	ops = dev_iommu_ops(dev);
	if (ops->release_device)
		ops->release_device(dev);

	iommu_group_remove_device(dev);
	module_put(ops->owner);
	dev_iommu_free(dev);
}

static int __init iommu_set_def_domain_type(char *str)
{
	bool pt;
	int ret;

	ret = kstrtobool(str, &pt);
	if (ret)
		return ret;

	if (pt)
		iommu_set_default_passthrough(true);
	else
		iommu_set_default_translated(true);

	return 0;
}
early_param("iommu.passthrough", iommu_set_def_domain_type);

static int __init iommu_dma_setup(char *str)
{
	int ret = kstrtobool(str, &iommu_dma_strict);

	if (!ret)
		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
	return ret;
}
early_param("iommu.strict", iommu_dma_setup);

void iommu_set_dma_strict(void)
{
	iommu_dma_strict = true;
	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
}

static ssize_t iommu_group_attr_show(struct kobject *kobj,
				     struct attribute *__attr, char *buf)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->show)
		ret = attr->show(group, buf);
	return ret;
}

static ssize_t iommu_group_attr_store(struct kobject *kobj,
				      struct attribute *__attr,
				      const char *buf, size_t count)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->store)
		ret = attr->store(group, buf, count);
	return ret;
}

static const struct sysfs_ops iommu_group_sysfs_ops = {
	.show = iommu_group_attr_show,
	.store = iommu_group_attr_store,
};

static int iommu_group_create_file(struct iommu_group *group,
				   struct iommu_group_attribute *attr)
{
	return sysfs_create_file(&group->kobj, &attr->attr);
}

static void iommu_group_remove_file(struct iommu_group *group,
				    struct iommu_group_attribute *attr)
{
	sysfs_remove_file(&group->kobj, &attr->attr);
}

static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
{
	return sprintf(buf, "%s\n", group->name);
}

/**
 * iommu_insert_resv_region - Insert a new region in the
 * list of reserved regions.
 * @new: new region to insert
 * @regions: list of regions
 *
 * Elements are sorted by start address and overlapping segments
 * of the same type are merged.
 */
static int iommu_insert_resv_region(struct iommu_resv_region *new,
				    struct list_head *regions)
{
	struct iommu_resv_region *iter, *tmp, *nr, *top;
	LIST_HEAD(stack);

	nr = iommu_alloc_resv_region(new->start, new->length,
				     new->prot, new->type, GFP_KERNEL);
	if (!nr)
		return -ENOMEM;

	/* First add the new element based on start address sorting */
	list_for_each_entry(iter, regions, list) {
		if (nr->start < iter->start ||
		    (nr->start == iter->start && nr->type <= iter->type))
			break;
	}
	list_add_tail(&nr->list, &iter->list);

	/* Merge overlapping segments of type nr->type in @regions, if any */
	list_for_each_entry_safe(iter, tmp, regions, list) {
		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;

		/* no merge needed on elements of different types than @new */
		if (iter->type != new->type) {
			list_move_tail(&iter->list, &stack);
			continue;
		}

		/* look for the last stack element of same type as @iter */
		list_for_each_entry_reverse(top, &stack, list)
			if (top->type == iter->type)
				goto check_overlap;

		list_move_tail(&iter->list, &stack);
		continue;

check_overlap:
		top_end = top->start + top->length - 1;

		if (iter->start > top_end + 1) {
			list_move_tail(&iter->list, &stack);
		} else {
			top->length = max(top_end, iter_end) - top->start + 1;
			list_del(&iter->list);
			kfree(iter);
		}
	}
	list_splice(&stack, regions);
	return 0;
}

static int
iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
				 struct list_head *group_resv_regions)
{
	struct iommu_resv_region *entry;
	int ret = 0;

	list_for_each_entry(entry, dev_resv_regions, list) {
		ret = iommu_insert_resv_region(entry, group_resv_regions);
		if (ret)
			break;
	}
	return ret;
}
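
/*
 * Worked example of the insertion/merge behaviour above (illustrative,
 * with made-up addresses): inserting a direct region [0x1000, 0x1fff]
 * and then a direct region [0x1800, 0x2fff] into an empty list produces
 * a single merged direct region [0x1000, 0x2fff], because the second
 * region overlaps the first and has the same type. A "reserved" region
 * covering the same addresses would stay as a separate entry, since only
 * segments of the same type as the newly inserted one are merged.
 */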
int iommu_get_group_resv_regions(struct iommu_group *group,
				 struct list_head *head)
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	list_for_each_entry(device, &group->devices, list) {
		struct list_head dev_resv_regions;

		/*
		 * Non-API groups still expose reserved_regions in sysfs,
		 * so filter out calls that get here that way.
		 */
		if (!device->dev->iommu)
			break;

		INIT_LIST_HEAD(&dev_resv_regions);
		iommu_get_resv_regions(device->dev, &dev_resv_regions);
		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
		iommu_put_resv_regions(device->dev, &dev_resv_regions);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);

static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
					     char *buf)
{
	struct iommu_resv_region *region, *next;
	struct list_head group_resv_regions;
	char *str = buf;

	INIT_LIST_HEAD(&group_resv_regions);
	iommu_get_group_resv_regions(group, &group_resv_regions);

	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
		str += sprintf(str, "0x%016llx 0x%016llx %s\n",
			       (long long int)region->start,
			       (long long int)(region->start +
						region->length - 1),
			       iommu_group_resv_type_string[region->type]);
		kfree(region);
	}

	return (str - buf);
}

static ssize_t iommu_group_show_type(struct iommu_group *group,
				     char *buf)
{
	char *type = "unknown\n";

	mutex_lock(&group->mutex);
	if (group->default_domain) {
		switch (group->default_domain->type) {
		case IOMMU_DOMAIN_BLOCKED:
			type = "blocked\n";
			break;
		case IOMMU_DOMAIN_IDENTITY:
			type = "identity\n";
			break;
		case IOMMU_DOMAIN_UNMANAGED:
			type = "unmanaged\n";
			break;
		case IOMMU_DOMAIN_DMA:
			type = "DMA\n";
			break;
		case IOMMU_DOMAIN_DMA_FQ:
			type = "DMA-FQ\n";
			break;
		}
	}
	mutex_unlock(&group->mutex);
	strcpy(buf, type);

	return strlen(type);
}

static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static IOMMU_GROUP_ATTR(reserved_regions, 0444,
			iommu_group_show_resv_regions, NULL);

static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
			iommu_group_store_type);

static void iommu_group_release(struct kobject *kobj)
{
	struct iommu_group *group = to_iommu_group(kobj);

	pr_debug("Releasing group %d\n", group->id);

	if (group->iommu_data_release)
		group->iommu_data_release(group->iommu_data);

	ida_free(&iommu_group_ida, group->id);

	if (group->default_domain)
		iommu_domain_free(group->default_domain);
	if (group->blocking_domain)
		iommu_domain_free(group->blocking_domain);

	kfree(group->name);
	kfree(group);
}

static struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};

/**
 * iommu_group_alloc - Allocate a new group
 *
 * This function is called by an iommu driver to allocate a new iommu
 * group. The iommu group represents the minimum granularity of the iommu.
 * Upon successful return, the caller holds a reference to the supplied
 * group in order to hold the group until devices are added. Use
 * iommu_group_put() to release this extra reference count, allowing the
 * group to be automatically reclaimed once it has no devices or external
 * references.
 */
struct iommu_group *iommu_group_alloc(void)
{
	struct iommu_group *group;
	int ret;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->kobj.kset = iommu_group_kset;
	mutex_init(&group->mutex);
	INIT_LIST_HEAD(&group->devices);
	INIT_LIST_HEAD(&group->entry);
	xa_init(&group->pasid_array);

	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
	if (ret < 0) {
		kfree(group);
		return ERR_PTR(ret);
	}
	group->id = ret;

	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
				   NULL, "%d", group->id);
	if (ret) {
		kobject_put(&group->kobj);
		return ERR_PTR(ret);
	}

	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
	if (!group->devices_kobj) {
		kobject_put(&group->kobj); /* triggers .release & free */
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * The devices_kobj holds a reference on the group kobject, so
	 * as long as that exists so will the group. We can therefore
	 * use the devices_kobj for reference counting.
	 */
	kobject_put(&group->kobj);

	ret = iommu_group_create_file(group,
				      &iommu_group_attr_reserved_regions);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	ret = iommu_group_create_file(group, &iommu_group_attr_type);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	pr_debug("Allocated group %d\n", group->id);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_alloc);

struct iommu_group *iommu_group_get_by_id(int id)
{
	struct kobject *group_kobj;
	struct iommu_group *group;
	const char *name;

	if (!iommu_group_kset)
		return NULL;

	name = kasprintf(GFP_KERNEL, "%d", id);
	if (!name)
		return NULL;

	group_kobj = kset_find_obj(iommu_group_kset, name);
	kfree(name);

	if (!group_kobj)
		return NULL;

	group = container_of(group_kobj, struct iommu_group, kobj);
	BUG_ON(group->id != id);

	kobject_get(group->devices_kobj);
	kobject_put(&group->kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get_by_id);

/**
 * iommu_group_get_iommudata - retrieve iommu_data registered for a group
 * @group: the group
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations. This function provides a way to retrieve it. Caller
 * should hold a group reference.
 */
void *iommu_group_get_iommudata(struct iommu_group *group)
{
	return group->iommu_data;
}
EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);

/**
 * iommu_group_set_iommudata - set iommu_data for a group
 * @group: the group
 * @iommu_data: new data
 * @release: release function for iommu_data
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations. This function provides a way to set the data after
 * the group has been allocated. Caller should hold a group reference.
 */
void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
			       void (*release)(void *iommu_data))
{
	group->iommu_data = iommu_data;
	group->iommu_data_release = release;
}
EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
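
/*
 * Example (illustrative sketch, not part of this file): a driver that
 * needs per-group private state usually pairs iommu_group_set_iommudata()
 * with a release callback so the data is freed when the last group
 * reference goes away. "struct my_group_data" and the callback name are
 * hypothetical.
 *
 *	static void my_group_data_release(void *data)
 *	{
 *		kfree(data);
 *	}
 *
 *	struct my_group_data *gdata = kzalloc(sizeof(*gdata), GFP_KERNEL);
 *
 *	if (gdata)
 *		iommu_group_set_iommudata(group, gdata,
 *					  my_group_data_release);
 *
 * Later lookups use iommu_group_get_iommudata(group) while a group
 * reference is held.
 */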

/**
 * iommu_group_set_name - set name for a group
 * @group: the group
 * @name: name
 *
 * Allow iommu driver to set a name for a group. When set it will
 * appear in a name attribute file under the group in sysfs.
 */
int iommu_group_set_name(struct iommu_group *group, const char *name)
{
	int ret;

	if (group->name) {
		iommu_group_remove_file(group, &iommu_group_attr_name);
		kfree(group->name);
		group->name = NULL;
		if (!name)
			return 0;
	}

	group->name = kstrdup(name, GFP_KERNEL);
	if (!group->name)
		return -ENOMEM;

	ret = iommu_group_create_file(group, &iommu_group_attr_name);
	if (ret) {
		kfree(group->name);
		group->name = NULL;
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);

static int iommu_create_device_direct_mappings(struct iommu_group *group,
					       struct device *dev)
{
	struct iommu_domain *domain = group->default_domain;
	struct iommu_resv_region *entry;
	struct list_head mappings;
	unsigned long pg_size;
	int ret = 0;

	if (!domain || !iommu_is_dma_domain(domain))
		return 0;

	BUG_ON(!domain->pgsize_bitmap);

	pg_size = 1UL << __ffs(domain->pgsize_bitmap);
	INIT_LIST_HEAD(&mappings);

	iommu_get_resv_regions(dev, &mappings);

	/* We need to consider overlapping regions for different devices */
	list_for_each_entry(entry, &mappings, list) {
		dma_addr_t start, end, addr;
		size_t map_size = 0;

		start = ALIGN(entry->start, pg_size);
		end = ALIGN(entry->start + entry->length, pg_size);

		if (entry->type != IOMMU_RESV_DIRECT &&
		    entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		for (addr = start; addr <= end; addr += pg_size) {
			phys_addr_t phys_addr;

			if (addr == end)
				goto map_end;

			phys_addr = iommu_iova_to_phys(domain, addr);
			if (!phys_addr) {
				map_size += pg_size;
				continue;
			}

map_end:
			if (map_size) {
				ret = iommu_map(domain, addr - map_size,
						addr - map_size, map_size,
						entry->prot, GFP_KERNEL);
				if (ret)
					goto out;
				map_size = 0;
			}
		}

	}

	iommu_flush_iotlb_all(domain);

out:
	iommu_put_resv_regions(dev, &mappings);

	return ret;
}

/**
 * iommu_group_add_device - add a device to an iommu group
 * @group: the group into which to add the device (reference should be held)
 * @dev: the device
 *
 * This function is called by an iommu driver to add a device into a
 * group. Adding a device increments the group reference count.
 */
int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
	int ret, i = 0;
	struct group_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return -ENOMEM;

	device->dev = dev;

	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
	if (ret)
		goto err_free_device;

	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
rename:
	if (!device->name) {
		ret = -ENOMEM;
		goto err_remove_link;
	}

	ret = sysfs_create_link_nowarn(group->devices_kobj,
				       &dev->kobj, device->name);
	if (ret) {
		if (ret == -EEXIST && i >= 0) {
			/*
			 * Account for the slim chance of collision
			 * and append an instance to the name.
			 */
			kfree(device->name);
			device->name = kasprintf(GFP_KERNEL, "%s.%d",
						 kobject_name(&dev->kobj), i++);
			goto rename;
		}
		goto err_free_name;
	}

	kobject_get(group->devices_kobj);

	dev->iommu_group = group;

	mutex_lock(&group->mutex);
	list_add_tail(&device->list, &group->devices);
	if (group->domain)
		ret = iommu_group_do_dma_first_attach(dev, group->domain);
	mutex_unlock(&group->mutex);
	if (ret)
		goto err_put_group;

	trace_add_device_to_group(group->id, dev);

	dev_info(dev, "Adding to iommu group %d\n", group->id);

	return 0;

err_put_group:
	mutex_lock(&group->mutex);
	list_del(&device->list);
	mutex_unlock(&group->mutex);
	dev->iommu_group = NULL;
	kobject_put(group->devices_kobj);
	sysfs_remove_link(group->devices_kobj, device->name);
err_free_name:
	kfree(device->name);
err_remove_link:
	sysfs_remove_link(&dev->kobj, "iommu_group");
err_free_device:
	kfree(device);
	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);

/**
 * iommu_group_remove_device - remove a device from its current group
 * @dev: device to be removed
 *
 * This function is called by an iommu driver to remove the device from
 * its current group. This decrements the iommu group reference count.
 */
void iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *tmp_device, *device = NULL;

	if (!group)
		return;

	dev_info(dev, "Removing from iommu group %d\n", group->id);

	mutex_lock(&group->mutex);
	list_for_each_entry(tmp_device, &group->devices, list) {
		if (tmp_device->dev == dev) {
			device = tmp_device;
			list_del(&device->list);
			break;
		}
	}
	mutex_unlock(&group->mutex);

	if (!device)
		return;

	sysfs_remove_link(group->devices_kobj, device->name);
	sysfs_remove_link(&dev->kobj, "iommu_group");

	trace_remove_device_from_group(group->id, dev);

	kfree(device->name);
	kfree(device);
	dev->iommu_group = NULL;
	kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

static int iommu_group_device_count(struct iommu_group *group)
{
	struct group_device *entry;
	int ret = 0;

	list_for_each_entry(entry, &group->devices, list)
		ret++;

	return ret;
}

static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
				      int (*fn)(struct device *, void *))
{
	struct group_device *device;
	int ret = 0;

	list_for_each_entry(device, &group->devices, list) {
		ret = fn(device->dev, data);
		if (ret)
			break;
	}
	return ret;
}

/**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
 * @data: caller opaque data to be passed to callback function
 * @fn: caller supplied callback function
 *
 * This function is called by group users to iterate over group devices.
 * Callers should hold a reference count to the group during callback.
 * The group->mutex is held across callbacks, which will block calls to
 * iommu_group_add/remove_device.
 */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
			     int (*fn)(struct device *, void *))
{
	int ret;

	mutex_lock(&group->mutex);
	ret = __iommu_group_for_each_dev(group, data, fn);
	mutex_unlock(&group->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);

/**
 * iommu_group_get - Return the group for a device and increment reference
 * @dev: get the group that this device belongs to
 *
 * This function is called by iommu drivers and users to get the group
 * for the specified device. If found, the group is returned and the group
 * reference is incremented, else NULL.
 */
struct iommu_group *iommu_group_get(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		kobject_get(group->devices_kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

/**
 * iommu_group_ref_get - Increment reference on a group
 * @group: the group to use, must not be NULL
 *
 * This function is called by iommu drivers to take additional references on an
 * existing group. Returns the given group for convenience.
 */
struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
{
	kobject_get(group->devices_kobj);
	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_ref_get);

/**
 * iommu_group_put - Decrement group reference
 * @group: the group to use
 *
 * This function is called by iommu drivers and users to release the
 * iommu group. Once the reference count is zero, the group is released.
 */
void iommu_group_put(struct iommu_group *group)
{
	if (group)
		kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

/**
 * iommu_register_device_fault_handler() - Register a device fault handler
 * @dev: the device
 * @handler: the fault handler
 * @data: private data passed as argument to the handler
 *
 * When an IOMMU fault event is received, this handler gets called with the
 * fault event and data as argument. The handler should return 0 on success. If
 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
 * complete the fault by calling iommu_page_response() with one of the following
 * response code:
 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
 * - IOMMU_PAGE_RESP_INVALID: terminate the fault
 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
 *   page faults if possible.
 *
 * Return 0 if the fault handler was installed successfully, or an error.
 */
int iommu_register_device_fault_handler(struct device *dev,
					iommu_dev_fault_handler_t handler,
					void *data)
{
	struct dev_iommu *param = dev->iommu;
	int ret = 0;

	if (!param)
		return -EINVAL;

	mutex_lock(&param->lock);
	/* Only allow one fault handler registered for each device */
	if (param->fault_param) {
		ret = -EBUSY;
		goto done_unlock;
	}

	get_device(dev);
	param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
	if (!param->fault_param) {
		put_device(dev);
		ret = -ENOMEM;
		goto done_unlock;
	}
	param->fault_param->handler = handler;
	param->fault_param->data = data;
	mutex_init(&param->fault_param->lock);
	INIT_LIST_HEAD(&param->fault_param->faults);

done_unlock:
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
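
/*
 * Example (illustrative sketch, not part of this file): a driver that
 * consumes recoverable I/O page faults might register a handler and answer
 * page requests roughly as below. The handler name and the way the fault
 * is resolved are hypothetical; the field usage follows the iommu_fault
 * and iommu_page_response definitions in include/uapi/linux/iommu.h at the
 * time of writing.
 *
 *	static int my_iopf_handler(struct iommu_fault *fault, void *data)
 *	{
 *		struct device *dev = data;
 *		struct iommu_page_response resp = {
 *			.version = IOMMU_PAGE_RESP_VERSION_1,
 *			.grpid   = fault->prm.grpid,
 *			.code    = IOMMU_PAGE_RESP_SUCCESS,
 *		};
 *
 *		if (fault->type != IOMMU_FAULT_PAGE_REQ)
 *			return -EOPNOTSUPP;
 *
 *		if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
 *			resp.flags |= IOMMU_PAGE_RESP_PASID_VALID;
 *			resp.pasid  = fault->prm.pasid;
 *		}
 *
 *		... resolve the fault, e.g. make the page present ...
 *
 *		return iommu_page_response(dev, &resp);
 *	}
 *
 *	ret = iommu_register_device_fault_handler(dev, my_iopf_handler, dev);
 *
 * The handler must be unregistered with
 * iommu_unregister_device_fault_handler() once no faults are pending.
 */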

/**
 * iommu_unregister_device_fault_handler() - Unregister the device fault handler
 * @dev: the device
 *
 * Remove the device fault handler installed with
 * iommu_register_device_fault_handler().
 *
 * Return 0 on success, or an error.
 */
int iommu_unregister_device_fault_handler(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;
	int ret = 0;

	if (!param)
		return -EINVAL;

	mutex_lock(&param->lock);

	if (!param->fault_param)
		goto unlock;

	/* we cannot unregister handler if there are pending faults */
	if (!list_empty(&param->fault_param->faults)) {
		ret = -EBUSY;
		goto unlock;
	}

	kfree(param->fault_param);
	param->fault_param = NULL;
	put_device(dev);
unlock:
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. When this function fails and the fault is recoverable, it is the
 * caller's responsibility to complete the fault.
 *
 * Return 0 on success, or an error.
 */
int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
{
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_event *evt_pending = NULL;
	struct iommu_fault_param *fparam;
	int ret = 0;

	if (!param || !evt)
		return -EINVAL;

	/* we only report device fault if there is a handler registered */
	mutex_lock(&param->lock);
	fparam = param->fault_param;
	if (!fparam || !fparam->handler) {
		ret = -EINVAL;
		goto done_unlock;
	}

	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
				      GFP_KERNEL);
		if (!evt_pending) {
			ret = -ENOMEM;
			goto done_unlock;
		}
		mutex_lock(&fparam->lock);
		list_add_tail(&evt_pending->list, &fparam->faults);
		mutex_unlock(&fparam->lock);
	}

	ret = fparam->handler(&evt->fault, fparam->data);
	if (ret && evt_pending) {
		mutex_lock(&fparam->lock);
		list_del(&evt_pending->list);
		mutex_unlock(&fparam->lock);
		kfree(evt_pending);
	}
done_unlock:
	mutex_unlock(&param->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);

int iommu_page_response(struct device *dev,
			struct iommu_page_response *msg)
{
	bool needs_pasid;
	int ret = -EINVAL;
	struct iommu_fault_event *evt;
	struct iommu_fault_page_request *prm;
	struct dev_iommu *param = dev->iommu;
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;

	if (!ops->page_response)
		return -ENODEV;

	if (!param || !param->fault_param)
		return -EINVAL;

	if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
	    msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
		return -EINVAL;

	/* Only send response if there is a fault report pending */
	mutex_lock(&param->fault_param->lock);
	if (list_empty(&param->fault_param->faults)) {
		dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
		goto done_unlock;
	}
	/*
	 * Check if we have a matching page request pending to respond,
	 * otherwise return -EINVAL
	 */
	list_for_each_entry(evt, &param->fault_param->faults, list) {
		prm = &evt->fault.prm;
		if (prm->grpid != msg->grpid)
			continue;

		/*
		 * If the PASID is required, the corresponding request is
		 * matched using the group ID, the PASID valid bit and the PASID
		 * value. Otherwise only the group ID matches request and
		 * response.
		 */
		needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
		if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
			continue;

		if (!needs_pasid && has_pasid) {
			/* No big deal, just clear it. */
			msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
			msg->pasid = 0;
		}

		ret = ops->page_response(dev, evt, msg);
		list_del(&evt->list);
		kfree(evt);
		break;
	}

done_unlock:
	mutex_unlock(&param->fault_param->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_page_response);

/**
 * iommu_group_id - Return ID for a group
 * @group: the group to ID
 *
 * Return the unique ID for the group matching the sysfs group number.
 */
int iommu_group_id(struct iommu_group *group)
{
	return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);

static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding. This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as it passes through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * For multifunction devices which are not isolated from each other, find
 * all the other non-isolated functions and look for existing groups. For
 * each function, we also need to look for aliases to or from other devices
 * that may already have a group.
 */
static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
							unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
		return NULL;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus ||
		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
			continue;

		group = get_pci_alias_group(tmp, devfns);
		if (group) {
			pci_dev_put(tmp);
			return group;
		}
	}

	return NULL;
}

/*
 * Look for aliases to or from the given device for existing groups. DMA
 * aliases are only supported on the same bus, therefore the search
 * space is quite small (especially since we're really only looking at pcie
 * device, and therefore only expect multiple slots on the root complex or
 * downstream switch ports). It's conceivable though that a pair of
 * multifunction devices could have aliases between them that would cause a
 * loop. To prevent this, we use a bitmap to track where we've been.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
		return NULL;

	group = iommu_group_get(&pdev->dev);
	if (group)
		return group;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus)
			continue;

		/* We alias them or they alias us */
		if (pci_devs_are_dma_aliases(pdev, tmp)) {
			group = get_pci_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}

			group = get_pci_function_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}
		}
	}

	return NULL;
}

struct group_for_pci_data {
	struct pci_dev *pdev;
	struct iommu_group *group;
};

/*
 * DMA alias iterator callback, return the last seen device. Stop and return
 * the IOMMU group if we find one along the way.
 */
static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct group_for_pci_data *data = opaque;

	data->pdev = pdev;
	data->group = iommu_group_get(&pdev->dev);

	return data->group != NULL;
}

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per device.
 */
struct iommu_group *generic_device_group(struct device *dev)
{
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(generic_device_group);
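
/*
 * Example (illustrative sketch, not part of this file): an IOMMU driver
 * normally wires one of the grouping helpers in this file into its
 * iommu_ops, choosing the policy by bus type. The callback name below is
 * hypothetical.
 *
 *	static struct iommu_group *my_iommu_device_group(struct device *dev)
 *	{
 *		if (dev_is_pci(dev))
 *			return pci_device_group(dev);
 *		if (dev_is_fsl_mc(dev))
 *			return fsl_mc_device_group(dev);
 *		return generic_device_group(dev);
 *	}
 *
 * The core invokes this through ops->device_group() from
 * iommu_group_get_for_dev() when a device is probed.
 */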

/*
 * Use standard PCI bus topology, isolation features, and DMA alias quirks
 * to find or create an IOMMU group for a device.
 */
struct iommu_group *pci_device_group(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct group_for_pci_data data;
	struct pci_bus *bus;
	struct iommu_group *group = NULL;
	u64 devfns[4] = { 0 };

	if (WARN_ON(!dev_is_pci(dev)))
		return ERR_PTR(-EINVAL);

	/*
	 * Find the upstream DMA alias for the device. A device must not
	 * be aliased due to topology in order to have its own IOMMU group.
	 * If we find an alias along the way that already belongs to a
	 * group, use it.
	 */
	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
		return data.group;

	pdev = data.pdev;

	/*
	 * Continue upstream from the point of minimum IOMMU granularity
	 * due to aliases to the point where devices are protected from
	 * peer-to-peer DMA by PCI ACS. Again, if we find an existing
	 * group, use it.
	 */
	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
		if (!bus->self)
			continue;

		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
			break;

		pdev = bus->self;

		group = iommu_group_get(&pdev->dev);
		if (group)
			return group;
	}

	/*
	 * Look for existing groups on device aliases. If we alias another
	 * device or another device aliases us, use the same group.
	 */
	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/*
	 * Look for existing groups on non-isolated functions on the same
	 * slot and aliases of those functions, if any. No need to clear
	 * the search bitmap, the tested devfns are still valid.
	 */
	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/* No shared group found, allocate new */
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(pci_device_group);

/* Get the IOMMU group for device on fsl-mc bus */
struct iommu_group *fsl_mc_device_group(struct device *dev)
{
	struct device *cont_dev = fsl_mc_cont_dev(dev);
	struct iommu_group *group;

	group = iommu_group_get(cont_dev);
	if (!group)
		group = iommu_group_alloc();
	return group;
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);

static int iommu_get_def_domain_type(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
		return IOMMU_DOMAIN_DMA;

	if (ops->def_domain_type)
		return ops->def_domain_type(dev);

	return 0;
}

static int iommu_group_alloc_default_domain(struct bus_type *bus,
					    struct iommu_group *group,
					    unsigned int type)
{
	struct iommu_domain *dom;

	dom = __iommu_domain_alloc(bus, type);
	if (!dom && type != IOMMU_DOMAIN_DMA) {
		dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
		if (dom)
			pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
				type, group->name);
	}

	if (!dom)
		return -ENOMEM;

	group->default_domain = dom;
	if (!group->domain)
		group->domain = dom;
	return 0;
}

static int iommu_alloc_default_domain(struct iommu_group *group,
				      struct device *dev)
{
	unsigned int type;

	if (group->default_domain)
		return 0;

	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;

	return iommu_group_alloc_default_domain(dev->bus, group, type);
}

/**
 * iommu_group_get_for_dev - Find or create the IOMMU group for a device
 * @dev: target device
 *
 * This function is intended to be called by IOMMU drivers and extended to
 * support common, bus-defined algorithms when determining or creating the
 * IOMMU group for a device. On success, the caller will hold a reference
 * to the returned IOMMU group, which will already include the provided
 * device. The reference should be released with iommu_group_put().
 */
static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_group *group;
	int ret;

	group = iommu_group_get(dev);
	if (group)
		return group;

	group = ops->device_group(dev);
	if (WARN_ON_ONCE(group == NULL))
		return ERR_PTR(-EINVAL);

	if (IS_ERR(group))
		return group;

	ret = iommu_group_add_device(group, dev);
	if (ret)
		goto out_put_group;

	return group;

out_put_group:
	iommu_group_put(group);

	return ERR_PTR(ret);
}

struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
{
	return group->default_domain;
}

static int probe_iommu_group(struct device *dev, void *data)
{
	struct list_head *group_list = data;
	struct iommu_group *group;
	int ret;

	/* Device is probed already if in a group */
	group = iommu_group_get(dev);
	if (group) {
		iommu_group_put(group);
		return 0;
	}

	ret = __iommu_probe_device(dev, group_list);
	if (ret == -ENODEV)
		ret = 0;

	return ret;
}

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		int ret;

		ret = iommu_probe_device(dev);
		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
		iommu_release_device(dev);
		return NOTIFY_OK;
	}

	return 0;
}

struct __group_domain_type {
	struct device *dev;
	unsigned int type;
};

static int probe_get_default_domain_type(struct device *dev, void *data)
{
	struct __group_domain_type *gtype = data;
	unsigned int type = iommu_get_def_domain_type(dev);

	if (type) {
		if (gtype->type && gtype->type != type) {
			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
				 iommu_domain_type_str(type),
				 dev_name(gtype->dev),
				 iommu_domain_type_str(gtype->type));
			gtype->type = 0;
		}

		if (!gtype->dev) {
			gtype->dev = dev;
			gtype->type = type;
		}
	}

	return 0;
}

static void probe_alloc_default_domain(struct bus_type *bus,
				       struct iommu_group *group)
{
	struct __group_domain_type gtype;

	memset(&gtype, 0, sizeof(gtype));

	/* Ask for default domain requirements of all devices in the group */
	__iommu_group_for_each_dev(group, &gtype,
				   probe_get_default_domain_type);

	if (!gtype.type)
		gtype.type = iommu_def_domain_type;

	iommu_group_alloc_default_domain(bus, group, gtype.type);

}

static int __iommu_group_dma_first_attach(struct iommu_group *group)
{
	return __iommu_group_for_each_dev(group, group->default_domain,
					  iommu_group_do_dma_first_attach);
}

static int iommu_group_do_probe_finalize(struct device *dev, void *data)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;
}

static void __iommu_group_dma_finalize(struct iommu_group *group)
{
	__iommu_group_for_each_dev(group, group->default_domain,
				   iommu_group_do_probe_finalize);
}
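
/*
 * Example (illustrative sketch, not part of this file): drivers influence
 * the default domain type negotiated above by implementing the
 * ops->def_domain_type() hook, e.g. to force an identity mapping for
 * devices that cannot tolerate address translation. The helper names are
 * hypothetical.
 *
 *	static int my_iommu_def_domain_type(struct device *dev)
 *	{
 *		if (my_device_needs_passthrough(dev))
 *			return IOMMU_DOMAIN_IDENTITY;
 *		return 0;
 *	}
 *
 * Returning 0 lets the core fall back to iommu_def_domain_type. If devices
 * in the same group request conflicting non-zero types,
 * probe_get_default_domain_type() warns and uses the global default.
 */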

static int iommu_do_create_direct_mappings(struct device *dev, void *data)
{
	struct iommu_group *group = data;

	iommu_create_device_direct_mappings(group, dev);

	return 0;
}

static int iommu_group_create_direct_mappings(struct iommu_group *group)
{
	return __iommu_group_for_each_dev(group, group,
					  iommu_do_create_direct_mappings);
}

int bus_iommu_probe(struct bus_type *bus)
{
	struct iommu_group *group, *next;
	LIST_HEAD(group_list);
	int ret;

	/*
	 * This code-path does not allocate the default domain when
	 * creating the iommu group, so do it after the groups are
	 * created.
	 */
	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
	if (ret)
		return ret;

	list_for_each_entry_safe(group, next, &group_list, entry) {
		mutex_lock(&group->mutex);

		/* Remove item from the list */
		list_del_init(&group->entry);

		/* Try to allocate default domain */
		probe_alloc_default_domain(bus, group);

		if (!group->default_domain) {
			mutex_unlock(&group->mutex);
			continue;
		}

		iommu_group_create_direct_mappings(group);

		ret = __iommu_group_dma_first_attach(group);

		mutex_unlock(&group->mutex);

		if (ret)
			break;

		__iommu_group_dma_finalize(group);
	}

	return ret;
}

bool iommu_present(struct bus_type *bus)
{
	return bus->iommu_ops != NULL;
}
EXPORT_SYMBOL_GPL(iommu_present);

/**
 * device_iommu_capable() - check for a general IOMMU capability
 * @dev: device to which the capability would be relevant, if available
 * @cap: IOMMU capability
 *
 * Return: true if an IOMMU is present and supports the given capability
 * for the given device, otherwise false.
 */
bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
{
	const struct iommu_ops *ops;

	if (!dev->iommu || !dev->iommu->iommu_dev)
		return false;

	ops = dev_iommu_ops(dev);
	if (!ops->capable)
		return false;

	return ops->capable(dev, cap);
}
EXPORT_SYMBOL_GPL(device_iommu_capable);

/**
 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
 *       for a group
 * @group: Group to query
 *
 * IOMMU groups should not have differing values of
 * msi_device_has_isolated_msi() for devices in a group. However nothing
 * directly prevents this, so ensure mistakes don't result in isolation failures
 * by checking that all the devices are the same.
 */
bool iommu_group_has_isolated_msi(struct iommu_group *group)
{
	struct group_device *group_dev;
	bool ret = true;

	mutex_lock(&group->mutex);
	list_for_each_entry(group_dev, &group->devices, list)
		ret &= msi_device_has_isolated_msi(group_dev->dev);
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);

/**
 * iommu_set_fault_handler() - set a fault handler for an iommu domain
 * @domain: iommu domain
 * @handler: fault handler
 * @token: user data, will be passed back to the fault handler
 *
 * This function should be used by IOMMU users which want to be notified
 * whenever an IOMMU fault happens.
 *
 * The fault handler itself should return 0 on success, and an appropriate
 * error code otherwise.
 */
void iommu_set_fault_handler(struct iommu_domain *domain,
			     iommu_fault_handler_t handler,
			     void *token)
{
	BUG_ON(!domain);

	domain->handler = handler;
	domain->handler_token = token;
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);

static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
						 unsigned type)
{
	struct iommu_domain *domain;

	if (bus == NULL || bus->iommu_ops == NULL)
		return NULL;

	domain = bus->iommu_ops->domain_alloc(type);
	if (!domain)
		return NULL;

	domain->type = type;
	/* Assume all sizes by default; the driver may override this later */
	domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
	if (!domain->ops)
		domain->ops = bus->iommu_ops->default_domain_ops;

	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
		iommu_domain_free(domain);
		domain = NULL;
	}
	return domain;
}

struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
{
	return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);

void iommu_domain_free(struct iommu_domain *domain)
{
	if (domain->type == IOMMU_DOMAIN_SVA)
		mmdrop(domain->mm);
	iommu_put_dma_cookie(domain);
	domain->ops->free(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);

/*
 * Put the group's domain back to the appropriate core-owned domain - either the
 * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
 */
static void __iommu_group_set_core_domain(struct iommu_group *group)
{
	struct iommu_domain *new_domain;
	int ret;

	if (group->owner)
		new_domain = group->blocking_domain;
	else
		new_domain = group->default_domain;

	ret = __iommu_group_set_domain(group, new_domain);
	WARN(ret, "iommu driver failed to attach the default/blocking domain");
}

static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev)
{
	int ret;

	if (unlikely(domain->ops->attach_dev == NULL))
		return -ENODEV;

	ret = domain->ops->attach_dev(domain, dev);
	if (ret)
		return ret;
	dev->iommu->attach_deferred = 0;
	trace_attach_device_to_domain(dev);
	return 0;
}
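
/*
 * Example (illustrative sketch, not part of this file): a kernel user that
 * wants its own I/O address space allocates an unmanaged domain, attaches
 * it to the device (see iommu_attach_device() below) and installs mappings
 * explicitly. The iova/phys values are made up for illustration.
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(dev->bus);
 *	int ret;
 *
 *	if (!domain)
 *		return -ENOMEM;
 *
 *	ret = iommu_attach_device(domain, dev);
 *	if (ret)
 *		goto out_free;
 *
 *	ret = iommu_map(domain, iova, phys, SZ_4K,
 *			IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 *	...
 *	iommu_unmap(domain, iova, SZ_4K);
 *	iommu_detach_device(domain, dev);
 * out_free:
 *	iommu_domain_free(domain);
 *
 * Note that iommu_attach_device() only succeeds for singleton groups; for
 * multi-device groups iommu_attach_group() must be used instead.
 */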
2043 */ 2044 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2045 { 2046 struct iommu_group *group; 2047 int ret; 2048 2049 group = iommu_group_get(dev); 2050 if (!group) 2051 return -ENODEV; 2052 2053 /* 2054 * Lock the group to make sure the device-count doesn't 2055 * change while we are attaching 2056 */ 2057 mutex_lock(&group->mutex); 2058 ret = -EINVAL; 2059 if (iommu_group_device_count(group) != 1) 2060 goto out_unlock; 2061 2062 ret = __iommu_attach_group(domain, group); 2063 2064 out_unlock: 2065 mutex_unlock(&group->mutex); 2066 iommu_group_put(group); 2067 2068 return ret; 2069 } 2070 EXPORT_SYMBOL_GPL(iommu_attach_device); 2071 2072 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2073 { 2074 if (dev->iommu && dev->iommu->attach_deferred) 2075 return __iommu_attach_device(domain, dev); 2076 2077 return 0; 2078 } 2079 2080 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2081 { 2082 struct iommu_group *group; 2083 2084 group = iommu_group_get(dev); 2085 if (!group) 2086 return; 2087 2088 mutex_lock(&group->mutex); 2089 if (WARN_ON(domain != group->domain) || 2090 WARN_ON(iommu_group_device_count(group) != 1)) 2091 goto out_unlock; 2092 __iommu_group_set_core_domain(group); 2093 2094 out_unlock: 2095 mutex_unlock(&group->mutex); 2096 iommu_group_put(group); 2097 } 2098 EXPORT_SYMBOL_GPL(iommu_detach_device); 2099 2100 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2101 { 2102 struct iommu_domain *domain; 2103 struct iommu_group *group; 2104 2105 group = iommu_group_get(dev); 2106 if (!group) 2107 return NULL; 2108 2109 domain = group->domain; 2110 2111 iommu_group_put(group); 2112 2113 return domain; 2114 } 2115 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2116 2117 /* 2118 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2119 * guarantees that the group and its default domain are valid and correct. 2120 */ 2121 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2122 { 2123 return dev->iommu_group->default_domain; 2124 } 2125 2126 /* 2127 * IOMMU groups are really the natural working unit of the IOMMU, but 2128 * the IOMMU API works on domains and devices. Bridge that gap by 2129 * iterating over the devices in a group. Ideally we'd have a single 2130 * device which represents the requestor ID of the group, but we also 2131 * allow IOMMU drivers to create policy defined minimum sets, where 2132 * the physical hardware may be able to distiguish members, but we 2133 * wish to group them at a higher level (ex. untrusted multi-function 2134 * PCI devices). Thus we attach each device. 2135 */ 2136 static int iommu_group_do_attach_device(struct device *dev, void *data) 2137 { 2138 struct iommu_domain *domain = data; 2139 2140 return __iommu_attach_device(domain, dev); 2141 } 2142 2143 static int __iommu_attach_group(struct iommu_domain *domain, 2144 struct iommu_group *group) 2145 { 2146 int ret; 2147 2148 if (group->domain && group->domain != group->default_domain && 2149 group->domain != group->blocking_domain) 2150 return -EBUSY; 2151 2152 ret = __iommu_group_for_each_dev(group, domain, 2153 iommu_group_do_attach_device); 2154 if (ret == 0) { 2155 group->domain = domain; 2156 } else { 2157 /* 2158 * To recover from the case when certain device within the 2159 * group fails to attach to the new domain, we need force 2160 * attaching all devices back to the old domain. 
The old 2161 * domain is compatible for all devices in the group, 2162 * hence the iommu driver should always return success. 2163 */ 2164 struct iommu_domain *old_domain = group->domain; 2165 2166 group->domain = NULL; 2167 WARN(__iommu_group_set_domain(group, old_domain), 2168 "iommu driver failed to attach a compatible domain"); 2169 } 2170 2171 return ret; 2172 } 2173 2174 /** 2175 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2176 * @domain: IOMMU domain to attach 2177 * @group: IOMMU group that will be attached 2178 * 2179 * Returns 0 on success and error code on failure 2180 * 2181 * Note that EINVAL can be treated as a soft failure, indicating 2182 * that certain configuration of the domain is incompatible with 2183 * the group. In this case attaching a different domain to the 2184 * group may succeed. 2185 */ 2186 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2187 { 2188 int ret; 2189 2190 mutex_lock(&group->mutex); 2191 ret = __iommu_attach_group(domain, group); 2192 mutex_unlock(&group->mutex); 2193 2194 return ret; 2195 } 2196 EXPORT_SYMBOL_GPL(iommu_attach_group); 2197 2198 static int iommu_group_do_set_platform_dma(struct device *dev, void *data) 2199 { 2200 const struct iommu_ops *ops = dev_iommu_ops(dev); 2201 2202 if (!WARN_ON(!ops->set_platform_dma_ops)) 2203 ops->set_platform_dma_ops(dev); 2204 2205 return 0; 2206 } 2207 2208 static int __iommu_group_set_domain(struct iommu_group *group, 2209 struct iommu_domain *new_domain) 2210 { 2211 int ret; 2212 2213 if (group->domain == new_domain) 2214 return 0; 2215 2216 /* 2217 * New drivers should support default domains, so set_platform_dma() 2218 * op will never be called. Otherwise the NULL domain represents some 2219 * platform specific behavior. 2220 */ 2221 if (!new_domain) { 2222 __iommu_group_for_each_dev(group, NULL, 2223 iommu_group_do_set_platform_dma); 2224 group->domain = NULL; 2225 return 0; 2226 } 2227 2228 /* 2229 * Changing the domain is done by calling attach_dev() on the new 2230 * domain. This switch does not have to be atomic and DMA can be 2231 * discarded during the transition. DMA must only be able to access 2232 * either new_domain or group->domain, never something else. 2233 * 2234 * Note that this is called in error unwind paths, attaching to a 2235 * domain that has already been attached cannot fail. 
2236 */ 2237 ret = __iommu_group_for_each_dev(group, new_domain, 2238 iommu_group_do_attach_device); 2239 if (ret) 2240 return ret; 2241 group->domain = new_domain; 2242 return 0; 2243 } 2244 2245 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2246 { 2247 mutex_lock(&group->mutex); 2248 __iommu_group_set_core_domain(group); 2249 mutex_unlock(&group->mutex); 2250 } 2251 EXPORT_SYMBOL_GPL(iommu_detach_group); 2252 2253 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2254 { 2255 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2256 return iova; 2257 2258 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2259 return 0; 2260 2261 return domain->ops->iova_to_phys(domain, iova); 2262 } 2263 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2264 2265 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2266 phys_addr_t paddr, size_t size, size_t *count) 2267 { 2268 unsigned int pgsize_idx, pgsize_idx_next; 2269 unsigned long pgsizes; 2270 size_t offset, pgsize, pgsize_next; 2271 unsigned long addr_merge = paddr | iova; 2272 2273 /* Page sizes supported by the hardware and small enough for @size */ 2274 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2275 2276 /* Constrain the page sizes further based on the maximum alignment */ 2277 if (likely(addr_merge)) 2278 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2279 2280 /* Make sure we have at least one suitable page size */ 2281 BUG_ON(!pgsizes); 2282 2283 /* Pick the biggest page size remaining */ 2284 pgsize_idx = __fls(pgsizes); 2285 pgsize = BIT(pgsize_idx); 2286 if (!count) 2287 return pgsize; 2288 2289 /* Find the next biggest support page size, if it exists */ 2290 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2291 if (!pgsizes) 2292 goto out_set_count; 2293 2294 pgsize_idx_next = __ffs(pgsizes); 2295 pgsize_next = BIT(pgsize_idx_next); 2296 2297 /* 2298 * There's no point trying a bigger page size unless the virtual 2299 * and physical addresses are similarly offset within the larger page. 2300 */ 2301 if ((iova ^ paddr) & (pgsize_next - 1)) 2302 goto out_set_count; 2303 2304 /* Calculate the offset to the next page size alignment boundary */ 2305 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2306 2307 /* 2308 * If size is big enough to accommodate the larger page, reduce 2309 * the number of smaller pages. 2310 */ 2311 if (offset + pgsize_next <= size) 2312 size = offset; 2313 2314 out_set_count: 2315 *count = size >> pgsize_idx; 2316 return pgsize; 2317 } 2318 2319 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, 2320 phys_addr_t paddr, size_t size, int prot, 2321 gfp_t gfp, size_t *mapped) 2322 { 2323 const struct iommu_domain_ops *ops = domain->ops; 2324 size_t pgsize, count; 2325 int ret; 2326 2327 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2328 2329 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2330 iova, &paddr, pgsize, count); 2331 2332 if (ops->map_pages) { 2333 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2334 gfp, mapped); 2335 } else { 2336 ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); 2337 *mapped = ret ? 
0 : pgsize; 2338 } 2339 2340 return ret; 2341 } 2342 2343 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2344 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2345 { 2346 const struct iommu_domain_ops *ops = domain->ops; 2347 unsigned long orig_iova = iova; 2348 unsigned int min_pagesz; 2349 size_t orig_size = size; 2350 phys_addr_t orig_paddr = paddr; 2351 int ret = 0; 2352 2353 if (unlikely(!(ops->map || ops->map_pages) || 2354 domain->pgsize_bitmap == 0UL)) 2355 return -ENODEV; 2356 2357 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2358 return -EINVAL; 2359 2360 /* find out the minimum page size supported */ 2361 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2362 2363 /* 2364 * both the virtual address and the physical one, as well as 2365 * the size of the mapping, must be aligned (at least) to the 2366 * size of the smallest page supported by the hardware 2367 */ 2368 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2369 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2370 iova, &paddr, size, min_pagesz); 2371 return -EINVAL; 2372 } 2373 2374 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2375 2376 while (size) { 2377 size_t mapped = 0; 2378 2379 ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, 2380 &mapped); 2381 /* 2382 * Some pages may have been mapped, even if an error occurred, 2383 * so we should account for those so they can be unmapped. 2384 */ 2385 size -= mapped; 2386 2387 if (ret) 2388 break; 2389 2390 iova += mapped; 2391 paddr += mapped; 2392 } 2393 2394 /* unroll mapping in case something went wrong */ 2395 if (ret) 2396 iommu_unmap(domain, orig_iova, orig_size - size); 2397 else 2398 trace_map(orig_iova, orig_paddr, orig_size); 2399 2400 return ret; 2401 } 2402 2403 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2404 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2405 { 2406 const struct iommu_domain_ops *ops = domain->ops; 2407 int ret; 2408 2409 might_sleep_if(gfpflags_allow_blocking(gfp)); 2410 2411 /* Discourage passing strange GFP flags */ 2412 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2413 __GFP_HIGHMEM))) 2414 return -EINVAL; 2415 2416 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2417 if (ret == 0 && ops->iotlb_sync_map) 2418 ops->iotlb_sync_map(domain, iova, size); 2419 2420 return ret; 2421 } 2422 EXPORT_SYMBOL_GPL(iommu_map); 2423 2424 static size_t __iommu_unmap_pages(struct iommu_domain *domain, 2425 unsigned long iova, size_t size, 2426 struct iommu_iotlb_gather *iotlb_gather) 2427 { 2428 const struct iommu_domain_ops *ops = domain->ops; 2429 size_t pgsize, count; 2430 2431 pgsize = iommu_pgsize(domain, iova, iova, size, &count); 2432 return ops->unmap_pages ? 
2433 ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : 2434 ops->unmap(domain, iova, pgsize, iotlb_gather); 2435 } 2436 2437 static size_t __iommu_unmap(struct iommu_domain *domain, 2438 unsigned long iova, size_t size, 2439 struct iommu_iotlb_gather *iotlb_gather) 2440 { 2441 const struct iommu_domain_ops *ops = domain->ops; 2442 size_t unmapped_page, unmapped = 0; 2443 unsigned long orig_iova = iova; 2444 unsigned int min_pagesz; 2445 2446 if (unlikely(!(ops->unmap || ops->unmap_pages) || 2447 domain->pgsize_bitmap == 0UL)) 2448 return 0; 2449 2450 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2451 return 0; 2452 2453 /* find out the minimum page size supported */ 2454 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2455 2456 /* 2457 * The virtual address, as well as the size of the mapping, must be 2458 * aligned (at least) to the size of the smallest page supported 2459 * by the hardware 2460 */ 2461 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2462 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2463 iova, size, min_pagesz); 2464 return 0; 2465 } 2466 2467 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2468 2469 /* 2470 * Keep iterating until we either unmap 'size' bytes (or more) 2471 * or we hit an area that isn't mapped. 2472 */ 2473 while (unmapped < size) { 2474 unmapped_page = __iommu_unmap_pages(domain, iova, 2475 size - unmapped, 2476 iotlb_gather); 2477 if (!unmapped_page) 2478 break; 2479 2480 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2481 iova, unmapped_page); 2482 2483 iova += unmapped_page; 2484 unmapped += unmapped_page; 2485 } 2486 2487 trace_unmap(orig_iova, size, unmapped); 2488 return unmapped; 2489 } 2490 2491 size_t iommu_unmap(struct iommu_domain *domain, 2492 unsigned long iova, size_t size) 2493 { 2494 struct iommu_iotlb_gather iotlb_gather; 2495 size_t ret; 2496 2497 iommu_iotlb_gather_init(&iotlb_gather); 2498 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2499 iommu_iotlb_sync(domain, &iotlb_gather); 2500 2501 return ret; 2502 } 2503 EXPORT_SYMBOL_GPL(iommu_unmap); 2504 2505 size_t iommu_unmap_fast(struct iommu_domain *domain, 2506 unsigned long iova, size_t size, 2507 struct iommu_iotlb_gather *iotlb_gather) 2508 { 2509 return __iommu_unmap(domain, iova, size, iotlb_gather); 2510 } 2511 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2512 2513 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2514 struct scatterlist *sg, unsigned int nents, int prot, 2515 gfp_t gfp) 2516 { 2517 const struct iommu_domain_ops *ops = domain->ops; 2518 size_t len = 0, mapped = 0; 2519 phys_addr_t start; 2520 unsigned int i = 0; 2521 int ret; 2522 2523 might_sleep_if(gfpflags_allow_blocking(gfp)); 2524 2525 /* Discourage passing strange GFP flags */ 2526 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2527 __GFP_HIGHMEM))) 2528 return -EINVAL; 2529 2530 while (i <= nents) { 2531 phys_addr_t s_phys = sg_phys(sg); 2532 2533 if (len && s_phys != start + len) { 2534 ret = __iommu_map(domain, iova + mapped, start, 2535 len, prot, gfp); 2536 2537 if (ret) 2538 goto out_err; 2539 2540 mapped += len; 2541 len = 0; 2542 } 2543 2544 if (sg_is_dma_bus_address(sg)) 2545 goto next; 2546 2547 if (len) { 2548 len += sg->length; 2549 } else { 2550 len = sg->length; 2551 start = s_phys; 2552 } 2553 2554 next: 2555 if (++i < nents) 2556 sg = sg_next(sg); 2557 } 2558 2559 if (ops->iotlb_sync_map) 2560 ops->iotlb_sync_map(domain, iova, mapped); 2561 return mapped; 2562 2563 out_err: 2564 /* undo mappings already done 
 */
	iommu_unmap(domain, iova, mapped);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map_sg);

/**
 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
 * @domain: the iommu domain where the fault has happened
 * @dev: the device where the fault has happened
 * @iova: the faulting address
 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
 *
 * This function should be called by the low-level IOMMU implementations
 * whenever IOMMU faults happen, to allow high-level users that are
 * interested in such events to know about them.
 *
 * This event may be useful for several possible use cases:
 * - mere logging of the event
 * - dynamic TLB/PTE loading
 * - if restarting of the faulting device is required
 *
 * Returns 0 on success and an appropriate error code otherwise (if dynamic
 * PTE/TLB loading will one day be supported, implementations will be able
 * to tell whether it succeeded or not according to this return value).
 *
 * Specifically, -ENOSYS is returned if a fault handler isn't installed
 * (though fault handlers can also return -ENOSYS, in case they want to
 * elicit the default behavior of the IOMMU drivers).
 */
int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
		       unsigned long iova, int flags)
{
	int ret = -ENOSYS;

	/*
	 * if upper layers showed interest and installed a fault handler,
	 * invoke it.
	 */
	if (domain->handler)
		ret = domain->handler(domain, dev, iova, flags,
				      domain->handler_token);

	trace_io_page_fault(dev, iova, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(report_iommu_fault);

static int __init iommu_init(void)
{
	iommu_group_kset = kset_create_and_add("iommu_groups",
					       NULL, kernel_kobj);
	BUG_ON(!iommu_group_kset);

	iommu_debugfs_setup();

	return 0;
}
core_initcall(iommu_init);

int iommu_enable_nesting(struct iommu_domain *domain)
{
	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;
	if (!domain->ops->enable_nesting)
		return -EINVAL;
	return domain->ops->enable_nesting(domain);
}
EXPORT_SYMBOL_GPL(iommu_enable_nesting);

int iommu_set_pgtable_quirks(struct iommu_domain *domain,
		unsigned long quirk)
{
	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;
	if (!domain->ops->set_pgtable_quirks)
		return -EINVAL;
	return domain->ops->set_pgtable_quirks(domain, quirk);
}
EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);

void iommu_get_resv_regions(struct device *dev, struct list_head *list)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->get_resv_regions)
		ops->get_resv_regions(dev, list);
}

/**
 * iommu_put_resv_regions - release reserved regions
 * @dev: device for which to free reserved regions
 * @list: reserved region list for device
 *
 * This releases a reserved region list acquired by iommu_get_resv_regions().
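 *
 * A sketch of the expected usage pattern (illustrative only):
 *
 *      LIST_HEAD(resv_regions);
 *      struct iommu_resv_region *region;
 *
 *      iommu_get_resv_regions(dev, &resv_regions);
 *      list_for_each_entry(region, &resv_regions, list)
 *              dev_dbg(dev, "reserved region %pa, length %zu, type %d\n",
 *                      &region->start, region->length, region->type);
 *      iommu_put_resv_regions(dev, &resv_regions);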
 */
void iommu_put_resv_regions(struct device *dev, struct list_head *list)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, list, list) {
		if (entry->free)
			entry->free(dev, entry);
		else
			kfree(entry);
	}
}
EXPORT_SYMBOL(iommu_put_resv_regions);

struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
						  size_t length, int prot,
						  enum iommu_resv_type type,
						  gfp_t gfp)
{
	struct iommu_resv_region *region;

	region = kzalloc(sizeof(*region), gfp);
	if (!region)
		return NULL;

	INIT_LIST_HEAD(&region->list);
	region->start = start;
	region->length = length;
	region->prot = prot;
	region->type = type;
	return region;
}
EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);

void iommu_set_default_passthrough(bool cmd_line)
{
	if (cmd_line)
		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
}

void iommu_set_default_translated(bool cmd_line)
{
	if (cmd_line)
		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
}

bool iommu_default_passthrough(void)
{
	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
}
EXPORT_SYMBOL_GPL(iommu_default_passthrough);

const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
{
	const struct iommu_ops *ops = NULL;
	struct iommu_device *iommu;

	spin_lock(&iommu_device_lock);
	list_for_each_entry(iommu, &iommu_device_list, list)
		if (iommu->fwnode == fwnode) {
			ops = iommu->ops;
			break;
		}
	spin_unlock(&iommu_device_lock);
	return ops;
}

int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
		      const struct iommu_ops *ops)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (fwspec)
		return ops == fwspec->ops ? 0 : -EINVAL;

	if (!dev_iommu_get(dev))
		return -ENOMEM;

	/* Preallocate for the overwhelmingly common case of 1 ID */
	fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL);
	if (!fwspec)
		return -ENOMEM;

	of_node_get(to_of_node(iommu_fwnode));
	fwspec->iommu_fwnode = iommu_fwnode;
	fwspec->ops = ops;
	dev_iommu_fwspec_set(dev, fwspec);
	return 0;
}
EXPORT_SYMBOL_GPL(iommu_fwspec_init);

void iommu_fwspec_free(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (fwspec) {
		fwnode_handle_put(fwspec->iommu_fwnode);
		kfree(fwspec);
		dev_iommu_fwspec_set(dev, NULL);
	}
}
EXPORT_SYMBOL_GPL(iommu_fwspec_free);

int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, new_num;

	if (!fwspec)
		return -EINVAL;

	new_num = fwspec->num_ids + num_ids;
	if (new_num > 1) {
		fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
				  GFP_KERNEL);
		if (!fwspec)
			return -ENOMEM;

		dev_iommu_fwspec_set(dev, fwspec);
	}

	for (i = 0; i < num_ids; i++)
		fwspec->ids[fwspec->num_ids + i] = ids[i];

	fwspec->num_ids = new_num;
	return 0;
}
EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);

/*
 * Per device IOMMU features.
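 *
 * For example (illustrative sketch only; a driver that wants to handle I/O
 * page faults for shared virtual addressing might do something like this,
 * with the exact feature ordering being driver policy rather than a rule of
 * this API):
 *
 *      if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF))
 *              return -ENODEV;
 *      if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) {
 *              iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);
 *              return -ENODEV;
 *      }
 *
 * with both features disabled again, in reverse order, before the driver
 * unbinds.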
2793 */ 2794 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2795 { 2796 if (dev->iommu && dev->iommu->iommu_dev) { 2797 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2798 2799 if (ops->dev_enable_feat) 2800 return ops->dev_enable_feat(dev, feat); 2801 } 2802 2803 return -ENODEV; 2804 } 2805 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2806 2807 /* 2808 * The device drivers should do the necessary cleanups before calling this. 2809 */ 2810 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2811 { 2812 if (dev->iommu && dev->iommu->iommu_dev) { 2813 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2814 2815 if (ops->dev_disable_feat) 2816 return ops->dev_disable_feat(dev, feat); 2817 } 2818 2819 return -EBUSY; 2820 } 2821 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2822 2823 /* 2824 * Changes the default domain of an iommu group that has *only* one device 2825 * 2826 * @group: The group for which the default domain should be changed 2827 * @prev_dev: The device in the group (this is used to make sure that the device 2828 * hasn't changed after the caller has called this function) 2829 * @type: The type of the new default domain that gets associated with the group 2830 * 2831 * Returns 0 on success and error code on failure 2832 * 2833 * Note: 2834 * 1. Presently, this function is called only when user requests to change the 2835 * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type 2836 * Please take a closer look if intended to use for other purposes. 2837 */ 2838 static int iommu_change_dev_def_domain(struct iommu_group *group, 2839 struct device *prev_dev, int type) 2840 { 2841 struct iommu_domain *prev_dom; 2842 struct group_device *grp_dev; 2843 int ret, dev_def_dom; 2844 struct device *dev; 2845 2846 mutex_lock(&group->mutex); 2847 2848 if (group->default_domain != group->domain) { 2849 dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); 2850 ret = -EBUSY; 2851 goto out; 2852 } 2853 2854 /* 2855 * iommu group wasn't locked while acquiring device lock in 2856 * iommu_group_store_type(). So, make sure that the device count hasn't 2857 * changed while acquiring device lock. 2858 * 2859 * Changing default domain of an iommu group with two or more devices 2860 * isn't supported because there could be a potential deadlock. Consider 2861 * the following scenario. T1 is trying to acquire device locks of all 2862 * the devices in the group and before it could acquire all of them, 2863 * there could be another thread T2 (from different sub-system and use 2864 * case) that has already acquired some of the device locks and might be 2865 * waiting for T1 to release other device locks. 
2866 */ 2867 if (iommu_group_device_count(group) != 1) { 2868 dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n"); 2869 ret = -EINVAL; 2870 goto out; 2871 } 2872 2873 /* Since group has only one device */ 2874 grp_dev = list_first_entry(&group->devices, struct group_device, list); 2875 dev = grp_dev->dev; 2876 2877 if (prev_dev != dev) { 2878 dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n"); 2879 ret = -EBUSY; 2880 goto out; 2881 } 2882 2883 prev_dom = group->default_domain; 2884 if (!prev_dom) { 2885 ret = -EINVAL; 2886 goto out; 2887 } 2888 2889 dev_def_dom = iommu_get_def_domain_type(dev); 2890 if (!type) { 2891 /* 2892 * If the user hasn't requested any specific type of domain and 2893 * if the device supports both the domains, then default to the 2894 * domain the device was booted with 2895 */ 2896 type = dev_def_dom ? : iommu_def_domain_type; 2897 } else if (dev_def_dom && type != dev_def_dom) { 2898 dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n", 2899 iommu_domain_type_str(type)); 2900 ret = -EINVAL; 2901 goto out; 2902 } 2903 2904 /* 2905 * Switch to a new domain only if the requested domain type is different 2906 * from the existing default domain type 2907 */ 2908 if (prev_dom->type == type) { 2909 ret = 0; 2910 goto out; 2911 } 2912 2913 /* We can bring up a flush queue without tearing down the domain */ 2914 if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { 2915 ret = iommu_dma_init_fq(prev_dom); 2916 if (!ret) 2917 prev_dom->type = IOMMU_DOMAIN_DMA_FQ; 2918 goto out; 2919 } 2920 2921 /* Sets group->default_domain to the newly allocated domain */ 2922 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2923 if (ret) 2924 goto out; 2925 2926 ret = iommu_create_device_direct_mappings(group, dev); 2927 if (ret) 2928 goto free_new_domain; 2929 2930 ret = __iommu_attach_device(group->default_domain, dev); 2931 if (ret) 2932 goto free_new_domain; 2933 2934 group->domain = group->default_domain; 2935 2936 /* 2937 * Release the mutex here because ops->probe_finalize() call-back of 2938 * some vendor IOMMU drivers calls arm_iommu_attach_device() which 2939 * in-turn might call back into IOMMU core code, where it tries to take 2940 * group->mutex, resulting in a deadlock. 2941 */ 2942 mutex_unlock(&group->mutex); 2943 2944 /* Make sure dma_ops is appropriatley set */ 2945 iommu_group_do_probe_finalize(dev, group->default_domain); 2946 iommu_domain_free(prev_dom); 2947 return 0; 2948 2949 free_new_domain: 2950 iommu_domain_free(group->default_domain); 2951 group->default_domain = prev_dom; 2952 group->domain = prev_dom; 2953 2954 out: 2955 mutex_unlock(&group->mutex); 2956 2957 return ret; 2958 } 2959 2960 /* 2961 * Changing the default domain through sysfs requires the users to unbind the 2962 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2963 * transition. Return failure if this isn't met. 2964 * 2965 * We need to consider the race between this and the device release path. 2966 * device_lock(dev) is used here to guarantee that the device release path 2967 * will not be entered at the same time. 
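 *
 * For example (illustrative only; the group number is made up), a group whose
 * default domain is a strict DMA domain can be switched to the lazy
 * flush-queue variant without unbinding the driver:
 *
 *      echo DMA-FQ > /sys/kernel/iommu_groups/42/type
 *
 * while switching to "identity", "DMA" or "auto" requires the device in the
 * group to be unbound first.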
2968 */ 2969 static ssize_t iommu_group_store_type(struct iommu_group *group, 2970 const char *buf, size_t count) 2971 { 2972 struct group_device *grp_dev; 2973 struct device *dev; 2974 int ret, req_type; 2975 2976 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2977 return -EACCES; 2978 2979 if (WARN_ON(!group) || !group->default_domain) 2980 return -EINVAL; 2981 2982 if (sysfs_streq(buf, "identity")) 2983 req_type = IOMMU_DOMAIN_IDENTITY; 2984 else if (sysfs_streq(buf, "DMA")) 2985 req_type = IOMMU_DOMAIN_DMA; 2986 else if (sysfs_streq(buf, "DMA-FQ")) 2987 req_type = IOMMU_DOMAIN_DMA_FQ; 2988 else if (sysfs_streq(buf, "auto")) 2989 req_type = 0; 2990 else 2991 return -EINVAL; 2992 2993 /* 2994 * Lock/Unlock the group mutex here before device lock to 2995 * 1. Make sure that the iommu group has only one device (this is a 2996 * prerequisite for step 2) 2997 * 2. Get struct *dev which is needed to lock device 2998 */ 2999 mutex_lock(&group->mutex); 3000 if (iommu_group_device_count(group) != 1) { 3001 mutex_unlock(&group->mutex); 3002 pr_err_ratelimited("Cannot change default domain: Group has more than one device\n"); 3003 return -EINVAL; 3004 } 3005 3006 /* Since group has only one device */ 3007 grp_dev = list_first_entry(&group->devices, struct group_device, list); 3008 dev = grp_dev->dev; 3009 get_device(dev); 3010 3011 /* 3012 * Don't hold the group mutex because taking group mutex first and then 3013 * the device lock could potentially cause a deadlock as below. Assume 3014 * two threads T1 and T2. T1 is trying to change default domain of an 3015 * iommu group and T2 is trying to hot unplug a device or release [1] VF 3016 * of a PCIe device which is in the same iommu group. T1 takes group 3017 * mutex and before it could take device lock assume T2 has taken device 3018 * lock and is yet to take group mutex. Now, both the threads will be 3019 * waiting for the other thread to release lock. Below, lock order was 3020 * suggested. 3021 * device_lock(dev); 3022 * mutex_lock(&group->mutex); 3023 * iommu_change_dev_def_domain(); 3024 * mutex_unlock(&group->mutex); 3025 * device_unlock(dev); 3026 * 3027 * [1] Typical device release path 3028 * device_lock() from device/driver core code 3029 * -> bus_notifier() 3030 * -> iommu_bus_notifier() 3031 * -> iommu_release_device() 3032 * -> ops->release_device() vendor driver calls back iommu core code 3033 * -> mutex_lock() from iommu core code 3034 */ 3035 mutex_unlock(&group->mutex); 3036 3037 /* Check if the device in the group still has a driver bound to it */ 3038 device_lock(dev); 3039 if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && 3040 group->default_domain->type == IOMMU_DOMAIN_DMA)) { 3041 pr_err_ratelimited("Device is still bound to driver\n"); 3042 ret = -EBUSY; 3043 goto out; 3044 } 3045 3046 ret = iommu_change_dev_def_domain(group, dev, req_type); 3047 ret = ret ?: count; 3048 3049 out: 3050 device_unlock(dev); 3051 put_device(dev); 3052 3053 return ret; 3054 } 3055 3056 static bool iommu_is_default_domain(struct iommu_group *group) 3057 { 3058 if (group->domain == group->default_domain) 3059 return true; 3060 3061 /* 3062 * If the default domain was set to identity and it is still an identity 3063 * domain then we consider this a pass. This happens because of 3064 * amd_iommu_init_device() replacing the default idenytity domain with an 3065 * identity domain that has a different configuration for AMDGPU. 
3066 */ 3067 if (group->default_domain && 3068 group->default_domain->type == IOMMU_DOMAIN_IDENTITY && 3069 group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) 3070 return true; 3071 return false; 3072 } 3073 3074 /** 3075 * iommu_device_use_default_domain() - Device driver wants to handle device 3076 * DMA through the kernel DMA API. 3077 * @dev: The device. 3078 * 3079 * The device driver about to bind @dev wants to do DMA through the kernel 3080 * DMA API. Return 0 if it is allowed, otherwise an error. 3081 */ 3082 int iommu_device_use_default_domain(struct device *dev) 3083 { 3084 struct iommu_group *group = iommu_group_get(dev); 3085 int ret = 0; 3086 3087 if (!group) 3088 return 0; 3089 3090 mutex_lock(&group->mutex); 3091 if (group->owner_cnt) { 3092 if (group->owner || !iommu_is_default_domain(group) || 3093 !xa_empty(&group->pasid_array)) { 3094 ret = -EBUSY; 3095 goto unlock_out; 3096 } 3097 } 3098 3099 group->owner_cnt++; 3100 3101 unlock_out: 3102 mutex_unlock(&group->mutex); 3103 iommu_group_put(group); 3104 3105 return ret; 3106 } 3107 3108 /** 3109 * iommu_device_unuse_default_domain() - Device driver stops handling device 3110 * DMA through the kernel DMA API. 3111 * @dev: The device. 3112 * 3113 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3114 * It must be called after iommu_device_use_default_domain(). 3115 */ 3116 void iommu_device_unuse_default_domain(struct device *dev) 3117 { 3118 struct iommu_group *group = iommu_group_get(dev); 3119 3120 if (!group) 3121 return; 3122 3123 mutex_lock(&group->mutex); 3124 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3125 group->owner_cnt--; 3126 3127 mutex_unlock(&group->mutex); 3128 iommu_group_put(group); 3129 } 3130 3131 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3132 { 3133 struct group_device *dev = 3134 list_first_entry(&group->devices, struct group_device, list); 3135 3136 if (group->blocking_domain) 3137 return 0; 3138 3139 group->blocking_domain = 3140 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3141 if (!group->blocking_domain) { 3142 /* 3143 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3144 * create an empty domain instead. 3145 */ 3146 group->blocking_domain = __iommu_domain_alloc( 3147 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3148 if (!group->blocking_domain) 3149 return -EINVAL; 3150 } 3151 return 0; 3152 } 3153 3154 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3155 { 3156 int ret; 3157 3158 if ((group->domain && group->domain != group->default_domain) || 3159 !xa_empty(&group->pasid_array)) 3160 return -EBUSY; 3161 3162 ret = __iommu_group_alloc_blocking_domain(group); 3163 if (ret) 3164 return ret; 3165 ret = __iommu_group_set_domain(group, group->blocking_domain); 3166 if (ret) 3167 return ret; 3168 3169 group->owner = owner; 3170 group->owner_cnt++; 3171 return 0; 3172 } 3173 3174 /** 3175 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3176 * @group: The group. 3177 * @owner: Caller specified pointer. Used for exclusive ownership. 3178 * 3179 * This is to support backward compatibility for vfio which manages the dma 3180 * ownership in iommu_group level. New invocations on this interface should be 3181 * prohibited. Only a single owner may exist for a group. 
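 *
 * A sketch of the expected call pattern (illustrative only; "my_cookie" is a
 * hypothetical owner token, any pointer unique to the caller works):
 *
 *      ret = iommu_group_claim_dma_owner(group, my_cookie);
 *      if (ret)
 *              return ret;
 *      ...
 *      (user-initiated DMA via a caller-owned UNMANAGED domain)
 *      ...
 *      iommu_group_release_dma_owner(group);
 *
 * New code should prefer iommu_device_claim_dma_owner() and
 * iommu_device_release_dma_owner() below.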
 */
int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
{
	int ret = 0;

	if (WARN_ON(!owner))
		return -EINVAL;

	mutex_lock(&group->mutex);
	if (group->owner_cnt) {
		ret = -EPERM;
		goto unlock_out;
	}

	ret = __iommu_take_dma_ownership(group, owner);
unlock_out:
	mutex_unlock(&group->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);

/**
 * iommu_device_claim_dma_owner() - Set DMA ownership of a device
 * @dev: The device.
 * @owner: Caller specified pointer. Used for exclusive ownership.
 *
 * Claim the DMA ownership of a device. Multiple devices in the same group may
 * concurrently claim ownership if they present the same owner value. Returns 0
 * on success and error code on failure.
 */
int iommu_device_claim_dma_owner(struct device *dev, void *owner)
{
	struct iommu_group *group;
	int ret = 0;

	if (WARN_ON(!owner))
		return -EINVAL;

	group = iommu_group_get(dev);
	if (!group)
		return -ENODEV;

	mutex_lock(&group->mutex);
	if (group->owner_cnt) {
		if (group->owner != owner) {
			ret = -EPERM;
			goto unlock_out;
		}
		group->owner_cnt++;
		goto unlock_out;
	}

	ret = __iommu_take_dma_ownership(group, owner);
unlock_out:
	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);

static void __iommu_release_dma_ownership(struct iommu_group *group)
{
	int ret;

	if (WARN_ON(!group->owner_cnt || !group->owner ||
		    !xa_empty(&group->pasid_array)))
		return;

	group->owner_cnt = 0;
	group->owner = NULL;
	ret = __iommu_group_set_domain(group, group->default_domain);
	WARN(ret, "iommu driver failed to attach the default domain");
}

/**
 * iommu_group_release_dma_owner() - Release DMA ownership of a group
 * @group: The group.
 *
 * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
 */
void iommu_group_release_dma_owner(struct iommu_group *group)
{
	mutex_lock(&group->mutex);
	__iommu_release_dma_ownership(group);
	mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);

/**
 * iommu_device_release_dma_owner() - Release DMA ownership of a device
 * @dev: The device.
 *
 * Release the DMA ownership claimed by iommu_device_claim_dma_owner().
 */
void iommu_device_release_dma_owner(struct device *dev)
{
	struct iommu_group *group = iommu_group_get(dev);

	mutex_lock(&group->mutex);
	if (group->owner_cnt > 1)
		group->owner_cnt--;
	else
		__iommu_release_dma_ownership(group);
	mutex_unlock(&group->mutex);
	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);

/**
 * iommu_group_dma_owner_claimed() - Query group dma ownership status
 * @group: The group.
 *
 * This provides status query on a given group. It is racy and only for
 * non-binding status reporting.
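 *
 * For example (illustrative only), suitable for a diagnostic print but not
 * for making access-control decisions:
 *
 *      if (iommu_group_dma_owner_claimed(group))
 *              pr_debug("group %d is claimed for user-controlled DMA\n",
 *                       iommu_group_id(group));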
3298 */ 3299 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3300 { 3301 unsigned int user; 3302 3303 mutex_lock(&group->mutex); 3304 user = group->owner_cnt; 3305 mutex_unlock(&group->mutex); 3306 3307 return user; 3308 } 3309 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3310 3311 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3312 struct iommu_group *group, ioasid_t pasid) 3313 { 3314 struct group_device *device; 3315 int ret = 0; 3316 3317 list_for_each_entry(device, &group->devices, list) { 3318 ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); 3319 if (ret) 3320 break; 3321 } 3322 3323 return ret; 3324 } 3325 3326 static void __iommu_remove_group_pasid(struct iommu_group *group, 3327 ioasid_t pasid) 3328 { 3329 struct group_device *device; 3330 const struct iommu_ops *ops; 3331 3332 list_for_each_entry(device, &group->devices, list) { 3333 ops = dev_iommu_ops(device->dev); 3334 ops->remove_dev_pasid(device->dev, pasid); 3335 } 3336 } 3337 3338 /* 3339 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3340 * @domain: the iommu domain. 3341 * @dev: the attached device. 3342 * @pasid: the pasid of the device. 3343 * 3344 * Return: 0 on success, or an error. 3345 */ 3346 int iommu_attach_device_pasid(struct iommu_domain *domain, 3347 struct device *dev, ioasid_t pasid) 3348 { 3349 struct iommu_group *group; 3350 void *curr; 3351 int ret; 3352 3353 if (!domain->ops->set_dev_pasid) 3354 return -EOPNOTSUPP; 3355 3356 group = iommu_group_get(dev); 3357 if (!group) 3358 return -ENODEV; 3359 3360 mutex_lock(&group->mutex); 3361 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); 3362 if (curr) { 3363 ret = xa_err(curr) ? : -EBUSY; 3364 goto out_unlock; 3365 } 3366 3367 ret = __iommu_set_group_pasid(domain, group, pasid); 3368 if (ret) { 3369 __iommu_remove_group_pasid(group, pasid); 3370 xa_erase(&group->pasid_array, pasid); 3371 } 3372 out_unlock: 3373 mutex_unlock(&group->mutex); 3374 iommu_group_put(group); 3375 3376 return ret; 3377 } 3378 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3379 3380 /* 3381 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3382 * @domain: the iommu domain. 3383 * @dev: the attached device. 3384 * @pasid: the pasid of the device. 3385 * 3386 * The @domain must have been attached to @pasid of the @dev with 3387 * iommu_attach_device_pasid(). 3388 */ 3389 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3390 ioasid_t pasid) 3391 { 3392 struct iommu_group *group = iommu_group_get(dev); 3393 3394 mutex_lock(&group->mutex); 3395 __iommu_remove_group_pasid(group, pasid); 3396 WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); 3397 mutex_unlock(&group->mutex); 3398 3399 iommu_group_put(group); 3400 } 3401 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3402 3403 /* 3404 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev 3405 * @dev: the queried device 3406 * @pasid: the pasid of the device 3407 * @type: matched domain type, 0 for any match 3408 * 3409 * This is a variant of iommu_get_domain_for_dev(). It returns the existing 3410 * domain attached to pasid of a device. Callers must hold a lock around this 3411 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of 3412 * type is being manipulated. This API does not internally resolve races with 3413 * attach/detach. 3414 * 3415 * Return: attached domain on success, NULL otherwise. 
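 *
 * A sketch of the attach/query/detach cycle (illustrative only; the PASID is
 * assumed to have been allocated elsewhere, and the caller is assumed to
 * provide the serialization described above):
 *
 *      ret = iommu_attach_device_pasid(domain, dev, pasid);
 *      if (ret)
 *              return ret;
 *      ...
 *      WARN_ON(iommu_get_domain_for_dev_pasid(dev, pasid, 0) != domain);
 *      ...
 *      iommu_detach_device_pasid(domain, dev, pasid);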
3416 */ 3417 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, 3418 ioasid_t pasid, 3419 unsigned int type) 3420 { 3421 struct iommu_domain *domain; 3422 struct iommu_group *group; 3423 3424 group = iommu_group_get(dev); 3425 if (!group) 3426 return NULL; 3427 3428 xa_lock(&group->pasid_array); 3429 domain = xa_load(&group->pasid_array, pasid); 3430 if (type && domain && domain->type != type) 3431 domain = ERR_PTR(-EBUSY); 3432 xa_unlock(&group->pasid_array); 3433 iommu_group_put(group); 3434 3435 return domain; 3436 } 3437 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); 3438 3439 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, 3440 struct mm_struct *mm) 3441 { 3442 const struct iommu_ops *ops = dev_iommu_ops(dev); 3443 struct iommu_domain *domain; 3444 3445 domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); 3446 if (!domain) 3447 return NULL; 3448 3449 domain->type = IOMMU_DOMAIN_SVA; 3450 mmgrab(mm); 3451 domain->mm = mm; 3452 domain->iopf_handler = iommu_sva_handle_iopf; 3453 domain->fault_data = mm; 3454 3455 return domain; 3456 } 3457
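/*
 * A sketch of how the SVA helpers above fit together (illustrative only; real
 * users should go through the iommu-sva layer, e.g. iommu_sva_bind_device(),
 * rather than open-coding this, and "pasid" is assumed to have been allocated
 * by that layer):
 *
 *      struct iommu_domain *sva_domain;
 *      int ret;
 *
 *      sva_domain = iommu_sva_domain_alloc(dev, current->mm);
 *      if (!sva_domain)
 *              return -ENOMEM;
 *
 *      ret = iommu_attach_device_pasid(sva_domain, dev, pasid);
 *      if (ret) {
 *              iommu_domain_free(sva_domain);
 *              return ret;
 *      }
 */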