1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7 #define pr_fmt(fmt) "iommu: " fmt 8 9 #include <linux/amba/bus.h> 10 #include <linux/device.h> 11 #include <linux/kernel.h> 12 #include <linux/bits.h> 13 #include <linux/bug.h> 14 #include <linux/types.h> 15 #include <linux/init.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/errno.h> 19 #include <linux/host1x_context_bus.h> 20 #include <linux/iommu.h> 21 #include <linux/idr.h> 22 #include <linux/err.h> 23 #include <linux/pci.h> 24 #include <linux/pci-ats.h> 25 #include <linux/bitops.h> 26 #include <linux/platform_device.h> 27 #include <linux/property.h> 28 #include <linux/fsl/mc.h> 29 #include <linux/module.h> 30 #include <linux/cc_platform.h> 31 #include <linux/cdx/cdx_bus.h> 32 #include <trace/events/iommu.h> 33 #include <linux/sched/mm.h> 34 #include <linux/msi.h> 35 36 #include "dma-iommu.h" 37 38 #include "iommu-sva.h" 39 40 static struct kset *iommu_group_kset; 41 static DEFINE_IDA(iommu_group_ida); 42 43 static unsigned int iommu_def_domain_type __read_mostly; 44 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 45 static u32 iommu_cmd_line __read_mostly; 46 47 struct iommu_group { 48 struct kobject kobj; 49 struct kobject *devices_kobj; 50 struct list_head devices; 51 struct xarray pasid_array; 52 struct mutex mutex; 53 void *iommu_data; 54 void (*iommu_data_release)(void *iommu_data); 55 char *name; 56 int id; 57 struct iommu_domain *default_domain; 58 struct iommu_domain *blocking_domain; 59 struct iommu_domain *domain; 60 struct list_head entry; 61 unsigned int owner_cnt; 62 void *owner; 63 }; 64 65 struct group_device { 66 struct list_head list; 67 struct device *dev; 68 char *name; 69 }; 70 71 /* Iterate over each struct group_device in a struct iommu_group */ 72 #define for_each_group_device(group, pos) \ 73 list_for_each_entry(pos, &(group)->devices, list) 74 75 struct iommu_group_attribute { 76 struct attribute attr; 77 ssize_t (*show)(struct iommu_group *group, char *buf); 78 ssize_t (*store)(struct iommu_group *group, 79 const char *buf, size_t count); 80 }; 81 82 static const char * const iommu_group_resv_type_string[] = { 83 [IOMMU_RESV_DIRECT] = "direct", 84 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 85 [IOMMU_RESV_RESERVED] = "reserved", 86 [IOMMU_RESV_MSI] = "msi", 87 [IOMMU_RESV_SW_MSI] = "msi", 88 }; 89 90 #define IOMMU_CMD_LINE_DMA_API BIT(0) 91 #define IOMMU_CMD_LINE_STRICT BIT(1) 92 93 static int iommu_bus_notifier(struct notifier_block *nb, 94 unsigned long action, void *data); 95 static void iommu_release_device(struct device *dev); 96 static int iommu_alloc_default_domain(struct iommu_group *group, 97 struct device *dev); 98 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, 99 unsigned type); 100 static int __iommu_attach_device(struct iommu_domain *domain, 101 struct device *dev); 102 static int __iommu_attach_group(struct iommu_domain *domain, 103 struct iommu_group *group); 104 static int __iommu_group_set_domain(struct iommu_group *group, 105 struct iommu_domain *new_domain); 106 static int iommu_create_device_direct_mappings(struct iommu_group *group, 107 struct device *dev); 108 static struct iommu_group *iommu_group_get_for_dev(struct device *dev); 109 static ssize_t iommu_group_store_type(struct iommu_group *group, 110 const char *buf, size_t count); 111 112 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 113 struct iommu_group_attribute iommu_group_attr_##_name = \ 114 __ATTR(_name, _mode, _show, _store) 115 116 #define to_iommu_group_attr(_attr) \ 117 container_of(_attr, struct iommu_group_attribute, attr) 118 #define to_iommu_group(_kobj) \ 119 container_of(_kobj, struct iommu_group, kobj) 120 121 static LIST_HEAD(iommu_device_list); 122 static DEFINE_SPINLOCK(iommu_device_lock); 123 124 static struct bus_type * const iommu_buses[] = { 125 &platform_bus_type, 126 #ifdef CONFIG_PCI 127 &pci_bus_type, 128 #endif 129 #ifdef CONFIG_ARM_AMBA 130 &amba_bustype, 131 #endif 132 #ifdef CONFIG_FSL_MC_BUS 133 &fsl_mc_bus_type, 134 #endif 135 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 136 &host1x_context_device_bus_type, 137 #endif 138 #ifdef CONFIG_CDX_BUS 139 &cdx_bus_type, 140 #endif 141 }; 142 143 /* 144 * Use a function instead of an array here because the domain-type is a 145 * bit-field, so an array would waste memory. 146 */ 147 static const char *iommu_domain_type_str(unsigned int t) 148 { 149 switch (t) { 150 case IOMMU_DOMAIN_BLOCKED: 151 return "Blocked"; 152 case IOMMU_DOMAIN_IDENTITY: 153 return "Passthrough"; 154 case IOMMU_DOMAIN_UNMANAGED: 155 return "Unmanaged"; 156 case IOMMU_DOMAIN_DMA: 157 case IOMMU_DOMAIN_DMA_FQ: 158 return "Translated"; 159 default: 160 return "Unknown"; 161 } 162 } 163 164 static int __init iommu_subsys_init(void) 165 { 166 struct notifier_block *nb; 167 168 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 169 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 170 iommu_set_default_passthrough(false); 171 else 172 iommu_set_default_translated(false); 173 174 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 175 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 176 iommu_set_default_translated(false); 177 } 178 } 179 180 if (!iommu_default_passthrough() && !iommu_dma_strict) 181 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 182 183 pr_info("Default domain type: %s%s\n", 184 iommu_domain_type_str(iommu_def_domain_type), 185 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 186 " (set via kernel command line)" : ""); 187 188 if (!iommu_default_passthrough()) 189 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 190 iommu_dma_strict ? "strict" : "lazy", 191 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 192 " (set via kernel command line)" : ""); 193 194 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 195 if (!nb) 196 return -ENOMEM; 197 198 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 199 nb[i].notifier_call = iommu_bus_notifier; 200 bus_register_notifier(iommu_buses[i], &nb[i]); 201 } 202 203 return 0; 204 } 205 subsys_initcall(iommu_subsys_init); 206 207 static int remove_iommu_group(struct device *dev, void *data) 208 { 209 if (dev->iommu && dev->iommu->iommu_dev == data) 210 iommu_release_device(dev); 211 212 return 0; 213 } 214 215 /** 216 * iommu_device_register() - Register an IOMMU hardware instance 217 * @iommu: IOMMU handle for the instance 218 * @ops: IOMMU ops to associate with the instance 219 * @hwdev: (optional) actual instance device, used for fwnode lookup 220 * 221 * Return: 0 on success, or an error. 222 */ 223 int iommu_device_register(struct iommu_device *iommu, 224 const struct iommu_ops *ops, struct device *hwdev) 225 { 226 int err = 0; 227 228 /* We need to be able to take module references appropriately */ 229 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 230 return -EINVAL; 231 /* 232 * Temporarily enforce global restriction to a single driver. This was 233 * already the de-facto behaviour, since any possible combination of 234 * existing drivers would compete for at least the PCI or platform bus. 235 */ 236 if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) 237 return -EBUSY; 238 239 iommu->ops = ops; 240 if (hwdev) 241 iommu->fwnode = dev_fwnode(hwdev); 242 243 spin_lock(&iommu_device_lock); 244 list_add_tail(&iommu->list, &iommu_device_list); 245 spin_unlock(&iommu_device_lock); 246 247 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { 248 iommu_buses[i]->iommu_ops = ops; 249 err = bus_iommu_probe(iommu_buses[i]); 250 } 251 if (err) 252 iommu_device_unregister(iommu); 253 return err; 254 } 255 EXPORT_SYMBOL_GPL(iommu_device_register); 256 257 void iommu_device_unregister(struct iommu_device *iommu) 258 { 259 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 260 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 261 262 spin_lock(&iommu_device_lock); 263 list_del(&iommu->list); 264 spin_unlock(&iommu_device_lock); 265 } 266 EXPORT_SYMBOL_GPL(iommu_device_unregister); 267 268 static struct dev_iommu *dev_iommu_get(struct device *dev) 269 { 270 struct dev_iommu *param = dev->iommu; 271 272 if (param) 273 return param; 274 275 param = kzalloc(sizeof(*param), GFP_KERNEL); 276 if (!param) 277 return NULL; 278 279 mutex_init(¶m->lock); 280 dev->iommu = param; 281 return param; 282 } 283 284 static void dev_iommu_free(struct device *dev) 285 { 286 struct dev_iommu *param = dev->iommu; 287 288 dev->iommu = NULL; 289 if (param->fwspec) { 290 fwnode_handle_put(param->fwspec->iommu_fwnode); 291 kfree(param->fwspec); 292 } 293 kfree(param); 294 } 295 296 static u32 dev_iommu_get_max_pasids(struct device *dev) 297 { 298 u32 max_pasids = 0, bits = 0; 299 int ret; 300 301 if (dev_is_pci(dev)) { 302 ret = pci_max_pasids(to_pci_dev(dev)); 303 if (ret > 0) 304 max_pasids = ret; 305 } else { 306 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 307 if (!ret) 308 max_pasids = 1UL << bits; 309 } 310 311 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 312 } 313 314 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 315 { 316 const struct iommu_ops *ops = dev->bus->iommu_ops; 317 struct iommu_device *iommu_dev; 318 struct iommu_group *group; 319 static DEFINE_MUTEX(iommu_probe_device_lock); 320 int ret; 321 322 if (!ops) 323 return -ENODEV; 324 /* 325 * Serialise to avoid races between IOMMU drivers registering in 326 * parallel and/or the "replay" calls from ACPI/OF code via client 327 * driver probe. Once the latter have been cleaned up we should 328 * probably be able to use device_lock() here to minimise the scope, 329 * but for now enforcing a simple global ordering is fine. 330 */ 331 mutex_lock(&iommu_probe_device_lock); 332 if (!dev_iommu_get(dev)) { 333 ret = -ENOMEM; 334 goto err_unlock; 335 } 336 337 if (!try_module_get(ops->owner)) { 338 ret = -EINVAL; 339 goto err_free; 340 } 341 342 iommu_dev = ops->probe_device(dev); 343 if (IS_ERR(iommu_dev)) { 344 ret = PTR_ERR(iommu_dev); 345 goto out_module_put; 346 } 347 348 dev->iommu->iommu_dev = iommu_dev; 349 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 350 351 group = iommu_group_get_for_dev(dev); 352 if (IS_ERR(group)) { 353 ret = PTR_ERR(group); 354 goto out_release; 355 } 356 357 mutex_lock(&group->mutex); 358 if (group_list && !group->default_domain && list_empty(&group->entry)) 359 list_add_tail(&group->entry, group_list); 360 mutex_unlock(&group->mutex); 361 iommu_group_put(group); 362 363 mutex_unlock(&iommu_probe_device_lock); 364 iommu_device_link(iommu_dev, dev); 365 366 return 0; 367 368 out_release: 369 if (ops->release_device) 370 ops->release_device(dev); 371 372 out_module_put: 373 module_put(ops->owner); 374 375 err_free: 376 dev_iommu_free(dev); 377 378 err_unlock: 379 mutex_unlock(&iommu_probe_device_lock); 380 381 return ret; 382 } 383 384 static bool iommu_is_attach_deferred(struct device *dev) 385 { 386 const struct iommu_ops *ops = dev_iommu_ops(dev); 387 388 if (ops->is_attach_deferred) 389 return ops->is_attach_deferred(dev); 390 391 return false; 392 } 393 394 static int iommu_group_do_dma_first_attach(struct device *dev, void *data) 395 { 396 struct iommu_domain *domain = data; 397 398 lockdep_assert_held(&dev->iommu_group->mutex); 399 400 if (iommu_is_attach_deferred(dev)) { 401 dev->iommu->attach_deferred = 1; 402 return 0; 403 } 404 405 return __iommu_attach_device(domain, dev); 406 } 407 408 int iommu_probe_device(struct device *dev) 409 { 410 const struct iommu_ops *ops; 411 struct iommu_group *group; 412 int ret; 413 414 ret = __iommu_probe_device(dev, NULL); 415 if (ret) 416 goto err_out; 417 418 group = iommu_group_get(dev); 419 if (!group) { 420 ret = -ENODEV; 421 goto err_release; 422 } 423 424 /* 425 * Try to allocate a default domain - needs support from the 426 * IOMMU driver. There are still some drivers which don't 427 * support default domains, so the return value is not yet 428 * checked. 429 */ 430 mutex_lock(&group->mutex); 431 iommu_alloc_default_domain(group, dev); 432 433 /* 434 * If device joined an existing group which has been claimed, don't 435 * attach the default domain. 436 */ 437 if (group->default_domain && !group->owner) { 438 ret = iommu_group_do_dma_first_attach(dev, group->default_domain); 439 if (ret) { 440 mutex_unlock(&group->mutex); 441 iommu_group_put(group); 442 goto err_release; 443 } 444 } 445 446 iommu_create_device_direct_mappings(group, dev); 447 448 mutex_unlock(&group->mutex); 449 iommu_group_put(group); 450 451 ops = dev_iommu_ops(dev); 452 if (ops->probe_finalize) 453 ops->probe_finalize(dev); 454 455 return 0; 456 457 err_release: 458 iommu_release_device(dev); 459 460 err_out: 461 return ret; 462 463 } 464 465 /* 466 * Remove a device from a group's device list and return the group device 467 * if successful. 468 */ 469 static struct group_device * 470 __iommu_group_remove_device(struct iommu_group *group, struct device *dev) 471 { 472 struct group_device *device; 473 474 lockdep_assert_held(&group->mutex); 475 for_each_group_device(group, device) { 476 if (device->dev == dev) { 477 list_del(&device->list); 478 return device; 479 } 480 } 481 482 return NULL; 483 } 484 485 /* 486 * Release a device from its group and decrements the iommu group reference 487 * count. 488 */ 489 static void __iommu_group_release_device(struct iommu_group *group, 490 struct group_device *grp_dev) 491 { 492 struct device *dev = grp_dev->dev; 493 494 sysfs_remove_link(group->devices_kobj, grp_dev->name); 495 sysfs_remove_link(&dev->kobj, "iommu_group"); 496 497 trace_remove_device_from_group(group->id, dev); 498 499 kfree(grp_dev->name); 500 kfree(grp_dev); 501 dev->iommu_group = NULL; 502 kobject_put(group->devices_kobj); 503 } 504 505 static void iommu_release_device(struct device *dev) 506 { 507 struct iommu_group *group = dev->iommu_group; 508 struct group_device *device; 509 const struct iommu_ops *ops; 510 511 if (!dev->iommu || !group) 512 return; 513 514 iommu_device_unlink(dev->iommu->iommu_dev, dev); 515 516 mutex_lock(&group->mutex); 517 device = __iommu_group_remove_device(group, dev); 518 519 /* 520 * If the group has become empty then ownership must have been released, 521 * and the current domain must be set back to NULL or the default 522 * domain. 523 */ 524 if (list_empty(&group->devices)) 525 WARN_ON(group->owner_cnt || 526 group->domain != group->default_domain); 527 528 /* 529 * release_device() must stop using any attached domain on the device. 530 * If there are still other devices in the group they are not effected 531 * by this callback. 532 * 533 * The IOMMU driver must set the device to either an identity or 534 * blocking translation and stop using any domain pointer, as it is 535 * going to be freed. 536 */ 537 ops = dev_iommu_ops(dev); 538 if (ops->release_device) 539 ops->release_device(dev); 540 mutex_unlock(&group->mutex); 541 542 if (device) 543 __iommu_group_release_device(group, device); 544 545 module_put(ops->owner); 546 dev_iommu_free(dev); 547 } 548 549 static int __init iommu_set_def_domain_type(char *str) 550 { 551 bool pt; 552 int ret; 553 554 ret = kstrtobool(str, &pt); 555 if (ret) 556 return ret; 557 558 if (pt) 559 iommu_set_default_passthrough(true); 560 else 561 iommu_set_default_translated(true); 562 563 return 0; 564 } 565 early_param("iommu.passthrough", iommu_set_def_domain_type); 566 567 static int __init iommu_dma_setup(char *str) 568 { 569 int ret = kstrtobool(str, &iommu_dma_strict); 570 571 if (!ret) 572 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 573 return ret; 574 } 575 early_param("iommu.strict", iommu_dma_setup); 576 577 void iommu_set_dma_strict(void) 578 { 579 iommu_dma_strict = true; 580 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 581 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 582 } 583 584 static ssize_t iommu_group_attr_show(struct kobject *kobj, 585 struct attribute *__attr, char *buf) 586 { 587 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 588 struct iommu_group *group = to_iommu_group(kobj); 589 ssize_t ret = -EIO; 590 591 if (attr->show) 592 ret = attr->show(group, buf); 593 return ret; 594 } 595 596 static ssize_t iommu_group_attr_store(struct kobject *kobj, 597 struct attribute *__attr, 598 const char *buf, size_t count) 599 { 600 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 601 struct iommu_group *group = to_iommu_group(kobj); 602 ssize_t ret = -EIO; 603 604 if (attr->store) 605 ret = attr->store(group, buf, count); 606 return ret; 607 } 608 609 static const struct sysfs_ops iommu_group_sysfs_ops = { 610 .show = iommu_group_attr_show, 611 .store = iommu_group_attr_store, 612 }; 613 614 static int iommu_group_create_file(struct iommu_group *group, 615 struct iommu_group_attribute *attr) 616 { 617 return sysfs_create_file(&group->kobj, &attr->attr); 618 } 619 620 static void iommu_group_remove_file(struct iommu_group *group, 621 struct iommu_group_attribute *attr) 622 { 623 sysfs_remove_file(&group->kobj, &attr->attr); 624 } 625 626 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 627 { 628 return sysfs_emit(buf, "%s\n", group->name); 629 } 630 631 /** 632 * iommu_insert_resv_region - Insert a new region in the 633 * list of reserved regions. 634 * @new: new region to insert 635 * @regions: list of regions 636 * 637 * Elements are sorted by start address and overlapping segments 638 * of the same type are merged. 639 */ 640 static int iommu_insert_resv_region(struct iommu_resv_region *new, 641 struct list_head *regions) 642 { 643 struct iommu_resv_region *iter, *tmp, *nr, *top; 644 LIST_HEAD(stack); 645 646 nr = iommu_alloc_resv_region(new->start, new->length, 647 new->prot, new->type, GFP_KERNEL); 648 if (!nr) 649 return -ENOMEM; 650 651 /* First add the new element based on start address sorting */ 652 list_for_each_entry(iter, regions, list) { 653 if (nr->start < iter->start || 654 (nr->start == iter->start && nr->type <= iter->type)) 655 break; 656 } 657 list_add_tail(&nr->list, &iter->list); 658 659 /* Merge overlapping segments of type nr->type in @regions, if any */ 660 list_for_each_entry_safe(iter, tmp, regions, list) { 661 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 662 663 /* no merge needed on elements of different types than @new */ 664 if (iter->type != new->type) { 665 list_move_tail(&iter->list, &stack); 666 continue; 667 } 668 669 /* look for the last stack element of same type as @iter */ 670 list_for_each_entry_reverse(top, &stack, list) 671 if (top->type == iter->type) 672 goto check_overlap; 673 674 list_move_tail(&iter->list, &stack); 675 continue; 676 677 check_overlap: 678 top_end = top->start + top->length - 1; 679 680 if (iter->start > top_end + 1) { 681 list_move_tail(&iter->list, &stack); 682 } else { 683 top->length = max(top_end, iter_end) - top->start + 1; 684 list_del(&iter->list); 685 kfree(iter); 686 } 687 } 688 list_splice(&stack, regions); 689 return 0; 690 } 691 692 static int 693 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 694 struct list_head *group_resv_regions) 695 { 696 struct iommu_resv_region *entry; 697 int ret = 0; 698 699 list_for_each_entry(entry, dev_resv_regions, list) { 700 ret = iommu_insert_resv_region(entry, group_resv_regions); 701 if (ret) 702 break; 703 } 704 return ret; 705 } 706 707 int iommu_get_group_resv_regions(struct iommu_group *group, 708 struct list_head *head) 709 { 710 struct group_device *device; 711 int ret = 0; 712 713 mutex_lock(&group->mutex); 714 for_each_group_device(group, device) { 715 struct list_head dev_resv_regions; 716 717 /* 718 * Non-API groups still expose reserved_regions in sysfs, 719 * so filter out calls that get here that way. 720 */ 721 if (!device->dev->iommu) 722 break; 723 724 INIT_LIST_HEAD(&dev_resv_regions); 725 iommu_get_resv_regions(device->dev, &dev_resv_regions); 726 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 727 iommu_put_resv_regions(device->dev, &dev_resv_regions); 728 if (ret) 729 break; 730 } 731 mutex_unlock(&group->mutex); 732 return ret; 733 } 734 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 735 736 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 737 char *buf) 738 { 739 struct iommu_resv_region *region, *next; 740 struct list_head group_resv_regions; 741 int offset = 0; 742 743 INIT_LIST_HEAD(&group_resv_regions); 744 iommu_get_group_resv_regions(group, &group_resv_regions); 745 746 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 747 offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", 748 (long long)region->start, 749 (long long)(region->start + 750 region->length - 1), 751 iommu_group_resv_type_string[region->type]); 752 kfree(region); 753 } 754 755 return offset; 756 } 757 758 static ssize_t iommu_group_show_type(struct iommu_group *group, 759 char *buf) 760 { 761 char *type = "unknown"; 762 763 mutex_lock(&group->mutex); 764 if (group->default_domain) { 765 switch (group->default_domain->type) { 766 case IOMMU_DOMAIN_BLOCKED: 767 type = "blocked"; 768 break; 769 case IOMMU_DOMAIN_IDENTITY: 770 type = "identity"; 771 break; 772 case IOMMU_DOMAIN_UNMANAGED: 773 type = "unmanaged"; 774 break; 775 case IOMMU_DOMAIN_DMA: 776 type = "DMA"; 777 break; 778 case IOMMU_DOMAIN_DMA_FQ: 779 type = "DMA-FQ"; 780 break; 781 } 782 } 783 mutex_unlock(&group->mutex); 784 785 return sysfs_emit(buf, "%s\n", type); 786 } 787 788 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 789 790 static IOMMU_GROUP_ATTR(reserved_regions, 0444, 791 iommu_group_show_resv_regions, NULL); 792 793 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 794 iommu_group_store_type); 795 796 static void iommu_group_release(struct kobject *kobj) 797 { 798 struct iommu_group *group = to_iommu_group(kobj); 799 800 pr_debug("Releasing group %d\n", group->id); 801 802 if (group->iommu_data_release) 803 group->iommu_data_release(group->iommu_data); 804 805 ida_free(&iommu_group_ida, group->id); 806 807 if (group->default_domain) 808 iommu_domain_free(group->default_domain); 809 if (group->blocking_domain) 810 iommu_domain_free(group->blocking_domain); 811 812 kfree(group->name); 813 kfree(group); 814 } 815 816 static const struct kobj_type iommu_group_ktype = { 817 .sysfs_ops = &iommu_group_sysfs_ops, 818 .release = iommu_group_release, 819 }; 820 821 /** 822 * iommu_group_alloc - Allocate a new group 823 * 824 * This function is called by an iommu driver to allocate a new iommu 825 * group. The iommu group represents the minimum granularity of the iommu. 826 * Upon successful return, the caller holds a reference to the supplied 827 * group in order to hold the group until devices are added. Use 828 * iommu_group_put() to release this extra reference count, allowing the 829 * group to be automatically reclaimed once it has no devices or external 830 * references. 831 */ 832 struct iommu_group *iommu_group_alloc(void) 833 { 834 struct iommu_group *group; 835 int ret; 836 837 group = kzalloc(sizeof(*group), GFP_KERNEL); 838 if (!group) 839 return ERR_PTR(-ENOMEM); 840 841 group->kobj.kset = iommu_group_kset; 842 mutex_init(&group->mutex); 843 INIT_LIST_HEAD(&group->devices); 844 INIT_LIST_HEAD(&group->entry); 845 xa_init(&group->pasid_array); 846 847 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 848 if (ret < 0) { 849 kfree(group); 850 return ERR_PTR(ret); 851 } 852 group->id = ret; 853 854 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 855 NULL, "%d", group->id); 856 if (ret) { 857 kobject_put(&group->kobj); 858 return ERR_PTR(ret); 859 } 860 861 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 862 if (!group->devices_kobj) { 863 kobject_put(&group->kobj); /* triggers .release & free */ 864 return ERR_PTR(-ENOMEM); 865 } 866 867 /* 868 * The devices_kobj holds a reference on the group kobject, so 869 * as long as that exists so will the group. We can therefore 870 * use the devices_kobj for reference counting. 871 */ 872 kobject_put(&group->kobj); 873 874 ret = iommu_group_create_file(group, 875 &iommu_group_attr_reserved_regions); 876 if (ret) { 877 kobject_put(group->devices_kobj); 878 return ERR_PTR(ret); 879 } 880 881 ret = iommu_group_create_file(group, &iommu_group_attr_type); 882 if (ret) { 883 kobject_put(group->devices_kobj); 884 return ERR_PTR(ret); 885 } 886 887 pr_debug("Allocated group %d\n", group->id); 888 889 return group; 890 } 891 EXPORT_SYMBOL_GPL(iommu_group_alloc); 892 893 /** 894 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 895 * @group: the group 896 * 897 * iommu drivers can store data in the group for use when doing iommu 898 * operations. This function provides a way to retrieve it. Caller 899 * should hold a group reference. 900 */ 901 void *iommu_group_get_iommudata(struct iommu_group *group) 902 { 903 return group->iommu_data; 904 } 905 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 906 907 /** 908 * iommu_group_set_iommudata - set iommu_data for a group 909 * @group: the group 910 * @iommu_data: new data 911 * @release: release function for iommu_data 912 * 913 * iommu drivers can store data in the group for use when doing iommu 914 * operations. This function provides a way to set the data after 915 * the group has been allocated. Caller should hold a group reference. 916 */ 917 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 918 void (*release)(void *iommu_data)) 919 { 920 group->iommu_data = iommu_data; 921 group->iommu_data_release = release; 922 } 923 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 924 925 /** 926 * iommu_group_set_name - set name for a group 927 * @group: the group 928 * @name: name 929 * 930 * Allow iommu driver to set a name for a group. When set it will 931 * appear in a name attribute file under the group in sysfs. 932 */ 933 int iommu_group_set_name(struct iommu_group *group, const char *name) 934 { 935 int ret; 936 937 if (group->name) { 938 iommu_group_remove_file(group, &iommu_group_attr_name); 939 kfree(group->name); 940 group->name = NULL; 941 if (!name) 942 return 0; 943 } 944 945 group->name = kstrdup(name, GFP_KERNEL); 946 if (!group->name) 947 return -ENOMEM; 948 949 ret = iommu_group_create_file(group, &iommu_group_attr_name); 950 if (ret) { 951 kfree(group->name); 952 group->name = NULL; 953 return ret; 954 } 955 956 return 0; 957 } 958 EXPORT_SYMBOL_GPL(iommu_group_set_name); 959 960 static int iommu_create_device_direct_mappings(struct iommu_group *group, 961 struct device *dev) 962 { 963 struct iommu_domain *domain = group->default_domain; 964 struct iommu_resv_region *entry; 965 struct list_head mappings; 966 unsigned long pg_size; 967 int ret = 0; 968 969 if (!domain || !iommu_is_dma_domain(domain)) 970 return 0; 971 972 BUG_ON(!domain->pgsize_bitmap); 973 974 pg_size = 1UL << __ffs(domain->pgsize_bitmap); 975 INIT_LIST_HEAD(&mappings); 976 977 iommu_get_resv_regions(dev, &mappings); 978 979 /* We need to consider overlapping regions for different devices */ 980 list_for_each_entry(entry, &mappings, list) { 981 dma_addr_t start, end, addr; 982 size_t map_size = 0; 983 984 start = ALIGN(entry->start, pg_size); 985 end = ALIGN(entry->start + entry->length, pg_size); 986 987 if (entry->type != IOMMU_RESV_DIRECT && 988 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) 989 continue; 990 991 for (addr = start; addr <= end; addr += pg_size) { 992 phys_addr_t phys_addr; 993 994 if (addr == end) 995 goto map_end; 996 997 phys_addr = iommu_iova_to_phys(domain, addr); 998 if (!phys_addr) { 999 map_size += pg_size; 1000 continue; 1001 } 1002 1003 map_end: 1004 if (map_size) { 1005 ret = iommu_map(domain, addr - map_size, 1006 addr - map_size, map_size, 1007 entry->prot, GFP_KERNEL); 1008 if (ret) 1009 goto out; 1010 map_size = 0; 1011 } 1012 } 1013 1014 } 1015 1016 iommu_flush_iotlb_all(domain); 1017 1018 out: 1019 iommu_put_resv_regions(dev, &mappings); 1020 1021 return ret; 1022 } 1023 1024 /** 1025 * iommu_group_add_device - add a device to an iommu group 1026 * @group: the group into which to add the device (reference should be held) 1027 * @dev: the device 1028 * 1029 * This function is called by an iommu driver to add a device into a 1030 * group. Adding a device increments the group reference count. 1031 */ 1032 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1033 { 1034 int ret, i = 0; 1035 struct group_device *device; 1036 1037 device = kzalloc(sizeof(*device), GFP_KERNEL); 1038 if (!device) 1039 return -ENOMEM; 1040 1041 device->dev = dev; 1042 1043 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1044 if (ret) 1045 goto err_free_device; 1046 1047 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1048 rename: 1049 if (!device->name) { 1050 ret = -ENOMEM; 1051 goto err_remove_link; 1052 } 1053 1054 ret = sysfs_create_link_nowarn(group->devices_kobj, 1055 &dev->kobj, device->name); 1056 if (ret) { 1057 if (ret == -EEXIST && i >= 0) { 1058 /* 1059 * Account for the slim chance of collision 1060 * and append an instance to the name. 1061 */ 1062 kfree(device->name); 1063 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1064 kobject_name(&dev->kobj), i++); 1065 goto rename; 1066 } 1067 goto err_free_name; 1068 } 1069 1070 kobject_get(group->devices_kobj); 1071 1072 dev->iommu_group = group; 1073 1074 mutex_lock(&group->mutex); 1075 list_add_tail(&device->list, &group->devices); 1076 if (group->domain) 1077 ret = iommu_group_do_dma_first_attach(dev, group->domain); 1078 mutex_unlock(&group->mutex); 1079 if (ret) 1080 goto err_put_group; 1081 1082 trace_add_device_to_group(group->id, dev); 1083 1084 dev_info(dev, "Adding to iommu group %d\n", group->id); 1085 1086 return 0; 1087 1088 err_put_group: 1089 mutex_lock(&group->mutex); 1090 list_del(&device->list); 1091 mutex_unlock(&group->mutex); 1092 dev->iommu_group = NULL; 1093 kobject_put(group->devices_kobj); 1094 sysfs_remove_link(group->devices_kobj, device->name); 1095 err_free_name: 1096 kfree(device->name); 1097 err_remove_link: 1098 sysfs_remove_link(&dev->kobj, "iommu_group"); 1099 err_free_device: 1100 kfree(device); 1101 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1102 return ret; 1103 } 1104 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1105 1106 /** 1107 * iommu_group_remove_device - remove a device from it's current group 1108 * @dev: device to be removed 1109 * 1110 * This function is called by an iommu driver to remove the device from 1111 * it's current group. This decrements the iommu group reference count. 1112 */ 1113 void iommu_group_remove_device(struct device *dev) 1114 { 1115 struct iommu_group *group = dev->iommu_group; 1116 struct group_device *device; 1117 1118 if (!group) 1119 return; 1120 1121 dev_info(dev, "Removing from iommu group %d\n", group->id); 1122 1123 mutex_lock(&group->mutex); 1124 device = __iommu_group_remove_device(group, dev); 1125 mutex_unlock(&group->mutex); 1126 1127 if (device) 1128 __iommu_group_release_device(group, device); 1129 } 1130 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1131 1132 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, 1133 int (*fn)(struct device *, void *)) 1134 { 1135 struct group_device *device; 1136 int ret = 0; 1137 1138 for_each_group_device(group, device) { 1139 ret = fn(device->dev, data); 1140 if (ret) 1141 break; 1142 } 1143 return ret; 1144 } 1145 1146 /** 1147 * iommu_group_for_each_dev - iterate over each device in the group 1148 * @group: the group 1149 * @data: caller opaque data to be passed to callback function 1150 * @fn: caller supplied callback function 1151 * 1152 * This function is called by group users to iterate over group devices. 1153 * Callers should hold a reference count to the group during callback. 1154 * The group->mutex is held across callbacks, which will block calls to 1155 * iommu_group_add/remove_device. 1156 */ 1157 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1158 int (*fn)(struct device *, void *)) 1159 { 1160 int ret; 1161 1162 mutex_lock(&group->mutex); 1163 ret = __iommu_group_for_each_dev(group, data, fn); 1164 mutex_unlock(&group->mutex); 1165 1166 return ret; 1167 } 1168 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1169 1170 /** 1171 * iommu_group_get - Return the group for a device and increment reference 1172 * @dev: get the group that this device belongs to 1173 * 1174 * This function is called by iommu drivers and users to get the group 1175 * for the specified device. If found, the group is returned and the group 1176 * reference in incremented, else NULL. 1177 */ 1178 struct iommu_group *iommu_group_get(struct device *dev) 1179 { 1180 struct iommu_group *group = dev->iommu_group; 1181 1182 if (group) 1183 kobject_get(group->devices_kobj); 1184 1185 return group; 1186 } 1187 EXPORT_SYMBOL_GPL(iommu_group_get); 1188 1189 /** 1190 * iommu_group_ref_get - Increment reference on a group 1191 * @group: the group to use, must not be NULL 1192 * 1193 * This function is called by iommu drivers to take additional references on an 1194 * existing group. Returns the given group for convenience. 1195 */ 1196 struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1197 { 1198 kobject_get(group->devices_kobj); 1199 return group; 1200 } 1201 EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1202 1203 /** 1204 * iommu_group_put - Decrement group reference 1205 * @group: the group to use 1206 * 1207 * This function is called by iommu drivers and users to release the 1208 * iommu group. Once the reference count is zero, the group is released. 1209 */ 1210 void iommu_group_put(struct iommu_group *group) 1211 { 1212 if (group) 1213 kobject_put(group->devices_kobj); 1214 } 1215 EXPORT_SYMBOL_GPL(iommu_group_put); 1216 1217 /** 1218 * iommu_register_device_fault_handler() - Register a device fault handler 1219 * @dev: the device 1220 * @handler: the fault handler 1221 * @data: private data passed as argument to the handler 1222 * 1223 * When an IOMMU fault event is received, this handler gets called with the 1224 * fault event and data as argument. The handler should return 0 on success. If 1225 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also 1226 * complete the fault by calling iommu_page_response() with one of the following 1227 * response code: 1228 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation 1229 * - IOMMU_PAGE_RESP_INVALID: terminate the fault 1230 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting 1231 * page faults if possible. 1232 * 1233 * Return 0 if the fault handler was installed successfully, or an error. 1234 */ 1235 int iommu_register_device_fault_handler(struct device *dev, 1236 iommu_dev_fault_handler_t handler, 1237 void *data) 1238 { 1239 struct dev_iommu *param = dev->iommu; 1240 int ret = 0; 1241 1242 if (!param) 1243 return -EINVAL; 1244 1245 mutex_lock(¶m->lock); 1246 /* Only allow one fault handler registered for each device */ 1247 if (param->fault_param) { 1248 ret = -EBUSY; 1249 goto done_unlock; 1250 } 1251 1252 get_device(dev); 1253 param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); 1254 if (!param->fault_param) { 1255 put_device(dev); 1256 ret = -ENOMEM; 1257 goto done_unlock; 1258 } 1259 param->fault_param->handler = handler; 1260 param->fault_param->data = data; 1261 mutex_init(¶m->fault_param->lock); 1262 INIT_LIST_HEAD(¶m->fault_param->faults); 1263 1264 done_unlock: 1265 mutex_unlock(¶m->lock); 1266 1267 return ret; 1268 } 1269 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); 1270 1271 /** 1272 * iommu_unregister_device_fault_handler() - Unregister the device fault handler 1273 * @dev: the device 1274 * 1275 * Remove the device fault handler installed with 1276 * iommu_register_device_fault_handler(). 1277 * 1278 * Return 0 on success, or an error. 1279 */ 1280 int iommu_unregister_device_fault_handler(struct device *dev) 1281 { 1282 struct dev_iommu *param = dev->iommu; 1283 int ret = 0; 1284 1285 if (!param) 1286 return -EINVAL; 1287 1288 mutex_lock(¶m->lock); 1289 1290 if (!param->fault_param) 1291 goto unlock; 1292 1293 /* we cannot unregister handler if there are pending faults */ 1294 if (!list_empty(¶m->fault_param->faults)) { 1295 ret = -EBUSY; 1296 goto unlock; 1297 } 1298 1299 kfree(param->fault_param); 1300 param->fault_param = NULL; 1301 put_device(dev); 1302 unlock: 1303 mutex_unlock(¶m->lock); 1304 1305 return ret; 1306 } 1307 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); 1308 1309 /** 1310 * iommu_report_device_fault() - Report fault event to device driver 1311 * @dev: the device 1312 * @evt: fault event data 1313 * 1314 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ 1315 * handler. When this function fails and the fault is recoverable, it is the 1316 * caller's responsibility to complete the fault. 1317 * 1318 * Return 0 on success, or an error. 1319 */ 1320 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) 1321 { 1322 struct dev_iommu *param = dev->iommu; 1323 struct iommu_fault_event *evt_pending = NULL; 1324 struct iommu_fault_param *fparam; 1325 int ret = 0; 1326 1327 if (!param || !evt) 1328 return -EINVAL; 1329 1330 /* we only report device fault if there is a handler registered */ 1331 mutex_lock(¶m->lock); 1332 fparam = param->fault_param; 1333 if (!fparam || !fparam->handler) { 1334 ret = -EINVAL; 1335 goto done_unlock; 1336 } 1337 1338 if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && 1339 (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { 1340 evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), 1341 GFP_KERNEL); 1342 if (!evt_pending) { 1343 ret = -ENOMEM; 1344 goto done_unlock; 1345 } 1346 mutex_lock(&fparam->lock); 1347 list_add_tail(&evt_pending->list, &fparam->faults); 1348 mutex_unlock(&fparam->lock); 1349 } 1350 1351 ret = fparam->handler(&evt->fault, fparam->data); 1352 if (ret && evt_pending) { 1353 mutex_lock(&fparam->lock); 1354 list_del(&evt_pending->list); 1355 mutex_unlock(&fparam->lock); 1356 kfree(evt_pending); 1357 } 1358 done_unlock: 1359 mutex_unlock(¶m->lock); 1360 return ret; 1361 } 1362 EXPORT_SYMBOL_GPL(iommu_report_device_fault); 1363 1364 int iommu_page_response(struct device *dev, 1365 struct iommu_page_response *msg) 1366 { 1367 bool needs_pasid; 1368 int ret = -EINVAL; 1369 struct iommu_fault_event *evt; 1370 struct iommu_fault_page_request *prm; 1371 struct dev_iommu *param = dev->iommu; 1372 const struct iommu_ops *ops = dev_iommu_ops(dev); 1373 bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; 1374 1375 if (!ops->page_response) 1376 return -ENODEV; 1377 1378 if (!param || !param->fault_param) 1379 return -EINVAL; 1380 1381 if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || 1382 msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) 1383 return -EINVAL; 1384 1385 /* Only send response if there is a fault report pending */ 1386 mutex_lock(¶m->fault_param->lock); 1387 if (list_empty(¶m->fault_param->faults)) { 1388 dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); 1389 goto done_unlock; 1390 } 1391 /* 1392 * Check if we have a matching page request pending to respond, 1393 * otherwise return -EINVAL 1394 */ 1395 list_for_each_entry(evt, ¶m->fault_param->faults, list) { 1396 prm = &evt->fault.prm; 1397 if (prm->grpid != msg->grpid) 1398 continue; 1399 1400 /* 1401 * If the PASID is required, the corresponding request is 1402 * matched using the group ID, the PASID valid bit and the PASID 1403 * value. Otherwise only the group ID matches request and 1404 * response. 1405 */ 1406 needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; 1407 if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) 1408 continue; 1409 1410 if (!needs_pasid && has_pasid) { 1411 /* No big deal, just clear it. */ 1412 msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; 1413 msg->pasid = 0; 1414 } 1415 1416 ret = ops->page_response(dev, evt, msg); 1417 list_del(&evt->list); 1418 kfree(evt); 1419 break; 1420 } 1421 1422 done_unlock: 1423 mutex_unlock(¶m->fault_param->lock); 1424 return ret; 1425 } 1426 EXPORT_SYMBOL_GPL(iommu_page_response); 1427 1428 /** 1429 * iommu_group_id - Return ID for a group 1430 * @group: the group to ID 1431 * 1432 * Return the unique ID for the group matching the sysfs group number. 1433 */ 1434 int iommu_group_id(struct iommu_group *group) 1435 { 1436 return group->id; 1437 } 1438 EXPORT_SYMBOL_GPL(iommu_group_id); 1439 1440 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1441 unsigned long *devfns); 1442 1443 /* 1444 * To consider a PCI device isolated, we require ACS to support Source 1445 * Validation, Request Redirection, Completer Redirection, and Upstream 1446 * Forwarding. This effectively means that devices cannot spoof their 1447 * requester ID, requests and completions cannot be redirected, and all 1448 * transactions are forwarded upstream, even as it passes through a 1449 * bridge where the target device is downstream. 1450 */ 1451 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1452 1453 /* 1454 * For multifunction devices which are not isolated from each other, find 1455 * all the other non-isolated functions and look for existing groups. For 1456 * each function, we also need to look for aliases to or from other devices 1457 * that may already have a group. 1458 */ 1459 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1460 unsigned long *devfns) 1461 { 1462 struct pci_dev *tmp = NULL; 1463 struct iommu_group *group; 1464 1465 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1466 return NULL; 1467 1468 for_each_pci_dev(tmp) { 1469 if (tmp == pdev || tmp->bus != pdev->bus || 1470 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1471 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1472 continue; 1473 1474 group = get_pci_alias_group(tmp, devfns); 1475 if (group) { 1476 pci_dev_put(tmp); 1477 return group; 1478 } 1479 } 1480 1481 return NULL; 1482 } 1483 1484 /* 1485 * Look for aliases to or from the given device for existing groups. DMA 1486 * aliases are only supported on the same bus, therefore the search 1487 * space is quite small (especially since we're really only looking at pcie 1488 * device, and therefore only expect multiple slots on the root complex or 1489 * downstream switch ports). It's conceivable though that a pair of 1490 * multifunction devices could have aliases between them that would cause a 1491 * loop. To prevent this, we use a bitmap to track where we've been. 1492 */ 1493 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1494 unsigned long *devfns) 1495 { 1496 struct pci_dev *tmp = NULL; 1497 struct iommu_group *group; 1498 1499 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1500 return NULL; 1501 1502 group = iommu_group_get(&pdev->dev); 1503 if (group) 1504 return group; 1505 1506 for_each_pci_dev(tmp) { 1507 if (tmp == pdev || tmp->bus != pdev->bus) 1508 continue; 1509 1510 /* We alias them or they alias us */ 1511 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1512 group = get_pci_alias_group(tmp, devfns); 1513 if (group) { 1514 pci_dev_put(tmp); 1515 return group; 1516 } 1517 1518 group = get_pci_function_alias_group(tmp, devfns); 1519 if (group) { 1520 pci_dev_put(tmp); 1521 return group; 1522 } 1523 } 1524 } 1525 1526 return NULL; 1527 } 1528 1529 struct group_for_pci_data { 1530 struct pci_dev *pdev; 1531 struct iommu_group *group; 1532 }; 1533 1534 /* 1535 * DMA alias iterator callback, return the last seen device. Stop and return 1536 * the IOMMU group if we find one along the way. 1537 */ 1538 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1539 { 1540 struct group_for_pci_data *data = opaque; 1541 1542 data->pdev = pdev; 1543 data->group = iommu_group_get(&pdev->dev); 1544 1545 return data->group != NULL; 1546 } 1547 1548 /* 1549 * Generic device_group call-back function. It just allocates one 1550 * iommu-group per device. 1551 */ 1552 struct iommu_group *generic_device_group(struct device *dev) 1553 { 1554 return iommu_group_alloc(); 1555 } 1556 EXPORT_SYMBOL_GPL(generic_device_group); 1557 1558 /* 1559 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1560 * to find or create an IOMMU group for a device. 1561 */ 1562 struct iommu_group *pci_device_group(struct device *dev) 1563 { 1564 struct pci_dev *pdev = to_pci_dev(dev); 1565 struct group_for_pci_data data; 1566 struct pci_bus *bus; 1567 struct iommu_group *group = NULL; 1568 u64 devfns[4] = { 0 }; 1569 1570 if (WARN_ON(!dev_is_pci(dev))) 1571 return ERR_PTR(-EINVAL); 1572 1573 /* 1574 * Find the upstream DMA alias for the device. A device must not 1575 * be aliased due to topology in order to have its own IOMMU group. 1576 * If we find an alias along the way that already belongs to a 1577 * group, use it. 1578 */ 1579 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1580 return data.group; 1581 1582 pdev = data.pdev; 1583 1584 /* 1585 * Continue upstream from the point of minimum IOMMU granularity 1586 * due to aliases to the point where devices are protected from 1587 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1588 * group, use it. 1589 */ 1590 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1591 if (!bus->self) 1592 continue; 1593 1594 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1595 break; 1596 1597 pdev = bus->self; 1598 1599 group = iommu_group_get(&pdev->dev); 1600 if (group) 1601 return group; 1602 } 1603 1604 /* 1605 * Look for existing groups on device aliases. If we alias another 1606 * device or another device aliases us, use the same group. 1607 */ 1608 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1609 if (group) 1610 return group; 1611 1612 /* 1613 * Look for existing groups on non-isolated functions on the same 1614 * slot and aliases of those funcions, if any. No need to clear 1615 * the search bitmap, the tested devfns are still valid. 1616 */ 1617 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1618 if (group) 1619 return group; 1620 1621 /* No shared group found, allocate new */ 1622 return iommu_group_alloc(); 1623 } 1624 EXPORT_SYMBOL_GPL(pci_device_group); 1625 1626 /* Get the IOMMU group for device on fsl-mc bus */ 1627 struct iommu_group *fsl_mc_device_group(struct device *dev) 1628 { 1629 struct device *cont_dev = fsl_mc_cont_dev(dev); 1630 struct iommu_group *group; 1631 1632 group = iommu_group_get(cont_dev); 1633 if (!group) 1634 group = iommu_group_alloc(); 1635 return group; 1636 } 1637 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1638 1639 static int iommu_get_def_domain_type(struct device *dev) 1640 { 1641 const struct iommu_ops *ops = dev_iommu_ops(dev); 1642 1643 if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) 1644 return IOMMU_DOMAIN_DMA; 1645 1646 if (ops->def_domain_type) 1647 return ops->def_domain_type(dev); 1648 1649 return 0; 1650 } 1651 1652 static int iommu_group_alloc_default_domain(const struct bus_type *bus, 1653 struct iommu_group *group, 1654 unsigned int type) 1655 { 1656 struct iommu_domain *dom; 1657 1658 dom = __iommu_domain_alloc(bus, type); 1659 if (!dom && type != IOMMU_DOMAIN_DMA) { 1660 dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); 1661 if (dom) 1662 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1663 type, group->name); 1664 } 1665 1666 if (!dom) 1667 return -ENOMEM; 1668 1669 group->default_domain = dom; 1670 if (!group->domain) 1671 group->domain = dom; 1672 return 0; 1673 } 1674 1675 static int iommu_alloc_default_domain(struct iommu_group *group, 1676 struct device *dev) 1677 { 1678 unsigned int type; 1679 1680 if (group->default_domain) 1681 return 0; 1682 1683 type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type; 1684 1685 return iommu_group_alloc_default_domain(dev->bus, group, type); 1686 } 1687 1688 /** 1689 * iommu_group_get_for_dev - Find or create the IOMMU group for a device 1690 * @dev: target device 1691 * 1692 * This function is intended to be called by IOMMU drivers and extended to 1693 * support common, bus-defined algorithms when determining or creating the 1694 * IOMMU group for a device. On success, the caller will hold a reference 1695 * to the returned IOMMU group, which will already include the provided 1696 * device. The reference should be released with iommu_group_put(). 1697 */ 1698 static struct iommu_group *iommu_group_get_for_dev(struct device *dev) 1699 { 1700 const struct iommu_ops *ops = dev_iommu_ops(dev); 1701 struct iommu_group *group; 1702 int ret; 1703 1704 group = iommu_group_get(dev); 1705 if (group) 1706 return group; 1707 1708 group = ops->device_group(dev); 1709 if (WARN_ON_ONCE(group == NULL)) 1710 return ERR_PTR(-EINVAL); 1711 1712 if (IS_ERR(group)) 1713 return group; 1714 1715 ret = iommu_group_add_device(group, dev); 1716 if (ret) 1717 goto out_put_group; 1718 1719 return group; 1720 1721 out_put_group: 1722 iommu_group_put(group); 1723 1724 return ERR_PTR(ret); 1725 } 1726 1727 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1728 { 1729 return group->default_domain; 1730 } 1731 1732 static int probe_iommu_group(struct device *dev, void *data) 1733 { 1734 struct list_head *group_list = data; 1735 struct iommu_group *group; 1736 int ret; 1737 1738 /* Device is probed already if in a group */ 1739 group = iommu_group_get(dev); 1740 if (group) { 1741 iommu_group_put(group); 1742 return 0; 1743 } 1744 1745 ret = __iommu_probe_device(dev, group_list); 1746 if (ret == -ENODEV) 1747 ret = 0; 1748 1749 return ret; 1750 } 1751 1752 static int iommu_bus_notifier(struct notifier_block *nb, 1753 unsigned long action, void *data) 1754 { 1755 struct device *dev = data; 1756 1757 if (action == BUS_NOTIFY_ADD_DEVICE) { 1758 int ret; 1759 1760 ret = iommu_probe_device(dev); 1761 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1762 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1763 iommu_release_device(dev); 1764 return NOTIFY_OK; 1765 } 1766 1767 return 0; 1768 } 1769 1770 struct __group_domain_type { 1771 struct device *dev; 1772 unsigned int type; 1773 }; 1774 1775 static int probe_get_default_domain_type(struct device *dev, void *data) 1776 { 1777 struct __group_domain_type *gtype = data; 1778 unsigned int type = iommu_get_def_domain_type(dev); 1779 1780 if (type) { 1781 if (gtype->type && gtype->type != type) { 1782 dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", 1783 iommu_domain_type_str(type), 1784 dev_name(gtype->dev), 1785 iommu_domain_type_str(gtype->type)); 1786 gtype->type = 0; 1787 } 1788 1789 if (!gtype->dev) { 1790 gtype->dev = dev; 1791 gtype->type = type; 1792 } 1793 } 1794 1795 return 0; 1796 } 1797 1798 static void probe_alloc_default_domain(const struct bus_type *bus, 1799 struct iommu_group *group) 1800 { 1801 struct __group_domain_type gtype; 1802 1803 memset(>ype, 0, sizeof(gtype)); 1804 1805 /* Ask for default domain requirements of all devices in the group */ 1806 __iommu_group_for_each_dev(group, >ype, 1807 probe_get_default_domain_type); 1808 1809 if (!gtype.type) 1810 gtype.type = iommu_def_domain_type; 1811 1812 iommu_group_alloc_default_domain(bus, group, gtype.type); 1813 1814 } 1815 1816 static int __iommu_group_dma_first_attach(struct iommu_group *group) 1817 { 1818 return __iommu_group_for_each_dev(group, group->default_domain, 1819 iommu_group_do_dma_first_attach); 1820 } 1821 1822 static int iommu_group_do_probe_finalize(struct device *dev, void *data) 1823 { 1824 const struct iommu_ops *ops = dev_iommu_ops(dev); 1825 1826 if (ops->probe_finalize) 1827 ops->probe_finalize(dev); 1828 1829 return 0; 1830 } 1831 1832 static void __iommu_group_dma_finalize(struct iommu_group *group) 1833 { 1834 __iommu_group_for_each_dev(group, group->default_domain, 1835 iommu_group_do_probe_finalize); 1836 } 1837 1838 static int iommu_do_create_direct_mappings(struct device *dev, void *data) 1839 { 1840 struct iommu_group *group = data; 1841 1842 iommu_create_device_direct_mappings(group, dev); 1843 1844 return 0; 1845 } 1846 1847 static int iommu_group_create_direct_mappings(struct iommu_group *group) 1848 { 1849 return __iommu_group_for_each_dev(group, group, 1850 iommu_do_create_direct_mappings); 1851 } 1852 1853 int bus_iommu_probe(const struct bus_type *bus) 1854 { 1855 struct iommu_group *group, *next; 1856 LIST_HEAD(group_list); 1857 int ret; 1858 1859 /* 1860 * This code-path does not allocate the default domain when 1861 * creating the iommu group, so do it after the groups are 1862 * created. 1863 */ 1864 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1865 if (ret) 1866 return ret; 1867 1868 list_for_each_entry_safe(group, next, &group_list, entry) { 1869 mutex_lock(&group->mutex); 1870 1871 /* Remove item from the list */ 1872 list_del_init(&group->entry); 1873 1874 /* Try to allocate default domain */ 1875 probe_alloc_default_domain(bus, group); 1876 1877 if (!group->default_domain) { 1878 mutex_unlock(&group->mutex); 1879 continue; 1880 } 1881 1882 iommu_group_create_direct_mappings(group); 1883 1884 ret = __iommu_group_dma_first_attach(group); 1885 1886 mutex_unlock(&group->mutex); 1887 1888 if (ret) 1889 break; 1890 1891 __iommu_group_dma_finalize(group); 1892 } 1893 1894 return ret; 1895 } 1896 1897 bool iommu_present(const struct bus_type *bus) 1898 { 1899 return bus->iommu_ops != NULL; 1900 } 1901 EXPORT_SYMBOL_GPL(iommu_present); 1902 1903 /** 1904 * device_iommu_capable() - check for a general IOMMU capability 1905 * @dev: device to which the capability would be relevant, if available 1906 * @cap: IOMMU capability 1907 * 1908 * Return: true if an IOMMU is present and supports the given capability 1909 * for the given device, otherwise false. 1910 */ 1911 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1912 { 1913 const struct iommu_ops *ops; 1914 1915 if (!dev->iommu || !dev->iommu->iommu_dev) 1916 return false; 1917 1918 ops = dev_iommu_ops(dev); 1919 if (!ops->capable) 1920 return false; 1921 1922 return ops->capable(dev, cap); 1923 } 1924 EXPORT_SYMBOL_GPL(device_iommu_capable); 1925 1926 /** 1927 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1928 * for a group 1929 * @group: Group to query 1930 * 1931 * IOMMU groups should not have differing values of 1932 * msi_device_has_isolated_msi() for devices in a group. However nothing 1933 * directly prevents this, so ensure mistakes don't result in isolation failures 1934 * by checking that all the devices are the same. 1935 */ 1936 bool iommu_group_has_isolated_msi(struct iommu_group *group) 1937 { 1938 struct group_device *group_dev; 1939 bool ret = true; 1940 1941 mutex_lock(&group->mutex); 1942 for_each_group_device(group, group_dev) 1943 ret &= msi_device_has_isolated_msi(group_dev->dev); 1944 mutex_unlock(&group->mutex); 1945 return ret; 1946 } 1947 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 1948 1949 /** 1950 * iommu_set_fault_handler() - set a fault handler for an iommu domain 1951 * @domain: iommu domain 1952 * @handler: fault handler 1953 * @token: user data, will be passed back to the fault handler 1954 * 1955 * This function should be used by IOMMU users which want to be notified 1956 * whenever an IOMMU fault happens. 1957 * 1958 * The fault handler itself should return 0 on success, and an appropriate 1959 * error code otherwise. 1960 */ 1961 void iommu_set_fault_handler(struct iommu_domain *domain, 1962 iommu_fault_handler_t handler, 1963 void *token) 1964 { 1965 BUG_ON(!domain); 1966 1967 domain->handler = handler; 1968 domain->handler_token = token; 1969 } 1970 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1971 1972 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, 1973 unsigned type) 1974 { 1975 struct iommu_domain *domain; 1976 unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; 1977 1978 if (bus == NULL || bus->iommu_ops == NULL) 1979 return NULL; 1980 1981 domain = bus->iommu_ops->domain_alloc(alloc_type); 1982 if (!domain) 1983 return NULL; 1984 1985 domain->type = type; 1986 /* 1987 * If not already set, assume all sizes by default; the driver 1988 * may override this later 1989 */ 1990 if (!domain->pgsize_bitmap) 1991 domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 1992 1993 if (!domain->ops) 1994 domain->ops = bus->iommu_ops->default_domain_ops; 1995 1996 if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { 1997 iommu_domain_free(domain); 1998 domain = NULL; 1999 } 2000 return domain; 2001 } 2002 2003 struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) 2004 { 2005 return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); 2006 } 2007 EXPORT_SYMBOL_GPL(iommu_domain_alloc); 2008 2009 void iommu_domain_free(struct iommu_domain *domain) 2010 { 2011 if (domain->type == IOMMU_DOMAIN_SVA) 2012 mmdrop(domain->mm); 2013 iommu_put_dma_cookie(domain); 2014 domain->ops->free(domain); 2015 } 2016 EXPORT_SYMBOL_GPL(iommu_domain_free); 2017 2018 /* 2019 * Put the group's domain back to the appropriate core-owned domain - either the 2020 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 2021 */ 2022 static void __iommu_group_set_core_domain(struct iommu_group *group) 2023 { 2024 struct iommu_domain *new_domain; 2025 int ret; 2026 2027 if (group->owner) 2028 new_domain = group->blocking_domain; 2029 else 2030 new_domain = group->default_domain; 2031 2032 ret = __iommu_group_set_domain(group, new_domain); 2033 WARN(ret, "iommu driver failed to attach the default/blocking domain"); 2034 } 2035 2036 static int __iommu_attach_device(struct iommu_domain *domain, 2037 struct device *dev) 2038 { 2039 int ret; 2040 2041 if (unlikely(domain->ops->attach_dev == NULL)) 2042 return -ENODEV; 2043 2044 ret = domain->ops->attach_dev(domain, dev); 2045 if (ret) 2046 return ret; 2047 dev->iommu->attach_deferred = 0; 2048 trace_attach_device_to_domain(dev); 2049 return 0; 2050 } 2051 2052 /** 2053 * iommu_attach_device - Attach an IOMMU domain to a device 2054 * @domain: IOMMU domain to attach 2055 * @dev: Device that will be attached 2056 * 2057 * Returns 0 on success and error code on failure 2058 * 2059 * Note that EINVAL can be treated as a soft failure, indicating 2060 * that certain configuration of the domain is incompatible with 2061 * the device. In this case attaching a different domain to the 2062 * device may succeed. 2063 */ 2064 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2065 { 2066 struct iommu_group *group; 2067 int ret; 2068 2069 group = iommu_group_get(dev); 2070 if (!group) 2071 return -ENODEV; 2072 2073 /* 2074 * Lock the group to make sure the device-count doesn't 2075 * change while we are attaching 2076 */ 2077 mutex_lock(&group->mutex); 2078 ret = -EINVAL; 2079 if (list_count_nodes(&group->devices) != 1) 2080 goto out_unlock; 2081 2082 ret = __iommu_attach_group(domain, group); 2083 2084 out_unlock: 2085 mutex_unlock(&group->mutex); 2086 iommu_group_put(group); 2087 2088 return ret; 2089 } 2090 EXPORT_SYMBOL_GPL(iommu_attach_device); 2091 2092 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2093 { 2094 if (dev->iommu && dev->iommu->attach_deferred) 2095 return __iommu_attach_device(domain, dev); 2096 2097 return 0; 2098 } 2099 2100 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2101 { 2102 struct iommu_group *group; 2103 2104 group = iommu_group_get(dev); 2105 if (!group) 2106 return; 2107 2108 mutex_lock(&group->mutex); 2109 if (WARN_ON(domain != group->domain) || 2110 WARN_ON(list_count_nodes(&group->devices) != 1)) 2111 goto out_unlock; 2112 __iommu_group_set_core_domain(group); 2113 2114 out_unlock: 2115 mutex_unlock(&group->mutex); 2116 iommu_group_put(group); 2117 } 2118 EXPORT_SYMBOL_GPL(iommu_detach_device); 2119 2120 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2121 { 2122 struct iommu_domain *domain; 2123 struct iommu_group *group; 2124 2125 group = iommu_group_get(dev); 2126 if (!group) 2127 return NULL; 2128 2129 domain = group->domain; 2130 2131 iommu_group_put(group); 2132 2133 return domain; 2134 } 2135 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2136 2137 /* 2138 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2139 * guarantees that the group and its default domain are valid and correct. 2140 */ 2141 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2142 { 2143 return dev->iommu_group->default_domain; 2144 } 2145 2146 /* 2147 * IOMMU groups are really the natural working unit of the IOMMU, but 2148 * the IOMMU API works on domains and devices. Bridge that gap by 2149 * iterating over the devices in a group. Ideally we'd have a single 2150 * device which represents the requestor ID of the group, but we also 2151 * allow IOMMU drivers to create policy defined minimum sets, where 2152 * the physical hardware may be able to distiguish members, but we 2153 * wish to group them at a higher level (ex. untrusted multi-function 2154 * PCI devices). Thus we attach each device. 2155 */ 2156 static int iommu_group_do_attach_device(struct device *dev, void *data) 2157 { 2158 struct iommu_domain *domain = data; 2159 2160 return __iommu_attach_device(domain, dev); 2161 } 2162 2163 static int __iommu_attach_group(struct iommu_domain *domain, 2164 struct iommu_group *group) 2165 { 2166 int ret; 2167 2168 if (group->domain && group->domain != group->default_domain && 2169 group->domain != group->blocking_domain) 2170 return -EBUSY; 2171 2172 ret = __iommu_group_for_each_dev(group, domain, 2173 iommu_group_do_attach_device); 2174 if (ret == 0) { 2175 group->domain = domain; 2176 } else { 2177 /* 2178 * To recover from the case when certain device within the 2179 * group fails to attach to the new domain, we need force 2180 * attaching all devices back to the old domain. The old 2181 * domain is compatible for all devices in the group, 2182 * hence the iommu driver should always return success. 2183 */ 2184 struct iommu_domain *old_domain = group->domain; 2185 2186 group->domain = NULL; 2187 WARN(__iommu_group_set_domain(group, old_domain), 2188 "iommu driver failed to attach a compatible domain"); 2189 } 2190 2191 return ret; 2192 } 2193 2194 /** 2195 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2196 * @domain: IOMMU domain to attach 2197 * @group: IOMMU group that will be attached 2198 * 2199 * Returns 0 on success and error code on failure 2200 * 2201 * Note that EINVAL can be treated as a soft failure, indicating 2202 * that certain configuration of the domain is incompatible with 2203 * the group. In this case attaching a different domain to the 2204 * group may succeed. 2205 */ 2206 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2207 { 2208 int ret; 2209 2210 mutex_lock(&group->mutex); 2211 ret = __iommu_attach_group(domain, group); 2212 mutex_unlock(&group->mutex); 2213 2214 return ret; 2215 } 2216 EXPORT_SYMBOL_GPL(iommu_attach_group); 2217 2218 static int iommu_group_do_set_platform_dma(struct device *dev, void *data) 2219 { 2220 const struct iommu_ops *ops = dev_iommu_ops(dev); 2221 2222 if (!WARN_ON(!ops->set_platform_dma_ops)) 2223 ops->set_platform_dma_ops(dev); 2224 2225 return 0; 2226 } 2227 2228 static int __iommu_group_set_domain(struct iommu_group *group, 2229 struct iommu_domain *new_domain) 2230 { 2231 int ret; 2232 2233 if (group->domain == new_domain) 2234 return 0; 2235 2236 /* 2237 * New drivers should support default domains, so set_platform_dma() 2238 * op will never be called. Otherwise the NULL domain represents some 2239 * platform specific behavior. 2240 */ 2241 if (!new_domain) { 2242 __iommu_group_for_each_dev(group, NULL, 2243 iommu_group_do_set_platform_dma); 2244 group->domain = NULL; 2245 return 0; 2246 } 2247 2248 /* 2249 * Changing the domain is done by calling attach_dev() on the new 2250 * domain. This switch does not have to be atomic and DMA can be 2251 * discarded during the transition. DMA must only be able to access 2252 * either new_domain or group->domain, never something else. 2253 * 2254 * Note that this is called in error unwind paths, attaching to a 2255 * domain that has already been attached cannot fail. 2256 */ 2257 ret = __iommu_group_for_each_dev(group, new_domain, 2258 iommu_group_do_attach_device); 2259 if (ret) 2260 return ret; 2261 group->domain = new_domain; 2262 return 0; 2263 } 2264 2265 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2266 { 2267 mutex_lock(&group->mutex); 2268 __iommu_group_set_core_domain(group); 2269 mutex_unlock(&group->mutex); 2270 } 2271 EXPORT_SYMBOL_GPL(iommu_detach_group); 2272 2273 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2274 { 2275 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2276 return iova; 2277 2278 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2279 return 0; 2280 2281 return domain->ops->iova_to_phys(domain, iova); 2282 } 2283 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2284 2285 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2286 phys_addr_t paddr, size_t size, size_t *count) 2287 { 2288 unsigned int pgsize_idx, pgsize_idx_next; 2289 unsigned long pgsizes; 2290 size_t offset, pgsize, pgsize_next; 2291 unsigned long addr_merge = paddr | iova; 2292 2293 /* Page sizes supported by the hardware and small enough for @size */ 2294 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2295 2296 /* Constrain the page sizes further based on the maximum alignment */ 2297 if (likely(addr_merge)) 2298 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2299 2300 /* Make sure we have at least one suitable page size */ 2301 BUG_ON(!pgsizes); 2302 2303 /* Pick the biggest page size remaining */ 2304 pgsize_idx = __fls(pgsizes); 2305 pgsize = BIT(pgsize_idx); 2306 if (!count) 2307 return pgsize; 2308 2309 /* Find the next biggest support page size, if it exists */ 2310 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2311 if (!pgsizes) 2312 goto out_set_count; 2313 2314 pgsize_idx_next = __ffs(pgsizes); 2315 pgsize_next = BIT(pgsize_idx_next); 2316 2317 /* 2318 * There's no point trying a bigger page size unless the virtual 2319 * and physical addresses are similarly offset within the larger page. 2320 */ 2321 if ((iova ^ paddr) & (pgsize_next - 1)) 2322 goto out_set_count; 2323 2324 /* Calculate the offset to the next page size alignment boundary */ 2325 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2326 2327 /* 2328 * If size is big enough to accommodate the larger page, reduce 2329 * the number of smaller pages. 2330 */ 2331 if (offset + pgsize_next <= size) 2332 size = offset; 2333 2334 out_set_count: 2335 *count = size >> pgsize_idx; 2336 return pgsize; 2337 } 2338 2339 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, 2340 phys_addr_t paddr, size_t size, int prot, 2341 gfp_t gfp, size_t *mapped) 2342 { 2343 const struct iommu_domain_ops *ops = domain->ops; 2344 size_t pgsize, count; 2345 int ret; 2346 2347 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2348 2349 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2350 iova, &paddr, pgsize, count); 2351 2352 if (ops->map_pages) { 2353 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2354 gfp, mapped); 2355 } else { 2356 ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); 2357 *mapped = ret ? 0 : pgsize; 2358 } 2359 2360 return ret; 2361 } 2362 2363 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2364 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2365 { 2366 const struct iommu_domain_ops *ops = domain->ops; 2367 unsigned long orig_iova = iova; 2368 unsigned int min_pagesz; 2369 size_t orig_size = size; 2370 phys_addr_t orig_paddr = paddr; 2371 int ret = 0; 2372 2373 if (unlikely(!(ops->map || ops->map_pages) || 2374 domain->pgsize_bitmap == 0UL)) 2375 return -ENODEV; 2376 2377 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2378 return -EINVAL; 2379 2380 /* find out the minimum page size supported */ 2381 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2382 2383 /* 2384 * both the virtual address and the physical one, as well as 2385 * the size of the mapping, must be aligned (at least) to the 2386 * size of the smallest page supported by the hardware 2387 */ 2388 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2389 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2390 iova, &paddr, size, min_pagesz); 2391 return -EINVAL; 2392 } 2393 2394 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2395 2396 while (size) { 2397 size_t mapped = 0; 2398 2399 ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, 2400 &mapped); 2401 /* 2402 * Some pages may have been mapped, even if an error occurred, 2403 * so we should account for those so they can be unmapped. 2404 */ 2405 size -= mapped; 2406 2407 if (ret) 2408 break; 2409 2410 iova += mapped; 2411 paddr += mapped; 2412 } 2413 2414 /* unroll mapping in case something went wrong */ 2415 if (ret) 2416 iommu_unmap(domain, orig_iova, orig_size - size); 2417 else 2418 trace_map(orig_iova, orig_paddr, orig_size); 2419 2420 return ret; 2421 } 2422 2423 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2424 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2425 { 2426 const struct iommu_domain_ops *ops = domain->ops; 2427 int ret; 2428 2429 might_sleep_if(gfpflags_allow_blocking(gfp)); 2430 2431 /* Discourage passing strange GFP flags */ 2432 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2433 __GFP_HIGHMEM))) 2434 return -EINVAL; 2435 2436 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2437 if (ret == 0 && ops->iotlb_sync_map) 2438 ops->iotlb_sync_map(domain, iova, size); 2439 2440 return ret; 2441 } 2442 EXPORT_SYMBOL_GPL(iommu_map); 2443 2444 static size_t __iommu_unmap_pages(struct iommu_domain *domain, 2445 unsigned long iova, size_t size, 2446 struct iommu_iotlb_gather *iotlb_gather) 2447 { 2448 const struct iommu_domain_ops *ops = domain->ops; 2449 size_t pgsize, count; 2450 2451 pgsize = iommu_pgsize(domain, iova, iova, size, &count); 2452 return ops->unmap_pages ? 2453 ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : 2454 ops->unmap(domain, iova, pgsize, iotlb_gather); 2455 } 2456 2457 static size_t __iommu_unmap(struct iommu_domain *domain, 2458 unsigned long iova, size_t size, 2459 struct iommu_iotlb_gather *iotlb_gather) 2460 { 2461 const struct iommu_domain_ops *ops = domain->ops; 2462 size_t unmapped_page, unmapped = 0; 2463 unsigned long orig_iova = iova; 2464 unsigned int min_pagesz; 2465 2466 if (unlikely(!(ops->unmap || ops->unmap_pages) || 2467 domain->pgsize_bitmap == 0UL)) 2468 return 0; 2469 2470 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2471 return 0; 2472 2473 /* find out the minimum page size supported */ 2474 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2475 2476 /* 2477 * The virtual address, as well as the size of the mapping, must be 2478 * aligned (at least) to the size of the smallest page supported 2479 * by the hardware 2480 */ 2481 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2482 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2483 iova, size, min_pagesz); 2484 return 0; 2485 } 2486 2487 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2488 2489 /* 2490 * Keep iterating until we either unmap 'size' bytes (or more) 2491 * or we hit an area that isn't mapped. 2492 */ 2493 while (unmapped < size) { 2494 unmapped_page = __iommu_unmap_pages(domain, iova, 2495 size - unmapped, 2496 iotlb_gather); 2497 if (!unmapped_page) 2498 break; 2499 2500 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2501 iova, unmapped_page); 2502 2503 iova += unmapped_page; 2504 unmapped += unmapped_page; 2505 } 2506 2507 trace_unmap(orig_iova, size, unmapped); 2508 return unmapped; 2509 } 2510 2511 size_t iommu_unmap(struct iommu_domain *domain, 2512 unsigned long iova, size_t size) 2513 { 2514 struct iommu_iotlb_gather iotlb_gather; 2515 size_t ret; 2516 2517 iommu_iotlb_gather_init(&iotlb_gather); 2518 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2519 iommu_iotlb_sync(domain, &iotlb_gather); 2520 2521 return ret; 2522 } 2523 EXPORT_SYMBOL_GPL(iommu_unmap); 2524 2525 size_t iommu_unmap_fast(struct iommu_domain *domain, 2526 unsigned long iova, size_t size, 2527 struct iommu_iotlb_gather *iotlb_gather) 2528 { 2529 return __iommu_unmap(domain, iova, size, iotlb_gather); 2530 } 2531 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2532 2533 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2534 struct scatterlist *sg, unsigned int nents, int prot, 2535 gfp_t gfp) 2536 { 2537 const struct iommu_domain_ops *ops = domain->ops; 2538 size_t len = 0, mapped = 0; 2539 phys_addr_t start; 2540 unsigned int i = 0; 2541 int ret; 2542 2543 might_sleep_if(gfpflags_allow_blocking(gfp)); 2544 2545 /* Discourage passing strange GFP flags */ 2546 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2547 __GFP_HIGHMEM))) 2548 return -EINVAL; 2549 2550 while (i <= nents) { 2551 phys_addr_t s_phys = sg_phys(sg); 2552 2553 if (len && s_phys != start + len) { 2554 ret = __iommu_map(domain, iova + mapped, start, 2555 len, prot, gfp); 2556 2557 if (ret) 2558 goto out_err; 2559 2560 mapped += len; 2561 len = 0; 2562 } 2563 2564 if (sg_is_dma_bus_address(sg)) 2565 goto next; 2566 2567 if (len) { 2568 len += sg->length; 2569 } else { 2570 len = sg->length; 2571 start = s_phys; 2572 } 2573 2574 next: 2575 if (++i < nents) 2576 sg = sg_next(sg); 2577 } 2578 2579 if (ops->iotlb_sync_map) 2580 ops->iotlb_sync_map(domain, iova, mapped); 2581 return mapped; 2582 2583 out_err: 2584 /* undo mappings already done */ 2585 iommu_unmap(domain, iova, mapped); 2586 2587 return ret; 2588 } 2589 EXPORT_SYMBOL_GPL(iommu_map_sg); 2590 2591 /** 2592 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2593 * @domain: the iommu domain where the fault has happened 2594 * @dev: the device where the fault has happened 2595 * @iova: the faulting address 2596 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2597 * 2598 * This function should be called by the low-level IOMMU implementations 2599 * whenever IOMMU faults happen, to allow high-level users, that are 2600 * interested in such events, to know about them. 2601 * 2602 * This event may be useful for several possible use cases: 2603 * - mere logging of the event 2604 * - dynamic TLB/PTE loading 2605 * - if restarting of the faulting device is required 2606 * 2607 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2608 * PTE/TLB loading will one day be supported, implementations will be able 2609 * to tell whether it succeeded or not according to this return value). 2610 * 2611 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2612 * (though fault handlers can also return -ENOSYS, in case they want to 2613 * elicit the default behavior of the IOMMU drivers). 2614 */ 2615 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2616 unsigned long iova, int flags) 2617 { 2618 int ret = -ENOSYS; 2619 2620 /* 2621 * if upper layers showed interest and installed a fault handler, 2622 * invoke it. 2623 */ 2624 if (domain->handler) 2625 ret = domain->handler(domain, dev, iova, flags, 2626 domain->handler_token); 2627 2628 trace_io_page_fault(dev, iova, flags); 2629 return ret; 2630 } 2631 EXPORT_SYMBOL_GPL(report_iommu_fault); 2632 2633 static int __init iommu_init(void) 2634 { 2635 iommu_group_kset = kset_create_and_add("iommu_groups", 2636 NULL, kernel_kobj); 2637 BUG_ON(!iommu_group_kset); 2638 2639 iommu_debugfs_setup(); 2640 2641 return 0; 2642 } 2643 core_initcall(iommu_init); 2644 2645 int iommu_enable_nesting(struct iommu_domain *domain) 2646 { 2647 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2648 return -EINVAL; 2649 if (!domain->ops->enable_nesting) 2650 return -EINVAL; 2651 return domain->ops->enable_nesting(domain); 2652 } 2653 EXPORT_SYMBOL_GPL(iommu_enable_nesting); 2654 2655 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2656 unsigned long quirk) 2657 { 2658 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2659 return -EINVAL; 2660 if (!domain->ops->set_pgtable_quirks) 2661 return -EINVAL; 2662 return domain->ops->set_pgtable_quirks(domain, quirk); 2663 } 2664 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2665 2666 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2667 { 2668 const struct iommu_ops *ops = dev_iommu_ops(dev); 2669 2670 if (ops->get_resv_regions) 2671 ops->get_resv_regions(dev, list); 2672 } 2673 2674 /** 2675 * iommu_put_resv_regions - release resered regions 2676 * @dev: device for which to free reserved regions 2677 * @list: reserved region list for device 2678 * 2679 * This releases a reserved region list acquired by iommu_get_resv_regions(). 2680 */ 2681 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2682 { 2683 struct iommu_resv_region *entry, *next; 2684 2685 list_for_each_entry_safe(entry, next, list, list) { 2686 if (entry->free) 2687 entry->free(dev, entry); 2688 else 2689 kfree(entry); 2690 } 2691 } 2692 EXPORT_SYMBOL(iommu_put_resv_regions); 2693 2694 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2695 size_t length, int prot, 2696 enum iommu_resv_type type, 2697 gfp_t gfp) 2698 { 2699 struct iommu_resv_region *region; 2700 2701 region = kzalloc(sizeof(*region), gfp); 2702 if (!region) 2703 return NULL; 2704 2705 INIT_LIST_HEAD(®ion->list); 2706 region->start = start; 2707 region->length = length; 2708 region->prot = prot; 2709 region->type = type; 2710 return region; 2711 } 2712 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2713 2714 void iommu_set_default_passthrough(bool cmd_line) 2715 { 2716 if (cmd_line) 2717 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2718 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2719 } 2720 2721 void iommu_set_default_translated(bool cmd_line) 2722 { 2723 if (cmd_line) 2724 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2725 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2726 } 2727 2728 bool iommu_default_passthrough(void) 2729 { 2730 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2731 } 2732 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2733 2734 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 2735 { 2736 const struct iommu_ops *ops = NULL; 2737 struct iommu_device *iommu; 2738 2739 spin_lock(&iommu_device_lock); 2740 list_for_each_entry(iommu, &iommu_device_list, list) 2741 if (iommu->fwnode == fwnode) { 2742 ops = iommu->ops; 2743 break; 2744 } 2745 spin_unlock(&iommu_device_lock); 2746 return ops; 2747 } 2748 2749 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 2750 const struct iommu_ops *ops) 2751 { 2752 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2753 2754 if (fwspec) 2755 return ops == fwspec->ops ? 0 : -EINVAL; 2756 2757 if (!dev_iommu_get(dev)) 2758 return -ENOMEM; 2759 2760 /* Preallocate for the overwhelmingly common case of 1 ID */ 2761 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2762 if (!fwspec) 2763 return -ENOMEM; 2764 2765 of_node_get(to_of_node(iommu_fwnode)); 2766 fwspec->iommu_fwnode = iommu_fwnode; 2767 fwspec->ops = ops; 2768 dev_iommu_fwspec_set(dev, fwspec); 2769 return 0; 2770 } 2771 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2772 2773 void iommu_fwspec_free(struct device *dev) 2774 { 2775 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2776 2777 if (fwspec) { 2778 fwnode_handle_put(fwspec->iommu_fwnode); 2779 kfree(fwspec); 2780 dev_iommu_fwspec_set(dev, NULL); 2781 } 2782 } 2783 EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2784 2785 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2786 { 2787 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2788 int i, new_num; 2789 2790 if (!fwspec) 2791 return -EINVAL; 2792 2793 new_num = fwspec->num_ids + num_ids; 2794 if (new_num > 1) { 2795 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2796 GFP_KERNEL); 2797 if (!fwspec) 2798 return -ENOMEM; 2799 2800 dev_iommu_fwspec_set(dev, fwspec); 2801 } 2802 2803 for (i = 0; i < num_ids; i++) 2804 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2805 2806 fwspec->num_ids = new_num; 2807 return 0; 2808 } 2809 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2810 2811 /* 2812 * Per device IOMMU features. 2813 */ 2814 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2815 { 2816 if (dev->iommu && dev->iommu->iommu_dev) { 2817 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2818 2819 if (ops->dev_enable_feat) 2820 return ops->dev_enable_feat(dev, feat); 2821 } 2822 2823 return -ENODEV; 2824 } 2825 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2826 2827 /* 2828 * The device drivers should do the necessary cleanups before calling this. 2829 */ 2830 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2831 { 2832 if (dev->iommu && dev->iommu->iommu_dev) { 2833 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2834 2835 if (ops->dev_disable_feat) 2836 return ops->dev_disable_feat(dev, feat); 2837 } 2838 2839 return -EBUSY; 2840 } 2841 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2842 2843 /* 2844 * Changes the default domain of an iommu group 2845 * 2846 * @group: The group for which the default domain should be changed 2847 * @dev: The first device in the group 2848 * @type: The type of the new default domain that gets associated with the group 2849 * 2850 * Returns 0 on success and error code on failure 2851 * 2852 * Note: 2853 * 1. Presently, this function is called only when user requests to change the 2854 * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type 2855 * Please take a closer look if intended to use for other purposes. 2856 */ 2857 static int iommu_change_dev_def_domain(struct iommu_group *group, 2858 struct device *dev, int type) 2859 { 2860 struct __group_domain_type gtype = {NULL, 0}; 2861 struct iommu_domain *prev_dom; 2862 int ret; 2863 2864 lockdep_assert_held(&group->mutex); 2865 2866 prev_dom = group->default_domain; 2867 __iommu_group_for_each_dev(group, >ype, 2868 probe_get_default_domain_type); 2869 if (!type) { 2870 /* 2871 * If the user hasn't requested any specific type of domain and 2872 * if the device supports both the domains, then default to the 2873 * domain the device was booted with 2874 */ 2875 type = gtype.type ? : iommu_def_domain_type; 2876 } else if (gtype.type && type != gtype.type) { 2877 dev_err_ratelimited(dev, "Device cannot be in %s domain\n", 2878 iommu_domain_type_str(type)); 2879 return -EINVAL; 2880 } 2881 2882 /* 2883 * Switch to a new domain only if the requested domain type is different 2884 * from the existing default domain type 2885 */ 2886 if (prev_dom->type == type) 2887 return 0; 2888 2889 group->default_domain = NULL; 2890 group->domain = NULL; 2891 2892 /* Sets group->default_domain to the newly allocated domain */ 2893 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2894 if (ret) 2895 goto restore_old_domain; 2896 2897 ret = iommu_group_create_direct_mappings(group); 2898 if (ret) 2899 goto free_new_domain; 2900 2901 ret = __iommu_attach_group(group->default_domain, group); 2902 if (ret) 2903 goto free_new_domain; 2904 2905 iommu_domain_free(prev_dom); 2906 2907 return 0; 2908 2909 free_new_domain: 2910 iommu_domain_free(group->default_domain); 2911 restore_old_domain: 2912 group->default_domain = prev_dom; 2913 group->domain = prev_dom; 2914 2915 return ret; 2916 } 2917 2918 /* 2919 * Changing the default domain through sysfs requires the users to unbind the 2920 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2921 * transition. Return failure if this isn't met. 2922 * 2923 * We need to consider the race between this and the device release path. 2924 * group->mutex is used here to guarantee that the device release path 2925 * will not be entered at the same time. 2926 */ 2927 static ssize_t iommu_group_store_type(struct iommu_group *group, 2928 const char *buf, size_t count) 2929 { 2930 struct group_device *grp_dev; 2931 struct device *dev; 2932 int ret, req_type; 2933 2934 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2935 return -EACCES; 2936 2937 if (WARN_ON(!group) || !group->default_domain) 2938 return -EINVAL; 2939 2940 if (sysfs_streq(buf, "identity")) 2941 req_type = IOMMU_DOMAIN_IDENTITY; 2942 else if (sysfs_streq(buf, "DMA")) 2943 req_type = IOMMU_DOMAIN_DMA; 2944 else if (sysfs_streq(buf, "DMA-FQ")) 2945 req_type = IOMMU_DOMAIN_DMA_FQ; 2946 else if (sysfs_streq(buf, "auto")) 2947 req_type = 0; 2948 else 2949 return -EINVAL; 2950 2951 mutex_lock(&group->mutex); 2952 /* We can bring up a flush queue without tearing down the domain. */ 2953 if (req_type == IOMMU_DOMAIN_DMA_FQ && 2954 group->default_domain->type == IOMMU_DOMAIN_DMA) { 2955 ret = iommu_dma_init_fq(group->default_domain); 2956 if (!ret) 2957 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 2958 mutex_unlock(&group->mutex); 2959 2960 return ret ?: count; 2961 } 2962 2963 /* Otherwise, ensure that device exists and no driver is bound. */ 2964 if (list_empty(&group->devices) || group->owner_cnt) { 2965 mutex_unlock(&group->mutex); 2966 return -EPERM; 2967 } 2968 2969 grp_dev = list_first_entry(&group->devices, struct group_device, list); 2970 dev = grp_dev->dev; 2971 2972 ret = iommu_change_dev_def_domain(group, dev, req_type); 2973 2974 /* 2975 * Release the mutex here because ops->probe_finalize() call-back of 2976 * some vendor IOMMU drivers calls arm_iommu_attach_device() which 2977 * in-turn might call back into IOMMU core code, where it tries to take 2978 * group->mutex, resulting in a deadlock. 2979 */ 2980 mutex_unlock(&group->mutex); 2981 2982 /* Make sure dma_ops is appropriatley set */ 2983 if (!ret) 2984 __iommu_group_dma_finalize(group); 2985 2986 return ret ?: count; 2987 } 2988 2989 static bool iommu_is_default_domain(struct iommu_group *group) 2990 { 2991 if (group->domain == group->default_domain) 2992 return true; 2993 2994 /* 2995 * If the default domain was set to identity and it is still an identity 2996 * domain then we consider this a pass. This happens because of 2997 * amd_iommu_init_device() replacing the default idenytity domain with an 2998 * identity domain that has a different configuration for AMDGPU. 2999 */ 3000 if (group->default_domain && 3001 group->default_domain->type == IOMMU_DOMAIN_IDENTITY && 3002 group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) 3003 return true; 3004 return false; 3005 } 3006 3007 /** 3008 * iommu_device_use_default_domain() - Device driver wants to handle device 3009 * DMA through the kernel DMA API. 3010 * @dev: The device. 3011 * 3012 * The device driver about to bind @dev wants to do DMA through the kernel 3013 * DMA API. Return 0 if it is allowed, otherwise an error. 3014 */ 3015 int iommu_device_use_default_domain(struct device *dev) 3016 { 3017 struct iommu_group *group = iommu_group_get(dev); 3018 int ret = 0; 3019 3020 if (!group) 3021 return 0; 3022 3023 mutex_lock(&group->mutex); 3024 if (group->owner_cnt) { 3025 if (group->owner || !iommu_is_default_domain(group) || 3026 !xa_empty(&group->pasid_array)) { 3027 ret = -EBUSY; 3028 goto unlock_out; 3029 } 3030 } 3031 3032 group->owner_cnt++; 3033 3034 unlock_out: 3035 mutex_unlock(&group->mutex); 3036 iommu_group_put(group); 3037 3038 return ret; 3039 } 3040 3041 /** 3042 * iommu_device_unuse_default_domain() - Device driver stops handling device 3043 * DMA through the kernel DMA API. 3044 * @dev: The device. 3045 * 3046 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3047 * It must be called after iommu_device_use_default_domain(). 3048 */ 3049 void iommu_device_unuse_default_domain(struct device *dev) 3050 { 3051 struct iommu_group *group = iommu_group_get(dev); 3052 3053 if (!group) 3054 return; 3055 3056 mutex_lock(&group->mutex); 3057 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3058 group->owner_cnt--; 3059 3060 mutex_unlock(&group->mutex); 3061 iommu_group_put(group); 3062 } 3063 3064 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3065 { 3066 struct group_device *dev = 3067 list_first_entry(&group->devices, struct group_device, list); 3068 3069 if (group->blocking_domain) 3070 return 0; 3071 3072 group->blocking_domain = 3073 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3074 if (!group->blocking_domain) { 3075 /* 3076 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3077 * create an empty domain instead. 3078 */ 3079 group->blocking_domain = __iommu_domain_alloc( 3080 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3081 if (!group->blocking_domain) 3082 return -EINVAL; 3083 } 3084 return 0; 3085 } 3086 3087 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3088 { 3089 int ret; 3090 3091 if ((group->domain && group->domain != group->default_domain) || 3092 !xa_empty(&group->pasid_array)) 3093 return -EBUSY; 3094 3095 ret = __iommu_group_alloc_blocking_domain(group); 3096 if (ret) 3097 return ret; 3098 ret = __iommu_group_set_domain(group, group->blocking_domain); 3099 if (ret) 3100 return ret; 3101 3102 group->owner = owner; 3103 group->owner_cnt++; 3104 return 0; 3105 } 3106 3107 /** 3108 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3109 * @group: The group. 3110 * @owner: Caller specified pointer. Used for exclusive ownership. 3111 * 3112 * This is to support backward compatibility for vfio which manages the dma 3113 * ownership in iommu_group level. New invocations on this interface should be 3114 * prohibited. Only a single owner may exist for a group. 3115 */ 3116 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3117 { 3118 int ret = 0; 3119 3120 if (WARN_ON(!owner)) 3121 return -EINVAL; 3122 3123 mutex_lock(&group->mutex); 3124 if (group->owner_cnt) { 3125 ret = -EPERM; 3126 goto unlock_out; 3127 } 3128 3129 ret = __iommu_take_dma_ownership(group, owner); 3130 unlock_out: 3131 mutex_unlock(&group->mutex); 3132 3133 return ret; 3134 } 3135 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3136 3137 /** 3138 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3139 * @dev: The device. 3140 * @owner: Caller specified pointer. Used for exclusive ownership. 3141 * 3142 * Claim the DMA ownership of a device. Multiple devices in the same group may 3143 * concurrently claim ownership if they present the same owner value. Returns 0 3144 * on success and error code on failure 3145 */ 3146 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3147 { 3148 struct iommu_group *group; 3149 int ret = 0; 3150 3151 if (WARN_ON(!owner)) 3152 return -EINVAL; 3153 3154 group = iommu_group_get(dev); 3155 if (!group) 3156 return -ENODEV; 3157 3158 mutex_lock(&group->mutex); 3159 if (group->owner_cnt) { 3160 if (group->owner != owner) { 3161 ret = -EPERM; 3162 goto unlock_out; 3163 } 3164 group->owner_cnt++; 3165 goto unlock_out; 3166 } 3167 3168 ret = __iommu_take_dma_ownership(group, owner); 3169 unlock_out: 3170 mutex_unlock(&group->mutex); 3171 iommu_group_put(group); 3172 3173 return ret; 3174 } 3175 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3176 3177 static void __iommu_release_dma_ownership(struct iommu_group *group) 3178 { 3179 int ret; 3180 3181 if (WARN_ON(!group->owner_cnt || !group->owner || 3182 !xa_empty(&group->pasid_array))) 3183 return; 3184 3185 group->owner_cnt = 0; 3186 group->owner = NULL; 3187 ret = __iommu_group_set_domain(group, group->default_domain); 3188 WARN(ret, "iommu driver failed to attach the default domain"); 3189 } 3190 3191 /** 3192 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3193 * @dev: The device 3194 * 3195 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3196 */ 3197 void iommu_group_release_dma_owner(struct iommu_group *group) 3198 { 3199 mutex_lock(&group->mutex); 3200 __iommu_release_dma_ownership(group); 3201 mutex_unlock(&group->mutex); 3202 } 3203 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3204 3205 /** 3206 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3207 * @group: The device. 3208 * 3209 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3210 */ 3211 void iommu_device_release_dma_owner(struct device *dev) 3212 { 3213 struct iommu_group *group = iommu_group_get(dev); 3214 3215 mutex_lock(&group->mutex); 3216 if (group->owner_cnt > 1) 3217 group->owner_cnt--; 3218 else 3219 __iommu_release_dma_ownership(group); 3220 mutex_unlock(&group->mutex); 3221 iommu_group_put(group); 3222 } 3223 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3224 3225 /** 3226 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3227 * @group: The group. 3228 * 3229 * This provides status query on a given group. It is racy and only for 3230 * non-binding status reporting. 3231 */ 3232 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3233 { 3234 unsigned int user; 3235 3236 mutex_lock(&group->mutex); 3237 user = group->owner_cnt; 3238 mutex_unlock(&group->mutex); 3239 3240 return user; 3241 } 3242 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3243 3244 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3245 struct iommu_group *group, ioasid_t pasid) 3246 { 3247 struct group_device *device; 3248 int ret = 0; 3249 3250 for_each_group_device(group, device) { 3251 ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); 3252 if (ret) 3253 break; 3254 } 3255 3256 return ret; 3257 } 3258 3259 static void __iommu_remove_group_pasid(struct iommu_group *group, 3260 ioasid_t pasid) 3261 { 3262 struct group_device *device; 3263 const struct iommu_ops *ops; 3264 3265 for_each_group_device(group, device) { 3266 ops = dev_iommu_ops(device->dev); 3267 ops->remove_dev_pasid(device->dev, pasid); 3268 } 3269 } 3270 3271 /* 3272 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3273 * @domain: the iommu domain. 3274 * @dev: the attached device. 3275 * @pasid: the pasid of the device. 3276 * 3277 * Return: 0 on success, or an error. 3278 */ 3279 int iommu_attach_device_pasid(struct iommu_domain *domain, 3280 struct device *dev, ioasid_t pasid) 3281 { 3282 struct iommu_group *group; 3283 void *curr; 3284 int ret; 3285 3286 if (!domain->ops->set_dev_pasid) 3287 return -EOPNOTSUPP; 3288 3289 group = iommu_group_get(dev); 3290 if (!group) 3291 return -ENODEV; 3292 3293 mutex_lock(&group->mutex); 3294 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); 3295 if (curr) { 3296 ret = xa_err(curr) ? : -EBUSY; 3297 goto out_unlock; 3298 } 3299 3300 ret = __iommu_set_group_pasid(domain, group, pasid); 3301 if (ret) { 3302 __iommu_remove_group_pasid(group, pasid); 3303 xa_erase(&group->pasid_array, pasid); 3304 } 3305 out_unlock: 3306 mutex_unlock(&group->mutex); 3307 iommu_group_put(group); 3308 3309 return ret; 3310 } 3311 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3312 3313 /* 3314 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3315 * @domain: the iommu domain. 3316 * @dev: the attached device. 3317 * @pasid: the pasid of the device. 3318 * 3319 * The @domain must have been attached to @pasid of the @dev with 3320 * iommu_attach_device_pasid(). 3321 */ 3322 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3323 ioasid_t pasid) 3324 { 3325 struct iommu_group *group = iommu_group_get(dev); 3326 3327 mutex_lock(&group->mutex); 3328 __iommu_remove_group_pasid(group, pasid); 3329 WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); 3330 mutex_unlock(&group->mutex); 3331 3332 iommu_group_put(group); 3333 } 3334 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3335 3336 /* 3337 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev 3338 * @dev: the queried device 3339 * @pasid: the pasid of the device 3340 * @type: matched domain type, 0 for any match 3341 * 3342 * This is a variant of iommu_get_domain_for_dev(). It returns the existing 3343 * domain attached to pasid of a device. Callers must hold a lock around this 3344 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of 3345 * type is being manipulated. This API does not internally resolve races with 3346 * attach/detach. 3347 * 3348 * Return: attached domain on success, NULL otherwise. 3349 */ 3350 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, 3351 ioasid_t pasid, 3352 unsigned int type) 3353 { 3354 struct iommu_domain *domain; 3355 struct iommu_group *group; 3356 3357 group = iommu_group_get(dev); 3358 if (!group) 3359 return NULL; 3360 3361 xa_lock(&group->pasid_array); 3362 domain = xa_load(&group->pasid_array, pasid); 3363 if (type && domain && domain->type != type) 3364 domain = ERR_PTR(-EBUSY); 3365 xa_unlock(&group->pasid_array); 3366 iommu_group_put(group); 3367 3368 return domain; 3369 } 3370 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); 3371 3372 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, 3373 struct mm_struct *mm) 3374 { 3375 const struct iommu_ops *ops = dev_iommu_ops(dev); 3376 struct iommu_domain *domain; 3377 3378 domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); 3379 if (!domain) 3380 return NULL; 3381 3382 domain->type = IOMMU_DOMAIN_SVA; 3383 mmgrab(mm); 3384 domain->mm = mm; 3385 domain->iopf_handler = iommu_sva_handle_iopf; 3386 domain->fault_data = mm; 3387 3388 return domain; 3389 } 3390