1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7 #define pr_fmt(fmt) "iommu: " fmt 8 9 #include <linux/amba/bus.h> 10 #include <linux/device.h> 11 #include <linux/kernel.h> 12 #include <linux/bits.h> 13 #include <linux/bug.h> 14 #include <linux/types.h> 15 #include <linux/init.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/errno.h> 19 #include <linux/host1x_context_bus.h> 20 #include <linux/iommu.h> 21 #include <linux/idr.h> 22 #include <linux/err.h> 23 #include <linux/pci.h> 24 #include <linux/pci-ats.h> 25 #include <linux/bitops.h> 26 #include <linux/platform_device.h> 27 #include <linux/property.h> 28 #include <linux/fsl/mc.h> 29 #include <linux/module.h> 30 #include <linux/cc_platform.h> 31 #include <linux/cdx/cdx_bus.h> 32 #include <trace/events/iommu.h> 33 #include <linux/sched/mm.h> 34 #include <linux/msi.h> 35 36 #include "dma-iommu.h" 37 38 #include "iommu-sva.h" 39 40 static struct kset *iommu_group_kset; 41 static DEFINE_IDA(iommu_group_ida); 42 43 static unsigned int iommu_def_domain_type __read_mostly; 44 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 45 static u32 iommu_cmd_line __read_mostly; 46 47 struct iommu_group { 48 struct kobject kobj; 49 struct kobject *devices_kobj; 50 struct list_head devices; 51 struct xarray pasid_array; 52 struct mutex mutex; 53 void *iommu_data; 54 void (*iommu_data_release)(void *iommu_data); 55 char *name; 56 int id; 57 struct iommu_domain *default_domain; 58 struct iommu_domain *blocking_domain; 59 struct iommu_domain *domain; 60 struct list_head entry; 61 unsigned int owner_cnt; 62 void *owner; 63 }; 64 65 struct group_device { 66 struct list_head list; 67 struct device *dev; 68 char *name; 69 }; 70 71 /* Iterate over each struct group_device in a struct iommu_group */ 72 #define for_each_group_device(group, pos) \ 73 list_for_each_entry(pos, &(group)->devices, list) 74 75 struct iommu_group_attribute { 76 struct attribute attr; 77 ssize_t (*show)(struct iommu_group *group, char *buf); 78 ssize_t (*store)(struct iommu_group *group, 79 const char *buf, size_t count); 80 }; 81 82 static const char * const iommu_group_resv_type_string[] = { 83 [IOMMU_RESV_DIRECT] = "direct", 84 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 85 [IOMMU_RESV_RESERVED] = "reserved", 86 [IOMMU_RESV_MSI] = "msi", 87 [IOMMU_RESV_SW_MSI] = "msi", 88 }; 89 90 #define IOMMU_CMD_LINE_DMA_API BIT(0) 91 #define IOMMU_CMD_LINE_STRICT BIT(1) 92 93 static int iommu_bus_notifier(struct notifier_block *nb, 94 unsigned long action, void *data); 95 static void iommu_release_device(struct device *dev); 96 static int iommu_alloc_default_domain(struct iommu_group *group, 97 struct device *dev); 98 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, 99 unsigned type); 100 static int __iommu_attach_device(struct iommu_domain *domain, 101 struct device *dev); 102 static int __iommu_attach_group(struct iommu_domain *domain, 103 struct iommu_group *group); 104 105 enum { 106 IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0, 107 }; 108 109 static int __iommu_device_set_domain(struct iommu_group *group, 110 struct device *dev, 111 struct iommu_domain *new_domain, 112 unsigned int flags); 113 static int __iommu_group_set_domain_internal(struct iommu_group *group, 114 struct iommu_domain *new_domain, 115 unsigned int flags); 116 static int __iommu_group_set_domain(struct iommu_group *group, 
117 struct iommu_domain *new_domain) 118 { 119 return __iommu_group_set_domain_internal(group, new_domain, 0); 120 } 121 static void __iommu_group_set_domain_nofail(struct iommu_group *group, 122 struct iommu_domain *new_domain) 123 { 124 WARN_ON(__iommu_group_set_domain_internal( 125 group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); 126 } 127 128 static int iommu_create_device_direct_mappings(struct iommu_group *group, 129 struct device *dev); 130 static struct iommu_group *iommu_group_get_for_dev(struct device *dev); 131 static ssize_t iommu_group_store_type(struct iommu_group *group, 132 const char *buf, size_t count); 133 134 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 135 struct iommu_group_attribute iommu_group_attr_##_name = \ 136 __ATTR(_name, _mode, _show, _store) 137 138 #define to_iommu_group_attr(_attr) \ 139 container_of(_attr, struct iommu_group_attribute, attr) 140 #define to_iommu_group(_kobj) \ 141 container_of(_kobj, struct iommu_group, kobj) 142 143 static LIST_HEAD(iommu_device_list); 144 static DEFINE_SPINLOCK(iommu_device_lock); 145 146 static struct bus_type * const iommu_buses[] = { 147 &platform_bus_type, 148 #ifdef CONFIG_PCI 149 &pci_bus_type, 150 #endif 151 #ifdef CONFIG_ARM_AMBA 152 &amba_bustype, 153 #endif 154 #ifdef CONFIG_FSL_MC_BUS 155 &fsl_mc_bus_type, 156 #endif 157 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 158 &host1x_context_device_bus_type, 159 #endif 160 #ifdef CONFIG_CDX_BUS 161 &cdx_bus_type, 162 #endif 163 }; 164 165 /* 166 * Use a function instead of an array here because the domain-type is a 167 * bit-field, so an array would waste memory. 168 */ 169 static const char *iommu_domain_type_str(unsigned int t) 170 { 171 switch (t) { 172 case IOMMU_DOMAIN_BLOCKED: 173 return "Blocked"; 174 case IOMMU_DOMAIN_IDENTITY: 175 return "Passthrough"; 176 case IOMMU_DOMAIN_UNMANAGED: 177 return "Unmanaged"; 178 case IOMMU_DOMAIN_DMA: 179 case IOMMU_DOMAIN_DMA_FQ: 180 return "Translated"; 181 default: 182 return "Unknown"; 183 } 184 } 185 186 static int __init iommu_subsys_init(void) 187 { 188 struct notifier_block *nb; 189 190 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 191 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 192 iommu_set_default_passthrough(false); 193 else 194 iommu_set_default_translated(false); 195 196 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 197 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 198 iommu_set_default_translated(false); 199 } 200 } 201 202 if (!iommu_default_passthrough() && !iommu_dma_strict) 203 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 204 205 pr_info("Default domain type: %s%s\n", 206 iommu_domain_type_str(iommu_def_domain_type), 207 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 208 " (set via kernel command line)" : ""); 209 210 if (!iommu_default_passthrough()) 211 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 212 iommu_dma_strict ? "strict" : "lazy", 213 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 
214 " (set via kernel command line)" : ""); 215 216 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 217 if (!nb) 218 return -ENOMEM; 219 220 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 221 nb[i].notifier_call = iommu_bus_notifier; 222 bus_register_notifier(iommu_buses[i], &nb[i]); 223 } 224 225 return 0; 226 } 227 subsys_initcall(iommu_subsys_init); 228 229 static int remove_iommu_group(struct device *dev, void *data) 230 { 231 if (dev->iommu && dev->iommu->iommu_dev == data) 232 iommu_release_device(dev); 233 234 return 0; 235 } 236 237 /** 238 * iommu_device_register() - Register an IOMMU hardware instance 239 * @iommu: IOMMU handle for the instance 240 * @ops: IOMMU ops to associate with the instance 241 * @hwdev: (optional) actual instance device, used for fwnode lookup 242 * 243 * Return: 0 on success, or an error. 244 */ 245 int iommu_device_register(struct iommu_device *iommu, 246 const struct iommu_ops *ops, struct device *hwdev) 247 { 248 int err = 0; 249 250 /* We need to be able to take module references appropriately */ 251 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 252 return -EINVAL; 253 /* 254 * Temporarily enforce global restriction to a single driver. This was 255 * already the de-facto behaviour, since any possible combination of 256 * existing drivers would compete for at least the PCI or platform bus. 257 */ 258 if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) 259 return -EBUSY; 260 261 iommu->ops = ops; 262 if (hwdev) 263 iommu->fwnode = dev_fwnode(hwdev); 264 265 spin_lock(&iommu_device_lock); 266 list_add_tail(&iommu->list, &iommu_device_list); 267 spin_unlock(&iommu_device_lock); 268 269 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { 270 iommu_buses[i]->iommu_ops = ops; 271 err = bus_iommu_probe(iommu_buses[i]); 272 } 273 if (err) 274 iommu_device_unregister(iommu); 275 return err; 276 } 277 EXPORT_SYMBOL_GPL(iommu_device_register); 278 279 void iommu_device_unregister(struct iommu_device *iommu) 280 { 281 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 282 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 283 284 spin_lock(&iommu_device_lock); 285 list_del(&iommu->list); 286 spin_unlock(&iommu_device_lock); 287 } 288 EXPORT_SYMBOL_GPL(iommu_device_unregister); 289 290 static struct dev_iommu *dev_iommu_get(struct device *dev) 291 { 292 struct dev_iommu *param = dev->iommu; 293 294 if (param) 295 return param; 296 297 param = kzalloc(sizeof(*param), GFP_KERNEL); 298 if (!param) 299 return NULL; 300 301 mutex_init(¶m->lock); 302 dev->iommu = param; 303 return param; 304 } 305 306 static void dev_iommu_free(struct device *dev) 307 { 308 struct dev_iommu *param = dev->iommu; 309 310 dev->iommu = NULL; 311 if (param->fwspec) { 312 fwnode_handle_put(param->fwspec->iommu_fwnode); 313 kfree(param->fwspec); 314 } 315 kfree(param); 316 } 317 318 static u32 dev_iommu_get_max_pasids(struct device *dev) 319 { 320 u32 max_pasids = 0, bits = 0; 321 int ret; 322 323 if (dev_is_pci(dev)) { 324 ret = pci_max_pasids(to_pci_dev(dev)); 325 if (ret > 0) 326 max_pasids = ret; 327 } else { 328 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 329 if (!ret) 330 max_pasids = 1UL << bits; 331 } 332 333 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 334 } 335 336 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 337 { 338 const struct iommu_ops *ops = dev->bus->iommu_ops; 339 struct iommu_device *iommu_dev; 340 struct iommu_group 
*group; 341 static DEFINE_MUTEX(iommu_probe_device_lock); 342 int ret; 343 344 if (!ops) 345 return -ENODEV; 346 /* 347 * Serialise to avoid races between IOMMU drivers registering in 348 * parallel and/or the "replay" calls from ACPI/OF code via client 349 * driver probe. Once the latter have been cleaned up we should 350 * probably be able to use device_lock() here to minimise the scope, 351 * but for now enforcing a simple global ordering is fine. 352 */ 353 mutex_lock(&iommu_probe_device_lock); 354 if (!dev_iommu_get(dev)) { 355 ret = -ENOMEM; 356 goto err_unlock; 357 } 358 359 if (!try_module_get(ops->owner)) { 360 ret = -EINVAL; 361 goto err_free; 362 } 363 364 iommu_dev = ops->probe_device(dev); 365 if (IS_ERR(iommu_dev)) { 366 ret = PTR_ERR(iommu_dev); 367 goto out_module_put; 368 } 369 370 dev->iommu->iommu_dev = iommu_dev; 371 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 372 if (ops->is_attach_deferred) 373 dev->iommu->attach_deferred = ops->is_attach_deferred(dev); 374 375 group = iommu_group_get_for_dev(dev); 376 if (IS_ERR(group)) { 377 ret = PTR_ERR(group); 378 goto out_release; 379 } 380 381 mutex_lock(&group->mutex); 382 if (group_list && !group->default_domain && list_empty(&group->entry)) 383 list_add_tail(&group->entry, group_list); 384 mutex_unlock(&group->mutex); 385 iommu_group_put(group); 386 387 mutex_unlock(&iommu_probe_device_lock); 388 iommu_device_link(iommu_dev, dev); 389 390 return 0; 391 392 out_release: 393 if (ops->release_device) 394 ops->release_device(dev); 395 396 out_module_put: 397 module_put(ops->owner); 398 399 err_free: 400 dev_iommu_free(dev); 401 402 err_unlock: 403 mutex_unlock(&iommu_probe_device_lock); 404 405 return ret; 406 } 407 408 int iommu_probe_device(struct device *dev) 409 { 410 const struct iommu_ops *ops; 411 struct iommu_group *group; 412 int ret; 413 414 ret = __iommu_probe_device(dev, NULL); 415 if (ret) 416 goto err_out; 417 418 group = iommu_group_get(dev); 419 if (!group) { 420 ret = -ENODEV; 421 goto err_release; 422 } 423 424 mutex_lock(&group->mutex); 425 426 iommu_create_device_direct_mappings(group, dev); 427 428 if (group->domain) { 429 ret = __iommu_device_set_domain(group, dev, group->domain, 0); 430 } else if (!group->default_domain) { 431 /* 432 * Try to allocate a default domain - needs support from the 433 * IOMMU driver. There are still some drivers which don't 434 * support default domains, so the return value is not yet 435 * checked. 436 */ 437 iommu_alloc_default_domain(group, dev); 438 if (group->default_domain) { 439 iommu_create_device_direct_mappings(group, dev); 440 ret = __iommu_group_set_domain(group, 441 group->default_domain); 442 } 443 444 /* 445 * We assume that the iommu driver starts up the device in 446 * 'set_platform_dma_ops' mode if it does not support default 447 * domains. 448 */ 449 } 450 if (ret) 451 goto err_unlock; 452 453 mutex_unlock(&group->mutex); 454 iommu_group_put(group); 455 456 ops = dev_iommu_ops(dev); 457 if (ops->probe_finalize) 458 ops->probe_finalize(dev); 459 460 return 0; 461 462 err_unlock: 463 mutex_unlock(&group->mutex); 464 iommu_group_put(group); 465 err_release: 466 iommu_release_device(dev); 467 468 err_out: 469 return ret; 470 471 } 472 473 /* 474 * Remove a device from a group's device list and return the group device 475 * if successful. 
476 */ 477 static struct group_device * 478 __iommu_group_remove_device(struct iommu_group *group, struct device *dev) 479 { 480 struct group_device *device; 481 482 lockdep_assert_held(&group->mutex); 483 for_each_group_device(group, device) { 484 if (device->dev == dev) { 485 list_del(&device->list); 486 return device; 487 } 488 } 489 490 return NULL; 491 } 492 493 /* 494 * Release a device from its group and decrements the iommu group reference 495 * count. 496 */ 497 static void __iommu_group_release_device(struct iommu_group *group, 498 struct group_device *grp_dev) 499 { 500 struct device *dev = grp_dev->dev; 501 502 sysfs_remove_link(group->devices_kobj, grp_dev->name); 503 sysfs_remove_link(&dev->kobj, "iommu_group"); 504 505 trace_remove_device_from_group(group->id, dev); 506 507 kfree(grp_dev->name); 508 kfree(grp_dev); 509 dev->iommu_group = NULL; 510 kobject_put(group->devices_kobj); 511 } 512 513 static void iommu_release_device(struct device *dev) 514 { 515 struct iommu_group *group = dev->iommu_group; 516 struct group_device *device; 517 const struct iommu_ops *ops; 518 519 if (!dev->iommu || !group) 520 return; 521 522 iommu_device_unlink(dev->iommu->iommu_dev, dev); 523 524 mutex_lock(&group->mutex); 525 device = __iommu_group_remove_device(group, dev); 526 527 /* 528 * If the group has become empty then ownership must have been released, 529 * and the current domain must be set back to NULL or the default 530 * domain. 531 */ 532 if (list_empty(&group->devices)) 533 WARN_ON(group->owner_cnt || 534 group->domain != group->default_domain); 535 536 /* 537 * release_device() must stop using any attached domain on the device. 538 * If there are still other devices in the group they are not effected 539 * by this callback. 540 * 541 * The IOMMU driver must set the device to either an identity or 542 * blocking translation and stop using any domain pointer, as it is 543 * going to be freed. 
544 */ 545 ops = dev_iommu_ops(dev); 546 if (ops->release_device) 547 ops->release_device(dev); 548 mutex_unlock(&group->mutex); 549 550 if (device) 551 __iommu_group_release_device(group, device); 552 553 module_put(ops->owner); 554 dev_iommu_free(dev); 555 } 556 557 static int __init iommu_set_def_domain_type(char *str) 558 { 559 bool pt; 560 int ret; 561 562 ret = kstrtobool(str, &pt); 563 if (ret) 564 return ret; 565 566 if (pt) 567 iommu_set_default_passthrough(true); 568 else 569 iommu_set_default_translated(true); 570 571 return 0; 572 } 573 early_param("iommu.passthrough", iommu_set_def_domain_type); 574 575 static int __init iommu_dma_setup(char *str) 576 { 577 int ret = kstrtobool(str, &iommu_dma_strict); 578 579 if (!ret) 580 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 581 return ret; 582 } 583 early_param("iommu.strict", iommu_dma_setup); 584 585 void iommu_set_dma_strict(void) 586 { 587 iommu_dma_strict = true; 588 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 589 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 590 } 591 592 static ssize_t iommu_group_attr_show(struct kobject *kobj, 593 struct attribute *__attr, char *buf) 594 { 595 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 596 struct iommu_group *group = to_iommu_group(kobj); 597 ssize_t ret = -EIO; 598 599 if (attr->show) 600 ret = attr->show(group, buf); 601 return ret; 602 } 603 604 static ssize_t iommu_group_attr_store(struct kobject *kobj, 605 struct attribute *__attr, 606 const char *buf, size_t count) 607 { 608 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 609 struct iommu_group *group = to_iommu_group(kobj); 610 ssize_t ret = -EIO; 611 612 if (attr->store) 613 ret = attr->store(group, buf, count); 614 return ret; 615 } 616 617 static const struct sysfs_ops iommu_group_sysfs_ops = { 618 .show = iommu_group_attr_show, 619 .store = iommu_group_attr_store, 620 }; 621 622 static int iommu_group_create_file(struct iommu_group *group, 623 struct iommu_group_attribute *attr) 624 { 625 return sysfs_create_file(&group->kobj, &attr->attr); 626 } 627 628 static void iommu_group_remove_file(struct iommu_group *group, 629 struct iommu_group_attribute *attr) 630 { 631 sysfs_remove_file(&group->kobj, &attr->attr); 632 } 633 634 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 635 { 636 return sysfs_emit(buf, "%s\n", group->name); 637 } 638 639 /** 640 * iommu_insert_resv_region - Insert a new region in the 641 * list of reserved regions. 642 * @new: new region to insert 643 * @regions: list of regions 644 * 645 * Elements are sorted by start address and overlapping segments 646 * of the same type are merged. 
647 */ 648 static int iommu_insert_resv_region(struct iommu_resv_region *new, 649 struct list_head *regions) 650 { 651 struct iommu_resv_region *iter, *tmp, *nr, *top; 652 LIST_HEAD(stack); 653 654 nr = iommu_alloc_resv_region(new->start, new->length, 655 new->prot, new->type, GFP_KERNEL); 656 if (!nr) 657 return -ENOMEM; 658 659 /* First add the new element based on start address sorting */ 660 list_for_each_entry(iter, regions, list) { 661 if (nr->start < iter->start || 662 (nr->start == iter->start && nr->type <= iter->type)) 663 break; 664 } 665 list_add_tail(&nr->list, &iter->list); 666 667 /* Merge overlapping segments of type nr->type in @regions, if any */ 668 list_for_each_entry_safe(iter, tmp, regions, list) { 669 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 670 671 /* no merge needed on elements of different types than @new */ 672 if (iter->type != new->type) { 673 list_move_tail(&iter->list, &stack); 674 continue; 675 } 676 677 /* look for the last stack element of same type as @iter */ 678 list_for_each_entry_reverse(top, &stack, list) 679 if (top->type == iter->type) 680 goto check_overlap; 681 682 list_move_tail(&iter->list, &stack); 683 continue; 684 685 check_overlap: 686 top_end = top->start + top->length - 1; 687 688 if (iter->start > top_end + 1) { 689 list_move_tail(&iter->list, &stack); 690 } else { 691 top->length = max(top_end, iter_end) - top->start + 1; 692 list_del(&iter->list); 693 kfree(iter); 694 } 695 } 696 list_splice(&stack, regions); 697 return 0; 698 } 699 700 static int 701 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 702 struct list_head *group_resv_regions) 703 { 704 struct iommu_resv_region *entry; 705 int ret = 0; 706 707 list_for_each_entry(entry, dev_resv_regions, list) { 708 ret = iommu_insert_resv_region(entry, group_resv_regions); 709 if (ret) 710 break; 711 } 712 return ret; 713 } 714 715 int iommu_get_group_resv_regions(struct iommu_group *group, 716 struct list_head *head) 717 { 718 struct group_device *device; 719 int ret = 0; 720 721 mutex_lock(&group->mutex); 722 for_each_group_device(group, device) { 723 struct list_head dev_resv_regions; 724 725 /* 726 * Non-API groups still expose reserved_regions in sysfs, 727 * so filter out calls that get here that way. 
728 */ 729 if (!device->dev->iommu) 730 break; 731 732 INIT_LIST_HEAD(&dev_resv_regions); 733 iommu_get_resv_regions(device->dev, &dev_resv_regions); 734 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 735 iommu_put_resv_regions(device->dev, &dev_resv_regions); 736 if (ret) 737 break; 738 } 739 mutex_unlock(&group->mutex); 740 return ret; 741 } 742 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 743 744 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 745 char *buf) 746 { 747 struct iommu_resv_region *region, *next; 748 struct list_head group_resv_regions; 749 int offset = 0; 750 751 INIT_LIST_HEAD(&group_resv_regions); 752 iommu_get_group_resv_regions(group, &group_resv_regions); 753 754 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 755 offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", 756 (long long)region->start, 757 (long long)(region->start + 758 region->length - 1), 759 iommu_group_resv_type_string[region->type]); 760 kfree(region); 761 } 762 763 return offset; 764 } 765 766 static ssize_t iommu_group_show_type(struct iommu_group *group, 767 char *buf) 768 { 769 char *type = "unknown"; 770 771 mutex_lock(&group->mutex); 772 if (group->default_domain) { 773 switch (group->default_domain->type) { 774 case IOMMU_DOMAIN_BLOCKED: 775 type = "blocked"; 776 break; 777 case IOMMU_DOMAIN_IDENTITY: 778 type = "identity"; 779 break; 780 case IOMMU_DOMAIN_UNMANAGED: 781 type = "unmanaged"; 782 break; 783 case IOMMU_DOMAIN_DMA: 784 type = "DMA"; 785 break; 786 case IOMMU_DOMAIN_DMA_FQ: 787 type = "DMA-FQ"; 788 break; 789 } 790 } 791 mutex_unlock(&group->mutex); 792 793 return sysfs_emit(buf, "%s\n", type); 794 } 795 796 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 797 798 static IOMMU_GROUP_ATTR(reserved_regions, 0444, 799 iommu_group_show_resv_regions, NULL); 800 801 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 802 iommu_group_store_type); 803 804 static void iommu_group_release(struct kobject *kobj) 805 { 806 struct iommu_group *group = to_iommu_group(kobj); 807 808 pr_debug("Releasing group %d\n", group->id); 809 810 if (group->iommu_data_release) 811 group->iommu_data_release(group->iommu_data); 812 813 ida_free(&iommu_group_ida, group->id); 814 815 if (group->default_domain) 816 iommu_domain_free(group->default_domain); 817 if (group->blocking_domain) 818 iommu_domain_free(group->blocking_domain); 819 820 kfree(group->name); 821 kfree(group); 822 } 823 824 static const struct kobj_type iommu_group_ktype = { 825 .sysfs_ops = &iommu_group_sysfs_ops, 826 .release = iommu_group_release, 827 }; 828 829 /** 830 * iommu_group_alloc - Allocate a new group 831 * 832 * This function is called by an iommu driver to allocate a new iommu 833 * group. The iommu group represents the minimum granularity of the iommu. 834 * Upon successful return, the caller holds a reference to the supplied 835 * group in order to hold the group until devices are added. Use 836 * iommu_group_put() to release this extra reference count, allowing the 837 * group to be automatically reclaimed once it has no devices or external 838 * references. 
839 */ 840 struct iommu_group *iommu_group_alloc(void) 841 { 842 struct iommu_group *group; 843 int ret; 844 845 group = kzalloc(sizeof(*group), GFP_KERNEL); 846 if (!group) 847 return ERR_PTR(-ENOMEM); 848 849 group->kobj.kset = iommu_group_kset; 850 mutex_init(&group->mutex); 851 INIT_LIST_HEAD(&group->devices); 852 INIT_LIST_HEAD(&group->entry); 853 xa_init(&group->pasid_array); 854 855 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 856 if (ret < 0) { 857 kfree(group); 858 return ERR_PTR(ret); 859 } 860 group->id = ret; 861 862 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 863 NULL, "%d", group->id); 864 if (ret) { 865 kobject_put(&group->kobj); 866 return ERR_PTR(ret); 867 } 868 869 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 870 if (!group->devices_kobj) { 871 kobject_put(&group->kobj); /* triggers .release & free */ 872 return ERR_PTR(-ENOMEM); 873 } 874 875 /* 876 * The devices_kobj holds a reference on the group kobject, so 877 * as long as that exists so will the group. We can therefore 878 * use the devices_kobj for reference counting. 879 */ 880 kobject_put(&group->kobj); 881 882 ret = iommu_group_create_file(group, 883 &iommu_group_attr_reserved_regions); 884 if (ret) { 885 kobject_put(group->devices_kobj); 886 return ERR_PTR(ret); 887 } 888 889 ret = iommu_group_create_file(group, &iommu_group_attr_type); 890 if (ret) { 891 kobject_put(group->devices_kobj); 892 return ERR_PTR(ret); 893 } 894 895 pr_debug("Allocated group %d\n", group->id); 896 897 return group; 898 } 899 EXPORT_SYMBOL_GPL(iommu_group_alloc); 900 901 /** 902 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 903 * @group: the group 904 * 905 * iommu drivers can store data in the group for use when doing iommu 906 * operations. This function provides a way to retrieve it. Caller 907 * should hold a group reference. 908 */ 909 void *iommu_group_get_iommudata(struct iommu_group *group) 910 { 911 return group->iommu_data; 912 } 913 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 914 915 /** 916 * iommu_group_set_iommudata - set iommu_data for a group 917 * @group: the group 918 * @iommu_data: new data 919 * @release: release function for iommu_data 920 * 921 * iommu drivers can store data in the group for use when doing iommu 922 * operations. This function provides a way to set the data after 923 * the group has been allocated. Caller should hold a group reference. 924 */ 925 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 926 void (*release)(void *iommu_data)) 927 { 928 group->iommu_data = iommu_data; 929 group->iommu_data_release = release; 930 } 931 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 932 933 /** 934 * iommu_group_set_name - set name for a group 935 * @group: the group 936 * @name: name 937 * 938 * Allow iommu driver to set a name for a group. When set it will 939 * appear in a name attribute file under the group in sysfs. 
940 */ 941 int iommu_group_set_name(struct iommu_group *group, const char *name) 942 { 943 int ret; 944 945 if (group->name) { 946 iommu_group_remove_file(group, &iommu_group_attr_name); 947 kfree(group->name); 948 group->name = NULL; 949 if (!name) 950 return 0; 951 } 952 953 group->name = kstrdup(name, GFP_KERNEL); 954 if (!group->name) 955 return -ENOMEM; 956 957 ret = iommu_group_create_file(group, &iommu_group_attr_name); 958 if (ret) { 959 kfree(group->name); 960 group->name = NULL; 961 return ret; 962 } 963 964 return 0; 965 } 966 EXPORT_SYMBOL_GPL(iommu_group_set_name); 967 968 static int iommu_create_device_direct_mappings(struct iommu_group *group, 969 struct device *dev) 970 { 971 struct iommu_domain *domain = group->default_domain; 972 struct iommu_resv_region *entry; 973 struct list_head mappings; 974 unsigned long pg_size; 975 int ret = 0; 976 977 if (!domain || !iommu_is_dma_domain(domain)) 978 return 0; 979 980 BUG_ON(!domain->pgsize_bitmap); 981 982 pg_size = 1UL << __ffs(domain->pgsize_bitmap); 983 INIT_LIST_HEAD(&mappings); 984 985 iommu_get_resv_regions(dev, &mappings); 986 987 /* We need to consider overlapping regions for different devices */ 988 list_for_each_entry(entry, &mappings, list) { 989 dma_addr_t start, end, addr; 990 size_t map_size = 0; 991 992 start = ALIGN(entry->start, pg_size); 993 end = ALIGN(entry->start + entry->length, pg_size); 994 995 if (entry->type != IOMMU_RESV_DIRECT && 996 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) 997 continue; 998 999 for (addr = start; addr <= end; addr += pg_size) { 1000 phys_addr_t phys_addr; 1001 1002 if (addr == end) 1003 goto map_end; 1004 1005 phys_addr = iommu_iova_to_phys(domain, addr); 1006 if (!phys_addr) { 1007 map_size += pg_size; 1008 continue; 1009 } 1010 1011 map_end: 1012 if (map_size) { 1013 ret = iommu_map(domain, addr - map_size, 1014 addr - map_size, map_size, 1015 entry->prot, GFP_KERNEL); 1016 if (ret) 1017 goto out; 1018 map_size = 0; 1019 } 1020 } 1021 1022 } 1023 1024 iommu_flush_iotlb_all(domain); 1025 1026 out: 1027 iommu_put_resv_regions(dev, &mappings); 1028 1029 return ret; 1030 } 1031 1032 /** 1033 * iommu_group_add_device - add a device to an iommu group 1034 * @group: the group into which to add the device (reference should be held) 1035 * @dev: the device 1036 * 1037 * This function is called by an iommu driver to add a device into a 1038 * group. Adding a device increments the group reference count. 1039 */ 1040 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1041 { 1042 int ret, i = 0; 1043 struct group_device *device; 1044 1045 device = kzalloc(sizeof(*device), GFP_KERNEL); 1046 if (!device) 1047 return -ENOMEM; 1048 1049 device->dev = dev; 1050 1051 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1052 if (ret) 1053 goto err_free_device; 1054 1055 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1056 rename: 1057 if (!device->name) { 1058 ret = -ENOMEM; 1059 goto err_remove_link; 1060 } 1061 1062 ret = sysfs_create_link_nowarn(group->devices_kobj, 1063 &dev->kobj, device->name); 1064 if (ret) { 1065 if (ret == -EEXIST && i >= 0) { 1066 /* 1067 * Account for the slim chance of collision 1068 * and append an instance to the name. 
			 */
			kfree(device->name);
			device->name = kasprintf(GFP_KERNEL, "%s.%d",
						 kobject_name(&dev->kobj), i++);
			goto rename;
		}
		goto err_free_name;
	}

	kobject_get(group->devices_kobj);

	dev->iommu_group = group;

	mutex_lock(&group->mutex);
	list_add_tail(&device->list, &group->devices);
	mutex_unlock(&group->mutex);
	trace_add_device_to_group(group->id, dev);

	dev_info(dev, "Adding to iommu group %d\n", group->id);

	return 0;

err_free_name:
	kfree(device->name);
err_remove_link:
	sysfs_remove_link(&dev->kobj, "iommu_group");
err_free_device:
	kfree(device);
	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);

/**
 * iommu_group_remove_device - remove a device from its current group
 * @dev: device to be removed
 *
 * This function is called by an iommu driver to remove the device from
 * its current group. This decrements the iommu group reference count.
 */
void iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *device;

	if (!group)
		return;

	dev_info(dev, "Removing from iommu group %d\n", group->id);

	mutex_lock(&group->mutex);
	device = __iommu_group_remove_device(group, dev);
	mutex_unlock(&group->mutex);

	if (device)
		__iommu_group_release_device(group, device);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
				      int (*fn)(struct device *, void *))
{
	struct group_device *device;
	int ret = 0;

	for_each_group_device(group, device) {
		ret = fn(device->dev, data);
		if (ret)
			break;
	}
	return ret;
}

/**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
 * @data: caller opaque data to be passed to callback function
 * @fn: caller supplied callback function
 *
 * This function is called by group users to iterate over group devices.
 * Callers should hold a reference count to the group during callback.
 * The group->mutex is held across callbacks, which will block calls to
 * iommu_group_add/remove_device.
 */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
			     int (*fn)(struct device *, void *))
{
	int ret;

	mutex_lock(&group->mutex);
	ret = __iommu_group_for_each_dev(group, data, fn);
	mutex_unlock(&group->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
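/*
 * Illustrative sketch (not part of the original file): a group user could
 * count the devices in a group with iommu_group_for_each_dev(). The helper
 * names below (count_one(), example_count_group_devices()) are hypothetical
 * and only demonstrate the callback contract - the callback returns 0 to
 * continue iterating and non-zero to stop.
 *
 *	static int count_one(struct device *dev, void *data)
 *	{
 *		int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	static int example_count_group_devices(struct iommu_group *group)
 *	{
 *		int count = 0;
 *
 *		iommu_group_for_each_dev(group, &count, count_one);
 *		return count;
 *	}
 */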
/**
 * iommu_group_get - Return the group for a device and increment reference
 * @dev: get the group that this device belongs to
 *
 * This function is called by iommu drivers and users to get the group
 * for the specified device. If found, the group is returned and the group
 * reference is incremented, else NULL.
 */
struct iommu_group *iommu_group_get(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		kobject_get(group->devices_kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

/**
 * iommu_group_ref_get - Increment reference on a group
 * @group: the group to use, must not be NULL
 *
 * This function is called by iommu drivers to take additional references on an
 * existing group. Returns the given group for convenience.
 */
struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
{
	kobject_get(group->devices_kobj);
	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_ref_get);

/**
 * iommu_group_put - Decrement group reference
 * @group: the group to use
 *
 * This function is called by iommu drivers and users to release the
 * iommu group. Once the reference count is zero, the group is released.
 */
void iommu_group_put(struct iommu_group *group)
{
	if (group)
		kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

/**
 * iommu_register_device_fault_handler() - Register a device fault handler
 * @dev: the device
 * @handler: the fault handler
 * @data: private data passed as argument to the handler
 *
 * When an IOMMU fault event is received, this handler gets called with the
 * fault event and data as argument. The handler should return 0 on success. If
 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
 * complete the fault by calling iommu_page_response() with one of the following
 * response codes:
 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
 * - IOMMU_PAGE_RESP_INVALID: terminate the fault
 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
 *   page faults if possible.
 *
 * Return 0 if the fault handler was installed successfully, or an error.
 */
int iommu_register_device_fault_handler(struct device *dev,
					iommu_dev_fault_handler_t handler,
					void *data)
{
	struct dev_iommu *param = dev->iommu;
	int ret = 0;

	if (!param)
		return -EINVAL;

	mutex_lock(&param->lock);
	/* Only allow one fault handler registered for each device */
	if (param->fault_param) {
		ret = -EBUSY;
		goto done_unlock;
	}

	get_device(dev);
	param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
	if (!param->fault_param) {
		put_device(dev);
		ret = -ENOMEM;
		goto done_unlock;
	}
	param->fault_param->handler = handler;
	param->fault_param->data = data;
	mutex_init(&param->fault_param->lock);
	INIT_LIST_HEAD(&param->fault_param->faults);

done_unlock:
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
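/*
 * Illustrative sketch (not part of the original file): a device driver that
 * consumes recoverable faults might pair the register/unregister calls around
 * its probe/remove path. example_fault_handler(), example_queue_fault() and
 * struct example_drvdata are hypothetical names; the handler's prototype
 * follows the fparam->handler() call made in iommu_report_device_fault()
 * below.
 *
 *	static int example_fault_handler(struct iommu_fault *fault, void *data)
 *	{
 *		struct example_drvdata *drv = data;
 *
 *		// Queue the fault for the driver's own page-request worker,
 *		// which later completes it via iommu_page_response().
 *		return example_queue_fault(drv, fault);
 *	}
 *
 *	// During probe:
 *	//	ret = iommu_register_device_fault_handler(dev, example_fault_handler, drv);
 *	// During remove (returns -EBUSY while faults are still pending):
 *	//	ret = iommu_unregister_device_fault_handler(dev);
 */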
/**
 * iommu_unregister_device_fault_handler() - Unregister the device fault handler
 * @dev: the device
 *
 * Remove the device fault handler installed with
 * iommu_register_device_fault_handler().
 *
 * Return 0 on success, or an error.
 */
int iommu_unregister_device_fault_handler(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;
	int ret = 0;

	if (!param)
		return -EINVAL;

	mutex_lock(&param->lock);

	if (!param->fault_param)
		goto unlock;

	/* we cannot unregister handler if there are pending faults */
	if (!list_empty(&param->fault_param->faults)) {
		ret = -EBUSY;
		goto unlock;
	}

	kfree(param->fault_param);
	param->fault_param = NULL;
	put_device(dev);
unlock:
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. When this function fails and the fault is recoverable, it is the
 * caller's responsibility to complete the fault.
 *
 * Return 0 on success, or an error.
 */
int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
{
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_event *evt_pending = NULL;
	struct iommu_fault_param *fparam;
	int ret = 0;

	if (!param || !evt)
		return -EINVAL;

	/* we only report device fault if there is a handler registered */
	mutex_lock(&param->lock);
	fparam = param->fault_param;
	if (!fparam || !fparam->handler) {
		ret = -EINVAL;
		goto done_unlock;
	}

	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
				      GFP_KERNEL);
		if (!evt_pending) {
			ret = -ENOMEM;
			goto done_unlock;
		}
		mutex_lock(&fparam->lock);
		list_add_tail(&evt_pending->list, &fparam->faults);
		mutex_unlock(&fparam->lock);
	}

	ret = fparam->handler(&evt->fault, fparam->data);
	if (ret && evt_pending) {
		mutex_lock(&fparam->lock);
		list_del(&evt_pending->list);
		mutex_unlock(&fparam->lock);
		kfree(evt_pending);
	}
done_unlock:
	mutex_unlock(&param->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
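/*
 * Illustrative sketch (not part of the original file): once the driver's fault
 * handler has serviced a recoverable page request, it completes it by sending
 * a response matching the request's group ID (and PASID, when required). The
 * .code member and the prq variable below are assumptions made for the sake of
 * the example; only .version, .flags, .pasid and .grpid are referenced by
 * iommu_page_response() in this file.
 *
 *	struct iommu_page_response resp = {
 *		.version	= IOMMU_PAGE_RESP_VERSION_1,
 *		.grpid		= prq->grpid,
 *		.pasid		= prq->pasid,
 *		.flags		= IOMMU_PAGE_RESP_PASID_VALID,
 *		.code		= IOMMU_PAGE_RESP_SUCCESS,
 *	};
 *
 *	ret = iommu_page_response(dev, &resp);
 */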
int iommu_page_response(struct device *dev,
			struct iommu_page_response *msg)
{
	bool needs_pasid;
	int ret = -EINVAL;
	struct iommu_fault_event *evt;
	struct iommu_fault_page_request *prm;
	struct dev_iommu *param = dev->iommu;
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;

	if (!ops->page_response)
		return -ENODEV;

	if (!param || !param->fault_param)
		return -EINVAL;

	if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
	    msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
		return -EINVAL;

	/* Only send response if there is a fault report pending */
	mutex_lock(&param->fault_param->lock);
	if (list_empty(&param->fault_param->faults)) {
		dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
		goto done_unlock;
	}
	/*
	 * Check if we have a matching page request pending to respond,
	 * otherwise return -EINVAL
	 */
	list_for_each_entry(evt, &param->fault_param->faults, list) {
		prm = &evt->fault.prm;
		if (prm->grpid != msg->grpid)
			continue;

		/*
		 * If the PASID is required, the corresponding request is
		 * matched using the group ID, the PASID valid bit and the PASID
		 * value. Otherwise only the group ID matches request and
		 * response.
		 */
		needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
		if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
			continue;

		if (!needs_pasid && has_pasid) {
			/* No big deal, just clear it. */
			msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
			msg->pasid = 0;
		}

		ret = ops->page_response(dev, evt, msg);
		list_del(&evt->list);
		kfree(evt);
		break;
	}

done_unlock:
	mutex_unlock(&param->fault_param->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_page_response);

/**
 * iommu_group_id - Return ID for a group
 * @group: the group to ID
 *
 * Return the unique ID for the group matching the sysfs group number.
 */
int iommu_group_id(struct iommu_group *group)
{
	return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);

static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding. This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as it passes through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * For multifunction devices which are not isolated from each other, find
 * all the other non-isolated functions and look for existing groups. For
 * each function, we also need to look for aliases to or from other devices
 * that may already have a group.
 */
static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
							unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
		return NULL;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus ||
		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
			continue;

		group = get_pci_alias_group(tmp, devfns);
		if (group) {
			pci_dev_put(tmp);
			return group;
		}
	}

	return NULL;
}

/*
 * Look for aliases to or from the given device for existing groups. DMA
 * aliases are only supported on the same bus, therefore the search
 * space is quite small (especially since we're really only looking at PCIe
 * devices, and therefore only expect multiple slots on the root complex or
 * downstream switch ports). It's conceivable though that a pair of
 * multifunction devices could have aliases between them that would cause a
 * loop. To prevent this, we use a bitmap to track where we've been.
1488 */ 1489 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1490 unsigned long *devfns) 1491 { 1492 struct pci_dev *tmp = NULL; 1493 struct iommu_group *group; 1494 1495 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1496 return NULL; 1497 1498 group = iommu_group_get(&pdev->dev); 1499 if (group) 1500 return group; 1501 1502 for_each_pci_dev(tmp) { 1503 if (tmp == pdev || tmp->bus != pdev->bus) 1504 continue; 1505 1506 /* We alias them or they alias us */ 1507 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1508 group = get_pci_alias_group(tmp, devfns); 1509 if (group) { 1510 pci_dev_put(tmp); 1511 return group; 1512 } 1513 1514 group = get_pci_function_alias_group(tmp, devfns); 1515 if (group) { 1516 pci_dev_put(tmp); 1517 return group; 1518 } 1519 } 1520 } 1521 1522 return NULL; 1523 } 1524 1525 struct group_for_pci_data { 1526 struct pci_dev *pdev; 1527 struct iommu_group *group; 1528 }; 1529 1530 /* 1531 * DMA alias iterator callback, return the last seen device. Stop and return 1532 * the IOMMU group if we find one along the way. 1533 */ 1534 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1535 { 1536 struct group_for_pci_data *data = opaque; 1537 1538 data->pdev = pdev; 1539 data->group = iommu_group_get(&pdev->dev); 1540 1541 return data->group != NULL; 1542 } 1543 1544 /* 1545 * Generic device_group call-back function. It just allocates one 1546 * iommu-group per device. 1547 */ 1548 struct iommu_group *generic_device_group(struct device *dev) 1549 { 1550 return iommu_group_alloc(); 1551 } 1552 EXPORT_SYMBOL_GPL(generic_device_group); 1553 1554 /* 1555 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1556 * to find or create an IOMMU group for a device. 1557 */ 1558 struct iommu_group *pci_device_group(struct device *dev) 1559 { 1560 struct pci_dev *pdev = to_pci_dev(dev); 1561 struct group_for_pci_data data; 1562 struct pci_bus *bus; 1563 struct iommu_group *group = NULL; 1564 u64 devfns[4] = { 0 }; 1565 1566 if (WARN_ON(!dev_is_pci(dev))) 1567 return ERR_PTR(-EINVAL); 1568 1569 /* 1570 * Find the upstream DMA alias for the device. A device must not 1571 * be aliased due to topology in order to have its own IOMMU group. 1572 * If we find an alias along the way that already belongs to a 1573 * group, use it. 1574 */ 1575 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1576 return data.group; 1577 1578 pdev = data.pdev; 1579 1580 /* 1581 * Continue upstream from the point of minimum IOMMU granularity 1582 * due to aliases to the point where devices are protected from 1583 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1584 * group, use it. 1585 */ 1586 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1587 if (!bus->self) 1588 continue; 1589 1590 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1591 break; 1592 1593 pdev = bus->self; 1594 1595 group = iommu_group_get(&pdev->dev); 1596 if (group) 1597 return group; 1598 } 1599 1600 /* 1601 * Look for existing groups on device aliases. If we alias another 1602 * device or another device aliases us, use the same group. 1603 */ 1604 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1605 if (group) 1606 return group; 1607 1608 /* 1609 * Look for existing groups on non-isolated functions on the same 1610 * slot and aliases of those funcions, if any. No need to clear 1611 * the search bitmap, the tested devfns are still valid. 
1612 */ 1613 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1614 if (group) 1615 return group; 1616 1617 /* No shared group found, allocate new */ 1618 return iommu_group_alloc(); 1619 } 1620 EXPORT_SYMBOL_GPL(pci_device_group); 1621 1622 /* Get the IOMMU group for device on fsl-mc bus */ 1623 struct iommu_group *fsl_mc_device_group(struct device *dev) 1624 { 1625 struct device *cont_dev = fsl_mc_cont_dev(dev); 1626 struct iommu_group *group; 1627 1628 group = iommu_group_get(cont_dev); 1629 if (!group) 1630 group = iommu_group_alloc(); 1631 return group; 1632 } 1633 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1634 1635 static int iommu_get_def_domain_type(struct device *dev) 1636 { 1637 const struct iommu_ops *ops = dev_iommu_ops(dev); 1638 1639 if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) 1640 return IOMMU_DOMAIN_DMA; 1641 1642 if (ops->def_domain_type) 1643 return ops->def_domain_type(dev); 1644 1645 return 0; 1646 } 1647 1648 static int iommu_group_alloc_default_domain(const struct bus_type *bus, 1649 struct iommu_group *group, 1650 unsigned int type) 1651 { 1652 struct iommu_domain *dom; 1653 1654 dom = __iommu_domain_alloc(bus, type); 1655 if (!dom && type != IOMMU_DOMAIN_DMA) { 1656 dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); 1657 if (dom) 1658 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1659 type, group->name); 1660 } 1661 1662 if (!dom) 1663 return -ENOMEM; 1664 1665 group->default_domain = dom; 1666 return 0; 1667 } 1668 1669 static int iommu_alloc_default_domain(struct iommu_group *group, 1670 struct device *dev) 1671 { 1672 unsigned int type; 1673 1674 type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type; 1675 1676 return iommu_group_alloc_default_domain(dev->bus, group, type); 1677 } 1678 1679 /** 1680 * iommu_group_get_for_dev - Find or create the IOMMU group for a device 1681 * @dev: target device 1682 * 1683 * This function is intended to be called by IOMMU drivers and extended to 1684 * support common, bus-defined algorithms when determining or creating the 1685 * IOMMU group for a device. On success, the caller will hold a reference 1686 * to the returned IOMMU group, which will already include the provided 1687 * device. The reference should be released with iommu_group_put(). 
 */
static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_group *group;
	int ret;

	group = iommu_group_get(dev);
	if (group)
		return group;

	group = ops->device_group(dev);
	if (WARN_ON_ONCE(group == NULL))
		return ERR_PTR(-EINVAL);

	if (IS_ERR(group))
		return group;

	ret = iommu_group_add_device(group, dev);
	if (ret)
		goto out_put_group;

	return group;

out_put_group:
	iommu_group_put(group);

	return ERR_PTR(ret);
}

struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
{
	return group->default_domain;
}

static int probe_iommu_group(struct device *dev, void *data)
{
	struct list_head *group_list = data;
	struct iommu_group *group;
	int ret;

	/* Device is probed already if in a group */
	group = iommu_group_get(dev);
	if (group) {
		iommu_group_put(group);
		return 0;
	}

	ret = __iommu_probe_device(dev, group_list);
	if (ret == -ENODEV)
		ret = 0;

	return ret;
}

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		int ret;

		ret = iommu_probe_device(dev);
		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
		iommu_release_device(dev);
		return NOTIFY_OK;
	}

	return 0;
}

struct __group_domain_type {
	struct device *dev;
	unsigned int type;
};

static int probe_get_default_domain_type(struct device *dev, void *data)
{
	struct __group_domain_type *gtype = data;
	unsigned int type = iommu_get_def_domain_type(dev);

	if (type) {
		if (gtype->type && gtype->type != type) {
			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
				 iommu_domain_type_str(type),
				 dev_name(gtype->dev),
				 iommu_domain_type_str(gtype->type));
			gtype->type = 0;
		}

		if (!gtype->dev) {
			gtype->dev  = dev;
			gtype->type = type;
		}
	}

	return 0;
}

static void probe_alloc_default_domain(const struct bus_type *bus,
				       struct iommu_group *group)
{
	struct __group_domain_type gtype;

	memset(&gtype, 0, sizeof(gtype));

	/* Ask for default domain requirements of all devices in the group */
	__iommu_group_for_each_dev(group, &gtype,
				   probe_get_default_domain_type);

	if (!gtype.type)
		gtype.type = iommu_def_domain_type;

	iommu_group_alloc_default_domain(bus, group, gtype.type);

}

static int iommu_group_do_probe_finalize(struct device *dev, void *data)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;
}

static void __iommu_group_dma_finalize(struct iommu_group *group)
{
	__iommu_group_for_each_dev(group, group->default_domain,
				   iommu_group_do_probe_finalize);
}

static int iommu_do_create_direct_mappings(struct device *dev, void *data)
{
	struct iommu_group *group = data;

	iommu_create_device_direct_mappings(group, dev);

	return 0;
}

static
int iommu_group_create_direct_mappings(struct iommu_group *group) 1833 { 1834 return __iommu_group_for_each_dev(group, group, 1835 iommu_do_create_direct_mappings); 1836 } 1837 1838 int bus_iommu_probe(const struct bus_type *bus) 1839 { 1840 struct iommu_group *group, *next; 1841 LIST_HEAD(group_list); 1842 int ret; 1843 1844 /* 1845 * This code-path does not allocate the default domain when 1846 * creating the iommu group, so do it after the groups are 1847 * created. 1848 */ 1849 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1850 if (ret) 1851 return ret; 1852 1853 list_for_each_entry_safe(group, next, &group_list, entry) { 1854 mutex_lock(&group->mutex); 1855 1856 /* Remove item from the list */ 1857 list_del_init(&group->entry); 1858 1859 /* Try to allocate default domain */ 1860 probe_alloc_default_domain(bus, group); 1861 1862 if (!group->default_domain) { 1863 mutex_unlock(&group->mutex); 1864 continue; 1865 } 1866 1867 iommu_group_create_direct_mappings(group); 1868 1869 ret = __iommu_group_set_domain(group, group->default_domain); 1870 1871 mutex_unlock(&group->mutex); 1872 1873 if (ret) 1874 break; 1875 1876 __iommu_group_dma_finalize(group); 1877 } 1878 1879 return ret; 1880 } 1881 1882 bool iommu_present(const struct bus_type *bus) 1883 { 1884 return bus->iommu_ops != NULL; 1885 } 1886 EXPORT_SYMBOL_GPL(iommu_present); 1887 1888 /** 1889 * device_iommu_capable() - check for a general IOMMU capability 1890 * @dev: device to which the capability would be relevant, if available 1891 * @cap: IOMMU capability 1892 * 1893 * Return: true if an IOMMU is present and supports the given capability 1894 * for the given device, otherwise false. 1895 */ 1896 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1897 { 1898 const struct iommu_ops *ops; 1899 1900 if (!dev->iommu || !dev->iommu->iommu_dev) 1901 return false; 1902 1903 ops = dev_iommu_ops(dev); 1904 if (!ops->capable) 1905 return false; 1906 1907 return ops->capable(dev, cap); 1908 } 1909 EXPORT_SYMBOL_GPL(device_iommu_capable); 1910 1911 /** 1912 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1913 * for a group 1914 * @group: Group to query 1915 * 1916 * IOMMU groups should not have differing values of 1917 * msi_device_has_isolated_msi() for devices in a group. However nothing 1918 * directly prevents this, so ensure mistakes don't result in isolation failures 1919 * by checking that all the devices are the same. 1920 */ 1921 bool iommu_group_has_isolated_msi(struct iommu_group *group) 1922 { 1923 struct group_device *group_dev; 1924 bool ret = true; 1925 1926 mutex_lock(&group->mutex); 1927 for_each_group_device(group, group_dev) 1928 ret &= msi_device_has_isolated_msi(group_dev->dev); 1929 mutex_unlock(&group->mutex); 1930 return ret; 1931 } 1932 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 1933 1934 /** 1935 * iommu_set_fault_handler() - set a fault handler for an iommu domain 1936 * @domain: iommu domain 1937 * @handler: fault handler 1938 * @token: user data, will be passed back to the fault handler 1939 * 1940 * This function should be used by IOMMU users which want to be notified 1941 * whenever an IOMMU fault happens. 1942 * 1943 * The fault handler itself should return 0 on success, and an appropriate 1944 * error code otherwise. 
1945 */ 1946 void iommu_set_fault_handler(struct iommu_domain *domain, 1947 iommu_fault_handler_t handler, 1948 void *token) 1949 { 1950 BUG_ON(!domain); 1951 1952 domain->handler = handler; 1953 domain->handler_token = token; 1954 } 1955 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1956 1957 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, 1958 unsigned type) 1959 { 1960 struct iommu_domain *domain; 1961 unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; 1962 1963 if (bus == NULL || bus->iommu_ops == NULL) 1964 return NULL; 1965 1966 domain = bus->iommu_ops->domain_alloc(alloc_type); 1967 if (!domain) 1968 return NULL; 1969 1970 domain->type = type; 1971 /* 1972 * If not already set, assume all sizes by default; the driver 1973 * may override this later 1974 */ 1975 if (!domain->pgsize_bitmap) 1976 domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 1977 1978 if (!domain->ops) 1979 domain->ops = bus->iommu_ops->default_domain_ops; 1980 1981 if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { 1982 iommu_domain_free(domain); 1983 domain = NULL; 1984 } 1985 return domain; 1986 } 1987 1988 struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) 1989 { 1990 return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); 1991 } 1992 EXPORT_SYMBOL_GPL(iommu_domain_alloc); 1993 1994 void iommu_domain_free(struct iommu_domain *domain) 1995 { 1996 if (domain->type == IOMMU_DOMAIN_SVA) 1997 mmdrop(domain->mm); 1998 iommu_put_dma_cookie(domain); 1999 domain->ops->free(domain); 2000 } 2001 EXPORT_SYMBOL_GPL(iommu_domain_free); 2002 2003 /* 2004 * Put the group's domain back to the appropriate core-owned domain - either the 2005 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 2006 */ 2007 static void __iommu_group_set_core_domain(struct iommu_group *group) 2008 { 2009 struct iommu_domain *new_domain; 2010 2011 if (group->owner) 2012 new_domain = group->blocking_domain; 2013 else 2014 new_domain = group->default_domain; 2015 2016 __iommu_group_set_domain_nofail(group, new_domain); 2017 } 2018 2019 static int __iommu_attach_device(struct iommu_domain *domain, 2020 struct device *dev) 2021 { 2022 int ret; 2023 2024 if (unlikely(domain->ops->attach_dev == NULL)) 2025 return -ENODEV; 2026 2027 ret = domain->ops->attach_dev(domain, dev); 2028 if (ret) 2029 return ret; 2030 dev->iommu->attach_deferred = 0; 2031 trace_attach_device_to_domain(dev); 2032 return 0; 2033 } 2034 2035 /** 2036 * iommu_attach_device - Attach an IOMMU domain to a device 2037 * @domain: IOMMU domain to attach 2038 * @dev: Device that will be attached 2039 * 2040 * Returns 0 on success and error code on failure 2041 * 2042 * Note that EINVAL can be treated as a soft failure, indicating 2043 * that certain configuration of the domain is incompatible with 2044 * the device. In this case attaching a different domain to the 2045 * device may succeed. 
2046 */ 2047 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2048 { 2049 struct iommu_group *group; 2050 int ret; 2051 2052 group = iommu_group_get(dev); 2053 if (!group) 2054 return -ENODEV; 2055 2056 /* 2057 * Lock the group to make sure the device-count doesn't 2058 * change while we are attaching 2059 */ 2060 mutex_lock(&group->mutex); 2061 ret = -EINVAL; 2062 if (list_count_nodes(&group->devices) != 1) 2063 goto out_unlock; 2064 2065 ret = __iommu_attach_group(domain, group); 2066 2067 out_unlock: 2068 mutex_unlock(&group->mutex); 2069 iommu_group_put(group); 2070 2071 return ret; 2072 } 2073 EXPORT_SYMBOL_GPL(iommu_attach_device); 2074 2075 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2076 { 2077 if (dev->iommu && dev->iommu->attach_deferred) 2078 return __iommu_attach_device(domain, dev); 2079 2080 return 0; 2081 } 2082 2083 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2084 { 2085 struct iommu_group *group; 2086 2087 group = iommu_group_get(dev); 2088 if (!group) 2089 return; 2090 2091 mutex_lock(&group->mutex); 2092 if (WARN_ON(domain != group->domain) || 2093 WARN_ON(list_count_nodes(&group->devices) != 1)) 2094 goto out_unlock; 2095 __iommu_group_set_core_domain(group); 2096 2097 out_unlock: 2098 mutex_unlock(&group->mutex); 2099 iommu_group_put(group); 2100 } 2101 EXPORT_SYMBOL_GPL(iommu_detach_device); 2102 2103 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2104 { 2105 struct iommu_domain *domain; 2106 struct iommu_group *group; 2107 2108 group = iommu_group_get(dev); 2109 if (!group) 2110 return NULL; 2111 2112 domain = group->domain; 2113 2114 iommu_group_put(group); 2115 2116 return domain; 2117 } 2118 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2119 2120 /* 2121 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2122 * guarantees that the group and its default domain are valid and correct. 2123 */ 2124 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2125 { 2126 return dev->iommu_group->default_domain; 2127 } 2128 2129 static int __iommu_attach_group(struct iommu_domain *domain, 2130 struct iommu_group *group) 2131 { 2132 if (group->domain && group->domain != group->default_domain && 2133 group->domain != group->blocking_domain) 2134 return -EBUSY; 2135 2136 return __iommu_group_set_domain(group, domain); 2137 } 2138 2139 /** 2140 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2141 * @domain: IOMMU domain to attach 2142 * @group: IOMMU group that will be attached 2143 * 2144 * Returns 0 on success and error code on failure 2145 * 2146 * Note that EINVAL can be treated as a soft failure, indicating 2147 * that certain configuration of the domain is incompatible with 2148 * the group. In this case attaching a different domain to the 2149 * group may succeed. 
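 *
 * Example: an illustrative sketch of group-level attach as used by exclusive
 * owners such as VFIO (such owners will normally have claimed DMA ownership
 * of the group first, see iommu_group_claim_dma_owner()); the error handling
 * shown is an assumption:
 *
 *	struct iommu_group *grp = iommu_group_get(dev);
 *
 *	if (!grp)
 *		return -ENODEV;
 *	ret = iommu_attach_group(domain, grp);
 *	if (!ret) {
 *		... use the domain ...
 *		iommu_detach_group(domain, grp);
 *	}
 *	iommu_group_put(grp);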
2150 */ 2151 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2152 { 2153 int ret; 2154 2155 mutex_lock(&group->mutex); 2156 ret = __iommu_attach_group(domain, group); 2157 mutex_unlock(&group->mutex); 2158 2159 return ret; 2160 } 2161 EXPORT_SYMBOL_GPL(iommu_attach_group); 2162 2163 static int __iommu_device_set_domain(struct iommu_group *group, 2164 struct device *dev, 2165 struct iommu_domain *new_domain, 2166 unsigned int flags) 2167 { 2168 int ret; 2169 2170 if (dev->iommu->attach_deferred) { 2171 if (new_domain == group->default_domain) 2172 return 0; 2173 dev->iommu->attach_deferred = 0; 2174 } 2175 2176 ret = __iommu_attach_device(new_domain, dev); 2177 if (ret) { 2178 /* 2179 * If we have a blocking domain then try to attach that in hopes 2180 * of avoiding a UAF. Modern drivers should implement blocking 2181 * domains as global statics that cannot fail. 2182 */ 2183 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2184 group->blocking_domain && 2185 group->blocking_domain != new_domain) 2186 __iommu_attach_device(group->blocking_domain, dev); 2187 return ret; 2188 } 2189 return 0; 2190 } 2191 2192 /* 2193 * If 0 is returned the group's domain is new_domain. If an error is returned 2194 * then the group's domain will be set back to the existing domain unless 2195 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's 2196 * domain is left inconsistent. It is a driver bug for attach to fail with a 2197 * previously good domain; we try to avoid a kernel UAF because of this. 2198 * 2199 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2200 * API works on domains and devices. Bridge that gap by iterating over the 2201 * devices in a group. Ideally we'd have a single device which represents the 2202 * requestor ID of the group, but we also allow IOMMU drivers to create policy 2203 * defined minimum sets, where the physical hardware may be able to distinguish 2204 * members, but we wish to group them at a higher level (ex. untrusted 2205 * multi-function PCI devices). Thus we attach each device. 2206 */ 2207 static int __iommu_group_set_domain_internal(struct iommu_group *group, 2208 struct iommu_domain *new_domain, 2209 unsigned int flags) 2210 { 2211 struct group_device *last_gdev; 2212 struct group_device *gdev; 2213 int result; 2214 int ret; 2215 2216 lockdep_assert_held(&group->mutex); 2217 2218 if (group->domain == new_domain) 2219 return 0; 2220 2221 /* 2222 * New drivers should support default domains, so set_platform_dma() 2223 * op will never be called. Otherwise the NULL domain represents some 2224 * platform specific behavior. 2225 */ 2226 if (!new_domain) { 2227 for_each_group_device(group, gdev) { 2228 const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); 2229 2230 if (!WARN_ON(!ops->set_platform_dma_ops)) 2231 ops->set_platform_dma_ops(gdev->dev); 2232 } 2233 group->domain = NULL; 2234 return 0; 2235 } 2236 2237 /* 2238 * Changing the domain is done by calling attach_dev() on the new 2239 * domain. This switch does not have to be atomic and DMA can be 2240 * discarded during the transition. DMA must only be able to access 2241 * either new_domain or group->domain, never something else. 2242 */ 2243 result = 0; 2244 for_each_group_device(group, gdev) { 2245 ret = __iommu_device_set_domain(group, gdev->dev, new_domain, 2246 flags); 2247 if (ret) { 2248 result = ret; 2249 /* 2250 * Keep trying the other devices in the group.
If a 2251 * driver fails to attach to an otherwise good domain, and 2252 * does not support blocking domains, it should at least 2253 * drop its reference on the current domain so we don't 2254 * UAF. 2255 */ 2256 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2257 continue; 2258 goto err_revert; 2259 } 2260 } 2261 group->domain = new_domain; 2262 return result; 2263 2264 err_revert: 2265 /* 2266 * This is called in error unwind paths. A well behaved driver should 2267 * always allow us to attach to a domain that was already attached. 2268 */ 2269 last_gdev = gdev; 2270 for_each_group_device(group, gdev) { 2271 const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); 2272 2273 /* 2274 * If set_platform_dma_ops is not present a NULL domain can 2275 * happen only for the first probe, in which case we leave 2276 * group->domain as NULL and let release clean everything up. 2277 */ 2278 if (group->domain) 2279 WARN_ON(__iommu_device_set_domain( 2280 group, gdev->dev, group->domain, 2281 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2282 else if (ops->set_platform_dma_ops) 2283 ops->set_platform_dma_ops(gdev->dev); 2284 if (gdev == last_gdev) 2285 break; 2286 } 2287 return ret; 2288 } 2289 2290 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2291 { 2292 mutex_lock(&group->mutex); 2293 __iommu_group_set_core_domain(group); 2294 mutex_unlock(&group->mutex); 2295 } 2296 EXPORT_SYMBOL_GPL(iommu_detach_group); 2297 2298 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2299 { 2300 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2301 return iova; 2302 2303 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2304 return 0; 2305 2306 return domain->ops->iova_to_phys(domain, iova); 2307 } 2308 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2309 2310 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2311 phys_addr_t paddr, size_t size, size_t *count) 2312 { 2313 unsigned int pgsize_idx, pgsize_idx_next; 2314 unsigned long pgsizes; 2315 size_t offset, pgsize, pgsize_next; 2316 unsigned long addr_merge = paddr | iova; 2317 2318 /* Page sizes supported by the hardware and small enough for @size */ 2319 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2320 2321 /* Constrain the page sizes further based on the maximum alignment */ 2322 if (likely(addr_merge)) 2323 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2324 2325 /* Make sure we have at least one suitable page size */ 2326 BUG_ON(!pgsizes); 2327 2328 /* Pick the biggest page size remaining */ 2329 pgsize_idx = __fls(pgsizes); 2330 pgsize = BIT(pgsize_idx); 2331 if (!count) 2332 return pgsize; 2333 2334 /* Find the next biggest supported page size, if it exists */ 2335 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2336 if (!pgsizes) 2337 goto out_set_count; 2338 2339 pgsize_idx_next = __ffs(pgsizes); 2340 pgsize_next = BIT(pgsize_idx_next); 2341 2342 /* 2343 * There's no point trying a bigger page size unless the virtual 2344 * and physical addresses are similarly offset within the larger page. 2345 */ 2346 if ((iova ^ paddr) & (pgsize_next - 1)) 2347 goto out_set_count; 2348 2349 /* Calculate the offset to the next page size alignment boundary */ 2350 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2351 2352 /* 2353 * If size is big enough to accommodate the larger page, reduce 2354 * the number of smaller pages.
2355 */ 2356 if (offset + pgsize_next <= size) 2357 size = offset; 2358 2359 out_set_count: 2360 *count = size >> pgsize_idx; 2361 return pgsize; 2362 } 2363 2364 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, 2365 phys_addr_t paddr, size_t size, int prot, 2366 gfp_t gfp, size_t *mapped) 2367 { 2368 const struct iommu_domain_ops *ops = domain->ops; 2369 size_t pgsize, count; 2370 int ret; 2371 2372 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2373 2374 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2375 iova, &paddr, pgsize, count); 2376 2377 if (ops->map_pages) { 2378 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2379 gfp, mapped); 2380 } else { 2381 ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); 2382 *mapped = ret ? 0 : pgsize; 2383 } 2384 2385 return ret; 2386 } 2387 2388 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2389 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2390 { 2391 const struct iommu_domain_ops *ops = domain->ops; 2392 unsigned long orig_iova = iova; 2393 unsigned int min_pagesz; 2394 size_t orig_size = size; 2395 phys_addr_t orig_paddr = paddr; 2396 int ret = 0; 2397 2398 if (unlikely(!(ops->map || ops->map_pages) || 2399 domain->pgsize_bitmap == 0UL)) 2400 return -ENODEV; 2401 2402 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2403 return -EINVAL; 2404 2405 /* find out the minimum page size supported */ 2406 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2407 2408 /* 2409 * both the virtual address and the physical one, as well as 2410 * the size of the mapping, must be aligned (at least) to the 2411 * size of the smallest page supported by the hardware 2412 */ 2413 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2414 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2415 iova, &paddr, size, min_pagesz); 2416 return -EINVAL; 2417 } 2418 2419 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2420 2421 while (size) { 2422 size_t mapped = 0; 2423 2424 ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, 2425 &mapped); 2426 /* 2427 * Some pages may have been mapped, even if an error occurred, 2428 * so we should account for those so they can be unmapped. 
2429 */ 2430 size -= mapped; 2431 2432 if (ret) 2433 break; 2434 2435 iova += mapped; 2436 paddr += mapped; 2437 } 2438 2439 /* unroll mapping in case something went wrong */ 2440 if (ret) 2441 iommu_unmap(domain, orig_iova, orig_size - size); 2442 else 2443 trace_map(orig_iova, orig_paddr, orig_size); 2444 2445 return ret; 2446 } 2447 2448 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2449 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2450 { 2451 const struct iommu_domain_ops *ops = domain->ops; 2452 int ret; 2453 2454 might_sleep_if(gfpflags_allow_blocking(gfp)); 2455 2456 /* Discourage passing strange GFP flags */ 2457 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2458 __GFP_HIGHMEM))) 2459 return -EINVAL; 2460 2461 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2462 if (ret == 0 && ops->iotlb_sync_map) 2463 ops->iotlb_sync_map(domain, iova, size); 2464 2465 return ret; 2466 } 2467 EXPORT_SYMBOL_GPL(iommu_map); 2468 2469 static size_t __iommu_unmap_pages(struct iommu_domain *domain, 2470 unsigned long iova, size_t size, 2471 struct iommu_iotlb_gather *iotlb_gather) 2472 { 2473 const struct iommu_domain_ops *ops = domain->ops; 2474 size_t pgsize, count; 2475 2476 pgsize = iommu_pgsize(domain, iova, iova, size, &count); 2477 return ops->unmap_pages ? 2478 ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : 2479 ops->unmap(domain, iova, pgsize, iotlb_gather); 2480 } 2481 2482 static size_t __iommu_unmap(struct iommu_domain *domain, 2483 unsigned long iova, size_t size, 2484 struct iommu_iotlb_gather *iotlb_gather) 2485 { 2486 const struct iommu_domain_ops *ops = domain->ops; 2487 size_t unmapped_page, unmapped = 0; 2488 unsigned long orig_iova = iova; 2489 unsigned int min_pagesz; 2490 2491 if (unlikely(!(ops->unmap || ops->unmap_pages) || 2492 domain->pgsize_bitmap == 0UL)) 2493 return 0; 2494 2495 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2496 return 0; 2497 2498 /* find out the minimum page size supported */ 2499 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2500 2501 /* 2502 * The virtual address, as well as the size of the mapping, must be 2503 * aligned (at least) to the size of the smallest page supported 2504 * by the hardware 2505 */ 2506 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2507 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2508 iova, size, min_pagesz); 2509 return 0; 2510 } 2511 2512 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2513 2514 /* 2515 * Keep iterating until we either unmap 'size' bytes (or more) 2516 * or we hit an area that isn't mapped. 
2517 */ 2518 while (unmapped < size) { 2519 unmapped_page = __iommu_unmap_pages(domain, iova, 2520 size - unmapped, 2521 iotlb_gather); 2522 if (!unmapped_page) 2523 break; 2524 2525 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2526 iova, unmapped_page); 2527 2528 iova += unmapped_page; 2529 unmapped += unmapped_page; 2530 } 2531 2532 trace_unmap(orig_iova, size, unmapped); 2533 return unmapped; 2534 } 2535 2536 size_t iommu_unmap(struct iommu_domain *domain, 2537 unsigned long iova, size_t size) 2538 { 2539 struct iommu_iotlb_gather iotlb_gather; 2540 size_t ret; 2541 2542 iommu_iotlb_gather_init(&iotlb_gather); 2543 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2544 iommu_iotlb_sync(domain, &iotlb_gather); 2545 2546 return ret; 2547 } 2548 EXPORT_SYMBOL_GPL(iommu_unmap); 2549 2550 size_t iommu_unmap_fast(struct iommu_domain *domain, 2551 unsigned long iova, size_t size, 2552 struct iommu_iotlb_gather *iotlb_gather) 2553 { 2554 return __iommu_unmap(domain, iova, size, iotlb_gather); 2555 } 2556 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2557 2558 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2559 struct scatterlist *sg, unsigned int nents, int prot, 2560 gfp_t gfp) 2561 { 2562 const struct iommu_domain_ops *ops = domain->ops; 2563 size_t len = 0, mapped = 0; 2564 phys_addr_t start; 2565 unsigned int i = 0; 2566 int ret; 2567 2568 might_sleep_if(gfpflags_allow_blocking(gfp)); 2569 2570 /* Discourage passing strange GFP flags */ 2571 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2572 __GFP_HIGHMEM))) 2573 return -EINVAL; 2574 2575 while (i <= nents) { 2576 phys_addr_t s_phys = sg_phys(sg); 2577 2578 if (len && s_phys != start + len) { 2579 ret = __iommu_map(domain, iova + mapped, start, 2580 len, prot, gfp); 2581 2582 if (ret) 2583 goto out_err; 2584 2585 mapped += len; 2586 len = 0; 2587 } 2588 2589 if (sg_is_dma_bus_address(sg)) 2590 goto next; 2591 2592 if (len) { 2593 len += sg->length; 2594 } else { 2595 len = sg->length; 2596 start = s_phys; 2597 } 2598 2599 next: 2600 if (++i < nents) 2601 sg = sg_next(sg); 2602 } 2603 2604 if (ops->iotlb_sync_map) 2605 ops->iotlb_sync_map(domain, iova, mapped); 2606 return mapped; 2607 2608 out_err: 2609 /* undo mappings already done */ 2610 iommu_unmap(domain, iova, mapped); 2611 2612 return ret; 2613 } 2614 EXPORT_SYMBOL_GPL(iommu_map_sg); 2615 2616 /** 2617 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2618 * @domain: the iommu domain where the fault has happened 2619 * @dev: the device where the fault has happened 2620 * @iova: the faulting address 2621 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2622 * 2623 * This function should be called by the low-level IOMMU implementations 2624 * whenever IOMMU faults happen, to allow high-level users, that are 2625 * interested in such events, to know about them. 2626 * 2627 * This event may be useful for several possible use cases: 2628 * - mere logging of the event 2629 * - dynamic TLB/PTE loading 2630 * - if restarting of the faulting device is required 2631 * 2632 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2633 * PTE/TLB loading will one day be supported, implementations will be able 2634 * to tell whether it succeeded or not according to this return value). 
2635 * 2636 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2637 * (though fault handlers can also return -ENOSYS, in case they want to 2638 * elicit the default behavior of the IOMMU drivers). 2639 */ 2640 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2641 unsigned long iova, int flags) 2642 { 2643 int ret = -ENOSYS; 2644 2645 /* 2646 * if upper layers showed interest and installed a fault handler, 2647 * invoke it. 2648 */ 2649 if (domain->handler) 2650 ret = domain->handler(domain, dev, iova, flags, 2651 domain->handler_token); 2652 2653 trace_io_page_fault(dev, iova, flags); 2654 return ret; 2655 } 2656 EXPORT_SYMBOL_GPL(report_iommu_fault); 2657 2658 static int __init iommu_init(void) 2659 { 2660 iommu_group_kset = kset_create_and_add("iommu_groups", 2661 NULL, kernel_kobj); 2662 BUG_ON(!iommu_group_kset); 2663 2664 iommu_debugfs_setup(); 2665 2666 return 0; 2667 } 2668 core_initcall(iommu_init); 2669 2670 int iommu_enable_nesting(struct iommu_domain *domain) 2671 { 2672 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2673 return -EINVAL; 2674 if (!domain->ops->enable_nesting) 2675 return -EINVAL; 2676 return domain->ops->enable_nesting(domain); 2677 } 2678 EXPORT_SYMBOL_GPL(iommu_enable_nesting); 2679 2680 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2681 unsigned long quirk) 2682 { 2683 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2684 return -EINVAL; 2685 if (!domain->ops->set_pgtable_quirks) 2686 return -EINVAL; 2687 return domain->ops->set_pgtable_quirks(domain, quirk); 2688 } 2689 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2690 2691 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2692 { 2693 const struct iommu_ops *ops = dev_iommu_ops(dev); 2694 2695 if (ops->get_resv_regions) 2696 ops->get_resv_regions(dev, list); 2697 } 2698 2699 /** 2700 * iommu_put_resv_regions - release reserved regions 2701 * @dev: device for which to free reserved regions 2702 * @list: reserved region list for device 2703 * 2704 * This releases a reserved region list acquired by iommu_get_resv_regions().
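 *
 * Example: an illustrative walk of a device's reserved regions; the printed
 * message format is an assumption:
 *
 *	struct iommu_resv_region *region;
 *	LIST_HEAD(resv_regions);
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		dev_info(dev, "resv region %pa length 0x%zx type %d\n",
 *			 &region->start, region->length, region->type);
 *	iommu_put_resv_regions(dev, &resv_regions);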
2705 */ 2706 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2707 { 2708 struct iommu_resv_region *entry, *next; 2709 2710 list_for_each_entry_safe(entry, next, list, list) { 2711 if (entry->free) 2712 entry->free(dev, entry); 2713 else 2714 kfree(entry); 2715 } 2716 } 2717 EXPORT_SYMBOL(iommu_put_resv_regions); 2718 2719 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2720 size_t length, int prot, 2721 enum iommu_resv_type type, 2722 gfp_t gfp) 2723 { 2724 struct iommu_resv_region *region; 2725 2726 region = kzalloc(sizeof(*region), gfp); 2727 if (!region) 2728 return NULL; 2729 2730 INIT_LIST_HEAD(®ion->list); 2731 region->start = start; 2732 region->length = length; 2733 region->prot = prot; 2734 region->type = type; 2735 return region; 2736 } 2737 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2738 2739 void iommu_set_default_passthrough(bool cmd_line) 2740 { 2741 if (cmd_line) 2742 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2743 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2744 } 2745 2746 void iommu_set_default_translated(bool cmd_line) 2747 { 2748 if (cmd_line) 2749 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2750 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2751 } 2752 2753 bool iommu_default_passthrough(void) 2754 { 2755 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2756 } 2757 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2758 2759 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 2760 { 2761 const struct iommu_ops *ops = NULL; 2762 struct iommu_device *iommu; 2763 2764 spin_lock(&iommu_device_lock); 2765 list_for_each_entry(iommu, &iommu_device_list, list) 2766 if (iommu->fwnode == fwnode) { 2767 ops = iommu->ops; 2768 break; 2769 } 2770 spin_unlock(&iommu_device_lock); 2771 return ops; 2772 } 2773 2774 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 2775 const struct iommu_ops *ops) 2776 { 2777 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2778 2779 if (fwspec) 2780 return ops == fwspec->ops ? 0 : -EINVAL; 2781 2782 if (!dev_iommu_get(dev)) 2783 return -ENOMEM; 2784 2785 /* Preallocate for the overwhelmingly common case of 1 ID */ 2786 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2787 if (!fwspec) 2788 return -ENOMEM; 2789 2790 of_node_get(to_of_node(iommu_fwnode)); 2791 fwspec->iommu_fwnode = iommu_fwnode; 2792 fwspec->ops = ops; 2793 dev_iommu_fwspec_set(dev, fwspec); 2794 return 0; 2795 } 2796 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2797 2798 void iommu_fwspec_free(struct device *dev) 2799 { 2800 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2801 2802 if (fwspec) { 2803 fwnode_handle_put(fwspec->iommu_fwnode); 2804 kfree(fwspec); 2805 dev_iommu_fwspec_set(dev, NULL); 2806 } 2807 } 2808 EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2809 2810 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2811 { 2812 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2813 int i, new_num; 2814 2815 if (!fwspec) 2816 return -EINVAL; 2817 2818 new_num = fwspec->num_ids + num_ids; 2819 if (new_num > 1) { 2820 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2821 GFP_KERNEL); 2822 if (!fwspec) 2823 return -ENOMEM; 2824 2825 dev_iommu_fwspec_set(dev, fwspec); 2826 } 2827 2828 for (i = 0; i < num_ids; i++) 2829 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2830 2831 fwspec->num_ids = new_num; 2832 return 0; 2833 } 2834 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2835 2836 /* 2837 * Per device IOMMU features. 
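 *
 * Example: an illustrative sketch of a driver opting a device into SVA
 * before binding an mm (typically via iommu_sva_bind_device()) and releasing
 * the feature again on teardown; the surrounding error handling is assumed:
 *
 *	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
 *	if (ret)
 *		return ret;
 *	... bind and use SVA handles ...
 *	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);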
2838 */ 2839 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2840 { 2841 if (dev->iommu && dev->iommu->iommu_dev) { 2842 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2843 2844 if (ops->dev_enable_feat) 2845 return ops->dev_enable_feat(dev, feat); 2846 } 2847 2848 return -ENODEV; 2849 } 2850 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2851 2852 /* 2853 * The device drivers should do the necessary cleanups before calling this. 2854 */ 2855 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2856 { 2857 if (dev->iommu && dev->iommu->iommu_dev) { 2858 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2859 2860 if (ops->dev_disable_feat) 2861 return ops->dev_disable_feat(dev, feat); 2862 } 2863 2864 return -EBUSY; 2865 } 2866 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2867 2868 /* 2869 * Changes the default domain of an iommu group 2870 * 2871 * @group: The group for which the default domain should be changed 2872 * @dev: The first device in the group 2873 * @type: The type of the new default domain that gets associated with the group 2874 * 2875 * Returns 0 on success and error code on failure 2876 * 2877 * Note: 2878 * 1. Presently, this function is called only when user requests to change the 2879 * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type 2880 * Please take a closer look if intended to use for other purposes. 2881 */ 2882 static int iommu_change_dev_def_domain(struct iommu_group *group, 2883 struct device *dev, int type) 2884 { 2885 struct __group_domain_type gtype = {NULL, 0}; 2886 struct iommu_domain *prev_dom; 2887 int ret; 2888 2889 lockdep_assert_held(&group->mutex); 2890 2891 prev_dom = group->default_domain; 2892 __iommu_group_for_each_dev(group, >ype, 2893 probe_get_default_domain_type); 2894 if (!type) { 2895 /* 2896 * If the user hasn't requested any specific type of domain and 2897 * if the device supports both the domains, then default to the 2898 * domain the device was booted with 2899 */ 2900 type = gtype.type ? : iommu_def_domain_type; 2901 } else if (gtype.type && type != gtype.type) { 2902 dev_err_ratelimited(dev, "Device cannot be in %s domain\n", 2903 iommu_domain_type_str(type)); 2904 return -EINVAL; 2905 } 2906 2907 /* 2908 * Switch to a new domain only if the requested domain type is different 2909 * from the existing default domain type 2910 */ 2911 if (prev_dom->type == type) 2912 return 0; 2913 2914 group->default_domain = NULL; 2915 group->domain = NULL; 2916 2917 /* Sets group->default_domain to the newly allocated domain */ 2918 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2919 if (ret) 2920 goto restore_old_domain; 2921 2922 group->domain = prev_dom; 2923 ret = iommu_create_device_direct_mappings(group, dev); 2924 if (ret) 2925 goto free_new_domain; 2926 2927 ret = __iommu_group_set_domain(group, group->default_domain); 2928 if (ret) 2929 goto free_new_domain; 2930 2931 iommu_domain_free(prev_dom); 2932 2933 return 0; 2934 2935 free_new_domain: 2936 iommu_domain_free(group->default_domain); 2937 restore_old_domain: 2938 group->default_domain = prev_dom; 2939 2940 return ret; 2941 } 2942 2943 /* 2944 * Changing the default domain through sysfs requires the users to unbind the 2945 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2946 * transition. Return failure if this isn't met. 2947 * 2948 * We need to consider the race between this and the device release path. 
2949 * group->mutex is used here to guarantee that the device release path 2950 * will not be entered at the same time. 2951 */ 2952 static ssize_t iommu_group_store_type(struct iommu_group *group, 2953 const char *buf, size_t count) 2954 { 2955 struct group_device *grp_dev; 2956 struct device *dev; 2957 int ret, req_type; 2958 2959 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2960 return -EACCES; 2961 2962 if (WARN_ON(!group) || !group->default_domain) 2963 return -EINVAL; 2964 2965 if (sysfs_streq(buf, "identity")) 2966 req_type = IOMMU_DOMAIN_IDENTITY; 2967 else if (sysfs_streq(buf, "DMA")) 2968 req_type = IOMMU_DOMAIN_DMA; 2969 else if (sysfs_streq(buf, "DMA-FQ")) 2970 req_type = IOMMU_DOMAIN_DMA_FQ; 2971 else if (sysfs_streq(buf, "auto")) 2972 req_type = 0; 2973 else 2974 return -EINVAL; 2975 2976 mutex_lock(&group->mutex); 2977 /* We can bring up a flush queue without tearing down the domain. */ 2978 if (req_type == IOMMU_DOMAIN_DMA_FQ && 2979 group->default_domain->type == IOMMU_DOMAIN_DMA) { 2980 ret = iommu_dma_init_fq(group->default_domain); 2981 if (!ret) 2982 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 2983 mutex_unlock(&group->mutex); 2984 2985 return ret ?: count; 2986 } 2987 2988 /* Otherwise, ensure that a device exists and no driver is bound. */ 2989 if (list_empty(&group->devices) || group->owner_cnt) { 2990 mutex_unlock(&group->mutex); 2991 return -EPERM; 2992 } 2993 2994 grp_dev = list_first_entry(&group->devices, struct group_device, list); 2995 dev = grp_dev->dev; 2996 2997 ret = iommu_change_dev_def_domain(group, dev, req_type); 2998 2999 /* 3000 * Release the mutex here because ops->probe_finalize() call-back of 3001 * some vendor IOMMU drivers calls arm_iommu_attach_device() which 3002 * in turn might call back into IOMMU core code, where it tries to take 3003 * group->mutex, resulting in a deadlock. 3004 */ 3005 mutex_unlock(&group->mutex); 3006 3007 /* Make sure dma_ops is appropriately set */ 3008 if (!ret) 3009 __iommu_group_dma_finalize(group); 3010 3011 return ret ?: count; 3012 } 3013 3014 static bool iommu_is_default_domain(struct iommu_group *group) 3015 { 3016 if (group->domain == group->default_domain) 3017 return true; 3018 3019 /* 3020 * If the default domain was set to identity and it is still an identity 3021 * domain then we consider this a pass. This happens because of 3022 * amd_iommu_init_device() replacing the default identity domain with an 3023 * identity domain that has a different configuration for AMDGPU. 3024 */ 3025 if (group->default_domain && 3026 group->default_domain->type == IOMMU_DOMAIN_IDENTITY && 3027 group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) 3028 return true; 3029 return false; 3030 } 3031 3032 /** 3033 * iommu_device_use_default_domain() - Device driver wants to handle device 3034 * DMA through the kernel DMA API. 3035 * @dev: The device. 3036 * 3037 * The device driver about to bind @dev wants to do DMA through the kernel 3038 * DMA API. Return 0 if it is allowed, otherwise an error.
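 *
 * Example: an illustrative pairing of this call with its release counterpart
 * around a driver bind; the caller shown is a sketch, not the exact driver
 * core call site:
 *
 *	ret = iommu_device_use_default_domain(dev);
 *	if (ret)
 *		return ret;
 *	... the driver is bound and does DMA through the DMA API ...
 *	iommu_device_unuse_default_domain(dev);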
3039 */ 3040 int iommu_device_use_default_domain(struct device *dev) 3041 { 3042 struct iommu_group *group = iommu_group_get(dev); 3043 int ret = 0; 3044 3045 if (!group) 3046 return 0; 3047 3048 mutex_lock(&group->mutex); 3049 if (group->owner_cnt) { 3050 if (group->owner || !iommu_is_default_domain(group) || 3051 !xa_empty(&group->pasid_array)) { 3052 ret = -EBUSY; 3053 goto unlock_out; 3054 } 3055 } 3056 3057 group->owner_cnt++; 3058 3059 unlock_out: 3060 mutex_unlock(&group->mutex); 3061 iommu_group_put(group); 3062 3063 return ret; 3064 } 3065 3066 /** 3067 * iommu_device_unuse_default_domain() - Device driver stops handling device 3068 * DMA through the kernel DMA API. 3069 * @dev: The device. 3070 * 3071 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3072 * It must be called after iommu_device_use_default_domain(). 3073 */ 3074 void iommu_device_unuse_default_domain(struct device *dev) 3075 { 3076 struct iommu_group *group = iommu_group_get(dev); 3077 3078 if (!group) 3079 return; 3080 3081 mutex_lock(&group->mutex); 3082 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3083 group->owner_cnt--; 3084 3085 mutex_unlock(&group->mutex); 3086 iommu_group_put(group); 3087 } 3088 3089 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3090 { 3091 struct group_device *dev = 3092 list_first_entry(&group->devices, struct group_device, list); 3093 3094 if (group->blocking_domain) 3095 return 0; 3096 3097 group->blocking_domain = 3098 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3099 if (!group->blocking_domain) { 3100 /* 3101 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3102 * create an empty domain instead. 3103 */ 3104 group->blocking_domain = __iommu_domain_alloc( 3105 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3106 if (!group->blocking_domain) 3107 return -EINVAL; 3108 } 3109 return 0; 3110 } 3111 3112 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3113 { 3114 int ret; 3115 3116 if ((group->domain && group->domain != group->default_domain) || 3117 !xa_empty(&group->pasid_array)) 3118 return -EBUSY; 3119 3120 ret = __iommu_group_alloc_blocking_domain(group); 3121 if (ret) 3122 return ret; 3123 ret = __iommu_group_set_domain(group, group->blocking_domain); 3124 if (ret) 3125 return ret; 3126 3127 group->owner = owner; 3128 group->owner_cnt++; 3129 return 0; 3130 } 3131 3132 /** 3133 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3134 * @group: The group. 3135 * @owner: Caller specified pointer. Used for exclusive ownership. 3136 * 3137 * This is to support backward compatibility for vfio which manages the dma 3138 * ownership in iommu_group level. New invocations on this interface should be 3139 * prohibited. Only a single owner may exist for a group. 3140 */ 3141 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3142 { 3143 int ret = 0; 3144 3145 if (WARN_ON(!owner)) 3146 return -EINVAL; 3147 3148 mutex_lock(&group->mutex); 3149 if (group->owner_cnt) { 3150 ret = -EPERM; 3151 goto unlock_out; 3152 } 3153 3154 ret = __iommu_take_dma_ownership(group, owner); 3155 unlock_out: 3156 mutex_unlock(&group->mutex); 3157 3158 return ret; 3159 } 3160 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3161 3162 /** 3163 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3164 * @dev: The device. 3165 * @owner: Caller specified pointer. Used for exclusive ownership. 3166 * 3167 * Claim the DMA ownership of a device. 
Multiple devices in the same group may 3168 * concurrently claim ownership if they present the same owner value. Returns 0 3169 * on success and error code on failure. 3170 */ 3171 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3172 { 3173 struct iommu_group *group; 3174 int ret = 0; 3175 3176 if (WARN_ON(!owner)) 3177 return -EINVAL; 3178 3179 group = iommu_group_get(dev); 3180 if (!group) 3181 return -ENODEV; 3182 3183 mutex_lock(&group->mutex); 3184 if (group->owner_cnt) { 3185 if (group->owner != owner) { 3186 ret = -EPERM; 3187 goto unlock_out; 3188 } 3189 group->owner_cnt++; 3190 goto unlock_out; 3191 } 3192 3193 ret = __iommu_take_dma_ownership(group, owner); 3194 unlock_out: 3195 mutex_unlock(&group->mutex); 3196 iommu_group_put(group); 3197 3198 return ret; 3199 } 3200 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3201 3202 static void __iommu_release_dma_ownership(struct iommu_group *group) 3203 { 3204 if (WARN_ON(!group->owner_cnt || !group->owner || 3205 !xa_empty(&group->pasid_array))) 3206 return; 3207 3208 group->owner_cnt = 0; 3209 group->owner = NULL; 3210 __iommu_group_set_domain_nofail(group, group->default_domain); 3211 } 3212 3213 /** 3214 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3215 * @group: The group 3216 * 3217 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3218 */ 3219 void iommu_group_release_dma_owner(struct iommu_group *group) 3220 { 3221 mutex_lock(&group->mutex); 3222 __iommu_release_dma_ownership(group); 3223 mutex_unlock(&group->mutex); 3224 } 3225 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3226 3227 /** 3228 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3229 * @dev: The device. 3230 * 3231 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3232 */ 3233 void iommu_device_release_dma_owner(struct device *dev) 3234 { 3235 struct iommu_group *group = iommu_group_get(dev); 3236 3237 mutex_lock(&group->mutex); 3238 if (group->owner_cnt > 1) 3239 group->owner_cnt--; 3240 else 3241 __iommu_release_dma_ownership(group); 3242 mutex_unlock(&group->mutex); 3243 iommu_group_put(group); 3244 } 3245 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3246 3247 /** 3248 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3249 * @group: The group. 3250 * 3251 * This provides a status query on a given group. It is racy and only for 3252 * non-binding status reporting.
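 *
 * Example: an illustrative sequence in which a caller claims a device,
 * observes the group as claimed, and releases it again; "owner_cookie" is a
 * placeholder token chosen by the caller:
 *
 *	struct iommu_group *grp = iommu_group_get(dev);
 *
 *	ret = iommu_device_claim_dma_owner(dev, owner_cookie);
 *	if (!ret) {
 *		WARN_ON(!iommu_group_dma_owner_claimed(grp));
 *		...
 *		iommu_device_release_dma_owner(dev);
 *	}
 *	iommu_group_put(grp);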
3253 */ 3254 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3255 { 3256 unsigned int user; 3257 3258 mutex_lock(&group->mutex); 3259 user = group->owner_cnt; 3260 mutex_unlock(&group->mutex); 3261 3262 return user; 3263 } 3264 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3265 3266 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3267 struct iommu_group *group, ioasid_t pasid) 3268 { 3269 struct group_device *device; 3270 int ret = 0; 3271 3272 for_each_group_device(group, device) { 3273 ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); 3274 if (ret) 3275 break; 3276 } 3277 3278 return ret; 3279 } 3280 3281 static void __iommu_remove_group_pasid(struct iommu_group *group, 3282 ioasid_t pasid) 3283 { 3284 struct group_device *device; 3285 const struct iommu_ops *ops; 3286 3287 for_each_group_device(group, device) { 3288 ops = dev_iommu_ops(device->dev); 3289 ops->remove_dev_pasid(device->dev, pasid); 3290 } 3291 } 3292 3293 /* 3294 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3295 * @domain: the iommu domain. 3296 * @dev: the attached device. 3297 * @pasid: the pasid of the device. 3298 * 3299 * Return: 0 on success, or an error. 3300 */ 3301 int iommu_attach_device_pasid(struct iommu_domain *domain, 3302 struct device *dev, ioasid_t pasid) 3303 { 3304 struct iommu_group *group; 3305 void *curr; 3306 int ret; 3307 3308 if (!domain->ops->set_dev_pasid) 3309 return -EOPNOTSUPP; 3310 3311 group = iommu_group_get(dev); 3312 if (!group) 3313 return -ENODEV; 3314 3315 mutex_lock(&group->mutex); 3316 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); 3317 if (curr) { 3318 ret = xa_err(curr) ? : -EBUSY; 3319 goto out_unlock; 3320 } 3321 3322 ret = __iommu_set_group_pasid(domain, group, pasid); 3323 if (ret) { 3324 __iommu_remove_group_pasid(group, pasid); 3325 xa_erase(&group->pasid_array, pasid); 3326 } 3327 out_unlock: 3328 mutex_unlock(&group->mutex); 3329 iommu_group_put(group); 3330 3331 return ret; 3332 } 3333 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3334 3335 /* 3336 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3337 * @domain: the iommu domain. 3338 * @dev: the attached device. 3339 * @pasid: the pasid of the device. 3340 * 3341 * The @domain must have been attached to @pasid of the @dev with 3342 * iommu_attach_device_pasid(). 3343 */ 3344 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3345 ioasid_t pasid) 3346 { 3347 struct iommu_group *group = iommu_group_get(dev); 3348 3349 mutex_lock(&group->mutex); 3350 __iommu_remove_group_pasid(group, pasid); 3351 WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); 3352 mutex_unlock(&group->mutex); 3353 3354 iommu_group_put(group); 3355 } 3356 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3357 3358 /* 3359 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev 3360 * @dev: the queried device 3361 * @pasid: the pasid of the device 3362 * @type: matched domain type, 0 for any match 3363 * 3364 * This is a variant of iommu_get_domain_for_dev(). It returns the existing 3365 * domain attached to pasid of a device. Callers must hold a lock around this 3366 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of 3367 * type is being manipulated. This API does not internally resolve races with 3368 * attach/detach. 3369 * 3370 * Return: attached domain on success, NULL otherwise. 
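 *
 * Example: an illustrative lookup after a PASID attach; the pasid value and
 * the caller's surrounding locking are assumptions:
 *
 *	ret = iommu_attach_device_pasid(domain, dev, pasid);
 *	if (ret)
 *		return ret;
 *	...
 *	WARN_ON(iommu_get_domain_for_dev_pasid(dev, pasid, 0) != domain);
 *	...
 *	iommu_detach_device_pasid(domain, dev, pasid);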
3371 */ 3372 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, 3373 ioasid_t pasid, 3374 unsigned int type) 3375 { 3376 struct iommu_domain *domain; 3377 struct iommu_group *group; 3378 3379 group = iommu_group_get(dev); 3380 if (!group) 3381 return NULL; 3382 3383 xa_lock(&group->pasid_array); 3384 domain = xa_load(&group->pasid_array, pasid); 3385 if (type && domain && domain->type != type) 3386 domain = ERR_PTR(-EBUSY); 3387 xa_unlock(&group->pasid_array); 3388 iommu_group_put(group); 3389 3390 return domain; 3391 } 3392 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); 3393 3394 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, 3395 struct mm_struct *mm) 3396 { 3397 const struct iommu_ops *ops = dev_iommu_ops(dev); 3398 struct iommu_domain *domain; 3399 3400 domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); 3401 if (!domain) 3402 return NULL; 3403 3404 domain->type = IOMMU_DOMAIN_SVA; 3405 mmgrab(mm); 3406 domain->mm = mm; 3407 domain->iopf_handler = iommu_sva_handle_iopf; 3408 domain->fault_data = mm; 3409 3410 return domain; 3411 } 3412
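/*
 * Example: an illustrative sketch of how an SVA-capable driver could wire a
 * process mm to a PASID using the helpers above. The pasid value and the
 * error handling are placeholders, and most drivers are expected to use the
 * higher-level iommu_sva_bind_device() interface instead of open-coding this:
 *
 *	struct iommu_domain *sva_domain;
 *
 *	sva_domain = iommu_sva_domain_alloc(dev, current->mm);
 *	if (!sva_domain)
 *		return -ENOMEM;
 *	ret = iommu_attach_device_pasid(sva_domain, dev, pasid);
 *	if (ret) {
 *		iommu_domain_free(sva_domain);
 *		return ret;
 *	}
 */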