// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"iommu: " fmt

#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitops.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>

#include "dma-iommu.h"

#include "iommu-sva.h"

static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);

static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;

struct iommu_group {
	struct kobject kobj;
	struct kobject *devices_kobj;
	struct list_head devices;
	struct xarray pasid_array;
	struct mutex mutex;
	void *iommu_data;
	void (*iommu_data_release)(void *iommu_data);
	char *name;
	int id;
	struct iommu_domain *default_domain;
	struct iommu_domain *blocking_domain;
	struct iommu_domain *domain;
	struct list_head entry;
	unsigned int owner_cnt;
	void *owner;
};

struct group_device {
	struct list_head list;
	struct device *dev;
	char *name;
};

/* Iterate over each struct group_device in a struct iommu_group */
#define for_each_group_device(group, pos) \
	list_for_each_entry(pos, &(group)->devices, list)

struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};

static const char * const iommu_group_resv_type_string[] = {
	[IOMMU_RESV_DIRECT]		= "direct",
	[IOMMU_RESV_DIRECT_RELAXABLE]	= "direct-relaxable",
	[IOMMU_RESV_RESERVED]		= "reserved",
	[IOMMU_RESV_MSI]		= "msi",
	[IOMMU_RESV_SW_MSI]		= "msi",
};

#define IOMMU_CMD_LINE_DMA_API		BIT(0)
#define IOMMU_CMD_LINE_STRICT		BIT(1)

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static int iommu_alloc_default_domain(struct iommu_group *group,
				      struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
						 unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
				struct iommu_group *group);

enum {
	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};

static int __iommu_device_set_domain(struct iommu_group *group,
				     struct device *dev,
				     struct iommu_domain *new_domain,
				     unsigned int flags);
static int __iommu_group_set_domain_internal(struct iommu_group *group,
					     struct iommu_domain *new_domain,
					     unsigned int flags);
static int __iommu_group_set_domain(struct iommu_group *group,
				    struct iommu_domain *new_domain)
{
	return __iommu_group_set_domain_internal(group, new_domain, 0);
}
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
					    struct iommu_domain *new_domain)
{
	WARN_ON(__iommu_group_set_domain_internal(
		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
}

static int iommu_create_device_direct_mappings(struct iommu_group *group,
					       struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count);

#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =	\
	__ATTR(_name, _mode, _show, _store)

#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)

static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);

static struct bus_type * const iommu_buses[] = {
	&platform_bus_type,
#ifdef CONFIG_PCI
	&pci_bus_type,
#endif
#ifdef CONFIG_ARM_AMBA
	&amba_bustype,
#endif
#ifdef CONFIG_FSL_MC_BUS
	&fsl_mc_bus_type,
#endif
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
	&host1x_context_device_bus_type,
#endif
#ifdef CONFIG_CDX_BUS
	&cdx_bus_type,
#endif
};

/*
 * Use a function instead of an array here because the domain-type is a
 * bit-field, so an array would waste memory.
 */
static const char *iommu_domain_type_str(unsigned int t)
{
	switch (t) {
	case IOMMU_DOMAIN_BLOCKED:
		return "Blocked";
	case IOMMU_DOMAIN_IDENTITY:
		return "Passthrough";
	case IOMMU_DOMAIN_UNMANAGED:
		return "Unmanaged";
	case IOMMU_DOMAIN_DMA:
	case IOMMU_DOMAIN_DMA_FQ:
		return "Translated";
	default:
		return "Unknown";
	}
}

static int __init iommu_subsys_init(void)
{
	struct notifier_block *nb;

	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
			iommu_set_default_passthrough(false);
		else
			iommu_set_default_translated(false);

		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
			iommu_set_default_translated(false);
		}
	}

	if (!iommu_default_passthrough() && !iommu_dma_strict)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;

	pr_info("Default domain type: %s%s\n",
		iommu_domain_type_str(iommu_def_domain_type),
		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
			" (set via kernel command line)" : "");

	if (!iommu_default_passthrough())
		pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
			iommu_dma_strict ? "strict" : "lazy",
			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
				" (set via kernel command line)" : "");

	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
	if (!nb)
		return -ENOMEM;

	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
		nb[i].notifier_call = iommu_bus_notifier;
		bus_register_notifier(iommu_buses[i], &nb[i]);
	}

	return 0;
}
subsys_initcall(iommu_subsys_init);

static int remove_iommu_group(struct device *dev, void *data)
{
	if (dev->iommu && dev->iommu->iommu_dev == data)
		iommu_release_device(dev);

	return 0;
}

/**
 * iommu_device_register() - Register an IOMMU hardware instance
 * @iommu: IOMMU handle for the instance
 * @ops: IOMMU ops to associate with the instance
 * @hwdev: (optional) actual instance device, used for fwnode lookup
 *
 * Return: 0 on success, or an error.
 */
int iommu_device_register(struct iommu_device *iommu,
			  const struct iommu_ops *ops, struct device *hwdev)
{
	int err = 0;

	/* We need to be able to take module references appropriately */
	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
		return -EINVAL;
	/*
	 * Temporarily enforce global restriction to a single driver. This was
	 * already the de-facto behaviour, since any possible combination of
	 * existing drivers would compete for at least the PCI or platform bus.
	 */
	if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops)
		return -EBUSY;

	iommu->ops = ops;
	if (hwdev)
		iommu->fwnode = dev_fwnode(hwdev);

	spin_lock(&iommu_device_lock);
	list_add_tail(&iommu->list, &iommu_device_list);
	spin_unlock(&iommu_device_lock);

	for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
		iommu_buses[i]->iommu_ops = ops;
		err = bus_iommu_probe(iommu_buses[i]);
	}
	if (err)
		iommu_device_unregister(iommu);
	return err;
}
EXPORT_SYMBOL_GPL(iommu_device_register);

void iommu_device_unregister(struct iommu_device *iommu)
{
	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
		bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);

	spin_lock(&iommu_device_lock);
	list_del(&iommu->list);
	spin_unlock(&iommu_device_lock);
}
EXPORT_SYMBOL_GPL(iommu_device_unregister);

static struct dev_iommu *dev_iommu_get(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;

	if (param)
		return param;

	param = kzalloc(sizeof(*param), GFP_KERNEL);
	if (!param)
		return NULL;

	mutex_init(&param->lock);
	dev->iommu = param;
	return param;
}

static void dev_iommu_free(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;

	dev->iommu = NULL;
	if (param->fwspec) {
		fwnode_handle_put(param->fwspec->iommu_fwnode);
		kfree(param->fwspec);
	}
	kfree(param);
}

static u32 dev_iommu_get_max_pasids(struct device *dev)
{
	u32 max_pasids = 0, bits = 0;
	int ret;

	if (dev_is_pci(dev)) {
		ret = pci_max_pasids(to_pci_dev(dev));
		if (ret > 0)
			max_pasids = ret;
	} else {
		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
		if (!ret)
			max_pasids = 1UL << bits;
	}

	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
}

static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
	const struct iommu_ops *ops = dev->bus->iommu_ops;
	struct iommu_device *iommu_dev;
	struct iommu_group *group;
	static DEFINE_MUTEX(iommu_probe_device_lock);
	int ret;

	if (!ops)
		return -ENODEV;
	/*
	 * Serialise to avoid races between IOMMU drivers registering in
	 * parallel and/or the "replay" calls from ACPI/OF code via client
	 * driver probe. Once the latter have been cleaned up we should
	 * probably be able to use device_lock() here to minimise the scope,
	 * but for now enforcing a simple global ordering is fine.
	 */
	mutex_lock(&iommu_probe_device_lock);
	if (!dev_iommu_get(dev)) {
		ret = -ENOMEM;
		goto err_unlock;
	}

	if (!try_module_get(ops->owner)) {
		ret = -EINVAL;
		goto err_free;
	}

	iommu_dev = ops->probe_device(dev);
	if (IS_ERR(iommu_dev)) {
		ret = PTR_ERR(iommu_dev);
		goto out_module_put;
	}

	dev->iommu->iommu_dev = iommu_dev;
	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
	if (ops->is_attach_deferred)
		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_release;
	}

	mutex_lock(&group->mutex);
	if (group_list && !group->default_domain && list_empty(&group->entry))
		list_add_tail(&group->entry, group_list);
	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	mutex_unlock(&iommu_probe_device_lock);
	iommu_device_link(iommu_dev, dev);

	return 0;

out_release:
	if (ops->release_device)
		ops->release_device(dev);

out_module_put:
	module_put(ops->owner);

err_free:
	dev_iommu_free(dev);

err_unlock:
	mutex_unlock(&iommu_probe_device_lock);

	return ret;
}

int iommu_probe_device(struct device *dev)
{
	const struct iommu_ops *ops;
	struct iommu_group *group;
	int ret;

	ret = __iommu_probe_device(dev, NULL);
	if (ret)
		goto err_out;

	group = iommu_group_get(dev);
	if (!group) {
		ret = -ENODEV;
		goto err_release;
	}

	mutex_lock(&group->mutex);

	if (group->domain) {
		ret = __iommu_device_set_domain(group, dev, group->domain, 0);
	} else if (!group->default_domain) {
		/*
		 * Try to allocate a default domain - needs support from the
		 * IOMMU driver. There are still some drivers which don't
		 * support default domains, so the return value is not yet
		 * checked.
		 */
		iommu_alloc_default_domain(group, dev);
		group->domain = NULL;
		if (group->default_domain)
			ret = __iommu_group_set_domain(group,
						       group->default_domain);

		/*
		 * We assume that the iommu driver starts up the device in
		 * 'set_platform_dma_ops' mode if it does not support default
		 * domains.
		 */
	}
	if (ret)
		goto err_unlock;

	iommu_create_device_direct_mappings(group, dev);

	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	ops = dev_iommu_ops(dev);
	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;

err_unlock:
	mutex_unlock(&group->mutex);
	iommu_group_put(group);
err_release:
	iommu_release_device(dev);

err_out:
	return ret;

}

/*
 * Remove a device from a group's device list and return the group device
 * if successful.
 */
static struct group_device *
__iommu_group_remove_device(struct iommu_group *group, struct device *dev)
{
	struct group_device *device;

	lockdep_assert_held(&group->mutex);
	for_each_group_device(group, device) {
		if (device->dev == dev) {
			list_del(&device->list);
			return device;
		}
	}

	return NULL;
}

/*
 * Release a device from its group and decrement the iommu group reference
 * count.
 */
static void __iommu_group_release_device(struct iommu_group *group,
					 struct group_device *grp_dev)
{
	struct device *dev = grp_dev->dev;

	sysfs_remove_link(group->devices_kobj, grp_dev->name);
	sysfs_remove_link(&dev->kobj, "iommu_group");

	trace_remove_device_from_group(group->id, dev);

	kfree(grp_dev->name);
	kfree(grp_dev);
	dev->iommu_group = NULL;
	kobject_put(group->devices_kobj);
}

static void iommu_release_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *device;
	const struct iommu_ops *ops;

	if (!dev->iommu || !group)
		return;

	iommu_device_unlink(dev->iommu->iommu_dev, dev);

	mutex_lock(&group->mutex);
	device = __iommu_group_remove_device(group, dev);

	/*
	 * If the group has become empty then ownership must have been released,
	 * and the current domain must be set back to NULL or the default
	 * domain.
	 */
	if (list_empty(&group->devices))
		WARN_ON(group->owner_cnt ||
			group->domain != group->default_domain);

	/*
	 * release_device() must stop using any attached domain on the device.
	 * If there are still other devices in the group they are not affected
	 * by this callback.
	 *
	 * The IOMMU driver must set the device to either an identity or
	 * blocking translation and stop using any domain pointer, as it is
	 * going to be freed.
	 */
	ops = dev_iommu_ops(dev);
	if (ops->release_device)
		ops->release_device(dev);
	mutex_unlock(&group->mutex);

	if (device)
		__iommu_group_release_device(group, device);

	module_put(ops->owner);
	dev_iommu_free(dev);
}

static int __init iommu_set_def_domain_type(char *str)
{
	bool pt;
	int ret;

	ret = kstrtobool(str, &pt);
	if (ret)
		return ret;

	if (pt)
		iommu_set_default_passthrough(true);
	else
		iommu_set_default_translated(true);

	return 0;
}
early_param("iommu.passthrough", iommu_set_def_domain_type);

static int __init iommu_dma_setup(char *str)
{
	int ret = kstrtobool(str, &iommu_dma_strict);

	if (!ret)
		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
	return ret;
}
early_param("iommu.strict", iommu_dma_setup);

void iommu_set_dma_strict(void)
{
	iommu_dma_strict = true;
	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
}

static ssize_t iommu_group_attr_show(struct kobject *kobj,
				     struct attribute *__attr, char *buf)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->show)
		ret = attr->show(group, buf);
	return ret;
}

static ssize_t iommu_group_attr_store(struct kobject *kobj,
				      struct attribute *__attr,
				      const char *buf, size_t count)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->store)
		ret = attr->store(group, buf, count);
	return ret;
}

static const struct sysfs_ops iommu_group_sysfs_ops = {
	.show = iommu_group_attr_show,
	.store = iommu_group_attr_store,
};

static int iommu_group_create_file(struct iommu_group *group,
				   struct iommu_group_attribute *attr)
{
	return sysfs_create_file(&group->kobj, &attr->attr);
}

static void iommu_group_remove_file(struct iommu_group *group,
				    struct iommu_group_attribute *attr)
{
	sysfs_remove_file(&group->kobj, &attr->attr);
}

static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
{
	return sysfs_emit(buf, "%s\n", group->name);
}

/**
 * iommu_insert_resv_region - Insert a new region in the
 * list of reserved regions.
 * @new: new region to insert
 * @regions: list of regions
 *
 * Elements are sorted by start address and overlapping segments
 * of the same type are merged.
 */
static int iommu_insert_resv_region(struct iommu_resv_region *new,
				    struct list_head *regions)
{
	struct iommu_resv_region *iter, *tmp, *nr, *top;
	LIST_HEAD(stack);

	nr = iommu_alloc_resv_region(new->start, new->length,
				     new->prot, new->type, GFP_KERNEL);
	if (!nr)
		return -ENOMEM;

	/* First add the new element based on start address sorting */
	list_for_each_entry(iter, regions, list) {
		if (nr->start < iter->start ||
		    (nr->start == iter->start && nr->type <= iter->type))
			break;
	}
	list_add_tail(&nr->list, &iter->list);

	/* Merge overlapping segments of type nr->type in @regions, if any */
	list_for_each_entry_safe(iter, tmp, regions, list) {
		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;

		/* no merge needed on elements of different types than @new */
		if (iter->type != new->type) {
			list_move_tail(&iter->list, &stack);
			continue;
		}

		/* look for the last stack element of same type as @iter */
		list_for_each_entry_reverse(top, &stack, list)
			if (top->type == iter->type)
				goto check_overlap;

		list_move_tail(&iter->list, &stack);
		continue;

check_overlap:
		top_end = top->start + top->length - 1;

		if (iter->start > top_end + 1) {
			list_move_tail(&iter->list, &stack);
		} else {
			top->length = max(top_end, iter_end) - top->start + 1;
			list_del(&iter->list);
			kfree(iter);
		}
	}
	list_splice(&stack, regions);
	return 0;
}

static int
iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
				 struct list_head *group_resv_regions)
{
	struct iommu_resv_region *entry;
	int ret = 0;

	list_for_each_entry(entry, dev_resv_regions, list) {
		ret = iommu_insert_resv_region(entry, group_resv_regions);
		if (ret)
			break;
	}
	return ret;
}

int iommu_get_group_resv_regions(struct iommu_group *group,
				 struct list_head *head)
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		struct list_head dev_resv_regions;

		/*
		 * Non-API groups still expose reserved_regions in sysfs,
		 * so filter out calls that get here that way.
		 */
		if (!device->dev->iommu)
			break;

		INIT_LIST_HEAD(&dev_resv_regions);
		iommu_get_resv_regions(device->dev, &dev_resv_regions);
		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
		iommu_put_resv_regions(device->dev, &dev_resv_regions);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);

static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
					     char *buf)
{
	struct iommu_resv_region *region, *next;
	struct list_head group_resv_regions;
	int offset = 0;

	INIT_LIST_HEAD(&group_resv_regions);
	iommu_get_group_resv_regions(group, &group_resv_regions);

	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
					(long long)region->start,
					(long long)(region->start +
						    region->length - 1),
					iommu_group_resv_type_string[region->type]);
		kfree(region);
	}

	return offset;
}

static ssize_t iommu_group_show_type(struct iommu_group *group,
				     char *buf)
{
	char *type = "unknown";

	mutex_lock(&group->mutex);
	if (group->default_domain) {
		switch (group->default_domain->type) {
		case IOMMU_DOMAIN_BLOCKED:
			type = "blocked";
			break;
		case IOMMU_DOMAIN_IDENTITY:
			type = "identity";
			break;
		case IOMMU_DOMAIN_UNMANAGED:
			type = "unmanaged";
			break;
		case IOMMU_DOMAIN_DMA:
			type = "DMA";
			break;
		case IOMMU_DOMAIN_DMA_FQ:
			type = "DMA-FQ";
			break;
		}
	}
	mutex_unlock(&group->mutex);

	return sysfs_emit(buf, "%s\n", type);
}

static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static IOMMU_GROUP_ATTR(reserved_regions, 0444,
			iommu_group_show_resv_regions, NULL);

static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
			iommu_group_store_type);

static void iommu_group_release(struct kobject *kobj)
{
	struct iommu_group *group = to_iommu_group(kobj);

	pr_debug("Releasing group %d\n", group->id);

	if (group->iommu_data_release)
		group->iommu_data_release(group->iommu_data);

	ida_free(&iommu_group_ida, group->id);

	if (group->default_domain)
		iommu_domain_free(group->default_domain);
	if (group->blocking_domain)
		iommu_domain_free(group->blocking_domain);

	kfree(group->name);
	kfree(group);
}

static const struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};

/**
 * iommu_group_alloc - Allocate a new group
 *
 * This function is called by an iommu driver to allocate a new iommu
 * group. The iommu group represents the minimum granularity of the iommu.
 * Upon successful return, the caller holds a reference to the supplied
 * group in order to hold the group until devices are added. Use
 * iommu_group_put() to release this extra reference count, allowing the
 * group to be automatically reclaimed once it has no devices or external
 * references.
 */
struct iommu_group *iommu_group_alloc(void)
{
	struct iommu_group *group;
	int ret;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->kobj.kset = iommu_group_kset;
	mutex_init(&group->mutex);
	INIT_LIST_HEAD(&group->devices);
	INIT_LIST_HEAD(&group->entry);
	xa_init(&group->pasid_array);

	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
	if (ret < 0) {
		kfree(group);
		return ERR_PTR(ret);
	}
	group->id = ret;

	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
				   NULL, "%d", group->id);
	if (ret) {
		kobject_put(&group->kobj);
		return ERR_PTR(ret);
	}

	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
	if (!group->devices_kobj) {
		kobject_put(&group->kobj); /* triggers .release & free */
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * The devices_kobj holds a reference on the group kobject, so
	 * as long as that exists so will the group. We can therefore
	 * use the devices_kobj for reference counting.
	 */
	kobject_put(&group->kobj);

	ret = iommu_group_create_file(group,
				      &iommu_group_attr_reserved_regions);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	ret = iommu_group_create_file(group, &iommu_group_attr_type);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	pr_debug("Allocated group %d\n", group->id);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_alloc);

/**
 * iommu_group_get_iommudata - retrieve iommu_data registered for a group
 * @group: the group
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations. This function provides a way to retrieve it. Caller
 * should hold a group reference.
 */
void *iommu_group_get_iommudata(struct iommu_group *group)
{
	return group->iommu_data;
}
EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);

/**
 * iommu_group_set_iommudata - set iommu_data for a group
 * @group: the group
 * @iommu_data: new data
 * @release: release function for iommu_data
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations. This function provides a way to set the data after
 * the group has been allocated. Caller should hold a group reference.
 */
void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
			       void (*release)(void *iommu_data))
{
	group->iommu_data = iommu_data;
	group->iommu_data_release = release;
}
EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
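/*
 * Illustrative sketch (not part of this file): one way an IOMMU driver might
 * pair iommu_group_alloc() with iommu_group_set_iommudata(). The
 * struct my_group_data type and my_group_release() helper are hypothetical.
 *
 *	static void my_group_release(void *iommu_data)
 *	{
 *		kfree(iommu_data);
 *	}
 *
 *	static struct iommu_group *my_driver_device_group(struct device *dev)
 *	{
 *		struct my_group_data *data;
 *		struct iommu_group *group;
 *
 *		group = iommu_group_alloc();
 *		if (IS_ERR(group))
 *			return group;
 *
 *		data = kzalloc(sizeof(*data), GFP_KERNEL);
 *		if (data)
 *			iommu_group_set_iommudata(group, data, my_group_release);
 *		return group;
 *	}
 */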
/**
 * iommu_group_set_name - set name for a group
 * @group: the group
 * @name: name
 *
 * Allow iommu driver to set a name for a group. When set it will
 * appear in a name attribute file under the group in sysfs.
 */
int iommu_group_set_name(struct iommu_group *group, const char *name)
{
	int ret;

	if (group->name) {
		iommu_group_remove_file(group, &iommu_group_attr_name);
		kfree(group->name);
		group->name = NULL;
		if (!name)
			return 0;
	}

	group->name = kstrdup(name, GFP_KERNEL);
	if (!group->name)
		return -ENOMEM;

	ret = iommu_group_create_file(group, &iommu_group_attr_name);
	if (ret) {
		kfree(group->name);
		group->name = NULL;
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);

static int iommu_create_device_direct_mappings(struct iommu_group *group,
					       struct device *dev)
{
	struct iommu_domain *domain = group->default_domain;
	struct iommu_resv_region *entry;
	struct list_head mappings;
	unsigned long pg_size;
	int ret = 0;

	if (!domain || !iommu_is_dma_domain(domain))
		return 0;

	BUG_ON(!domain->pgsize_bitmap);

	pg_size = 1UL << __ffs(domain->pgsize_bitmap);
	INIT_LIST_HEAD(&mappings);

	iommu_get_resv_regions(dev, &mappings);

	/* We need to consider overlapping regions for different devices */
	list_for_each_entry(entry, &mappings, list) {
		dma_addr_t start, end, addr;
		size_t map_size = 0;

		start = ALIGN(entry->start, pg_size);
		end = ALIGN(entry->start + entry->length, pg_size);

		if (entry->type != IOMMU_RESV_DIRECT &&
		    entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		for (addr = start; addr <= end; addr += pg_size) {
			phys_addr_t phys_addr;

			if (addr == end)
				goto map_end;

			phys_addr = iommu_iova_to_phys(domain, addr);
			if (!phys_addr) {
				map_size += pg_size;
				continue;
			}

map_end:
			if (map_size) {
				ret = iommu_map(domain, addr - map_size,
						addr - map_size, map_size,
						entry->prot, GFP_KERNEL);
				if (ret)
					goto out;
				map_size = 0;
			}
		}

	}

	iommu_flush_iotlb_all(domain);

out:
	iommu_put_resv_regions(dev, &mappings);

	return ret;
}

/**
 * iommu_group_add_device - add a device to an iommu group
 * @group: the group into which to add the device (reference should be held)
 * @dev: the device
 *
 * This function is called by an iommu driver to add a device into a
 * group. Adding a device increments the group reference count.
 */
int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
	int ret, i = 0;
	struct group_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return -ENOMEM;

	device->dev = dev;

	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
	if (ret)
		goto err_free_device;

	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
rename:
	if (!device->name) {
		ret = -ENOMEM;
		goto err_remove_link;
	}

	ret = sysfs_create_link_nowarn(group->devices_kobj,
				       &dev->kobj, device->name);
	if (ret) {
		if (ret == -EEXIST && i >= 0) {
			/*
			 * Account for the slim chance of collision
			 * and append an instance to the name.
			 */
			kfree(device->name);
			device->name = kasprintf(GFP_KERNEL, "%s.%d",
						 kobject_name(&dev->kobj), i++);
			goto rename;
		}
		goto err_free_name;
	}

	kobject_get(group->devices_kobj);

	dev->iommu_group = group;

	mutex_lock(&group->mutex);
	list_add_tail(&device->list, &group->devices);
	mutex_unlock(&group->mutex);
	trace_add_device_to_group(group->id, dev);

	dev_info(dev, "Adding to iommu group %d\n", group->id);

	return 0;

err_free_name:
	kfree(device->name);
err_remove_link:
	sysfs_remove_link(&dev->kobj, "iommu_group");
err_free_device:
	kfree(device);
	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);

/**
 * iommu_group_remove_device - remove a device from its current group
 * @dev: device to be removed
 *
 * This function is called by an iommu driver to remove the device from
 * its current group. This decrements the iommu group reference count.
 */
void iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *device;

	if (!group)
		return;

	dev_info(dev, "Removing from iommu group %d\n", group->id);

	mutex_lock(&group->mutex);
	device = __iommu_group_remove_device(group, dev);
	mutex_unlock(&group->mutex);

	if (device)
		__iommu_group_release_device(group, device);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
				      int (*fn)(struct device *, void *))
{
	struct group_device *device;
	int ret = 0;

	for_each_group_device(group, device) {
		ret = fn(device->dev, data);
		if (ret)
			break;
	}
	return ret;
}

/**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
 * @data: caller opaque data to be passed to callback function
 * @fn: caller supplied callback function
 *
 * This function is called by group users to iterate over group devices.
 * Callers should hold a reference count to the group during callback.
 * The group->mutex is held across callbacks, which will block calls to
 * iommu_group_add/remove_device.
 */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
			     int (*fn)(struct device *, void *))
{
	int ret;

	mutex_lock(&group->mutex);
	ret = __iommu_group_for_each_dev(group, data, fn);
	mutex_unlock(&group->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
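/*
 * Illustrative sketch (not part of this file): counting the devices in a
 * group with iommu_group_for_each_dev(). The count_dev() callback is
 * hypothetical; returning non-zero from it would stop the iteration.
 *
 *	static int count_dev(struct device *dev, void *data)
 *	{
 *		int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *
 *	iommu_group_for_each_dev(group, &count, count_dev);
 */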
/**
 * iommu_group_get - Return the group for a device and increment reference
 * @dev: get the group that this device belongs to
 *
 * This function is called by iommu drivers and users to get the group
 * for the specified device. If found, the group is returned and the group
 * reference is incremented, else NULL.
 */
struct iommu_group *iommu_group_get(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		kobject_get(group->devices_kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

/**
 * iommu_group_ref_get - Increment reference on a group
 * @group: the group to use, must not be NULL
 *
 * This function is called by iommu drivers to take additional references on an
 * existing group. Returns the given group for convenience.
 */
struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
{
	kobject_get(group->devices_kobj);
	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_ref_get);

/**
 * iommu_group_put - Decrement group reference
 * @group: the group to use
 *
 * This function is called by iommu drivers and users to release the
 * iommu group. Once the reference count is zero, the group is released.
 */
void iommu_group_put(struct iommu_group *group)
{
	if (group)
		kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

/**
 * iommu_register_device_fault_handler() - Register a device fault handler
 * @dev: the device
 * @handler: the fault handler
 * @data: private data passed as argument to the handler
 *
 * When an IOMMU fault event is received, this handler gets called with the
 * fault event and data as argument. The handler should return 0 on success. If
 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
 * complete the fault by calling iommu_page_response() with one of the following
 * response codes:
 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
 * - IOMMU_PAGE_RESP_INVALID: terminate the fault
 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
 *   page faults if possible.
 *
 * Return 0 if the fault handler was installed successfully, or an error.
 */
int iommu_register_device_fault_handler(struct device *dev,
					iommu_dev_fault_handler_t handler,
					void *data)
{
	struct dev_iommu *param = dev->iommu;
	int ret = 0;

	if (!param)
		return -EINVAL;

	mutex_lock(&param->lock);
	/* Only allow one fault handler registered for each device */
	if (param->fault_param) {
		ret = -EBUSY;
		goto done_unlock;
	}

	get_device(dev);
	param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
	if (!param->fault_param) {
		put_device(dev);
		ret = -ENOMEM;
		goto done_unlock;
	}
	param->fault_param->handler = handler;
	param->fault_param->data = data;
	mutex_init(&param->fault_param->lock);
	INIT_LIST_HEAD(&param->fault_param->faults);

done_unlock:
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
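/*
 * Illustrative sketch (not part of this file): how a consumer might install
 * and later remove a device fault handler. my_iopf_handler() and my_data are
 * hypothetical.
 *
 *	static int my_iopf_handler(struct iommu_fault *fault, void *data)
 *	{
 *		// queue the fault and complete it later via iommu_page_response()
 *		return 0;
 *	}
 *
 *	ret = iommu_register_device_fault_handler(dev, my_iopf_handler, my_data);
 *	if (ret)
 *		return ret;
 *	...
 *	iommu_unregister_device_fault_handler(dev);
 */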
/**
 * iommu_unregister_device_fault_handler() - Unregister the device fault handler
 * @dev: the device
 *
 * Remove the device fault handler installed with
 * iommu_register_device_fault_handler().
 *
 * Return 0 on success, or an error.
 */
int iommu_unregister_device_fault_handler(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;
	int ret = 0;

	if (!param)
		return -EINVAL;

	mutex_lock(&param->lock);

	if (!param->fault_param)
		goto unlock;

	/* we cannot unregister handler if there are pending faults */
	if (!list_empty(&param->fault_param->faults)) {
		ret = -EBUSY;
		goto unlock;
	}

	kfree(param->fault_param);
	param->fault_param = NULL;
	put_device(dev);
unlock:
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. When this function fails and the fault is recoverable, it is the
 * caller's responsibility to complete the fault.
 *
 * Return 0 on success, or an error.
 */
int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
{
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_event *evt_pending = NULL;
	struct iommu_fault_param *fparam;
	int ret = 0;

	if (!param || !evt)
		return -EINVAL;

	/* we only report device fault if there is a handler registered */
	mutex_lock(&param->lock);
	fparam = param->fault_param;
	if (!fparam || !fparam->handler) {
		ret = -EINVAL;
		goto done_unlock;
	}

	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
				      GFP_KERNEL);
		if (!evt_pending) {
			ret = -ENOMEM;
			goto done_unlock;
		}
		mutex_lock(&fparam->lock);
		list_add_tail(&evt_pending->list, &fparam->faults);
		mutex_unlock(&fparam->lock);
	}

	ret = fparam->handler(&evt->fault, fparam->data);
	if (ret && evt_pending) {
		mutex_lock(&fparam->lock);
		list_del(&evt_pending->list);
		mutex_unlock(&fparam->lock);
		kfree(evt_pending);
	}
done_unlock:
	mutex_unlock(&param->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);

int iommu_page_response(struct device *dev,
			struct iommu_page_response *msg)
{
	bool needs_pasid;
	int ret = -EINVAL;
	struct iommu_fault_event *evt;
	struct iommu_fault_page_request *prm;
	struct dev_iommu *param = dev->iommu;
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;

	if (!ops->page_response)
		return -ENODEV;

	if (!param || !param->fault_param)
		return -EINVAL;

	if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
	    msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
		return -EINVAL;

	/* Only send response if there is a fault report pending */
	mutex_lock(&param->fault_param->lock);
	if (list_empty(&param->fault_param->faults)) {
		dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
		goto done_unlock;
	}
	/*
	 * Check if we have a matching page request pending to respond,
	 * otherwise return -EINVAL
	 */
	list_for_each_entry(evt, &param->fault_param->faults, list) {
		prm = &evt->fault.prm;
		if (prm->grpid != msg->grpid)
			continue;

		/*
		 * If the PASID is required, the corresponding request is
		 * matched using the group ID, the PASID valid bit and the PASID
		 * value. Otherwise only the group ID matches request and
		 * response.
		 */
		needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
		if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
			continue;

		if (!needs_pasid && has_pasid) {
			/* No big deal, just clear it. */
			msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
			msg->pasid = 0;
		}

		ret = ops->page_response(dev, evt, msg);
		list_del(&evt->list);
		kfree(evt);
		break;
	}

done_unlock:
	mutex_unlock(&param->fault_param->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_page_response);

/**
 * iommu_group_id - Return ID for a group
 * @group: the group to ID
 *
 * Return the unique ID for the group matching the sysfs group number.
 */
int iommu_group_id(struct iommu_group *group)
{
	return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);

static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding. This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as they pass through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * For multifunction devices which are not isolated from each other, find
 * all the other non-isolated functions and look for existing groups. For
 * each function, we also need to look for aliases to or from other devices
 * that may already have a group.
 */
static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
							unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
		return NULL;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus ||
		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
			continue;

		group = get_pci_alias_group(tmp, devfns);
		if (group) {
			pci_dev_put(tmp);
			return group;
		}
	}

	return NULL;
}

/*
 * Look for aliases to or from the given device for existing groups. DMA
 * aliases are only supported on the same bus, therefore the search
 * space is quite small (especially since we're really only looking at PCIe
 * devices, and therefore only expect multiple slots on the root complex or
 * downstream switch ports). It's conceivable though that a pair of
 * multifunction devices could have aliases between them that would cause a
 * loop. To prevent this, we use a bitmap to track where we've been.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
		return NULL;

	group = iommu_group_get(&pdev->dev);
	if (group)
		return group;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus)
			continue;

		/* We alias them or they alias us */
		if (pci_devs_are_dma_aliases(pdev, tmp)) {
			group = get_pci_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}

			group = get_pci_function_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}
		}
	}

	return NULL;
}

struct group_for_pci_data {
	struct pci_dev *pdev;
	struct iommu_group *group;
};

/*
 * DMA alias iterator callback, return the last seen device. Stop and return
 * the IOMMU group if we find one along the way.
 */
static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct group_for_pci_data *data = opaque;

	data->pdev = pdev;
	data->group = iommu_group_get(&pdev->dev);

	return data->group != NULL;
}

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per device.
 */
struct iommu_group *generic_device_group(struct device *dev)
{
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(generic_device_group);

/*
 * Use standard PCI bus topology, isolation features, and DMA alias quirks
 * to find or create an IOMMU group for a device.
 */
struct iommu_group *pci_device_group(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct group_for_pci_data data;
	struct pci_bus *bus;
	struct iommu_group *group = NULL;
	u64 devfns[4] = { 0 };

	if (WARN_ON(!dev_is_pci(dev)))
		return ERR_PTR(-EINVAL);

	/*
	 * Find the upstream DMA alias for the device. A device must not
	 * be aliased due to topology in order to have its own IOMMU group.
	 * If we find an alias along the way that already belongs to a
	 * group, use it.
	 */
	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
		return data.group;

	pdev = data.pdev;

	/*
	 * Continue upstream from the point of minimum IOMMU granularity
	 * due to aliases to the point where devices are protected from
	 * peer-to-peer DMA by PCI ACS. Again, if we find an existing
	 * group, use it.
	 */
	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
		if (!bus->self)
			continue;

		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
			break;

		pdev = bus->self;

		group = iommu_group_get(&pdev->dev);
		if (group)
			return group;
	}

	/*
	 * Look for existing groups on device aliases. If we alias another
	 * device or another device aliases us, use the same group.
	 */
	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/*
	 * Look for existing groups on non-isolated functions on the same
	 * slot and aliases of those functions, if any. No need to clear
	 * the search bitmap, the tested devfns are still valid.
	 */
	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/* No shared group found, allocate new */
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(pci_device_group);

/* Get the IOMMU group for device on fsl-mc bus */
struct iommu_group *fsl_mc_device_group(struct device *dev)
{
	struct device *cont_dev = fsl_mc_cont_dev(dev);
	struct iommu_group *group;

	group = iommu_group_get(cont_dev);
	if (!group)
		group = iommu_group_alloc();
	return group;
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);

static int iommu_get_def_domain_type(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
		return IOMMU_DOMAIN_DMA;

	if (ops->def_domain_type)
		return ops->def_domain_type(dev);

	return 0;
}

static int iommu_group_alloc_default_domain(const struct bus_type *bus,
					    struct iommu_group *group,
					    unsigned int type)
{
	struct iommu_domain *dom;

	dom = __iommu_domain_alloc(bus, type);
	if (!dom && type != IOMMU_DOMAIN_DMA) {
		dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
		if (dom)
			pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
				type, group->name);
	}

	if (!dom)
		return -ENOMEM;

	group->default_domain = dom;
	if (!group->domain)
		group->domain = dom;
	return 0;
}

static int iommu_alloc_default_domain(struct iommu_group *group,
				      struct device *dev)
{
	unsigned int type;

	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;

	return iommu_group_alloc_default_domain(dev->bus, group, type);
}
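/*
 * Illustrative sketch (not part of this file): an IOMMU driver's device_group
 * op commonly dispatches to the helpers above depending on the bus the device
 * sits on. my_device_group() is hypothetical.
 *
 *	static struct iommu_group *my_device_group(struct device *dev)
 *	{
 *		if (dev_is_pci(dev))
 *			return pci_device_group(dev);
 *		if (dev_is_fsl_mc(dev))
 *			return fsl_mc_device_group(dev);
 *		return generic_device_group(dev);
 *	}
 */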
/**
 * iommu_group_get_for_dev - Find or create the IOMMU group for a device
 * @dev: target device
 *
 * This function is intended to be called by IOMMU drivers and extended to
 * support common, bus-defined algorithms when determining or creating the
 * IOMMU group for a device. On success, the caller will hold a reference
 * to the returned IOMMU group, which will already include the provided
 * device. The reference should be released with iommu_group_put().
 */
static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_group *group;
	int ret;

	group = iommu_group_get(dev);
	if (group)
		return group;

	group = ops->device_group(dev);
	if (WARN_ON_ONCE(group == NULL))
		return ERR_PTR(-EINVAL);

	if (IS_ERR(group))
		return group;

	ret = iommu_group_add_device(group, dev);
	if (ret)
		goto out_put_group;

	return group;

out_put_group:
	iommu_group_put(group);

	return ERR_PTR(ret);
}

struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
{
	return group->default_domain;
}

static int probe_iommu_group(struct device *dev, void *data)
{
	struct list_head *group_list = data;
	struct iommu_group *group;
	int ret;

	/* Device is probed already if in a group */
	group = iommu_group_get(dev);
	if (group) {
		iommu_group_put(group);
		return 0;
	}

	ret = __iommu_probe_device(dev, group_list);
	if (ret == -ENODEV)
		ret = 0;

	return ret;
}

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		int ret;

		ret = iommu_probe_device(dev);
		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
		iommu_release_device(dev);
		return NOTIFY_OK;
	}

	return 0;
}

struct __group_domain_type {
	struct device *dev;
	unsigned int type;
};

static int probe_get_default_domain_type(struct device *dev, void *data)
{
	struct __group_domain_type *gtype = data;
	unsigned int type = iommu_get_def_domain_type(dev);

	if (type) {
		if (gtype->type && gtype->type != type) {
			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
				 iommu_domain_type_str(type),
				 dev_name(gtype->dev),
				 iommu_domain_type_str(gtype->type));
			gtype->type = 0;
		}

		if (!gtype->dev) {
			gtype->dev = dev;
			gtype->type = type;
		}
	}

	return 0;
}

static void probe_alloc_default_domain(const struct bus_type *bus,
				       struct iommu_group *group)
{
	struct __group_domain_type gtype;

	memset(&gtype, 0, sizeof(gtype));

	/* Ask for default domain requirements of all devices in the group */
	__iommu_group_for_each_dev(group, &gtype,
				   probe_get_default_domain_type);

	if (!gtype.type)
		gtype.type = iommu_def_domain_type;

	iommu_group_alloc_default_domain(bus, group, gtype.type);

}

static int iommu_group_do_probe_finalize(struct device *dev, void *data)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;
}

static void __iommu_group_dma_finalize(struct iommu_group *group)
{
	__iommu_group_for_each_dev(group, group->default_domain,
				   iommu_group_do_probe_finalize);
}

static int iommu_do_create_direct_mappings(struct device *dev, void *data)
{
	struct iommu_group *group = data;

	iommu_create_device_direct_mappings(group, dev);

	return 0;
}

static int iommu_group_create_direct_mappings(struct iommu_group *group)
{
	return __iommu_group_for_each_dev(group, group,
					  iommu_do_create_direct_mappings);
}

int bus_iommu_probe(const struct bus_type *bus)
{
	struct iommu_group *group, *next;
	LIST_HEAD(group_list);
	int ret;

	/*
	 * This code-path does not allocate the default domain when
	 * creating the iommu group, so do it after the groups are
	 * created.
	 */
	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
	if (ret)
		return ret;

	list_for_each_entry_safe(group, next, &group_list, entry) {
		mutex_lock(&group->mutex);

		/* Remove item from the list */
		list_del_init(&group->entry);

		/* Try to allocate default domain */
		probe_alloc_default_domain(bus, group);

		if (!group->default_domain) {
			mutex_unlock(&group->mutex);
			continue;
		}

		iommu_group_create_direct_mappings(group);

		group->domain = NULL;
		ret = __iommu_group_set_domain(group, group->default_domain);

		mutex_unlock(&group->mutex);

		if (ret)
			break;

		__iommu_group_dma_finalize(group);
	}

	return ret;
}

bool iommu_present(const struct bus_type *bus)
{
	return bus->iommu_ops != NULL;
}
EXPORT_SYMBOL_GPL(iommu_present);

/**
 * device_iommu_capable() - check for a general IOMMU capability
 * @dev: device to which the capability would be relevant, if available
 * @cap: IOMMU capability
 *
 * Return: true if an IOMMU is present and supports the given capability
 * for the given device, otherwise false.
 */
bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
{
	const struct iommu_ops *ops;

	if (!dev->iommu || !dev->iommu->iommu_dev)
		return false;

	ops = dev_iommu_ops(dev);
	if (!ops->capable)
		return false;

	return ops->capable(dev, cap);
}
EXPORT_SYMBOL_GPL(device_iommu_capable);

/**
 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
 *	 for a group
 * @group: Group to query
 *
 * IOMMU groups should not have differing values of
 * msi_device_has_isolated_msi() for devices in a group. However nothing
 * directly prevents this, so ensure mistakes don't result in isolation failures
 * by checking that all the devices are the same.
 */
bool iommu_group_has_isolated_msi(struct iommu_group *group)
{
	struct group_device *group_dev;
	bool ret = true;

	mutex_lock(&group->mutex);
	for_each_group_device(group, group_dev)
		ret &= msi_device_has_isolated_msi(group_dev->dev);
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
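/*
 * Illustrative sketch (not part of this file): a VFIO-style consumer might
 * gate device assignment on the two checks above. The allow_unsafe_interrupts
 * flag is hypothetical.
 *
 *	if (!iommu_group_has_isolated_msi(group) && !allow_unsafe_interrupts)
 *		return -EPERM;
 *
 *	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
 *		pr_info("IOMMU cannot enforce cache coherent DMA\n");
 */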
/**
 * iommu_set_fault_handler() - set a fault handler for an iommu domain
 * @domain: iommu domain
 * @handler: fault handler
 * @token: user data, will be passed back to the fault handler
 *
 * This function should be used by IOMMU users which want to be notified
 * whenever an IOMMU fault happens.
 *
 * The fault handler itself should return 0 on success, and an appropriate
 * error code otherwise.
 */
void iommu_set_fault_handler(struct iommu_domain *domain,
			     iommu_fault_handler_t handler,
			     void *token)
{
	BUG_ON(!domain);

	domain->handler = handler;
	domain->handler_token = token;
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);

static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
						 unsigned type)
{
	struct iommu_domain *domain;
	unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;

	if (bus == NULL || bus->iommu_ops == NULL)
		return NULL;

	domain = bus->iommu_ops->domain_alloc(alloc_type);
	if (!domain)
		return NULL;

	domain->type = type;
	/*
	 * If not already set, assume all sizes by default; the driver
	 * may override this later
	 */
	if (!domain->pgsize_bitmap)
		domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;

	if (!domain->ops)
		domain->ops = bus->iommu_ops->default_domain_ops;

	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
		iommu_domain_free(domain);
		domain = NULL;
	}
	return domain;
}

struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
{
	return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);

void iommu_domain_free(struct iommu_domain *domain)
{
	if (domain->type == IOMMU_DOMAIN_SVA)
		mmdrop(domain->mm);
	iommu_put_dma_cookie(domain);
	domain->ops->free(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);

/*
 * Put the group's domain back to the appropriate core-owned domain - either the
 * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
 */
static void __iommu_group_set_core_domain(struct iommu_group *group)
{
	struct iommu_domain *new_domain;

	if (group->owner)
		new_domain = group->blocking_domain;
	else
		new_domain = group->default_domain;

	__iommu_group_set_domain_nofail(group, new_domain);
}

static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev)
{
	int ret;

	if (unlikely(domain->ops->attach_dev == NULL))
		return -ENODEV;

	ret = domain->ops->attach_dev(domain, dev);
	if (ret)
		return ret;
	dev->iommu->attach_deferred = 0;
	trace_attach_device_to_domain(dev);
	return 0;
}

/**
 * iommu_attach_device - Attach an IOMMU domain to a device
 * @domain: IOMMU domain to attach
 * @dev: Device that will be attached
 *
 * Returns 0 on success and error code on failure
 *
 * Note that EINVAL can be treated as a soft failure, indicating
 * that certain configuration of the domain is incompatible with
 * the device. In this case attaching a different domain to the
 * device may succeed.
2048 */ 2049 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2050 { 2051 struct iommu_group *group; 2052 int ret; 2053 2054 group = iommu_group_get(dev); 2055 if (!group) 2056 return -ENODEV; 2057 2058 /* 2059 * Lock the group to make sure the device-count doesn't 2060 * change while we are attaching 2061 */ 2062 mutex_lock(&group->mutex); 2063 ret = -EINVAL; 2064 if (list_count_nodes(&group->devices) != 1) 2065 goto out_unlock; 2066 2067 ret = __iommu_attach_group(domain, group); 2068 2069 out_unlock: 2070 mutex_unlock(&group->mutex); 2071 iommu_group_put(group); 2072 2073 return ret; 2074 } 2075 EXPORT_SYMBOL_GPL(iommu_attach_device); 2076 2077 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2078 { 2079 if (dev->iommu && dev->iommu->attach_deferred) 2080 return __iommu_attach_device(domain, dev); 2081 2082 return 0; 2083 } 2084 2085 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2086 { 2087 struct iommu_group *group; 2088 2089 group = iommu_group_get(dev); 2090 if (!group) 2091 return; 2092 2093 mutex_lock(&group->mutex); 2094 if (WARN_ON(domain != group->domain) || 2095 WARN_ON(list_count_nodes(&group->devices) != 1)) 2096 goto out_unlock; 2097 __iommu_group_set_core_domain(group); 2098 2099 out_unlock: 2100 mutex_unlock(&group->mutex); 2101 iommu_group_put(group); 2102 } 2103 EXPORT_SYMBOL_GPL(iommu_detach_device); 2104 2105 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2106 { 2107 struct iommu_domain *domain; 2108 struct iommu_group *group; 2109 2110 group = iommu_group_get(dev); 2111 if (!group) 2112 return NULL; 2113 2114 domain = group->domain; 2115 2116 iommu_group_put(group); 2117 2118 return domain; 2119 } 2120 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2121 2122 /* 2123 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2124 * guarantees that the group and its default domain are valid and correct. 2125 */ 2126 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2127 { 2128 return dev->iommu_group->default_domain; 2129 } 2130 2131 static int __iommu_attach_group(struct iommu_domain *domain, 2132 struct iommu_group *group) 2133 { 2134 if (group->domain && group->domain != group->default_domain && 2135 group->domain != group->blocking_domain) 2136 return -EBUSY; 2137 2138 return __iommu_group_set_domain(group, domain); 2139 } 2140 2141 /** 2142 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2143 * @domain: IOMMU domain to attach 2144 * @group: IOMMU group that will be attached 2145 * 2146 * Returns 0 on success and error code on failure 2147 * 2148 * Note that EINVAL can be treated as a soft failure, indicating 2149 * that certain configuration of the domain is incompatible with 2150 * the group. In this case attaching a different domain to the 2151 * group may succeed. 
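 *
 * A typical owner (illustrative sketch only; "bus" stands for the bus of a
 * device in the group) allocates an unmanaged domain, attaches it, maps and
 * unmaps with iommu_map()/iommu_unmap(), and tears everything down again:
 *
 *	domain = iommu_domain_alloc(bus);
 *	if (!domain)
 *		return -ENOMEM;
 *	ret = iommu_attach_group(domain, group);
 *	if (ret) {
 *		iommu_domain_free(domain);
 *		return ret;
 *	}
 *	...
 *	iommu_detach_group(domain, group);
 *	iommu_domain_free(domain);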
2152  */
2153 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
2154 {
2155 	int ret;
2156
2157 	mutex_lock(&group->mutex);
2158 	ret = __iommu_attach_group(domain, group);
2159 	mutex_unlock(&group->mutex);
2160
2161 	return ret;
2162 }
2163 EXPORT_SYMBOL_GPL(iommu_attach_group);
2164
2165 static int __iommu_device_set_domain(struct iommu_group *group,
2166 				     struct device *dev,
2167 				     struct iommu_domain *new_domain,
2168 				     unsigned int flags)
2169 {
2170 	int ret;
2171
2172 	if (dev->iommu->attach_deferred) {
2173 		if (new_domain == group->default_domain)
2174 			return 0;
2175 		dev->iommu->attach_deferred = 0;
2176 	}
2177
2178 	ret = __iommu_attach_device(new_domain, dev);
2179 	if (ret) {
2180 		/*
2181 		 * If we have a blocking domain then try to attach that in hopes
2182 		 * of avoiding a UAF. Modern drivers should implement blocking
2183 		 * domains as global statics that cannot fail.
2184 		 */
2185 		if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
2186 		    group->blocking_domain &&
2187 		    group->blocking_domain != new_domain)
2188 			__iommu_attach_device(group->blocking_domain, dev);
2189 		return ret;
2190 	}
2191 	return 0;
2192 }
2193
2194 /*
2195  * If 0 is returned the group's domain is new_domain. If an error is returned
2196  * then the group's domain will be set back to the existing domain unless
2197  * IOMMU_SET_DOMAIN_MUST_SUCCEED, in which case the error is returned and the group's
2198  * domain is left inconsistent. It is a driver bug to fail attach with a
2199  * previously good domain. We try to avoid a kernel UAF because of this.
2200  *
2201  * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
2202  * API works on domains and devices. Bridge that gap by iterating over the
2203  * devices in a group. Ideally we'd have a single device which represents the
2204  * requestor ID of the group, but we also allow IOMMU drivers to create policy
2205  * defined minimum sets, where the physical hardware may be able to distinguish
2206  * members, but we wish to group them at a higher level (e.g. untrusted
2207  * multi-function PCI devices). Thus we attach each device.
2208  */
2209 static int __iommu_group_set_domain_internal(struct iommu_group *group,
2210 					      struct iommu_domain *new_domain,
2211 					      unsigned int flags)
2212 {
2213 	struct group_device *last_gdev;
2214 	struct group_device *gdev;
2215 	int result;
2216 	int ret;
2217
2218 	lockdep_assert_held(&group->mutex);
2219
2220 	if (group->domain == new_domain)
2221 		return 0;
2222
2223 	/*
2224 	 * New drivers should support default domains, so set_platform_dma()
2225 	 * op will never be called. Otherwise the NULL domain represents some
2226 	 * platform specific behavior.
2227 	 */
2228 	if (!new_domain) {
2229 		for_each_group_device(group, gdev) {
2230 			const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
2231
2232 			if (!WARN_ON(!ops->set_platform_dma_ops))
2233 				ops->set_platform_dma_ops(gdev->dev);
2234 		}
2235 		group->domain = NULL;
2236 		return 0;
2237 	}
2238
2239 	/*
2240 	 * Changing the domain is done by calling attach_dev() on the new
2241 	 * domain. This switch does not have to be atomic and DMA can be
2242 	 * discarded during the transition. DMA must only be able to access
2243 	 * either new_domain or group->domain, never something else.
2244 	 */
2245 	result = 0;
2246 	for_each_group_device(group, gdev) {
2247 		ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
2248 						flags);
2249 		if (ret) {
2250 			result = ret;
2251 			/*
2252 			 * Keep trying the other devices in the group.
If a 2253 * driver fails attach to an otherwise good domain, and 2254 * does not support blocking domains, it should at least 2255 * drop its reference on the current domain so we don't 2256 * UAF. 2257 */ 2258 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2259 continue; 2260 goto err_revert; 2261 } 2262 } 2263 group->domain = new_domain; 2264 return result; 2265 2266 err_revert: 2267 /* 2268 * This is called in error unwind paths. A well behaved driver should 2269 * always allow us to attach to a domain that was already attached. 2270 */ 2271 last_gdev = gdev; 2272 for_each_group_device(group, gdev) { 2273 const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); 2274 2275 /* 2276 * If set_platform_dma_ops is not present a NULL domain can 2277 * happen only for first probe, in which case we leave 2278 * group->domain as NULL and let release clean everything up. 2279 */ 2280 if (group->domain) 2281 WARN_ON(__iommu_device_set_domain( 2282 group, gdev->dev, group->domain, 2283 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2284 else if (ops->set_platform_dma_ops) 2285 ops->set_platform_dma_ops(gdev->dev); 2286 if (gdev == last_gdev) 2287 break; 2288 } 2289 return ret; 2290 } 2291 2292 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2293 { 2294 mutex_lock(&group->mutex); 2295 __iommu_group_set_core_domain(group); 2296 mutex_unlock(&group->mutex); 2297 } 2298 EXPORT_SYMBOL_GPL(iommu_detach_group); 2299 2300 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2301 { 2302 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2303 return iova; 2304 2305 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2306 return 0; 2307 2308 return domain->ops->iova_to_phys(domain, iova); 2309 } 2310 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2311 2312 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2313 phys_addr_t paddr, size_t size, size_t *count) 2314 { 2315 unsigned int pgsize_idx, pgsize_idx_next; 2316 unsigned long pgsizes; 2317 size_t offset, pgsize, pgsize_next; 2318 unsigned long addr_merge = paddr | iova; 2319 2320 /* Page sizes supported by the hardware and small enough for @size */ 2321 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2322 2323 /* Constrain the page sizes further based on the maximum alignment */ 2324 if (likely(addr_merge)) 2325 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2326 2327 /* Make sure we have at least one suitable page size */ 2328 BUG_ON(!pgsizes); 2329 2330 /* Pick the biggest page size remaining */ 2331 pgsize_idx = __fls(pgsizes); 2332 pgsize = BIT(pgsize_idx); 2333 if (!count) 2334 return pgsize; 2335 2336 /* Find the next biggest support page size, if it exists */ 2337 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2338 if (!pgsizes) 2339 goto out_set_count; 2340 2341 pgsize_idx_next = __ffs(pgsizes); 2342 pgsize_next = BIT(pgsize_idx_next); 2343 2344 /* 2345 * There's no point trying a bigger page size unless the virtual 2346 * and physical addresses are similarly offset within the larger page. 2347 */ 2348 if ((iova ^ paddr) & (pgsize_next - 1)) 2349 goto out_set_count; 2350 2351 /* Calculate the offset to the next page size alignment boundary */ 2352 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2353 2354 /* 2355 * If size is big enough to accommodate the larger page, reduce 2356 * the number of smaller pages. 
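 *
 * Worked example (illustrative, assuming support for 4K and 2M pages): for
 * iova = paddr = 0x1ff000 and size = 0x401000 the chosen pgsize is 4K,
 * pgsize_next is 2M and offset is 0x1000, so size is clamped to a single
 * 4K page here; the following call then starts on a 2M boundary and can
 * use 2M mappings.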
2357 */ 2358 if (offset + pgsize_next <= size) 2359 size = offset; 2360 2361 out_set_count: 2362 *count = size >> pgsize_idx; 2363 return pgsize; 2364 } 2365 2366 static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, 2367 phys_addr_t paddr, size_t size, int prot, 2368 gfp_t gfp, size_t *mapped) 2369 { 2370 const struct iommu_domain_ops *ops = domain->ops; 2371 size_t pgsize, count; 2372 int ret; 2373 2374 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2375 2376 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2377 iova, &paddr, pgsize, count); 2378 2379 if (ops->map_pages) { 2380 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2381 gfp, mapped); 2382 } else { 2383 ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); 2384 *mapped = ret ? 0 : pgsize; 2385 } 2386 2387 return ret; 2388 } 2389 2390 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2391 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2392 { 2393 const struct iommu_domain_ops *ops = domain->ops; 2394 unsigned long orig_iova = iova; 2395 unsigned int min_pagesz; 2396 size_t orig_size = size; 2397 phys_addr_t orig_paddr = paddr; 2398 int ret = 0; 2399 2400 if (unlikely(!(ops->map || ops->map_pages) || 2401 domain->pgsize_bitmap == 0UL)) 2402 return -ENODEV; 2403 2404 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2405 return -EINVAL; 2406 2407 /* find out the minimum page size supported */ 2408 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2409 2410 /* 2411 * both the virtual address and the physical one, as well as 2412 * the size of the mapping, must be aligned (at least) to the 2413 * size of the smallest page supported by the hardware 2414 */ 2415 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2416 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2417 iova, &paddr, size, min_pagesz); 2418 return -EINVAL; 2419 } 2420 2421 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2422 2423 while (size) { 2424 size_t mapped = 0; 2425 2426 ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, 2427 &mapped); 2428 /* 2429 * Some pages may have been mapped, even if an error occurred, 2430 * so we should account for those so they can be unmapped. 
2431 */ 2432 size -= mapped; 2433 2434 if (ret) 2435 break; 2436 2437 iova += mapped; 2438 paddr += mapped; 2439 } 2440 2441 /* unroll mapping in case something went wrong */ 2442 if (ret) 2443 iommu_unmap(domain, orig_iova, orig_size - size); 2444 else 2445 trace_map(orig_iova, orig_paddr, orig_size); 2446 2447 return ret; 2448 } 2449 2450 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2451 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2452 { 2453 const struct iommu_domain_ops *ops = domain->ops; 2454 int ret; 2455 2456 might_sleep_if(gfpflags_allow_blocking(gfp)); 2457 2458 /* Discourage passing strange GFP flags */ 2459 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2460 __GFP_HIGHMEM))) 2461 return -EINVAL; 2462 2463 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2464 if (ret == 0 && ops->iotlb_sync_map) 2465 ops->iotlb_sync_map(domain, iova, size); 2466 2467 return ret; 2468 } 2469 EXPORT_SYMBOL_GPL(iommu_map); 2470 2471 static size_t __iommu_unmap_pages(struct iommu_domain *domain, 2472 unsigned long iova, size_t size, 2473 struct iommu_iotlb_gather *iotlb_gather) 2474 { 2475 const struct iommu_domain_ops *ops = domain->ops; 2476 size_t pgsize, count; 2477 2478 pgsize = iommu_pgsize(domain, iova, iova, size, &count); 2479 return ops->unmap_pages ? 2480 ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : 2481 ops->unmap(domain, iova, pgsize, iotlb_gather); 2482 } 2483 2484 static size_t __iommu_unmap(struct iommu_domain *domain, 2485 unsigned long iova, size_t size, 2486 struct iommu_iotlb_gather *iotlb_gather) 2487 { 2488 const struct iommu_domain_ops *ops = domain->ops; 2489 size_t unmapped_page, unmapped = 0; 2490 unsigned long orig_iova = iova; 2491 unsigned int min_pagesz; 2492 2493 if (unlikely(!(ops->unmap || ops->unmap_pages) || 2494 domain->pgsize_bitmap == 0UL)) 2495 return 0; 2496 2497 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2498 return 0; 2499 2500 /* find out the minimum page size supported */ 2501 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2502 2503 /* 2504 * The virtual address, as well as the size of the mapping, must be 2505 * aligned (at least) to the size of the smallest page supported 2506 * by the hardware 2507 */ 2508 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2509 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2510 iova, size, min_pagesz); 2511 return 0; 2512 } 2513 2514 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2515 2516 /* 2517 * Keep iterating until we either unmap 'size' bytes (or more) 2518 * or we hit an area that isn't mapped. 
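 *
 * For example (illustrative, assuming 4K and 2M page support and a
 * cooperative driver), unmapping size = 0x201000 from a 2M-aligned IOVA
 * typically takes two iterations: one 2M unmap followed by one 4K unmap.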
2519 */ 2520 while (unmapped < size) { 2521 unmapped_page = __iommu_unmap_pages(domain, iova, 2522 size - unmapped, 2523 iotlb_gather); 2524 if (!unmapped_page) 2525 break; 2526 2527 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2528 iova, unmapped_page); 2529 2530 iova += unmapped_page; 2531 unmapped += unmapped_page; 2532 } 2533 2534 trace_unmap(orig_iova, size, unmapped); 2535 return unmapped; 2536 } 2537 2538 size_t iommu_unmap(struct iommu_domain *domain, 2539 unsigned long iova, size_t size) 2540 { 2541 struct iommu_iotlb_gather iotlb_gather; 2542 size_t ret; 2543 2544 iommu_iotlb_gather_init(&iotlb_gather); 2545 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2546 iommu_iotlb_sync(domain, &iotlb_gather); 2547 2548 return ret; 2549 } 2550 EXPORT_SYMBOL_GPL(iommu_unmap); 2551 2552 size_t iommu_unmap_fast(struct iommu_domain *domain, 2553 unsigned long iova, size_t size, 2554 struct iommu_iotlb_gather *iotlb_gather) 2555 { 2556 return __iommu_unmap(domain, iova, size, iotlb_gather); 2557 } 2558 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2559 2560 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2561 struct scatterlist *sg, unsigned int nents, int prot, 2562 gfp_t gfp) 2563 { 2564 const struct iommu_domain_ops *ops = domain->ops; 2565 size_t len = 0, mapped = 0; 2566 phys_addr_t start; 2567 unsigned int i = 0; 2568 int ret; 2569 2570 might_sleep_if(gfpflags_allow_blocking(gfp)); 2571 2572 /* Discourage passing strange GFP flags */ 2573 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2574 __GFP_HIGHMEM))) 2575 return -EINVAL; 2576 2577 while (i <= nents) { 2578 phys_addr_t s_phys = sg_phys(sg); 2579 2580 if (len && s_phys != start + len) { 2581 ret = __iommu_map(domain, iova + mapped, start, 2582 len, prot, gfp); 2583 2584 if (ret) 2585 goto out_err; 2586 2587 mapped += len; 2588 len = 0; 2589 } 2590 2591 if (sg_is_dma_bus_address(sg)) 2592 goto next; 2593 2594 if (len) { 2595 len += sg->length; 2596 } else { 2597 len = sg->length; 2598 start = s_phys; 2599 } 2600 2601 next: 2602 if (++i < nents) 2603 sg = sg_next(sg); 2604 } 2605 2606 if (ops->iotlb_sync_map) 2607 ops->iotlb_sync_map(domain, iova, mapped); 2608 return mapped; 2609 2610 out_err: 2611 /* undo mappings already done */ 2612 iommu_unmap(domain, iova, mapped); 2613 2614 return ret; 2615 } 2616 EXPORT_SYMBOL_GPL(iommu_map_sg); 2617 2618 /** 2619 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2620 * @domain: the iommu domain where the fault has happened 2621 * @dev: the device where the fault has happened 2622 * @iova: the faulting address 2623 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2624 * 2625 * This function should be called by the low-level IOMMU implementations 2626 * whenever IOMMU faults happen, to allow high-level users, that are 2627 * interested in such events, to know about them. 2628 * 2629 * This event may be useful for several possible use cases: 2630 * - mere logging of the event 2631 * - dynamic TLB/PTE loading 2632 * - if restarting of the faulting device is required 2633 * 2634 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2635 * PTE/TLB loading will one day be supported, implementations will be able 2636 * to tell whether it succeeded or not according to this return value). 
2637  *
2638  * Specifically, -ENOSYS is returned if a fault handler isn't installed
2639  * (though fault handlers can also return -ENOSYS, in case they want to
2640  * elicit the default behavior of the IOMMU drivers).
2641  */
2642 int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
2643 		       unsigned long iova, int flags)
2644 {
2645 	int ret = -ENOSYS;
2646
2647 	/*
2648 	 * If upper layers showed interest and installed a fault handler,
2649 	 * invoke it.
2650 	 */
2651 	if (domain->handler)
2652 		ret = domain->handler(domain, dev, iova, flags,
2653 				      domain->handler_token);
2654
2655 	trace_io_page_fault(dev, iova, flags);
2656 	return ret;
2657 }
2658 EXPORT_SYMBOL_GPL(report_iommu_fault);
2659
2660 static int __init iommu_init(void)
2661 {
2662 	iommu_group_kset = kset_create_and_add("iommu_groups",
2663 					       NULL, kernel_kobj);
2664 	BUG_ON(!iommu_group_kset);
2665
2666 	iommu_debugfs_setup();
2667
2668 	return 0;
2669 }
2670 core_initcall(iommu_init);
2671
2672 int iommu_enable_nesting(struct iommu_domain *domain)
2673 {
2674 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2675 		return -EINVAL;
2676 	if (!domain->ops->enable_nesting)
2677 		return -EINVAL;
2678 	return domain->ops->enable_nesting(domain);
2679 }
2680 EXPORT_SYMBOL_GPL(iommu_enable_nesting);
2681
2682 int iommu_set_pgtable_quirks(struct iommu_domain *domain,
2683 			     unsigned long quirk)
2684 {
2685 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2686 		return -EINVAL;
2687 	if (!domain->ops->set_pgtable_quirks)
2688 		return -EINVAL;
2689 	return domain->ops->set_pgtable_quirks(domain, quirk);
2690 }
2691 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
2692
2693 void iommu_get_resv_regions(struct device *dev, struct list_head *list)
2694 {
2695 	const struct iommu_ops *ops = dev_iommu_ops(dev);
2696
2697 	if (ops->get_resv_regions)
2698 		ops->get_resv_regions(dev, list);
2699 }
2700
2701 /**
2702  * iommu_put_resv_regions - release reserved regions
2703  * @dev: device for which to free reserved regions
2704  * @list: reserved region list for device
2705  *
2706  * This releases a reserved region list acquired by iommu_get_resv_regions().
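 *
 * Typical usage (illustrative sketch only, for a device behind an IOMMU):
 *
 *	struct iommu_resv_region *region;
 *	LIST_HEAD(resv_regions);
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		dev_info(dev, "resv region [%pa, +0x%zx]\n",
 *			 &region->start, region->length);
 *	iommu_put_resv_regions(dev, &resv_regions);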
2707 */ 2708 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2709 { 2710 struct iommu_resv_region *entry, *next; 2711 2712 list_for_each_entry_safe(entry, next, list, list) { 2713 if (entry->free) 2714 entry->free(dev, entry); 2715 else 2716 kfree(entry); 2717 } 2718 } 2719 EXPORT_SYMBOL(iommu_put_resv_regions); 2720 2721 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2722 size_t length, int prot, 2723 enum iommu_resv_type type, 2724 gfp_t gfp) 2725 { 2726 struct iommu_resv_region *region; 2727 2728 region = kzalloc(sizeof(*region), gfp); 2729 if (!region) 2730 return NULL; 2731 2732 INIT_LIST_HEAD(®ion->list); 2733 region->start = start; 2734 region->length = length; 2735 region->prot = prot; 2736 region->type = type; 2737 return region; 2738 } 2739 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2740 2741 void iommu_set_default_passthrough(bool cmd_line) 2742 { 2743 if (cmd_line) 2744 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2745 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2746 } 2747 2748 void iommu_set_default_translated(bool cmd_line) 2749 { 2750 if (cmd_line) 2751 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2752 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2753 } 2754 2755 bool iommu_default_passthrough(void) 2756 { 2757 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2758 } 2759 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2760 2761 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 2762 { 2763 const struct iommu_ops *ops = NULL; 2764 struct iommu_device *iommu; 2765 2766 spin_lock(&iommu_device_lock); 2767 list_for_each_entry(iommu, &iommu_device_list, list) 2768 if (iommu->fwnode == fwnode) { 2769 ops = iommu->ops; 2770 break; 2771 } 2772 spin_unlock(&iommu_device_lock); 2773 return ops; 2774 } 2775 2776 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 2777 const struct iommu_ops *ops) 2778 { 2779 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2780 2781 if (fwspec) 2782 return ops == fwspec->ops ? 0 : -EINVAL; 2783 2784 if (!dev_iommu_get(dev)) 2785 return -ENOMEM; 2786 2787 /* Preallocate for the overwhelmingly common case of 1 ID */ 2788 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2789 if (!fwspec) 2790 return -ENOMEM; 2791 2792 of_node_get(to_of_node(iommu_fwnode)); 2793 fwspec->iommu_fwnode = iommu_fwnode; 2794 fwspec->ops = ops; 2795 dev_iommu_fwspec_set(dev, fwspec); 2796 return 0; 2797 } 2798 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2799 2800 void iommu_fwspec_free(struct device *dev) 2801 { 2802 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2803 2804 if (fwspec) { 2805 fwnode_handle_put(fwspec->iommu_fwnode); 2806 kfree(fwspec); 2807 dev_iommu_fwspec_set(dev, NULL); 2808 } 2809 } 2810 EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2811 2812 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2813 { 2814 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2815 int i, new_num; 2816 2817 if (!fwspec) 2818 return -EINVAL; 2819 2820 new_num = fwspec->num_ids + num_ids; 2821 if (new_num > 1) { 2822 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2823 GFP_KERNEL); 2824 if (!fwspec) 2825 return -ENOMEM; 2826 2827 dev_iommu_fwspec_set(dev, fwspec); 2828 } 2829 2830 for (i = 0; i < num_ids; i++) 2831 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2832 2833 fwspec->num_ids = new_num; 2834 return 0; 2835 } 2836 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2837 2838 /* 2839 * Per device IOMMU features. 
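 *
 * A device driver (illustrative sketch only) typically brackets its use of
 * a feature like this:
 *
 *	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
 *	if (ret)
 *		return ret;
 *	...
 *	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);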
2840 */ 2841 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2842 { 2843 if (dev->iommu && dev->iommu->iommu_dev) { 2844 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2845 2846 if (ops->dev_enable_feat) 2847 return ops->dev_enable_feat(dev, feat); 2848 } 2849 2850 return -ENODEV; 2851 } 2852 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2853 2854 /* 2855 * The device drivers should do the necessary cleanups before calling this. 2856 */ 2857 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2858 { 2859 if (dev->iommu && dev->iommu->iommu_dev) { 2860 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2861 2862 if (ops->dev_disable_feat) 2863 return ops->dev_disable_feat(dev, feat); 2864 } 2865 2866 return -EBUSY; 2867 } 2868 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2869 2870 /* 2871 * Changes the default domain of an iommu group 2872 * 2873 * @group: The group for which the default domain should be changed 2874 * @dev: The first device in the group 2875 * @type: The type of the new default domain that gets associated with the group 2876 * 2877 * Returns 0 on success and error code on failure 2878 * 2879 * Note: 2880 * 1. Presently, this function is called only when user requests to change the 2881 * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type 2882 * Please take a closer look if intended to use for other purposes. 2883 */ 2884 static int iommu_change_dev_def_domain(struct iommu_group *group, 2885 struct device *dev, int type) 2886 { 2887 struct __group_domain_type gtype = {NULL, 0}; 2888 struct iommu_domain *prev_dom; 2889 int ret; 2890 2891 lockdep_assert_held(&group->mutex); 2892 2893 prev_dom = group->default_domain; 2894 __iommu_group_for_each_dev(group, >ype, 2895 probe_get_default_domain_type); 2896 if (!type) { 2897 /* 2898 * If the user hasn't requested any specific type of domain and 2899 * if the device supports both the domains, then default to the 2900 * domain the device was booted with 2901 */ 2902 type = gtype.type ? : iommu_def_domain_type; 2903 } else if (gtype.type && type != gtype.type) { 2904 dev_err_ratelimited(dev, "Device cannot be in %s domain\n", 2905 iommu_domain_type_str(type)); 2906 return -EINVAL; 2907 } 2908 2909 /* 2910 * Switch to a new domain only if the requested domain type is different 2911 * from the existing default domain type 2912 */ 2913 if (prev_dom->type == type) 2914 return 0; 2915 2916 group->default_domain = NULL; 2917 group->domain = NULL; 2918 2919 /* Sets group->default_domain to the newly allocated domain */ 2920 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2921 if (ret) 2922 goto restore_old_domain; 2923 2924 group->domain = prev_dom; 2925 ret = iommu_create_device_direct_mappings(group, dev); 2926 if (ret) 2927 goto free_new_domain; 2928 2929 ret = __iommu_group_set_domain(group, group->default_domain); 2930 if (ret) 2931 goto free_new_domain; 2932 2933 iommu_domain_free(prev_dom); 2934 2935 return 0; 2936 2937 free_new_domain: 2938 iommu_domain_free(group->default_domain); 2939 restore_old_domain: 2940 group->default_domain = prev_dom; 2941 2942 return ret; 2943 } 2944 2945 /* 2946 * Changing the default domain through sysfs requires the users to unbind the 2947 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2948 * transition. Return failure if this isn't met. 2949 * 2950 * We need to consider the race between this and the device release path. 
2951  * group->mutex is used here to guarantee that the device release path
2952  * will not be entered at the same time.
2953  */
2954 static ssize_t iommu_group_store_type(struct iommu_group *group,
2955 				      const char *buf, size_t count)
2956 {
2957 	struct group_device *grp_dev;
2958 	struct device *dev;
2959 	int ret, req_type;
2960
2961 	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
2962 		return -EACCES;
2963
2964 	if (WARN_ON(!group) || !group->default_domain)
2965 		return -EINVAL;
2966
2967 	if (sysfs_streq(buf, "identity"))
2968 		req_type = IOMMU_DOMAIN_IDENTITY;
2969 	else if (sysfs_streq(buf, "DMA"))
2970 		req_type = IOMMU_DOMAIN_DMA;
2971 	else if (sysfs_streq(buf, "DMA-FQ"))
2972 		req_type = IOMMU_DOMAIN_DMA_FQ;
2973 	else if (sysfs_streq(buf, "auto"))
2974 		req_type = 0;
2975 	else
2976 		return -EINVAL;
2977
2978 	mutex_lock(&group->mutex);
2979 	/* We can bring up a flush queue without tearing down the domain. */
2980 	if (req_type == IOMMU_DOMAIN_DMA_FQ &&
2981 	    group->default_domain->type == IOMMU_DOMAIN_DMA) {
2982 		ret = iommu_dma_init_fq(group->default_domain);
2983 		if (!ret)
2984 			group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
2985 		mutex_unlock(&group->mutex);
2986
2987 		return ret ?: count;
2988 	}
2989
2990 	/* Otherwise, ensure that device exists and no driver is bound. */
2991 	if (list_empty(&group->devices) || group->owner_cnt) {
2992 		mutex_unlock(&group->mutex);
2993 		return -EPERM;
2994 	}
2995
2996 	grp_dev = list_first_entry(&group->devices, struct group_device, list);
2997 	dev = grp_dev->dev;
2998
2999 	ret = iommu_change_dev_def_domain(group, dev, req_type);
3000
3001 	/*
3002 	 * Release the mutex here because ops->probe_finalize() call-back of
3003 	 * some vendor IOMMU drivers calls arm_iommu_attach_device() which
3004 	 * in-turn might call back into IOMMU core code, where it tries to take
3005 	 * group->mutex, resulting in a deadlock.
3006 	 */
3007 	mutex_unlock(&group->mutex);
3008
3009 	/* Make sure dma_ops is appropriately set */
3010 	if (!ret)
3011 		__iommu_group_dma_finalize(group);
3012
3013 	return ret ?: count;
3014 }
3015
3016 static bool iommu_is_default_domain(struct iommu_group *group)
3017 {
3018 	if (group->domain == group->default_domain)
3019 		return true;
3020
3021 	/*
3022 	 * If the default domain was set to identity and it is still an identity
3023 	 * domain then we consider this a pass. This happens because of
3024 	 * amd_iommu_init_device() replacing the default identity domain with an
3025 	 * identity domain that has a different configuration for AMDGPU.
3026 	 */
3027 	if (group->default_domain &&
3028 	    group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
3029 	    group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
3030 		return true;
3031 	return false;
3032 }
3033
3034 /**
3035  * iommu_device_use_default_domain() - Device driver wants to handle device
3036  * DMA through the kernel DMA API.
3037  * @dev: The device.
3038  *
3039  * The device driver about to bind @dev wants to do DMA through the kernel
3040  * DMA API. Return 0 if it is allowed, otherwise an error.
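 *
 * This pairs with iommu_device_unuse_default_domain(); a bus/driver-core
 * style caller (illustrative sketch only) would do:
 *
 *	ret = iommu_device_use_default_domain(dev);
 *	if (ret)
 *		return ret;
 *	...		// driver bound, DMA API usable
 *	iommu_device_unuse_default_domain(dev);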
3041 */ 3042 int iommu_device_use_default_domain(struct device *dev) 3043 { 3044 struct iommu_group *group = iommu_group_get(dev); 3045 int ret = 0; 3046 3047 if (!group) 3048 return 0; 3049 3050 mutex_lock(&group->mutex); 3051 if (group->owner_cnt) { 3052 if (group->owner || !iommu_is_default_domain(group) || 3053 !xa_empty(&group->pasid_array)) { 3054 ret = -EBUSY; 3055 goto unlock_out; 3056 } 3057 } 3058 3059 group->owner_cnt++; 3060 3061 unlock_out: 3062 mutex_unlock(&group->mutex); 3063 iommu_group_put(group); 3064 3065 return ret; 3066 } 3067 3068 /** 3069 * iommu_device_unuse_default_domain() - Device driver stops handling device 3070 * DMA through the kernel DMA API. 3071 * @dev: The device. 3072 * 3073 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3074 * It must be called after iommu_device_use_default_domain(). 3075 */ 3076 void iommu_device_unuse_default_domain(struct device *dev) 3077 { 3078 struct iommu_group *group = iommu_group_get(dev); 3079 3080 if (!group) 3081 return; 3082 3083 mutex_lock(&group->mutex); 3084 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3085 group->owner_cnt--; 3086 3087 mutex_unlock(&group->mutex); 3088 iommu_group_put(group); 3089 } 3090 3091 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3092 { 3093 struct group_device *dev = 3094 list_first_entry(&group->devices, struct group_device, list); 3095 3096 if (group->blocking_domain) 3097 return 0; 3098 3099 group->blocking_domain = 3100 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3101 if (!group->blocking_domain) { 3102 /* 3103 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3104 * create an empty domain instead. 3105 */ 3106 group->blocking_domain = __iommu_domain_alloc( 3107 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3108 if (!group->blocking_domain) 3109 return -EINVAL; 3110 } 3111 return 0; 3112 } 3113 3114 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3115 { 3116 int ret; 3117 3118 if ((group->domain && group->domain != group->default_domain) || 3119 !xa_empty(&group->pasid_array)) 3120 return -EBUSY; 3121 3122 ret = __iommu_group_alloc_blocking_domain(group); 3123 if (ret) 3124 return ret; 3125 ret = __iommu_group_set_domain(group, group->blocking_domain); 3126 if (ret) 3127 return ret; 3128 3129 group->owner = owner; 3130 group->owner_cnt++; 3131 return 0; 3132 } 3133 3134 /** 3135 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3136 * @group: The group. 3137 * @owner: Caller specified pointer. Used for exclusive ownership. 3138 * 3139 * This is to support backward compatibility for vfio which manages the dma 3140 * ownership in iommu_group level. New invocations on this interface should be 3141 * prohibited. Only a single owner may exist for a group. 3142 */ 3143 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3144 { 3145 int ret = 0; 3146 3147 if (WARN_ON(!owner)) 3148 return -EINVAL; 3149 3150 mutex_lock(&group->mutex); 3151 if (group->owner_cnt) { 3152 ret = -EPERM; 3153 goto unlock_out; 3154 } 3155 3156 ret = __iommu_take_dma_ownership(group, owner); 3157 unlock_out: 3158 mutex_unlock(&group->mutex); 3159 3160 return ret; 3161 } 3162 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3163 3164 /** 3165 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3166 * @dev: The device. 3167 * @owner: Caller specified pointer. Used for exclusive ownership. 3168 * 3169 * Claim the DMA ownership of a device. 
Multiple devices in the same group may
3170  * concurrently claim ownership if they present the same owner value. Returns 0
3171  * on success and error code on failure.
3172  */
3173 int iommu_device_claim_dma_owner(struct device *dev, void *owner)
3174 {
3175 	struct iommu_group *group;
3176 	int ret = 0;
3177
3178 	if (WARN_ON(!owner))
3179 		return -EINVAL;
3180
3181 	group = iommu_group_get(dev);
3182 	if (!group)
3183 		return -ENODEV;
3184
3185 	mutex_lock(&group->mutex);
3186 	if (group->owner_cnt) {
3187 		if (group->owner != owner) {
3188 			ret = -EPERM;
3189 			goto unlock_out;
3190 		}
3191 		group->owner_cnt++;
3192 		goto unlock_out;
3193 	}
3194
3195 	ret = __iommu_take_dma_ownership(group, owner);
3196 unlock_out:
3197 	mutex_unlock(&group->mutex);
3198 	iommu_group_put(group);
3199
3200 	return ret;
3201 }
3202 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
3203
3204 static void __iommu_release_dma_ownership(struct iommu_group *group)
3205 {
3206 	if (WARN_ON(!group->owner_cnt || !group->owner ||
3207 		    !xa_empty(&group->pasid_array)))
3208 		return;
3209
3210 	group->owner_cnt = 0;
3211 	group->owner = NULL;
3212 	__iommu_group_set_domain_nofail(group, group->default_domain);
3213 }
3214
3215 /**
3216  * iommu_group_release_dma_owner() - Release DMA ownership of a group
3217  * @group: The group.
3218  *
3219  * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
3220  */
3221 void iommu_group_release_dma_owner(struct iommu_group *group)
3222 {
3223 	mutex_lock(&group->mutex);
3224 	__iommu_release_dma_ownership(group);
3225 	mutex_unlock(&group->mutex);
3226 }
3227 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
3228
3229 /**
3230  * iommu_device_release_dma_owner() - Release DMA ownership of a device
3231  * @dev: The device.
3232  *
3233  * Release the DMA ownership claimed by iommu_device_claim_dma_owner().
3234  */
3235 void iommu_device_release_dma_owner(struct device *dev)
3236 {
3237 	struct iommu_group *group = iommu_group_get(dev);
3238
3239 	mutex_lock(&group->mutex);
3240 	if (group->owner_cnt > 1)
3241 		group->owner_cnt--;
3242 	else
3243 		__iommu_release_dma_ownership(group);
3244 	mutex_unlock(&group->mutex);
3245 	iommu_group_put(group);
3246 }
3247 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
3248
3249 /**
3250  * iommu_group_dma_owner_claimed() - Query group dma ownership status
3251  * @group: The group.
3252  *
3253  * This provides status query on a given group. It is racy and only for
3254  * non-binding status reporting.
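 *
 * An owner (illustrative sketch only; "my_owner_cookie" is hypothetical)
 * typically brackets its user-controlled DMA like this:
 *
 *	ret = iommu_group_claim_dma_owner(group, my_owner_cookie);
 *	if (ret)
 *		return ret;
 *	...		// attach an unmanaged domain, run user DMA
 *	iommu_group_release_dma_owner(group);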
3255 */ 3256 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3257 { 3258 unsigned int user; 3259 3260 mutex_lock(&group->mutex); 3261 user = group->owner_cnt; 3262 mutex_unlock(&group->mutex); 3263 3264 return user; 3265 } 3266 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3267 3268 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3269 struct iommu_group *group, ioasid_t pasid) 3270 { 3271 struct group_device *device; 3272 int ret = 0; 3273 3274 for_each_group_device(group, device) { 3275 ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); 3276 if (ret) 3277 break; 3278 } 3279 3280 return ret; 3281 } 3282 3283 static void __iommu_remove_group_pasid(struct iommu_group *group, 3284 ioasid_t pasid) 3285 { 3286 struct group_device *device; 3287 const struct iommu_ops *ops; 3288 3289 for_each_group_device(group, device) { 3290 ops = dev_iommu_ops(device->dev); 3291 ops->remove_dev_pasid(device->dev, pasid); 3292 } 3293 } 3294 3295 /* 3296 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3297 * @domain: the iommu domain. 3298 * @dev: the attached device. 3299 * @pasid: the pasid of the device. 3300 * 3301 * Return: 0 on success, or an error. 3302 */ 3303 int iommu_attach_device_pasid(struct iommu_domain *domain, 3304 struct device *dev, ioasid_t pasid) 3305 { 3306 struct iommu_group *group; 3307 void *curr; 3308 int ret; 3309 3310 if (!domain->ops->set_dev_pasid) 3311 return -EOPNOTSUPP; 3312 3313 group = iommu_group_get(dev); 3314 if (!group) 3315 return -ENODEV; 3316 3317 mutex_lock(&group->mutex); 3318 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); 3319 if (curr) { 3320 ret = xa_err(curr) ? : -EBUSY; 3321 goto out_unlock; 3322 } 3323 3324 ret = __iommu_set_group_pasid(domain, group, pasid); 3325 if (ret) { 3326 __iommu_remove_group_pasid(group, pasid); 3327 xa_erase(&group->pasid_array, pasid); 3328 } 3329 out_unlock: 3330 mutex_unlock(&group->mutex); 3331 iommu_group_put(group); 3332 3333 return ret; 3334 } 3335 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3336 3337 /* 3338 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3339 * @domain: the iommu domain. 3340 * @dev: the attached device. 3341 * @pasid: the pasid of the device. 3342 * 3343 * The @domain must have been attached to @pasid of the @dev with 3344 * iommu_attach_device_pasid(). 3345 */ 3346 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3347 ioasid_t pasid) 3348 { 3349 struct iommu_group *group = iommu_group_get(dev); 3350 3351 mutex_lock(&group->mutex); 3352 __iommu_remove_group_pasid(group, pasid); 3353 WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); 3354 mutex_unlock(&group->mutex); 3355 3356 iommu_group_put(group); 3357 } 3358 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3359 3360 /* 3361 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev 3362 * @dev: the queried device 3363 * @pasid: the pasid of the device 3364 * @type: matched domain type, 0 for any match 3365 * 3366 * This is a variant of iommu_get_domain_for_dev(). It returns the existing 3367 * domain attached to pasid of a device. Callers must hold a lock around this 3368 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of 3369 * type is being manipulated. This API does not internally resolve races with 3370 * attach/detach. 3371 * 3372 * Return: attached domain on success, NULL otherwise. 
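 *
 * A minimal caller sketch (illustrative only; note from the code below that
 * a present domain whose type does not match a non-zero @type is reported
 * as an ERR_PTR):
 *
 *	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
 *	if (IS_ERR_OR_NULL(domain))
 *		return -ENODEV;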
3373 */ 3374 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, 3375 ioasid_t pasid, 3376 unsigned int type) 3377 { 3378 struct iommu_domain *domain; 3379 struct iommu_group *group; 3380 3381 group = iommu_group_get(dev); 3382 if (!group) 3383 return NULL; 3384 3385 xa_lock(&group->pasid_array); 3386 domain = xa_load(&group->pasid_array, pasid); 3387 if (type && domain && domain->type != type) 3388 domain = ERR_PTR(-EBUSY); 3389 xa_unlock(&group->pasid_array); 3390 iommu_group_put(group); 3391 3392 return domain; 3393 } 3394 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); 3395 3396 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, 3397 struct mm_struct *mm) 3398 { 3399 const struct iommu_ops *ops = dev_iommu_ops(dev); 3400 struct iommu_domain *domain; 3401 3402 domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); 3403 if (!domain) 3404 return NULL; 3405 3406 domain->type = IOMMU_DOMAIN_SVA; 3407 mmgrab(mm); 3408 domain->mm = mm; 3409 domain->iopf_handler = iommu_sva_handle_iopf; 3410 domain->fault_data = mm; 3411 3412 return domain; 3413 } 3414
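/*
 * Illustrative sketch only (not part of this file): an SVA user combines
 * iommu_sva_domain_alloc() with the PASID attach API above, roughly as the
 * iommu-sva helpers do; "pasid" stands for the PASID chosen for the mm.
 *
 *	domain = iommu_sva_domain_alloc(dev, mm);
 *	if (!domain)
 *		return -ENOMEM;
 *	ret = iommu_attach_device_pasid(domain, dev, pasid);
 *	if (ret) {
 *		iommu_domain_free(domain);
 *		return ret;
 *	}
 */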