// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)    "iommu: " fmt

#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitops.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>

#include "dma-iommu.h"

#include "iommu-sva.h"

/* kset that every group kobject is attached to in iommu_group_alloc() */
static struct kset *iommu_group_kset;
/* Allocator for iommu_group::id */
static DEFINE_IDA(iommu_group_ida);

static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
/* Bitmask of IOMMU_CMD_LINE_* flags: which defaults came from the command line */
static u32 iommu_cmd_line __read_mostly;

struct iommu_group {
	struct kobject kobj;
	/* "devices" child kobject; also used as the group's reference count */
	struct kobject *devices_kobj;
	/* List of struct group_device, linked through group_device::list */
	struct list_head devices;
	struct xarray pasid_array;
	struct mutex mutex;
	/* Driver private data and the callback that releases it */
	void *iommu_data;
	void (*iommu_data_release)(void *iommu_data);
	char *name;
	int id;
	struct iommu_domain *default_domain;
	struct iommu_domain *blocking_domain;
	struct iommu_domain *domain;
	struct list_head entry;
	unsigned int owner_cnt;
	void *owner;
};

/* One member device of an iommu_group */
struct group_device {
	struct list_head list;
	struct device *dev;
	/* Name of the sysfs link under the group's "devices" directory */
	char *name;
};

/* Iterate over each struct group_device in a struct iommu_group */
#define for_each_group_device(group, pos) \
	list_for_each_entry(pos, &(group)->devices, list)

/* A sysfs attribute of a group, with group-typed show/store callbacks */
struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};

/* Names printed for each reserved region type in "reserved_regions" */
static const char * const iommu_group_resv_type_string[] = {
	[IOMMU_RESV_DIRECT]			= "direct",
	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
	[IOMMU_RESV_RESERVED]			= "reserved",
	[IOMMU_RESV_MSI]			= "msi",
	[IOMMU_RESV_SW_MSI]			= "msi",
};

#define IOMMU_CMD_LINE_DMA_API		BIT(0)
#define IOMMU_CMD_LINE_STRICT		BIT(1)

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static int iommu_alloc_default_domain(struct iommu_group *group,
				      struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
						 unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
				struct iommu_group *group);

/* Flags for __iommu_group_set_domain_internal() */
enum {
	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};

static int __iommu_group_set_domain_internal(struct iommu_group *group,
					     struct iommu_domain *new_domain,
					     unsigned int flags);
/* Set the group's domain with no flags; returns the internal helper's result */
static int __iommu_group_set_domain(struct iommu_group *group,
				    struct iommu_domain *new_domain)
{
	return __iommu_group_set_domain_internal(group, new_domain, 0);
}
/* As above with IOMMU_SET_DOMAIN_MUST_SUCCEED; a non-zero result only WARNs */
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
					    struct iommu_domain *new_domain)
{
	WARN_ON(__iommu_group_set_domain_internal(
		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
}

static int iommu_create_device_direct_mappings(struct iommu_group *group,
					       struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count);

#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =		\
	__ATTR(_name, _mode, _show, _store)

#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)

/* Registered IOMMU instances, protected by iommu_device_lock */
static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);

/* Buses that get a notifier and are probed when an IOMMU registers */
static struct bus_type * const iommu_buses[] = {
	&platform_bus_type,
#ifdef CONFIG_PCI
	&pci_bus_type,
#endif
#ifdef CONFIG_ARM_AMBA
	&amba_bustype,
#endif
#ifdef CONFIG_FSL_MC_BUS
	&fsl_mc_bus_type,
#endif
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
	&host1x_context_device_bus_type,
#endif
#ifdef CONFIG_CDX_BUS
	&cdx_bus_type,
#endif
};

/*
 * Use a function instead of an array here because the domain-type is a
 * bit-field, so an array would waste memory.
164 */ 165 static const char *iommu_domain_type_str(unsigned int t) 166 { 167 switch (t) { 168 case IOMMU_DOMAIN_BLOCKED: 169 return "Blocked"; 170 case IOMMU_DOMAIN_IDENTITY: 171 return "Passthrough"; 172 case IOMMU_DOMAIN_UNMANAGED: 173 return "Unmanaged"; 174 case IOMMU_DOMAIN_DMA: 175 case IOMMU_DOMAIN_DMA_FQ: 176 return "Translated"; 177 default: 178 return "Unknown"; 179 } 180 } 181 182 static int __init iommu_subsys_init(void) 183 { 184 struct notifier_block *nb; 185 186 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 187 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 188 iommu_set_default_passthrough(false); 189 else 190 iommu_set_default_translated(false); 191 192 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 193 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 194 iommu_set_default_translated(false); 195 } 196 } 197 198 if (!iommu_default_passthrough() && !iommu_dma_strict) 199 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 200 201 pr_info("Default domain type: %s%s\n", 202 iommu_domain_type_str(iommu_def_domain_type), 203 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 204 " (set via kernel command line)" : ""); 205 206 if (!iommu_default_passthrough()) 207 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 208 iommu_dma_strict ? "strict" : "lazy", 209 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 
210 " (set via kernel command line)" : ""); 211 212 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 213 if (!nb) 214 return -ENOMEM; 215 216 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 217 nb[i].notifier_call = iommu_bus_notifier; 218 bus_register_notifier(iommu_buses[i], &nb[i]); 219 } 220 221 return 0; 222 } 223 subsys_initcall(iommu_subsys_init); 224 225 static int remove_iommu_group(struct device *dev, void *data) 226 { 227 if (dev->iommu && dev->iommu->iommu_dev == data) 228 iommu_release_device(dev); 229 230 return 0; 231 } 232 233 /** 234 * iommu_device_register() - Register an IOMMU hardware instance 235 * @iommu: IOMMU handle for the instance 236 * @ops: IOMMU ops to associate with the instance 237 * @hwdev: (optional) actual instance device, used for fwnode lookup 238 * 239 * Return: 0 on success, or an error. 240 */ 241 int iommu_device_register(struct iommu_device *iommu, 242 const struct iommu_ops *ops, struct device *hwdev) 243 { 244 int err = 0; 245 246 /* We need to be able to take module references appropriately */ 247 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 248 return -EINVAL; 249 /* 250 * Temporarily enforce global restriction to a single driver. This was 251 * already the de-facto behaviour, since any possible combination of 252 * existing drivers would compete for at least the PCI or platform bus. 
253 */ 254 if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) 255 return -EBUSY; 256 257 iommu->ops = ops; 258 if (hwdev) 259 iommu->fwnode = dev_fwnode(hwdev); 260 261 spin_lock(&iommu_device_lock); 262 list_add_tail(&iommu->list, &iommu_device_list); 263 spin_unlock(&iommu_device_lock); 264 265 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { 266 iommu_buses[i]->iommu_ops = ops; 267 err = bus_iommu_probe(iommu_buses[i]); 268 } 269 if (err) 270 iommu_device_unregister(iommu); 271 return err; 272 } 273 EXPORT_SYMBOL_GPL(iommu_device_register); 274 275 void iommu_device_unregister(struct iommu_device *iommu) 276 { 277 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 278 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 279 280 spin_lock(&iommu_device_lock); 281 list_del(&iommu->list); 282 spin_unlock(&iommu_device_lock); 283 } 284 EXPORT_SYMBOL_GPL(iommu_device_unregister); 285 286 static struct dev_iommu *dev_iommu_get(struct device *dev) 287 { 288 struct dev_iommu *param = dev->iommu; 289 290 if (param) 291 return param; 292 293 param = kzalloc(sizeof(*param), GFP_KERNEL); 294 if (!param) 295 return NULL; 296 297 mutex_init(¶m->lock); 298 dev->iommu = param; 299 return param; 300 } 301 302 static void dev_iommu_free(struct device *dev) 303 { 304 struct dev_iommu *param = dev->iommu; 305 306 dev->iommu = NULL; 307 if (param->fwspec) { 308 fwnode_handle_put(param->fwspec->iommu_fwnode); 309 kfree(param->fwspec); 310 } 311 kfree(param); 312 } 313 314 static u32 dev_iommu_get_max_pasids(struct device *dev) 315 { 316 u32 max_pasids = 0, bits = 0; 317 int ret; 318 319 if (dev_is_pci(dev)) { 320 ret = pci_max_pasids(to_pci_dev(dev)); 321 if (ret > 0) 322 max_pasids = ret; 323 } else { 324 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 325 if (!ret) 326 max_pasids = 1UL << bits; 327 } 328 329 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 330 } 331 332 static int __iommu_probe_device(struct 
device *dev, struct list_head *group_list) 333 { 334 const struct iommu_ops *ops = dev->bus->iommu_ops; 335 struct iommu_device *iommu_dev; 336 struct iommu_group *group; 337 static DEFINE_MUTEX(iommu_probe_device_lock); 338 int ret; 339 340 if (!ops) 341 return -ENODEV; 342 /* 343 * Serialise to avoid races between IOMMU drivers registering in 344 * parallel and/or the "replay" calls from ACPI/OF code via client 345 * driver probe. Once the latter have been cleaned up we should 346 * probably be able to use device_lock() here to minimise the scope, 347 * but for now enforcing a simple global ordering is fine. 348 */ 349 mutex_lock(&iommu_probe_device_lock); 350 if (!dev_iommu_get(dev)) { 351 ret = -ENOMEM; 352 goto err_unlock; 353 } 354 355 if (!try_module_get(ops->owner)) { 356 ret = -EINVAL; 357 goto err_free; 358 } 359 360 iommu_dev = ops->probe_device(dev); 361 if (IS_ERR(iommu_dev)) { 362 ret = PTR_ERR(iommu_dev); 363 goto out_module_put; 364 } 365 366 dev->iommu->iommu_dev = iommu_dev; 367 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 368 369 group = iommu_group_get_for_dev(dev); 370 if (IS_ERR(group)) { 371 ret = PTR_ERR(group); 372 goto out_release; 373 } 374 375 mutex_lock(&group->mutex); 376 if (group_list && !group->default_domain && list_empty(&group->entry)) 377 list_add_tail(&group->entry, group_list); 378 mutex_unlock(&group->mutex); 379 iommu_group_put(group); 380 381 mutex_unlock(&iommu_probe_device_lock); 382 iommu_device_link(iommu_dev, dev); 383 384 return 0; 385 386 out_release: 387 if (ops->release_device) 388 ops->release_device(dev); 389 390 out_module_put: 391 module_put(ops->owner); 392 393 err_free: 394 dev_iommu_free(dev); 395 396 err_unlock: 397 mutex_unlock(&iommu_probe_device_lock); 398 399 return ret; 400 } 401 402 static bool iommu_is_attach_deferred(struct device *dev) 403 { 404 const struct iommu_ops *ops = dev_iommu_ops(dev); 405 406 if (ops->is_attach_deferred) 407 return ops->is_attach_deferred(dev); 408 409 return 
false; 410 } 411 412 static int iommu_group_do_dma_first_attach(struct device *dev, void *data) 413 { 414 struct iommu_domain *domain = data; 415 416 lockdep_assert_held(&dev->iommu_group->mutex); 417 418 if (iommu_is_attach_deferred(dev)) { 419 dev->iommu->attach_deferred = 1; 420 return 0; 421 } 422 423 return __iommu_attach_device(domain, dev); 424 } 425 426 int iommu_probe_device(struct device *dev) 427 { 428 const struct iommu_ops *ops; 429 struct iommu_group *group; 430 int ret; 431 432 ret = __iommu_probe_device(dev, NULL); 433 if (ret) 434 goto err_out; 435 436 group = iommu_group_get(dev); 437 if (!group) { 438 ret = -ENODEV; 439 goto err_release; 440 } 441 442 /* 443 * Try to allocate a default domain - needs support from the 444 * IOMMU driver. There are still some drivers which don't 445 * support default domains, so the return value is not yet 446 * checked. 447 */ 448 mutex_lock(&group->mutex); 449 iommu_alloc_default_domain(group, dev); 450 451 /* 452 * If device joined an existing group which has been claimed, don't 453 * attach the default domain. 454 */ 455 if (group->default_domain && !group->owner) { 456 ret = iommu_group_do_dma_first_attach(dev, group->default_domain); 457 if (ret) { 458 mutex_unlock(&group->mutex); 459 iommu_group_put(group); 460 goto err_release; 461 } 462 } 463 464 iommu_create_device_direct_mappings(group, dev); 465 466 mutex_unlock(&group->mutex); 467 iommu_group_put(group); 468 469 ops = dev_iommu_ops(dev); 470 if (ops->probe_finalize) 471 ops->probe_finalize(dev); 472 473 return 0; 474 475 err_release: 476 iommu_release_device(dev); 477 478 err_out: 479 return ret; 480 481 } 482 483 /* 484 * Remove a device from a group's device list and return the group device 485 * if successful. 
 */
static struct group_device *
__iommu_group_remove_device(struct iommu_group *group, struct device *dev)
{
	struct group_device *device;

	lockdep_assert_held(&group->mutex);
	for_each_group_device(group, device) {
		if (device->dev == dev) {
			list_del(&device->list);
			return device;
		}
	}

	return NULL;
}

/*
 * Release a device from its group and decrements the iommu group reference
 * count.
 */
static void __iommu_group_release_device(struct iommu_group *group,
					 struct group_device *grp_dev)
{
	struct device *dev = grp_dev->dev;

	sysfs_remove_link(group->devices_kobj, grp_dev->name);
	sysfs_remove_link(&dev->kobj, "iommu_group");

	trace_remove_device_from_group(group->id, dev);

	kfree(grp_dev->name);
	kfree(grp_dev);
	dev->iommu_group = NULL;
	kobject_put(group->devices_kobj);
}

/*
 * Undo the probe of @dev: unlink it from its IOMMU, take it out of its
 * group, let the driver release it, and drop the module reference and the
 * per-device IOMMU data. Does nothing if the device was never probed.
 */
static void iommu_release_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *device;
	const struct iommu_ops *ops;

	if (!dev->iommu || !group)
		return;

	iommu_device_unlink(dev->iommu->iommu_dev, dev);

	mutex_lock(&group->mutex);
	device = __iommu_group_remove_device(group, dev);

	/*
	 * If the group has become empty then ownership must have been released,
	 * and the current domain must be set back to NULL or the default
	 * domain.
	 */
	if (list_empty(&group->devices))
		WARN_ON(group->owner_cnt ||
			group->domain != group->default_domain);

	/*
	 * release_device() must stop using any attached domain on the device.
	 * If there are still other devices in the group they are not affected
	 * by this callback.
	 *
	 * The IOMMU driver must set the device to either an identity or
	 * blocking translation and stop using any domain pointer, as it is
	 * going to be freed.
	 */
	ops = dev_iommu_ops(dev);
	if (ops->release_device)
		ops->release_device(dev);
	mutex_unlock(&group->mutex);

	/* The sysfs teardown and group put happen outside the group mutex */
	if (device)
		__iommu_group_release_device(group, device);

	module_put(ops->owner);
	dev_iommu_free(dev);
}

/* "iommu.passthrough=" handler: a true value selects passthrough by default */
static int __init iommu_set_def_domain_type(char *str)
{
	bool pt;
	int ret;

	ret = kstrtobool(str, &pt);
	if (ret)
		return ret;

	if (pt)
		iommu_set_default_passthrough(true);
	else
		iommu_set_default_translated(true);

	return 0;
}
early_param("iommu.passthrough", iommu_set_def_domain_type);

/* "iommu.strict=" handler: records that the policy came from the command line */
static int __init iommu_dma_setup(char *str)
{
	int ret = kstrtobool(str, &iommu_dma_strict);

	if (!ret)
		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
	return ret;
}
early_param("iommu.strict", iommu_dma_setup);

/* Force strict invalidation; a DMA-FQ default domain type falls back to DMA */
void iommu_set_dma_strict(void)
{
	iommu_dma_strict = true;
	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
}

/* sysfs show dispatcher: -EIO when the attribute has no show callback */
static ssize_t iommu_group_attr_show(struct kobject *kobj,
				     struct attribute *__attr, char *buf)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->show)
		ret = attr->show(group, buf);
	return ret;
}

/* sysfs store dispatcher: -EIO when the attribute has no store callback */
static ssize_t iommu_group_attr_store(struct kobject *kobj,
				      struct attribute *__attr,
				      const char *buf, size_t count)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->store)
		ret = attr->store(group, buf, count);
	return ret;
}

static const struct sysfs_ops iommu_group_sysfs_ops = {
	.show = iommu_group_attr_show,
	.store = iommu_group_attr_store,
};

/* Create the sysfs file for @attr under the group's kobject */
static int iommu_group_create_file(struct iommu_group *group,
				   struct iommu_group_attribute *attr)
{
	return sysfs_create_file(&group->kobj, &attr->attr);
}

/* Remove the sysfs file for @attr from the group's kobject */
static void iommu_group_remove_file(struct iommu_group *group,
				    struct iommu_group_attribute *attr)
{
	sysfs_remove_file(&group->kobj, &attr->attr);
}

static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
{
	return sysfs_emit(buf, "%s\n", group->name);
}

/**
 * iommu_insert_resv_region - Insert a new region in the
 * list of reserved regions.
 * @new: new region to insert
 * @regions: list of regions
 *
 * Elements are sorted by start address and overlapping segments
 * of the same type are merged.
 *
 * @new itself is not linked into @regions; a copy is allocated and inserted.
 * Returns 0 on success or -ENOMEM if that allocation fails.
 */
static int iommu_insert_resv_region(struct iommu_resv_region *new,
				    struct list_head *regions)
{
	struct iommu_resv_region *iter, *tmp, *nr, *top;
	LIST_HEAD(stack);

	nr = iommu_alloc_resv_region(new->start, new->length,
				     new->prot, new->type, GFP_KERNEL);
	if (!nr)
		return -ENOMEM;

	/* First add the new element based on start address sorting */
	list_for_each_entry(iter, regions, list) {
		if (nr->start < iter->start ||
		    (nr->start == iter->start && nr->type <= iter->type))
			break;
	}
	/* Inserts before @iter, or at the tail if the loop ran to the end */
	list_add_tail(&nr->list, &iter->list);

	/* Merge overlapping segments of type nr->type in @regions, if any */
	list_for_each_entry_safe(iter, tmp, regions, list) {
		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;

		/* no merge needed on elements of different types than @new */
		if (iter->type != new->type) {
			list_move_tail(&iter->list, &stack);
			continue;
		}

		/* look for the last stack element of same type as @iter */
		list_for_each_entry_reverse(top, &stack, list)
			if (top->type == iter->type)
				goto check_overlap;

		list_move_tail(&iter->list, &stack);
		continue;

check_overlap:
		top_end = top->start + top->length - 1;

		/* Adjacent regions (iter starts right after top) are merged too */
		if (iter->start > top_end + 1) {
			list_move_tail(&iter->list, &stack);
		} else {
			top->length = max(top_end, iter_end) - top->start + 1;
			list_del(&iter->list);
			kfree(iter);
		}
	}
	list_splice(&stack, regions);
	return 0;
}

/* Insert every region of @dev_resv_regions; stops at the first error */
static int
iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
				 struct list_head *group_resv_regions)
{
	struct iommu_resv_region *entry;
	int ret = 0;

	list_for_each_entry(entry, dev_resv_regions, list) {
		ret = iommu_insert_resv_region(entry, group_resv_regions);
		if (ret)
			break;
	}
	return ret;
}

/*
 * Collect the reserved regions of every device in @group into @head, sorted
 * and merged by iommu_insert_resv_region(). On error @head may already hold
 * the regions inserted before the failure.
 */
int iommu_get_group_resv_regions(struct iommu_group *group,
				 struct list_head *head)
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		struct list_head dev_resv_regions;

		/*
		 * Non-API groups still expose reserved_regions in sysfs,
		 * so filter out calls that get here that way.
		 */
		if (!device->dev->iommu)
			break;

		INIT_LIST_HEAD(&dev_resv_regions);
		iommu_get_resv_regions(device->dev, &dev_resv_regions);
		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
		iommu_put_resv_regions(device->dev, &dev_resv_regions);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);

/*
 * sysfs "reserved_regions": one "start end type" line per region. The
 * collected list is freed entry by entry while it is printed.
 */
static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
					     char *buf)
{
	struct iommu_resv_region *region, *next;
	struct list_head group_resv_regions;
	int offset = 0;

	INIT_LIST_HEAD(&group_resv_regions);
	/* NOTE(review): the return value is ignored, so a partial list is shown on error */
	iommu_get_group_resv_regions(group, &group_resv_regions);

	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
					(long long)region->start,
					(long long)(region->start +
						    region->length - 1),
					iommu_group_resv_type_string[region->type]);
		kfree(region);
	}

	return offset;
}

/* sysfs "type": default domain type, "unknown" if none or unrecognised */
static ssize_t iommu_group_show_type(struct iommu_group *group,
				     char *buf)
{
	char *type = "unknown";

	mutex_lock(&group->mutex);
	if (group->default_domain) {
		switch (group->default_domain->type) {
		case IOMMU_DOMAIN_BLOCKED:
			type = "blocked";
			break;
		case IOMMU_DOMAIN_IDENTITY:
			type = "identity";
			break;
		case IOMMU_DOMAIN_UNMANAGED:
			type = "unmanaged";
			break;
		case IOMMU_DOMAIN_DMA:
			type = "DMA";
			break;
		case IOMMU_DOMAIN_DMA_FQ:
			type = "DMA-FQ";
			break;
		}
	}
	mutex_unlock(&group->mutex);

	return sysfs_emit(buf, "%s\n", type);
}

static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static IOMMU_GROUP_ATTR(reserved_regions, 0444,
			iommu_group_show_resv_regions, NULL);

static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
			iommu_group_store_type);

/*
 * kobject release callback: runs the driver's iommu_data release hook,
 * returns the group id to the IDA, then frees the group's domains, its
 * name and the group itself.
 */
static void iommu_group_release(struct kobject *kobj)
{
	struct iommu_group *group = to_iommu_group(kobj);

	pr_debug("Releasing group %d\n", group->id);

	if (group->iommu_data_release)
		group->iommu_data_release(group->iommu_data);

	ida_free(&iommu_group_ida, group->id);

	if (group->default_domain)
		iommu_domain_free(group->default_domain);
	if (group->blocking_domain)
		iommu_domain_free(group->blocking_domain);

	kfree(group->name);
	kfree(group);
}

static const struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};

/**
 * iommu_group_alloc - Allocate a new group
 *
 * This function is called by an iommu driver to allocate a new iommu
 * group.  The iommu group represents the minimum granularity of the iommu.
 * Upon successful return, the caller holds a reference to the supplied
 * group in order to hold the group until devices are added.
Use 846 * iommu_group_put() to release this extra reference count, allowing the 847 * group to be automatically reclaimed once it has no devices or external 848 * references. 849 */ 850 struct iommu_group *iommu_group_alloc(void) 851 { 852 struct iommu_group *group; 853 int ret; 854 855 group = kzalloc(sizeof(*group), GFP_KERNEL); 856 if (!group) 857 return ERR_PTR(-ENOMEM); 858 859 group->kobj.kset = iommu_group_kset; 860 mutex_init(&group->mutex); 861 INIT_LIST_HEAD(&group->devices); 862 INIT_LIST_HEAD(&group->entry); 863 xa_init(&group->pasid_array); 864 865 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 866 if (ret < 0) { 867 kfree(group); 868 return ERR_PTR(ret); 869 } 870 group->id = ret; 871 872 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 873 NULL, "%d", group->id); 874 if (ret) { 875 kobject_put(&group->kobj); 876 return ERR_PTR(ret); 877 } 878 879 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 880 if (!group->devices_kobj) { 881 kobject_put(&group->kobj); /* triggers .release & free */ 882 return ERR_PTR(-ENOMEM); 883 } 884 885 /* 886 * The devices_kobj holds a reference on the group kobject, so 887 * as long as that exists so will the group. We can therefore 888 * use the devices_kobj for reference counting. 889 */ 890 kobject_put(&group->kobj); 891 892 ret = iommu_group_create_file(group, 893 &iommu_group_attr_reserved_regions); 894 if (ret) { 895 kobject_put(group->devices_kobj); 896 return ERR_PTR(ret); 897 } 898 899 ret = iommu_group_create_file(group, &iommu_group_attr_type); 900 if (ret) { 901 kobject_put(group->devices_kobj); 902 return ERR_PTR(ret); 903 } 904 905 pr_debug("Allocated group %d\n", group->id); 906 907 return group; 908 } 909 EXPORT_SYMBOL_GPL(iommu_group_alloc); 910 911 /** 912 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 913 * @group: the group 914 * 915 * iommu drivers can store data in the group for use when doing iommu 916 * operations. 
 * This function provides a way to retrieve it.  Caller should hold a
 * group reference.
 *
 * Returns the pointer last stored with iommu_group_set_iommudata().
 */
void *iommu_group_get_iommudata(struct iommu_group *group)
{
	return group->iommu_data;
}
EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);

/**
 * iommu_group_set_iommudata - set iommu_data for a group
 * @group: the group
 * @iommu_data: new data
 * @release: release function for iommu_data
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations.  This function provides a way to set the data after
 * the group has been allocated.  Caller should hold a group reference.
 *
 * Any previously stored data and release function are simply overwritten;
 * the old release function is not called here.
 */
void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
			       void (*release)(void *iommu_data))
{
	group->iommu_data = iommu_data;
	group->iommu_data_release = release;
}
EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);

/**
 * iommu_group_set_name - set name for a group
 * @group: the group
 * @name: name
 *
 * Allow iommu driver to set a name for a group.  When set it will
 * appear in a name attribute file under the group in sysfs.
950 */ 951 int iommu_group_set_name(struct iommu_group *group, const char *name) 952 { 953 int ret; 954 955 if (group->name) { 956 iommu_group_remove_file(group, &iommu_group_attr_name); 957 kfree(group->name); 958 group->name = NULL; 959 if (!name) 960 return 0; 961 } 962 963 group->name = kstrdup(name, GFP_KERNEL); 964 if (!group->name) 965 return -ENOMEM; 966 967 ret = iommu_group_create_file(group, &iommu_group_attr_name); 968 if (ret) { 969 kfree(group->name); 970 group->name = NULL; 971 return ret; 972 } 973 974 return 0; 975 } 976 EXPORT_SYMBOL_GPL(iommu_group_set_name); 977 978 static int iommu_create_device_direct_mappings(struct iommu_group *group, 979 struct device *dev) 980 { 981 struct iommu_domain *domain = group->default_domain; 982 struct iommu_resv_region *entry; 983 struct list_head mappings; 984 unsigned long pg_size; 985 int ret = 0; 986 987 if (!domain || !iommu_is_dma_domain(domain)) 988 return 0; 989 990 BUG_ON(!domain->pgsize_bitmap); 991 992 pg_size = 1UL << __ffs(domain->pgsize_bitmap); 993 INIT_LIST_HEAD(&mappings); 994 995 iommu_get_resv_regions(dev, &mappings); 996 997 /* We need to consider overlapping regions for different devices */ 998 list_for_each_entry(entry, &mappings, list) { 999 dma_addr_t start, end, addr; 1000 size_t map_size = 0; 1001 1002 start = ALIGN(entry->start, pg_size); 1003 end = ALIGN(entry->start + entry->length, pg_size); 1004 1005 if (entry->type != IOMMU_RESV_DIRECT && 1006 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) 1007 continue; 1008 1009 for (addr = start; addr <= end; addr += pg_size) { 1010 phys_addr_t phys_addr; 1011 1012 if (addr == end) 1013 goto map_end; 1014 1015 phys_addr = iommu_iova_to_phys(domain, addr); 1016 if (!phys_addr) { 1017 map_size += pg_size; 1018 continue; 1019 } 1020 1021 map_end: 1022 if (map_size) { 1023 ret = iommu_map(domain, addr - map_size, 1024 addr - map_size, map_size, 1025 entry->prot, GFP_KERNEL); 1026 if (ret) 1027 goto out; 1028 map_size = 0; 1029 } 1030 } 1031 1032 } 
1033 1034 iommu_flush_iotlb_all(domain); 1035 1036 out: 1037 iommu_put_resv_regions(dev, &mappings); 1038 1039 return ret; 1040 } 1041 1042 /** 1043 * iommu_group_add_device - add a device to an iommu group 1044 * @group: the group into which to add the device (reference should be held) 1045 * @dev: the device 1046 * 1047 * This function is called by an iommu driver to add a device into a 1048 * group. Adding a device increments the group reference count. 1049 */ 1050 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1051 { 1052 int ret, i = 0; 1053 struct group_device *device; 1054 1055 device = kzalloc(sizeof(*device), GFP_KERNEL); 1056 if (!device) 1057 return -ENOMEM; 1058 1059 device->dev = dev; 1060 1061 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1062 if (ret) 1063 goto err_free_device; 1064 1065 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1066 rename: 1067 if (!device->name) { 1068 ret = -ENOMEM; 1069 goto err_remove_link; 1070 } 1071 1072 ret = sysfs_create_link_nowarn(group->devices_kobj, 1073 &dev->kobj, device->name); 1074 if (ret) { 1075 if (ret == -EEXIST && i >= 0) { 1076 /* 1077 * Account for the slim chance of collision 1078 * and append an instance to the name. 
1079 */ 1080 kfree(device->name); 1081 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1082 kobject_name(&dev->kobj), i++); 1083 goto rename; 1084 } 1085 goto err_free_name; 1086 } 1087 1088 kobject_get(group->devices_kobj); 1089 1090 dev->iommu_group = group; 1091 1092 mutex_lock(&group->mutex); 1093 list_add_tail(&device->list, &group->devices); 1094 if (group->domain) 1095 ret = iommu_group_do_dma_first_attach(dev, group->domain); 1096 mutex_unlock(&group->mutex); 1097 if (ret) 1098 goto err_put_group; 1099 1100 trace_add_device_to_group(group->id, dev); 1101 1102 dev_info(dev, "Adding to iommu group %d\n", group->id); 1103 1104 return 0; 1105 1106 err_put_group: 1107 mutex_lock(&group->mutex); 1108 list_del(&device->list); 1109 mutex_unlock(&group->mutex); 1110 dev->iommu_group = NULL; 1111 kobject_put(group->devices_kobj); 1112 sysfs_remove_link(group->devices_kobj, device->name); 1113 err_free_name: 1114 kfree(device->name); 1115 err_remove_link: 1116 sysfs_remove_link(&dev->kobj, "iommu_group"); 1117 err_free_device: 1118 kfree(device); 1119 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1120 return ret; 1121 } 1122 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1123 1124 /** 1125 * iommu_group_remove_device - remove a device from it's current group 1126 * @dev: device to be removed 1127 * 1128 * This function is called by an iommu driver to remove the device from 1129 * it's current group. This decrements the iommu group reference count. 
1130 */ 1131 void iommu_group_remove_device(struct device *dev) 1132 { 1133 struct iommu_group *group = dev->iommu_group; 1134 struct group_device *device; 1135 1136 if (!group) 1137 return; 1138 1139 dev_info(dev, "Removing from iommu group %d\n", group->id); 1140 1141 mutex_lock(&group->mutex); 1142 device = __iommu_group_remove_device(group, dev); 1143 mutex_unlock(&group->mutex); 1144 1145 if (device) 1146 __iommu_group_release_device(group, device); 1147 } 1148 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1149 1150 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, 1151 int (*fn)(struct device *, void *)) 1152 { 1153 struct group_device *device; 1154 int ret = 0; 1155 1156 for_each_group_device(group, device) { 1157 ret = fn(device->dev, data); 1158 if (ret) 1159 break; 1160 } 1161 return ret; 1162 } 1163 1164 /** 1165 * iommu_group_for_each_dev - iterate over each device in the group 1166 * @group: the group 1167 * @data: caller opaque data to be passed to callback function 1168 * @fn: caller supplied callback function 1169 * 1170 * This function is called by group users to iterate over group devices. 1171 * Callers should hold a reference count to the group during callback. 1172 * The group->mutex is held across callbacks, which will block calls to 1173 * iommu_group_add/remove_device. 1174 */ 1175 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1176 int (*fn)(struct device *, void *)) 1177 { 1178 int ret; 1179 1180 mutex_lock(&group->mutex); 1181 ret = __iommu_group_for_each_dev(group, data, fn); 1182 mutex_unlock(&group->mutex); 1183 1184 return ret; 1185 } 1186 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1187 1188 /** 1189 * iommu_group_get - Return the group for a device and increment reference 1190 * @dev: get the group that this device belongs to 1191 * 1192 * This function is called by iommu drivers and users to get the group 1193 * for the specified device. 
If found, the group is returned and the group 1194 * reference in incremented, else NULL. 1195 */ 1196 struct iommu_group *iommu_group_get(struct device *dev) 1197 { 1198 struct iommu_group *group = dev->iommu_group; 1199 1200 if (group) 1201 kobject_get(group->devices_kobj); 1202 1203 return group; 1204 } 1205 EXPORT_SYMBOL_GPL(iommu_group_get); 1206 1207 /** 1208 * iommu_group_ref_get - Increment reference on a group 1209 * @group: the group to use, must not be NULL 1210 * 1211 * This function is called by iommu drivers to take additional references on an 1212 * existing group. Returns the given group for convenience. 1213 */ 1214 struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1215 { 1216 kobject_get(group->devices_kobj); 1217 return group; 1218 } 1219 EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1220 1221 /** 1222 * iommu_group_put - Decrement group reference 1223 * @group: the group to use 1224 * 1225 * This function is called by iommu drivers and users to release the 1226 * iommu group. Once the reference count is zero, the group is released. 1227 */ 1228 void iommu_group_put(struct iommu_group *group) 1229 { 1230 if (group) 1231 kobject_put(group->devices_kobj); 1232 } 1233 EXPORT_SYMBOL_GPL(iommu_group_put); 1234 1235 /** 1236 * iommu_register_device_fault_handler() - Register a device fault handler 1237 * @dev: the device 1238 * @handler: the fault handler 1239 * @data: private data passed as argument to the handler 1240 * 1241 * When an IOMMU fault event is received, this handler gets called with the 1242 * fault event and data as argument. The handler should return 0 on success. 
If 1243 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also 1244 * complete the fault by calling iommu_page_response() with one of the following 1245 * response code: 1246 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation 1247 * - IOMMU_PAGE_RESP_INVALID: terminate the fault 1248 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting 1249 * page faults if possible. 1250 * 1251 * Return 0 if the fault handler was installed successfully, or an error. 1252 */ 1253 int iommu_register_device_fault_handler(struct device *dev, 1254 iommu_dev_fault_handler_t handler, 1255 void *data) 1256 { 1257 struct dev_iommu *param = dev->iommu; 1258 int ret = 0; 1259 1260 if (!param) 1261 return -EINVAL; 1262 1263 mutex_lock(¶m->lock); 1264 /* Only allow one fault handler registered for each device */ 1265 if (param->fault_param) { 1266 ret = -EBUSY; 1267 goto done_unlock; 1268 } 1269 1270 get_device(dev); 1271 param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); 1272 if (!param->fault_param) { 1273 put_device(dev); 1274 ret = -ENOMEM; 1275 goto done_unlock; 1276 } 1277 param->fault_param->handler = handler; 1278 param->fault_param->data = data; 1279 mutex_init(¶m->fault_param->lock); 1280 INIT_LIST_HEAD(¶m->fault_param->faults); 1281 1282 done_unlock: 1283 mutex_unlock(¶m->lock); 1284 1285 return ret; 1286 } 1287 EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); 1288 1289 /** 1290 * iommu_unregister_device_fault_handler() - Unregister the device fault handler 1291 * @dev: the device 1292 * 1293 * Remove the device fault handler installed with 1294 * iommu_register_device_fault_handler(). 1295 * 1296 * Return 0 on success, or an error. 
1297 */ 1298 int iommu_unregister_device_fault_handler(struct device *dev) 1299 { 1300 struct dev_iommu *param = dev->iommu; 1301 int ret = 0; 1302 1303 if (!param) 1304 return -EINVAL; 1305 1306 mutex_lock(¶m->lock); 1307 1308 if (!param->fault_param) 1309 goto unlock; 1310 1311 /* we cannot unregister handler if there are pending faults */ 1312 if (!list_empty(¶m->fault_param->faults)) { 1313 ret = -EBUSY; 1314 goto unlock; 1315 } 1316 1317 kfree(param->fault_param); 1318 param->fault_param = NULL; 1319 put_device(dev); 1320 unlock: 1321 mutex_unlock(¶m->lock); 1322 1323 return ret; 1324 } 1325 EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); 1326 1327 /** 1328 * iommu_report_device_fault() - Report fault event to device driver 1329 * @dev: the device 1330 * @evt: fault event data 1331 * 1332 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ 1333 * handler. When this function fails and the fault is recoverable, it is the 1334 * caller's responsibility to complete the fault. 1335 * 1336 * Return 0 on success, or an error. 
1337 */ 1338 int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) 1339 { 1340 struct dev_iommu *param = dev->iommu; 1341 struct iommu_fault_event *evt_pending = NULL; 1342 struct iommu_fault_param *fparam; 1343 int ret = 0; 1344 1345 if (!param || !evt) 1346 return -EINVAL; 1347 1348 /* we only report device fault if there is a handler registered */ 1349 mutex_lock(¶m->lock); 1350 fparam = param->fault_param; 1351 if (!fparam || !fparam->handler) { 1352 ret = -EINVAL; 1353 goto done_unlock; 1354 } 1355 1356 if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && 1357 (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { 1358 evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), 1359 GFP_KERNEL); 1360 if (!evt_pending) { 1361 ret = -ENOMEM; 1362 goto done_unlock; 1363 } 1364 mutex_lock(&fparam->lock); 1365 list_add_tail(&evt_pending->list, &fparam->faults); 1366 mutex_unlock(&fparam->lock); 1367 } 1368 1369 ret = fparam->handler(&evt->fault, fparam->data); 1370 if (ret && evt_pending) { 1371 mutex_lock(&fparam->lock); 1372 list_del(&evt_pending->list); 1373 mutex_unlock(&fparam->lock); 1374 kfree(evt_pending); 1375 } 1376 done_unlock: 1377 mutex_unlock(¶m->lock); 1378 return ret; 1379 } 1380 EXPORT_SYMBOL_GPL(iommu_report_device_fault); 1381 1382 int iommu_page_response(struct device *dev, 1383 struct iommu_page_response *msg) 1384 { 1385 bool needs_pasid; 1386 int ret = -EINVAL; 1387 struct iommu_fault_event *evt; 1388 struct iommu_fault_page_request *prm; 1389 struct dev_iommu *param = dev->iommu; 1390 const struct iommu_ops *ops = dev_iommu_ops(dev); 1391 bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; 1392 1393 if (!ops->page_response) 1394 return -ENODEV; 1395 1396 if (!param || !param->fault_param) 1397 return -EINVAL; 1398 1399 if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || 1400 msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) 1401 return -EINVAL; 1402 1403 /* Only send response if there is a fault report pending */ 
1404 mutex_lock(¶m->fault_param->lock); 1405 if (list_empty(¶m->fault_param->faults)) { 1406 dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); 1407 goto done_unlock; 1408 } 1409 /* 1410 * Check if we have a matching page request pending to respond, 1411 * otherwise return -EINVAL 1412 */ 1413 list_for_each_entry(evt, ¶m->fault_param->faults, list) { 1414 prm = &evt->fault.prm; 1415 if (prm->grpid != msg->grpid) 1416 continue; 1417 1418 /* 1419 * If the PASID is required, the corresponding request is 1420 * matched using the group ID, the PASID valid bit and the PASID 1421 * value. Otherwise only the group ID matches request and 1422 * response. 1423 */ 1424 needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; 1425 if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) 1426 continue; 1427 1428 if (!needs_pasid && has_pasid) { 1429 /* No big deal, just clear it. */ 1430 msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; 1431 msg->pasid = 0; 1432 } 1433 1434 ret = ops->page_response(dev, evt, msg); 1435 list_del(&evt->list); 1436 kfree(evt); 1437 break; 1438 } 1439 1440 done_unlock: 1441 mutex_unlock(¶m->fault_param->lock); 1442 return ret; 1443 } 1444 EXPORT_SYMBOL_GPL(iommu_page_response); 1445 1446 /** 1447 * iommu_group_id - Return ID for a group 1448 * @group: the group to ID 1449 * 1450 * Return the unique ID for the group matching the sysfs group number. 1451 */ 1452 int iommu_group_id(struct iommu_group *group) 1453 { 1454 return group->id; 1455 } 1456 EXPORT_SYMBOL_GPL(iommu_group_id); 1457 1458 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1459 unsigned long *devfns); 1460 1461 /* 1462 * To consider a PCI device isolated, we require ACS to support Source 1463 * Validation, Request Redirection, Completer Redirection, and Upstream 1464 * Forwarding. 
This effectively means that devices cannot spoof their 1465 * requester ID, requests and completions cannot be redirected, and all 1466 * transactions are forwarded upstream, even as it passes through a 1467 * bridge where the target device is downstream. 1468 */ 1469 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1470 1471 /* 1472 * For multifunction devices which are not isolated from each other, find 1473 * all the other non-isolated functions and look for existing groups. For 1474 * each function, we also need to look for aliases to or from other devices 1475 * that may already have a group. 1476 */ 1477 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1478 unsigned long *devfns) 1479 { 1480 struct pci_dev *tmp = NULL; 1481 struct iommu_group *group; 1482 1483 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1484 return NULL; 1485 1486 for_each_pci_dev(tmp) { 1487 if (tmp == pdev || tmp->bus != pdev->bus || 1488 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1489 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1490 continue; 1491 1492 group = get_pci_alias_group(tmp, devfns); 1493 if (group) { 1494 pci_dev_put(tmp); 1495 return group; 1496 } 1497 } 1498 1499 return NULL; 1500 } 1501 1502 /* 1503 * Look for aliases to or from the given device for existing groups. DMA 1504 * aliases are only supported on the same bus, therefore the search 1505 * space is quite small (especially since we're really only looking at pcie 1506 * device, and therefore only expect multiple slots on the root complex or 1507 * downstream switch ports). It's conceivable though that a pair of 1508 * multifunction devices could have aliases between them that would cause a 1509 * loop. To prevent this, we use a bitmap to track where we've been. 
1510 */ 1511 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1512 unsigned long *devfns) 1513 { 1514 struct pci_dev *tmp = NULL; 1515 struct iommu_group *group; 1516 1517 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1518 return NULL; 1519 1520 group = iommu_group_get(&pdev->dev); 1521 if (group) 1522 return group; 1523 1524 for_each_pci_dev(tmp) { 1525 if (tmp == pdev || tmp->bus != pdev->bus) 1526 continue; 1527 1528 /* We alias them or they alias us */ 1529 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1530 group = get_pci_alias_group(tmp, devfns); 1531 if (group) { 1532 pci_dev_put(tmp); 1533 return group; 1534 } 1535 1536 group = get_pci_function_alias_group(tmp, devfns); 1537 if (group) { 1538 pci_dev_put(tmp); 1539 return group; 1540 } 1541 } 1542 } 1543 1544 return NULL; 1545 } 1546 1547 struct group_for_pci_data { 1548 struct pci_dev *pdev; 1549 struct iommu_group *group; 1550 }; 1551 1552 /* 1553 * DMA alias iterator callback, return the last seen device. Stop and return 1554 * the IOMMU group if we find one along the way. 1555 */ 1556 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1557 { 1558 struct group_for_pci_data *data = opaque; 1559 1560 data->pdev = pdev; 1561 data->group = iommu_group_get(&pdev->dev); 1562 1563 return data->group != NULL; 1564 } 1565 1566 /* 1567 * Generic device_group call-back function. It just allocates one 1568 * iommu-group per device. 1569 */ 1570 struct iommu_group *generic_device_group(struct device *dev) 1571 { 1572 return iommu_group_alloc(); 1573 } 1574 EXPORT_SYMBOL_GPL(generic_device_group); 1575 1576 /* 1577 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1578 * to find or create an IOMMU group for a device. 
1579 */ 1580 struct iommu_group *pci_device_group(struct device *dev) 1581 { 1582 struct pci_dev *pdev = to_pci_dev(dev); 1583 struct group_for_pci_data data; 1584 struct pci_bus *bus; 1585 struct iommu_group *group = NULL; 1586 u64 devfns[4] = { 0 }; 1587 1588 if (WARN_ON(!dev_is_pci(dev))) 1589 return ERR_PTR(-EINVAL); 1590 1591 /* 1592 * Find the upstream DMA alias for the device. A device must not 1593 * be aliased due to topology in order to have its own IOMMU group. 1594 * If we find an alias along the way that already belongs to a 1595 * group, use it. 1596 */ 1597 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1598 return data.group; 1599 1600 pdev = data.pdev; 1601 1602 /* 1603 * Continue upstream from the point of minimum IOMMU granularity 1604 * due to aliases to the point where devices are protected from 1605 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1606 * group, use it. 1607 */ 1608 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1609 if (!bus->self) 1610 continue; 1611 1612 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1613 break; 1614 1615 pdev = bus->self; 1616 1617 group = iommu_group_get(&pdev->dev); 1618 if (group) 1619 return group; 1620 } 1621 1622 /* 1623 * Look for existing groups on device aliases. If we alias another 1624 * device or another device aliases us, use the same group. 1625 */ 1626 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1627 if (group) 1628 return group; 1629 1630 /* 1631 * Look for existing groups on non-isolated functions on the same 1632 * slot and aliases of those funcions, if any. No need to clear 1633 * the search bitmap, the tested devfns are still valid. 
1634 */ 1635 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1636 if (group) 1637 return group; 1638 1639 /* No shared group found, allocate new */ 1640 return iommu_group_alloc(); 1641 } 1642 EXPORT_SYMBOL_GPL(pci_device_group); 1643 1644 /* Get the IOMMU group for device on fsl-mc bus */ 1645 struct iommu_group *fsl_mc_device_group(struct device *dev) 1646 { 1647 struct device *cont_dev = fsl_mc_cont_dev(dev); 1648 struct iommu_group *group; 1649 1650 group = iommu_group_get(cont_dev); 1651 if (!group) 1652 group = iommu_group_alloc(); 1653 return group; 1654 } 1655 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1656 1657 static int iommu_get_def_domain_type(struct device *dev) 1658 { 1659 const struct iommu_ops *ops = dev_iommu_ops(dev); 1660 1661 if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) 1662 return IOMMU_DOMAIN_DMA; 1663 1664 if (ops->def_domain_type) 1665 return ops->def_domain_type(dev); 1666 1667 return 0; 1668 } 1669 1670 static int iommu_group_alloc_default_domain(const struct bus_type *bus, 1671 struct iommu_group *group, 1672 unsigned int type) 1673 { 1674 struct iommu_domain *dom; 1675 1676 dom = __iommu_domain_alloc(bus, type); 1677 if (!dom && type != IOMMU_DOMAIN_DMA) { 1678 dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); 1679 if (dom) 1680 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1681 type, group->name); 1682 } 1683 1684 if (!dom) 1685 return -ENOMEM; 1686 1687 group->default_domain = dom; 1688 if (!group->domain) 1689 group->domain = dom; 1690 return 0; 1691 } 1692 1693 static int iommu_alloc_default_domain(struct iommu_group *group, 1694 struct device *dev) 1695 { 1696 unsigned int type; 1697 1698 if (group->default_domain) 1699 return 0; 1700 1701 type = iommu_get_def_domain_type(dev) ? 
: iommu_def_domain_type; 1702 1703 return iommu_group_alloc_default_domain(dev->bus, group, type); 1704 } 1705 1706 /** 1707 * iommu_group_get_for_dev - Find or create the IOMMU group for a device 1708 * @dev: target device 1709 * 1710 * This function is intended to be called by IOMMU drivers and extended to 1711 * support common, bus-defined algorithms when determining or creating the 1712 * IOMMU group for a device. On success, the caller will hold a reference 1713 * to the returned IOMMU group, which will already include the provided 1714 * device. The reference should be released with iommu_group_put(). 1715 */ 1716 static struct iommu_group *iommu_group_get_for_dev(struct device *dev) 1717 { 1718 const struct iommu_ops *ops = dev_iommu_ops(dev); 1719 struct iommu_group *group; 1720 int ret; 1721 1722 group = iommu_group_get(dev); 1723 if (group) 1724 return group; 1725 1726 group = ops->device_group(dev); 1727 if (WARN_ON_ONCE(group == NULL)) 1728 return ERR_PTR(-EINVAL); 1729 1730 if (IS_ERR(group)) 1731 return group; 1732 1733 ret = iommu_group_add_device(group, dev); 1734 if (ret) 1735 goto out_put_group; 1736 1737 return group; 1738 1739 out_put_group: 1740 iommu_group_put(group); 1741 1742 return ERR_PTR(ret); 1743 } 1744 1745 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1746 { 1747 return group->default_domain; 1748 } 1749 1750 static int probe_iommu_group(struct device *dev, void *data) 1751 { 1752 struct list_head *group_list = data; 1753 struct iommu_group *group; 1754 int ret; 1755 1756 /* Device is probed already if in a group */ 1757 group = iommu_group_get(dev); 1758 if (group) { 1759 iommu_group_put(group); 1760 return 0; 1761 } 1762 1763 ret = __iommu_probe_device(dev, group_list); 1764 if (ret == -ENODEV) 1765 ret = 0; 1766 1767 return ret; 1768 } 1769 1770 static int iommu_bus_notifier(struct notifier_block *nb, 1771 unsigned long action, void *data) 1772 { 1773 struct device *dev = data; 1774 1775 if (action == 
BUS_NOTIFY_ADD_DEVICE) { 1776 int ret; 1777 1778 ret = iommu_probe_device(dev); 1779 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1780 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1781 iommu_release_device(dev); 1782 return NOTIFY_OK; 1783 } 1784 1785 return 0; 1786 } 1787 1788 struct __group_domain_type { 1789 struct device *dev; 1790 unsigned int type; 1791 }; 1792 1793 static int probe_get_default_domain_type(struct device *dev, void *data) 1794 { 1795 struct __group_domain_type *gtype = data; 1796 unsigned int type = iommu_get_def_domain_type(dev); 1797 1798 if (type) { 1799 if (gtype->type && gtype->type != type) { 1800 dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", 1801 iommu_domain_type_str(type), 1802 dev_name(gtype->dev), 1803 iommu_domain_type_str(gtype->type)); 1804 gtype->type = 0; 1805 } 1806 1807 if (!gtype->dev) { 1808 gtype->dev = dev; 1809 gtype->type = type; 1810 } 1811 } 1812 1813 return 0; 1814 } 1815 1816 static void probe_alloc_default_domain(const struct bus_type *bus, 1817 struct iommu_group *group) 1818 { 1819 struct __group_domain_type gtype; 1820 1821 memset(>ype, 0, sizeof(gtype)); 1822 1823 /* Ask for default domain requirements of all devices in the group */ 1824 __iommu_group_for_each_dev(group, >ype, 1825 probe_get_default_domain_type); 1826 1827 if (!gtype.type) 1828 gtype.type = iommu_def_domain_type; 1829 1830 iommu_group_alloc_default_domain(bus, group, gtype.type); 1831 1832 } 1833 1834 static int __iommu_group_dma_first_attach(struct iommu_group *group) 1835 { 1836 return __iommu_group_for_each_dev(group, group->default_domain, 1837 iommu_group_do_dma_first_attach); 1838 } 1839 1840 static int iommu_group_do_probe_finalize(struct device *dev, void *data) 1841 { 1842 const struct iommu_ops *ops = dev_iommu_ops(dev); 1843 1844 if (ops->probe_finalize) 1845 ops->probe_finalize(dev); 1846 1847 return 0; 1848 } 1849 1850 static void 
__iommu_group_dma_finalize(struct iommu_group *group) 1851 { 1852 __iommu_group_for_each_dev(group, group->default_domain, 1853 iommu_group_do_probe_finalize); 1854 } 1855 1856 static int iommu_do_create_direct_mappings(struct device *dev, void *data) 1857 { 1858 struct iommu_group *group = data; 1859 1860 iommu_create_device_direct_mappings(group, dev); 1861 1862 return 0; 1863 } 1864 1865 static int iommu_group_create_direct_mappings(struct iommu_group *group) 1866 { 1867 return __iommu_group_for_each_dev(group, group, 1868 iommu_do_create_direct_mappings); 1869 } 1870 1871 int bus_iommu_probe(const struct bus_type *bus) 1872 { 1873 struct iommu_group *group, *next; 1874 LIST_HEAD(group_list); 1875 int ret; 1876 1877 /* 1878 * This code-path does not allocate the default domain when 1879 * creating the iommu group, so do it after the groups are 1880 * created. 1881 */ 1882 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1883 if (ret) 1884 return ret; 1885 1886 list_for_each_entry_safe(group, next, &group_list, entry) { 1887 mutex_lock(&group->mutex); 1888 1889 /* Remove item from the list */ 1890 list_del_init(&group->entry); 1891 1892 /* Try to allocate default domain */ 1893 probe_alloc_default_domain(bus, group); 1894 1895 if (!group->default_domain) { 1896 mutex_unlock(&group->mutex); 1897 continue; 1898 } 1899 1900 iommu_group_create_direct_mappings(group); 1901 1902 ret = __iommu_group_dma_first_attach(group); 1903 1904 mutex_unlock(&group->mutex); 1905 1906 if (ret) 1907 break; 1908 1909 __iommu_group_dma_finalize(group); 1910 } 1911 1912 return ret; 1913 } 1914 1915 bool iommu_present(const struct bus_type *bus) 1916 { 1917 return bus->iommu_ops != NULL; 1918 } 1919 EXPORT_SYMBOL_GPL(iommu_present); 1920 1921 /** 1922 * device_iommu_capable() - check for a general IOMMU capability 1923 * @dev: device to which the capability would be relevant, if available 1924 * @cap: IOMMU capability 1925 * 1926 * Return: true if an IOMMU is present 
and supports the given capability 1927 * for the given device, otherwise false. 1928 */ 1929 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1930 { 1931 const struct iommu_ops *ops; 1932 1933 if (!dev->iommu || !dev->iommu->iommu_dev) 1934 return false; 1935 1936 ops = dev_iommu_ops(dev); 1937 if (!ops->capable) 1938 return false; 1939 1940 return ops->capable(dev, cap); 1941 } 1942 EXPORT_SYMBOL_GPL(device_iommu_capable); 1943 1944 /** 1945 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1946 * for a group 1947 * @group: Group to query 1948 * 1949 * IOMMU groups should not have differing values of 1950 * msi_device_has_isolated_msi() for devices in a group. However nothing 1951 * directly prevents this, so ensure mistakes don't result in isolation failures 1952 * by checking that all the devices are the same. 1953 */ 1954 bool iommu_group_has_isolated_msi(struct iommu_group *group) 1955 { 1956 struct group_device *group_dev; 1957 bool ret = true; 1958 1959 mutex_lock(&group->mutex); 1960 for_each_group_device(group, group_dev) 1961 ret &= msi_device_has_isolated_msi(group_dev->dev); 1962 mutex_unlock(&group->mutex); 1963 return ret; 1964 } 1965 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 1966 1967 /** 1968 * iommu_set_fault_handler() - set a fault handler for an iommu domain 1969 * @domain: iommu domain 1970 * @handler: fault handler 1971 * @token: user data, will be passed back to the fault handler 1972 * 1973 * This function should be used by IOMMU users which want to be notified 1974 * whenever an IOMMU fault happens. 1975 * 1976 * The fault handler itself should return 0 on success, and an appropriate 1977 * error code otherwise. 
1978 */ 1979 void iommu_set_fault_handler(struct iommu_domain *domain, 1980 iommu_fault_handler_t handler, 1981 void *token) 1982 { 1983 BUG_ON(!domain); 1984 1985 domain->handler = handler; 1986 domain->handler_token = token; 1987 } 1988 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1989 1990 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, 1991 unsigned type) 1992 { 1993 struct iommu_domain *domain; 1994 unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; 1995 1996 if (bus == NULL || bus->iommu_ops == NULL) 1997 return NULL; 1998 1999 domain = bus->iommu_ops->domain_alloc(alloc_type); 2000 if (!domain) 2001 return NULL; 2002 2003 domain->type = type; 2004 /* 2005 * If not already set, assume all sizes by default; the driver 2006 * may override this later 2007 */ 2008 if (!domain->pgsize_bitmap) 2009 domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 2010 2011 if (!domain->ops) 2012 domain->ops = bus->iommu_ops->default_domain_ops; 2013 2014 if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { 2015 iommu_domain_free(domain); 2016 domain = NULL; 2017 } 2018 return domain; 2019 } 2020 2021 struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) 2022 { 2023 return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); 2024 } 2025 EXPORT_SYMBOL_GPL(iommu_domain_alloc); 2026 2027 void iommu_domain_free(struct iommu_domain *domain) 2028 { 2029 if (domain->type == IOMMU_DOMAIN_SVA) 2030 mmdrop(domain->mm); 2031 iommu_put_dma_cookie(domain); 2032 domain->ops->free(domain); 2033 } 2034 EXPORT_SYMBOL_GPL(iommu_domain_free); 2035 2036 /* 2037 * Put the group's domain back to the appropriate core-owned domain - either the 2038 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 
2039 */ 2040 static void __iommu_group_set_core_domain(struct iommu_group *group) 2041 { 2042 struct iommu_domain *new_domain; 2043 2044 if (group->owner) 2045 new_domain = group->blocking_domain; 2046 else 2047 new_domain = group->default_domain; 2048 2049 __iommu_group_set_domain_nofail(group, new_domain); 2050 } 2051 2052 static int __iommu_attach_device(struct iommu_domain *domain, 2053 struct device *dev) 2054 { 2055 int ret; 2056 2057 if (unlikely(domain->ops->attach_dev == NULL)) 2058 return -ENODEV; 2059 2060 ret = domain->ops->attach_dev(domain, dev); 2061 if (ret) 2062 return ret; 2063 dev->iommu->attach_deferred = 0; 2064 trace_attach_device_to_domain(dev); 2065 return 0; 2066 } 2067 2068 /** 2069 * iommu_attach_device - Attach an IOMMU domain to a device 2070 * @domain: IOMMU domain to attach 2071 * @dev: Device that will be attached 2072 * 2073 * Returns 0 on success and error code on failure 2074 * 2075 * Note that EINVAL can be treated as a soft failure, indicating 2076 * that certain configuration of the domain is incompatible with 2077 * the device. In this case attaching a different domain to the 2078 * device may succeed. 
2079 */ 2080 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2081 { 2082 struct iommu_group *group; 2083 int ret; 2084 2085 group = iommu_group_get(dev); 2086 if (!group) 2087 return -ENODEV; 2088 2089 /* 2090 * Lock the group to make sure the device-count doesn't 2091 * change while we are attaching 2092 */ 2093 mutex_lock(&group->mutex); 2094 ret = -EINVAL; 2095 if (list_count_nodes(&group->devices) != 1) 2096 goto out_unlock; 2097 2098 ret = __iommu_attach_group(domain, group); 2099 2100 out_unlock: 2101 mutex_unlock(&group->mutex); 2102 iommu_group_put(group); 2103 2104 return ret; 2105 } 2106 EXPORT_SYMBOL_GPL(iommu_attach_device); 2107 2108 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2109 { 2110 if (dev->iommu && dev->iommu->attach_deferred) 2111 return __iommu_attach_device(domain, dev); 2112 2113 return 0; 2114 } 2115 2116 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2117 { 2118 struct iommu_group *group; 2119 2120 group = iommu_group_get(dev); 2121 if (!group) 2122 return; 2123 2124 mutex_lock(&group->mutex); 2125 if (WARN_ON(domain != group->domain) || 2126 WARN_ON(list_count_nodes(&group->devices) != 1)) 2127 goto out_unlock; 2128 __iommu_group_set_core_domain(group); 2129 2130 out_unlock: 2131 mutex_unlock(&group->mutex); 2132 iommu_group_put(group); 2133 } 2134 EXPORT_SYMBOL_GPL(iommu_detach_device); 2135 2136 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2137 { 2138 struct iommu_domain *domain; 2139 struct iommu_group *group; 2140 2141 group = iommu_group_get(dev); 2142 if (!group) 2143 return NULL; 2144 2145 domain = group->domain; 2146 2147 iommu_group_put(group); 2148 2149 return domain; 2150 } 2151 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2152 2153 /* 2154 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2155 * guarantees that the group and its default domain are valid and correct. 
2156 */ 2157 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2158 { 2159 return dev->iommu_group->default_domain; 2160 } 2161 2162 static int __iommu_attach_group(struct iommu_domain *domain, 2163 struct iommu_group *group) 2164 { 2165 if (group->domain && group->domain != group->default_domain && 2166 group->domain != group->blocking_domain) 2167 return -EBUSY; 2168 2169 return __iommu_group_set_domain(group, domain); 2170 } 2171 2172 /** 2173 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2174 * @domain: IOMMU domain to attach 2175 * @group: IOMMU group that will be attached 2176 * 2177 * Returns 0 on success and error code on failure 2178 * 2179 * Note that EINVAL can be treated as a soft failure, indicating 2180 * that certain configuration of the domain is incompatible with 2181 * the group. In this case attaching a different domain to the 2182 * group may succeed. 2183 */ 2184 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2185 { 2186 int ret; 2187 2188 mutex_lock(&group->mutex); 2189 ret = __iommu_attach_group(domain, group); 2190 mutex_unlock(&group->mutex); 2191 2192 return ret; 2193 } 2194 EXPORT_SYMBOL_GPL(iommu_attach_group); 2195 2196 static int __iommu_device_set_domain(struct iommu_group *group, 2197 struct device *dev, 2198 struct iommu_domain *new_domain, 2199 unsigned int flags) 2200 { 2201 int ret; 2202 2203 ret = __iommu_attach_device(new_domain, dev); 2204 if (ret) { 2205 /* 2206 * If we have a blocking domain then try to attach that in hopes 2207 * of avoiding a UAF. Modern drivers should implement blocking 2208 * domains as global statics that cannot fail. 2209 */ 2210 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2211 group->blocking_domain && 2212 group->blocking_domain != new_domain) 2213 __iommu_attach_device(group->blocking_domain, dev); 2214 return ret; 2215 } 2216 return 0; 2217 } 2218 2219 /* 2220 * If 0 is returned the group's domain is new_domain. 
If an error is returned
 * then the group's domain will be set back to the existing domain unless
 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's
 * domain is left inconsistent. It is a driver bug to fail attach with a
 * previously good domain. We try to avoid a kernel UAF because of this.
 *
 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
 * API works on domains and devices. Bridge that gap by iterating over the
 * devices in a group. Ideally we'd have a single device which represents the
 * requestor ID of the group, but we also allow IOMMU drivers to create policy
 * defined minimum sets, where the physical hardware may be able to distinguish
 * members, but we wish to group them at a higher level (ex. untrusted
 * multi-function PCI devices). Thus we attach each device.
 */
static int __iommu_group_set_domain_internal(struct iommu_group *group,
					     struct iommu_domain *new_domain,
					     unsigned int flags)
{
	struct group_device *last_gdev;
	struct group_device *gdev;
	int result;
	int ret;

	lockdep_assert_held(&group->mutex);

	/* Nothing to do when the group already uses the requested domain. */
	if (group->domain == new_domain)
		return 0;

	/*
	 * New drivers should support default domains, so set_platform_dma()
	 * op will never be called. Otherwise the NULL domain represents some
	 * platform specific behavior.
	 */
	if (!new_domain) {
		for_each_group_device(group, gdev) {
			const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);

			if (!WARN_ON(!ops->set_platform_dma_ops))
				ops->set_platform_dma_ops(gdev->dev);
		}
		group->domain = NULL;
		return 0;
	}

	/*
	 * Changing the domain is done by calling attach_dev() on the new
	 * domain. This switch does not have to be atomic and DMA can be
	 * discarded during the transition. DMA must only be able to access
	 * either new_domain or group->domain, never something else.
	 */
	result = 0;
	for_each_group_device(group, gdev) {
		ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
						flags);
		if (ret) {
			/* Remember the failure; with MUST_SUCCEED it is returned at the end. */
			result = ret;
			/*
			 * Keep trying the other devices in the group. If a
			 * driver fails attach to an otherwise good domain, and
			 * does not support blocking domains, it should at least
			 * drop its reference on the current domain so we don't
			 * UAF.
			 */
			if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
				continue;
			goto err_revert;
		}
	}
	group->domain = new_domain;
	return result;

err_revert:
	/*
	 * This is called in error unwind paths. A well behaved driver should
	 * always allow us to attach to a domain that was already attached.
	 */
	/*
	 * gdev is the device whose attach failed. Walk the list again from the
	 * start and revert every device up to and including that one.
	 */
	last_gdev = gdev;
	for_each_group_device(group, gdev) {
		const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);

		/*
		 * If set_platform_dma_ops is not present a NULL domain can
		 * happen only for first probe, in which case we leave
		 * group->domain as NULL and let release clean everything up.
		 */
		if (group->domain)
			WARN_ON(__iommu_device_set_domain(
				group, gdev->dev, group->domain,
				IOMMU_SET_DOMAIN_MUST_SUCCEED));
		else if (ops->set_platform_dma_ops)
			ops->set_platform_dma_ops(gdev->dev);
		if (gdev == last_gdev)
			break;
	}
	return ret;
}

/*
 * iommu_detach_group() - switch @group back to its core domain
 * @domain: not referenced by this function
 * @group: the group to detach
 *
 * Takes group->mutex around __iommu_group_set_core_domain().
 */
void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
{
	mutex_lock(&group->mutex);
	__iommu_group_set_core_domain(group);
	mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_detach_group);

/*
 * Translate @iova through @domain. Identity domains return @iova unchanged,
 * blocked domains return 0, and every other domain type is delegated to the
 * domain's iova_to_phys() op.
 */
phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (domain->type == IOMMU_DOMAIN_BLOCKED)
		return 0;

	return domain->ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);

/*
 * Choose the page size for the next chunk of a map/unmap operation.
 *
 * Returns the largest page size in domain->pgsize_bitmap that is no larger
 * than @size and compatible with the alignment of @iova | @paddr. If @count is
 * non-NULL it receives the number of pages of that size to use; the number is
 * trimmed so that a larger page size can be used afterwards when @iova and
 * @paddr are equally offset within the next larger page.
 *
 * BUG()s if no supported page size fits.
 */
static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
			   phys_addr_t paddr, size_t size, size_t *count)
{
	unsigned int pgsize_idx, pgsize_idx_next;
	unsigned long pgsizes;
	size_t offset, pgsize, pgsize_next;
	unsigned long addr_merge = paddr | iova;

	/* Page sizes supported by the hardware and small enough for @size */
	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);

	/* Constrain the page sizes further based on the maximum alignment */
	if (likely(addr_merge))
		pgsizes &= GENMASK(__ffs(addr_merge), 0);

	/* Make sure we have at least one suitable page size */
	BUG_ON(!pgsizes);

	/* Pick the biggest page size remaining */
	pgsize_idx = __fls(pgsizes);
	pgsize = BIT(pgsize_idx);
	if (!count)
		return pgsize;

	/* Find the next biggest supported page size, if it exists */
	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
	if (!pgsizes)
		goto out_set_count;

	pgsize_idx_next = __ffs(pgsizes);
	pgsize_next = BIT(pgsize_idx_next);

	/*
	 * There's no point trying a bigger page size unless the virtual
	 * and physical addresses are similarly offset within the larger page.
	 */
	if ((iova ^ paddr) & (pgsize_next - 1))
		goto out_set_count;

	/* Calculate the offset to the next page size alignment boundary */
	offset = pgsize_next - (addr_merge & (pgsize_next - 1));

	/*
	 * If size is big enough to accommodate the larger page, reduce
	 * the number of smaller pages.
	 */
	if (offset + pgsize_next <= size)
		size = offset;

out_set_count:
	*count = size >> pgsize_idx;
	return pgsize;
}

/*
 * Map one run of equally sized pages starting at @iova / @paddr.
 *
 * Uses the map_pages() op when the driver provides it and passes @mapped
 * through to it. Otherwise falls back to map() for a single page and sets
 * *@mapped to the page size on success or 0 on failure.
 *
 * Returns the result of the driver op.
 */
static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
			     phys_addr_t paddr, size_t size, int prot,
			     gfp_t gfp, size_t *mapped)
{
	const struct iommu_domain_ops *ops = domain->ops;
	size_t pgsize, count;
	int ret;

	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);

	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
		 iova, &paddr, pgsize, count);

	if (ops->map_pages) {
		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
				     gfp, mapped);
	} else {
		ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
		*mapped = ret ? 0 : pgsize;
	}

	return ret;
}

/*
 * Map @size bytes at @iova to @paddr; this function itself does not call
 * iotlb_sync_map().
 *
 * Returns -ENODEV when the domain has neither map op or has an empty
 * pgsize_bitmap, -EINVAL for a non-paging domain or unaligned arguments, and
 * otherwise 0 or the error from the driver op. On error, whatever was mapped
 * so far is unmapped again.
 */
static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	const struct iommu_domain_ops *ops = domain->ops;
	unsigned long orig_iova = iova;
	unsigned int min_pagesz;
	size_t orig_size = size;
	phys_addr_t orig_paddr = paddr;
	int ret = 0;

	if (unlikely(!(ops->map || ops->map_pages) ||
		     domain->pgsize_bitmap == 0UL))
		return -ENODEV;

	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
		return -EINVAL;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);

	/*
	 * both the virtual address and the physical one, as well as
	 * the size of the mapping, must be aligned (at least) to the
	 * size of the smallest page supported by the hardware
	 */
	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
		       iova, &paddr, size, min_pagesz);
		return -EINVAL;
	}

	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);

	while (size) {
		size_t mapped = 0;

		ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
					&mapped);
		/*
		 * Some pages may have been mapped, even if an error occurred,
		 * so we should account for those so they can be unmapped.
		 */
		size -= mapped;

		if (ret)
			break;

		iova += mapped;
		paddr += mapped;
	}

	/* unroll mapping in case something went wrong */
	if (ret)
		iommu_unmap(domain, orig_iova, orig_size - size);
	else
		trace_map(orig_iova, orig_paddr, orig_size);

	return ret;
}

/*
 * iommu_map() - map a physically contiguous range into @domain
 *
 * Rejects GFP flags that make no sense here, maps through __iommu_map() and,
 * on success only, calls the domain's iotlb_sync_map() op when it exists.
 *
 * Returns 0 on success or a negative errno.
 */
int iommu_map(struct iommu_domain *domain, unsigned long iova,
	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	const struct iommu_domain_ops *ops = domain->ops;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Discourage passing strange GFP flags */
	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
				__GFP_HIGHMEM)))
		return -EINVAL;

	ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
	if (ret == 0 && ops->iotlb_sync_map)
		ops->iotlb_sync_map(domain, iova, size);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);

/*
 * Unmap one run of equally sized pages at @iova. Prefers the unmap_pages() op
 * and falls back to unmap() for a single page. Returns what the op returns.
 */
static size_t __iommu_unmap_pages(struct iommu_domain *domain,
				  unsigned long iova, size_t size,
				  struct iommu_iotlb_gather *iotlb_gather)
{
	const struct iommu_domain_ops *ops = domain->ops;
	size_t pgsize, count;

	/* Only the IOVA alignment matters for unmap, so it is passed twice. */
	pgsize = iommu_pgsize(domain, iova, iova, size, &count);
	return ops->unmap_pages ?
	       ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
	       ops->unmap(domain, iova, pgsize, iotlb_gather);
}

/*
 * Unmap up to @size bytes starting at @iova, collecting invalidations in
 * @iotlb_gather.
 *
 * Returns the number of bytes unmapped. Returns 0 without unmapping anything
 * when the domain has no unmap op, has an empty pgsize_bitmap, is not a paging
 * domain, or when @iova / @size are not aligned to the smallest page size.
 */
static size_t __iommu_unmap(struct iommu_domain *domain,
			    unsigned long iova, size_t size,
			    struct iommu_iotlb_gather *iotlb_gather)
{
	const struct iommu_domain_ops *ops = domain->ops;
	size_t unmapped_page, unmapped = 0;
	unsigned long orig_iova = iova;
	unsigned int min_pagesz;

	if (unlikely(!(ops->unmap || ops->unmap_pages) ||
		     domain->pgsize_bitmap == 0UL))
		return 0;

	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
		return 0;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);

	/*
	 * The virtual address, as well as the size of the mapping, must be
	 * aligned (at least) to the size of the smallest page supported
	 * by the hardware
	 */
	if (!IS_ALIGNED(iova | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
		       iova, size, min_pagesz);
		return 0;
	}

	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);

	/*
	 * Keep iterating until we either unmap 'size' bytes (or more)
	 * or we hit an area that isn't mapped.
	 */
	while (unmapped < size) {
		unmapped_page = __iommu_unmap_pages(domain, iova,
						    size - unmapped,
						    iotlb_gather);
		if (!unmapped_page)
			break;

		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
			 iova, unmapped_page);

		iova += unmapped_page;
		unmapped += unmapped_page;
	}

	trace_unmap(orig_iova, size, unmapped);
	return unmapped;
}

/*
 * iommu_unmap() - unmap a range and synchronise the IOTLB
 *
 * Uses a local gather structure and calls iommu_iotlb_sync() before
 * returning. Returns the number of bytes unmapped.
 */
size_t iommu_unmap(struct iommu_domain *domain,
		   unsigned long iova, size_t size)
{
	struct iommu_iotlb_gather iotlb_gather;
	size_t ret;

	iommu_iotlb_gather_init(&iotlb_gather);
	ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
	iommu_iotlb_sync(domain, &iotlb_gather);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_unmap);

/*
 * iommu_unmap_fast() - unmap a range without synchronising the IOTLB
 *
 * Same as iommu_unmap() except that invalidations are only collected in the
 * caller supplied @iotlb_gather; no sync is issued here.
 */
size_t iommu_unmap_fast(struct iommu_domain *domain,
			unsigned long iova, size_t size,
			struct iommu_iotlb_gather *iotlb_gather)
{
	return __iommu_unmap(domain, iova, size, iotlb_gather);
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);

/*
 * iommu_map_sg() - map a scatterlist at consecutive IOVAs starting at @iova
 *
 * Physically contiguous neighbouring entries are merged into one
 * __iommu_map() call. Entries flagged by sg_is_dma_bus_address() are not
 * added to a run.
 *
 * Returns the number of bytes mapped, or a negative errno, in which case the
 * mappings already created by this call are undone.
 */
ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
		     struct scatterlist *sg, unsigned int nents, int prot,
		     gfp_t gfp)
{
	const struct iommu_domain_ops *ops = domain->ops;
	size_t len = 0, mapped = 0;
	phys_addr_t start;
	unsigned int i = 0;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Discourage passing strange GFP flags */
	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
				__GFP_HIGHMEM)))
		return -EINVAL;

	/*
	 * NOTE(review): the loop runs nents + 1 times. On the extra pass sg is
	 * not advanced, which appears intended to flush the pending run through
	 * the discontiguity check below - confirm before changing the bound.
	 */
	while (i <= nents) {
		phys_addr_t s_phys = sg_phys(sg);

		/* Current entry does not continue the pending run: map the run. */
		if (len && s_phys != start + len) {
			ret = __iommu_map(domain, iova + mapped, start,
					len, prot, gfp);

			if (ret)
				goto out_err;

			mapped += len;
			len = 0;
		}

		if (sg_is_dma_bus_address(sg))
			goto next;

		if (len) {
			len += sg->length;
		} else {
			len = sg->length;
			start = s_phys;
		}

next:
		if (++i < nents)
			sg = sg_next(sg);
	}

	if (ops->iotlb_sync_map)
		ops->iotlb_sync_map(domain, iova, mapped);
	return mapped;

out_err:
	/* undo mappings already done */
	iommu_unmap(domain, iova, mapped);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map_sg);

/**
 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
 * @domain: the iommu domain where the fault has happened
 * @dev: the device where the fault has happened
 * @iova: the faulting address
 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
 *
 * This function should be called by the low-level IOMMU implementations
 * whenever IOMMU faults happen, to allow high-level users, that are
 * interested in such events, to know about them.
 *
 * This event may be useful for several possible use cases:
 * - mere logging of the event
 * - dynamic TLB/PTE loading
 * - if restarting of the faulting device is required
 *
 * Returns 0 on success and an appropriate error code otherwise (if dynamic
 * PTE/TLB loading will one day be supported, implementations will be able
 * to tell whether it succeeded or not according to this return value).
 *
 * Specifically, -ENOSYS is returned if a fault handler isn't installed
 * (though fault handlers can also return -ENOSYS, in case they want to
 * elicit the default behavior of the IOMMU drivers).
 */
int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
		       unsigned long iova, int flags)
{
	int ret = -ENOSYS;

	/*
	 * if upper layers showed interest and installed a fault handler,
	 * invoke it.
	 */
	if (domain->handler)
		ret = domain->handler(domain, dev, iova, flags,
						domain->handler_token);

	/* The tracepoint fires whether or not a handler is installed. */
	trace_io_page_fault(dev, iova, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(report_iommu_fault);

/*
 * Core initcall: create the "iommu_groups" kset below kernel_kobj and set up
 * debugfs. BUG()s when the kset cannot be created.
 */
static int __init iommu_init(void)
{
	iommu_group_kset = kset_create_and_add("iommu_groups",
					       NULL, kernel_kobj);
	BUG_ON(!iommu_group_kset);

	iommu_debugfs_setup();

	return 0;
}
core_initcall(iommu_init);

/*
 * Call the domain's enable_nesting() op. Only IOMMU_DOMAIN_UNMANAGED domains
 * whose driver implements the op are accepted; anything else gets -EINVAL.
 */
int iommu_enable_nesting(struct iommu_domain *domain)
{
	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;
	if (!domain->ops->enable_nesting)
		return -EINVAL;
	return domain->ops->enable_nesting(domain);
}
EXPORT_SYMBOL_GPL(iommu_enable_nesting);

/*
 * Pass @quirk to the domain's set_pgtable_quirks() op. Only
 * IOMMU_DOMAIN_UNMANAGED domains whose driver implements the op are accepted;
 * anything else gets -EINVAL.
 */
int iommu_set_pgtable_quirks(struct iommu_domain *domain,
		unsigned long quirk)
{
	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;
	if (!domain->ops->set_pgtable_quirks)
		return -EINVAL;
	return domain->ops->set_pgtable_quirks(domain, quirk);
}
EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);

/*
 * Call the get_resv_regions() op of @dev's IOMMU driver with @list, if the
 * driver implements it. @list is left untouched otherwise.
 */
void iommu_get_resv_regions(struct device *dev, struct list_head *list)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->get_resv_regions)
		ops->get_resv_regions(dev, list);
}

/**
 * iommu_put_resv_regions - release reserved regions
 * @dev: device for which to free reserved regions
 * @list: reserved region list for device
 *
 * This releases a reserved region list acquired by iommu_get_resv_regions().
2732 */ 2733 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2734 { 2735 struct iommu_resv_region *entry, *next; 2736 2737 list_for_each_entry_safe(entry, next, list, list) { 2738 if (entry->free) 2739 entry->free(dev, entry); 2740 else 2741 kfree(entry); 2742 } 2743 } 2744 EXPORT_SYMBOL(iommu_put_resv_regions); 2745 2746 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2747 size_t length, int prot, 2748 enum iommu_resv_type type, 2749 gfp_t gfp) 2750 { 2751 struct iommu_resv_region *region; 2752 2753 region = kzalloc(sizeof(*region), gfp); 2754 if (!region) 2755 return NULL; 2756 2757 INIT_LIST_HEAD(®ion->list); 2758 region->start = start; 2759 region->length = length; 2760 region->prot = prot; 2761 region->type = type; 2762 return region; 2763 } 2764 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2765 2766 void iommu_set_default_passthrough(bool cmd_line) 2767 { 2768 if (cmd_line) 2769 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2770 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2771 } 2772 2773 void iommu_set_default_translated(bool cmd_line) 2774 { 2775 if (cmd_line) 2776 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2777 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2778 } 2779 2780 bool iommu_default_passthrough(void) 2781 { 2782 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2783 } 2784 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2785 2786 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 2787 { 2788 const struct iommu_ops *ops = NULL; 2789 struct iommu_device *iommu; 2790 2791 spin_lock(&iommu_device_lock); 2792 list_for_each_entry(iommu, &iommu_device_list, list) 2793 if (iommu->fwnode == fwnode) { 2794 ops = iommu->ops; 2795 break; 2796 } 2797 spin_unlock(&iommu_device_lock); 2798 return ops; 2799 } 2800 2801 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 2802 const struct iommu_ops *ops) 2803 { 2804 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 
2805 2806 if (fwspec) 2807 return ops == fwspec->ops ? 0 : -EINVAL; 2808 2809 if (!dev_iommu_get(dev)) 2810 return -ENOMEM; 2811 2812 /* Preallocate for the overwhelmingly common case of 1 ID */ 2813 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2814 if (!fwspec) 2815 return -ENOMEM; 2816 2817 of_node_get(to_of_node(iommu_fwnode)); 2818 fwspec->iommu_fwnode = iommu_fwnode; 2819 fwspec->ops = ops; 2820 dev_iommu_fwspec_set(dev, fwspec); 2821 return 0; 2822 } 2823 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2824 2825 void iommu_fwspec_free(struct device *dev) 2826 { 2827 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2828 2829 if (fwspec) { 2830 fwnode_handle_put(fwspec->iommu_fwnode); 2831 kfree(fwspec); 2832 dev_iommu_fwspec_set(dev, NULL); 2833 } 2834 } 2835 EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2836 2837 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2838 { 2839 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2840 int i, new_num; 2841 2842 if (!fwspec) 2843 return -EINVAL; 2844 2845 new_num = fwspec->num_ids + num_ids; 2846 if (new_num > 1) { 2847 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2848 GFP_KERNEL); 2849 if (!fwspec) 2850 return -ENOMEM; 2851 2852 dev_iommu_fwspec_set(dev, fwspec); 2853 } 2854 2855 for (i = 0; i < num_ids; i++) 2856 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2857 2858 fwspec->num_ids = new_num; 2859 return 0; 2860 } 2861 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2862 2863 /* 2864 * Per device IOMMU features. 2865 */ 2866 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2867 { 2868 if (dev->iommu && dev->iommu->iommu_dev) { 2869 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2870 2871 if (ops->dev_enable_feat) 2872 return ops->dev_enable_feat(dev, feat); 2873 } 2874 2875 return -ENODEV; 2876 } 2877 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2878 2879 /* 2880 * The device drivers should do the necessary cleanups before calling this. 
2881 */ 2882 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2883 { 2884 if (dev->iommu && dev->iommu->iommu_dev) { 2885 const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; 2886 2887 if (ops->dev_disable_feat) 2888 return ops->dev_disable_feat(dev, feat); 2889 } 2890 2891 return -EBUSY; 2892 } 2893 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2894 2895 /* 2896 * Changes the default domain of an iommu group 2897 * 2898 * @group: The group for which the default domain should be changed 2899 * @dev: The first device in the group 2900 * @type: The type of the new default domain that gets associated with the group 2901 * 2902 * Returns 0 on success and error code on failure 2903 * 2904 * Note: 2905 * 1. Presently, this function is called only when user requests to change the 2906 * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type 2907 * Please take a closer look if intended to use for other purposes. 2908 */ 2909 static int iommu_change_dev_def_domain(struct iommu_group *group, 2910 struct device *dev, int type) 2911 { 2912 struct __group_domain_type gtype = {NULL, 0}; 2913 struct iommu_domain *prev_dom; 2914 int ret; 2915 2916 lockdep_assert_held(&group->mutex); 2917 2918 prev_dom = group->default_domain; 2919 __iommu_group_for_each_dev(group, >ype, 2920 probe_get_default_domain_type); 2921 if (!type) { 2922 /* 2923 * If the user hasn't requested any specific type of domain and 2924 * if the device supports both the domains, then default to the 2925 * domain the device was booted with 2926 */ 2927 type = gtype.type ? 
: iommu_def_domain_type; 2928 } else if (gtype.type && type != gtype.type) { 2929 dev_err_ratelimited(dev, "Device cannot be in %s domain\n", 2930 iommu_domain_type_str(type)); 2931 return -EINVAL; 2932 } 2933 2934 /* 2935 * Switch to a new domain only if the requested domain type is different 2936 * from the existing default domain type 2937 */ 2938 if (prev_dom->type == type) 2939 return 0; 2940 2941 group->default_domain = NULL; 2942 group->domain = NULL; 2943 2944 /* Sets group->default_domain to the newly allocated domain */ 2945 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2946 if (ret) 2947 goto restore_old_domain; 2948 2949 group->domain = prev_dom; 2950 ret = iommu_create_device_direct_mappings(group, dev); 2951 if (ret) 2952 goto free_new_domain; 2953 2954 ret = __iommu_group_set_domain(group, group->default_domain); 2955 if (ret) 2956 goto free_new_domain; 2957 2958 iommu_domain_free(prev_dom); 2959 2960 return 0; 2961 2962 free_new_domain: 2963 iommu_domain_free(group->default_domain); 2964 restore_old_domain: 2965 group->default_domain = prev_dom; 2966 2967 return ret; 2968 } 2969 2970 /* 2971 * Changing the default domain through sysfs requires the users to unbind the 2972 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 2973 * transition. Return failure if this isn't met. 2974 * 2975 * We need to consider the race between this and the device release path. 2976 * group->mutex is used here to guarantee that the device release path 2977 * will not be entered at the same time. 
2978 */ 2979 static ssize_t iommu_group_store_type(struct iommu_group *group, 2980 const char *buf, size_t count) 2981 { 2982 struct group_device *grp_dev; 2983 struct device *dev; 2984 int ret, req_type; 2985 2986 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2987 return -EACCES; 2988 2989 if (WARN_ON(!group) || !group->default_domain) 2990 return -EINVAL; 2991 2992 if (sysfs_streq(buf, "identity")) 2993 req_type = IOMMU_DOMAIN_IDENTITY; 2994 else if (sysfs_streq(buf, "DMA")) 2995 req_type = IOMMU_DOMAIN_DMA; 2996 else if (sysfs_streq(buf, "DMA-FQ")) 2997 req_type = IOMMU_DOMAIN_DMA_FQ; 2998 else if (sysfs_streq(buf, "auto")) 2999 req_type = 0; 3000 else 3001 return -EINVAL; 3002 3003 mutex_lock(&group->mutex); 3004 /* We can bring up a flush queue without tearing down the domain. */ 3005 if (req_type == IOMMU_DOMAIN_DMA_FQ && 3006 group->default_domain->type == IOMMU_DOMAIN_DMA) { 3007 ret = iommu_dma_init_fq(group->default_domain); 3008 if (!ret) 3009 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 3010 mutex_unlock(&group->mutex); 3011 3012 return ret ?: count; 3013 } 3014 3015 /* Otherwise, ensure that device exists and no driver is bound. */ 3016 if (list_empty(&group->devices) || group->owner_cnt) { 3017 mutex_unlock(&group->mutex); 3018 return -EPERM; 3019 } 3020 3021 grp_dev = list_first_entry(&group->devices, struct group_device, list); 3022 dev = grp_dev->dev; 3023 3024 ret = iommu_change_dev_def_domain(group, dev, req_type); 3025 3026 /* 3027 * Release the mutex here because ops->probe_finalize() call-back of 3028 * some vendor IOMMU drivers calls arm_iommu_attach_device() which 3029 * in-turn might call back into IOMMU core code, where it tries to take 3030 * group->mutex, resulting in a deadlock. 
3031 */ 3032 mutex_unlock(&group->mutex); 3033 3034 /* Make sure dma_ops is appropriatley set */ 3035 if (!ret) 3036 __iommu_group_dma_finalize(group); 3037 3038 return ret ?: count; 3039 } 3040 3041 static bool iommu_is_default_domain(struct iommu_group *group) 3042 { 3043 if (group->domain == group->default_domain) 3044 return true; 3045 3046 /* 3047 * If the default domain was set to identity and it is still an identity 3048 * domain then we consider this a pass. This happens because of 3049 * amd_iommu_init_device() replacing the default idenytity domain with an 3050 * identity domain that has a different configuration for AMDGPU. 3051 */ 3052 if (group->default_domain && 3053 group->default_domain->type == IOMMU_DOMAIN_IDENTITY && 3054 group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) 3055 return true; 3056 return false; 3057 } 3058 3059 /** 3060 * iommu_device_use_default_domain() - Device driver wants to handle device 3061 * DMA through the kernel DMA API. 3062 * @dev: The device. 3063 * 3064 * The device driver about to bind @dev wants to do DMA through the kernel 3065 * DMA API. Return 0 if it is allowed, otherwise an error. 3066 */ 3067 int iommu_device_use_default_domain(struct device *dev) 3068 { 3069 struct iommu_group *group = iommu_group_get(dev); 3070 int ret = 0; 3071 3072 if (!group) 3073 return 0; 3074 3075 mutex_lock(&group->mutex); 3076 if (group->owner_cnt) { 3077 if (group->owner || !iommu_is_default_domain(group) || 3078 !xa_empty(&group->pasid_array)) { 3079 ret = -EBUSY; 3080 goto unlock_out; 3081 } 3082 } 3083 3084 group->owner_cnt++; 3085 3086 unlock_out: 3087 mutex_unlock(&group->mutex); 3088 iommu_group_put(group); 3089 3090 return ret; 3091 } 3092 3093 /** 3094 * iommu_device_unuse_default_domain() - Device driver stops handling device 3095 * DMA through the kernel DMA API. 3096 * @dev: The device. 3097 * 3098 * The device driver doesn't want to do DMA through kernel DMA API anymore. 
3099 * It must be called after iommu_device_use_default_domain(). 3100 */ 3101 void iommu_device_unuse_default_domain(struct device *dev) 3102 { 3103 struct iommu_group *group = iommu_group_get(dev); 3104 3105 if (!group) 3106 return; 3107 3108 mutex_lock(&group->mutex); 3109 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3110 group->owner_cnt--; 3111 3112 mutex_unlock(&group->mutex); 3113 iommu_group_put(group); 3114 } 3115 3116 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3117 { 3118 struct group_device *dev = 3119 list_first_entry(&group->devices, struct group_device, list); 3120 3121 if (group->blocking_domain) 3122 return 0; 3123 3124 group->blocking_domain = 3125 __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); 3126 if (!group->blocking_domain) { 3127 /* 3128 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED 3129 * create an empty domain instead. 3130 */ 3131 group->blocking_domain = __iommu_domain_alloc( 3132 dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); 3133 if (!group->blocking_domain) 3134 return -EINVAL; 3135 } 3136 return 0; 3137 } 3138 3139 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3140 { 3141 int ret; 3142 3143 if ((group->domain && group->domain != group->default_domain) || 3144 !xa_empty(&group->pasid_array)) 3145 return -EBUSY; 3146 3147 ret = __iommu_group_alloc_blocking_domain(group); 3148 if (ret) 3149 return ret; 3150 ret = __iommu_group_set_domain(group, group->blocking_domain); 3151 if (ret) 3152 return ret; 3153 3154 group->owner = owner; 3155 group->owner_cnt++; 3156 return 0; 3157 } 3158 3159 /** 3160 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3161 * @group: The group. 3162 * @owner: Caller specified pointer. Used for exclusive ownership. 3163 * 3164 * This is to support backward compatibility for vfio which manages the dma 3165 * ownership in iommu_group level. 
New invocations on this interface should be 3166 * prohibited. Only a single owner may exist for a group. 3167 */ 3168 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3169 { 3170 int ret = 0; 3171 3172 if (WARN_ON(!owner)) 3173 return -EINVAL; 3174 3175 mutex_lock(&group->mutex); 3176 if (group->owner_cnt) { 3177 ret = -EPERM; 3178 goto unlock_out; 3179 } 3180 3181 ret = __iommu_take_dma_ownership(group, owner); 3182 unlock_out: 3183 mutex_unlock(&group->mutex); 3184 3185 return ret; 3186 } 3187 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3188 3189 /** 3190 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3191 * @dev: The device. 3192 * @owner: Caller specified pointer. Used for exclusive ownership. 3193 * 3194 * Claim the DMA ownership of a device. Multiple devices in the same group may 3195 * concurrently claim ownership if they present the same owner value. Returns 0 3196 * on success and error code on failure 3197 */ 3198 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3199 { 3200 struct iommu_group *group; 3201 int ret = 0; 3202 3203 if (WARN_ON(!owner)) 3204 return -EINVAL; 3205 3206 group = iommu_group_get(dev); 3207 if (!group) 3208 return -ENODEV; 3209 3210 mutex_lock(&group->mutex); 3211 if (group->owner_cnt) { 3212 if (group->owner != owner) { 3213 ret = -EPERM; 3214 goto unlock_out; 3215 } 3216 group->owner_cnt++; 3217 goto unlock_out; 3218 } 3219 3220 ret = __iommu_take_dma_ownership(group, owner); 3221 unlock_out: 3222 mutex_unlock(&group->mutex); 3223 iommu_group_put(group); 3224 3225 return ret; 3226 } 3227 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3228 3229 static void __iommu_release_dma_ownership(struct iommu_group *group) 3230 { 3231 if (WARN_ON(!group->owner_cnt || !group->owner || 3232 !xa_empty(&group->pasid_array))) 3233 return; 3234 3235 group->owner_cnt = 0; 3236 group->owner = NULL; 3237 __iommu_group_set_domain_nofail(group, group->default_domain); 3238 } 3239 3240 /** 
3241 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3242 * @dev: The device 3243 * 3244 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3245 */ 3246 void iommu_group_release_dma_owner(struct iommu_group *group) 3247 { 3248 mutex_lock(&group->mutex); 3249 __iommu_release_dma_ownership(group); 3250 mutex_unlock(&group->mutex); 3251 } 3252 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3253 3254 /** 3255 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3256 * @group: The device. 3257 * 3258 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3259 */ 3260 void iommu_device_release_dma_owner(struct device *dev) 3261 { 3262 struct iommu_group *group = iommu_group_get(dev); 3263 3264 mutex_lock(&group->mutex); 3265 if (group->owner_cnt > 1) 3266 group->owner_cnt--; 3267 else 3268 __iommu_release_dma_ownership(group); 3269 mutex_unlock(&group->mutex); 3270 iommu_group_put(group); 3271 } 3272 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3273 3274 /** 3275 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3276 * @group: The group. 3277 * 3278 * This provides status query on a given group. It is racy and only for 3279 * non-binding status reporting. 
3280 */ 3281 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3282 { 3283 unsigned int user; 3284 3285 mutex_lock(&group->mutex); 3286 user = group->owner_cnt; 3287 mutex_unlock(&group->mutex); 3288 3289 return user; 3290 } 3291 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3292 3293 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3294 struct iommu_group *group, ioasid_t pasid) 3295 { 3296 struct group_device *device; 3297 int ret = 0; 3298 3299 for_each_group_device(group, device) { 3300 ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); 3301 if (ret) 3302 break; 3303 } 3304 3305 return ret; 3306 } 3307 3308 static void __iommu_remove_group_pasid(struct iommu_group *group, 3309 ioasid_t pasid) 3310 { 3311 struct group_device *device; 3312 const struct iommu_ops *ops; 3313 3314 for_each_group_device(group, device) { 3315 ops = dev_iommu_ops(device->dev); 3316 ops->remove_dev_pasid(device->dev, pasid); 3317 } 3318 } 3319 3320 /* 3321 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3322 * @domain: the iommu domain. 3323 * @dev: the attached device. 3324 * @pasid: the pasid of the device. 3325 * 3326 * Return: 0 on success, or an error. 3327 */ 3328 int iommu_attach_device_pasid(struct iommu_domain *domain, 3329 struct device *dev, ioasid_t pasid) 3330 { 3331 struct iommu_group *group; 3332 void *curr; 3333 int ret; 3334 3335 if (!domain->ops->set_dev_pasid) 3336 return -EOPNOTSUPP; 3337 3338 group = iommu_group_get(dev); 3339 if (!group) 3340 return -ENODEV; 3341 3342 mutex_lock(&group->mutex); 3343 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); 3344 if (curr) { 3345 ret = xa_err(curr) ? 
: -EBUSY; 3346 goto out_unlock; 3347 } 3348 3349 ret = __iommu_set_group_pasid(domain, group, pasid); 3350 if (ret) { 3351 __iommu_remove_group_pasid(group, pasid); 3352 xa_erase(&group->pasid_array, pasid); 3353 } 3354 out_unlock: 3355 mutex_unlock(&group->mutex); 3356 iommu_group_put(group); 3357 3358 return ret; 3359 } 3360 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3361 3362 /* 3363 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3364 * @domain: the iommu domain. 3365 * @dev: the attached device. 3366 * @pasid: the pasid of the device. 3367 * 3368 * The @domain must have been attached to @pasid of the @dev with 3369 * iommu_attach_device_pasid(). 3370 */ 3371 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3372 ioasid_t pasid) 3373 { 3374 struct iommu_group *group = iommu_group_get(dev); 3375 3376 mutex_lock(&group->mutex); 3377 __iommu_remove_group_pasid(group, pasid); 3378 WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); 3379 mutex_unlock(&group->mutex); 3380 3381 iommu_group_put(group); 3382 } 3383 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3384 3385 /* 3386 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev 3387 * @dev: the queried device 3388 * @pasid: the pasid of the device 3389 * @type: matched domain type, 0 for any match 3390 * 3391 * This is a variant of iommu_get_domain_for_dev(). It returns the existing 3392 * domain attached to pasid of a device. Callers must hold a lock around this 3393 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of 3394 * type is being manipulated. This API does not internally resolve races with 3395 * attach/detach. 3396 * 3397 * Return: attached domain on success, NULL otherwise. 
3398 */ 3399 struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, 3400 ioasid_t pasid, 3401 unsigned int type) 3402 { 3403 struct iommu_domain *domain; 3404 struct iommu_group *group; 3405 3406 group = iommu_group_get(dev); 3407 if (!group) 3408 return NULL; 3409 3410 xa_lock(&group->pasid_array); 3411 domain = xa_load(&group->pasid_array, pasid); 3412 if (type && domain && domain->type != type) 3413 domain = ERR_PTR(-EBUSY); 3414 xa_unlock(&group->pasid_array); 3415 iommu_group_put(group); 3416 3417 return domain; 3418 } 3419 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); 3420 3421 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, 3422 struct mm_struct *mm) 3423 { 3424 const struct iommu_ops *ops = dev_iommu_ops(dev); 3425 struct iommu_domain *domain; 3426 3427 domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); 3428 if (!domain) 3429 return NULL; 3430 3431 domain->type = IOMMU_DOMAIN_SVA; 3432 mmgrab(mm); 3433 domain->mm = mm; 3434 domain->iopf_handler = iommu_sva_handle_iopf; 3435 domain->fault_data = mm; 3436 3437 return domain; 3438 } 3439