1 /* 2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 3 * Author: Joerg Roedel <jroedel@suse.de> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published 7 * by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19 #define pr_fmt(fmt) "%s: " fmt, __func__ 20 21 #include <linux/device.h> 22 #include <linux/kernel.h> 23 #include <linux/bug.h> 24 #include <linux/types.h> 25 #include <linux/module.h> 26 #include <linux/slab.h> 27 #include <linux/errno.h> 28 #include <linux/iommu.h> 29 #include <linux/idr.h> 30 #include <linux/notifier.h> 31 #include <linux/err.h> 32 #include <linux/pci.h> 33 #include <linux/bitops.h> 34 #include <trace/events/iommu.h> 35 36 static struct kset *iommu_group_kset; 37 static struct ida iommu_group_ida; 38 static struct mutex iommu_group_mutex; 39 40 struct iommu_callback_data { 41 const struct iommu_ops *ops; 42 }; 43 44 struct iommu_group { 45 struct kobject kobj; 46 struct kobject *devices_kobj; 47 struct list_head devices; 48 struct mutex mutex; 49 struct blocking_notifier_head notifier; 50 void *iommu_data; 51 void (*iommu_data_release)(void *iommu_data); 52 char *name; 53 int id; 54 }; 55 56 struct iommu_device { 57 struct list_head list; 58 struct device *dev; 59 char *name; 60 }; 61 62 struct iommu_group_attribute { 63 struct attribute attr; 64 ssize_t (*show)(struct iommu_group *group, char *buf); 65 ssize_t (*store)(struct iommu_group *group, 66 const char *buf, size_t count); 67 }; 68 69 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 70 struct iommu_group_attribute iommu_group_attr_##_name = \ 71 __ATTR(_name, _mode, _show, _store) 72 73 #define to_iommu_group_attr(_attr) \ 74 container_of(_attr, struct iommu_group_attribute, attr) 75 #define to_iommu_group(_kobj) \ 76 container_of(_kobj, struct iommu_group, kobj) 77 78 static ssize_t iommu_group_attr_show(struct kobject *kobj, 79 struct attribute *__attr, char *buf) 80 { 81 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 82 struct iommu_group *group = to_iommu_group(kobj); 83 ssize_t ret = -EIO; 84 85 if (attr->show) 86 ret = attr->show(group, buf); 87 return ret; 88 } 89 90 static ssize_t iommu_group_attr_store(struct kobject *kobj, 91 struct attribute *__attr, 92 const char *buf, size_t count) 93 { 94 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 95 struct iommu_group *group = to_iommu_group(kobj); 96 ssize_t ret = -EIO; 97 98 if (attr->store) 99 ret = attr->store(group, buf, count); 100 return ret; 101 } 102 103 static const struct sysfs_ops iommu_group_sysfs_ops = { 104 .show = iommu_group_attr_show, 105 .store = iommu_group_attr_store, 106 }; 107 108 static int iommu_group_create_file(struct iommu_group *group, 109 struct iommu_group_attribute *attr) 110 { 111 return sysfs_create_file(&group->kobj, &attr->attr); 112 } 113 114 static void iommu_group_remove_file(struct iommu_group *group, 115 struct iommu_group_attribute *attr) 116 { 117 sysfs_remove_file(&group->kobj, &attr->attr); 118 } 119 120 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 121 { 122 return sprintf(buf, "%s\n", group->name); 123 } 124 125 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 126 127 static void iommu_group_release(struct kobject *kobj) 128 { 129 struct iommu_group *group = to_iommu_group(kobj); 130 131 if (group->iommu_data_release) 132 group->iommu_data_release(group->iommu_data); 133 134 mutex_lock(&iommu_group_mutex); 135 ida_remove(&iommu_group_ida, group->id); 136 mutex_unlock(&iommu_group_mutex); 137 138 kfree(group->name); 139 kfree(group); 140 } 141 142 static struct kobj_type iommu_group_ktype = { 143 .sysfs_ops = &iommu_group_sysfs_ops, 144 .release = iommu_group_release, 145 }; 146 147 /** 148 * iommu_group_alloc - Allocate a new group 149 * @name: Optional name to associate with group, visible in sysfs 150 * 151 * This function is called by an iommu driver to allocate a new iommu 152 * group. The iommu group represents the minimum granularity of the iommu. 153 * Upon successful return, the caller holds a reference to the supplied 154 * group in order to hold the group until devices are added. Use 155 * iommu_group_put() to release this extra reference count, allowing the 156 * group to be automatically reclaimed once it has no devices or external 157 * references. 158 */ 159 struct iommu_group *iommu_group_alloc(void) 160 { 161 struct iommu_group *group; 162 int ret; 163 164 group = kzalloc(sizeof(*group), GFP_KERNEL); 165 if (!group) 166 return ERR_PTR(-ENOMEM); 167 168 group->kobj.kset = iommu_group_kset; 169 mutex_init(&group->mutex); 170 INIT_LIST_HEAD(&group->devices); 171 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); 172 173 mutex_lock(&iommu_group_mutex); 174 175 again: 176 if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) { 177 kfree(group); 178 mutex_unlock(&iommu_group_mutex); 179 return ERR_PTR(-ENOMEM); 180 } 181 182 if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id)) 183 goto again; 184 185 mutex_unlock(&iommu_group_mutex); 186 187 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 188 NULL, "%d", group->id); 189 if (ret) { 190 mutex_lock(&iommu_group_mutex); 191 ida_remove(&iommu_group_ida, group->id); 192 mutex_unlock(&iommu_group_mutex); 193 kfree(group); 194 return ERR_PTR(ret); 195 } 196 197 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 198 if (!group->devices_kobj) { 199 kobject_put(&group->kobj); /* triggers .release & free */ 200 return ERR_PTR(-ENOMEM); 201 } 202 203 /* 204 * The devices_kobj holds a reference on the group kobject, so 205 * as long as that exists so will the group. We can therefore 206 * use the devices_kobj for reference counting. 207 */ 208 kobject_put(&group->kobj); 209 210 return group; 211 } 212 EXPORT_SYMBOL_GPL(iommu_group_alloc); 213 214 struct iommu_group *iommu_group_get_by_id(int id) 215 { 216 struct kobject *group_kobj; 217 struct iommu_group *group; 218 const char *name; 219 220 if (!iommu_group_kset) 221 return NULL; 222 223 name = kasprintf(GFP_KERNEL, "%d", id); 224 if (!name) 225 return NULL; 226 227 group_kobj = kset_find_obj(iommu_group_kset, name); 228 kfree(name); 229 230 if (!group_kobj) 231 return NULL; 232 233 group = container_of(group_kobj, struct iommu_group, kobj); 234 BUG_ON(group->id != id); 235 236 kobject_get(group->devices_kobj); 237 kobject_put(&group->kobj); 238 239 return group; 240 } 241 EXPORT_SYMBOL_GPL(iommu_group_get_by_id); 242 243 /** 244 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 245 * @group: the group 246 * 247 * iommu drivers can store data in the group for use when doing iommu 248 * operations. This function provides a way to retrieve it. Caller 249 * should hold a group reference. 250 */ 251 void *iommu_group_get_iommudata(struct iommu_group *group) 252 { 253 return group->iommu_data; 254 } 255 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 256 257 /** 258 * iommu_group_set_iommudata - set iommu_data for a group 259 * @group: the group 260 * @iommu_data: new data 261 * @release: release function for iommu_data 262 * 263 * iommu drivers can store data in the group for use when doing iommu 264 * operations. This function provides a way to set the data after 265 * the group has been allocated. Caller should hold a group reference. 266 */ 267 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 268 void (*release)(void *iommu_data)) 269 { 270 group->iommu_data = iommu_data; 271 group->iommu_data_release = release; 272 } 273 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 274 275 /** 276 * iommu_group_set_name - set name for a group 277 * @group: the group 278 * @name: name 279 * 280 * Allow iommu driver to set a name for a group. When set it will 281 * appear in a name attribute file under the group in sysfs. 282 */ 283 int iommu_group_set_name(struct iommu_group *group, const char *name) 284 { 285 int ret; 286 287 if (group->name) { 288 iommu_group_remove_file(group, &iommu_group_attr_name); 289 kfree(group->name); 290 group->name = NULL; 291 if (!name) 292 return 0; 293 } 294 295 group->name = kstrdup(name, GFP_KERNEL); 296 if (!group->name) 297 return -ENOMEM; 298 299 ret = iommu_group_create_file(group, &iommu_group_attr_name); 300 if (ret) { 301 kfree(group->name); 302 group->name = NULL; 303 return ret; 304 } 305 306 return 0; 307 } 308 EXPORT_SYMBOL_GPL(iommu_group_set_name); 309 310 /** 311 * iommu_group_add_device - add a device to an iommu group 312 * @group: the group into which to add the device (reference should be held) 313 * @dev: the device 314 * 315 * This function is called by an iommu driver to add a device into a 316 * group. Adding a device increments the group reference count. 317 */ 318 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 319 { 320 int ret, i = 0; 321 struct iommu_device *device; 322 323 device = kzalloc(sizeof(*device), GFP_KERNEL); 324 if (!device) 325 return -ENOMEM; 326 327 device->dev = dev; 328 329 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 330 if (ret) { 331 kfree(device); 332 return ret; 333 } 334 335 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 336 rename: 337 if (!device->name) { 338 sysfs_remove_link(&dev->kobj, "iommu_group"); 339 kfree(device); 340 return -ENOMEM; 341 } 342 343 ret = sysfs_create_link_nowarn(group->devices_kobj, 344 &dev->kobj, device->name); 345 if (ret) { 346 kfree(device->name); 347 if (ret == -EEXIST && i >= 0) { 348 /* 349 * Account for the slim chance of collision 350 * and append an instance to the name. 351 */ 352 device->name = kasprintf(GFP_KERNEL, "%s.%d", 353 kobject_name(&dev->kobj), i++); 354 goto rename; 355 } 356 357 sysfs_remove_link(&dev->kobj, "iommu_group"); 358 kfree(device); 359 return ret; 360 } 361 362 kobject_get(group->devices_kobj); 363 364 dev->iommu_group = group; 365 366 mutex_lock(&group->mutex); 367 list_add_tail(&device->list, &group->devices); 368 mutex_unlock(&group->mutex); 369 370 /* Notify any listeners about change to group. */ 371 blocking_notifier_call_chain(&group->notifier, 372 IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev); 373 374 trace_add_device_to_group(group->id, dev); 375 return 0; 376 } 377 EXPORT_SYMBOL_GPL(iommu_group_add_device); 378 379 /** 380 * iommu_group_remove_device - remove a device from it's current group 381 * @dev: device to be removed 382 * 383 * This function is called by an iommu driver to remove the device from 384 * it's current group. This decrements the iommu group reference count. 385 */ 386 void iommu_group_remove_device(struct device *dev) 387 { 388 struct iommu_group *group = dev->iommu_group; 389 struct iommu_device *tmp_device, *device = NULL; 390 391 /* Pre-notify listeners that a device is being removed. */ 392 blocking_notifier_call_chain(&group->notifier, 393 IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev); 394 395 mutex_lock(&group->mutex); 396 list_for_each_entry(tmp_device, &group->devices, list) { 397 if (tmp_device->dev == dev) { 398 device = tmp_device; 399 list_del(&device->list); 400 break; 401 } 402 } 403 mutex_unlock(&group->mutex); 404 405 if (!device) 406 return; 407 408 sysfs_remove_link(group->devices_kobj, device->name); 409 sysfs_remove_link(&dev->kobj, "iommu_group"); 410 411 trace_remove_device_from_group(group->id, dev); 412 413 kfree(device->name); 414 kfree(device); 415 dev->iommu_group = NULL; 416 kobject_put(group->devices_kobj); 417 } 418 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 419 420 /** 421 * iommu_group_for_each_dev - iterate over each device in the group 422 * @group: the group 423 * @data: caller opaque data to be passed to callback function 424 * @fn: caller supplied callback function 425 * 426 * This function is called by group users to iterate over group devices. 427 * Callers should hold a reference count to the group during callback. 428 * The group->mutex is held across callbacks, which will block calls to 429 * iommu_group_add/remove_device. 430 */ 431 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 432 int (*fn)(struct device *, void *)) 433 { 434 struct iommu_device *device; 435 int ret = 0; 436 437 mutex_lock(&group->mutex); 438 list_for_each_entry(device, &group->devices, list) { 439 ret = fn(device->dev, data); 440 if (ret) 441 break; 442 } 443 mutex_unlock(&group->mutex); 444 return ret; 445 } 446 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 447 448 /** 449 * iommu_group_get - Return the group for a device and increment reference 450 * @dev: get the group that this device belongs to 451 * 452 * This function is called by iommu drivers and users to get the group 453 * for the specified device. If found, the group is returned and the group 454 * reference in incremented, else NULL. 455 */ 456 struct iommu_group *iommu_group_get(struct device *dev) 457 { 458 struct iommu_group *group = dev->iommu_group; 459 460 if (group) 461 kobject_get(group->devices_kobj); 462 463 return group; 464 } 465 EXPORT_SYMBOL_GPL(iommu_group_get); 466 467 /** 468 * iommu_group_put - Decrement group reference 469 * @group: the group to use 470 * 471 * This function is called by iommu drivers and users to release the 472 * iommu group. Once the reference count is zero, the group is released. 473 */ 474 void iommu_group_put(struct iommu_group *group) 475 { 476 if (group) 477 kobject_put(group->devices_kobj); 478 } 479 EXPORT_SYMBOL_GPL(iommu_group_put); 480 481 /** 482 * iommu_group_register_notifier - Register a notifier for group changes 483 * @group: the group to watch 484 * @nb: notifier block to signal 485 * 486 * This function allows iommu group users to track changes in a group. 487 * See include/linux/iommu.h for actions sent via this notifier. Caller 488 * should hold a reference to the group throughout notifier registration. 489 */ 490 int iommu_group_register_notifier(struct iommu_group *group, 491 struct notifier_block *nb) 492 { 493 return blocking_notifier_chain_register(&group->notifier, nb); 494 } 495 EXPORT_SYMBOL_GPL(iommu_group_register_notifier); 496 497 /** 498 * iommu_group_unregister_notifier - Unregister a notifier 499 * @group: the group to watch 500 * @nb: notifier block to signal 501 * 502 * Unregister a previously registered group notifier block. 503 */ 504 int iommu_group_unregister_notifier(struct iommu_group *group, 505 struct notifier_block *nb) 506 { 507 return blocking_notifier_chain_unregister(&group->notifier, nb); 508 } 509 EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); 510 511 /** 512 * iommu_group_id - Return ID for a group 513 * @group: the group to ID 514 * 515 * Return the unique ID for the group matching the sysfs group number. 516 */ 517 int iommu_group_id(struct iommu_group *group) 518 { 519 return group->id; 520 } 521 EXPORT_SYMBOL_GPL(iommu_group_id); 522 523 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 524 unsigned long *devfns); 525 526 /* 527 * To consider a PCI device isolated, we require ACS to support Source 528 * Validation, Request Redirection, Completer Redirection, and Upstream 529 * Forwarding. This effectively means that devices cannot spoof their 530 * requester ID, requests and completions cannot be redirected, and all 531 * transactions are forwarded upstream, even as it passes through a 532 * bridge where the target device is downstream. 533 */ 534 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 535 536 /* 537 * For multifunction devices which are not isolated from each other, find 538 * all the other non-isolated functions and look for existing groups. For 539 * each function, we also need to look for aliases to or from other devices 540 * that may already have a group. 541 */ 542 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 543 unsigned long *devfns) 544 { 545 struct pci_dev *tmp = NULL; 546 struct iommu_group *group; 547 548 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 549 return NULL; 550 551 for_each_pci_dev(tmp) { 552 if (tmp == pdev || tmp->bus != pdev->bus || 553 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 554 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 555 continue; 556 557 group = get_pci_alias_group(tmp, devfns); 558 if (group) { 559 pci_dev_put(tmp); 560 return group; 561 } 562 } 563 564 return NULL; 565 } 566 567 /* 568 * Look for aliases to or from the given device for exisiting groups. The 569 * dma_alias_devfn only supports aliases on the same bus, therefore the search 570 * space is quite small (especially since we're really only looking at pcie 571 * device, and therefore only expect multiple slots on the root complex or 572 * downstream switch ports). It's conceivable though that a pair of 573 * multifunction devices could have aliases between them that would cause a 574 * loop. To prevent this, we use a bitmap to track where we've been. 575 */ 576 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 577 unsigned long *devfns) 578 { 579 struct pci_dev *tmp = NULL; 580 struct iommu_group *group; 581 582 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 583 return NULL; 584 585 group = iommu_group_get(&pdev->dev); 586 if (group) 587 return group; 588 589 for_each_pci_dev(tmp) { 590 if (tmp == pdev || tmp->bus != pdev->bus) 591 continue; 592 593 /* We alias them or they alias us */ 594 if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && 595 pdev->dma_alias_devfn == tmp->devfn) || 596 ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && 597 tmp->dma_alias_devfn == pdev->devfn)) { 598 599 group = get_pci_alias_group(tmp, devfns); 600 if (group) { 601 pci_dev_put(tmp); 602 return group; 603 } 604 605 group = get_pci_function_alias_group(tmp, devfns); 606 if (group) { 607 pci_dev_put(tmp); 608 return group; 609 } 610 } 611 } 612 613 return NULL; 614 } 615 616 struct group_for_pci_data { 617 struct pci_dev *pdev; 618 struct iommu_group *group; 619 }; 620 621 /* 622 * DMA alias iterator callback, return the last seen device. Stop and return 623 * the IOMMU group if we find one along the way. 624 */ 625 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 626 { 627 struct group_for_pci_data *data = opaque; 628 629 data->pdev = pdev; 630 data->group = iommu_group_get(&pdev->dev); 631 632 return data->group != NULL; 633 } 634 635 /* 636 * Use standard PCI bus topology, isolation features, and DMA alias quirks 637 * to find or create an IOMMU group for a device. 638 */ 639 static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) 640 { 641 struct group_for_pci_data data; 642 struct pci_bus *bus; 643 struct iommu_group *group = NULL; 644 u64 devfns[4] = { 0 }; 645 646 /* 647 * Find the upstream DMA alias for the device. A device must not 648 * be aliased due to topology in order to have its own IOMMU group. 649 * If we find an alias along the way that already belongs to a 650 * group, use it. 651 */ 652 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 653 return data.group; 654 655 pdev = data.pdev; 656 657 /* 658 * Continue upstream from the point of minimum IOMMU granularity 659 * due to aliases to the point where devices are protected from 660 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 661 * group, use it. 662 */ 663 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 664 if (!bus->self) 665 continue; 666 667 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 668 break; 669 670 pdev = bus->self; 671 672 group = iommu_group_get(&pdev->dev); 673 if (group) 674 return group; 675 } 676 677 /* 678 * Look for existing groups on device aliases. If we alias another 679 * device or another device aliases us, use the same group. 680 */ 681 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 682 if (group) 683 return group; 684 685 /* 686 * Look for existing groups on non-isolated functions on the same 687 * slot and aliases of those funcions, if any. No need to clear 688 * the search bitmap, the tested devfns are still valid. 689 */ 690 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 691 if (group) 692 return group; 693 694 /* No shared group found, allocate new */ 695 return iommu_group_alloc(); 696 } 697 698 /** 699 * iommu_group_get_for_dev - Find or create the IOMMU group for a device 700 * @dev: target device 701 * 702 * This function is intended to be called by IOMMU drivers and extended to 703 * support common, bus-defined algorithms when determining or creating the 704 * IOMMU group for a device. On success, the caller will hold a reference 705 * to the returned IOMMU group, which will already include the provided 706 * device. The reference should be released with iommu_group_put(). 707 */ 708 struct iommu_group *iommu_group_get_for_dev(struct device *dev) 709 { 710 struct iommu_group *group; 711 int ret; 712 713 group = iommu_group_get(dev); 714 if (group) 715 return group; 716 717 if (!dev_is_pci(dev)) 718 return ERR_PTR(-EINVAL); 719 720 group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); 721 722 if (IS_ERR(group)) 723 return group; 724 725 ret = iommu_group_add_device(group, dev); 726 if (ret) { 727 iommu_group_put(group); 728 return ERR_PTR(ret); 729 } 730 731 return group; 732 } 733 734 static int add_iommu_group(struct device *dev, void *data) 735 { 736 struct iommu_callback_data *cb = data; 737 const struct iommu_ops *ops = cb->ops; 738 739 if (!ops->add_device) 740 return 0; 741 742 WARN_ON(dev->iommu_group); 743 744 ops->add_device(dev); 745 746 return 0; 747 } 748 749 static int iommu_bus_notifier(struct notifier_block *nb, 750 unsigned long action, void *data) 751 { 752 struct device *dev = data; 753 const struct iommu_ops *ops = dev->bus->iommu_ops; 754 struct iommu_group *group; 755 unsigned long group_action = 0; 756 757 /* 758 * ADD/DEL call into iommu driver ops if provided, which may 759 * result in ADD/DEL notifiers to group->notifier 760 */ 761 if (action == BUS_NOTIFY_ADD_DEVICE) { 762 if (ops->add_device) 763 return ops->add_device(dev); 764 } else if (action == BUS_NOTIFY_DEL_DEVICE) { 765 if (ops->remove_device && dev->iommu_group) { 766 ops->remove_device(dev); 767 return 0; 768 } 769 } 770 771 /* 772 * Remaining BUS_NOTIFYs get filtered and republished to the 773 * group, if anyone is listening 774 */ 775 group = iommu_group_get(dev); 776 if (!group) 777 return 0; 778 779 switch (action) { 780 case BUS_NOTIFY_BIND_DRIVER: 781 group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER; 782 break; 783 case BUS_NOTIFY_BOUND_DRIVER: 784 group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER; 785 break; 786 case BUS_NOTIFY_UNBIND_DRIVER: 787 group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER; 788 break; 789 case BUS_NOTIFY_UNBOUND_DRIVER: 790 group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER; 791 break; 792 } 793 794 if (group_action) 795 blocking_notifier_call_chain(&group->notifier, 796 group_action, dev); 797 798 iommu_group_put(group); 799 return 0; 800 } 801 802 static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) 803 { 804 int err; 805 struct notifier_block *nb; 806 struct iommu_callback_data cb = { 807 .ops = ops, 808 }; 809 810 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); 811 if (!nb) 812 return -ENOMEM; 813 814 nb->notifier_call = iommu_bus_notifier; 815 816 err = bus_register_notifier(bus, nb); 817 if (err) { 818 kfree(nb); 819 return err; 820 } 821 822 err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group); 823 if (err) { 824 bus_unregister_notifier(bus, nb); 825 kfree(nb); 826 return err; 827 } 828 829 return 0; 830 } 831 832 /** 833 * bus_set_iommu - set iommu-callbacks for the bus 834 * @bus: bus. 835 * @ops: the callbacks provided by the iommu-driver 836 * 837 * This function is called by an iommu driver to set the iommu methods 838 * used for a particular bus. Drivers for devices on that bus can use 839 * the iommu-api after these ops are registered. 840 * This special function is needed because IOMMUs are usually devices on 841 * the bus itself, so the iommu drivers are not initialized when the bus 842 * is set up. With this function the iommu-driver can set the iommu-ops 843 * afterwards. 844 */ 845 int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) 846 { 847 int err; 848 849 if (bus->iommu_ops != NULL) 850 return -EBUSY; 851 852 bus->iommu_ops = ops; 853 854 /* Do IOMMU specific setup for this bus-type */ 855 err = iommu_bus_init(bus, ops); 856 if (err) 857 bus->iommu_ops = NULL; 858 859 return err; 860 } 861 EXPORT_SYMBOL_GPL(bus_set_iommu); 862 863 bool iommu_present(struct bus_type *bus) 864 { 865 return bus->iommu_ops != NULL; 866 } 867 EXPORT_SYMBOL_GPL(iommu_present); 868 869 bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) 870 { 871 if (!bus->iommu_ops || !bus->iommu_ops->capable) 872 return false; 873 874 return bus->iommu_ops->capable(cap); 875 } 876 EXPORT_SYMBOL_GPL(iommu_capable); 877 878 /** 879 * iommu_set_fault_handler() - set a fault handler for an iommu domain 880 * @domain: iommu domain 881 * @handler: fault handler 882 * @token: user data, will be passed back to the fault handler 883 * 884 * This function should be used by IOMMU users which want to be notified 885 * whenever an IOMMU fault happens. 886 * 887 * The fault handler itself should return 0 on success, and an appropriate 888 * error code otherwise. 889 */ 890 void iommu_set_fault_handler(struct iommu_domain *domain, 891 iommu_fault_handler_t handler, 892 void *token) 893 { 894 BUG_ON(!domain); 895 896 domain->handler = handler; 897 domain->handler_token = token; 898 } 899 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 900 901 struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) 902 { 903 struct iommu_domain *domain; 904 905 if (bus == NULL || bus->iommu_ops == NULL) 906 return NULL; 907 908 domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); 909 if (!domain) 910 return NULL; 911 912 domain->ops = bus->iommu_ops; 913 domain->type = IOMMU_DOMAIN_UNMANAGED; 914 915 return domain; 916 } 917 EXPORT_SYMBOL_GPL(iommu_domain_alloc); 918 919 void iommu_domain_free(struct iommu_domain *domain) 920 { 921 domain->ops->domain_free(domain); 922 } 923 EXPORT_SYMBOL_GPL(iommu_domain_free); 924 925 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 926 { 927 int ret; 928 if (unlikely(domain->ops->attach_dev == NULL)) 929 return -ENODEV; 930 931 ret = domain->ops->attach_dev(domain, dev); 932 if (!ret) 933 trace_attach_device_to_domain(dev); 934 return ret; 935 } 936 EXPORT_SYMBOL_GPL(iommu_attach_device); 937 938 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 939 { 940 if (unlikely(domain->ops->detach_dev == NULL)) 941 return; 942 943 domain->ops->detach_dev(domain, dev); 944 trace_detach_device_from_domain(dev); 945 } 946 EXPORT_SYMBOL_GPL(iommu_detach_device); 947 948 /* 949 * IOMMU groups are really the natrual working unit of the IOMMU, but 950 * the IOMMU API works on domains and devices. Bridge that gap by 951 * iterating over the devices in a group. Ideally we'd have a single 952 * device which represents the requestor ID of the group, but we also 953 * allow IOMMU drivers to create policy defined minimum sets, where 954 * the physical hardware may be able to distiguish members, but we 955 * wish to group them at a higher level (ex. untrusted multi-function 956 * PCI devices). Thus we attach each device. 957 */ 958 static int iommu_group_do_attach_device(struct device *dev, void *data) 959 { 960 struct iommu_domain *domain = data; 961 962 return iommu_attach_device(domain, dev); 963 } 964 965 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 966 { 967 return iommu_group_for_each_dev(group, domain, 968 iommu_group_do_attach_device); 969 } 970 EXPORT_SYMBOL_GPL(iommu_attach_group); 971 972 static int iommu_group_do_detach_device(struct device *dev, void *data) 973 { 974 struct iommu_domain *domain = data; 975 976 iommu_detach_device(domain, dev); 977 978 return 0; 979 } 980 981 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 982 { 983 iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); 984 } 985 EXPORT_SYMBOL_GPL(iommu_detach_group); 986 987 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 988 { 989 if (unlikely(domain->ops->iova_to_phys == NULL)) 990 return 0; 991 992 return domain->ops->iova_to_phys(domain, iova); 993 } 994 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 995 996 static size_t iommu_pgsize(struct iommu_domain *domain, 997 unsigned long addr_merge, size_t size) 998 { 999 unsigned int pgsize_idx; 1000 size_t pgsize; 1001 1002 /* Max page size that still fits into 'size' */ 1003 pgsize_idx = __fls(size); 1004 1005 /* need to consider alignment requirements ? */ 1006 if (likely(addr_merge)) { 1007 /* Max page size allowed by address */ 1008 unsigned int align_pgsize_idx = __ffs(addr_merge); 1009 pgsize_idx = min(pgsize_idx, align_pgsize_idx); 1010 } 1011 1012 /* build a mask of acceptable page sizes */ 1013 pgsize = (1UL << (pgsize_idx + 1)) - 1; 1014 1015 /* throw away page sizes not supported by the hardware */ 1016 pgsize &= domain->ops->pgsize_bitmap; 1017 1018 /* make sure we're still sane */ 1019 BUG_ON(!pgsize); 1020 1021 /* pick the biggest page */ 1022 pgsize_idx = __fls(pgsize); 1023 pgsize = 1UL << pgsize_idx; 1024 1025 return pgsize; 1026 } 1027 1028 int iommu_map(struct iommu_domain *domain, unsigned long iova, 1029 phys_addr_t paddr, size_t size, int prot) 1030 { 1031 unsigned long orig_iova = iova; 1032 unsigned int min_pagesz; 1033 size_t orig_size = size; 1034 int ret = 0; 1035 1036 if (unlikely(domain->ops->map == NULL || 1037 domain->ops->pgsize_bitmap == 0UL)) 1038 return -ENODEV; 1039 1040 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 1041 return -EINVAL; 1042 1043 /* find out the minimum page size supported */ 1044 min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); 1045 1046 /* 1047 * both the virtual address and the physical one, as well as 1048 * the size of the mapping, must be aligned (at least) to the 1049 * size of the smallest page supported by the hardware 1050 */ 1051 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 1052 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 1053 iova, &paddr, size, min_pagesz); 1054 return -EINVAL; 1055 } 1056 1057 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 1058 1059 while (size) { 1060 size_t pgsize = iommu_pgsize(domain, iova | paddr, size); 1061 1062 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", 1063 iova, &paddr, pgsize); 1064 1065 ret = domain->ops->map(domain, iova, paddr, pgsize, prot); 1066 if (ret) 1067 break; 1068 1069 iova += pgsize; 1070 paddr += pgsize; 1071 size -= pgsize; 1072 } 1073 1074 /* unroll mapping in case something went wrong */ 1075 if (ret) 1076 iommu_unmap(domain, orig_iova, orig_size - size); 1077 else 1078 trace_map(orig_iova, paddr, orig_size); 1079 1080 return ret; 1081 } 1082 EXPORT_SYMBOL_GPL(iommu_map); 1083 1084 size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) 1085 { 1086 size_t unmapped_page, unmapped = 0; 1087 unsigned int min_pagesz; 1088 unsigned long orig_iova = iova; 1089 1090 if (unlikely(domain->ops->unmap == NULL || 1091 domain->ops->pgsize_bitmap == 0UL)) 1092 return -ENODEV; 1093 1094 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 1095 return -EINVAL; 1096 1097 /* find out the minimum page size supported */ 1098 min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); 1099 1100 /* 1101 * The virtual address, as well as the size of the mapping, must be 1102 * aligned (at least) to the size of the smallest page supported 1103 * by the hardware 1104 */ 1105 if (!IS_ALIGNED(iova | size, min_pagesz)) { 1106 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 1107 iova, size, min_pagesz); 1108 return -EINVAL; 1109 } 1110 1111 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 1112 1113 /* 1114 * Keep iterating until we either unmap 'size' bytes (or more) 1115 * or we hit an area that isn't mapped. 1116 */ 1117 while (unmapped < size) { 1118 size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); 1119 1120 unmapped_page = domain->ops->unmap(domain, iova, pgsize); 1121 if (!unmapped_page) 1122 break; 1123 1124 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 1125 iova, unmapped_page); 1126 1127 iova += unmapped_page; 1128 unmapped += unmapped_page; 1129 } 1130 1131 trace_unmap(orig_iova, size, unmapped); 1132 return unmapped; 1133 } 1134 EXPORT_SYMBOL_GPL(iommu_unmap); 1135 1136 size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 1137 struct scatterlist *sg, unsigned int nents, int prot) 1138 { 1139 struct scatterlist *s; 1140 size_t mapped = 0; 1141 unsigned int i, min_pagesz; 1142 int ret; 1143 1144 if (unlikely(domain->ops->pgsize_bitmap == 0UL)) 1145 return 0; 1146 1147 min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); 1148 1149 for_each_sg(sg, s, nents, i) { 1150 phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; 1151 1152 /* 1153 * We are mapping on IOMMU page boundaries, so offset within 1154 * the page must be 0. However, the IOMMU may support pages 1155 * smaller than PAGE_SIZE, so s->offset may still represent 1156 * an offset of that boundary within the CPU page. 1157 */ 1158 if (!IS_ALIGNED(s->offset, min_pagesz)) 1159 goto out_err; 1160 1161 ret = iommu_map(domain, iova + mapped, phys, s->length, prot); 1162 if (ret) 1163 goto out_err; 1164 1165 mapped += s->length; 1166 } 1167 1168 return mapped; 1169 1170 out_err: 1171 /* undo mappings already done */ 1172 iommu_unmap(domain, iova, mapped); 1173 1174 return 0; 1175 1176 } 1177 EXPORT_SYMBOL_GPL(default_iommu_map_sg); 1178 1179 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, 1180 phys_addr_t paddr, u64 size, int prot) 1181 { 1182 if (unlikely(domain->ops->domain_window_enable == NULL)) 1183 return -ENODEV; 1184 1185 return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size, 1186 prot); 1187 } 1188 EXPORT_SYMBOL_GPL(iommu_domain_window_enable); 1189 1190 void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) 1191 { 1192 if (unlikely(domain->ops->domain_window_disable == NULL)) 1193 return; 1194 1195 return domain->ops->domain_window_disable(domain, wnd_nr); 1196 } 1197 EXPORT_SYMBOL_GPL(iommu_domain_window_disable); 1198 1199 static int __init iommu_init(void) 1200 { 1201 iommu_group_kset = kset_create_and_add("iommu_groups", 1202 NULL, kernel_kobj); 1203 ida_init(&iommu_group_ida); 1204 mutex_init(&iommu_group_mutex); 1205 1206 BUG_ON(!iommu_group_kset); 1207 1208 return 0; 1209 } 1210 arch_initcall(iommu_init); 1211 1212 int iommu_domain_get_attr(struct iommu_domain *domain, 1213 enum iommu_attr attr, void *data) 1214 { 1215 struct iommu_domain_geometry *geometry; 1216 bool *paging; 1217 int ret = 0; 1218 u32 *count; 1219 1220 switch (attr) { 1221 case DOMAIN_ATTR_GEOMETRY: 1222 geometry = data; 1223 *geometry = domain->geometry; 1224 1225 break; 1226 case DOMAIN_ATTR_PAGING: 1227 paging = data; 1228 *paging = (domain->ops->pgsize_bitmap != 0UL); 1229 break; 1230 case DOMAIN_ATTR_WINDOWS: 1231 count = data; 1232 1233 if (domain->ops->domain_get_windows != NULL) 1234 *count = domain->ops->domain_get_windows(domain); 1235 else 1236 ret = -ENODEV; 1237 1238 break; 1239 default: 1240 if (!domain->ops->domain_get_attr) 1241 return -EINVAL; 1242 1243 ret = domain->ops->domain_get_attr(domain, attr, data); 1244 } 1245 1246 return ret; 1247 } 1248 EXPORT_SYMBOL_GPL(iommu_domain_get_attr); 1249 1250 int iommu_domain_set_attr(struct iommu_domain *domain, 1251 enum iommu_attr attr, void *data) 1252 { 1253 int ret = 0; 1254 u32 *count; 1255 1256 switch (attr) { 1257 case DOMAIN_ATTR_WINDOWS: 1258 count = data; 1259 1260 if (domain->ops->domain_set_windows != NULL) 1261 ret = domain->ops->domain_set_windows(domain, *count); 1262 else 1263 ret = -ENODEV; 1264 1265 break; 1266 default: 1267 if (domain->ops->domain_set_attr == NULL) 1268 return -EINVAL; 1269 1270 ret = domain->ops->domain_set_attr(domain, attr, data); 1271 } 1272 1273 return ret; 1274 } 1275 EXPORT_SYMBOL_GPL(iommu_domain_set_attr); 1276