// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <linux/swiotlb.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include <trace/events/intel_iommu.h>

#include "../irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
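/*
 * Illustrative note (not part of the original source): assuming
 * VTD_PAGE_SHIFT is 12, the macros above work out as follows for a
 * 48-bit guest address width:
 *
 *	__DOMAIN_MAX_PFN(48)  = (1ULL << (48 - 12)) - 1 = 0xFFFFFFFFF
 *	__DOMAIN_MAX_ADDR(48) = (1ULL << 48) - 1
 *
 * and INTEL_IOMMU_PGSIZES == ~0xFFFUL advertises every power-of-two
 * size from 4KiB upwards to the IOMMU core, matching the comment above.
 */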
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}
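/*
 * Illustrative summary (an assumption drawn from the helpers above, not a
 * spec quote): each root_entry is two 64-bit words. Bit 0 of 'lo' (and, in
 * scalable mode, bit 0 of 'hi') is a present bit, while bits 12..63 hold
 * the page-aligned physical address of a context table, which is what
 * root_entry_lctp()/root_entry_uctp() extract via VTD_PAGE_MASK.
 */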
static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
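/*
 * Illustrative bit map of a legacy-mode context entry, as implied by the
 * accessors above (a reading aid only, not authoritative):
 *
 *	lo: bit 0	present
 *	    bit 1	fault processing disable (cleared by
 *			context_set_fault_enable())
 *	    bits 2-3	translation type
 *	    bit 11	PASID enable
 *	    bits 12-63	address root (second-level page table)
 *	hi: bits 0-2	address width
 *	    bit 3	"copied" marker (software-defined, used for kdump)
 *	    bits 8-23	domain id
 */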
/*
 * This domain is a statically identity mapping domain.
 * 1. This domain creates a static 1:1 mapping to all usable memory.
 * 2. It maps to each iommu if successful.
 * 3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */

#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
int intel_iommu_sm = 1;
#else
int intel_iommu_sm;
#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;

#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
struct device_domain_info *get_domain_info(struct device *dev)
{
	struct device_domain_info *info;

	if (!dev)
		return NULL;

	info = dev->archdata.iommu;
	if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
		     info == DEFER_DEVICE_DOMAIN_INFO))
		return NULL;

	return info;
}

DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
				to_pci_dev(d)->untrusted)

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		} else if (!strncmp(str, "nobounce", 8)) {
			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
			intel_no_bounce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline bool domain_use_first_level(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
				       unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, so use the default agaw and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
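/*
 * Worked example (illustrative only): with DEFAULT_DOMAIN_ADDRESS_WIDTH
 * of 57, width_to_agaw(57) = DIV_ROUND_UP(57 - 30, 9) = 3. If the
 * hardware's SAGAW bitmap does not have bit 3 set, __iommu_calculate_agaw()
 * walks downwards and may settle on agaw 2, i.e. a 48-bit, 4-level table,
 * since agaw_to_level(2) = 4 and agaw_to_width(2) = 48.
 */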
/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
		return NULL;

	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct dmar_domain *domain,
					 struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0x3;

	if (!intel_iommu_superpage)
		return 0;

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (domain && domain_use_first_level(domain)) {
				if (!cap_fl1gp_support(iommu->cap))
					mask = 0x1;
			} else {
				mask &= cap_super_page_val(iommu->cap);
			}

			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
}

struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

static bool attach_deferred(struct device *dev)
{
	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
}
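/*
 * Illustrative example for iommu_context_addr() above in scalable mode
 * (derived from the code, not from the spec text): a request for devfn
 * 0x85 selects the upper context table via root->hi, and the index
 * becomes (0x85 - 0x80) * 2 = 10, the "* 2" reflecting that each
 * scalable-mode context entry takes the space of two legacy entries.
 */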
/**
 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 *				 sub-hierarchy of a candidate PCI-PCI bridge
 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 * @bridge: the candidate PCI-PCI bridge
 *
 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 */
static bool
is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
{
	struct pci_dev *pdev, *pbridge;

	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
		return false;

	pdev = to_pci_dev(dev);
	pbridge = to_pci_dev(bridge);

	if (pbridge->subordinate &&
	    pbridge->subordinate->number <= pdev->bus->number &&
	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
		return true;

	return false;
}

static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = pci_real_dma_dev(to_pci_dev(dev));

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (is_downstream_to_pci_bridge(dev, tmp))
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry)
		goto out;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (domain_use_first_level(domain))
				pteval |= DMA_FL_PTE_XD;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}

/* return the pte for an address at a specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte; a TLB flush should follow */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}
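/*
 * Worked example for the level helpers used above (illustrative):
 * at level 2, level_to_offset_bits(2) = 9, so pfn_level_offset(pfn, 2)
 * selects bits 9..17 of the DMA pfn, and level_size(2) = 512, i.e. one
 * level-2 entry spans 512 * 4KiB = 2MiB of IOVA.
 */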
/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}
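/*
 * Usage sketch (illustrative, mirroring how callers of domain_unmap()
 * below are expected to use the freelist): pages chained on pg->freelist
 * by dma_pte_clear_level() are only handed back to the allocator after
 * the IOTLB has been flushed, e.g.:
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, domain, start_pfn, npages, 0, 0);
 *	dma_free_pagelist(freelist);
 */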
/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines whether we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
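/*
 * Illustrative call shape (an assumption for documentation purposes, not
 * a quote of an actual caller in this file): invalidating the cached
 * context entry of a single device would look like
 *
 *	__iommu_flush_context(iommu, did, (((u16)bus) << 8) | devfn,
 *			      DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
 *
 * while DMA_CCMD_GLOBAL_INVL ignores the did/source_id arguments.
 */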
/* return value determines whether we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
			u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

static void domain_update_iotlb(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link) {
		struct pci_dev *pdev;

		if (!info->dev || !dev_is_pci(info->dev))
			continue;

		pdev = to_pci_dev(info->dev);
		if (pdev->ats_enabled) {
			has_iotlb_device = true;
			break;
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);
	/* For an IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		info->pfsid = 0;
	else {
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a vf */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);
	}

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (info->ats_supported && pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				   qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void domain_flush_piotlb(struct intel_iommu *iommu,
				struct dmar_domain *domain,
				u64 addr, unsigned long npages, bool ih)
{
	u16 did = domain->iommu_did[iommu->seq_id];

	if (domain->default_pasid)
		qi_flush_piotlb(iommu, did, domain->default_pasid,
				addr, npages, ih);

	if (!list_empty(&domain->devices))
		qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
}
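/*
 * Worked example for the page-selective invalidation below (illustrative):
 * a request covering 9 pages gives
 * mask = ilog2(__roundup_pow_of_two(9)) = 4, so the hardware is asked to
 * invalidate a naturally aligned block of 16 pages containing the original
 * range, as PSI requires.
 */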
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;

	if (domain_use_first_level(domain)) {
		domain_flush_piotlb(iommu, domain, addr, pages, ih);
	} else {
		/*
		 * Fallback to domain selective flush if no PSI support or
		 * the size is too big. PSI requires page size to be 2 ^ x,
		 * and the base address is naturally aligned to the size.
		 */
		if (!cap_pgsel_inv(iommu->cap) ||
		    mask > cap_max_amask_val(iommu->cap))
			iommu->flush.flush_iotlb(iommu, did, 0, 0,
						 DMA_TLB_DSI_FLUSH);
		else
			iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						 DMA_TLB_PSI_FLUSH);
	}

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
}

/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
{
	/*
	 * It's a non-present to present mapping. Only flush if caching mode
	 * and second level.
	 */
	if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
}

static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		if (domain_use_first_level(domain))
			domain_flush_piotlb(iommu, domain, 0, -1, 0);
		else
			iommu->flush.flush_iotlb(iommu, did, 0, 0,
						 DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		      readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
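/*
 * Illustrative note on the 2-level iommu->domains[] layout set up by
 * iommu_init_domains() below and indexed by get_iommu_domain()/
 * set_iommu_domain() earlier: domain id 0x1234 lives at
 * iommu->domains[0x12][0x34], i.e. the high byte picks a lazily allocated
 * chunk of 256 pointers and the low byte indexes into that chunk.
 */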
static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains = NULL;
		return -ENOMEM;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	/*
	 * VT-d spec rev 3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
	if (sm_supported(iommu))
		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);

	return 0;
}

static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		__dmar_remove_one_dev_info(info);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
	}
	if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
		ioasid_unregister_allocator(&iommu->pasid_allocator);

#endif
}
/*
 * Check and return whether first level is used by default for
 * DMA translation.
 */
static bool first_level_by_default(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	static int first_level_support = -1;

	if (likely(first_level_support != -1))
		return first_level_support;

	first_level_support = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
			first_level_support = 0;
			break;
		}
	}
	rcu_read_unlock();

	return first_level_support;
}

static struct dmar_domain *alloc_domain(int flags)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	memset(domain, 0, sizeof(*domain));
	domain->nid = NUMA_NO_NODE;
	domain->flags = flags;
	if (first_level_by_default())
		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
	domain->has_iotlb_device = false;
	INIT_LIST_HEAD(&domain->devices);

	return domain;
}

/* Must be called with iommu->lock */
static int domain_attach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	unsigned long ndomains;
	int num;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] += 1;
	domain->iommu_count += 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
		ndomains = cap_ndoms(iommu->cap);
		num = find_first_zero_bit(iommu->domain_ids, ndomains);

		if (num >= ndomains) {
			pr_err("%s: No free domain ids\n", iommu->name);
			domain->iommu_refcnt[iommu->seq_id] -= 1;
			domain->iommu_count -= 1;
			return -ENOSPC;
		}

		set_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, domain);

		domain->iommu_did[iommu->seq_id] = num;
		domain->nid = iommu->node;

		domain_update_iommu_cap(domain);
	}

	return 0;
}

static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num, count;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] -= 1;
	count = --domain->iommu_count;
	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
		num = domain->iommu_did[iommu->seq_id];
		clear_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, NULL);

		domain_update_iommu_cap(domain);
		domain->iommu_did[iommu->seq_id] = 0;
	}

	return count;
}

static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
			  &reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
			    IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		pr_err("Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				pci_err(pdev, "Reserve iova for %pR failed\n", r);
				return -ENODEV;
			}
		}
	}
	return 0;
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}

static void domain_exit(struct dmar_domain *domain)
{
	/* Remove associated devices and clear attached or cached domains */
	domain_remove_dev_info(domain);

	/* destroy iovas */
	if (domain->domain.type == IOMMU_DOMAIN_DMA)
		put_iova_domain(&domain->iovad);

	if (domain->pgd) {
		struct page *freelist;

		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
		dma_free_pagelist(freelist);
	}

	free_domain_mem(domain);
}

/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static inline unsigned long context_get_sm_pds(struct pasid_table *table)
{
	int pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

/*
 * Set the RID_PASID field of a scalable mode context entry. The
 * IOMMU hardware will use the PASID value set in this field for
 * DMA translations of DMA requests without PASID.
 */
static inline void
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
{
	context->hi |= pasid & ((1 << 20) - 1);
	context->hi |= (1 << 20);
}

/*
 * Set the DTE(Device-TLB Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_dte(struct context_entry *context)
{
	context->lo |= (1 << 2);
}

/*
 * Set the PRE(Page Request Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_pre(struct context_entry *context)
{
	context->lo |= (1 << 4);
}
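/*
 * Worked example for context_get_sm_pds() above (illustrative, and
 * assuming PASID_PDE_SHIFT is 6): with a 20-bit PASID space,
 * max_pasid >> PASID_PDE_SHIFT leaves a single bit set at position 14,
 * so pds = 14 and the function returns 14 - 7 = 7, i.e. a PASID
 * directory with 2^(7 + 7) = 16384 entries.
 */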
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds)	(((pds) & 0x7) << 9)

static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      struct pasid_table *table,
				      u8 bus, u8 devfn)
{
	u16 did = domain->iommu_did[iommu->seq_id];
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct device_domain_info *info = NULL;
	struct context_entry *context;
	unsigned long flags;
	int ret;

	WARN_ON(did == 0);

	if (hw_pass_through && domain_type_is_si(domain))
		translation = CONTEXT_TT_PASS_THROUGH;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -ENOMEM;
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context)
		goto out_unlock;

	ret = 0;
	if (context_present(context))
		goto out_unlock;

	/*
	 * For kdump cases, old valid entries may be cached due to the
	 * in-flight DMA and copied pgtable, but there is no unmapping
	 * behaviour for them, thus we need an explicit cache flush for
	 * the newly-mapped device. For kdump, at this point, the device
	 * is supposed to have finished reset at its driver probe stage,
	 * so no in-flight DMA will exist, and we don't need to worry
	 * about it hereafter.
	 */
	if (context_copied(context)) {
		u16 did_old = context_domain_id(context);

		if (did_old < cap_ndoms(iommu->cap)) {
			iommu->flush.flush_context(iommu, did_old,
						   (((u16)bus) << 8) | devfn,
						   DMA_CCMD_MASK_NOBIT,
						   DMA_CCMD_DEVICE_INVL);
			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
						 DMA_TLB_DSI_FLUSH);
		}
	}

	context_clear_entry(context);

	if (sm_supported(iommu)) {
		unsigned long pds;

		WARN_ON(!table);

		/* Setup the PASID DIR pointer: */
		pds = context_get_sm_pds(table);
		context->lo = (u64)virt_to_phys(table->table) |
				context_pdts(pds);

		/* Setup the RID_PASID field: */
		context_set_sm_rid2pasid(context, PASID_RID2PASID);

		/*
		 * Setup the Device-TLB enable bit and Page request
		 * Enable bit:
		 */
		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
		if (info && info->ats_supported)
			context_set_sm_dte(context);
		if (info && info->pri_supported)
			context_set_sm_pre(context);
	} else {
		struct dma_pte *pgd = domain->pgd;
		int agaw;

		context_set_domain_id(context, did);

		if (translation != CONTEXT_TT_PASS_THROUGH) {
			/*
			 * Skip top levels of page tables for iommu which has
			 * less agaw than default. Unnecessary for PT mode.
			 */
			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
				ret = -ENOMEM;
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd))
					goto out_unlock;
			}

			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
			if (info && info->ats_supported)
				translation = CONTEXT_TT_DEV_IOTLB;
			else
				translation = CONTEXT_TT_MULTI_LEVEL;

			context_set_address_root(context, virt_to_phys(pgd));
			context_set_address_width(context, agaw);
		} else {
			/*
			 * In pass through mode, AW must be programmed to
			 * indicate the largest AGAW value supported by
			 * hardware. And ASR is ignored by hardware.
			 */
2089 */ 2090 context_set_address_width(context, iommu->msagaw); 2091 } 2092 2093 context_set_translation_type(context, translation); 2094 } 2095 2096 context_set_fault_enable(context); 2097 context_set_present(context); 2098 domain_flush_cache(domain, context, sizeof(*context)); 2099 2100 /* 2101 * It's a non-present to present mapping. If hardware doesn't cache 2102 * non-present entry we only need to flush the write-buffer. If the 2103 * _does_ cache non-present entries, then it does so in the special 2104 * domain #0, which we have to flush: 2105 */ 2106 if (cap_caching_mode(iommu->cap)) { 2107 iommu->flush.flush_context(iommu, 0, 2108 (((u16)bus) << 8) | devfn, 2109 DMA_CCMD_MASK_NOBIT, 2110 DMA_CCMD_DEVICE_INVL); 2111 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); 2112 } else { 2113 iommu_flush_write_buffer(iommu); 2114 } 2115 iommu_enable_dev_iotlb(info); 2116 2117 ret = 0; 2118 2119 out_unlock: 2120 spin_unlock(&iommu->lock); 2121 spin_unlock_irqrestore(&device_domain_lock, flags); 2122 2123 return ret; 2124 } 2125 2126 struct domain_context_mapping_data { 2127 struct dmar_domain *domain; 2128 struct intel_iommu *iommu; 2129 struct pasid_table *table; 2130 }; 2131 2132 static int domain_context_mapping_cb(struct pci_dev *pdev, 2133 u16 alias, void *opaque) 2134 { 2135 struct domain_context_mapping_data *data = opaque; 2136 2137 return domain_context_mapping_one(data->domain, data->iommu, 2138 data->table, PCI_BUS_NUM(alias), 2139 alias & 0xff); 2140 } 2141 2142 static int 2143 domain_context_mapping(struct dmar_domain *domain, struct device *dev) 2144 { 2145 struct domain_context_mapping_data data; 2146 struct pasid_table *table; 2147 struct intel_iommu *iommu; 2148 u8 bus, devfn; 2149 2150 iommu = device_to_iommu(dev, &bus, &devfn); 2151 if (!iommu) 2152 return -ENODEV; 2153 2154 table = intel_pasid_get_table(dev); 2155 2156 if (!dev_is_pci(dev)) 2157 return domain_context_mapping_one(domain, iommu, table, 2158 bus, devfn); 2159 2160 data.domain = domain; 2161 data.iommu = iommu; 2162 data.table = table; 2163 2164 return pci_for_each_dma_alias(to_pci_dev(dev), 2165 &domain_context_mapping_cb, &data); 2166 } 2167 2168 static int domain_context_mapped_cb(struct pci_dev *pdev, 2169 u16 alias, void *opaque) 2170 { 2171 struct intel_iommu *iommu = opaque; 2172 2173 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); 2174 } 2175 2176 static int domain_context_mapped(struct device *dev) 2177 { 2178 struct intel_iommu *iommu; 2179 u8 bus, devfn; 2180 2181 iommu = device_to_iommu(dev, &bus, &devfn); 2182 if (!iommu) 2183 return -ENODEV; 2184 2185 if (!dev_is_pci(dev)) 2186 return device_context_mapped(iommu, bus, devfn); 2187 2188 return !pci_for_each_dma_alias(to_pci_dev(dev), 2189 domain_context_mapped_cb, iommu); 2190 } 2191 2192 /* Returns a number of VTD pages, but aligned to MM page size */ 2193 static inline unsigned long aligned_nrpages(unsigned long host_addr, 2194 size_t size) 2195 { 2196 host_addr &= ~PAGE_MASK; 2197 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; 2198 } 2199 2200 /* Return largest possible superpage level for a given mapping */ 2201 static inline int hardware_largepage_caps(struct dmar_domain *domain, 2202 unsigned long iov_pfn, 2203 unsigned long phy_pfn, 2204 unsigned long pages) 2205 { 2206 int support, level = 1; 2207 unsigned long pfnmerge; 2208 2209 support = domain->iommu_superpage; 2210 2211 /* To use a large page, the virtual *and* physical addresses 2212 must be aligned to 2MiB/1GiB/etc. 
Lower bits set in either 2213 of them will mean we have to use smaller pages. So just 2214 merge them and check both at once. */ 2215 pfnmerge = iov_pfn | phy_pfn; 2216 2217 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { 2218 pages >>= VTD_STRIDE_SHIFT; 2219 if (!pages) 2220 break; 2221 pfnmerge >>= VTD_STRIDE_SHIFT; 2222 level++; 2223 support--; 2224 } 2225 return level; 2226 } 2227 2228 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2229 struct scatterlist *sg, unsigned long phys_pfn, 2230 unsigned long nr_pages, int prot) 2231 { 2232 struct dma_pte *first_pte = NULL, *pte = NULL; 2233 phys_addr_t uninitialized_var(pteval); 2234 unsigned long sg_res = 0; 2235 unsigned int largepage_lvl = 0; 2236 unsigned long lvl_pages = 0; 2237 u64 attr; 2238 2239 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); 2240 2241 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 2242 return -EINVAL; 2243 2244 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); 2245 if (domain_use_first_level(domain)) 2246 attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD; 2247 2248 if (!sg) { 2249 sg_res = nr_pages; 2250 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; 2251 } 2252 2253 while (nr_pages > 0) { 2254 uint64_t tmp; 2255 2256 if (!sg_res) { 2257 unsigned int pgoff = sg->offset & ~PAGE_MASK; 2258 2259 sg_res = aligned_nrpages(sg->offset, sg->length); 2260 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; 2261 sg->dma_length = sg->length; 2262 pteval = (sg_phys(sg) - pgoff) | attr; 2263 phys_pfn = pteval >> VTD_PAGE_SHIFT; 2264 } 2265 2266 if (!pte) { 2267 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); 2268 2269 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); 2270 if (!pte) 2271 return -ENOMEM; 2272 /* It is large page*/ 2273 if (largepage_lvl > 1) { 2274 unsigned long nr_superpages, end_pfn; 2275 2276 pteval |= DMA_PTE_LARGE_PAGE; 2277 lvl_pages = lvl_to_nr_pages(largepage_lvl); 2278 2279 nr_superpages = sg_res / lvl_pages; 2280 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; 2281 2282 /* 2283 * Ensure that old small page tables are 2284 * removed to make room for superpage(s). 2285 * We're adding new large pages, so make sure 2286 * we don't remove their parent tables. 2287 */ 2288 dma_pte_free_pagetable(domain, iov_pfn, end_pfn, 2289 largepage_lvl + 1); 2290 } else { 2291 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; 2292 } 2293 2294 } 2295 /* We don't need lock here, nobody else 2296 * touches the iova range 2297 */ 2298 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); 2299 if (tmp) { 2300 static int dumps = 5; 2301 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", 2302 iov_pfn, tmp, (unsigned long long)pteval); 2303 if (dumps) { 2304 dumps--; 2305 debug_dma_dump_mappings(NULL); 2306 } 2307 WARN_ON(1); 2308 } 2309 2310 lvl_pages = lvl_to_nr_pages(largepage_lvl); 2311 2312 BUG_ON(nr_pages < lvl_pages); 2313 BUG_ON(sg_res < lvl_pages); 2314 2315 nr_pages -= lvl_pages; 2316 iov_pfn += lvl_pages; 2317 phys_pfn += lvl_pages; 2318 pteval += lvl_pages * VTD_PAGE_SIZE; 2319 sg_res -= lvl_pages; 2320 2321 /* If the next PTE would be the first in a new page, then we 2322 need to flush the cache on the entries we've just written. 2323 And then we'll need to recalculate 'pte', so clear it and 2324 let it get set again in the if (!pte) block above. 2325 2326 If we're done (!nr_pages) we need to flush the cache too. 
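	   (On coherent hardware this flush is cheap: domain_flush_cache()
	   only flushes CPU cache lines when the IOMMU does not snoop its
	   page-walk accesses, i.e. when the ECAP coherency bit is clear.)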
2327 2328 Also if we've been setting superpages, we may need to 2329 recalculate 'pte' and switch back to smaller pages for the 2330 end of the mapping, if the trailing size is not enough to 2331 use another superpage (i.e. sg_res < lvl_pages). */ 2332 pte++; 2333 if (!nr_pages || first_pte_in_page(pte) || 2334 (largepage_lvl > 1 && sg_res < lvl_pages)) { 2335 domain_flush_cache(domain, first_pte, 2336 (void *)pte - (void *)first_pte); 2337 pte = NULL; 2338 } 2339 2340 if (!sg_res && nr_pages) 2341 sg = sg_next(sg); 2342 } 2343 return 0; 2344 } 2345 2346 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2347 struct scatterlist *sg, unsigned long phys_pfn, 2348 unsigned long nr_pages, int prot) 2349 { 2350 int iommu_id, ret; 2351 struct intel_iommu *iommu; 2352 2353 /* Do the real mapping first */ 2354 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot); 2355 if (ret) 2356 return ret; 2357 2358 for_each_domain_iommu(iommu_id, domain) { 2359 iommu = g_iommus[iommu_id]; 2360 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2361 } 2362 2363 return 0; 2364 } 2365 2366 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2367 struct scatterlist *sg, unsigned long nr_pages, 2368 int prot) 2369 { 2370 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); 2371 } 2372 2373 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2374 unsigned long phys_pfn, unsigned long nr_pages, 2375 int prot) 2376 { 2377 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); 2378 } 2379 2380 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn) 2381 { 2382 unsigned long flags; 2383 struct context_entry *context; 2384 u16 did_old; 2385 2386 if (!iommu) 2387 return; 2388 2389 spin_lock_irqsave(&iommu->lock, flags); 2390 context = iommu_context_addr(iommu, bus, devfn, 0); 2391 if (!context) { 2392 spin_unlock_irqrestore(&iommu->lock, flags); 2393 return; 2394 } 2395 did_old = context_domain_id(context); 2396 context_clear_entry(context); 2397 __iommu_flush_cache(iommu, context, sizeof(*context)); 2398 spin_unlock_irqrestore(&iommu->lock, flags); 2399 iommu->flush.flush_context(iommu, 2400 did_old, 2401 (((u16)bus) << 8) | devfn, 2402 DMA_CCMD_MASK_NOBIT, 2403 DMA_CCMD_DEVICE_INVL); 2404 iommu->flush.flush_iotlb(iommu, 2405 did_old, 2406 0, 2407 0, 2408 DMA_TLB_DSI_FLUSH); 2409 } 2410 2411 static inline void unlink_domain_info(struct device_domain_info *info) 2412 { 2413 assert_spin_locked(&device_domain_lock); 2414 list_del(&info->link); 2415 list_del(&info->global); 2416 if (info->dev) 2417 info->dev->archdata.iommu = NULL; 2418 } 2419 2420 static void domain_remove_dev_info(struct dmar_domain *domain) 2421 { 2422 struct device_domain_info *info, *tmp; 2423 unsigned long flags; 2424 2425 spin_lock_irqsave(&device_domain_lock, flags); 2426 list_for_each_entry_safe(info, tmp, &domain->devices, link) 2427 __dmar_remove_one_dev_info(info); 2428 spin_unlock_irqrestore(&device_domain_lock, flags); 2429 } 2430 2431 struct dmar_domain *find_domain(struct device *dev) 2432 { 2433 struct device_domain_info *info; 2434 2435 if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) 2436 return NULL; 2437 2438 /* No lock here, assumes no domain exit in normal case */ 2439 info = get_domain_info(dev); 2440 if (likely(info)) 2441 return info->domain; 2442 2443 return NULL; 2444 } 2445 2446 static void do_deferred_attach(struct device *dev) 2447 { 2448 struct 
iommu_domain *domain; 2449 2450 dev->archdata.iommu = NULL; 2451 domain = iommu_get_domain_for_dev(dev); 2452 if (domain) 2453 intel_iommu_attach_device(domain, dev); 2454 } 2455 2456 static inline struct device_domain_info * 2457 dmar_search_domain_by_dev_info(int segment, int bus, int devfn) 2458 { 2459 struct device_domain_info *info; 2460 2461 list_for_each_entry(info, &device_domain_list, global) 2462 if (info->segment == segment && info->bus == bus && 2463 info->devfn == devfn) 2464 return info; 2465 2466 return NULL; 2467 } 2468 2469 static int domain_setup_first_level(struct intel_iommu *iommu, 2470 struct dmar_domain *domain, 2471 struct device *dev, 2472 int pasid) 2473 { 2474 int flags = PASID_FLAG_SUPERVISOR_MODE; 2475 struct dma_pte *pgd = domain->pgd; 2476 int agaw, level; 2477 2478 /* 2479 * Skip top levels of page tables for iommu which has 2480 * less agaw than default. Unnecessary for PT mode. 2481 */ 2482 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { 2483 pgd = phys_to_virt(dma_pte_addr(pgd)); 2484 if (!dma_pte_present(pgd)) 2485 return -ENOMEM; 2486 } 2487 2488 level = agaw_to_level(agaw); 2489 if (level != 4 && level != 5) 2490 return -EINVAL; 2491 2492 flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; 2493 2494 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, 2495 domain->iommu_did[iommu->seq_id], 2496 flags); 2497 } 2498 2499 static bool dev_is_real_dma_subdevice(struct device *dev) 2500 { 2501 return dev && dev_is_pci(dev) && 2502 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); 2503 } 2504 2505 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, 2506 int bus, int devfn, 2507 struct device *dev, 2508 struct dmar_domain *domain) 2509 { 2510 struct dmar_domain *found = NULL; 2511 struct device_domain_info *info; 2512 unsigned long flags; 2513 int ret; 2514 2515 info = alloc_devinfo_mem(); 2516 if (!info) 2517 return NULL; 2518 2519 if (!dev_is_real_dma_subdevice(dev)) { 2520 info->bus = bus; 2521 info->devfn = devfn; 2522 info->segment = iommu->segment; 2523 } else { 2524 struct pci_dev *pdev = to_pci_dev(dev); 2525 2526 info->bus = pdev->bus->number; 2527 info->devfn = pdev->devfn; 2528 info->segment = pci_domain_nr(pdev->bus); 2529 } 2530 2531 info->ats_supported = info->pasid_supported = info->pri_supported = 0; 2532 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; 2533 info->ats_qdep = 0; 2534 info->dev = dev; 2535 info->domain = domain; 2536 info->iommu = iommu; 2537 info->pasid_table = NULL; 2538 info->auxd_enabled = 0; 2539 INIT_LIST_HEAD(&info->auxiliary_domains); 2540 2541 if (dev && dev_is_pci(dev)) { 2542 struct pci_dev *pdev = to_pci_dev(info->dev); 2543 2544 if (ecap_dev_iotlb_support(iommu->ecap) && 2545 pci_ats_supported(pdev) && 2546 dmar_find_matched_atsr_unit(pdev)) 2547 info->ats_supported = 1; 2548 2549 if (sm_supported(iommu)) { 2550 if (pasid_supported(iommu)) { 2551 int features = pci_pasid_features(pdev); 2552 if (features >= 0) 2553 info->pasid_supported = features | 1; 2554 } 2555 2556 if (info->ats_supported && ecap_prs(iommu->ecap) && 2557 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI)) 2558 info->pri_supported = 1; 2559 } 2560 } 2561 2562 spin_lock_irqsave(&device_domain_lock, flags); 2563 if (dev) 2564 found = find_domain(dev); 2565 2566 if (!found) { 2567 struct device_domain_info *info2; 2568 info2 = dmar_search_domain_by_dev_info(info->segment, info->bus, 2569 info->devfn); 2570 if (info2) { 2571 found = info2->domain; 2572 info2->dev = dev; 2573 } 2574 } 
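	/*
	 * If this device, or one of its DMA aliases, already joined a
	 * domain, the freshly allocated info is dropped just below and the
	 * existing domain is returned instead. Illustrative caller pattern
	 * (a sketch of what domain_add_dev_info() further down does):
	 *
	 *	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
	 *	if (ndomain != domain)
	 *		return -EBUSY;
	 *
	 * The original 'domain' must then be freed by the caller, as the
	 * "Caller must free the original domain" note below spells out.
	 */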
2575 2576 if (found) { 2577 spin_unlock_irqrestore(&device_domain_lock, flags); 2578 free_devinfo_mem(info); 2579 /* Caller must free the original domain */ 2580 return found; 2581 } 2582 2583 spin_lock(&iommu->lock); 2584 ret = domain_attach_iommu(domain, iommu); 2585 spin_unlock(&iommu->lock); 2586 2587 if (ret) { 2588 spin_unlock_irqrestore(&device_domain_lock, flags); 2589 free_devinfo_mem(info); 2590 return NULL; 2591 } 2592 2593 list_add(&info->link, &domain->devices); 2594 list_add(&info->global, &device_domain_list); 2595 if (dev) 2596 dev->archdata.iommu = info; 2597 spin_unlock_irqrestore(&device_domain_lock, flags); 2598 2599 /* PASID table is mandatory for a PCI device in scalable mode. */ 2600 if (dev && dev_is_pci(dev) && sm_supported(iommu)) { 2601 ret = intel_pasid_alloc_table(dev); 2602 if (ret) { 2603 dev_err(dev, "PASID table allocation failed\n"); 2604 dmar_remove_one_dev_info(dev); 2605 return NULL; 2606 } 2607 2608 /* Setup the PASID entry for requests without PASID: */ 2609 spin_lock(&iommu->lock); 2610 if (hw_pass_through && domain_type_is_si(domain)) 2611 ret = intel_pasid_setup_pass_through(iommu, domain, 2612 dev, PASID_RID2PASID); 2613 else if (domain_use_first_level(domain)) 2614 ret = domain_setup_first_level(iommu, domain, dev, 2615 PASID_RID2PASID); 2616 else 2617 ret = intel_pasid_setup_second_level(iommu, domain, 2618 dev, PASID_RID2PASID); 2619 spin_unlock(&iommu->lock); 2620 if (ret) { 2621 dev_err(dev, "Setup RID2PASID failed\n"); 2622 dmar_remove_one_dev_info(dev); 2623 return NULL; 2624 } 2625 } 2626 2627 if (dev && domain_context_mapping(domain, dev)) { 2628 dev_err(dev, "Domain context map failed\n"); 2629 dmar_remove_one_dev_info(dev); 2630 return NULL; 2631 } 2632 2633 return domain; 2634 } 2635 2636 static int iommu_domain_identity_map(struct dmar_domain *domain, 2637 unsigned long first_vpfn, 2638 unsigned long last_vpfn) 2639 { 2640 /* 2641 * RMRR range might have overlap with physical memory range, 2642 * clear it first 2643 */ 2644 dma_pte_clear_range(domain, first_vpfn, last_vpfn); 2645 2646 return __domain_mapping(domain, first_vpfn, NULL, 2647 first_vpfn, last_vpfn - first_vpfn + 1, 2648 DMA_PTE_READ|DMA_PTE_WRITE); 2649 } 2650 2651 static int md_domain_init(struct dmar_domain *domain, int guest_width); 2652 2653 static int __init si_domain_init(int hw) 2654 { 2655 struct dmar_rmrr_unit *rmrr; 2656 struct device *dev; 2657 int i, nid, ret; 2658 2659 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); 2660 if (!si_domain) 2661 return -EFAULT; 2662 2663 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2664 domain_exit(si_domain); 2665 return -EFAULT; 2666 } 2667 2668 if (hw) 2669 return 0; 2670 2671 for_each_online_node(nid) { 2672 unsigned long start_pfn, end_pfn; 2673 int i; 2674 2675 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { 2676 ret = iommu_domain_identity_map(si_domain, 2677 mm_to_dma_pfn(start_pfn), 2678 mm_to_dma_pfn(end_pfn)); 2679 if (ret) 2680 return ret; 2681 } 2682 } 2683 2684 /* 2685 * Identity map the RMRRs so that devices with RMRRs could also use 2686 * the si_domain. 
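	 * (Typically graphics or USB devices whose firmware set up reserved
	 * regions; see device_rmrr_is_relaxable() below.)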
2687 */ 2688 for_each_rmrr_units(rmrr) { 2689 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 2690 i, dev) { 2691 unsigned long long start = rmrr->base_address; 2692 unsigned long long end = rmrr->end_address; 2693 2694 if (WARN_ON(end < start || 2695 end >> agaw_to_width(si_domain->agaw))) 2696 continue; 2697 2698 ret = iommu_domain_identity_map(si_domain, start, end); 2699 if (ret) 2700 return ret; 2701 } 2702 } 2703 2704 return 0; 2705 } 2706 2707 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) 2708 { 2709 struct dmar_domain *ndomain; 2710 struct intel_iommu *iommu; 2711 u8 bus, devfn; 2712 2713 iommu = device_to_iommu(dev, &bus, &devfn); 2714 if (!iommu) 2715 return -ENODEV; 2716 2717 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); 2718 if (ndomain != domain) 2719 return -EBUSY; 2720 2721 return 0; 2722 } 2723 2724 static bool device_has_rmrr(struct device *dev) 2725 { 2726 struct dmar_rmrr_unit *rmrr; 2727 struct device *tmp; 2728 int i; 2729 2730 rcu_read_lock(); 2731 for_each_rmrr_units(rmrr) { 2732 /* 2733 * Return TRUE if this RMRR contains the device that 2734 * is passed in. 2735 */ 2736 for_each_active_dev_scope(rmrr->devices, 2737 rmrr->devices_cnt, i, tmp) 2738 if (tmp == dev || 2739 is_downstream_to_pci_bridge(dev, tmp)) { 2740 rcu_read_unlock(); 2741 return true; 2742 } 2743 } 2744 rcu_read_unlock(); 2745 return false; 2746 } 2747 2748 /** 2749 * device_rmrr_is_relaxable - Test whether the RMRR of this device 2750 * is relaxable (ie. is allowed to be not enforced under some conditions) 2751 * @dev: device handle 2752 * 2753 * We assume that PCI USB devices with RMRRs have them largely 2754 * for historical reasons and that the RMRR space is not actively used post 2755 * boot. This exclusion may change if vendors begin to abuse it. 2756 * 2757 * The same exception is made for graphics devices, with the requirement that 2758 * any use of the RMRR regions will be torn down before assigning the device 2759 * to a guest. 2760 * 2761 * Return: true if the RMRR is relaxable, false otherwise 2762 */ 2763 static bool device_rmrr_is_relaxable(struct device *dev) 2764 { 2765 struct pci_dev *pdev; 2766 2767 if (!dev_is_pci(dev)) 2768 return false; 2769 2770 pdev = to_pci_dev(dev); 2771 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) 2772 return true; 2773 else 2774 return false; 2775 } 2776 2777 /* 2778 * There are a couple cases where we need to restrict the functionality of 2779 * devices associated with RMRRs. The first is when evaluating a device for 2780 * identity mapping because problems exist when devices are moved in and out 2781 * of domains and their respective RMRR information is lost. This means that 2782 * a device with associated RMRRs will never be in a "passthrough" domain. 2783 * The second is use of the device through the IOMMU API. This interface 2784 * expects to have full control of the IOVA space for the device. We cannot 2785 * satisfy both the requirement that RMRR access is maintained and have an 2786 * unencumbered IOVA space. We also have no ability to quiesce the device's 2787 * use of the RMRR space or even inform the IOMMU API user of the restriction. 2788 * We therefore prevent devices associated with an RMRR from participating in 2789 * the IOMMU API, which eliminates them from device assignment. 2790 * 2791 * In both cases, devices which have relaxable RMRRs are not concerned by this 2792 * restriction. See device_rmrr_is_relaxable comment. 
2793 */ 2794 static bool device_is_rmrr_locked(struct device *dev) 2795 { 2796 if (!device_has_rmrr(dev)) 2797 return false; 2798 2799 if (device_rmrr_is_relaxable(dev)) 2800 return false; 2801 2802 return true; 2803 } 2804 2805 /* 2806 * Return the required default domain type for a specific device. 2807 * 2808 * @dev: the device in query 2809 * @startup: true if this is during early boot 2810 * 2811 * Returns: 2812 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain 2813 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain 2814 * - 0: both identity and dynamic domains work for this device 2815 */ 2816 static int device_def_domain_type(struct device *dev) 2817 { 2818 if (dev_is_pci(dev)) { 2819 struct pci_dev *pdev = to_pci_dev(dev); 2820 2821 /* 2822 * Prevent any device marked as untrusted from getting 2823 * placed into the statically identity mapping domain. 2824 */ 2825 if (pdev->untrusted) 2826 return IOMMU_DOMAIN_DMA; 2827 2828 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) 2829 return IOMMU_DOMAIN_IDENTITY; 2830 2831 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) 2832 return IOMMU_DOMAIN_IDENTITY; 2833 } 2834 2835 return 0; 2836 } 2837 2838 static void intel_iommu_init_qi(struct intel_iommu *iommu) 2839 { 2840 /* 2841 * Start from the sane iommu hardware state. 2842 * If the queued invalidation is already initialized by us 2843 * (for example, while enabling interrupt-remapping) then 2844 * we got the things already rolling from a sane state. 2845 */ 2846 if (!iommu->qi) { 2847 /* 2848 * Clear any previous faults. 2849 */ 2850 dmar_fault(-1, iommu); 2851 /* 2852 * Disable queued invalidation if supported and already enabled 2853 * before OS handover. 2854 */ 2855 dmar_disable_qi(iommu); 2856 } 2857 2858 if (dmar_enable_qi(iommu)) { 2859 /* 2860 * Queued Invalidate not enabled, use Register Based Invalidate 2861 */ 2862 iommu->flush.flush_context = __iommu_flush_context; 2863 iommu->flush.flush_iotlb = __iommu_flush_iotlb; 2864 pr_info("%s: Using Register based invalidation\n", 2865 iommu->name); 2866 } else { 2867 iommu->flush.flush_context = qi_flush_context; 2868 iommu->flush.flush_iotlb = qi_flush_iotlb; 2869 pr_info("%s: Using Queued invalidation\n", iommu->name); 2870 } 2871 } 2872 2873 static int copy_context_table(struct intel_iommu *iommu, 2874 struct root_entry *old_re, 2875 struct context_entry **tbl, 2876 int bus, bool ext) 2877 { 2878 int tbl_idx, pos = 0, idx, devfn, ret = 0, did; 2879 struct context_entry *new_ce = NULL, ce; 2880 struct context_entry *old_ce = NULL; 2881 struct root_entry re; 2882 phys_addr_t old_ce_phys; 2883 2884 tbl_idx = ext ? bus * 2 : bus; 2885 memcpy(&re, old_re, sizeof(re)); 2886 2887 for (devfn = 0; devfn < 256; devfn++) { 2888 /* First calculate the correct index */ 2889 idx = (ext ? 
devfn * 2 : devfn) % 256; 2890 2891 if (idx == 0) { 2892 /* First save what we may have and clean up */ 2893 if (new_ce) { 2894 tbl[tbl_idx] = new_ce; 2895 __iommu_flush_cache(iommu, new_ce, 2896 VTD_PAGE_SIZE); 2897 pos = 1; 2898 } 2899 2900 if (old_ce) 2901 memunmap(old_ce); 2902 2903 ret = 0; 2904 if (devfn < 0x80) 2905 old_ce_phys = root_entry_lctp(&re); 2906 else 2907 old_ce_phys = root_entry_uctp(&re); 2908 2909 if (!old_ce_phys) { 2910 if (ext && devfn == 0) { 2911 /* No LCTP, try UCTP */ 2912 devfn = 0x7f; 2913 continue; 2914 } else { 2915 goto out; 2916 } 2917 } 2918 2919 ret = -ENOMEM; 2920 old_ce = memremap(old_ce_phys, PAGE_SIZE, 2921 MEMREMAP_WB); 2922 if (!old_ce) 2923 goto out; 2924 2925 new_ce = alloc_pgtable_page(iommu->node); 2926 if (!new_ce) 2927 goto out_unmap; 2928 2929 ret = 0; 2930 } 2931 2932 /* Now copy the context entry */ 2933 memcpy(&ce, old_ce + idx, sizeof(ce)); 2934 2935 if (!__context_present(&ce)) 2936 continue; 2937 2938 did = context_domain_id(&ce); 2939 if (did >= 0 && did < cap_ndoms(iommu->cap)) 2940 set_bit(did, iommu->domain_ids); 2941 2942 /* 2943 * We need a marker for copied context entries. This 2944 * marker needs to work for the old format as well as 2945 * for extended context entries. 2946 * 2947 * Bit 67 of the context entry is used. In the old 2948 * format this bit is available to software, in the 2949 * extended format it is the PGE bit, but PGE is ignored 2950 * by HW if PASIDs are disabled (and thus still 2951 * available). 2952 * 2953 * So disable PASIDs first and then mark the entry 2954 * copied. This means that we don't copy PASID 2955 * translations from the old kernel, but this is fine as 2956 * faults there are not fatal. 2957 */ 2958 context_clear_pasid_enable(&ce); 2959 context_set_copied(&ce); 2960 2961 new_ce[idx] = ce; 2962 } 2963 2964 tbl[tbl_idx + pos] = new_ce; 2965 2966 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); 2967 2968 out_unmap: 2969 memunmap(old_ce); 2970 2971 out: 2972 return ret; 2973 } 2974 2975 static int copy_translation_tables(struct intel_iommu *iommu) 2976 { 2977 struct context_entry **ctxt_tbls; 2978 struct root_entry *old_rt; 2979 phys_addr_t old_rt_phys; 2980 int ctxt_table_entries; 2981 unsigned long flags; 2982 u64 rtaddr_reg; 2983 int bus, ret; 2984 bool new_ext, ext; 2985 2986 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG); 2987 ext = !!(rtaddr_reg & DMA_RTADDR_RTT); 2988 new_ext = !!ecap_ecs(iommu->ecap); 2989 2990 /* 2991 * The RTT bit can only be changed when translation is disabled, 2992 * but disabling translation means to open a window for data 2993 * corruption. So bail out and don't copy anything if we would 2994 * have to change the bit. 2995 */ 2996 if (new_ext != ext) 2997 return -EINVAL; 2998 2999 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; 3000 if (!old_rt_phys) 3001 return -EINVAL; 3002 3003 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); 3004 if (!old_rt) 3005 return -ENOMEM; 3006 3007 /* This is too big for the stack - allocate it from slab */ 3008 ctxt_table_entries = ext ? 
512 : 256; 3009 ret = -ENOMEM; 3010 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL); 3011 if (!ctxt_tbls) 3012 goto out_unmap; 3013 3014 for (bus = 0; bus < 256; bus++) { 3015 ret = copy_context_table(iommu, &old_rt[bus], 3016 ctxt_tbls, bus, ext); 3017 if (ret) { 3018 pr_err("%s: Failed to copy context table for bus %d\n", 3019 iommu->name, bus); 3020 continue; 3021 } 3022 } 3023 3024 spin_lock_irqsave(&iommu->lock, flags); 3025 3026 /* Context tables are copied, now write them to the root_entry table */ 3027 for (bus = 0; bus < 256; bus++) { 3028 int idx = ext ? bus * 2 : bus; 3029 u64 val; 3030 3031 if (ctxt_tbls[idx]) { 3032 val = virt_to_phys(ctxt_tbls[idx]) | 1; 3033 iommu->root_entry[bus].lo = val; 3034 } 3035 3036 if (!ext || !ctxt_tbls[idx + 1]) 3037 continue; 3038 3039 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1; 3040 iommu->root_entry[bus].hi = val; 3041 } 3042 3043 spin_unlock_irqrestore(&iommu->lock, flags); 3044 3045 kfree(ctxt_tbls); 3046 3047 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE); 3048 3049 ret = 0; 3050 3051 out_unmap: 3052 memunmap(old_rt); 3053 3054 return ret; 3055 } 3056 3057 #ifdef CONFIG_INTEL_IOMMU_SVM 3058 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) 3059 { 3060 struct intel_iommu *iommu = data; 3061 ioasid_t ioasid; 3062 3063 if (!iommu) 3064 return INVALID_IOASID; 3065 /* 3066 * VT-d virtual command interface always uses the full 20 bit 3067 * PASID range. Host can partition guest PASID range based on 3068 * policies but it is out of guest's control. 3069 */ 3070 if (min < PASID_MIN || max > intel_pasid_max_id) 3071 return INVALID_IOASID; 3072 3073 if (vcmd_alloc_pasid(iommu, &ioasid)) 3074 return INVALID_IOASID; 3075 3076 return ioasid; 3077 } 3078 3079 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) 3080 { 3081 struct intel_iommu *iommu = data; 3082 3083 if (!iommu) 3084 return; 3085 /* 3086 * Sanity check the ioasid owner is done at upper layer, e.g. VFIO 3087 * We can only free the PASID when all the devices are unbound. 3088 */ 3089 if (ioasid_find(NULL, ioasid, NULL)) { 3090 pr_alert("Cannot free active IOASID %d\n", ioasid); 3091 return; 3092 } 3093 vcmd_free_pasid(iommu, ioasid); 3094 } 3095 3096 static void register_pasid_allocator(struct intel_iommu *iommu) 3097 { 3098 /* 3099 * If we are running in the host, no need for custom allocator 3100 * in that PASIDs are allocated from the host system-wide. 3101 */ 3102 if (!cap_caching_mode(iommu->cap)) 3103 return; 3104 3105 if (!sm_supported(iommu)) { 3106 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); 3107 return; 3108 } 3109 3110 /* 3111 * Register a custom PASID allocator if we are running in a guest, 3112 * guest PASID must be obtained via virtual command interface. 3113 * There can be multiple vIOMMUs in each guest but only one allocator 3114 * is active. All vIOMMU allocators will eventually be calling the same 3115 * host allocator. 3116 */ 3117 if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap)) 3118 return; 3119 3120 pr_info("Register custom PASID allocator\n"); 3121 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; 3122 iommu->pasid_allocator.free = intel_vcmd_ioasid_free; 3123 iommu->pasid_allocator.pdata = (void *)iommu; 3124 if (ioasid_register_allocator(&iommu->pasid_allocator)) { 3125 pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); 3126 /* 3127 * Disable scalable mode on this IOMMU if there 3128 * is no custom allocator. 
Mixing SM capable vIOMMU 3129 * and non-SM vIOMMU are not supported. 3130 */ 3131 intel_iommu_sm = 0; 3132 } 3133 } 3134 #endif 3135 3136 static int __init init_dmars(void) 3137 { 3138 struct dmar_drhd_unit *drhd; 3139 struct intel_iommu *iommu; 3140 int ret; 3141 3142 /* 3143 * for each drhd 3144 * allocate root 3145 * initialize and program root entry to not present 3146 * endfor 3147 */ 3148 for_each_drhd_unit(drhd) { 3149 /* 3150 * lock not needed as this is only incremented in the single 3151 * threaded kernel __init code path all other access are read 3152 * only 3153 */ 3154 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) { 3155 g_num_of_iommus++; 3156 continue; 3157 } 3158 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED); 3159 } 3160 3161 /* Preallocate enough resources for IOMMU hot-addition */ 3162 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) 3163 g_num_of_iommus = DMAR_UNITS_SUPPORTED; 3164 3165 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), 3166 GFP_KERNEL); 3167 if (!g_iommus) { 3168 pr_err("Allocating global iommu array failed\n"); 3169 ret = -ENOMEM; 3170 goto error; 3171 } 3172 3173 for_each_iommu(iommu, drhd) { 3174 if (drhd->ignored) { 3175 iommu_disable_translation(iommu); 3176 continue; 3177 } 3178 3179 /* 3180 * Find the max pasid size of all IOMMU's in the system. 3181 * We need to ensure the system pasid table is no bigger 3182 * than the smallest supported. 3183 */ 3184 if (pasid_supported(iommu)) { 3185 u32 temp = 2 << ecap_pss(iommu->ecap); 3186 3187 intel_pasid_max_id = min_t(u32, temp, 3188 intel_pasid_max_id); 3189 } 3190 3191 g_iommus[iommu->seq_id] = iommu; 3192 3193 intel_iommu_init_qi(iommu); 3194 3195 ret = iommu_init_domains(iommu); 3196 if (ret) 3197 goto free_iommu; 3198 3199 init_translation_status(iommu); 3200 3201 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 3202 iommu_disable_translation(iommu); 3203 clear_translation_pre_enabled(iommu); 3204 pr_warn("Translation was enabled for %s but we are not in kdump mode\n", 3205 iommu->name); 3206 } 3207 3208 /* 3209 * TBD: 3210 * we could share the same root & context tables 3211 * among all IOMMU's. Need to Split it later. 3212 */ 3213 ret = iommu_alloc_root_entry(iommu); 3214 if (ret) 3215 goto free_iommu; 3216 3217 if (translation_pre_enabled(iommu)) { 3218 pr_info("Translation already enabled - trying to copy translation structures\n"); 3219 3220 ret = copy_translation_tables(iommu); 3221 if (ret) { 3222 /* 3223 * We found the IOMMU with translation 3224 * enabled - but failed to copy over the 3225 * old root-entry table. Try to proceed 3226 * by disabling translation now and 3227 * allocating a clean root-entry table. 3228 * This might cause DMAR faults, but 3229 * probably the dump will still succeed. 3230 */ 3231 pr_err("Failed to copy translation tables from previous kernel for %s\n", 3232 iommu->name); 3233 iommu_disable_translation(iommu); 3234 clear_translation_pre_enabled(iommu); 3235 } else { 3236 pr_info("Copied translation tables from previous kernel for %s\n", 3237 iommu->name); 3238 } 3239 } 3240 3241 if (!ecap_pass_through(iommu->ecap)) 3242 hw_pass_through = 0; 3243 intel_svm_check(iommu); 3244 } 3245 3246 /* 3247 * Now that qi is enabled on all iommus, set the root entry and flush 3248 * caches. This is required on some Intel X58 chipsets, otherwise the 3249 * flush_context function will loop forever and the boot hangs. 
3250 */ 3251 for_each_active_iommu(iommu, drhd) { 3252 iommu_flush_write_buffer(iommu); 3253 #ifdef CONFIG_INTEL_IOMMU_SVM 3254 register_pasid_allocator(iommu); 3255 #endif 3256 iommu_set_root_entry(iommu); 3257 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 3258 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 3259 } 3260 3261 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA 3262 dmar_map_gfx = 0; 3263 #endif 3264 3265 if (!dmar_map_gfx) 3266 iommu_identity_mapping |= IDENTMAP_GFX; 3267 3268 check_tylersburg_isoch(); 3269 3270 ret = si_domain_init(hw_pass_through); 3271 if (ret) 3272 goto free_iommu; 3273 3274 /* 3275 * for each drhd 3276 * enable fault log 3277 * global invalidate context cache 3278 * global invalidate iotlb 3279 * enable translation 3280 */ 3281 for_each_iommu(iommu, drhd) { 3282 if (drhd->ignored) { 3283 /* 3284 * we always have to disable PMRs or DMA may fail on 3285 * this device 3286 */ 3287 if (force_on) 3288 iommu_disable_protect_mem_regions(iommu); 3289 continue; 3290 } 3291 3292 iommu_flush_write_buffer(iommu); 3293 3294 #ifdef CONFIG_INTEL_IOMMU_SVM 3295 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { 3296 /* 3297 * Call dmar_alloc_hwirq() with dmar_global_lock held, 3298 * could cause possible lock race condition. 3299 */ 3300 up_write(&dmar_global_lock); 3301 ret = intel_svm_enable_prq(iommu); 3302 down_write(&dmar_global_lock); 3303 if (ret) 3304 goto free_iommu; 3305 } 3306 #endif 3307 ret = dmar_set_interrupt(iommu); 3308 if (ret) 3309 goto free_iommu; 3310 } 3311 3312 return 0; 3313 3314 free_iommu: 3315 for_each_active_iommu(iommu, drhd) { 3316 disable_dmar_iommu(iommu); 3317 free_dmar_iommu(iommu); 3318 } 3319 3320 kfree(g_iommus); 3321 3322 error: 3323 return ret; 3324 } 3325 3326 /* This takes a number of _MM_ pages, not VTD pages */ 3327 static unsigned long intel_alloc_iova(struct device *dev, 3328 struct dmar_domain *domain, 3329 unsigned long nrpages, uint64_t dma_mask) 3330 { 3331 unsigned long iova_pfn; 3332 3333 /* 3334 * Restrict dma_mask to the width that the iommu can handle. 3335 * First-level translation restricts the input-address to a 3336 * canonical address (i.e., address bits 63:N have the same 3337 * value as address bit [N-1], where N is 48-bits with 4-level 3338 * paging and 57-bits with 5-level paging). Hence, skip bit 3339 * [N-1]. 
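	 * E.g. with first-level translation and domain->gaw == 48 (4-level
	 * paging), the mask below is clamped to DOMAIN_MAX_ADDR(47), leaving
	 * bit 47 free to carry the canonical sign extension.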
3340 */ 3341 if (domain_use_first_level(domain)) 3342 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1), 3343 dma_mask); 3344 else 3345 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), 3346 dma_mask); 3347 3348 /* Ensure we reserve the whole size-aligned region */ 3349 nrpages = __roundup_pow_of_two(nrpages); 3350 3351 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { 3352 /* 3353 * First try to allocate an io virtual address in 3354 * DMA_BIT_MASK(32) and if that fails then try allocating 3355 * from higher range 3356 */ 3357 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, 3358 IOVA_PFN(DMA_BIT_MASK(32)), false); 3359 if (iova_pfn) 3360 return iova_pfn; 3361 } 3362 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, 3363 IOVA_PFN(dma_mask), true); 3364 if (unlikely(!iova_pfn)) { 3365 dev_err_once(dev, "Allocating %ld-page iova failed\n", 3366 nrpages); 3367 return 0; 3368 } 3369 3370 return iova_pfn; 3371 } 3372 3373 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, 3374 size_t size, int dir, u64 dma_mask) 3375 { 3376 struct dmar_domain *domain; 3377 phys_addr_t start_paddr; 3378 unsigned long iova_pfn; 3379 int prot = 0; 3380 int ret; 3381 struct intel_iommu *iommu; 3382 unsigned long paddr_pfn = paddr >> PAGE_SHIFT; 3383 3384 BUG_ON(dir == DMA_NONE); 3385 3386 if (unlikely(attach_deferred(dev))) 3387 do_deferred_attach(dev); 3388 3389 domain = find_domain(dev); 3390 if (!domain) 3391 return DMA_MAPPING_ERROR; 3392 3393 iommu = domain_get_iommu(domain); 3394 size = aligned_nrpages(paddr, size); 3395 3396 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); 3397 if (!iova_pfn) 3398 goto error; 3399 3400 /* 3401 * Check if DMAR supports zero-length reads on write only 3402 * mappings.. 3403 */ 3404 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 3405 !cap_zlr(iommu->cap)) 3406 prot |= DMA_PTE_READ; 3407 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3408 prot |= DMA_PTE_WRITE; 3409 /* 3410 * paddr - (paddr + size) might be partial page, we should map the whole 3411 * page. 
Note: if two part of one page are separately mapped, we 3412 * might have two guest_addr mapping to the same host paddr, but this 3413 * is not a big problem 3414 */ 3415 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn), 3416 mm_to_dma_pfn(paddr_pfn), size, prot); 3417 if (ret) 3418 goto error; 3419 3420 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT; 3421 start_paddr += paddr & ~PAGE_MASK; 3422 3423 trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT); 3424 3425 return start_paddr; 3426 3427 error: 3428 if (iova_pfn) 3429 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); 3430 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n", 3431 size, (unsigned long long)paddr, dir); 3432 return DMA_MAPPING_ERROR; 3433 } 3434 3435 static dma_addr_t intel_map_page(struct device *dev, struct page *page, 3436 unsigned long offset, size_t size, 3437 enum dma_data_direction dir, 3438 unsigned long attrs) 3439 { 3440 return __intel_map_single(dev, page_to_phys(page) + offset, 3441 size, dir, *dev->dma_mask); 3442 } 3443 3444 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, 3445 size_t size, enum dma_data_direction dir, 3446 unsigned long attrs) 3447 { 3448 return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); 3449 } 3450 3451 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) 3452 { 3453 struct dmar_domain *domain; 3454 unsigned long start_pfn, last_pfn; 3455 unsigned long nrpages; 3456 unsigned long iova_pfn; 3457 struct intel_iommu *iommu; 3458 struct page *freelist; 3459 struct pci_dev *pdev = NULL; 3460 3461 domain = find_domain(dev); 3462 BUG_ON(!domain); 3463 3464 iommu = domain_get_iommu(domain); 3465 3466 iova_pfn = IOVA_PFN(dev_addr); 3467 3468 nrpages = aligned_nrpages(dev_addr, size); 3469 start_pfn = mm_to_dma_pfn(iova_pfn); 3470 last_pfn = start_pfn + nrpages - 1; 3471 3472 if (dev_is_pci(dev)) 3473 pdev = to_pci_dev(dev); 3474 3475 freelist = domain_unmap(domain, start_pfn, last_pfn); 3476 if (intel_iommu_strict || (pdev && pdev->untrusted) || 3477 !has_iova_flush_queue(&domain->iovad)) { 3478 iommu_flush_iotlb_psi(iommu, domain, start_pfn, 3479 nrpages, !freelist, 0); 3480 /* free iova */ 3481 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); 3482 dma_free_pagelist(freelist); 3483 } else { 3484 queue_iova(&domain->iovad, iova_pfn, nrpages, 3485 (unsigned long)freelist); 3486 /* 3487 * queue up the release of the unmap to save the 1/6th of the 3488 * cpu used up by the iotlb flush operation... 
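		 * The deferred IOVAs are invalidated in batches by the iova
		 * flush queue; once the IOTLB flush has gone out, the queue's
		 * entry destructor frees the page-table pages recorded in
		 * 'freelist'.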
3489 */ 3490 } 3491 3492 trace_unmap_single(dev, dev_addr, size); 3493 } 3494 3495 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, 3496 size_t size, enum dma_data_direction dir, 3497 unsigned long attrs) 3498 { 3499 intel_unmap(dev, dev_addr, size); 3500 } 3501 3502 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, 3503 size_t size, enum dma_data_direction dir, unsigned long attrs) 3504 { 3505 intel_unmap(dev, dev_addr, size); 3506 } 3507 3508 static void *intel_alloc_coherent(struct device *dev, size_t size, 3509 dma_addr_t *dma_handle, gfp_t flags, 3510 unsigned long attrs) 3511 { 3512 struct page *page = NULL; 3513 int order; 3514 3515 if (unlikely(attach_deferred(dev))) 3516 do_deferred_attach(dev); 3517 3518 size = PAGE_ALIGN(size); 3519 order = get_order(size); 3520 3521 if (gfpflags_allow_blocking(flags)) { 3522 unsigned int count = size >> PAGE_SHIFT; 3523 3524 page = dma_alloc_from_contiguous(dev, count, order, 3525 flags & __GFP_NOWARN); 3526 } 3527 3528 if (!page) 3529 page = alloc_pages(flags, order); 3530 if (!page) 3531 return NULL; 3532 memset(page_address(page), 0, size); 3533 3534 *dma_handle = __intel_map_single(dev, page_to_phys(page), size, 3535 DMA_BIDIRECTIONAL, 3536 dev->coherent_dma_mask); 3537 if (*dma_handle != DMA_MAPPING_ERROR) 3538 return page_address(page); 3539 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) 3540 __free_pages(page, order); 3541 3542 return NULL; 3543 } 3544 3545 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, 3546 dma_addr_t dma_handle, unsigned long attrs) 3547 { 3548 int order; 3549 struct page *page = virt_to_page(vaddr); 3550 3551 size = PAGE_ALIGN(size); 3552 order = get_order(size); 3553 3554 intel_unmap(dev, dma_handle, size); 3555 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) 3556 __free_pages(page, order); 3557 } 3558 3559 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, 3560 int nelems, enum dma_data_direction dir, 3561 unsigned long attrs) 3562 { 3563 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK; 3564 unsigned long nrpages = 0; 3565 struct scatterlist *sg; 3566 int i; 3567 3568 for_each_sg(sglist, sg, nelems, i) { 3569 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); 3570 } 3571 3572 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT); 3573 3574 trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT); 3575 } 3576 3577 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, 3578 enum dma_data_direction dir, unsigned long attrs) 3579 { 3580 int i; 3581 struct dmar_domain *domain; 3582 size_t size = 0; 3583 int prot = 0; 3584 unsigned long iova_pfn; 3585 int ret; 3586 struct scatterlist *sg; 3587 unsigned long start_vpfn; 3588 struct intel_iommu *iommu; 3589 3590 BUG_ON(dir == DMA_NONE); 3591 3592 if (unlikely(attach_deferred(dev))) 3593 do_deferred_attach(dev); 3594 3595 domain = find_domain(dev); 3596 if (!domain) 3597 return 0; 3598 3599 iommu = domain_get_iommu(domain); 3600 3601 for_each_sg(sglist, sg, nelems, i) 3602 size += aligned_nrpages(sg->offset, sg->length); 3603 3604 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), 3605 *dev->dma_mask); 3606 if (!iova_pfn) { 3607 sglist->dma_length = 0; 3608 return 0; 3609 } 3610 3611 /* 3612 * Check if DMAR supports zero-length reads on write only 3613 * mappings.. 
3614 */ 3615 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 3616 !cap_zlr(iommu->cap)) 3617 prot |= DMA_PTE_READ; 3618 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3619 prot |= DMA_PTE_WRITE; 3620 3621 start_vpfn = mm_to_dma_pfn(iova_pfn); 3622 3623 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); 3624 if (unlikely(ret)) { 3625 dma_pte_free_pagetable(domain, start_vpfn, 3626 start_vpfn + size - 1, 3627 agaw_to_level(domain->agaw) + 1); 3628 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); 3629 return 0; 3630 } 3631 3632 for_each_sg(sglist, sg, nelems, i) 3633 trace_map_sg(dev, i + 1, nelems, sg); 3634 3635 return nelems; 3636 } 3637 3638 static u64 intel_get_required_mask(struct device *dev) 3639 { 3640 return DMA_BIT_MASK(32); 3641 } 3642 3643 static const struct dma_map_ops intel_dma_ops = { 3644 .alloc = intel_alloc_coherent, 3645 .free = intel_free_coherent, 3646 .map_sg = intel_map_sg, 3647 .unmap_sg = intel_unmap_sg, 3648 .map_page = intel_map_page, 3649 .unmap_page = intel_unmap_page, 3650 .map_resource = intel_map_resource, 3651 .unmap_resource = intel_unmap_resource, 3652 .dma_supported = dma_direct_supported, 3653 .mmap = dma_common_mmap, 3654 .get_sgtable = dma_common_get_sgtable, 3655 .get_required_mask = intel_get_required_mask, 3656 }; 3657 3658 static void 3659 bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size, 3660 enum dma_data_direction dir, enum dma_sync_target target) 3661 { 3662 struct dmar_domain *domain; 3663 phys_addr_t tlb_addr; 3664 3665 domain = find_domain(dev); 3666 if (WARN_ON(!domain)) 3667 return; 3668 3669 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr); 3670 if (is_swiotlb_buffer(tlb_addr)) 3671 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target); 3672 } 3673 3674 static dma_addr_t 3675 bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, 3676 enum dma_data_direction dir, unsigned long attrs, 3677 u64 dma_mask) 3678 { 3679 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE); 3680 struct dmar_domain *domain; 3681 struct intel_iommu *iommu; 3682 unsigned long iova_pfn; 3683 unsigned long nrpages; 3684 phys_addr_t tlb_addr; 3685 int prot = 0; 3686 int ret; 3687 3688 if (unlikely(attach_deferred(dev))) 3689 do_deferred_attach(dev); 3690 3691 domain = find_domain(dev); 3692 3693 if (WARN_ON(dir == DMA_NONE || !domain)) 3694 return DMA_MAPPING_ERROR; 3695 3696 iommu = domain_get_iommu(domain); 3697 if (WARN_ON(!iommu)) 3698 return DMA_MAPPING_ERROR; 3699 3700 nrpages = aligned_nrpages(0, size); 3701 iova_pfn = intel_alloc_iova(dev, domain, 3702 dma_to_mm_pfn(nrpages), dma_mask); 3703 if (!iova_pfn) 3704 return DMA_MAPPING_ERROR; 3705 3706 /* 3707 * Check if DMAR supports zero-length reads on write only 3708 * mappings.. 3709 */ 3710 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || 3711 !cap_zlr(iommu->cap)) 3712 prot |= DMA_PTE_READ; 3713 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3714 prot |= DMA_PTE_WRITE; 3715 3716 /* 3717 * If both the physical buffer start address and size are 3718 * page aligned, we don't need to use a bounce page. 3719 */ 3720 if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) { 3721 tlb_addr = swiotlb_tbl_map_single(dev, 3722 __phys_to_dma(dev, io_tlb_start), 3723 paddr, size, aligned_size, dir, attrs); 3724 if (tlb_addr == DMA_MAPPING_ERROR) { 3725 goto swiotlb_error; 3726 } else { 3727 /* Cleanup the padding area. 
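				 * Zero the part of the aligned bounce slot
				 * that swiotlb did not fill from the original
				 * buffer, so stale swiotlb contents are never
				 * exposed to the device.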
*/ 3728 void *padding_start = phys_to_virt(tlb_addr); 3729 size_t padding_size = aligned_size; 3730 3731 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 3732 (dir == DMA_TO_DEVICE || 3733 dir == DMA_BIDIRECTIONAL)) { 3734 padding_start += size; 3735 padding_size -= size; 3736 } 3737 3738 memset(padding_start, 0, padding_size); 3739 } 3740 } else { 3741 tlb_addr = paddr; 3742 } 3743 3744 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn), 3745 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot); 3746 if (ret) 3747 goto mapping_error; 3748 3749 trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size); 3750 3751 return (phys_addr_t)iova_pfn << PAGE_SHIFT; 3752 3753 mapping_error: 3754 if (is_swiotlb_buffer(tlb_addr)) 3755 swiotlb_tbl_unmap_single(dev, tlb_addr, size, 3756 aligned_size, dir, attrs); 3757 swiotlb_error: 3758 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); 3759 dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n", 3760 size, (unsigned long long)paddr, dir); 3761 3762 return DMA_MAPPING_ERROR; 3763 } 3764 3765 static void 3766 bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, 3767 enum dma_data_direction dir, unsigned long attrs) 3768 { 3769 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE); 3770 struct dmar_domain *domain; 3771 phys_addr_t tlb_addr; 3772 3773 domain = find_domain(dev); 3774 if (WARN_ON(!domain)) 3775 return; 3776 3777 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr); 3778 if (WARN_ON(!tlb_addr)) 3779 return; 3780 3781 intel_unmap(dev, dev_addr, size); 3782 if (is_swiotlb_buffer(tlb_addr)) 3783 swiotlb_tbl_unmap_single(dev, tlb_addr, size, 3784 aligned_size, dir, attrs); 3785 3786 trace_bounce_unmap_single(dev, dev_addr, size); 3787 } 3788 3789 static dma_addr_t 3790 bounce_map_page(struct device *dev, struct page *page, unsigned long offset, 3791 size_t size, enum dma_data_direction dir, unsigned long attrs) 3792 { 3793 return bounce_map_single(dev, page_to_phys(page) + offset, 3794 size, dir, attrs, *dev->dma_mask); 3795 } 3796 3797 static dma_addr_t 3798 bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, 3799 enum dma_data_direction dir, unsigned long attrs) 3800 { 3801 return bounce_map_single(dev, phys_addr, size, 3802 dir, attrs, *dev->dma_mask); 3803 } 3804 3805 static void 3806 bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, 3807 enum dma_data_direction dir, unsigned long attrs) 3808 { 3809 bounce_unmap_single(dev, dev_addr, size, dir, attrs); 3810 } 3811 3812 static void 3813 bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size, 3814 enum dma_data_direction dir, unsigned long attrs) 3815 { 3816 bounce_unmap_single(dev, dev_addr, size, dir, attrs); 3817 } 3818 3819 static void 3820 bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, 3821 enum dma_data_direction dir, unsigned long attrs) 3822 { 3823 struct scatterlist *sg; 3824 int i; 3825 3826 for_each_sg(sglist, sg, nelems, i) 3827 bounce_unmap_page(dev, sg->dma_address, 3828 sg_dma_len(sg), dir, attrs); 3829 } 3830 3831 static int 3832 bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, 3833 enum dma_data_direction dir, unsigned long attrs) 3834 { 3835 int i; 3836 struct scatterlist *sg; 3837 3838 for_each_sg(sglist, sg, nelems, i) { 3839 sg->dma_address = bounce_map_page(dev, sg_page(sg), 3840 sg->offset, sg->length, 3841 dir, attrs); 3842 if (sg->dma_address == DMA_MAPPING_ERROR) 3843 goto out_unmap; 3844 sg_dma_len(sg) = 
sg->length; 3845 } 3846 3847 for_each_sg(sglist, sg, nelems, i) 3848 trace_bounce_map_sg(dev, i + 1, nelems, sg); 3849 3850 return nelems; 3851 3852 out_unmap: 3853 bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); 3854 return 0; 3855 } 3856 3857 static void 3858 bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr, 3859 size_t size, enum dma_data_direction dir) 3860 { 3861 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU); 3862 } 3863 3864 static void 3865 bounce_sync_single_for_device(struct device *dev, dma_addr_t addr, 3866 size_t size, enum dma_data_direction dir) 3867 { 3868 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE); 3869 } 3870 3871 static void 3872 bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, 3873 int nelems, enum dma_data_direction dir) 3874 { 3875 struct scatterlist *sg; 3876 int i; 3877 3878 for_each_sg(sglist, sg, nelems, i) 3879 bounce_sync_single(dev, sg_dma_address(sg), 3880 sg_dma_len(sg), dir, SYNC_FOR_CPU); 3881 } 3882 3883 static void 3884 bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, 3885 int nelems, enum dma_data_direction dir) 3886 { 3887 struct scatterlist *sg; 3888 int i; 3889 3890 for_each_sg(sglist, sg, nelems, i) 3891 bounce_sync_single(dev, sg_dma_address(sg), 3892 sg_dma_len(sg), dir, SYNC_FOR_DEVICE); 3893 } 3894 3895 static const struct dma_map_ops bounce_dma_ops = { 3896 .alloc = intel_alloc_coherent, 3897 .free = intel_free_coherent, 3898 .map_sg = bounce_map_sg, 3899 .unmap_sg = bounce_unmap_sg, 3900 .map_page = bounce_map_page, 3901 .unmap_page = bounce_unmap_page, 3902 .sync_single_for_cpu = bounce_sync_single_for_cpu, 3903 .sync_single_for_device = bounce_sync_single_for_device, 3904 .sync_sg_for_cpu = bounce_sync_sg_for_cpu, 3905 .sync_sg_for_device = bounce_sync_sg_for_device, 3906 .map_resource = bounce_map_resource, 3907 .unmap_resource = bounce_unmap_resource, 3908 .dma_supported = dma_direct_supported, 3909 }; 3910 3911 static inline int iommu_domain_cache_init(void) 3912 { 3913 int ret = 0; 3914 3915 iommu_domain_cache = kmem_cache_create("iommu_domain", 3916 sizeof(struct dmar_domain), 3917 0, 3918 SLAB_HWCACHE_ALIGN, 3919 3920 NULL); 3921 if (!iommu_domain_cache) { 3922 pr_err("Couldn't create iommu_domain cache\n"); 3923 ret = -ENOMEM; 3924 } 3925 3926 return ret; 3927 } 3928 3929 static inline int iommu_devinfo_cache_init(void) 3930 { 3931 int ret = 0; 3932 3933 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", 3934 sizeof(struct device_domain_info), 3935 0, 3936 SLAB_HWCACHE_ALIGN, 3937 NULL); 3938 if (!iommu_devinfo_cache) { 3939 pr_err("Couldn't create devinfo cache\n"); 3940 ret = -ENOMEM; 3941 } 3942 3943 return ret; 3944 } 3945 3946 static int __init iommu_init_mempool(void) 3947 { 3948 int ret; 3949 ret = iova_cache_get(); 3950 if (ret) 3951 return ret; 3952 3953 ret = iommu_domain_cache_init(); 3954 if (ret) 3955 goto domain_error; 3956 3957 ret = iommu_devinfo_cache_init(); 3958 if (!ret) 3959 return ret; 3960 3961 kmem_cache_destroy(iommu_domain_cache); 3962 domain_error: 3963 iova_cache_put(); 3964 3965 return -ENOMEM; 3966 } 3967 3968 static void __init iommu_exit_mempool(void) 3969 { 3970 kmem_cache_destroy(iommu_devinfo_cache); 3971 kmem_cache_destroy(iommu_domain_cache); 3972 iova_cache_put(); 3973 } 3974 3975 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 3976 { 3977 struct dmar_drhd_unit *drhd; 3978 u32 vtbar; 3979 int rc; 3980 3981 /* We know that this device on this chipset has its own IOMMU. 
3982 * If we find it under a different IOMMU, then the BIOS is lying 3983 * to us. Hope that the IOMMU for this device is actually 3984 * disabled, and it needs no translation... 3985 */ 3986 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); 3987 if (rc) { 3988 /* "can't" happen */ 3989 dev_info(&pdev->dev, "failed to run vt-d quirk\n"); 3990 return; 3991 } 3992 vtbar &= 0xffff0000; 3993 3994 /* we know that the this iommu should be at offset 0xa000 from vtbar */ 3995 drhd = dmar_find_matched_drhd_unit(pdev); 3996 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { 3997 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); 3998 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 3999 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 4000 } 4001 } 4002 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); 4003 4004 static void __init init_no_remapping_devices(void) 4005 { 4006 struct dmar_drhd_unit *drhd; 4007 struct device *dev; 4008 int i; 4009 4010 for_each_drhd_unit(drhd) { 4011 if (!drhd->include_all) { 4012 for_each_active_dev_scope(drhd->devices, 4013 drhd->devices_cnt, i, dev) 4014 break; 4015 /* ignore DMAR unit if no devices exist */ 4016 if (i == drhd->devices_cnt) 4017 drhd->ignored = 1; 4018 } 4019 } 4020 4021 for_each_active_drhd_unit(drhd) { 4022 if (drhd->include_all) 4023 continue; 4024 4025 for_each_active_dev_scope(drhd->devices, 4026 drhd->devices_cnt, i, dev) 4027 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) 4028 break; 4029 if (i < drhd->devices_cnt) 4030 continue; 4031 4032 /* This IOMMU has *only* gfx devices. Either bypass it or 4033 set the gfx_mapped flag, as appropriate */ 4034 if (!dmar_map_gfx) { 4035 drhd->ignored = 1; 4036 for_each_active_dev_scope(drhd->devices, 4037 drhd->devices_cnt, i, dev) 4038 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 4039 } 4040 } 4041 } 4042 4043 #ifdef CONFIG_SUSPEND 4044 static int init_iommu_hw(void) 4045 { 4046 struct dmar_drhd_unit *drhd; 4047 struct intel_iommu *iommu = NULL; 4048 4049 for_each_active_iommu(iommu, drhd) 4050 if (iommu->qi) 4051 dmar_reenable_qi(iommu); 4052 4053 for_each_iommu(iommu, drhd) { 4054 if (drhd->ignored) { 4055 /* 4056 * we always have to disable PMRs or DMA may fail on 4057 * this device 4058 */ 4059 if (force_on) 4060 iommu_disable_protect_mem_regions(iommu); 4061 continue; 4062 } 4063 4064 iommu_flush_write_buffer(iommu); 4065 4066 iommu_set_root_entry(iommu); 4067 4068 iommu->flush.flush_context(iommu, 0, 0, 0, 4069 DMA_CCMD_GLOBAL_INVL); 4070 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 4071 iommu_enable_translation(iommu); 4072 iommu_disable_protect_mem_regions(iommu); 4073 } 4074 4075 return 0; 4076 } 4077 4078 static void iommu_flush_all(void) 4079 { 4080 struct dmar_drhd_unit *drhd; 4081 struct intel_iommu *iommu; 4082 4083 for_each_active_iommu(iommu, drhd) { 4084 iommu->flush.flush_context(iommu, 0, 0, 0, 4085 DMA_CCMD_GLOBAL_INVL); 4086 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 4087 DMA_TLB_GLOBAL_FLUSH); 4088 } 4089 } 4090 4091 static int iommu_suspend(void) 4092 { 4093 struct dmar_drhd_unit *drhd; 4094 struct intel_iommu *iommu = NULL; 4095 unsigned long flag; 4096 4097 for_each_active_iommu(iommu, drhd) { 4098 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32), 4099 GFP_ATOMIC); 4100 if (!iommu->iommu_state) 4101 goto nomem; 4102 } 4103 4104 iommu_flush_all(); 4105 4106 for_each_active_iommu(iommu, drhd) { 4107 
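		/*
		 * Translation is quiesced first; the four fault-event
		 * registers (FECTL/FEDATA/FEADDR/FEUADDR) are then saved
		 * under register_lock, and iommu_resume() below writes the
		 * same values back after init_iommu_hw() has re-enabled
		 * translation.
		 */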
iommu_disable_translation(iommu); 4108 4109 raw_spin_lock_irqsave(&iommu->register_lock, flag); 4110 4111 iommu->iommu_state[SR_DMAR_FECTL_REG] = 4112 readl(iommu->reg + DMAR_FECTL_REG); 4113 iommu->iommu_state[SR_DMAR_FEDATA_REG] = 4114 readl(iommu->reg + DMAR_FEDATA_REG); 4115 iommu->iommu_state[SR_DMAR_FEADDR_REG] = 4116 readl(iommu->reg + DMAR_FEADDR_REG); 4117 iommu->iommu_state[SR_DMAR_FEUADDR_REG] = 4118 readl(iommu->reg + DMAR_FEUADDR_REG); 4119 4120 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 4121 } 4122 return 0; 4123 4124 nomem: 4125 for_each_active_iommu(iommu, drhd) 4126 kfree(iommu->iommu_state); 4127 4128 return -ENOMEM; 4129 } 4130 4131 static void iommu_resume(void) 4132 { 4133 struct dmar_drhd_unit *drhd; 4134 struct intel_iommu *iommu = NULL; 4135 unsigned long flag; 4136 4137 if (init_iommu_hw()) { 4138 if (force_on) 4139 panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); 4140 else 4141 WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); 4142 return; 4143 } 4144 4145 for_each_active_iommu(iommu, drhd) { 4146 4147 raw_spin_lock_irqsave(&iommu->register_lock, flag); 4148 4149 writel(iommu->iommu_state[SR_DMAR_FECTL_REG], 4150 iommu->reg + DMAR_FECTL_REG); 4151 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG], 4152 iommu->reg + DMAR_FEDATA_REG); 4153 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG], 4154 iommu->reg + DMAR_FEADDR_REG); 4155 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], 4156 iommu->reg + DMAR_FEUADDR_REG); 4157 4158 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 4159 } 4160 4161 for_each_active_iommu(iommu, drhd) 4162 kfree(iommu->iommu_state); 4163 } 4164 4165 static struct syscore_ops iommu_syscore_ops = { 4166 .resume = iommu_resume, 4167 .suspend = iommu_suspend, 4168 }; 4169 4170 static void __init init_iommu_pm_ops(void) 4171 { 4172 register_syscore_ops(&iommu_syscore_ops); 4173 } 4174 4175 #else 4176 static inline void init_iommu_pm_ops(void) {} 4177 #endif /* CONFIG_SUSPEND */ 4178 4179 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) 4180 { 4181 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) || 4182 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) || 4183 rmrr->end_address <= rmrr->base_address || 4184 arch_rmrr_sanity_check(rmrr)) 4185 return -EINVAL; 4186 4187 return 0; 4188 } 4189 4190 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) 4191 { 4192 struct acpi_dmar_reserved_memory *rmrr; 4193 struct dmar_rmrr_unit *rmrru; 4194 4195 rmrr = (struct acpi_dmar_reserved_memory *)header; 4196 if (rmrr_sanity_check(rmrr)) { 4197 pr_warn(FW_BUG 4198 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n" 4199 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 4200 rmrr->base_address, rmrr->end_address, 4201 dmi_get_system_info(DMI_BIOS_VENDOR), 4202 dmi_get_system_info(DMI_BIOS_VERSION), 4203 dmi_get_system_info(DMI_PRODUCT_VERSION)); 4204 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 4205 } 4206 4207 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 4208 if (!rmrru) 4209 goto out; 4210 4211 rmrru->hdr = header; 4212 4213 rmrru->base_address = rmrr->base_address; 4214 rmrru->end_address = rmrr->end_address; 4215 4216 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), 4217 ((void *)rmrr) + rmrr->header.length, 4218 &rmrru->devices_cnt); 4219 if (rmrru->devices_cnt && rmrru->devices == NULL) 4220 goto free_rmrru; 4221 4222 list_add(&rmrru->list, &dmar_rmrr_units); 4223 4224 return 0; 4225 free_rmrru: 4226 kfree(rmrru); 4227 out: 4228 return -ENOMEM; 4229 } 4230 4231 static struct
dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr) 4232 { 4233 struct dmar_atsr_unit *atsru; 4234 struct acpi_dmar_atsr *tmp; 4235 4236 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list, 4237 dmar_rcu_check()) { 4238 tmp = (struct acpi_dmar_atsr *)atsru->hdr; 4239 if (atsr->segment != tmp->segment) 4240 continue; 4241 if (atsr->header.length != tmp->header.length) 4242 continue; 4243 if (memcmp(atsr, tmp, atsr->header.length) == 0) 4244 return atsru; 4245 } 4246 4247 return NULL; 4248 } 4249 4250 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg) 4251 { 4252 struct acpi_dmar_atsr *atsr; 4253 struct dmar_atsr_unit *atsru; 4254 4255 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) 4256 return 0; 4257 4258 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 4259 atsru = dmar_find_atsr(atsr); 4260 if (atsru) 4261 return 0; 4262 4263 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL); 4264 if (!atsru) 4265 return -ENOMEM; 4266 4267 /* 4268 * If memory is allocated from slab by ACPI _DSM method, we need to 4269 * copy the memory content because the memory buffer will be freed 4270 * on return. 4271 */ 4272 atsru->hdr = (void *)(atsru + 1); 4273 memcpy(atsru->hdr, hdr, hdr->length); 4274 atsru->include_all = atsr->flags & 0x1; 4275 if (!atsru->include_all) { 4276 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), 4277 (void *)atsr + atsr->header.length, 4278 &atsru->devices_cnt); 4279 if (atsru->devices_cnt && atsru->devices == NULL) { 4280 kfree(atsru); 4281 return -ENOMEM; 4282 } 4283 } 4284 4285 list_add_rcu(&atsru->list, &dmar_atsr_units); 4286 4287 return 0; 4288 } 4289 4290 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) 4291 { 4292 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); 4293 kfree(atsru); 4294 } 4295 4296 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg) 4297 { 4298 struct acpi_dmar_atsr *atsr; 4299 struct dmar_atsr_unit *atsru; 4300 4301 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 4302 atsru = dmar_find_atsr(atsr); 4303 if (atsru) { 4304 list_del_rcu(&atsru->list); 4305 synchronize_rcu(); 4306 intel_iommu_free_atsr(atsru); 4307 } 4308 4309 return 0; 4310 } 4311 4312 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) 4313 { 4314 int i; 4315 struct device *dev; 4316 struct acpi_dmar_atsr *atsr; 4317 struct dmar_atsr_unit *atsru; 4318 4319 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 4320 atsru = dmar_find_atsr(atsr); 4321 if (!atsru) 4322 return 0; 4323 4324 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) { 4325 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt, 4326 i, dev) 4327 return -EBUSY; 4328 } 4329 4330 return 0; 4331 } 4332 4333 static int intel_iommu_add(struct dmar_drhd_unit *dmaru) 4334 { 4335 int sp, ret; 4336 struct intel_iommu *iommu = dmaru->iommu; 4337 4338 if (g_iommus[iommu->seq_id]) 4339 return 0; 4340 4341 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { 4342 pr_warn("%s: Doesn't support hardware pass through.\n", 4343 iommu->name); 4344 return -ENXIO; 4345 } 4346 if (!ecap_sc_support(iommu->ecap) && 4347 domain_update_iommu_snooping(iommu)) { 4348 pr_warn("%s: Doesn't support snooping.\n", 4349 iommu->name); 4350 return -ENXIO; 4351 } 4352 sp = domain_update_iommu_superpage(NULL, iommu) - 1; 4353 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { 4354 pr_warn("%s: Doesn't support large page.\n", 4355 iommu->name); 4356 return -ENXIO; 4357 } 4358 4359 /* 4360 * Disable 
translation if already enabled prior to OS handover. 4361 */ 4362 if (iommu->gcmd & DMA_GCMD_TE) 4363 iommu_disable_translation(iommu); 4364 4365 g_iommus[iommu->seq_id] = iommu; 4366 ret = iommu_init_domains(iommu); 4367 if (ret == 0) 4368 ret = iommu_alloc_root_entry(iommu); 4369 if (ret) 4370 goto out; 4371 4372 intel_svm_check(iommu); 4373 4374 if (dmaru->ignored) { 4375 /* 4376 * we always have to disable PMRs or DMA may fail on this device 4377 */ 4378 if (force_on) 4379 iommu_disable_protect_mem_regions(iommu); 4380 return 0; 4381 } 4382 4383 intel_iommu_init_qi(iommu); 4384 iommu_flush_write_buffer(iommu); 4385 4386 #ifdef CONFIG_INTEL_IOMMU_SVM 4387 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { 4388 ret = intel_svm_enable_prq(iommu); 4389 if (ret) 4390 goto disable_iommu; 4391 } 4392 #endif 4393 ret = dmar_set_interrupt(iommu); 4394 if (ret) 4395 goto disable_iommu; 4396 4397 iommu_set_root_entry(iommu); 4398 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 4399 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 4400 iommu_enable_translation(iommu); 4401 4402 iommu_disable_protect_mem_regions(iommu); 4403 return 0; 4404 4405 disable_iommu: 4406 disable_dmar_iommu(iommu); 4407 out: 4408 free_dmar_iommu(iommu); 4409 return ret; 4410 } 4411 4412 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert) 4413 { 4414 int ret = 0; 4415 struct intel_iommu *iommu = dmaru->iommu; 4416 4417 if (!intel_iommu_enabled) 4418 return 0; 4419 if (iommu == NULL) 4420 return -EINVAL; 4421 4422 if (insert) { 4423 ret = intel_iommu_add(dmaru); 4424 } else { 4425 disable_dmar_iommu(iommu); 4426 free_dmar_iommu(iommu); 4427 } 4428 4429 return ret; 4430 } 4431 4432 static void intel_iommu_free_dmars(void) 4433 { 4434 struct dmar_rmrr_unit *rmrru, *rmrr_n; 4435 struct dmar_atsr_unit *atsru, *atsr_n; 4436 4437 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { 4438 list_del(&rmrru->list); 4439 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); 4440 kfree(rmrru); 4441 } 4442 4443 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { 4444 list_del(&atsru->list); 4445 intel_iommu_free_atsr(atsru); 4446 } 4447 } 4448 4449 int dmar_find_matched_atsr_unit(struct pci_dev *dev) 4450 { 4451 int i, ret = 1; 4452 struct pci_bus *bus; 4453 struct pci_dev *bridge = NULL; 4454 struct device *tmp; 4455 struct acpi_dmar_atsr *atsr; 4456 struct dmar_atsr_unit *atsru; 4457 4458 dev = pci_physfn(dev); 4459 for (bus = dev->bus; bus; bus = bus->parent) { 4460 bridge = bus->self; 4461 /* If it's an integrated device, allow ATS */ 4462 if (!bridge) 4463 return 1; 4464 /* Connected via non-PCIe: no ATS */ 4465 if (!pci_is_pcie(bridge) || 4466 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) 4467 return 0; 4468 /* If we found the root port, look it up in the ATSR */ 4469 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) 4470 break; 4471 } 4472 4473 rcu_read_lock(); 4474 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { 4475 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 4476 if (atsr->segment != pci_domain_nr(dev->bus)) 4477 continue; 4478 4479 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) 4480 if (tmp == &bridge->dev) 4481 goto out; 4482 4483 if (atsru->include_all) 4484 goto out; 4485 } 4486 ret = 0; 4487 out: 4488 rcu_read_unlock(); 4489 4490 return ret; 4491 } 4492 4493 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) 4494 { 4495 int ret; 4496 struct dmar_rmrr_unit *rmrru; 4497 struct 
dmar_atsr_unit *atsru; 4498 struct acpi_dmar_atsr *atsr; 4499 struct acpi_dmar_reserved_memory *rmrr; 4500 4501 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) 4502 return 0; 4503 4504 list_for_each_entry(rmrru, &dmar_rmrr_units, list) { 4505 rmrr = container_of(rmrru->hdr, 4506 struct acpi_dmar_reserved_memory, header); 4507 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 4508 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), 4509 ((void *)rmrr) + rmrr->header.length, 4510 rmrr->segment, rmrru->devices, 4511 rmrru->devices_cnt); 4512 if (ret < 0) 4513 return ret; 4514 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 4515 dmar_remove_dev_scope(info, rmrr->segment, 4516 rmrru->devices, rmrru->devices_cnt); 4517 } 4518 } 4519 4520 list_for_each_entry(atsru, &dmar_atsr_units, list) { 4521 if (atsru->include_all) 4522 continue; 4523 4524 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 4525 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 4526 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), 4527 (void *)atsr + atsr->header.length, 4528 atsr->segment, atsru->devices, 4529 atsru->devices_cnt); 4530 if (ret > 0) 4531 break; 4532 else if (ret < 0) 4533 return ret; 4534 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 4535 if (dmar_remove_dev_scope(info, atsr->segment, 4536 atsru->devices, atsru->devices_cnt)) 4537 break; 4538 } 4539 } 4540 4541 return 0; 4542 } 4543 4544 static int intel_iommu_memory_notifier(struct notifier_block *nb, 4545 unsigned long val, void *v) 4546 { 4547 struct memory_notify *mhp = v; 4548 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); 4549 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + 4550 mhp->nr_pages - 1); 4551 4552 switch (val) { 4553 case MEM_GOING_ONLINE: 4554 if (iommu_domain_identity_map(si_domain, 4555 start_vpfn, last_vpfn)) { 4556 pr_warn("Failed to build identity map for [%lx-%lx]\n", 4557 start_vpfn, last_vpfn); 4558 return NOTIFY_BAD; 4559 } 4560 break; 4561 4562 case MEM_OFFLINE: 4563 case MEM_CANCEL_ONLINE: 4564 { 4565 struct dmar_drhd_unit *drhd; 4566 struct intel_iommu *iommu; 4567 struct page *freelist; 4568 4569 freelist = domain_unmap(si_domain, 4570 start_vpfn, last_vpfn); 4571 4572 rcu_read_lock(); 4573 for_each_active_iommu(iommu, drhd) 4574 iommu_flush_iotlb_psi(iommu, si_domain, 4575 start_vpfn, mhp->nr_pages, 4576 !freelist, 0); 4577 rcu_read_unlock(); 4578 dma_free_pagelist(freelist); 4579 } 4580 break; 4581 } 4582 4583 return NOTIFY_OK; 4584 } 4585 4586 static struct notifier_block intel_iommu_memory_nb = { 4587 .notifier_call = intel_iommu_memory_notifier, 4588 .priority = 0 4589 }; 4590 4591 static void free_all_cpu_cached_iovas(unsigned int cpu) 4592 { 4593 int i; 4594 4595 for (i = 0; i < g_num_of_iommus; i++) { 4596 struct intel_iommu *iommu = g_iommus[i]; 4597 struct dmar_domain *domain; 4598 int did; 4599 4600 if (!iommu) 4601 continue; 4602 4603 for (did = 0; did < cap_ndoms(iommu->cap); did++) { 4604 domain = get_iommu_domain(iommu, (u16)did); 4605 4606 if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) 4607 continue; 4608 4609 free_cpu_cached_iovas(cpu, &domain->iovad); 4610 } 4611 } 4612 } 4613 4614 static int intel_iommu_cpu_dead(unsigned int cpu) 4615 { 4616 free_all_cpu_cached_iovas(cpu); 4617 return 0; 4618 } 4619 4620 static void intel_disable_iommus(void) 4621 { 4622 struct intel_iommu *iommu = NULL; 4623 struct dmar_drhd_unit *drhd; 4624 4625 for_each_iommu(iommu, drhd) 4626 iommu_disable_translation(iommu); 4627 } 4628 4629 void intel_iommu_shutdown(void) 
4630 { 4631 struct dmar_drhd_unit *drhd; 4632 struct intel_iommu *iommu = NULL; 4633 4634 if (no_iommu || dmar_disabled) 4635 return; 4636 4637 down_write(&dmar_global_lock); 4638 4639 /* Disable PMRs explicitly here. */ 4640 for_each_iommu(iommu, drhd) 4641 iommu_disable_protect_mem_regions(iommu); 4642 4643 /* Make sure the IOMMUs are switched off */ 4644 intel_disable_iommus(); 4645 4646 up_write(&dmar_global_lock); 4647 } 4648 4649 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) 4650 { 4651 struct iommu_device *iommu_dev = dev_to_iommu_device(dev); 4652 4653 return container_of(iommu_dev, struct intel_iommu, iommu); 4654 } 4655 4656 static ssize_t intel_iommu_show_version(struct device *dev, 4657 struct device_attribute *attr, 4658 char *buf) 4659 { 4660 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 4661 u32 ver = readl(iommu->reg + DMAR_VER_REG); 4662 return sprintf(buf, "%d:%d\n", 4663 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); 4664 } 4665 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); 4666 4667 static ssize_t intel_iommu_show_address(struct device *dev, 4668 struct device_attribute *attr, 4669 char *buf) 4670 { 4671 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 4672 return sprintf(buf, "%llx\n", iommu->reg_phys); 4673 } 4674 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); 4675 4676 static ssize_t intel_iommu_show_cap(struct device *dev, 4677 struct device_attribute *attr, 4678 char *buf) 4679 { 4680 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 4681 return sprintf(buf, "%llx\n", iommu->cap); 4682 } 4683 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); 4684 4685 static ssize_t intel_iommu_show_ecap(struct device *dev, 4686 struct device_attribute *attr, 4687 char *buf) 4688 { 4689 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 4690 return sprintf(buf, "%llx\n", iommu->ecap); 4691 } 4692 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); 4693 4694 static ssize_t intel_iommu_show_ndoms(struct device *dev, 4695 struct device_attribute *attr, 4696 char *buf) 4697 { 4698 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 4699 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap)); 4700 } 4701 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL); 4702 4703 static ssize_t intel_iommu_show_ndoms_used(struct device *dev, 4704 struct device_attribute *attr, 4705 char *buf) 4706 { 4707 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 4708 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids, 4709 cap_ndoms(iommu->cap))); 4710 } 4711 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL); 4712 4713 static struct attribute *intel_iommu_attrs[] = { 4714 &dev_attr_version.attr, 4715 &dev_attr_address.attr, 4716 &dev_attr_cap.attr, 4717 &dev_attr_ecap.attr, 4718 &dev_attr_domains_supported.attr, 4719 &dev_attr_domains_used.attr, 4720 NULL, 4721 }; 4722 4723 static struct attribute_group intel_iommu_group = { 4724 .name = "intel-iommu", 4725 .attrs = intel_iommu_attrs, 4726 }; 4727 4728 const struct attribute_group *intel_iommu_groups[] = { 4729 &intel_iommu_group, 4730 NULL, 4731 }; 4732 4733 static inline bool has_untrusted_dev(void) 4734 { 4735 struct pci_dev *pdev = NULL; 4736 4737 for_each_pci_dev(pdev) 4738 if (pdev->untrusted) 4739 return true; 4740 4741 return false; 4742 } 4743 4744 static int __init platform_optin_force_iommu(void) 4745 { 4746 if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev()) 
4747 return 0; 4748 4749 if (no_iommu || dmar_disabled) 4750 pr_info("Intel-IOMMU force enabled due to platform opt in\n"); 4751 4752 /* 4753 * If Intel-IOMMU is disabled by default, we will apply identity 4754 * map for all devices except those marked as being untrusted. 4755 */ 4756 if (dmar_disabled) 4757 iommu_set_default_passthrough(false); 4758 4759 dmar_disabled = 0; 4760 no_iommu = 0; 4761 4762 return 1; 4763 } 4764 4765 static int __init probe_acpi_namespace_devices(void) 4766 { 4767 struct dmar_drhd_unit *drhd; 4768 /* To avoid a -Wunused-but-set-variable warning. */ 4769 struct intel_iommu *iommu __maybe_unused; 4770 struct device *dev; 4771 int i, ret = 0; 4772 4773 for_each_active_iommu(iommu, drhd) { 4774 for_each_active_dev_scope(drhd->devices, 4775 drhd->devices_cnt, i, dev) { 4776 struct acpi_device_physical_node *pn; 4777 struct iommu_group *group; 4778 struct acpi_device *adev; 4779 4780 if (dev->bus != &acpi_bus_type) 4781 continue; 4782 4783 adev = to_acpi_device(dev); 4784 mutex_lock(&adev->physical_node_lock); 4785 list_for_each_entry(pn, 4786 &adev->physical_node_list, node) { 4787 group = iommu_group_get(pn->dev); 4788 if (group) { 4789 iommu_group_put(group); 4790 continue; 4791 } 4792 4793 pn->dev->bus->iommu_ops = &intel_iommu_ops; 4794 ret = iommu_probe_device(pn->dev); 4795 if (ret) 4796 break; 4797 } 4798 mutex_unlock(&adev->physical_node_lock); 4799 4800 if (ret) 4801 return ret; 4802 } 4803 } 4804 4805 return 0; 4806 } 4807 4808 int __init intel_iommu_init(void) 4809 { 4810 int ret = -ENODEV; 4811 struct dmar_drhd_unit *drhd; 4812 struct intel_iommu *iommu; 4813 4814 /* 4815 * Intel IOMMU is required for a TXT/tboot launch or platform 4816 * opt in, so enforce that. 4817 */ 4818 force_on = tboot_force_iommu() || platform_optin_force_iommu(); 4819 4820 if (iommu_init_mempool()) { 4821 if (force_on) 4822 panic("tboot: Failed to initialize iommu memory\n"); 4823 return -ENOMEM; 4824 } 4825 4826 down_write(&dmar_global_lock); 4827 if (dmar_table_init()) { 4828 if (force_on) 4829 panic("tboot: Failed to initialize DMAR table\n"); 4830 goto out_free_dmar; 4831 } 4832 4833 if (dmar_dev_scope_init() < 0) { 4834 if (force_on) 4835 panic("tboot: Failed to initialize DMAR device scope\n"); 4836 goto out_free_dmar; 4837 } 4838 4839 up_write(&dmar_global_lock); 4840 4841 /* 4842 * The bus notifier takes the dmar_global_lock, so lockdep will 4843 * complain later when we register it under the lock. 4844 */ 4845 dmar_register_bus_notifier(); 4846 4847 down_write(&dmar_global_lock); 4848 4849 if (!no_iommu) 4850 intel_iommu_debugfs_init(); 4851 4852 if (no_iommu || dmar_disabled) { 4853 /* 4854 * We exit the function here to ensure IOMMU's remapping and 4855 * mempool aren't setup, which means that the IOMMU's PMRs 4856 * won't be disabled via the call to init_dmars(). So disable 4857 * it explicitly here. The PMRs were setup by tboot prior to 4858 * calling SENTER, but the kernel is expected to reset/tear 4859 * down the PMRs. 
4860 */ 4861 if (intel_iommu_tboot_noforce) { 4862 for_each_iommu(iommu, drhd) 4863 iommu_disable_protect_mem_regions(iommu); 4864 } 4865 4866 /* 4867 * Make sure the IOMMUs are switched off, even when we 4868 * boot into a kexec kernel and the previous kernel left 4869 * them enabled 4870 */ 4871 intel_disable_iommus(); 4872 goto out_free_dmar; 4873 } 4874 4875 if (list_empty(&dmar_rmrr_units)) 4876 pr_info("No RMRR found\n"); 4877 4878 if (list_empty(&dmar_atsr_units)) 4879 pr_info("No ATSR found\n"); 4880 4881 if (dmar_init_reserved_ranges()) { 4882 if (force_on) 4883 panic("tboot: Failed to reserve iommu ranges\n"); 4884 goto out_free_reserved_range; 4885 } 4886 4887 if (dmar_map_gfx) 4888 intel_iommu_gfx_mapped = 1; 4889 4890 init_no_remapping_devices(); 4891 4892 ret = init_dmars(); 4893 if (ret) { 4894 if (force_on) 4895 panic("tboot: Failed to initialize DMARs\n"); 4896 pr_err("Initialization failed\n"); 4897 goto out_free_reserved_range; 4898 } 4899 up_write(&dmar_global_lock); 4900 4901 init_iommu_pm_ops(); 4902 4903 down_read(&dmar_global_lock); 4904 for_each_active_iommu(iommu, drhd) { 4905 iommu_device_sysfs_add(&iommu->iommu, NULL, 4906 intel_iommu_groups, 4907 "%s", iommu->name); 4908 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops); 4909 iommu_device_register(&iommu->iommu); 4910 } 4911 up_read(&dmar_global_lock); 4912 4913 bus_set_iommu(&pci_bus_type, &intel_iommu_ops); 4914 if (si_domain && !hw_pass_through) 4915 register_memory_notifier(&intel_iommu_memory_nb); 4916 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, 4917 intel_iommu_cpu_dead); 4918 4919 down_read(&dmar_global_lock); 4920 if (probe_acpi_namespace_devices()) 4921 pr_warn("ACPI name space devices didn't probe correctly\n"); 4922 4923 /* Finally, we enable the DMA remapping hardware. */ 4924 for_each_iommu(iommu, drhd) { 4925 if (!drhd->ignored && !translation_pre_enabled(iommu)) 4926 iommu_enable_translation(iommu); 4927 4928 iommu_disable_protect_mem_regions(iommu); 4929 } 4930 up_read(&dmar_global_lock); 4931 4932 pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); 4933 4934 intel_iommu_enabled = 1; 4935 4936 return 0; 4937 4938 out_free_reserved_range: 4939 put_iova_domain(&reserved_iova_list); 4940 out_free_dmar: 4941 intel_iommu_free_dmars(); 4942 up_write(&dmar_global_lock); 4943 iommu_exit_mempool(); 4944 return ret; 4945 } 4946 4947 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) 4948 { 4949 struct intel_iommu *iommu = opaque; 4950 4951 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff); 4952 return 0; 4953 } 4954 4955 /* 4956 * NB - intel-iommu lacks any sort of reference counting for the users of 4957 * dependent devices. If multiple endpoints have intersecting dependent 4958 * devices, unbinding the driver from any one of them will possibly leave 4959 * the others unable to operate. 
4960 */ 4961 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) 4962 { 4963 if (!iommu || !dev || !dev_is_pci(dev)) 4964 return; 4965 4966 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu); 4967 } 4968 4969 static void __dmar_remove_one_dev_info(struct device_domain_info *info) 4970 { 4971 struct dmar_domain *domain; 4972 struct intel_iommu *iommu; 4973 unsigned long flags; 4974 4975 assert_spin_locked(&device_domain_lock); 4976 4977 if (WARN_ON(!info)) 4978 return; 4979 4980 iommu = info->iommu; 4981 domain = info->domain; 4982 4983 if (info->dev) { 4984 if (dev_is_pci(info->dev) && sm_supported(iommu)) 4985 intel_pasid_tear_down_entry(iommu, info->dev, 4986 PASID_RID2PASID, false); 4987 4988 iommu_disable_dev_iotlb(info); 4989 if (!dev_is_real_dma_subdevice(info->dev)) 4990 domain_context_clear(iommu, info->dev); 4991 intel_pasid_free_table(info->dev); 4992 } 4993 4994 unlink_domain_info(info); 4995 4996 spin_lock_irqsave(&iommu->lock, flags); 4997 domain_detach_iommu(domain, iommu); 4998 spin_unlock_irqrestore(&iommu->lock, flags); 4999 5000 free_devinfo_mem(info); 5001 } 5002 5003 static void dmar_remove_one_dev_info(struct device *dev) 5004 { 5005 struct device_domain_info *info; 5006 unsigned long flags; 5007 5008 spin_lock_irqsave(&device_domain_lock, flags); 5009 info = get_domain_info(dev); 5010 if (info) 5011 __dmar_remove_one_dev_info(info); 5012 spin_unlock_irqrestore(&device_domain_lock, flags); 5013 } 5014 5015 static int md_domain_init(struct dmar_domain *domain, int guest_width) 5016 { 5017 int adjust_width; 5018 5019 /* calculate AGAW */ 5020 domain->gaw = guest_width; 5021 adjust_width = guestwidth_to_adjustwidth(guest_width); 5022 domain->agaw = width_to_agaw(adjust_width); 5023 5024 domain->iommu_coherency = 0; 5025 domain->iommu_snooping = 0; 5026 domain->iommu_superpage = 0; 5027 domain->max_addr = 0; 5028 5029 /* always allocate the top pgd */ 5030 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); 5031 if (!domain->pgd) 5032 return -ENOMEM; 5033 domain_flush_cache(domain, domain->pgd, PAGE_SIZE); 5034 return 0; 5035 } 5036 5037 static void intel_init_iova_domain(struct dmar_domain *dmar_domain) 5038 { 5039 init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); 5040 copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad); 5041 5042 if (!intel_iommu_strict && 5043 init_iova_flush_queue(&dmar_domain->iovad, 5044 iommu_flush_iova, iova_entry_free)) 5045 pr_info("iova flush queue initialization failed\n"); 5046 } 5047 5048 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) 5049 { 5050 struct dmar_domain *dmar_domain; 5051 struct iommu_domain *domain; 5052 5053 switch (type) { 5054 case IOMMU_DOMAIN_DMA: 5055 /* fallthrough */ 5056 case IOMMU_DOMAIN_UNMANAGED: 5057 dmar_domain = alloc_domain(0); 5058 if (!dmar_domain) { 5059 pr_err("Can't allocate dmar_domain\n"); 5060 return NULL; 5061 } 5062 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 5063 pr_err("Domain initialization failed\n"); 5064 domain_exit(dmar_domain); 5065 return NULL; 5066 } 5067 5068 if (type == IOMMU_DOMAIN_DMA) 5069 intel_init_iova_domain(dmar_domain); 5070 5071 domain_update_iommu_cap(dmar_domain); 5072 5073 domain = &dmar_domain->domain; 5074 domain->geometry.aperture_start = 0; 5075 domain->geometry.aperture_end = 5076 __DOMAIN_MAX_ADDR(dmar_domain->gaw); 5077 domain->geometry.force_aperture = true; 5078 5079 return domain; 5080 case IOMMU_DOMAIN_IDENTITY: 5081 return 
&si_domain->domain; 5082 default: 5083 return NULL; 5084 } 5085 5086 return NULL; 5087 } 5088 5089 static void intel_iommu_domain_free(struct iommu_domain *domain) 5090 { 5091 if (domain != &si_domain->domain) 5092 domain_exit(to_dmar_domain(domain)); 5093 } 5094 5095 /* 5096 * Check whether a @domain could be attached to the @dev through the 5097 * aux-domain attach/detach APIs. 5098 */ 5099 static inline bool 5100 is_aux_domain(struct device *dev, struct iommu_domain *domain) 5101 { 5102 struct device_domain_info *info = get_domain_info(dev); 5103 5104 return info && info->auxd_enabled && 5105 domain->type == IOMMU_DOMAIN_UNMANAGED; 5106 } 5107 5108 static void auxiliary_link_device(struct dmar_domain *domain, 5109 struct device *dev) 5110 { 5111 struct device_domain_info *info = get_domain_info(dev); 5112 5113 assert_spin_locked(&device_domain_lock); 5114 if (WARN_ON(!info)) 5115 return; 5116 5117 domain->auxd_refcnt++; 5118 list_add(&domain->auxd, &info->auxiliary_domains); 5119 } 5120 5121 static void auxiliary_unlink_device(struct dmar_domain *domain, 5122 struct device *dev) 5123 { 5124 struct device_domain_info *info = get_domain_info(dev); 5125 5126 assert_spin_locked(&device_domain_lock); 5127 if (WARN_ON(!info)) 5128 return; 5129 5130 list_del(&domain->auxd); 5131 domain->auxd_refcnt--; 5132 5133 if (!domain->auxd_refcnt && domain->default_pasid > 0) 5134 ioasid_free(domain->default_pasid); 5135 } 5136 5137 static int aux_domain_add_dev(struct dmar_domain *domain, 5138 struct device *dev) 5139 { 5140 int ret; 5141 u8 bus, devfn; 5142 unsigned long flags; 5143 struct intel_iommu *iommu; 5144 5145 iommu = device_to_iommu(dev, &bus, &devfn); 5146 if (!iommu) 5147 return -ENODEV; 5148 5149 if (domain->default_pasid <= 0) { 5150 int pasid; 5151 5152 /* No private data needed for the default pasid */ 5153 pasid = ioasid_alloc(NULL, PASID_MIN, 5154 pci_max_pasids(to_pci_dev(dev)) - 1, 5155 NULL); 5156 if (pasid == INVALID_IOASID) { 5157 pr_err("Can't allocate default pasid\n"); 5158 return -ENODEV; 5159 } 5160 domain->default_pasid = pasid; 5161 } 5162 5163 spin_lock_irqsave(&device_domain_lock, flags); 5164 /* 5165 * iommu->lock must be held to attach domain to iommu and setup the 5166 * pasid entry for second level translation. 
5167 */ 5168 spin_lock(&iommu->lock); 5169 ret = domain_attach_iommu(domain, iommu); 5170 if (ret) 5171 goto attach_failed; 5172 5173 /* Setup the PASID entry for mediated devices: */ 5174 if (domain_use_first_level(domain)) 5175 ret = domain_setup_first_level(iommu, domain, dev, 5176 domain->default_pasid); 5177 else 5178 ret = intel_pasid_setup_second_level(iommu, domain, dev, 5179 domain->default_pasid); 5180 if (ret) 5181 goto table_failed; 5182 spin_unlock(&iommu->lock); 5183 5184 auxiliary_link_device(domain, dev); 5185 5186 spin_unlock_irqrestore(&device_domain_lock, flags); 5187 5188 return 0; 5189 5190 table_failed: 5191 domain_detach_iommu(domain, iommu); 5192 attach_failed: 5193 spin_unlock(&iommu->lock); 5194 spin_unlock_irqrestore(&device_domain_lock, flags); 5195 if (!domain->auxd_refcnt && domain->default_pasid > 0) 5196 ioasid_free(domain->default_pasid); 5197 5198 return ret; 5199 } 5200 5201 static void aux_domain_remove_dev(struct dmar_domain *domain, 5202 struct device *dev) 5203 { 5204 struct device_domain_info *info; 5205 struct intel_iommu *iommu; 5206 unsigned long flags; 5207 5208 if (!is_aux_domain(dev, &domain->domain)) 5209 return; 5210 5211 spin_lock_irqsave(&device_domain_lock, flags); 5212 info = get_domain_info(dev); 5213 iommu = info->iommu; 5214 5215 auxiliary_unlink_device(domain, dev); 5216 5217 spin_lock(&iommu->lock); 5218 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); 5219 domain_detach_iommu(domain, iommu); 5220 spin_unlock(&iommu->lock); 5221 5222 spin_unlock_irqrestore(&device_domain_lock, flags); 5223 } 5224 5225 static int prepare_domain_attach_device(struct iommu_domain *domain, 5226 struct device *dev) 5227 { 5228 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5229 struct intel_iommu *iommu; 5230 int addr_width; 5231 u8 bus, devfn; 5232 5233 iommu = device_to_iommu(dev, &bus, &devfn); 5234 if (!iommu) 5235 return -ENODEV; 5236 5237 /* check if this iommu agaw is sufficient for max mapped address */ 5238 addr_width = agaw_to_width(iommu->agaw); 5239 if (addr_width > cap_mgaw(iommu->cap)) 5240 addr_width = cap_mgaw(iommu->cap); 5241 5242 if (dmar_domain->max_addr > (1LL << addr_width)) { 5243 dev_err(dev, "%s: iommu width (%d) is not " 5244 "sufficient for the mapped address (%llx)\n", 5245 __func__, addr_width, dmar_domain->max_addr); 5246 return -EFAULT; 5247 } 5248 dmar_domain->gaw = addr_width; 5249 5250 /* 5251 * Knock out extra levels of page tables if necessary 5252 */ 5253 while (iommu->agaw < dmar_domain->agaw) { 5254 struct dma_pte *pte; 5255 5256 pte = dmar_domain->pgd; 5257 if (dma_pte_present(pte)) { 5258 dmar_domain->pgd = (struct dma_pte *) 5259 phys_to_virt(dma_pte_addr(pte)); 5260 free_pgtable_page(pte); 5261 } 5262 dmar_domain->agaw--; 5263 } 5264 5265 return 0; 5266 } 5267 5268 static int intel_iommu_attach_device(struct iommu_domain *domain, 5269 struct device *dev) 5270 { 5271 int ret; 5272 5273 if (domain->type == IOMMU_DOMAIN_UNMANAGED && 5274 device_is_rmrr_locked(dev)) { 5275 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. 
Contact your platform vendor.\n"); 5276 return -EPERM; 5277 } 5278 5279 if (is_aux_domain(dev, domain)) 5280 return -EPERM; 5281 5282 /* normally dev is not mapped */ 5283 if (unlikely(domain_context_mapped(dev))) { 5284 struct dmar_domain *old_domain; 5285 5286 old_domain = find_domain(dev); 5287 if (old_domain) 5288 dmar_remove_one_dev_info(dev); 5289 } 5290 5291 ret = prepare_domain_attach_device(domain, dev); 5292 if (ret) 5293 return ret; 5294 5295 return domain_add_dev_info(to_dmar_domain(domain), dev); 5296 } 5297 5298 static int intel_iommu_aux_attach_device(struct iommu_domain *domain, 5299 struct device *dev) 5300 { 5301 int ret; 5302 5303 if (!is_aux_domain(dev, domain)) 5304 return -EPERM; 5305 5306 ret = prepare_domain_attach_device(domain, dev); 5307 if (ret) 5308 return ret; 5309 5310 return aux_domain_add_dev(to_dmar_domain(domain), dev); 5311 } 5312 5313 static void intel_iommu_detach_device(struct iommu_domain *domain, 5314 struct device *dev) 5315 { 5316 dmar_remove_one_dev_info(dev); 5317 } 5318 5319 static void intel_iommu_aux_detach_device(struct iommu_domain *domain, 5320 struct device *dev) 5321 { 5322 aux_domain_remove_dev(to_dmar_domain(domain), dev); 5323 } 5324 5325 /* 5326 * 2D array for converting and sanitizing IOMMU generic TLB granularity to 5327 * VT-d granularity. Invalidation is typically included in the unmap operation 5328 * as a result of DMA or VFIO unmap. However, for assigned devices guest 5329 * owns the first level page tables. Invalidations of translation caches in the 5330 * guest are trapped and passed down to the host. 5331 * 5332 * vIOMMU in the guest will only expose first level page tables, therefore 5333 * we do not support IOTLB granularity for request without PASID (second level). 5334 * 5335 * For example, to find the VT-d granularity encoding for IOTLB 5336 * type and page selective granularity within PASID: 5337 * X: indexed by iommu cache type 5338 * Y: indexed by enum iommu_inv_granularity 5339 * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] 5340 */ 5341 5342 static const int 5343 inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { 5344 /* 5345 * PASID based IOTLB invalidation: PASID selective (per PASID), 5346 * page selective (address granularity) 5347 */ 5348 {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}, 5349 /* PASID based dev TLBs */ 5350 {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL}, 5351 /* PASID cache */ 5352 {-EINVAL, -EINVAL, -EINVAL} 5353 }; 5354 5355 static inline int to_vtd_granularity(int type, int granu) 5356 { 5357 return inv_type_granu_table[type][granu]; 5358 } 5359 5360 static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) 5361 { 5362 u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT; 5363 5364 /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc. 5365 * IOMMU cache invalidate API passes granu_size in bytes, and number of 5366 * granu size in contiguous memory. 
5367 */ 5368 return order_base_2(nr_pages); 5369 } 5370 5371 #ifdef CONFIG_INTEL_IOMMU_SVM 5372 static int 5373 intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, 5374 struct iommu_cache_invalidate_info *inv_info) 5375 { 5376 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5377 struct device_domain_info *info; 5378 struct intel_iommu *iommu; 5379 unsigned long flags; 5380 int cache_type; 5381 u8 bus, devfn; 5382 u16 did, sid; 5383 int ret = 0; 5384 u64 size = 0; 5385 5386 if (!inv_info || !dmar_domain || 5387 inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) 5388 return -EINVAL; 5389 5390 if (!dev || !dev_is_pci(dev)) 5391 return -ENODEV; 5392 5393 iommu = device_to_iommu(dev, &bus, &devfn); 5394 if (!iommu) 5395 return -ENODEV; 5396 5397 if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) 5398 return -EINVAL; 5399 5400 spin_lock_irqsave(&device_domain_lock, flags); 5401 spin_lock(&iommu->lock); 5402 info = get_domain_info(dev); 5403 if (!info) { 5404 ret = -EINVAL; 5405 goto out_unlock; 5406 } 5407 did = dmar_domain->iommu_did[iommu->seq_id]; 5408 sid = PCI_DEVID(bus, devfn); 5409 5410 /* Size is only valid in address selective invalidation */ 5411 if (inv_info->granularity != IOMMU_INV_GRANU_PASID) 5412 size = to_vtd_size(inv_info->addr_info.granule_size, 5413 inv_info->addr_info.nb_granules); 5414 5415 for_each_set_bit(cache_type, 5416 (unsigned long *)&inv_info->cache, 5417 IOMMU_CACHE_INV_TYPE_NR) { 5418 int granu = 0; 5419 u64 pasid = 0; 5420 5421 granu = to_vtd_granularity(cache_type, inv_info->granularity); 5422 if (granu == -EINVAL) { 5423 pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n", 5424 cache_type, inv_info->granularity); 5425 break; 5426 } 5427 5428 /* 5429 * PASID is stored in different locations based on the 5430 * granularity. 5431 */ 5432 if (inv_info->granularity == IOMMU_INV_GRANU_PASID && 5433 (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) 5434 pasid = inv_info->pasid_info.pasid; 5435 else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && 5436 (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) 5437 pasid = inv_info->addr_info.pasid; 5438 5439 switch (BIT(cache_type)) { 5440 case IOMMU_CACHE_INV_TYPE_IOTLB: 5441 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && 5442 size && 5443 (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { 5444 pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n", 5445 inv_info->addr_info.addr, size); 5446 ret = -ERANGE; 5447 goto out_unlock; 5448 } 5449 5450 /* 5451 * If granu is PASID-selective, address is ignored. 5452 * We use npages = -1 to indicate that. 5453 */ 5454 qi_flush_piotlb(iommu, did, pasid, 5455 mm_to_dma_pfn(inv_info->addr_info.addr), 5456 (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, 5457 inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); 5458 5459 /* 5460 * Always flush device IOTLB if ATS is enabled. vIOMMU 5461 * in the guest may assume IOTLB flush is inclusive, 5462 * which is more efficient. 
5463 */ 5464 if (info->ats_enabled) 5465 qi_flush_dev_iotlb_pasid(iommu, sid, 5466 info->pfsid, pasid, 5467 info->ats_qdep, 5468 inv_info->addr_info.addr, 5469 size, granu); 5470 break; 5471 case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: 5472 if (info->ats_enabled) 5473 qi_flush_dev_iotlb_pasid(iommu, sid, 5474 info->pfsid, pasid, 5475 info->ats_qdep, 5476 inv_info->addr_info.addr, 5477 size, granu); 5478 else 5479 pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); 5480 break; 5481 default: 5482 dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n", 5483 cache_type); 5484 ret = -EINVAL; 5485 } 5486 } 5487 out_unlock: 5488 spin_unlock(&iommu->lock); 5489 spin_unlock_irqrestore(&device_domain_lock, flags); 5490 5491 return ret; 5492 } 5493 #endif 5494 5495 static int intel_iommu_map(struct iommu_domain *domain, 5496 unsigned long iova, phys_addr_t hpa, 5497 size_t size, int iommu_prot, gfp_t gfp) 5498 { 5499 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5500 u64 max_addr; 5501 int prot = 0; 5502 int ret; 5503 5504 if (iommu_prot & IOMMU_READ) 5505 prot |= DMA_PTE_READ; 5506 if (iommu_prot & IOMMU_WRITE) 5507 prot |= DMA_PTE_WRITE; 5508 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 5509 prot |= DMA_PTE_SNP; 5510 5511 max_addr = iova + size; 5512 if (dmar_domain->max_addr < max_addr) { 5513 u64 end; 5514 5515 /* check if minimum agaw is sufficient for mapped address */ 5516 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1; 5517 if (end < max_addr) { 5518 pr_err("%s: iommu width (%d) is not " 5519 "sufficient for the mapped address (%llx)\n", 5520 __func__, dmar_domain->gaw, max_addr); 5521 return -EFAULT; 5522 } 5523 dmar_domain->max_addr = max_addr; 5524 } 5525 /* Round up size to next multiple of PAGE_SIZE, if it and 5526 the low bits of hpa would take us onto the next page */ 5527 size = aligned_nrpages(hpa, size); 5528 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, 5529 hpa >> VTD_PAGE_SHIFT, size, prot); 5530 return ret; 5531 } 5532 5533 static size_t intel_iommu_unmap(struct iommu_domain *domain, 5534 unsigned long iova, size_t size, 5535 struct iommu_iotlb_gather *gather) 5536 { 5537 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5538 struct page *freelist = NULL; 5539 unsigned long start_pfn, last_pfn; 5540 unsigned int npages; 5541 int iommu_id, level = 0; 5542 5543 /* Cope with horrid API which requires us to unmap more than the 5544 size argument if it happens to be a large-page mapping. 
*/ 5545 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); 5546 5547 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) 5548 size = VTD_PAGE_SIZE << level_to_offset_bits(level); 5549 5550 start_pfn = iova >> VTD_PAGE_SHIFT; 5551 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; 5552 5553 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn); 5554 5555 npages = last_pfn - start_pfn + 1; 5556 5557 for_each_domain_iommu(iommu_id, dmar_domain) 5558 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain, 5559 start_pfn, npages, !freelist, 0); 5560 5561 dma_free_pagelist(freelist); 5562 5563 if (dmar_domain->max_addr == iova + size) 5564 dmar_domain->max_addr = iova; 5565 5566 return size; 5567 } 5568 5569 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 5570 dma_addr_t iova) 5571 { 5572 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5573 struct dma_pte *pte; 5574 int level = 0; 5575 u64 phys = 0; 5576 5577 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); 5578 if (pte && dma_pte_present(pte)) 5579 phys = dma_pte_addr(pte) + 5580 (iova & (BIT_MASK(level_to_offset_bits(level) + 5581 VTD_PAGE_SHIFT) - 1)); 5582 5583 return phys; 5584 } 5585 5586 static inline bool scalable_mode_support(void) 5587 { 5588 struct dmar_drhd_unit *drhd; 5589 struct intel_iommu *iommu; 5590 bool ret = true; 5591 5592 rcu_read_lock(); 5593 for_each_active_iommu(iommu, drhd) { 5594 if (!sm_supported(iommu)) { 5595 ret = false; 5596 break; 5597 } 5598 } 5599 rcu_read_unlock(); 5600 5601 return ret; 5602 } 5603 5604 static inline bool iommu_pasid_support(void) 5605 { 5606 struct dmar_drhd_unit *drhd; 5607 struct intel_iommu *iommu; 5608 bool ret = true; 5609 5610 rcu_read_lock(); 5611 for_each_active_iommu(iommu, drhd) { 5612 if (!pasid_supported(iommu)) { 5613 ret = false; 5614 break; 5615 } 5616 } 5617 rcu_read_unlock(); 5618 5619 return ret; 5620 } 5621 5622 static inline bool nested_mode_support(void) 5623 { 5624 struct dmar_drhd_unit *drhd; 5625 struct intel_iommu *iommu; 5626 bool ret = true; 5627 5628 rcu_read_lock(); 5629 for_each_active_iommu(iommu, drhd) { 5630 if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) { 5631 ret = false; 5632 break; 5633 } 5634 } 5635 rcu_read_unlock(); 5636 5637 return ret; 5638 } 5639 5640 static bool intel_iommu_capable(enum iommu_cap cap) 5641 { 5642 if (cap == IOMMU_CAP_CACHE_COHERENCY) 5643 return domain_update_iommu_snooping(NULL) == 1; 5644 if (cap == IOMMU_CAP_INTR_REMAP) 5645 return irq_remapping_enabled == 1; 5646 5647 return false; 5648 } 5649 5650 static struct iommu_device *intel_iommu_probe_device(struct device *dev) 5651 { 5652 struct intel_iommu *iommu; 5653 u8 bus, devfn; 5654 5655 iommu = device_to_iommu(dev, &bus, &devfn); 5656 if (!iommu) 5657 return ERR_PTR(-ENODEV); 5658 5659 if (translation_pre_enabled(iommu)) 5660 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; 5661 5662 return &iommu->iommu; 5663 } 5664 5665 static void intel_iommu_release_device(struct device *dev) 5666 { 5667 struct intel_iommu *iommu; 5668 u8 bus, devfn; 5669 5670 iommu = device_to_iommu(dev, &bus, &devfn); 5671 if (!iommu) 5672 return; 5673 5674 dmar_remove_one_dev_info(dev); 5675 5676 set_dma_ops(dev, NULL); 5677 } 5678 5679 static void intel_iommu_probe_finalize(struct device *dev) 5680 { 5681 struct iommu_domain *domain; 5682 5683 domain = iommu_get_domain_for_dev(dev); 5684 if (device_needs_bounce(dev)) 5685 set_dma_ops(dev, &bounce_dma_ops); 5686 else if (domain && domain->type == IOMMU_DOMAIN_DMA) 5687 
set_dma_ops(dev, &intel_dma_ops); 5688 else 5689 set_dma_ops(dev, NULL); 5690 } 5691 5692 static void intel_iommu_get_resv_regions(struct device *device, 5693 struct list_head *head) 5694 { 5695 int prot = DMA_PTE_READ | DMA_PTE_WRITE; 5696 struct iommu_resv_region *reg; 5697 struct dmar_rmrr_unit *rmrr; 5698 struct device *i_dev; 5699 int i; 5700 5701 down_read(&dmar_global_lock); 5702 for_each_rmrr_units(rmrr) { 5703 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 5704 i, i_dev) { 5705 struct iommu_resv_region *resv; 5706 enum iommu_resv_type type; 5707 size_t length; 5708 5709 if (i_dev != device && 5710 !is_downstream_to_pci_bridge(device, i_dev)) 5711 continue; 5712 5713 length = rmrr->end_address - rmrr->base_address + 1; 5714 5715 type = device_rmrr_is_relaxable(device) ? 5716 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT; 5717 5718 resv = iommu_alloc_resv_region(rmrr->base_address, 5719 length, prot, type); 5720 if (!resv) 5721 break; 5722 5723 list_add_tail(&resv->list, head); 5724 } 5725 } 5726 up_read(&dmar_global_lock); 5727 5728 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA 5729 if (dev_is_pci(device)) { 5730 struct pci_dev *pdev = to_pci_dev(device); 5731 5732 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { 5733 reg = iommu_alloc_resv_region(0, 1UL << 24, prot, 5734 IOMMU_RESV_DIRECT_RELAXABLE); 5735 if (reg) 5736 list_add_tail(&reg->list, head); 5737 } 5738 } 5739 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ 5740 5741 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, 5742 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, 5743 0, IOMMU_RESV_MSI); 5744 if (!reg) 5745 return; 5746 list_add_tail(&reg->list, head); 5747 } 5748 5749 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) 5750 { 5751 struct device_domain_info *info; 5752 struct context_entry *context; 5753 struct dmar_domain *domain; 5754 unsigned long flags; 5755 u64 ctx_lo; 5756 int ret; 5757 5758 domain = find_domain(dev); 5759 if (!domain) 5760 return -EINVAL; 5761 5762 spin_lock_irqsave(&device_domain_lock, flags); 5763 spin_lock(&iommu->lock); 5764 5765 ret = -EINVAL; 5766 info = get_domain_info(dev); 5767 if (!info || !info->pasid_supported) 5768 goto out; 5769 5770 context = iommu_context_addr(iommu, info->bus, info->devfn, 0); 5771 if (WARN_ON(!context)) 5772 goto out; 5773 5774 ctx_lo = context[0].lo; 5775 5776 if (!(ctx_lo & CONTEXT_PASIDE)) { 5777 ctx_lo |= CONTEXT_PASIDE; 5778 context[0].lo = ctx_lo; 5779 wmb(); 5780 iommu->flush.flush_context(iommu, 5781 domain->iommu_did[iommu->seq_id], 5782 PCI_DEVID(info->bus, info->devfn), 5783 DMA_CCMD_MASK_NOBIT, 5784 DMA_CCMD_DEVICE_INVL); 5785 } 5786 5787 /* Enable PASID support in the device, if it wasn't already */ 5788 if (!info->pasid_enabled) 5789 iommu_enable_dev_iotlb(info); 5790 5791 ret = 0; 5792 5793 out: 5794 spin_unlock(&iommu->lock); 5795 spin_unlock_irqrestore(&device_domain_lock, flags); 5796 5797 return ret; 5798 } 5799 5800 static void intel_iommu_apply_resv_region(struct device *dev, 5801 struct iommu_domain *domain, 5802 struct iommu_resv_region *region) 5803 { 5804 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5805 unsigned long start, end; 5806 5807 start = IOVA_PFN(region->start); 5808 end = IOVA_PFN(region->start + region->length - 1); 5809 5810 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end)); 5811 } 5812 5813 static struct iommu_group *intel_iommu_device_group(struct device *dev) 5814 { 5815 if (dev_is_pci(dev)) 5816 return pci_device_group(dev); 5817 return generic_device_group(dev); 5818 } 5819 5820
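/*
 * Illustrative sketch only, not part of this driver: the reserved regions
 * reported by intel_iommu_get_resv_regions() above are consumed through the
 * generic IOMMU API, roughly as follows (error handling omitted):
 *
 *	struct iommu_resv_region *region;
 *	LIST_HEAD(resv_regions);
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		pr_info("resv [%pa, +%zx] type %d\n",
 *			&region->start, region->length, region->type);
 *	iommu_put_resv_regions(dev, &resv_regions);
 */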
#ifdef CONFIG_INTEL_IOMMU_SVM 5821 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) 5822 { 5823 struct intel_iommu *iommu; 5824 u8 bus, devfn; 5825 5826 if (iommu_dummy(dev)) { 5827 dev_warn(dev, 5828 "No IOMMU translation for device; cannot enable SVM\n"); 5829 return NULL; 5830 } 5831 5832 iommu = device_to_iommu(dev, &bus, &devfn); 5833 if (!iommu) { 5834 dev_err(dev, "No IOMMU for device; cannot enable SVM\n"); 5835 return NULL; 5836 } 5837 5838 return iommu; 5839 } 5840 #endif /* CONFIG_INTEL_IOMMU_SVM */ 5841 5842 static int intel_iommu_enable_auxd(struct device *dev) 5843 { 5844 struct device_domain_info *info; 5845 struct intel_iommu *iommu; 5846 unsigned long flags; 5847 u8 bus, devfn; 5848 int ret; 5849 5850 iommu = device_to_iommu(dev, &bus, &devfn); 5851 if (!iommu || dmar_disabled) 5852 return -EINVAL; 5853 5854 if (!sm_supported(iommu) || !pasid_supported(iommu)) 5855 return -EINVAL; 5856 5857 ret = intel_iommu_enable_pasid(iommu, dev); 5858 if (ret) 5859 return -ENODEV; 5860 5861 spin_lock_irqsave(&device_domain_lock, flags); 5862 info = get_domain_info(dev); 5863 info->auxd_enabled = 1; 5864 spin_unlock_irqrestore(&device_domain_lock, flags); 5865 5866 return 0; 5867 } 5868 5869 static int intel_iommu_disable_auxd(struct device *dev) 5870 { 5871 struct device_domain_info *info; 5872 unsigned long flags; 5873 5874 spin_lock_irqsave(&device_domain_lock, flags); 5875 info = get_domain_info(dev); 5876 if (!WARN_ON(!info)) 5877 info->auxd_enabled = 0; 5878 spin_unlock_irqrestore(&device_domain_lock, flags); 5879 5880 return 0; 5881 } 5882 5883 /* 5884 * A PCI Express designated vendor-specific extended capability is defined 5885 * in section 3.7 of the Intel Scalable I/O Virtualization technical spec 5886 * so that system software and tools can detect endpoint devices supporting 5887 * Intel Scalable I/O Virtualization without a host driver dependency. 5888 * 5889 * Returns the address of the matching extended capability structure within 5890 * the device's PCI configuration space or 0 if the device does not support 5891 * it.
5892 */ 5893 static int siov_find_pci_dvsec(struct pci_dev *pdev) 5894 { 5895 int pos; 5896 u16 vendor, id; 5897 5898 pos = pci_find_next_ext_capability(pdev, 0, 0x23); 5899 while (pos) { 5900 pci_read_config_word(pdev, pos + 4, &vendor); 5901 pci_read_config_word(pdev, pos + 8, &id); 5902 if (vendor == PCI_VENDOR_ID_INTEL && id == 5) 5903 return pos; 5904 5905 pos = pci_find_next_ext_capability(pdev, pos, 0x23); 5906 } 5907 5908 return 0; 5909 } 5910 5911 static bool 5912 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) 5913 { 5914 if (feat == IOMMU_DEV_FEAT_AUX) { 5915 int ret; 5916 5917 if (!dev_is_pci(dev) || dmar_disabled || 5918 !scalable_mode_support() || !iommu_pasid_support()) 5919 return false; 5920 5921 ret = pci_pasid_features(to_pci_dev(dev)); 5922 if (ret < 0) 5923 return false; 5924 5925 return !!siov_find_pci_dvsec(to_pci_dev(dev)); 5926 } 5927 5928 if (feat == IOMMU_DEV_FEAT_SVA) { 5929 struct device_domain_info *info = get_domain_info(dev); 5930 5931 return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) && 5932 info->pasid_supported && info->pri_supported && 5933 info->ats_supported; 5934 } 5935 5936 return false; 5937 } 5938 5939 static int 5940 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) 5941 { 5942 if (feat == IOMMU_DEV_FEAT_AUX) 5943 return intel_iommu_enable_auxd(dev); 5944 5945 if (feat == IOMMU_DEV_FEAT_SVA) { 5946 struct device_domain_info *info = get_domain_info(dev); 5947 5948 if (!info) 5949 return -EINVAL; 5950 5951 if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) 5952 return 0; 5953 } 5954 5955 return -ENODEV; 5956 } 5957 5958 static int 5959 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) 5960 { 5961 if (feat == IOMMU_DEV_FEAT_AUX) 5962 return intel_iommu_disable_auxd(dev); 5963 5964 return -ENODEV; 5965 } 5966 5967 static bool 5968 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) 5969 { 5970 struct device_domain_info *info = get_domain_info(dev); 5971 5972 if (feat == IOMMU_DEV_FEAT_AUX) 5973 return scalable_mode_support() && info && info->auxd_enabled; 5974 5975 return false; 5976 } 5977 5978 static int 5979 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) 5980 { 5981 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5982 5983 return dmar_domain->default_pasid > 0 ? 
5984 dmar_domain->default_pasid : -EINVAL; 5985 } 5986 5987 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, 5988 struct device *dev) 5989 { 5990 return attach_deferred(dev); 5991 } 5992 5993 static int 5994 intel_iommu_domain_set_attr(struct iommu_domain *domain, 5995 enum iommu_attr attr, void *data) 5996 { 5997 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5998 unsigned long flags; 5999 int ret = 0; 6000 6001 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 6002 return -EINVAL; 6003 6004 switch (attr) { 6005 case DOMAIN_ATTR_NESTING: 6006 spin_lock_irqsave(&device_domain_lock, flags); 6007 if (nested_mode_support() && 6008 list_empty(&dmar_domain->devices)) { 6009 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; 6010 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; 6011 } else { 6012 ret = -ENODEV; 6013 } 6014 spin_unlock_irqrestore(&device_domain_lock, flags); 6015 break; 6016 default: 6017 ret = -EINVAL; 6018 break; 6019 } 6020 6021 return ret; 6022 } 6023 6024 const struct iommu_ops intel_iommu_ops = { 6025 .capable = intel_iommu_capable, 6026 .domain_alloc = intel_iommu_domain_alloc, 6027 .domain_free = intel_iommu_domain_free, 6028 .domain_set_attr = intel_iommu_domain_set_attr, 6029 .attach_dev = intel_iommu_attach_device, 6030 .detach_dev = intel_iommu_detach_device, 6031 .aux_attach_dev = intel_iommu_aux_attach_device, 6032 .aux_detach_dev = intel_iommu_aux_detach_device, 6033 .aux_get_pasid = intel_iommu_aux_get_pasid, 6034 .map = intel_iommu_map, 6035 .unmap = intel_iommu_unmap, 6036 .iova_to_phys = intel_iommu_iova_to_phys, 6037 .probe_device = intel_iommu_probe_device, 6038 .probe_finalize = intel_iommu_probe_finalize, 6039 .release_device = intel_iommu_release_device, 6040 .get_resv_regions = intel_iommu_get_resv_regions, 6041 .put_resv_regions = generic_iommu_put_resv_regions, 6042 .apply_resv_region = intel_iommu_apply_resv_region, 6043 .device_group = intel_iommu_device_group, 6044 .dev_has_feat = intel_iommu_dev_has_feat, 6045 .dev_feat_enabled = intel_iommu_dev_feat_enabled, 6046 .dev_enable_feat = intel_iommu_dev_enable_feat, 6047 .dev_disable_feat = intel_iommu_dev_disable_feat, 6048 .is_attach_deferred = intel_iommu_is_attach_deferred, 6049 .def_domain_type = device_def_domain_type, 6050 .pgsize_bitmap = INTEL_IOMMU_PGSIZES, 6051 #ifdef CONFIG_INTEL_IOMMU_SVM 6052 .cache_invalidate = intel_iommu_sva_invalidate, 6053 .sva_bind_gpasid = intel_svm_bind_gpasid, 6054 .sva_unbind_gpasid = intel_svm_unbind_gpasid, 6055 .sva_bind = intel_svm_bind, 6056 .sva_unbind = intel_svm_unbind, 6057 .sva_get_pasid = intel_svm_get_pasid, 6058 #endif 6059 }; 6060 6061 static void quirk_iommu_igfx(struct pci_dev *dev) 6062 { 6063 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); 6064 dmar_map_gfx = 0; 6065 } 6066 6067 /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ 6068 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); 6069 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); 6070 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx); 6071 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx); 6072 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); 6073 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); 6074 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); 6075 6076 /* Broadwell igfx malfunctions with dmar */ 6077 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); 6078 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); 6079 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx); 6080 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx); 6081 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx); 6082 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx); 6083 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx); 6084 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx); 6085 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx); 6086 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx); 6087 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx); 6088 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx); 6089 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx); 6090 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx); 6091 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx); 6092 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx); 6093 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx); 6094 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx); 6095 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx); 6096 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx); 6097 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx); 6098 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); 6099 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); 6100 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); 6101 6102 static void quirk_iommu_rwbf(struct pci_dev *dev) 6103 { 6104 /* 6105 * Mobile 4 Series Chipset neglects to set RWBF capability, 6106 * but needs it. Same seems to hold for the desktop versions. 
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
        /*
         * Mobile 4 Series Chipset neglects to set RWBF capability,
         * but needs it. Same seems to hold for the desktop versions.
         */
        pci_info(dev, "Forcing write-buffer flush capability\n");
        rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
        unsigned short ggc;

        if (pci_read_config_word(dev, GGC, &ggc))
                return;

        if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
                pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
                dmar_map_gfx = 0;
        } else if (dmar_map_gfx) {
                /* we have to ensure the gfx device is idle before we flush */
                pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
                intel_iommu_strict = 1;
        }
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
        struct pci_dev *pdev;
        uint32_t vtisochctrl;

        /* If there's no Azalia in the system anyway, forget it. */
        pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
        if (!pdev)
                return;
        pci_dev_put(pdev);

        /* System Management Registers. Might be hidden, in which case
           we can't do the sanity check. But that's OK, because the
           known-broken BIOSes _don't_ actually hide it, so far. */
        pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
        if (!pdev)
                return;

        if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
                pci_dev_put(pdev);
                return;
        }

        pci_dev_put(pdev);

        /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
        if (vtisochctrl & 1)
                return;

        /* Drop all bits other than the number of TLB entries */
        vtisochctrl &= 0x1c;

        /* If we have the recommended number of TLB entries (16), fine. */
        if (vtisochctrl == 0x10)
                return;

        /* Zero TLB entries? You get to ride the short bus to school. */
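        /*
         * A BIOS that routes Azalia DMA to the isoch unit but gives it no
         * TLB entries would deadlock on the first translation, so flag the
         * Azalia device for identity (pass-through) mapping instead.
         * device_def_domain_type() consumes the flag together with
         * IS_AZALIA(), roughly:
         *
         *	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
         *		return IOMMU_DOMAIN_IDENTITY;
         */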
        if (!vtisochctrl) {
                WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
                     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                     dmi_get_system_info(DMI_BIOS_VENDOR),
                     dmi_get_system_info(DMI_BIOS_VERSION),
                     dmi_get_system_info(DMI_PRODUCT_VERSION));
                iommu_identity_mapping |= IDENTMAP_AZALIA;
                return;
        }

        pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
                vtisochctrl);
}