1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 * Leo Duran <leo.duran@amd.com> 6 */ 7 8 #define pr_fmt(fmt) "AMD-Vi: " fmt 9 #define dev_fmt(fmt) pr_fmt(fmt) 10 11 #include <linux/pci.h> 12 #include <linux/acpi.h> 13 #include <linux/list.h> 14 #include <linux/bitmap.h> 15 #include <linux/slab.h> 16 #include <linux/syscore_ops.h> 17 #include <linux/interrupt.h> 18 #include <linux/msi.h> 19 #include <linux/irq.h> 20 #include <linux/amd-iommu.h> 21 #include <linux/export.h> 22 #include <linux/kmemleak.h> 23 #include <linux/cc_platform.h> 24 #include <asm/pci-direct.h> 25 #include <asm/iommu.h> 26 #include <asm/apic.h> 27 #include <asm/gart.h> 28 #include <asm/x86_init.h> 29 #include <asm/iommu_table.h> 30 #include <asm/io_apic.h> 31 #include <asm/irq_remapping.h> 32 #include <asm/set_memory.h> 33 34 #include <linux/crash_dump.h> 35 36 #include "amd_iommu.h" 37 #include "../irq_remapping.h" 38 39 /* 40 * definitions for the ACPI scanning code 41 */ 42 #define IVRS_HEADER_LENGTH 48 43 44 #define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 45 #define ACPI_IVMD_TYPE_ALL 0x20 46 #define ACPI_IVMD_TYPE 0x21 47 #define ACPI_IVMD_TYPE_RANGE 0x22 48 49 #define IVHD_DEV_ALL 0x01 50 #define IVHD_DEV_SELECT 0x02 51 #define IVHD_DEV_SELECT_RANGE_START 0x03 52 #define IVHD_DEV_RANGE_END 0x04 53 #define IVHD_DEV_ALIAS 0x42 54 #define IVHD_DEV_ALIAS_RANGE 0x43 55 #define IVHD_DEV_EXT_SELECT 0x46 56 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 57 #define IVHD_DEV_SPECIAL 0x48 58 #define IVHD_DEV_ACPI_HID 0xf0 59 60 #define UID_NOT_PRESENT 0 61 #define UID_IS_INTEGER 1 62 #define UID_IS_CHARACTER 2 63 64 #define IVHD_SPECIAL_IOAPIC 1 65 #define IVHD_SPECIAL_HPET 2 66 67 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 68 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 69 #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 70 #define IVHD_FLAG_ISOC_EN_MASK 0x08 71 72 #define IVMD_FLAG_EXCL_RANGE 0x08 73 #define IVMD_FLAG_IW 0x04 74 #define IVMD_FLAG_IR 0x02 75 #define IVMD_FLAG_UNITY_MAP 0x01 76 77 #define ACPI_DEVFLAG_INITPASS 0x01 78 #define ACPI_DEVFLAG_EXTINT 0x02 79 #define ACPI_DEVFLAG_NMI 0x04 80 #define ACPI_DEVFLAG_SYSMGT1 0x10 81 #define ACPI_DEVFLAG_SYSMGT2 0x20 82 #define ACPI_DEVFLAG_LINT0 0x40 83 #define ACPI_DEVFLAG_LINT1 0x80 84 #define ACPI_DEVFLAG_ATSDIS 0x10000000 85 86 #define LOOP_TIMEOUT 100000 87 /* 88 * ACPI table definitions 89 * 90 * These data structures are laid over the table to parse the important values 91 * out of it. 92 */ 93 94 extern const struct iommu_ops amd_iommu_ops; 95 96 /* 97 * structure describing one IOMMU in the ACPI table. Typically followed by one 98 * or more ivhd_entrys. 99 */ 100 struct ivhd_header { 101 u8 type; 102 u8 flags; 103 u16 length; 104 u16 devid; 105 u16 cap_ptr; 106 u64 mmio_phys; 107 u16 pci_seg; 108 u16 info; 109 u32 efr_attr; 110 111 /* Following only valid on IVHD type 11h and 40h */ 112 u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ 113 u64 res; 114 } __attribute__((packed)); 115 116 /* 117 * A device entry describing which devices a specific IOMMU translates and 118 * which requestor ids they use. 119 */ 120 struct ivhd_entry { 121 u8 type; 122 u16 devid; 123 u8 flags; 124 struct_group(ext_hid, 125 u32 ext; 126 u32 hidh; 127 ); 128 u64 cid; 129 u8 uidf; 130 u8 uidl; 131 u8 uid; 132 } __attribute__((packed)); 133 134 /* 135 * An AMD IOMMU memory definition structure. It defines things like exclusion 136 * ranges for devices and regions that should be unity mapped. 137 */ 138 struct ivmd_header { 139 u8 type; 140 u8 flags; 141 u16 length; 142 u16 devid; 143 u16 aux; 144 u64 resv; 145 u64 range_start; 146 u64 range_length; 147 } __attribute__((packed)); 148 149 bool amd_iommu_dump; 150 bool amd_iommu_irq_remap __read_mostly; 151 152 enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; 153 154 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 155 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; 156 157 static bool amd_iommu_detected; 158 static bool amd_iommu_disabled __initdata; 159 static bool amd_iommu_force_enable __initdata; 160 static int amd_iommu_target_ivhd_type; 161 162 u16 amd_iommu_last_bdf; /* largest PCI device id we have 163 to handle */ 164 LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 165 we find in ACPI */ 166 167 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 168 system */ 169 170 /* Array to assign indices to IOMMUs*/ 171 struct amd_iommu *amd_iommus[MAX_IOMMUS]; 172 173 /* Number of IOMMUs present in the system */ 174 static int amd_iommus_present; 175 176 /* IOMMUs have a non-present cache? */ 177 bool amd_iommu_np_cache __read_mostly; 178 bool amd_iommu_iotlb_sup __read_mostly = true; 179 180 u32 amd_iommu_max_pasid __read_mostly = ~0; 181 182 bool amd_iommu_v2_present __read_mostly; 183 static bool amd_iommu_pc_present __read_mostly; 184 185 bool amd_iommu_force_isolation __read_mostly; 186 187 /* 188 * Pointer to the device table which is shared by all AMD IOMMUs 189 * it is indexed by the PCI device id or the HT unit id and contains 190 * information about the domain the device belongs to as well as the 191 * page table root pointer. 192 */ 193 struct dev_table_entry *amd_iommu_dev_table; 194 /* 195 * Pointer to a device table which the content of old device table 196 * will be copied to. It's only be used in kdump kernel. 197 */ 198 static struct dev_table_entry *old_dev_tbl_cpy; 199 200 /* 201 * The alias table is a driver specific data structure which contains the 202 * mappings of the PCI device ids to the actual requestor ids on the IOMMU. 203 * More than one device can share the same requestor id. 204 */ 205 u16 *amd_iommu_alias_table; 206 207 /* 208 * The rlookup table is used to find the IOMMU which is responsible 209 * for a specific device. It is also indexed by the PCI device id. 210 */ 211 struct amd_iommu **amd_iommu_rlookup_table; 212 213 /* 214 * This table is used to find the irq remapping table for a given device id 215 * quickly. 216 */ 217 struct irq_remap_table **irq_lookup_table; 218 219 /* 220 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap 221 * to know which ones are already in use. 222 */ 223 unsigned long *amd_iommu_pd_alloc_bitmap; 224 225 static u32 dev_table_size; /* size of the device table */ 226 static u32 alias_table_size; /* size of the alias table */ 227 static u32 rlookup_table_size; /* size if the rlookup table */ 228 229 enum iommu_init_state { 230 IOMMU_START_STATE, 231 IOMMU_IVRS_DETECTED, 232 IOMMU_ACPI_FINISHED, 233 IOMMU_ENABLED, 234 IOMMU_PCI_INIT, 235 IOMMU_INTERRUPTS_EN, 236 IOMMU_INITIALIZED, 237 IOMMU_NOT_FOUND, 238 IOMMU_INIT_ERROR, 239 IOMMU_CMDLINE_DISABLED, 240 }; 241 242 /* Early ioapic and hpet maps from kernel command line */ 243 #define EARLY_MAP_SIZE 4 244 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; 245 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; 246 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE]; 247 248 static int __initdata early_ioapic_map_size; 249 static int __initdata early_hpet_map_size; 250 static int __initdata early_acpihid_map_size; 251 252 static bool __initdata cmdline_maps; 253 254 static enum iommu_init_state init_state = IOMMU_START_STATE; 255 256 static int amd_iommu_enable_interrupts(void); 257 static int __init iommu_go_to_state(enum iommu_init_state state); 258 static void init_device_table_dma(void); 259 260 static bool amd_iommu_pre_enabled = true; 261 262 static u32 amd_iommu_ivinfo __initdata; 263 264 bool translation_pre_enabled(struct amd_iommu *iommu) 265 { 266 return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); 267 } 268 269 static void clear_translation_pre_enabled(struct amd_iommu *iommu) 270 { 271 iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 272 } 273 274 static void init_translation_status(struct amd_iommu *iommu) 275 { 276 u64 ctrl; 277 278 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 279 if (ctrl & (1<<CONTROL_IOMMU_EN)) 280 iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 281 } 282 283 static inline void update_last_devid(u16 devid) 284 { 285 if (devid > amd_iommu_last_bdf) 286 amd_iommu_last_bdf = devid; 287 } 288 289 static inline unsigned long tbl_size(int entry_size) 290 { 291 unsigned shift = PAGE_SHIFT + 292 get_order(((int)amd_iommu_last_bdf + 1) * entry_size); 293 294 return 1UL << shift; 295 } 296 297 int amd_iommu_get_num_iommus(void) 298 { 299 return amd_iommus_present; 300 } 301 302 #ifdef CONFIG_IRQ_REMAP 303 static bool check_feature_on_all_iommus(u64 mask) 304 { 305 bool ret = false; 306 struct amd_iommu *iommu; 307 308 for_each_iommu(iommu) { 309 ret = iommu_feature(iommu, mask); 310 if (!ret) 311 return false; 312 } 313 314 return true; 315 } 316 #endif 317 318 /* 319 * For IVHD type 0x11/0x40, EFR is also available via IVHD. 320 * Default to IVHD EFR since it is available sooner 321 * (i.e. before PCI init). 322 */ 323 static void __init early_iommu_features_init(struct amd_iommu *iommu, 324 struct ivhd_header *h) 325 { 326 if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) 327 iommu->features = h->efr_reg; 328 } 329 330 /* Access to l1 and l2 indexed register spaces */ 331 332 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) 333 { 334 u32 val; 335 336 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 337 pci_read_config_dword(iommu->dev, 0xfc, &val); 338 return val; 339 } 340 341 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) 342 { 343 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); 344 pci_write_config_dword(iommu->dev, 0xfc, val); 345 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 346 } 347 348 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) 349 { 350 u32 val; 351 352 pci_write_config_dword(iommu->dev, 0xf0, address); 353 pci_read_config_dword(iommu->dev, 0xf4, &val); 354 return val; 355 } 356 357 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) 358 { 359 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); 360 pci_write_config_dword(iommu->dev, 0xf4, val); 361 } 362 363 /**************************************************************************** 364 * 365 * AMD IOMMU MMIO register space handling functions 366 * 367 * These functions are used to program the IOMMU device registers in 368 * MMIO space required for that driver. 369 * 370 ****************************************************************************/ 371 372 /* 373 * This function set the exclusion range in the IOMMU. DMA accesses to the 374 * exclusion range are passed through untranslated 375 */ 376 static void iommu_set_exclusion_range(struct amd_iommu *iommu) 377 { 378 u64 start = iommu->exclusion_start & PAGE_MASK; 379 u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; 380 u64 entry; 381 382 if (!iommu->exclusion_start) 383 return; 384 385 entry = start | MMIO_EXCL_ENABLE_MASK; 386 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 387 &entry, sizeof(entry)); 388 389 entry = limit; 390 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 391 &entry, sizeof(entry)); 392 } 393 394 static void iommu_set_cwwb_range(struct amd_iommu *iommu) 395 { 396 u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); 397 u64 entry = start & PM_ADDR_MASK; 398 399 if (!iommu_feature(iommu, FEATURE_SNP)) 400 return; 401 402 /* Note: 403 * Re-purpose Exclusion base/limit registers for Completion wait 404 * write-back base/limit. 405 */ 406 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 407 &entry, sizeof(entry)); 408 409 /* Note: 410 * Default to 4 Kbytes, which can be specified by setting base 411 * address equal to the limit address. 412 */ 413 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 414 &entry, sizeof(entry)); 415 } 416 417 /* Programs the physical address of the device table into the IOMMU hardware */ 418 static void iommu_set_device_table(struct amd_iommu *iommu) 419 { 420 u64 entry; 421 422 BUG_ON(iommu->mmio_base == NULL); 423 424 entry = iommu_virt_to_phys(amd_iommu_dev_table); 425 entry |= (dev_table_size >> 12) - 1; 426 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, 427 &entry, sizeof(entry)); 428 } 429 430 /* Generic functions to enable/disable certain features of the IOMMU. */ 431 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 432 { 433 u64 ctrl; 434 435 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 436 ctrl |= (1ULL << bit); 437 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 438 } 439 440 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 441 { 442 u64 ctrl; 443 444 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 445 ctrl &= ~(1ULL << bit); 446 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 447 } 448 449 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) 450 { 451 u64 ctrl; 452 453 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 454 ctrl &= ~CTRL_INV_TO_MASK; 455 ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; 456 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 457 } 458 459 /* Function to enable the hardware */ 460 static void iommu_enable(struct amd_iommu *iommu) 461 { 462 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 463 } 464 465 static void iommu_disable(struct amd_iommu *iommu) 466 { 467 if (!iommu->mmio_base) 468 return; 469 470 /* Disable command buffer */ 471 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 472 473 /* Disable event logging and event interrupts */ 474 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); 475 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 476 477 /* Disable IOMMU GA_LOG */ 478 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 479 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 480 481 /* Disable IOMMU hardware itself */ 482 iommu_feature_disable(iommu, CONTROL_IOMMU_EN); 483 } 484 485 /* 486 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in 487 * the system has one. 488 */ 489 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) 490 { 491 if (!request_mem_region(address, end, "amd_iommu")) { 492 pr_err("Can not reserve memory region %llx-%llx for mmio\n", 493 address, end); 494 pr_err("This is a BIOS bug. Please contact your hardware vendor\n"); 495 return NULL; 496 } 497 498 return (u8 __iomem *)ioremap(address, end); 499 } 500 501 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) 502 { 503 if (iommu->mmio_base) 504 iounmap(iommu->mmio_base); 505 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); 506 } 507 508 static inline u32 get_ivhd_header_size(struct ivhd_header *h) 509 { 510 u32 size = 0; 511 512 switch (h->type) { 513 case 0x10: 514 size = 24; 515 break; 516 case 0x11: 517 case 0x40: 518 size = 40; 519 break; 520 } 521 return size; 522 } 523 524 /**************************************************************************** 525 * 526 * The functions below belong to the first pass of AMD IOMMU ACPI table 527 * parsing. In this pass we try to find out the highest device id this 528 * code has to handle. Upon this information the size of the shared data 529 * structures is determined later. 530 * 531 ****************************************************************************/ 532 533 /* 534 * This function calculates the length of a given IVHD entry 535 */ 536 static inline int ivhd_entry_length(u8 *ivhd) 537 { 538 u32 type = ((struct ivhd_entry *)ivhd)->type; 539 540 if (type < 0x80) { 541 return 0x04 << (*ivhd >> 6); 542 } else if (type == IVHD_DEV_ACPI_HID) { 543 /* For ACPI_HID, offset 21 is uid len */ 544 return *((u8 *)ivhd + 21) + 22; 545 } 546 return 0; 547 } 548 549 /* 550 * After reading the highest device id from the IOMMU PCI capability header 551 * this function looks if there is a higher device id defined in the ACPI table 552 */ 553 static int __init find_last_devid_from_ivhd(struct ivhd_header *h) 554 { 555 u8 *p = (void *)h, *end = (void *)h; 556 struct ivhd_entry *dev; 557 558 u32 ivhd_size = get_ivhd_header_size(h); 559 560 if (!ivhd_size) { 561 pr_err("Unsupported IVHD type %#x\n", h->type); 562 return -EINVAL; 563 } 564 565 p += ivhd_size; 566 end += h->length; 567 568 while (p < end) { 569 dev = (struct ivhd_entry *)p; 570 switch (dev->type) { 571 case IVHD_DEV_ALL: 572 /* Use maximum BDF value for DEV_ALL */ 573 update_last_devid(0xffff); 574 break; 575 case IVHD_DEV_SELECT: 576 case IVHD_DEV_RANGE_END: 577 case IVHD_DEV_ALIAS: 578 case IVHD_DEV_EXT_SELECT: 579 /* all the above subfield types refer to device ids */ 580 update_last_devid(dev->devid); 581 break; 582 default: 583 break; 584 } 585 p += ivhd_entry_length(p); 586 } 587 588 WARN_ON(p != end); 589 590 return 0; 591 } 592 593 static int __init check_ivrs_checksum(struct acpi_table_header *table) 594 { 595 int i; 596 u8 checksum = 0, *p = (u8 *)table; 597 598 for (i = 0; i < table->length; ++i) 599 checksum += p[i]; 600 if (checksum != 0) { 601 /* ACPI table corrupt */ 602 pr_err(FW_BUG "IVRS invalid checksum\n"); 603 return -ENODEV; 604 } 605 606 return 0; 607 } 608 609 /* 610 * Iterate over all IVHD entries in the ACPI table and find the highest device 611 * id which we need to handle. This is the first of three functions which parse 612 * the ACPI table. So we check the checksum here. 613 */ 614 static int __init find_last_devid_acpi(struct acpi_table_header *table) 615 { 616 u8 *p = (u8 *)table, *end = (u8 *)table; 617 struct ivhd_header *h; 618 619 p += IVRS_HEADER_LENGTH; 620 621 end += table->length; 622 while (p < end) { 623 h = (struct ivhd_header *)p; 624 if (h->type == amd_iommu_target_ivhd_type) { 625 int ret = find_last_devid_from_ivhd(h); 626 627 if (ret) 628 return ret; 629 } 630 p += h->length; 631 } 632 WARN_ON(p != end); 633 634 return 0; 635 } 636 637 /**************************************************************************** 638 * 639 * The following functions belong to the code path which parses the ACPI table 640 * the second time. In this ACPI parsing iteration we allocate IOMMU specific 641 * data structures, initialize the device/alias/rlookup table and also 642 * basically initialize the hardware. 643 * 644 ****************************************************************************/ 645 646 /* 647 * Allocates the command buffer. This buffer is per AMD IOMMU. We can 648 * write commands to that buffer later and the IOMMU will execute them 649 * asynchronously 650 */ 651 static int __init alloc_command_buffer(struct amd_iommu *iommu) 652 { 653 iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 654 get_order(CMD_BUFFER_SIZE)); 655 656 return iommu->cmd_buf ? 0 : -ENOMEM; 657 } 658 659 /* 660 * This function resets the command buffer if the IOMMU stopped fetching 661 * commands from it. 662 */ 663 void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) 664 { 665 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 666 667 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 668 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 669 iommu->cmd_buf_head = 0; 670 iommu->cmd_buf_tail = 0; 671 672 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 673 } 674 675 /* 676 * This function writes the command buffer address to the hardware and 677 * enables it. 678 */ 679 static void iommu_enable_command_buffer(struct amd_iommu *iommu) 680 { 681 u64 entry; 682 683 BUG_ON(iommu->cmd_buf == NULL); 684 685 entry = iommu_virt_to_phys(iommu->cmd_buf); 686 entry |= MMIO_CMD_SIZE_512; 687 688 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 689 &entry, sizeof(entry)); 690 691 amd_iommu_reset_cmd_buffer(iommu); 692 } 693 694 /* 695 * This function disables the command buffer 696 */ 697 static void iommu_disable_command_buffer(struct amd_iommu *iommu) 698 { 699 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 700 } 701 702 static void __init free_command_buffer(struct amd_iommu *iommu) 703 { 704 free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); 705 } 706 707 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, 708 gfp_t gfp, size_t size) 709 { 710 int order = get_order(size); 711 void *buf = (void *)__get_free_pages(gfp, order); 712 713 if (buf && 714 iommu_feature(iommu, FEATURE_SNP) && 715 set_memory_4k((unsigned long)buf, (1 << order))) { 716 free_pages((unsigned long)buf, order); 717 buf = NULL; 718 } 719 720 return buf; 721 } 722 723 /* allocates the memory where the IOMMU will log its events to */ 724 static int __init alloc_event_buffer(struct amd_iommu *iommu) 725 { 726 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 727 EVT_BUFFER_SIZE); 728 729 return iommu->evt_buf ? 0 : -ENOMEM; 730 } 731 732 static void iommu_enable_event_buffer(struct amd_iommu *iommu) 733 { 734 u64 entry; 735 736 BUG_ON(iommu->evt_buf == NULL); 737 738 entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 739 740 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 741 &entry, sizeof(entry)); 742 743 /* set head and tail to zero manually */ 744 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 745 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); 746 747 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 748 } 749 750 /* 751 * This function disables the event log buffer 752 */ 753 static void iommu_disable_event_buffer(struct amd_iommu *iommu) 754 { 755 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 756 } 757 758 static void __init free_event_buffer(struct amd_iommu *iommu) 759 { 760 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); 761 } 762 763 /* allocates the memory where the IOMMU will log its events to */ 764 static int __init alloc_ppr_log(struct amd_iommu *iommu) 765 { 766 iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 767 PPR_LOG_SIZE); 768 769 return iommu->ppr_log ? 0 : -ENOMEM; 770 } 771 772 static void iommu_enable_ppr_log(struct amd_iommu *iommu) 773 { 774 u64 entry; 775 776 if (iommu->ppr_log == NULL) 777 return; 778 779 entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; 780 781 memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, 782 &entry, sizeof(entry)); 783 784 /* set head and tail to zero manually */ 785 writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 786 writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 787 788 iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); 789 iommu_feature_enable(iommu, CONTROL_PPR_EN); 790 } 791 792 static void __init free_ppr_log(struct amd_iommu *iommu) 793 { 794 free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); 795 } 796 797 static void free_ga_log(struct amd_iommu *iommu) 798 { 799 #ifdef CONFIG_IRQ_REMAP 800 free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE)); 801 free_pages((unsigned long)iommu->ga_log_tail, get_order(8)); 802 #endif 803 } 804 805 static int iommu_ga_log_enable(struct amd_iommu *iommu) 806 { 807 #ifdef CONFIG_IRQ_REMAP 808 u32 status, i; 809 810 if (!iommu->ga_log) 811 return -EINVAL; 812 813 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 814 815 /* Check if already running */ 816 if (status & (MMIO_STATUS_GALOG_RUN_MASK)) 817 return 0; 818 819 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 820 iommu_feature_enable(iommu, CONTROL_GALOG_EN); 821 822 for (i = 0; i < LOOP_TIMEOUT; ++i) { 823 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 824 if (status & (MMIO_STATUS_GALOG_RUN_MASK)) 825 break; 826 } 827 828 if (i >= LOOP_TIMEOUT) 829 return -EINVAL; 830 #endif /* CONFIG_IRQ_REMAP */ 831 return 0; 832 } 833 834 static int iommu_init_ga_log(struct amd_iommu *iommu) 835 { 836 #ifdef CONFIG_IRQ_REMAP 837 u64 entry; 838 839 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 840 return 0; 841 842 iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 843 get_order(GA_LOG_SIZE)); 844 if (!iommu->ga_log) 845 goto err_out; 846 847 iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 848 get_order(8)); 849 if (!iommu->ga_log_tail) 850 goto err_out; 851 852 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 853 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 854 &entry, sizeof(entry)); 855 entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 856 (BIT_ULL(52)-1)) & ~7ULL; 857 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 858 &entry, sizeof(entry)); 859 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 860 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 861 862 return 0; 863 err_out: 864 free_ga_log(iommu); 865 return -EINVAL; 866 #else 867 return 0; 868 #endif /* CONFIG_IRQ_REMAP */ 869 } 870 871 static int __init alloc_cwwb_sem(struct amd_iommu *iommu) 872 { 873 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); 874 875 return iommu->cmd_sem ? 0 : -ENOMEM; 876 } 877 878 static void __init free_cwwb_sem(struct amd_iommu *iommu) 879 { 880 if (iommu->cmd_sem) 881 free_page((unsigned long)iommu->cmd_sem); 882 } 883 884 static void iommu_enable_xt(struct amd_iommu *iommu) 885 { 886 #ifdef CONFIG_IRQ_REMAP 887 /* 888 * XT mode (32-bit APIC destination ID) requires 889 * GA mode (128-bit IRTE support) as a prerequisite. 890 */ 891 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) && 892 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 893 iommu_feature_enable(iommu, CONTROL_XT_EN); 894 #endif /* CONFIG_IRQ_REMAP */ 895 } 896 897 static void iommu_enable_gt(struct amd_iommu *iommu) 898 { 899 if (!iommu_feature(iommu, FEATURE_GT)) 900 return; 901 902 iommu_feature_enable(iommu, CONTROL_GT_EN); 903 } 904 905 /* sets a specific bit in the device table entry. */ 906 static void set_dev_entry_bit(u16 devid, u8 bit) 907 { 908 int i = (bit >> 6) & 0x03; 909 int _bit = bit & 0x3f; 910 911 amd_iommu_dev_table[devid].data[i] |= (1UL << _bit); 912 } 913 914 static int get_dev_entry_bit(u16 devid, u8 bit) 915 { 916 int i = (bit >> 6) & 0x03; 917 int _bit = bit & 0x3f; 918 919 return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit; 920 } 921 922 923 static bool copy_device_table(void) 924 { 925 u64 int_ctl, int_tab_len, entry = 0, last_entry = 0; 926 struct dev_table_entry *old_devtb = NULL; 927 u32 lo, hi, devid, old_devtb_size; 928 phys_addr_t old_devtb_phys; 929 struct amd_iommu *iommu; 930 u16 dom_id, dte_v, irq_v; 931 gfp_t gfp_flag; 932 u64 tmp; 933 934 if (!amd_iommu_pre_enabled) 935 return false; 936 937 pr_warn("Translation is already enabled - trying to copy translation structures\n"); 938 for_each_iommu(iommu) { 939 /* All IOMMUs should use the same device table with the same size */ 940 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); 941 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 942 entry = (((u64) hi) << 32) + lo; 943 if (last_entry && last_entry != entry) { 944 pr_err("IOMMU:%d should use the same dev table as others!\n", 945 iommu->index); 946 return false; 947 } 948 last_entry = entry; 949 950 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 951 if (old_devtb_size != dev_table_size) { 952 pr_err("The device table size of IOMMU:%d is not expected!\n", 953 iommu->index); 954 return false; 955 } 956 } 957 958 /* 959 * When SME is enabled in the first kernel, the entry includes the 960 * memory encryption mask(sme_me_mask), we must remove the memory 961 * encryption mask to obtain the true physical address in kdump kernel. 962 */ 963 old_devtb_phys = __sme_clr(entry) & PAGE_MASK; 964 965 if (old_devtb_phys >= 0x100000000ULL) { 966 pr_err("The address of old device table is above 4G, not trustworthy!\n"); 967 return false; 968 } 969 old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) 970 ? (__force void *)ioremap_encrypted(old_devtb_phys, 971 dev_table_size) 972 : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); 973 974 if (!old_devtb) 975 return false; 976 977 gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; 978 old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, 979 get_order(dev_table_size)); 980 if (old_dev_tbl_cpy == NULL) { 981 pr_err("Failed to allocate memory for copying old device table!\n"); 982 return false; 983 } 984 985 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { 986 old_dev_tbl_cpy[devid] = old_devtb[devid]; 987 dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; 988 dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; 989 990 if (dte_v && dom_id) { 991 old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; 992 old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; 993 __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); 994 /* If gcr3 table existed, mask it out */ 995 if (old_devtb[devid].data[0] & DTE_FLAG_GV) { 996 tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; 997 tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; 998 old_dev_tbl_cpy[devid].data[1] &= ~tmp; 999 tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; 1000 tmp |= DTE_FLAG_GV; 1001 old_dev_tbl_cpy[devid].data[0] &= ~tmp; 1002 } 1003 } 1004 1005 irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; 1006 int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; 1007 int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; 1008 if (irq_v && (int_ctl || int_tab_len)) { 1009 if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || 1010 (int_tab_len != DTE_INTTABLEN)) { 1011 pr_err("Wrong old irq remapping flag: %#x\n", devid); 1012 return false; 1013 } 1014 1015 old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; 1016 } 1017 } 1018 memunmap(old_devtb); 1019 1020 return true; 1021 } 1022 1023 void amd_iommu_apply_erratum_63(u16 devid) 1024 { 1025 int sysmgt; 1026 1027 sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | 1028 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); 1029 1030 if (sysmgt == 0x01) 1031 set_dev_entry_bit(devid, DEV_ENTRY_IW); 1032 } 1033 1034 /* Writes the specific IOMMU for a device into the rlookup table */ 1035 static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) 1036 { 1037 amd_iommu_rlookup_table[devid] = iommu; 1038 } 1039 1040 /* 1041 * This function takes the device specific flags read from the ACPI 1042 * table and sets up the device table entry with that information 1043 */ 1044 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, 1045 u16 devid, u32 flags, u32 ext_flags) 1046 { 1047 if (flags & ACPI_DEVFLAG_INITPASS) 1048 set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); 1049 if (flags & ACPI_DEVFLAG_EXTINT) 1050 set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); 1051 if (flags & ACPI_DEVFLAG_NMI) 1052 set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); 1053 if (flags & ACPI_DEVFLAG_SYSMGT1) 1054 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); 1055 if (flags & ACPI_DEVFLAG_SYSMGT2) 1056 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); 1057 if (flags & ACPI_DEVFLAG_LINT0) 1058 set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); 1059 if (flags & ACPI_DEVFLAG_LINT1) 1060 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); 1061 1062 amd_iommu_apply_erratum_63(devid); 1063 1064 set_iommu_for_device(iommu, devid); 1065 } 1066 1067 int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) 1068 { 1069 struct devid_map *entry; 1070 struct list_head *list; 1071 1072 if (type == IVHD_SPECIAL_IOAPIC) 1073 list = &ioapic_map; 1074 else if (type == IVHD_SPECIAL_HPET) 1075 list = &hpet_map; 1076 else 1077 return -EINVAL; 1078 1079 list_for_each_entry(entry, list, list) { 1080 if (!(entry->id == id && entry->cmd_line)) 1081 continue; 1082 1083 pr_info("Command-line override present for %s id %d - ignoring\n", 1084 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); 1085 1086 *devid = entry->devid; 1087 1088 return 0; 1089 } 1090 1091 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1092 if (!entry) 1093 return -ENOMEM; 1094 1095 entry->id = id; 1096 entry->devid = *devid; 1097 entry->cmd_line = cmd_line; 1098 1099 list_add_tail(&entry->list, list); 1100 1101 return 0; 1102 } 1103 1104 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid, 1105 bool cmd_line) 1106 { 1107 struct acpihid_map_entry *entry; 1108 struct list_head *list = &acpihid_map; 1109 1110 list_for_each_entry(entry, list, list) { 1111 if (strcmp(entry->hid, hid) || 1112 (*uid && *entry->uid && strcmp(entry->uid, uid)) || 1113 !entry->cmd_line) 1114 continue; 1115 1116 pr_info("Command-line override for hid:%s uid:%s\n", 1117 hid, uid); 1118 *devid = entry->devid; 1119 return 0; 1120 } 1121 1122 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1123 if (!entry) 1124 return -ENOMEM; 1125 1126 memcpy(entry->uid, uid, strlen(uid)); 1127 memcpy(entry->hid, hid, strlen(hid)); 1128 entry->devid = *devid; 1129 entry->cmd_line = cmd_line; 1130 entry->root_devid = (entry->devid & (~0x7)); 1131 1132 pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", 1133 entry->cmd_line ? "cmd" : "ivrs", 1134 entry->hid, entry->uid, entry->root_devid); 1135 1136 list_add_tail(&entry->list, list); 1137 return 0; 1138 } 1139 1140 static int __init add_early_maps(void) 1141 { 1142 int i, ret; 1143 1144 for (i = 0; i < early_ioapic_map_size; ++i) { 1145 ret = add_special_device(IVHD_SPECIAL_IOAPIC, 1146 early_ioapic_map[i].id, 1147 &early_ioapic_map[i].devid, 1148 early_ioapic_map[i].cmd_line); 1149 if (ret) 1150 return ret; 1151 } 1152 1153 for (i = 0; i < early_hpet_map_size; ++i) { 1154 ret = add_special_device(IVHD_SPECIAL_HPET, 1155 early_hpet_map[i].id, 1156 &early_hpet_map[i].devid, 1157 early_hpet_map[i].cmd_line); 1158 if (ret) 1159 return ret; 1160 } 1161 1162 for (i = 0; i < early_acpihid_map_size; ++i) { 1163 ret = add_acpi_hid_device(early_acpihid_map[i].hid, 1164 early_acpihid_map[i].uid, 1165 &early_acpihid_map[i].devid, 1166 early_acpihid_map[i].cmd_line); 1167 if (ret) 1168 return ret; 1169 } 1170 1171 return 0; 1172 } 1173 1174 /* 1175 * Takes a pointer to an AMD IOMMU entry in the ACPI table and 1176 * initializes the hardware and our data structures with it. 1177 */ 1178 static int __init init_iommu_from_acpi(struct amd_iommu *iommu, 1179 struct ivhd_header *h) 1180 { 1181 u8 *p = (u8 *)h; 1182 u8 *end = p, flags = 0; 1183 u16 devid = 0, devid_start = 0, devid_to = 0; 1184 u32 dev_i, ext_flags = 0; 1185 bool alias = false; 1186 struct ivhd_entry *e; 1187 u32 ivhd_size; 1188 int ret; 1189 1190 1191 ret = add_early_maps(); 1192 if (ret) 1193 return ret; 1194 1195 amd_iommu_apply_ivrs_quirks(); 1196 1197 /* 1198 * First save the recommended feature enable bits from ACPI 1199 */ 1200 iommu->acpi_flags = h->flags; 1201 1202 /* 1203 * Done. Now parse the device entries 1204 */ 1205 ivhd_size = get_ivhd_header_size(h); 1206 if (!ivhd_size) { 1207 pr_err("Unsupported IVHD type %#x\n", h->type); 1208 return -EINVAL; 1209 } 1210 1211 p += ivhd_size; 1212 1213 end += h->length; 1214 1215 1216 while (p < end) { 1217 e = (struct ivhd_entry *)p; 1218 switch (e->type) { 1219 case IVHD_DEV_ALL: 1220 1221 DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); 1222 1223 for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) 1224 set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); 1225 break; 1226 case IVHD_DEV_SELECT: 1227 1228 DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " 1229 "flags: %02x\n", 1230 PCI_BUS_NUM(e->devid), 1231 PCI_SLOT(e->devid), 1232 PCI_FUNC(e->devid), 1233 e->flags); 1234 1235 devid = e->devid; 1236 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1237 break; 1238 case IVHD_DEV_SELECT_RANGE_START: 1239 1240 DUMP_printk(" DEV_SELECT_RANGE_START\t " 1241 "devid: %02x:%02x.%x flags: %02x\n", 1242 PCI_BUS_NUM(e->devid), 1243 PCI_SLOT(e->devid), 1244 PCI_FUNC(e->devid), 1245 e->flags); 1246 1247 devid_start = e->devid; 1248 flags = e->flags; 1249 ext_flags = 0; 1250 alias = false; 1251 break; 1252 case IVHD_DEV_ALIAS: 1253 1254 DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " 1255 "flags: %02x devid_to: %02x:%02x.%x\n", 1256 PCI_BUS_NUM(e->devid), 1257 PCI_SLOT(e->devid), 1258 PCI_FUNC(e->devid), 1259 e->flags, 1260 PCI_BUS_NUM(e->ext >> 8), 1261 PCI_SLOT(e->ext >> 8), 1262 PCI_FUNC(e->ext >> 8)); 1263 1264 devid = e->devid; 1265 devid_to = e->ext >> 8; 1266 set_dev_entry_from_acpi(iommu, devid , e->flags, 0); 1267 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 1268 amd_iommu_alias_table[devid] = devid_to; 1269 break; 1270 case IVHD_DEV_ALIAS_RANGE: 1271 1272 DUMP_printk(" DEV_ALIAS_RANGE\t\t " 1273 "devid: %02x:%02x.%x flags: %02x " 1274 "devid_to: %02x:%02x.%x\n", 1275 PCI_BUS_NUM(e->devid), 1276 PCI_SLOT(e->devid), 1277 PCI_FUNC(e->devid), 1278 e->flags, 1279 PCI_BUS_NUM(e->ext >> 8), 1280 PCI_SLOT(e->ext >> 8), 1281 PCI_FUNC(e->ext >> 8)); 1282 1283 devid_start = e->devid; 1284 flags = e->flags; 1285 devid_to = e->ext >> 8; 1286 ext_flags = 0; 1287 alias = true; 1288 break; 1289 case IVHD_DEV_EXT_SELECT: 1290 1291 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " 1292 "flags: %02x ext: %08x\n", 1293 PCI_BUS_NUM(e->devid), 1294 PCI_SLOT(e->devid), 1295 PCI_FUNC(e->devid), 1296 e->flags, e->ext); 1297 1298 devid = e->devid; 1299 set_dev_entry_from_acpi(iommu, devid, e->flags, 1300 e->ext); 1301 break; 1302 case IVHD_DEV_EXT_SELECT_RANGE: 1303 1304 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " 1305 "%02x:%02x.%x flags: %02x ext: %08x\n", 1306 PCI_BUS_NUM(e->devid), 1307 PCI_SLOT(e->devid), 1308 PCI_FUNC(e->devid), 1309 e->flags, e->ext); 1310 1311 devid_start = e->devid; 1312 flags = e->flags; 1313 ext_flags = e->ext; 1314 alias = false; 1315 break; 1316 case IVHD_DEV_RANGE_END: 1317 1318 DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", 1319 PCI_BUS_NUM(e->devid), 1320 PCI_SLOT(e->devid), 1321 PCI_FUNC(e->devid)); 1322 1323 devid = e->devid; 1324 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 1325 if (alias) { 1326 amd_iommu_alias_table[dev_i] = devid_to; 1327 set_dev_entry_from_acpi(iommu, 1328 devid_to, flags, ext_flags); 1329 } 1330 set_dev_entry_from_acpi(iommu, dev_i, 1331 flags, ext_flags); 1332 } 1333 break; 1334 case IVHD_DEV_SPECIAL: { 1335 u8 handle, type; 1336 const char *var; 1337 u16 devid; 1338 int ret; 1339 1340 handle = e->ext & 0xff; 1341 devid = (e->ext >> 8) & 0xffff; 1342 type = (e->ext >> 24) & 0xff; 1343 1344 if (type == IVHD_SPECIAL_IOAPIC) 1345 var = "IOAPIC"; 1346 else if (type == IVHD_SPECIAL_HPET) 1347 var = "HPET"; 1348 else 1349 var = "UNKNOWN"; 1350 1351 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", 1352 var, (int)handle, 1353 PCI_BUS_NUM(devid), 1354 PCI_SLOT(devid), 1355 PCI_FUNC(devid)); 1356 1357 ret = add_special_device(type, handle, &devid, false); 1358 if (ret) 1359 return ret; 1360 1361 /* 1362 * add_special_device might update the devid in case a 1363 * command-line override is present. So call 1364 * set_dev_entry_from_acpi after add_special_device. 1365 */ 1366 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1367 1368 break; 1369 } 1370 case IVHD_DEV_ACPI_HID: { 1371 u16 devid; 1372 u8 hid[ACPIHID_HID_LEN]; 1373 u8 uid[ACPIHID_UID_LEN]; 1374 int ret; 1375 1376 if (h->type != 0x40) { 1377 pr_err(FW_BUG "Invalid IVHD device type %#x\n", 1378 e->type); 1379 break; 1380 } 1381 1382 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1); 1383 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1); 1384 hid[ACPIHID_HID_LEN - 1] = '\0'; 1385 1386 if (!(*hid)) { 1387 pr_err(FW_BUG "Invalid HID.\n"); 1388 break; 1389 } 1390 1391 uid[0] = '\0'; 1392 switch (e->uidf) { 1393 case UID_NOT_PRESENT: 1394 1395 if (e->uidl != 0) 1396 pr_warn(FW_BUG "Invalid UID length.\n"); 1397 1398 break; 1399 case UID_IS_INTEGER: 1400 1401 sprintf(uid, "%d", e->uid); 1402 1403 break; 1404 case UID_IS_CHARACTER: 1405 1406 memcpy(uid, &e->uid, e->uidl); 1407 uid[e->uidl] = '\0'; 1408 1409 break; 1410 default: 1411 break; 1412 } 1413 1414 devid = e->devid; 1415 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", 1416 hid, uid, 1417 PCI_BUS_NUM(devid), 1418 PCI_SLOT(devid), 1419 PCI_FUNC(devid)); 1420 1421 flags = e->flags; 1422 1423 ret = add_acpi_hid_device(hid, uid, &devid, false); 1424 if (ret) 1425 return ret; 1426 1427 /* 1428 * add_special_device might update the devid in case a 1429 * command-line override is present. So call 1430 * set_dev_entry_from_acpi after add_special_device. 1431 */ 1432 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1433 1434 break; 1435 } 1436 default: 1437 break; 1438 } 1439 1440 p += ivhd_entry_length(p); 1441 } 1442 1443 return 0; 1444 } 1445 1446 static void __init free_iommu_one(struct amd_iommu *iommu) 1447 { 1448 free_cwwb_sem(iommu); 1449 free_command_buffer(iommu); 1450 free_event_buffer(iommu); 1451 free_ppr_log(iommu); 1452 free_ga_log(iommu); 1453 iommu_unmap_mmio_space(iommu); 1454 } 1455 1456 static void __init free_iommu_all(void) 1457 { 1458 struct amd_iommu *iommu, *next; 1459 1460 for_each_iommu_safe(iommu, next) { 1461 list_del(&iommu->list); 1462 free_iommu_one(iommu); 1463 kfree(iommu); 1464 } 1465 } 1466 1467 /* 1468 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) 1469 * Workaround: 1470 * BIOS should disable L2B micellaneous clock gating by setting 1471 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b 1472 */ 1473 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) 1474 { 1475 u32 value; 1476 1477 if ((boot_cpu_data.x86 != 0x15) || 1478 (boot_cpu_data.x86_model < 0x10) || 1479 (boot_cpu_data.x86_model > 0x1f)) 1480 return; 1481 1482 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1483 pci_read_config_dword(iommu->dev, 0xf4, &value); 1484 1485 if (value & BIT(2)) 1486 return; 1487 1488 /* Select NB indirect register 0x90 and enable writing */ 1489 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1490 1491 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1492 pci_info(iommu->dev, "Applying erratum 746 workaround\n"); 1493 1494 /* Clear the enable writing bit */ 1495 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1496 } 1497 1498 /* 1499 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) 1500 * Workaround: 1501 * BIOS should enable ATS write permission check by setting 1502 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b 1503 */ 1504 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) 1505 { 1506 u32 value; 1507 1508 if ((boot_cpu_data.x86 != 0x15) || 1509 (boot_cpu_data.x86_model < 0x30) || 1510 (boot_cpu_data.x86_model > 0x3f)) 1511 return; 1512 1513 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ 1514 value = iommu_read_l2(iommu, 0x47); 1515 1516 if (value & BIT(0)) 1517 return; 1518 1519 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ 1520 iommu_write_l2(iommu, 0x47, value | BIT(0)); 1521 1522 pci_info(iommu->dev, "Applying ATS write check workaround\n"); 1523 } 1524 1525 /* 1526 * This function clues the initialization function for one IOMMU 1527 * together and also allocates the command buffer and programs the 1528 * hardware. It does NOT enable the IOMMU. This is done afterwards. 1529 */ 1530 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) 1531 { 1532 int ret; 1533 1534 raw_spin_lock_init(&iommu->lock); 1535 iommu->cmd_sem_val = 0; 1536 1537 /* Add IOMMU to internal data structures */ 1538 list_add_tail(&iommu->list, &amd_iommu_list); 1539 iommu->index = amd_iommus_present++; 1540 1541 if (unlikely(iommu->index >= MAX_IOMMUS)) { 1542 WARN(1, "System has more IOMMUs than supported by this driver\n"); 1543 return -ENOSYS; 1544 } 1545 1546 /* Index is fine - add IOMMU to the array */ 1547 amd_iommus[iommu->index] = iommu; 1548 1549 /* 1550 * Copy data from ACPI table entry to the iommu struct 1551 */ 1552 iommu->devid = h->devid; 1553 iommu->cap_ptr = h->cap_ptr; 1554 iommu->pci_seg = h->pci_seg; 1555 iommu->mmio_phys = h->mmio_phys; 1556 1557 switch (h->type) { 1558 case 0x10: 1559 /* Check if IVHD EFR contains proper max banks/counters */ 1560 if ((h->efr_attr != 0) && 1561 ((h->efr_attr & (0xF << 13)) != 0) && 1562 ((h->efr_attr & (0x3F << 17)) != 0)) 1563 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1564 else 1565 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1566 1567 /* 1568 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. 1569 * GAM also requires GA mode. Therefore, we need to 1570 * check cmpxchg16b support before enabling it. 1571 */ 1572 if (!boot_cpu_has(X86_FEATURE_CX16) || 1573 ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) 1574 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1575 break; 1576 case 0x11: 1577 case 0x40: 1578 if (h->efr_reg & (1 << 9)) 1579 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1580 else 1581 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1582 1583 /* 1584 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. 1585 * XT, GAM also requires GA mode. Therefore, we need to 1586 * check cmpxchg16b support before enabling them. 1587 */ 1588 if (!boot_cpu_has(X86_FEATURE_CX16) || 1589 ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { 1590 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1591 break; 1592 } 1593 1594 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1595 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1596 1597 early_iommu_features_init(iommu, h); 1598 1599 break; 1600 default: 1601 return -EINVAL; 1602 } 1603 1604 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, 1605 iommu->mmio_phys_end); 1606 if (!iommu->mmio_base) 1607 return -ENOMEM; 1608 1609 if (alloc_cwwb_sem(iommu)) 1610 return -ENOMEM; 1611 1612 if (alloc_command_buffer(iommu)) 1613 return -ENOMEM; 1614 1615 if (alloc_event_buffer(iommu)) 1616 return -ENOMEM; 1617 1618 iommu->int_enabled = false; 1619 1620 init_translation_status(iommu); 1621 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1622 iommu_disable(iommu); 1623 clear_translation_pre_enabled(iommu); 1624 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", 1625 iommu->index); 1626 } 1627 if (amd_iommu_pre_enabled) 1628 amd_iommu_pre_enabled = translation_pre_enabled(iommu); 1629 1630 ret = init_iommu_from_acpi(iommu, h); 1631 if (ret) 1632 return ret; 1633 1634 if (amd_iommu_irq_remap) { 1635 ret = amd_iommu_create_irq_domain(iommu); 1636 if (ret) 1637 return ret; 1638 } 1639 1640 /* 1641 * Make sure IOMMU is not considered to translate itself. The IVRS 1642 * table tells us so, but this is a lie! 1643 */ 1644 amd_iommu_rlookup_table[iommu->devid] = NULL; 1645 1646 return 0; 1647 } 1648 1649 /** 1650 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type 1651 * @ivrs: Pointer to the IVRS header 1652 * 1653 * This function search through all IVDB of the maximum supported IVHD 1654 */ 1655 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) 1656 { 1657 u8 *base = (u8 *)ivrs; 1658 struct ivhd_header *ivhd = (struct ivhd_header *) 1659 (base + IVRS_HEADER_LENGTH); 1660 u8 last_type = ivhd->type; 1661 u16 devid = ivhd->devid; 1662 1663 while (((u8 *)ivhd - base < ivrs->length) && 1664 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { 1665 u8 *p = (u8 *) ivhd; 1666 1667 if (ivhd->devid == devid) 1668 last_type = ivhd->type; 1669 ivhd = (struct ivhd_header *)(p + ivhd->length); 1670 } 1671 1672 return last_type; 1673 } 1674 1675 /* 1676 * Iterates over all IOMMU entries in the ACPI table, allocates the 1677 * IOMMU structure and initializes it with init_iommu_one() 1678 */ 1679 static int __init init_iommu_all(struct acpi_table_header *table) 1680 { 1681 u8 *p = (u8 *)table, *end = (u8 *)table; 1682 struct ivhd_header *h; 1683 struct amd_iommu *iommu; 1684 int ret; 1685 1686 end += table->length; 1687 p += IVRS_HEADER_LENGTH; 1688 1689 while (p < end) { 1690 h = (struct ivhd_header *)p; 1691 if (*p == amd_iommu_target_ivhd_type) { 1692 1693 DUMP_printk("device: %02x:%02x.%01x cap: %04x " 1694 "seg: %d flags: %01x info %04x\n", 1695 PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), 1696 PCI_FUNC(h->devid), h->cap_ptr, 1697 h->pci_seg, h->flags, h->info); 1698 DUMP_printk(" mmio-addr: %016llx\n", 1699 h->mmio_phys); 1700 1701 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 1702 if (iommu == NULL) 1703 return -ENOMEM; 1704 1705 ret = init_iommu_one(iommu, h); 1706 if (ret) 1707 return ret; 1708 } 1709 p += h->length; 1710 1711 } 1712 WARN_ON(p != end); 1713 1714 return 0; 1715 } 1716 1717 static void init_iommu_perf_ctr(struct amd_iommu *iommu) 1718 { 1719 u64 val; 1720 struct pci_dev *pdev = iommu->dev; 1721 1722 if (!iommu_feature(iommu, FEATURE_PC)) 1723 return; 1724 1725 amd_iommu_pc_present = true; 1726 1727 pci_info(pdev, "IOMMU performance counters supported\n"); 1728 1729 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 1730 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 1731 iommu->max_counters = (u8) ((val >> 7) & 0xf); 1732 1733 return; 1734 } 1735 1736 static ssize_t amd_iommu_show_cap(struct device *dev, 1737 struct device_attribute *attr, 1738 char *buf) 1739 { 1740 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 1741 return sprintf(buf, "%x\n", iommu->cap); 1742 } 1743 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); 1744 1745 static ssize_t amd_iommu_show_features(struct device *dev, 1746 struct device_attribute *attr, 1747 char *buf) 1748 { 1749 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 1750 return sprintf(buf, "%llx\n", iommu->features); 1751 } 1752 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); 1753 1754 static struct attribute *amd_iommu_attrs[] = { 1755 &dev_attr_cap.attr, 1756 &dev_attr_features.attr, 1757 NULL, 1758 }; 1759 1760 static struct attribute_group amd_iommu_group = { 1761 .name = "amd-iommu", 1762 .attrs = amd_iommu_attrs, 1763 }; 1764 1765 static const struct attribute_group *amd_iommu_groups[] = { 1766 &amd_iommu_group, 1767 NULL, 1768 }; 1769 1770 /* 1771 * Note: IVHD 0x11 and 0x40 also contains exact copy 1772 * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. 1773 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). 1774 */ 1775 static void __init late_iommu_features_init(struct amd_iommu *iommu) 1776 { 1777 u64 features; 1778 1779 if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) 1780 return; 1781 1782 /* read extended feature bits */ 1783 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); 1784 1785 if (!iommu->features) { 1786 iommu->features = features; 1787 return; 1788 } 1789 1790 /* 1791 * Sanity check and warn if EFR values from 1792 * IVHD and MMIO conflict. 1793 */ 1794 if (features != iommu->features) 1795 pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n", 1796 features, iommu->features); 1797 } 1798 1799 static int __init iommu_init_pci(struct amd_iommu *iommu) 1800 { 1801 int cap_ptr = iommu->cap_ptr; 1802 int ret; 1803 1804 iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid), 1805 iommu->devid & 0xff); 1806 if (!iommu->dev) 1807 return -ENODEV; 1808 1809 /* Prevent binding other PCI device drivers to IOMMU devices */ 1810 iommu->dev->match_driver = false; 1811 1812 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 1813 &iommu->cap); 1814 1815 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 1816 amd_iommu_iotlb_sup = false; 1817 1818 late_iommu_features_init(iommu); 1819 1820 if (iommu_feature(iommu, FEATURE_GT)) { 1821 int glxval; 1822 u32 max_pasid; 1823 u64 pasmax; 1824 1825 pasmax = iommu->features & FEATURE_PASID_MASK; 1826 pasmax >>= FEATURE_PASID_SHIFT; 1827 max_pasid = (1 << (pasmax + 1)) - 1; 1828 1829 amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); 1830 1831 BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); 1832 1833 glxval = iommu->features & FEATURE_GLXVAL_MASK; 1834 glxval >>= FEATURE_GLXVAL_SHIFT; 1835 1836 if (amd_iommu_max_glx_val == -1) 1837 amd_iommu_max_glx_val = glxval; 1838 else 1839 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 1840 } 1841 1842 if (iommu_feature(iommu, FEATURE_GT) && 1843 iommu_feature(iommu, FEATURE_PPR)) { 1844 iommu->is_iommu_v2 = true; 1845 amd_iommu_v2_present = true; 1846 } 1847 1848 if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) 1849 return -ENOMEM; 1850 1851 ret = iommu_init_ga_log(iommu); 1852 if (ret) 1853 return ret; 1854 1855 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { 1856 pr_info("Using strict mode due to virtualization\n"); 1857 iommu_set_dma_strict(); 1858 amd_iommu_np_cache = true; 1859 } 1860 1861 init_iommu_perf_ctr(iommu); 1862 1863 if (is_rd890_iommu(iommu->dev)) { 1864 int i, j; 1865 1866 iommu->root_pdev = 1867 pci_get_domain_bus_and_slot(0, iommu->dev->bus->number, 1868 PCI_DEVFN(0, 0)); 1869 1870 /* 1871 * Some rd890 systems may not be fully reconfigured by the 1872 * BIOS, so it's necessary for us to store this information so 1873 * it can be reprogrammed on resume 1874 */ 1875 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, 1876 &iommu->stored_addr_lo); 1877 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, 1878 &iommu->stored_addr_hi); 1879 1880 /* Low bit locks writes to configuration space */ 1881 iommu->stored_addr_lo &= ~1; 1882 1883 for (i = 0; i < 6; i++) 1884 for (j = 0; j < 0x12; j++) 1885 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); 1886 1887 for (i = 0; i < 0x83; i++) 1888 iommu->stored_l2[i] = iommu_read_l2(iommu, i); 1889 } 1890 1891 amd_iommu_erratum_746_workaround(iommu); 1892 amd_iommu_ats_write_check_workaround(iommu); 1893 1894 iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, 1895 amd_iommu_groups, "ivhd%d", iommu->index); 1896 iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); 1897 1898 return pci_enable_device(iommu->dev); 1899 } 1900 1901 static void print_iommu_info(void) 1902 { 1903 static const char * const feat_str[] = { 1904 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", 1905 "IA", "GA", "HE", "PC" 1906 }; 1907 struct amd_iommu *iommu; 1908 1909 for_each_iommu(iommu) { 1910 struct pci_dev *pdev = iommu->dev; 1911 int i; 1912 1913 pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); 1914 1915 if (iommu->cap & (1 << IOMMU_CAP_EFR)) { 1916 pr_info("Extended features (%#llx):", iommu->features); 1917 1918 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { 1919 if (iommu_feature(iommu, (1ULL << i))) 1920 pr_cont(" %s", feat_str[i]); 1921 } 1922 1923 if (iommu->features & FEATURE_GAM_VAPIC) 1924 pr_cont(" GA_vAPIC"); 1925 1926 pr_cont("\n"); 1927 } 1928 } 1929 if (irq_remapping_enabled) { 1930 pr_info("Interrupt remapping enabled\n"); 1931 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 1932 pr_info("Virtual APIC enabled\n"); 1933 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 1934 pr_info("X2APIC enabled\n"); 1935 } 1936 } 1937 1938 static int __init amd_iommu_init_pci(void) 1939 { 1940 struct amd_iommu *iommu; 1941 int ret; 1942 1943 for_each_iommu(iommu) { 1944 ret = iommu_init_pci(iommu); 1945 if (ret) 1946 break; 1947 1948 /* Need to setup range after PCI init */ 1949 iommu_set_cwwb_range(iommu); 1950 } 1951 1952 /* 1953 * Order is important here to make sure any unity map requirements are 1954 * fulfilled. The unity mappings are created and written to the device 1955 * table during the amd_iommu_init_api() call. 1956 * 1957 * After that we call init_device_table_dma() to make sure any 1958 * uninitialized DTE will block DMA, and in the end we flush the caches 1959 * of all IOMMUs to make sure the changes to the device table are 1960 * active. 1961 */ 1962 ret = amd_iommu_init_api(); 1963 1964 init_device_table_dma(); 1965 1966 for_each_iommu(iommu) 1967 iommu_flush_all_caches(iommu); 1968 1969 if (!ret) 1970 print_iommu_info(); 1971 1972 return ret; 1973 } 1974 1975 /**************************************************************************** 1976 * 1977 * The following functions initialize the MSI interrupts for all IOMMUs 1978 * in the system. It's a bit challenging because there could be multiple 1979 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per 1980 * pci_dev. 1981 * 1982 ****************************************************************************/ 1983 1984 static int iommu_setup_msi(struct amd_iommu *iommu) 1985 { 1986 int r; 1987 1988 r = pci_enable_msi(iommu->dev); 1989 if (r) 1990 return r; 1991 1992 r = request_threaded_irq(iommu->dev->irq, 1993 amd_iommu_int_handler, 1994 amd_iommu_int_thread, 1995 0, "AMD-Vi", 1996 iommu); 1997 1998 if (r) { 1999 pci_disable_msi(iommu->dev); 2000 return r; 2001 } 2002 2003 return 0; 2004 } 2005 2006 union intcapxt { 2007 u64 capxt; 2008 struct { 2009 u64 reserved_0 : 2, 2010 dest_mode_logical : 1, 2011 reserved_1 : 5, 2012 destid_0_23 : 24, 2013 vector : 8, 2014 reserved_2 : 16, 2015 destid_24_31 : 8; 2016 }; 2017 } __attribute__ ((packed)); 2018 2019 /* 2020 * There isn't really any need to mask/unmask at the irqchip level because 2021 * the 64-bit INTCAPXT registers can be updated atomically without tearing 2022 * when the affinity is being updated. 2023 */ 2024 static void intcapxt_unmask_irq(struct irq_data *data) 2025 { 2026 } 2027 2028 static void intcapxt_mask_irq(struct irq_data *data) 2029 { 2030 } 2031 2032 static struct irq_chip intcapxt_controller; 2033 2034 static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2035 struct irq_data *irqd, bool reserve) 2036 { 2037 struct amd_iommu *iommu = irqd->chip_data; 2038 struct irq_cfg *cfg = irqd_cfg(irqd); 2039 union intcapxt xt; 2040 2041 xt.capxt = 0ULL; 2042 xt.dest_mode_logical = apic->dest_mode_logical; 2043 xt.vector = cfg->vector; 2044 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2045 xt.destid_24_31 = cfg->dest_apicid >> 24; 2046 2047 /** 2048 * Current IOMMU implemtation uses the same IRQ for all 2049 * 3 IOMMU interrupts. 2050 */ 2051 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); 2052 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); 2053 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); 2054 return 0; 2055 } 2056 2057 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2058 struct irq_data *irqd) 2059 { 2060 intcapxt_mask_irq(irqd); 2061 } 2062 2063 2064 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2065 unsigned int nr_irqs, void *arg) 2066 { 2067 struct irq_alloc_info *info = arg; 2068 int i, ret; 2069 2070 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2071 return -EINVAL; 2072 2073 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2074 if (ret < 0) 2075 return ret; 2076 2077 for (i = virq; i < virq + nr_irqs; i++) { 2078 struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2079 2080 irqd->chip = &intcapxt_controller; 2081 irqd->chip_data = info->data; 2082 __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2083 } 2084 2085 return ret; 2086 } 2087 2088 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2089 unsigned int nr_irqs) 2090 { 2091 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2092 } 2093 2094 static int intcapxt_set_affinity(struct irq_data *irqd, 2095 const struct cpumask *mask, bool force) 2096 { 2097 struct irq_data *parent = irqd->parent_data; 2098 int ret; 2099 2100 ret = parent->chip->irq_set_affinity(parent, mask, force); 2101 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2102 return ret; 2103 2104 return intcapxt_irqdomain_activate(irqd->domain, irqd, false); 2105 } 2106 2107 static struct irq_chip intcapxt_controller = { 2108 .name = "IOMMU-MSI", 2109 .irq_unmask = intcapxt_unmask_irq, 2110 .irq_mask = intcapxt_mask_irq, 2111 .irq_ack = irq_chip_ack_parent, 2112 .irq_retrigger = irq_chip_retrigger_hierarchy, 2113 .irq_set_affinity = intcapxt_set_affinity, 2114 .flags = IRQCHIP_SKIP_SET_WAKE, 2115 }; 2116 2117 static const struct irq_domain_ops intcapxt_domain_ops = { 2118 .alloc = intcapxt_irqdomain_alloc, 2119 .free = intcapxt_irqdomain_free, 2120 .activate = intcapxt_irqdomain_activate, 2121 .deactivate = intcapxt_irqdomain_deactivate, 2122 }; 2123 2124 2125 static struct irq_domain *iommu_irqdomain; 2126 2127 static struct irq_domain *iommu_get_irqdomain(void) 2128 { 2129 struct fwnode_handle *fn; 2130 2131 /* No need for locking here (yet) as the init is single-threaded */ 2132 if (iommu_irqdomain) 2133 return iommu_irqdomain; 2134 2135 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2136 if (!fn) 2137 return NULL; 2138 2139 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2140 fn, &intcapxt_domain_ops, 2141 NULL); 2142 if (!iommu_irqdomain) 2143 irq_domain_free_fwnode(fn); 2144 2145 return iommu_irqdomain; 2146 } 2147 2148 static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2149 { 2150 struct irq_domain *domain; 2151 struct irq_alloc_info info; 2152 int irq, ret; 2153 2154 domain = iommu_get_irqdomain(); 2155 if (!domain) 2156 return -ENXIO; 2157 2158 init_irq_alloc_info(&info, NULL); 2159 info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2160 info.data = iommu; 2161 2162 irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); 2163 if (irq < 0) { 2164 irq_domain_remove(domain); 2165 return irq; 2166 } 2167 2168 ret = request_threaded_irq(irq, amd_iommu_int_handler, 2169 amd_iommu_int_thread, 0, "AMD-Vi", iommu); 2170 if (ret) { 2171 irq_domain_free_irqs(irq, 1); 2172 irq_domain_remove(domain); 2173 return ret; 2174 } 2175 2176 iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2177 return 0; 2178 } 2179 2180 static int iommu_init_irq(struct amd_iommu *iommu) 2181 { 2182 int ret; 2183 2184 if (iommu->int_enabled) 2185 goto enable_faults; 2186 2187 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2188 ret = iommu_setup_intcapxt(iommu); 2189 else if (iommu->dev->msi_cap) 2190 ret = iommu_setup_msi(iommu); 2191 else 2192 ret = -ENODEV; 2193 2194 if (ret) 2195 return ret; 2196 2197 iommu->int_enabled = true; 2198 enable_faults: 2199 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2200 2201 if (iommu->ppr_log != NULL) 2202 iommu_feature_enable(iommu, CONTROL_PPRINT_EN); 2203 2204 iommu_ga_log_enable(iommu); 2205 2206 return 0; 2207 } 2208 2209 /**************************************************************************** 2210 * 2211 * The next functions belong to the third pass of parsing the ACPI 2212 * table. In this last pass the memory mapping requirements are 2213 * gathered (like exclusion and unity mapping ranges). 2214 * 2215 ****************************************************************************/ 2216 2217 static void __init free_unity_maps(void) 2218 { 2219 struct unity_map_entry *entry, *next; 2220 2221 list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { 2222 list_del(&entry->list); 2223 kfree(entry); 2224 } 2225 } 2226 2227 /* called for unity map ACPI definition */ 2228 static int __init init_unity_map_range(struct ivmd_header *m) 2229 { 2230 struct unity_map_entry *e = NULL; 2231 char *s; 2232 2233 e = kzalloc(sizeof(*e), GFP_KERNEL); 2234 if (e == NULL) 2235 return -ENOMEM; 2236 2237 switch (m->type) { 2238 default: 2239 kfree(e); 2240 return 0; 2241 case ACPI_IVMD_TYPE: 2242 s = "IVMD_TYPEi\t\t\t"; 2243 e->devid_start = e->devid_end = m->devid; 2244 break; 2245 case ACPI_IVMD_TYPE_ALL: 2246 s = "IVMD_TYPE_ALL\t\t"; 2247 e->devid_start = 0; 2248 e->devid_end = amd_iommu_last_bdf; 2249 break; 2250 case ACPI_IVMD_TYPE_RANGE: 2251 s = "IVMD_TYPE_RANGE\t\t"; 2252 e->devid_start = m->devid; 2253 e->devid_end = m->aux; 2254 break; 2255 } 2256 e->address_start = PAGE_ALIGN(m->range_start); 2257 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 2258 e->prot = m->flags >> 1; 2259 2260 /* 2261 * Treat per-device exclusion ranges as r/w unity-mapped regions 2262 * since some buggy BIOSes might lead to the overwritten exclusion 2263 * range (exclusion_start and exclusion_length members). This 2264 * happens when there are multiple exclusion ranges (IVMD entries) 2265 * defined in ACPI table. 2266 */ 2267 if (m->flags & IVMD_FLAG_EXCL_RANGE) 2268 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; 2269 2270 DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" 2271 " range_start: %016llx range_end: %016llx flags: %x\n", s, 2272 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), 2273 PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end), 2274 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), 2275 e->address_start, e->address_end, m->flags); 2276 2277 list_add_tail(&e->list, &amd_iommu_unity_map); 2278 2279 return 0; 2280 } 2281 2282 /* iterates over all memory definitions we find in the ACPI table */ 2283 static int __init init_memory_definitions(struct acpi_table_header *table) 2284 { 2285 u8 *p = (u8 *)table, *end = (u8 *)table; 2286 struct ivmd_header *m; 2287 2288 end += table->length; 2289 p += IVRS_HEADER_LENGTH; 2290 2291 while (p < end) { 2292 m = (struct ivmd_header *)p; 2293 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 2294 init_unity_map_range(m); 2295 2296 p += m->length; 2297 } 2298 2299 return 0; 2300 } 2301 2302 /* 2303 * Init the device table to not allow DMA access for devices 2304 */ 2305 static void init_device_table_dma(void) 2306 { 2307 u32 devid; 2308 2309 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { 2310 set_dev_entry_bit(devid, DEV_ENTRY_VALID); 2311 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); 2312 } 2313 } 2314 2315 static void __init uninit_device_table_dma(void) 2316 { 2317 u32 devid; 2318 2319 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { 2320 amd_iommu_dev_table[devid].data[0] = 0ULL; 2321 amd_iommu_dev_table[devid].data[1] = 0ULL; 2322 } 2323 } 2324 2325 static void init_device_table(void) 2326 { 2327 u32 devid; 2328 2329 if (!amd_iommu_irq_remap) 2330 return; 2331 2332 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) 2333 set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); 2334 } 2335 2336 static void iommu_init_flags(struct amd_iommu *iommu) 2337 { 2338 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? 2339 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : 2340 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); 2341 2342 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? 2343 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : 2344 iommu_feature_disable(iommu, CONTROL_PASSPW_EN); 2345 2346 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? 2347 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : 2348 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); 2349 2350 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? 2351 iommu_feature_enable(iommu, CONTROL_ISOC_EN) : 2352 iommu_feature_disable(iommu, CONTROL_ISOC_EN); 2353 2354 /* 2355 * make IOMMU memory accesses cache coherent 2356 */ 2357 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 2358 2359 /* Set IOTLB invalidation timeout to 1s */ 2360 iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); 2361 } 2362 2363 static void iommu_apply_resume_quirks(struct amd_iommu *iommu) 2364 { 2365 int i, j; 2366 u32 ioc_feature_control; 2367 struct pci_dev *pdev = iommu->root_pdev; 2368 2369 /* RD890 BIOSes may not have completely reconfigured the iommu */ 2370 if (!is_rd890_iommu(iommu->dev) || !pdev) 2371 return; 2372 2373 /* 2374 * First, we need to ensure that the iommu is enabled. This is 2375 * controlled by a register in the northbridge 2376 */ 2377 2378 /* Select Northbridge indirect register 0x75 and enable writing */ 2379 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); 2380 pci_read_config_dword(pdev, 0x64, &ioc_feature_control); 2381 2382 /* Enable the iommu */ 2383 if (!(ioc_feature_control & 0x1)) 2384 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); 2385 2386 /* Restore the iommu BAR */ 2387 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2388 iommu->stored_addr_lo); 2389 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, 2390 iommu->stored_addr_hi); 2391 2392 /* Restore the l1 indirect regs for each of the 6 l1s */ 2393 for (i = 0; i < 6; i++) 2394 for (j = 0; j < 0x12; j++) 2395 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); 2396 2397 /* Restore the l2 indirect regs */ 2398 for (i = 0; i < 0x83; i++) 2399 iommu_write_l2(iommu, i, iommu->stored_l2[i]); 2400 2401 /* Lock PCI setup registers */ 2402 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2403 iommu->stored_addr_lo | 1); 2404 } 2405 2406 static void iommu_enable_ga(struct amd_iommu *iommu) 2407 { 2408 #ifdef CONFIG_IRQ_REMAP 2409 switch (amd_iommu_guest_ir) { 2410 case AMD_IOMMU_GUEST_IR_VAPIC: 2411 iommu_feature_enable(iommu, CONTROL_GAM_EN); 2412 fallthrough; 2413 case AMD_IOMMU_GUEST_IR_LEGACY_GA: 2414 iommu_feature_enable(iommu, CONTROL_GA_EN); 2415 iommu->irte_ops = &irte_128_ops; 2416 break; 2417 default: 2418 iommu->irte_ops = &irte_32_ops; 2419 break; 2420 } 2421 #endif 2422 } 2423 2424 static void early_enable_iommu(struct amd_iommu *iommu) 2425 { 2426 iommu_disable(iommu); 2427 iommu_init_flags(iommu); 2428 iommu_set_device_table(iommu); 2429 iommu_enable_command_buffer(iommu); 2430 iommu_enable_event_buffer(iommu); 2431 iommu_set_exclusion_range(iommu); 2432 iommu_enable_ga(iommu); 2433 iommu_enable_xt(iommu); 2434 iommu_enable(iommu); 2435 iommu_flush_all_caches(iommu); 2436 } 2437 2438 /* 2439 * This function finally enables all IOMMUs found in the system after 2440 * they have been initialized. 2441 * 2442 * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy 2443 * the old content of device table entries. Not this case or copy failed, 2444 * just continue as normal kernel does. 2445 */ 2446 static void early_enable_iommus(void) 2447 { 2448 struct amd_iommu *iommu; 2449 2450 2451 if (!copy_device_table()) { 2452 /* 2453 * If come here because of failure in copying device table from old 2454 * kernel with all IOMMUs enabled, print error message and try to 2455 * free allocated old_dev_tbl_cpy. 2456 */ 2457 if (amd_iommu_pre_enabled) 2458 pr_err("Failed to copy DEV table from previous kernel.\n"); 2459 if (old_dev_tbl_cpy != NULL) 2460 free_pages((unsigned long)old_dev_tbl_cpy, 2461 get_order(dev_table_size)); 2462 2463 for_each_iommu(iommu) { 2464 clear_translation_pre_enabled(iommu); 2465 early_enable_iommu(iommu); 2466 } 2467 } else { 2468 pr_info("Copied DEV table from previous kernel.\n"); 2469 free_pages((unsigned long)amd_iommu_dev_table, 2470 get_order(dev_table_size)); 2471 amd_iommu_dev_table = old_dev_tbl_cpy; 2472 for_each_iommu(iommu) { 2473 iommu_disable_command_buffer(iommu); 2474 iommu_disable_event_buffer(iommu); 2475 iommu_enable_command_buffer(iommu); 2476 iommu_enable_event_buffer(iommu); 2477 iommu_enable_ga(iommu); 2478 iommu_enable_xt(iommu); 2479 iommu_set_device_table(iommu); 2480 iommu_flush_all_caches(iommu); 2481 } 2482 } 2483 2484 #ifdef CONFIG_IRQ_REMAP 2485 /* 2486 * Note: We have already checked GASup from IVRS table. 2487 * Now, we need to make sure that GAMSup is set. 2488 */ 2489 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && 2490 !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) 2491 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2492 2493 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 2494 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); 2495 #endif 2496 } 2497 2498 static void enable_iommus_v2(void) 2499 { 2500 struct amd_iommu *iommu; 2501 2502 for_each_iommu(iommu) { 2503 iommu_enable_ppr_log(iommu); 2504 iommu_enable_gt(iommu); 2505 } 2506 } 2507 2508 static void enable_iommus(void) 2509 { 2510 early_enable_iommus(); 2511 2512 enable_iommus_v2(); 2513 } 2514 2515 static void disable_iommus(void) 2516 { 2517 struct amd_iommu *iommu; 2518 2519 for_each_iommu(iommu) 2520 iommu_disable(iommu); 2521 2522 #ifdef CONFIG_IRQ_REMAP 2523 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 2524 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); 2525 #endif 2526 } 2527 2528 /* 2529 * Suspend/Resume support 2530 * disable suspend until real resume implemented 2531 */ 2532 2533 static void amd_iommu_resume(void) 2534 { 2535 struct amd_iommu *iommu; 2536 2537 for_each_iommu(iommu) 2538 iommu_apply_resume_quirks(iommu); 2539 2540 /* re-load the hardware */ 2541 enable_iommus(); 2542 2543 amd_iommu_enable_interrupts(); 2544 } 2545 2546 static int amd_iommu_suspend(void) 2547 { 2548 /* disable IOMMUs to go out of the way for BIOS */ 2549 disable_iommus(); 2550 2551 return 0; 2552 } 2553 2554 static struct syscore_ops amd_iommu_syscore_ops = { 2555 .suspend = amd_iommu_suspend, 2556 .resume = amd_iommu_resume, 2557 }; 2558 2559 static void __init free_iommu_resources(void) 2560 { 2561 kmemleak_free(irq_lookup_table); 2562 free_pages((unsigned long)irq_lookup_table, 2563 get_order(rlookup_table_size)); 2564 irq_lookup_table = NULL; 2565 2566 kmem_cache_destroy(amd_iommu_irq_cache); 2567 amd_iommu_irq_cache = NULL; 2568 2569 free_pages((unsigned long)amd_iommu_rlookup_table, 2570 get_order(rlookup_table_size)); 2571 amd_iommu_rlookup_table = NULL; 2572 2573 free_pages((unsigned long)amd_iommu_alias_table, 2574 get_order(alias_table_size)); 2575 amd_iommu_alias_table = NULL; 2576 2577 free_pages((unsigned long)amd_iommu_dev_table, 2578 get_order(dev_table_size)); 2579 amd_iommu_dev_table = NULL; 2580 2581 free_iommu_all(); 2582 } 2583 2584 /* SB IOAPIC is always on this device in AMD systems */ 2585 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) 2586 2587 static bool __init check_ioapic_information(void) 2588 { 2589 const char *fw_bug = FW_BUG; 2590 bool ret, has_sb_ioapic; 2591 int idx; 2592 2593 has_sb_ioapic = false; 2594 ret = false; 2595 2596 /* 2597 * If we have map overrides on the kernel command line the 2598 * messages in this function might not describe firmware bugs 2599 * anymore - so be careful 2600 */ 2601 if (cmdline_maps) 2602 fw_bug = ""; 2603 2604 for (idx = 0; idx < nr_ioapics; idx++) { 2605 int devid, id = mpc_ioapic_id(idx); 2606 2607 devid = get_ioapic_devid(id); 2608 if (devid < 0) { 2609 pr_err("%s: IOAPIC[%d] not in IVRS table\n", 2610 fw_bug, id); 2611 ret = false; 2612 } else if (devid == IOAPIC_SB_DEVID) { 2613 has_sb_ioapic = true; 2614 ret = true; 2615 } 2616 } 2617 2618 if (!has_sb_ioapic) { 2619 /* 2620 * We expect the SB IOAPIC to be listed in the IVRS 2621 * table. The system timer is connected to the SB IOAPIC 2622 * and if we don't have it in the list the system will 2623 * panic at boot time. This situation usually happens 2624 * when the BIOS is buggy and provides us the wrong 2625 * device id for the IOAPIC in the system. 2626 */ 2627 pr_err("%s: No southbridge IOAPIC found\n", fw_bug); 2628 } 2629 2630 if (!ret) 2631 pr_err("Disabling interrupt remapping\n"); 2632 2633 return ret; 2634 } 2635 2636 static void __init free_dma_resources(void) 2637 { 2638 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 2639 get_order(MAX_DOMAIN_ID/8)); 2640 amd_iommu_pd_alloc_bitmap = NULL; 2641 2642 free_unity_maps(); 2643 } 2644 2645 static void __init ivinfo_init(void *ivrs) 2646 { 2647 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); 2648 } 2649 2650 /* 2651 * This is the hardware init function for AMD IOMMU in the system. 2652 * This function is called either from amd_iommu_init or from the interrupt 2653 * remapping setup code. 2654 * 2655 * This function basically parses the ACPI table for AMD IOMMU (IVRS) 2656 * four times: 2657 * 2658 * 1 pass) Discover the most comprehensive IVHD type to use. 2659 * 2660 * 2 pass) Find the highest PCI device id the driver has to handle. 2661 * Upon this information the size of the data structures is 2662 * determined that needs to be allocated. 2663 * 2664 * 3 pass) Initialize the data structures just allocated with the 2665 * information in the ACPI table about available AMD IOMMUs 2666 * in the system. It also maps the PCI devices in the 2667 * system to specific IOMMUs 2668 * 2669 * 4 pass) After the basic data structures are allocated and 2670 * initialized we update them with information about memory 2671 * remapping requirements parsed out of the ACPI table in 2672 * this last pass. 2673 * 2674 * After everything is set up the IOMMUs are enabled and the necessary 2675 * hotplug and suspend notifiers are registered. 2676 */ 2677 static int __init early_amd_iommu_init(void) 2678 { 2679 struct acpi_table_header *ivrs_base; 2680 int i, remap_cache_sz, ret; 2681 acpi_status status; 2682 2683 if (!amd_iommu_detected) 2684 return -ENODEV; 2685 2686 status = acpi_get_table("IVRS", 0, &ivrs_base); 2687 if (status == AE_NOT_FOUND) 2688 return -ENODEV; 2689 else if (ACPI_FAILURE(status)) { 2690 const char *err = acpi_format_exception(status); 2691 pr_err("IVRS table error: %s\n", err); 2692 return -EINVAL; 2693 } 2694 2695 /* 2696 * Validate checksum here so we don't need to do it when 2697 * we actually parse the table 2698 */ 2699 ret = check_ivrs_checksum(ivrs_base); 2700 if (ret) 2701 goto out; 2702 2703 ivinfo_init(ivrs_base); 2704 2705 amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); 2706 DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); 2707 2708 /* 2709 * First parse ACPI tables to find the largest Bus/Dev/Func 2710 * we need to handle. Upon this information the shared data 2711 * structures for the IOMMUs in the system will be allocated 2712 */ 2713 ret = find_last_devid_acpi(ivrs_base); 2714 if (ret) 2715 goto out; 2716 2717 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); 2718 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); 2719 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); 2720 2721 /* Device table - directly used by all IOMMUs */ 2722 ret = -ENOMEM; 2723 amd_iommu_dev_table = (void *)__get_free_pages( 2724 GFP_KERNEL | __GFP_ZERO | GFP_DMA32, 2725 get_order(dev_table_size)); 2726 if (amd_iommu_dev_table == NULL) 2727 goto out; 2728 2729 /* 2730 * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the 2731 * IOMMU see for that device 2732 */ 2733 amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, 2734 get_order(alias_table_size)); 2735 if (amd_iommu_alias_table == NULL) 2736 goto out; 2737 2738 /* IOMMU rlookup table - find the IOMMU for a specific device */ 2739 amd_iommu_rlookup_table = (void *)__get_free_pages( 2740 GFP_KERNEL | __GFP_ZERO, 2741 get_order(rlookup_table_size)); 2742 if (amd_iommu_rlookup_table == NULL) 2743 goto out; 2744 2745 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( 2746 GFP_KERNEL | __GFP_ZERO, 2747 get_order(MAX_DOMAIN_ID/8)); 2748 if (amd_iommu_pd_alloc_bitmap == NULL) 2749 goto out; 2750 2751 /* 2752 * let all alias entries point to itself 2753 */ 2754 for (i = 0; i <= amd_iommu_last_bdf; ++i) 2755 amd_iommu_alias_table[i] = i; 2756 2757 /* 2758 * never allocate domain 0 because its used as the non-allocated and 2759 * error value placeholder 2760 */ 2761 __set_bit(0, amd_iommu_pd_alloc_bitmap); 2762 2763 /* 2764 * now the data structures are allocated and basically initialized 2765 * start the real acpi table scan 2766 */ 2767 ret = init_iommu_all(ivrs_base); 2768 if (ret) 2769 goto out; 2770 2771 /* Disable any previously enabled IOMMUs */ 2772 if (!is_kdump_kernel() || amd_iommu_disabled) 2773 disable_iommus(); 2774 2775 if (amd_iommu_irq_remap) 2776 amd_iommu_irq_remap = check_ioapic_information(); 2777 2778 if (amd_iommu_irq_remap) { 2779 /* 2780 * Interrupt remapping enabled, create kmem_cache for the 2781 * remapping tables. 2782 */ 2783 ret = -ENOMEM; 2784 if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) 2785 remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); 2786 else 2787 remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); 2788 amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", 2789 remap_cache_sz, 2790 DTE_INTTAB_ALIGNMENT, 2791 0, NULL); 2792 if (!amd_iommu_irq_cache) 2793 goto out; 2794 2795 irq_lookup_table = (void *)__get_free_pages( 2796 GFP_KERNEL | __GFP_ZERO, 2797 get_order(rlookup_table_size)); 2798 kmemleak_alloc(irq_lookup_table, rlookup_table_size, 2799 1, GFP_KERNEL); 2800 if (!irq_lookup_table) 2801 goto out; 2802 } 2803 2804 ret = init_memory_definitions(ivrs_base); 2805 if (ret) 2806 goto out; 2807 2808 /* init the device table */ 2809 init_device_table(); 2810 2811 out: 2812 /* Don't leak any ACPI memory */ 2813 acpi_put_table(ivrs_base); 2814 2815 return ret; 2816 } 2817 2818 static int amd_iommu_enable_interrupts(void) 2819 { 2820 struct amd_iommu *iommu; 2821 int ret = 0; 2822 2823 for_each_iommu(iommu) { 2824 ret = iommu_init_irq(iommu); 2825 if (ret) 2826 goto out; 2827 } 2828 2829 out: 2830 return ret; 2831 } 2832 2833 static bool __init detect_ivrs(void) 2834 { 2835 struct acpi_table_header *ivrs_base; 2836 acpi_status status; 2837 int i; 2838 2839 status = acpi_get_table("IVRS", 0, &ivrs_base); 2840 if (status == AE_NOT_FOUND) 2841 return false; 2842 else if (ACPI_FAILURE(status)) { 2843 const char *err = acpi_format_exception(status); 2844 pr_err("IVRS table error: %s\n", err); 2845 return false; 2846 } 2847 2848 acpi_put_table(ivrs_base); 2849 2850 if (amd_iommu_force_enable) 2851 goto out; 2852 2853 /* Don't use IOMMU if there is Stoney Ridge graphics */ 2854 for (i = 0; i < 32; i++) { 2855 u32 pci_id; 2856 2857 pci_id = read_pci_config(0, i, 0, 0); 2858 if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { 2859 pr_info("Disable IOMMU on Stoney Ridge\n"); 2860 return false; 2861 } 2862 } 2863 2864 out: 2865 /* Make sure ACS will be enabled during PCI probe */ 2866 pci_request_acs(); 2867 2868 return true; 2869 } 2870 2871 /**************************************************************************** 2872 * 2873 * AMD IOMMU Initialization State Machine 2874 * 2875 ****************************************************************************/ 2876 2877 static int __init state_next(void) 2878 { 2879 int ret = 0; 2880 2881 switch (init_state) { 2882 case IOMMU_START_STATE: 2883 if (!detect_ivrs()) { 2884 init_state = IOMMU_NOT_FOUND; 2885 ret = -ENODEV; 2886 } else { 2887 init_state = IOMMU_IVRS_DETECTED; 2888 } 2889 break; 2890 case IOMMU_IVRS_DETECTED: 2891 if (amd_iommu_disabled) { 2892 init_state = IOMMU_CMDLINE_DISABLED; 2893 ret = -EINVAL; 2894 } else { 2895 ret = early_amd_iommu_init(); 2896 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; 2897 } 2898 break; 2899 case IOMMU_ACPI_FINISHED: 2900 early_enable_iommus(); 2901 x86_platform.iommu_shutdown = disable_iommus; 2902 init_state = IOMMU_ENABLED; 2903 break; 2904 case IOMMU_ENABLED: 2905 register_syscore_ops(&amd_iommu_syscore_ops); 2906 ret = amd_iommu_init_pci(); 2907 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; 2908 enable_iommus_v2(); 2909 break; 2910 case IOMMU_PCI_INIT: 2911 ret = amd_iommu_enable_interrupts(); 2912 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; 2913 break; 2914 case IOMMU_INTERRUPTS_EN: 2915 init_state = IOMMU_INITIALIZED; 2916 break; 2917 case IOMMU_INITIALIZED: 2918 /* Nothing to do */ 2919 break; 2920 case IOMMU_NOT_FOUND: 2921 case IOMMU_INIT_ERROR: 2922 case IOMMU_CMDLINE_DISABLED: 2923 /* Error states => do nothing */ 2924 ret = -EINVAL; 2925 break; 2926 default: 2927 /* Unknown state */ 2928 BUG(); 2929 } 2930 2931 if (ret) { 2932 free_dma_resources(); 2933 if (!irq_remapping_enabled) { 2934 disable_iommus(); 2935 free_iommu_resources(); 2936 } else { 2937 struct amd_iommu *iommu; 2938 2939 uninit_device_table_dma(); 2940 for_each_iommu(iommu) 2941 iommu_flush_all_caches(iommu); 2942 } 2943 } 2944 return ret; 2945 } 2946 2947 static int __init iommu_go_to_state(enum iommu_init_state state) 2948 { 2949 int ret = -EINVAL; 2950 2951 while (init_state != state) { 2952 if (init_state == IOMMU_NOT_FOUND || 2953 init_state == IOMMU_INIT_ERROR || 2954 init_state == IOMMU_CMDLINE_DISABLED) 2955 break; 2956 ret = state_next(); 2957 } 2958 2959 return ret; 2960 } 2961 2962 #ifdef CONFIG_IRQ_REMAP 2963 int __init amd_iommu_prepare(void) 2964 { 2965 int ret; 2966 2967 amd_iommu_irq_remap = true; 2968 2969 ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); 2970 if (ret) { 2971 amd_iommu_irq_remap = false; 2972 return ret; 2973 } 2974 2975 return amd_iommu_irq_remap ? 0 : -ENODEV; 2976 } 2977 2978 int __init amd_iommu_enable(void) 2979 { 2980 int ret; 2981 2982 ret = iommu_go_to_state(IOMMU_ENABLED); 2983 if (ret) 2984 return ret; 2985 2986 irq_remapping_enabled = 1; 2987 return amd_iommu_xt_mode; 2988 } 2989 2990 void amd_iommu_disable(void) 2991 { 2992 amd_iommu_suspend(); 2993 } 2994 2995 int amd_iommu_reenable(int mode) 2996 { 2997 amd_iommu_resume(); 2998 2999 return 0; 3000 } 3001 3002 int __init amd_iommu_enable_faulting(void) 3003 { 3004 /* We enable MSI later when PCI is initialized */ 3005 return 0; 3006 } 3007 #endif 3008 3009 /* 3010 * This is the core init function for AMD IOMMU hardware in the system. 3011 * This function is called from the generic x86 DMA layer initialization 3012 * code. 3013 */ 3014 static int __init amd_iommu_init(void) 3015 { 3016 struct amd_iommu *iommu; 3017 int ret; 3018 3019 ret = iommu_go_to_state(IOMMU_INITIALIZED); 3020 #ifdef CONFIG_GART_IOMMU 3021 if (ret && list_empty(&amd_iommu_list)) { 3022 /* 3023 * We failed to initialize the AMD IOMMU - try fallback 3024 * to GART if possible. 3025 */ 3026 gart_iommu_init(); 3027 } 3028 #endif 3029 3030 for_each_iommu(iommu) 3031 amd_iommu_debugfs_setup(iommu); 3032 3033 return ret; 3034 } 3035 3036 static bool amd_iommu_sme_check(void) 3037 { 3038 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) || 3039 (boot_cpu_data.x86 != 0x17)) 3040 return true; 3041 3042 /* For Fam17h, a specific level of support is required */ 3043 if (boot_cpu_data.microcode >= 0x08001205) 3044 return true; 3045 3046 if ((boot_cpu_data.microcode >= 0x08001126) && 3047 (boot_cpu_data.microcode <= 0x080011ff)) 3048 return true; 3049 3050 pr_notice("IOMMU not currently supported when SME is active\n"); 3051 3052 return false; 3053 } 3054 3055 /**************************************************************************** 3056 * 3057 * Early detect code. This code runs at IOMMU detection time in the DMA 3058 * layer. It just looks if there is an IVRS ACPI table to detect AMD 3059 * IOMMUs 3060 * 3061 ****************************************************************************/ 3062 int __init amd_iommu_detect(void) 3063 { 3064 int ret; 3065 3066 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 3067 return -ENODEV; 3068 3069 if (!amd_iommu_sme_check()) 3070 return -ENODEV; 3071 3072 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); 3073 if (ret) 3074 return ret; 3075 3076 amd_iommu_detected = true; 3077 iommu_detected = 1; 3078 x86_init.iommu.iommu_init = amd_iommu_init; 3079 3080 return 1; 3081 } 3082 3083 /**************************************************************************** 3084 * 3085 * Parsing functions for the AMD IOMMU specific kernel command line 3086 * options. 3087 * 3088 ****************************************************************************/ 3089 3090 static int __init parse_amd_iommu_dump(char *str) 3091 { 3092 amd_iommu_dump = true; 3093 3094 return 1; 3095 } 3096 3097 static int __init parse_amd_iommu_intr(char *str) 3098 { 3099 for (; *str; ++str) { 3100 if (strncmp(str, "legacy", 6) == 0) { 3101 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3102 break; 3103 } 3104 if (strncmp(str, "vapic", 5) == 0) { 3105 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 3106 break; 3107 } 3108 } 3109 return 1; 3110 } 3111 3112 static int __init parse_amd_iommu_options(char *str) 3113 { 3114 for (; *str; ++str) { 3115 if (strncmp(str, "fullflush", 9) == 0) { 3116 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); 3117 iommu_set_dma_strict(); 3118 } 3119 if (strncmp(str, "force_enable", 12) == 0) 3120 amd_iommu_force_enable = true; 3121 if (strncmp(str, "off", 3) == 0) 3122 amd_iommu_disabled = true; 3123 if (strncmp(str, "force_isolation", 15) == 0) 3124 amd_iommu_force_isolation = true; 3125 } 3126 3127 return 1; 3128 } 3129 3130 static int __init parse_ivrs_ioapic(char *str) 3131 { 3132 unsigned int bus, dev, fn; 3133 int ret, id, i; 3134 u16 devid; 3135 3136 ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); 3137 3138 if (ret != 4) { 3139 pr_err("Invalid command line: ivrs_ioapic%s\n", str); 3140 return 1; 3141 } 3142 3143 if (early_ioapic_map_size == EARLY_MAP_SIZE) { 3144 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", 3145 str); 3146 return 1; 3147 } 3148 3149 devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); 3150 3151 cmdline_maps = true; 3152 i = early_ioapic_map_size++; 3153 early_ioapic_map[i].id = id; 3154 early_ioapic_map[i].devid = devid; 3155 early_ioapic_map[i].cmd_line = true; 3156 3157 return 1; 3158 } 3159 3160 static int __init parse_ivrs_hpet(char *str) 3161 { 3162 unsigned int bus, dev, fn; 3163 int ret, id, i; 3164 u16 devid; 3165 3166 ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); 3167 3168 if (ret != 4) { 3169 pr_err("Invalid command line: ivrs_hpet%s\n", str); 3170 return 1; 3171 } 3172 3173 if (early_hpet_map_size == EARLY_MAP_SIZE) { 3174 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", 3175 str); 3176 return 1; 3177 } 3178 3179 devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); 3180 3181 cmdline_maps = true; 3182 i = early_hpet_map_size++; 3183 early_hpet_map[i].id = id; 3184 early_hpet_map[i].devid = devid; 3185 early_hpet_map[i].cmd_line = true; 3186 3187 return 1; 3188 } 3189 3190 static int __init parse_ivrs_acpihid(char *str) 3191 { 3192 u32 bus, dev, fn; 3193 char *hid, *uid, *p; 3194 char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; 3195 int ret, i; 3196 3197 ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); 3198 if (ret != 4) { 3199 pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); 3200 return 1; 3201 } 3202 3203 p = acpiid; 3204 hid = strsep(&p, ":"); 3205 uid = p; 3206 3207 if (!hid || !(*hid) || !uid) { 3208 pr_err("Invalid command line: hid or uid\n"); 3209 return 1; 3210 } 3211 3212 i = early_acpihid_map_size++; 3213 memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); 3214 memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); 3215 early_acpihid_map[i].devid = 3216 ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); 3217 early_acpihid_map[i].cmd_line = true; 3218 3219 return 1; 3220 } 3221 3222 __setup("amd_iommu_dump", parse_amd_iommu_dump); 3223 __setup("amd_iommu=", parse_amd_iommu_options); 3224 __setup("amd_iommu_intr=", parse_amd_iommu_intr); 3225 __setup("ivrs_ioapic", parse_ivrs_ioapic); 3226 __setup("ivrs_hpet", parse_ivrs_hpet); 3227 __setup("ivrs_acpihid", parse_ivrs_acpihid); 3228 3229 IOMMU_INIT_FINISH(amd_iommu_detect, 3230 gart_iommu_hole_init, 3231 NULL, 3232 NULL); 3233 3234 bool amd_iommu_v2_supported(void) 3235 { 3236 return amd_iommu_v2_present; 3237 } 3238 EXPORT_SYMBOL(amd_iommu_v2_supported); 3239 3240 struct amd_iommu *get_amd_iommu(unsigned int idx) 3241 { 3242 unsigned int i = 0; 3243 struct amd_iommu *iommu; 3244 3245 for_each_iommu(iommu) 3246 if (i++ == idx) 3247 return iommu; 3248 return NULL; 3249 } 3250 3251 /**************************************************************************** 3252 * 3253 * IOMMU EFR Performance Counter support functionality. This code allows 3254 * access to the IOMMU PC functionality. 3255 * 3256 ****************************************************************************/ 3257 3258 u8 amd_iommu_pc_get_max_banks(unsigned int idx) 3259 { 3260 struct amd_iommu *iommu = get_amd_iommu(idx); 3261 3262 if (iommu) 3263 return iommu->max_banks; 3264 3265 return 0; 3266 } 3267 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); 3268 3269 bool amd_iommu_pc_supported(void) 3270 { 3271 return amd_iommu_pc_present; 3272 } 3273 EXPORT_SYMBOL(amd_iommu_pc_supported); 3274 3275 u8 amd_iommu_pc_get_max_counters(unsigned int idx) 3276 { 3277 struct amd_iommu *iommu = get_amd_iommu(idx); 3278 3279 if (iommu) 3280 return iommu->max_counters; 3281 3282 return 0; 3283 } 3284 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); 3285 3286 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 3287 u8 fxn, u64 *value, bool is_write) 3288 { 3289 u32 offset; 3290 u32 max_offset_lim; 3291 3292 /* Make sure the IOMMU PC resource is available */ 3293 if (!amd_iommu_pc_present) 3294 return -ENODEV; 3295 3296 /* Check for valid iommu and pc register indexing */ 3297 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) 3298 return -ENODEV; 3299 3300 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); 3301 3302 /* Limit the offset to the hw defined mmio region aperture */ 3303 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | 3304 (iommu->max_counters << 8) | 0x28); 3305 if ((offset < MMIO_CNTR_REG_OFFSET) || 3306 (offset > max_offset_lim)) 3307 return -EINVAL; 3308 3309 if (is_write) { 3310 u64 val = *value & GENMASK_ULL(47, 0); 3311 3312 writel((u32)val, iommu->mmio_base + offset); 3313 writel((val >> 32), iommu->mmio_base + offset + 4); 3314 } else { 3315 *value = readl(iommu->mmio_base + offset + 4); 3316 *value <<= 32; 3317 *value |= readl(iommu->mmio_base + offset); 3318 *value &= GENMASK_ULL(47, 0); 3319 } 3320 3321 return 0; 3322 } 3323 3324 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3325 { 3326 if (!iommu) 3327 return -EINVAL; 3328 3329 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); 3330 } 3331 3332 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3333 { 3334 if (!iommu) 3335 return -EINVAL; 3336 3337 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); 3338 } 3339