1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 * Leo Duran <leo.duran@amd.com> 6 */ 7 8 #define pr_fmt(fmt) "AMD-Vi: " fmt 9 #define dev_fmt(fmt) pr_fmt(fmt) 10 11 #include <linux/pci.h> 12 #include <linux/acpi.h> 13 #include <linux/list.h> 14 #include <linux/bitmap.h> 15 #include <linux/slab.h> 16 #include <linux/syscore_ops.h> 17 #include <linux/interrupt.h> 18 #include <linux/msi.h> 19 #include <linux/irq.h> 20 #include <linux/amd-iommu.h> 21 #include <linux/export.h> 22 #include <linux/kmemleak.h> 23 #include <linux/cc_platform.h> 24 #include <linux/iopoll.h> 25 #include <asm/pci-direct.h> 26 #include <asm/iommu.h> 27 #include <asm/apic.h> 28 #include <asm/gart.h> 29 #include <asm/x86_init.h> 30 #include <asm/io_apic.h> 31 #include <asm/irq_remapping.h> 32 #include <asm/set_memory.h> 33 34 #include <linux/crash_dump.h> 35 36 #include "amd_iommu.h" 37 #include "../irq_remapping.h" 38 39 /* 40 * definitions for the ACPI scanning code 41 */ 42 #define IVRS_HEADER_LENGTH 48 43 44 #define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 45 #define ACPI_IVMD_TYPE_ALL 0x20 46 #define ACPI_IVMD_TYPE 0x21 47 #define ACPI_IVMD_TYPE_RANGE 0x22 48 49 #define IVHD_DEV_ALL 0x01 50 #define IVHD_DEV_SELECT 0x02 51 #define IVHD_DEV_SELECT_RANGE_START 0x03 52 #define IVHD_DEV_RANGE_END 0x04 53 #define IVHD_DEV_ALIAS 0x42 54 #define IVHD_DEV_ALIAS_RANGE 0x43 55 #define IVHD_DEV_EXT_SELECT 0x46 56 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 57 #define IVHD_DEV_SPECIAL 0x48 58 #define IVHD_DEV_ACPI_HID 0xf0 59 60 #define UID_NOT_PRESENT 0 61 #define UID_IS_INTEGER 1 62 #define UID_IS_CHARACTER 2 63 64 #define IVHD_SPECIAL_IOAPIC 1 65 #define IVHD_SPECIAL_HPET 2 66 67 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 68 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 69 #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 70 #define IVHD_FLAG_ISOC_EN_MASK 0x08 71 72 #define IVMD_FLAG_EXCL_RANGE 0x08 73 #define IVMD_FLAG_IW 0x04 74 #define IVMD_FLAG_IR 0x02 75 #define IVMD_FLAG_UNITY_MAP 0x01 76 77 #define ACPI_DEVFLAG_INITPASS 0x01 78 #define ACPI_DEVFLAG_EXTINT 0x02 79 #define ACPI_DEVFLAG_NMI 0x04 80 #define ACPI_DEVFLAG_SYSMGT1 0x10 81 #define ACPI_DEVFLAG_SYSMGT2 0x20 82 #define ACPI_DEVFLAG_LINT0 0x40 83 #define ACPI_DEVFLAG_LINT1 0x80 84 #define ACPI_DEVFLAG_ATSDIS 0x10000000 85 86 #define LOOP_TIMEOUT 2000000 87 88 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ 89 | ((dev & 0x1f) << 3) | (fn & 0x7)) 90 91 /* 92 * ACPI table definitions 93 * 94 * These data structures are laid over the table to parse the important values 95 * out of it. 96 */ 97 98 /* 99 * structure describing one IOMMU in the ACPI table. Typically followed by one 100 * or more ivhd_entrys. 101 */ 102 struct ivhd_header { 103 u8 type; 104 u8 flags; 105 u16 length; 106 u16 devid; 107 u16 cap_ptr; 108 u64 mmio_phys; 109 u16 pci_seg; 110 u16 info; 111 u32 efr_attr; 112 113 /* Following only valid on IVHD type 11h and 40h */ 114 u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ 115 u64 efr_reg2; 116 } __attribute__((packed)); 117 118 /* 119 * A device entry describing which devices a specific IOMMU translates and 120 * which requestor ids they use. 121 */ 122 struct ivhd_entry { 123 u8 type; 124 u16 devid; 125 u8 flags; 126 struct_group(ext_hid, 127 u32 ext; 128 u32 hidh; 129 ); 130 u64 cid; 131 u8 uidf; 132 u8 uidl; 133 u8 uid; 134 } __attribute__((packed)); 135 136 /* 137 * An AMD IOMMU memory definition structure. It defines things like exclusion 138 * ranges for devices and regions that should be unity mapped. 139 */ 140 struct ivmd_header { 141 u8 type; 142 u8 flags; 143 u16 length; 144 u16 devid; 145 u16 aux; 146 u16 pci_seg; 147 u8 resv[6]; 148 u64 range_start; 149 u64 range_length; 150 } __attribute__((packed)); 151 152 bool amd_iommu_dump; 153 bool amd_iommu_irq_remap __read_mostly; 154 155 enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; 156 /* Guest page table level */ 157 int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; 158 159 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 160 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; 161 162 static bool amd_iommu_detected; 163 static bool amd_iommu_disabled __initdata; 164 static bool amd_iommu_force_enable __initdata; 165 static bool amd_iommu_irtcachedis; 166 static int amd_iommu_target_ivhd_type; 167 168 /* Global EFR and EFR2 registers */ 169 u64 amd_iommu_efr; 170 u64 amd_iommu_efr2; 171 172 /* SNP is enabled on the system? */ 173 bool amd_iommu_snp_en; 174 EXPORT_SYMBOL(amd_iommu_snp_en); 175 176 LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ 177 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 178 system */ 179 180 /* Array to assign indices to IOMMUs*/ 181 struct amd_iommu *amd_iommus[MAX_IOMMUS]; 182 183 /* Number of IOMMUs present in the system */ 184 static int amd_iommus_present; 185 186 /* IOMMUs have a non-present cache? */ 187 bool amd_iommu_np_cache __read_mostly; 188 bool amd_iommu_iotlb_sup __read_mostly = true; 189 190 u32 amd_iommu_max_pasid __read_mostly = ~0; 191 192 bool amd_iommu_v2_present __read_mostly; 193 static bool amd_iommu_pc_present __read_mostly; 194 bool amdr_ivrs_remap_support __read_mostly; 195 196 bool amd_iommu_force_isolation __read_mostly; 197 198 /* 199 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap 200 * to know which ones are already in use. 201 */ 202 unsigned long *amd_iommu_pd_alloc_bitmap; 203 204 enum iommu_init_state { 205 IOMMU_START_STATE, 206 IOMMU_IVRS_DETECTED, 207 IOMMU_ACPI_FINISHED, 208 IOMMU_ENABLED, 209 IOMMU_PCI_INIT, 210 IOMMU_INTERRUPTS_EN, 211 IOMMU_INITIALIZED, 212 IOMMU_NOT_FOUND, 213 IOMMU_INIT_ERROR, 214 IOMMU_CMDLINE_DISABLED, 215 }; 216 217 /* Early ioapic and hpet maps from kernel command line */ 218 #define EARLY_MAP_SIZE 4 219 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; 220 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; 221 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE]; 222 223 static int __initdata early_ioapic_map_size; 224 static int __initdata early_hpet_map_size; 225 static int __initdata early_acpihid_map_size; 226 227 static bool __initdata cmdline_maps; 228 229 static enum iommu_init_state init_state = IOMMU_START_STATE; 230 231 static int amd_iommu_enable_interrupts(void); 232 static int __init iommu_go_to_state(enum iommu_init_state state); 233 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); 234 235 static bool amd_iommu_pre_enabled = true; 236 237 static u32 amd_iommu_ivinfo __initdata; 238 239 bool translation_pre_enabled(struct amd_iommu *iommu) 240 { 241 return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); 242 } 243 244 static void clear_translation_pre_enabled(struct amd_iommu *iommu) 245 { 246 iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 247 } 248 249 static void init_translation_status(struct amd_iommu *iommu) 250 { 251 u64 ctrl; 252 253 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 254 if (ctrl & (1<<CONTROL_IOMMU_EN)) 255 iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 256 } 257 258 static inline unsigned long tbl_size(int entry_size, int last_bdf) 259 { 260 unsigned shift = PAGE_SHIFT + 261 get_order((last_bdf + 1) * entry_size); 262 263 return 1UL << shift; 264 } 265 266 int amd_iommu_get_num_iommus(void) 267 { 268 return amd_iommus_present; 269 } 270 271 /* 272 * Iterate through all the IOMMUs to get common EFR 273 * masks among all IOMMUs and warn if found inconsistency. 274 */ 275 static void get_global_efr(void) 276 { 277 struct amd_iommu *iommu; 278 279 for_each_iommu(iommu) { 280 u64 tmp = iommu->features; 281 u64 tmp2 = iommu->features2; 282 283 if (list_is_first(&iommu->list, &amd_iommu_list)) { 284 amd_iommu_efr = tmp; 285 amd_iommu_efr2 = tmp2; 286 continue; 287 } 288 289 if (amd_iommu_efr == tmp && 290 amd_iommu_efr2 == tmp2) 291 continue; 292 293 pr_err(FW_BUG 294 "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n", 295 tmp, tmp2, amd_iommu_efr, amd_iommu_efr2, 296 iommu->index, iommu->pci_seg->id, 297 PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid), 298 PCI_FUNC(iommu->devid)); 299 300 amd_iommu_efr &= tmp; 301 amd_iommu_efr2 &= tmp2; 302 } 303 304 pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); 305 } 306 307 static bool check_feature_on_all_iommus(u64 mask) 308 { 309 return !!(amd_iommu_efr & mask); 310 } 311 312 static inline int check_feature_gpt_level(void) 313 { 314 return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); 315 } 316 317 /* 318 * For IVHD type 0x11/0x40, EFR is also available via IVHD. 319 * Default to IVHD EFR since it is available sooner 320 * (i.e. before PCI init). 321 */ 322 static void __init early_iommu_features_init(struct amd_iommu *iommu, 323 struct ivhd_header *h) 324 { 325 if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) { 326 iommu->features = h->efr_reg; 327 iommu->features2 = h->efr_reg2; 328 } 329 if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP) 330 amdr_ivrs_remap_support = true; 331 } 332 333 /* Access to l1 and l2 indexed register spaces */ 334 335 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) 336 { 337 u32 val; 338 339 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 340 pci_read_config_dword(iommu->dev, 0xfc, &val); 341 return val; 342 } 343 344 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) 345 { 346 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); 347 pci_write_config_dword(iommu->dev, 0xfc, val); 348 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 349 } 350 351 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) 352 { 353 u32 val; 354 355 pci_write_config_dword(iommu->dev, 0xf0, address); 356 pci_read_config_dword(iommu->dev, 0xf4, &val); 357 return val; 358 } 359 360 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) 361 { 362 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); 363 pci_write_config_dword(iommu->dev, 0xf4, val); 364 } 365 366 /**************************************************************************** 367 * 368 * AMD IOMMU MMIO register space handling functions 369 * 370 * These functions are used to program the IOMMU device registers in 371 * MMIO space required for that driver. 372 * 373 ****************************************************************************/ 374 375 /* 376 * This function set the exclusion range in the IOMMU. DMA accesses to the 377 * exclusion range are passed through untranslated 378 */ 379 static void iommu_set_exclusion_range(struct amd_iommu *iommu) 380 { 381 u64 start = iommu->exclusion_start & PAGE_MASK; 382 u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; 383 u64 entry; 384 385 if (!iommu->exclusion_start) 386 return; 387 388 entry = start | MMIO_EXCL_ENABLE_MASK; 389 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 390 &entry, sizeof(entry)); 391 392 entry = limit; 393 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 394 &entry, sizeof(entry)); 395 } 396 397 static void iommu_set_cwwb_range(struct amd_iommu *iommu) 398 { 399 u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); 400 u64 entry = start & PM_ADDR_MASK; 401 402 if (!check_feature_on_all_iommus(FEATURE_SNP)) 403 return; 404 405 /* Note: 406 * Re-purpose Exclusion base/limit registers for Completion wait 407 * write-back base/limit. 408 */ 409 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 410 &entry, sizeof(entry)); 411 412 /* Note: 413 * Default to 4 Kbytes, which can be specified by setting base 414 * address equal to the limit address. 415 */ 416 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 417 &entry, sizeof(entry)); 418 } 419 420 /* Programs the physical address of the device table into the IOMMU hardware */ 421 static void iommu_set_device_table(struct amd_iommu *iommu) 422 { 423 u64 entry; 424 u32 dev_table_size = iommu->pci_seg->dev_table_size; 425 void *dev_table = (void *)get_dev_table(iommu); 426 427 BUG_ON(iommu->mmio_base == NULL); 428 429 entry = iommu_virt_to_phys(dev_table); 430 entry |= (dev_table_size >> 12) - 1; 431 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, 432 &entry, sizeof(entry)); 433 } 434 435 /* Generic functions to enable/disable certain features of the IOMMU. */ 436 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 437 { 438 u64 ctrl; 439 440 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 441 ctrl |= (1ULL << bit); 442 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 443 } 444 445 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 446 { 447 u64 ctrl; 448 449 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 450 ctrl &= ~(1ULL << bit); 451 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 452 } 453 454 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) 455 { 456 u64 ctrl; 457 458 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 459 ctrl &= ~CTRL_INV_TO_MASK; 460 ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; 461 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 462 } 463 464 /* Function to enable the hardware */ 465 static void iommu_enable(struct amd_iommu *iommu) 466 { 467 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 468 } 469 470 static void iommu_disable(struct amd_iommu *iommu) 471 { 472 if (!iommu->mmio_base) 473 return; 474 475 /* Disable command buffer */ 476 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 477 478 /* Disable event logging and event interrupts */ 479 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); 480 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 481 482 /* Disable IOMMU GA_LOG */ 483 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 484 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 485 486 /* Disable IOMMU hardware itself */ 487 iommu_feature_disable(iommu, CONTROL_IOMMU_EN); 488 489 /* Clear IRTE cache disabling bit */ 490 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 491 } 492 493 /* 494 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in 495 * the system has one. 496 */ 497 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) 498 { 499 if (!request_mem_region(address, end, "amd_iommu")) { 500 pr_err("Can not reserve memory region %llx-%llx for mmio\n", 501 address, end); 502 pr_err("This is a BIOS bug. Please contact your hardware vendor\n"); 503 return NULL; 504 } 505 506 return (u8 __iomem *)ioremap(address, end); 507 } 508 509 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) 510 { 511 if (iommu->mmio_base) 512 iounmap(iommu->mmio_base); 513 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); 514 } 515 516 static inline u32 get_ivhd_header_size(struct ivhd_header *h) 517 { 518 u32 size = 0; 519 520 switch (h->type) { 521 case 0x10: 522 size = 24; 523 break; 524 case 0x11: 525 case 0x40: 526 size = 40; 527 break; 528 } 529 return size; 530 } 531 532 /**************************************************************************** 533 * 534 * The functions below belong to the first pass of AMD IOMMU ACPI table 535 * parsing. In this pass we try to find out the highest device id this 536 * code has to handle. Upon this information the size of the shared data 537 * structures is determined later. 538 * 539 ****************************************************************************/ 540 541 /* 542 * This function calculates the length of a given IVHD entry 543 */ 544 static inline int ivhd_entry_length(u8 *ivhd) 545 { 546 u32 type = ((struct ivhd_entry *)ivhd)->type; 547 548 if (type < 0x80) { 549 return 0x04 << (*ivhd >> 6); 550 } else if (type == IVHD_DEV_ACPI_HID) { 551 /* For ACPI_HID, offset 21 is uid len */ 552 return *((u8 *)ivhd + 21) + 22; 553 } 554 return 0; 555 } 556 557 /* 558 * After reading the highest device id from the IOMMU PCI capability header 559 * this function looks if there is a higher device id defined in the ACPI table 560 */ 561 static int __init find_last_devid_from_ivhd(struct ivhd_header *h) 562 { 563 u8 *p = (void *)h, *end = (void *)h; 564 struct ivhd_entry *dev; 565 int last_devid = -EINVAL; 566 567 u32 ivhd_size = get_ivhd_header_size(h); 568 569 if (!ivhd_size) { 570 pr_err("Unsupported IVHD type %#x\n", h->type); 571 return -EINVAL; 572 } 573 574 p += ivhd_size; 575 end += h->length; 576 577 while (p < end) { 578 dev = (struct ivhd_entry *)p; 579 switch (dev->type) { 580 case IVHD_DEV_ALL: 581 /* Use maximum BDF value for DEV_ALL */ 582 return 0xffff; 583 case IVHD_DEV_SELECT: 584 case IVHD_DEV_RANGE_END: 585 case IVHD_DEV_ALIAS: 586 case IVHD_DEV_EXT_SELECT: 587 /* all the above subfield types refer to device ids */ 588 if (dev->devid > last_devid) 589 last_devid = dev->devid; 590 break; 591 default: 592 break; 593 } 594 p += ivhd_entry_length(p); 595 } 596 597 WARN_ON(p != end); 598 599 return last_devid; 600 } 601 602 static int __init check_ivrs_checksum(struct acpi_table_header *table) 603 { 604 int i; 605 u8 checksum = 0, *p = (u8 *)table; 606 607 for (i = 0; i < table->length; ++i) 608 checksum += p[i]; 609 if (checksum != 0) { 610 /* ACPI table corrupt */ 611 pr_err(FW_BUG "IVRS invalid checksum\n"); 612 return -ENODEV; 613 } 614 615 return 0; 616 } 617 618 /* 619 * Iterate over all IVHD entries in the ACPI table and find the highest device 620 * id which we need to handle. This is the first of three functions which parse 621 * the ACPI table. So we check the checksum here. 622 */ 623 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) 624 { 625 u8 *p = (u8 *)table, *end = (u8 *)table; 626 struct ivhd_header *h; 627 int last_devid, last_bdf = 0; 628 629 p += IVRS_HEADER_LENGTH; 630 631 end += table->length; 632 while (p < end) { 633 h = (struct ivhd_header *)p; 634 if (h->pci_seg == pci_seg && 635 h->type == amd_iommu_target_ivhd_type) { 636 last_devid = find_last_devid_from_ivhd(h); 637 638 if (last_devid < 0) 639 return -EINVAL; 640 if (last_devid > last_bdf) 641 last_bdf = last_devid; 642 } 643 p += h->length; 644 } 645 WARN_ON(p != end); 646 647 return last_bdf; 648 } 649 650 /**************************************************************************** 651 * 652 * The following functions belong to the code path which parses the ACPI table 653 * the second time. In this ACPI parsing iteration we allocate IOMMU specific 654 * data structures, initialize the per PCI segment device/alias/rlookup table 655 * and also basically initialize the hardware. 656 * 657 ****************************************************************************/ 658 659 /* Allocate per PCI segment device table */ 660 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) 661 { 662 pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, 663 get_order(pci_seg->dev_table_size)); 664 if (!pci_seg->dev_table) 665 return -ENOMEM; 666 667 return 0; 668 } 669 670 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) 671 { 672 free_pages((unsigned long)pci_seg->dev_table, 673 get_order(pci_seg->dev_table_size)); 674 pci_seg->dev_table = NULL; 675 } 676 677 /* Allocate per PCI segment IOMMU rlookup table. */ 678 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 679 { 680 pci_seg->rlookup_table = (void *)__get_free_pages( 681 GFP_KERNEL | __GFP_ZERO, 682 get_order(pci_seg->rlookup_table_size)); 683 if (pci_seg->rlookup_table == NULL) 684 return -ENOMEM; 685 686 return 0; 687 } 688 689 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 690 { 691 free_pages((unsigned long)pci_seg->rlookup_table, 692 get_order(pci_seg->rlookup_table_size)); 693 pci_seg->rlookup_table = NULL; 694 } 695 696 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 697 { 698 pci_seg->irq_lookup_table = (void *)__get_free_pages( 699 GFP_KERNEL | __GFP_ZERO, 700 get_order(pci_seg->rlookup_table_size)); 701 kmemleak_alloc(pci_seg->irq_lookup_table, 702 pci_seg->rlookup_table_size, 1, GFP_KERNEL); 703 if (pci_seg->irq_lookup_table == NULL) 704 return -ENOMEM; 705 706 return 0; 707 } 708 709 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 710 { 711 kmemleak_free(pci_seg->irq_lookup_table); 712 free_pages((unsigned long)pci_seg->irq_lookup_table, 713 get_order(pci_seg->rlookup_table_size)); 714 pci_seg->irq_lookup_table = NULL; 715 } 716 717 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) 718 { 719 int i; 720 721 pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, 722 get_order(pci_seg->alias_table_size)); 723 if (!pci_seg->alias_table) 724 return -ENOMEM; 725 726 /* 727 * let all alias entries point to itself 728 */ 729 for (i = 0; i <= pci_seg->last_bdf; ++i) 730 pci_seg->alias_table[i] = i; 731 732 return 0; 733 } 734 735 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) 736 { 737 free_pages((unsigned long)pci_seg->alias_table, 738 get_order(pci_seg->alias_table_size)); 739 pci_seg->alias_table = NULL; 740 } 741 742 /* 743 * Allocates the command buffer. This buffer is per AMD IOMMU. We can 744 * write commands to that buffer later and the IOMMU will execute them 745 * asynchronously 746 */ 747 static int __init alloc_command_buffer(struct amd_iommu *iommu) 748 { 749 iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 750 get_order(CMD_BUFFER_SIZE)); 751 752 return iommu->cmd_buf ? 0 : -ENOMEM; 753 } 754 755 /* 756 * This function restarts event logging in case the IOMMU experienced 757 * an event log buffer overflow. 758 */ 759 void amd_iommu_restart_event_logging(struct amd_iommu *iommu) 760 { 761 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 762 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 763 } 764 765 /* 766 * This function restarts event logging in case the IOMMU experienced 767 * an GA log overflow. 768 */ 769 void amd_iommu_restart_ga_log(struct amd_iommu *iommu) 770 { 771 u32 status; 772 773 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 774 if (status & MMIO_STATUS_GALOG_RUN_MASK) 775 return; 776 777 pr_info_ratelimited("IOMMU GA Log restarting\n"); 778 779 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 780 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 781 782 writel(MMIO_STATUS_GALOG_OVERFLOW_MASK, 783 iommu->mmio_base + MMIO_STATUS_OFFSET); 784 785 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 786 iommu_feature_enable(iommu, CONTROL_GALOG_EN); 787 } 788 789 /* 790 * This function resets the command buffer if the IOMMU stopped fetching 791 * commands from it. 792 */ 793 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) 794 { 795 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 796 797 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 798 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 799 iommu->cmd_buf_head = 0; 800 iommu->cmd_buf_tail = 0; 801 802 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 803 } 804 805 /* 806 * This function writes the command buffer address to the hardware and 807 * enables it. 808 */ 809 static void iommu_enable_command_buffer(struct amd_iommu *iommu) 810 { 811 u64 entry; 812 813 BUG_ON(iommu->cmd_buf == NULL); 814 815 entry = iommu_virt_to_phys(iommu->cmd_buf); 816 entry |= MMIO_CMD_SIZE_512; 817 818 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 819 &entry, sizeof(entry)); 820 821 amd_iommu_reset_cmd_buffer(iommu); 822 } 823 824 /* 825 * This function disables the command buffer 826 */ 827 static void iommu_disable_command_buffer(struct amd_iommu *iommu) 828 { 829 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 830 } 831 832 static void __init free_command_buffer(struct amd_iommu *iommu) 833 { 834 free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); 835 } 836 837 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, 838 gfp_t gfp, size_t size) 839 { 840 int order = get_order(size); 841 void *buf = (void *)__get_free_pages(gfp, order); 842 843 if (buf && 844 check_feature_on_all_iommus(FEATURE_SNP) && 845 set_memory_4k((unsigned long)buf, (1 << order))) { 846 free_pages((unsigned long)buf, order); 847 buf = NULL; 848 } 849 850 return buf; 851 } 852 853 /* allocates the memory where the IOMMU will log its events to */ 854 static int __init alloc_event_buffer(struct amd_iommu *iommu) 855 { 856 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 857 EVT_BUFFER_SIZE); 858 859 return iommu->evt_buf ? 0 : -ENOMEM; 860 } 861 862 static void iommu_enable_event_buffer(struct amd_iommu *iommu) 863 { 864 u64 entry; 865 866 BUG_ON(iommu->evt_buf == NULL); 867 868 entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 869 870 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 871 &entry, sizeof(entry)); 872 873 /* set head and tail to zero manually */ 874 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 875 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); 876 877 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 878 } 879 880 /* 881 * This function disables the event log buffer 882 */ 883 static void iommu_disable_event_buffer(struct amd_iommu *iommu) 884 { 885 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 886 } 887 888 static void __init free_event_buffer(struct amd_iommu *iommu) 889 { 890 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); 891 } 892 893 /* allocates the memory where the IOMMU will log its events to */ 894 static int __init alloc_ppr_log(struct amd_iommu *iommu) 895 { 896 iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 897 PPR_LOG_SIZE); 898 899 return iommu->ppr_log ? 0 : -ENOMEM; 900 } 901 902 static void iommu_enable_ppr_log(struct amd_iommu *iommu) 903 { 904 u64 entry; 905 906 if (iommu->ppr_log == NULL) 907 return; 908 909 entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; 910 911 memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, 912 &entry, sizeof(entry)); 913 914 /* set head and tail to zero manually */ 915 writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 916 writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 917 918 iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); 919 iommu_feature_enable(iommu, CONTROL_PPR_EN); 920 } 921 922 static void __init free_ppr_log(struct amd_iommu *iommu) 923 { 924 free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); 925 } 926 927 static void free_ga_log(struct amd_iommu *iommu) 928 { 929 #ifdef CONFIG_IRQ_REMAP 930 free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE)); 931 free_pages((unsigned long)iommu->ga_log_tail, get_order(8)); 932 #endif 933 } 934 935 #ifdef CONFIG_IRQ_REMAP 936 static int iommu_ga_log_enable(struct amd_iommu *iommu) 937 { 938 u32 status, i; 939 u64 entry; 940 941 if (!iommu->ga_log) 942 return -EINVAL; 943 944 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 945 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 946 &entry, sizeof(entry)); 947 entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 948 (BIT_ULL(52)-1)) & ~7ULL; 949 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 950 &entry, sizeof(entry)); 951 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 952 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 953 954 955 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 956 iommu_feature_enable(iommu, CONTROL_GALOG_EN); 957 958 for (i = 0; i < LOOP_TIMEOUT; ++i) { 959 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 960 if (status & (MMIO_STATUS_GALOG_RUN_MASK)) 961 break; 962 udelay(10); 963 } 964 965 if (WARN_ON(i >= LOOP_TIMEOUT)) 966 return -EINVAL; 967 968 return 0; 969 } 970 971 static int iommu_init_ga_log(struct amd_iommu *iommu) 972 { 973 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 974 return 0; 975 976 iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 977 get_order(GA_LOG_SIZE)); 978 if (!iommu->ga_log) 979 goto err_out; 980 981 iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 982 get_order(8)); 983 if (!iommu->ga_log_tail) 984 goto err_out; 985 986 return 0; 987 err_out: 988 free_ga_log(iommu); 989 return -EINVAL; 990 } 991 #endif /* CONFIG_IRQ_REMAP */ 992 993 static int __init alloc_cwwb_sem(struct amd_iommu *iommu) 994 { 995 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); 996 997 return iommu->cmd_sem ? 0 : -ENOMEM; 998 } 999 1000 static void __init free_cwwb_sem(struct amd_iommu *iommu) 1001 { 1002 if (iommu->cmd_sem) 1003 free_page((unsigned long)iommu->cmd_sem); 1004 } 1005 1006 static void iommu_enable_xt(struct amd_iommu *iommu) 1007 { 1008 #ifdef CONFIG_IRQ_REMAP 1009 /* 1010 * XT mode (32-bit APIC destination ID) requires 1011 * GA mode (128-bit IRTE support) as a prerequisite. 1012 */ 1013 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) && 1014 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 1015 iommu_feature_enable(iommu, CONTROL_XT_EN); 1016 #endif /* CONFIG_IRQ_REMAP */ 1017 } 1018 1019 static void iommu_enable_gt(struct amd_iommu *iommu) 1020 { 1021 if (!iommu_feature(iommu, FEATURE_GT)) 1022 return; 1023 1024 iommu_feature_enable(iommu, CONTROL_GT_EN); 1025 } 1026 1027 /* sets a specific bit in the device table entry. */ 1028 static void __set_dev_entry_bit(struct dev_table_entry *dev_table, 1029 u16 devid, u8 bit) 1030 { 1031 int i = (bit >> 6) & 0x03; 1032 int _bit = bit & 0x3f; 1033 1034 dev_table[devid].data[i] |= (1UL << _bit); 1035 } 1036 1037 static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) 1038 { 1039 struct dev_table_entry *dev_table = get_dev_table(iommu); 1040 1041 return __set_dev_entry_bit(dev_table, devid, bit); 1042 } 1043 1044 static int __get_dev_entry_bit(struct dev_table_entry *dev_table, 1045 u16 devid, u8 bit) 1046 { 1047 int i = (bit >> 6) & 0x03; 1048 int _bit = bit & 0x3f; 1049 1050 return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; 1051 } 1052 1053 static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) 1054 { 1055 struct dev_table_entry *dev_table = get_dev_table(iommu); 1056 1057 return __get_dev_entry_bit(dev_table, devid, bit); 1058 } 1059 1060 static bool __copy_device_table(struct amd_iommu *iommu) 1061 { 1062 u64 int_ctl, int_tab_len, entry = 0; 1063 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1064 struct dev_table_entry *old_devtb = NULL; 1065 u32 lo, hi, devid, old_devtb_size; 1066 phys_addr_t old_devtb_phys; 1067 u16 dom_id, dte_v, irq_v; 1068 gfp_t gfp_flag; 1069 u64 tmp; 1070 1071 /* Each IOMMU use separate device table with the same size */ 1072 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); 1073 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 1074 entry = (((u64) hi) << 32) + lo; 1075 1076 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 1077 if (old_devtb_size != pci_seg->dev_table_size) { 1078 pr_err("The device table size of IOMMU:%d is not expected!\n", 1079 iommu->index); 1080 return false; 1081 } 1082 1083 /* 1084 * When SME is enabled in the first kernel, the entry includes the 1085 * memory encryption mask(sme_me_mask), we must remove the memory 1086 * encryption mask to obtain the true physical address in kdump kernel. 1087 */ 1088 old_devtb_phys = __sme_clr(entry) & PAGE_MASK; 1089 1090 if (old_devtb_phys >= 0x100000000ULL) { 1091 pr_err("The address of old device table is above 4G, not trustworthy!\n"); 1092 return false; 1093 } 1094 old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) 1095 ? (__force void *)ioremap_encrypted(old_devtb_phys, 1096 pci_seg->dev_table_size) 1097 : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); 1098 1099 if (!old_devtb) 1100 return false; 1101 1102 gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; 1103 pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, 1104 get_order(pci_seg->dev_table_size)); 1105 if (pci_seg->old_dev_tbl_cpy == NULL) { 1106 pr_err("Failed to allocate memory for copying old device table!\n"); 1107 memunmap(old_devtb); 1108 return false; 1109 } 1110 1111 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 1112 pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; 1113 dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; 1114 dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; 1115 1116 if (dte_v && dom_id) { 1117 pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; 1118 pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; 1119 __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); 1120 /* If gcr3 table existed, mask it out */ 1121 if (old_devtb[devid].data[0] & DTE_FLAG_GV) { 1122 tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; 1123 tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; 1124 pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; 1125 tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; 1126 tmp |= DTE_FLAG_GV; 1127 pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; 1128 } 1129 } 1130 1131 irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; 1132 int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; 1133 int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; 1134 if (irq_v && (int_ctl || int_tab_len)) { 1135 if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || 1136 (int_tab_len != DTE_INTTABLEN)) { 1137 pr_err("Wrong old irq remapping flag: %#x\n", devid); 1138 memunmap(old_devtb); 1139 return false; 1140 } 1141 1142 pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; 1143 } 1144 } 1145 memunmap(old_devtb); 1146 1147 return true; 1148 } 1149 1150 static bool copy_device_table(void) 1151 { 1152 struct amd_iommu *iommu; 1153 struct amd_iommu_pci_seg *pci_seg; 1154 1155 if (!amd_iommu_pre_enabled) 1156 return false; 1157 1158 pr_warn("Translation is already enabled - trying to copy translation structures\n"); 1159 1160 /* 1161 * All IOMMUs within PCI segment shares common device table. 1162 * Hence copy device table only once per PCI segment. 1163 */ 1164 for_each_pci_segment(pci_seg) { 1165 for_each_iommu(iommu) { 1166 if (pci_seg->id != iommu->pci_seg->id) 1167 continue; 1168 if (!__copy_device_table(iommu)) 1169 return false; 1170 break; 1171 } 1172 } 1173 1174 return true; 1175 } 1176 1177 void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) 1178 { 1179 int sysmgt; 1180 1181 sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | 1182 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); 1183 1184 if (sysmgt == 0x01) 1185 set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); 1186 } 1187 1188 /* 1189 * This function takes the device specific flags read from the ACPI 1190 * table and sets up the device table entry with that information 1191 */ 1192 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, 1193 u16 devid, u32 flags, u32 ext_flags) 1194 { 1195 if (flags & ACPI_DEVFLAG_INITPASS) 1196 set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); 1197 if (flags & ACPI_DEVFLAG_EXTINT) 1198 set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); 1199 if (flags & ACPI_DEVFLAG_NMI) 1200 set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); 1201 if (flags & ACPI_DEVFLAG_SYSMGT1) 1202 set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); 1203 if (flags & ACPI_DEVFLAG_SYSMGT2) 1204 set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); 1205 if (flags & ACPI_DEVFLAG_LINT0) 1206 set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); 1207 if (flags & ACPI_DEVFLAG_LINT1) 1208 set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); 1209 1210 amd_iommu_apply_erratum_63(iommu, devid); 1211 1212 amd_iommu_set_rlookup_table(iommu, devid); 1213 } 1214 1215 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) 1216 { 1217 struct devid_map *entry; 1218 struct list_head *list; 1219 1220 if (type == IVHD_SPECIAL_IOAPIC) 1221 list = &ioapic_map; 1222 else if (type == IVHD_SPECIAL_HPET) 1223 list = &hpet_map; 1224 else 1225 return -EINVAL; 1226 1227 list_for_each_entry(entry, list, list) { 1228 if (!(entry->id == id && entry->cmd_line)) 1229 continue; 1230 1231 pr_info("Command-line override present for %s id %d - ignoring\n", 1232 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); 1233 1234 *devid = entry->devid; 1235 1236 return 0; 1237 } 1238 1239 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1240 if (!entry) 1241 return -ENOMEM; 1242 1243 entry->id = id; 1244 entry->devid = *devid; 1245 entry->cmd_line = cmd_line; 1246 1247 list_add_tail(&entry->list, list); 1248 1249 return 0; 1250 } 1251 1252 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, 1253 bool cmd_line) 1254 { 1255 struct acpihid_map_entry *entry; 1256 struct list_head *list = &acpihid_map; 1257 1258 list_for_each_entry(entry, list, list) { 1259 if (strcmp(entry->hid, hid) || 1260 (*uid && *entry->uid && strcmp(entry->uid, uid)) || 1261 !entry->cmd_line) 1262 continue; 1263 1264 pr_info("Command-line override for hid:%s uid:%s\n", 1265 hid, uid); 1266 *devid = entry->devid; 1267 return 0; 1268 } 1269 1270 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1271 if (!entry) 1272 return -ENOMEM; 1273 1274 memcpy(entry->uid, uid, strlen(uid)); 1275 memcpy(entry->hid, hid, strlen(hid)); 1276 entry->devid = *devid; 1277 entry->cmd_line = cmd_line; 1278 entry->root_devid = (entry->devid & (~0x7)); 1279 1280 pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", 1281 entry->cmd_line ? "cmd" : "ivrs", 1282 entry->hid, entry->uid, entry->root_devid); 1283 1284 list_add_tail(&entry->list, list); 1285 return 0; 1286 } 1287 1288 static int __init add_early_maps(void) 1289 { 1290 int i, ret; 1291 1292 for (i = 0; i < early_ioapic_map_size; ++i) { 1293 ret = add_special_device(IVHD_SPECIAL_IOAPIC, 1294 early_ioapic_map[i].id, 1295 &early_ioapic_map[i].devid, 1296 early_ioapic_map[i].cmd_line); 1297 if (ret) 1298 return ret; 1299 } 1300 1301 for (i = 0; i < early_hpet_map_size; ++i) { 1302 ret = add_special_device(IVHD_SPECIAL_HPET, 1303 early_hpet_map[i].id, 1304 &early_hpet_map[i].devid, 1305 early_hpet_map[i].cmd_line); 1306 if (ret) 1307 return ret; 1308 } 1309 1310 for (i = 0; i < early_acpihid_map_size; ++i) { 1311 ret = add_acpi_hid_device(early_acpihid_map[i].hid, 1312 early_acpihid_map[i].uid, 1313 &early_acpihid_map[i].devid, 1314 early_acpihid_map[i].cmd_line); 1315 if (ret) 1316 return ret; 1317 } 1318 1319 return 0; 1320 } 1321 1322 /* 1323 * Takes a pointer to an AMD IOMMU entry in the ACPI table and 1324 * initializes the hardware and our data structures with it. 1325 */ 1326 static int __init init_iommu_from_acpi(struct amd_iommu *iommu, 1327 struct ivhd_header *h) 1328 { 1329 u8 *p = (u8 *)h; 1330 u8 *end = p, flags = 0; 1331 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; 1332 u32 dev_i, ext_flags = 0; 1333 bool alias = false; 1334 struct ivhd_entry *e; 1335 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1336 u32 ivhd_size; 1337 int ret; 1338 1339 1340 ret = add_early_maps(); 1341 if (ret) 1342 return ret; 1343 1344 amd_iommu_apply_ivrs_quirks(); 1345 1346 /* 1347 * First save the recommended feature enable bits from ACPI 1348 */ 1349 iommu->acpi_flags = h->flags; 1350 1351 /* 1352 * Done. Now parse the device entries 1353 */ 1354 ivhd_size = get_ivhd_header_size(h); 1355 if (!ivhd_size) { 1356 pr_err("Unsupported IVHD type %#x\n", h->type); 1357 return -EINVAL; 1358 } 1359 1360 p += ivhd_size; 1361 1362 end += h->length; 1363 1364 1365 while (p < end) { 1366 e = (struct ivhd_entry *)p; 1367 seg_id = pci_seg->id; 1368 1369 switch (e->type) { 1370 case IVHD_DEV_ALL: 1371 1372 DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); 1373 1374 for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) 1375 set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); 1376 break; 1377 case IVHD_DEV_SELECT: 1378 1379 DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " 1380 "flags: %02x\n", 1381 seg_id, PCI_BUS_NUM(e->devid), 1382 PCI_SLOT(e->devid), 1383 PCI_FUNC(e->devid), 1384 e->flags); 1385 1386 devid = e->devid; 1387 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1388 break; 1389 case IVHD_DEV_SELECT_RANGE_START: 1390 1391 DUMP_printk(" DEV_SELECT_RANGE_START\t " 1392 "devid: %04x:%02x:%02x.%x flags: %02x\n", 1393 seg_id, PCI_BUS_NUM(e->devid), 1394 PCI_SLOT(e->devid), 1395 PCI_FUNC(e->devid), 1396 e->flags); 1397 1398 devid_start = e->devid; 1399 flags = e->flags; 1400 ext_flags = 0; 1401 alias = false; 1402 break; 1403 case IVHD_DEV_ALIAS: 1404 1405 DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " 1406 "flags: %02x devid_to: %02x:%02x.%x\n", 1407 seg_id, PCI_BUS_NUM(e->devid), 1408 PCI_SLOT(e->devid), 1409 PCI_FUNC(e->devid), 1410 e->flags, 1411 PCI_BUS_NUM(e->ext >> 8), 1412 PCI_SLOT(e->ext >> 8), 1413 PCI_FUNC(e->ext >> 8)); 1414 1415 devid = e->devid; 1416 devid_to = e->ext >> 8; 1417 set_dev_entry_from_acpi(iommu, devid , e->flags, 0); 1418 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 1419 pci_seg->alias_table[devid] = devid_to; 1420 break; 1421 case IVHD_DEV_ALIAS_RANGE: 1422 1423 DUMP_printk(" DEV_ALIAS_RANGE\t\t " 1424 "devid: %04x:%02x:%02x.%x flags: %02x " 1425 "devid_to: %04x:%02x:%02x.%x\n", 1426 seg_id, PCI_BUS_NUM(e->devid), 1427 PCI_SLOT(e->devid), 1428 PCI_FUNC(e->devid), 1429 e->flags, 1430 seg_id, PCI_BUS_NUM(e->ext >> 8), 1431 PCI_SLOT(e->ext >> 8), 1432 PCI_FUNC(e->ext >> 8)); 1433 1434 devid_start = e->devid; 1435 flags = e->flags; 1436 devid_to = e->ext >> 8; 1437 ext_flags = 0; 1438 alias = true; 1439 break; 1440 case IVHD_DEV_EXT_SELECT: 1441 1442 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " 1443 "flags: %02x ext: %08x\n", 1444 seg_id, PCI_BUS_NUM(e->devid), 1445 PCI_SLOT(e->devid), 1446 PCI_FUNC(e->devid), 1447 e->flags, e->ext); 1448 1449 devid = e->devid; 1450 set_dev_entry_from_acpi(iommu, devid, e->flags, 1451 e->ext); 1452 break; 1453 case IVHD_DEV_EXT_SELECT_RANGE: 1454 1455 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " 1456 "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", 1457 seg_id, PCI_BUS_NUM(e->devid), 1458 PCI_SLOT(e->devid), 1459 PCI_FUNC(e->devid), 1460 e->flags, e->ext); 1461 1462 devid_start = e->devid; 1463 flags = e->flags; 1464 ext_flags = e->ext; 1465 alias = false; 1466 break; 1467 case IVHD_DEV_RANGE_END: 1468 1469 DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", 1470 seg_id, PCI_BUS_NUM(e->devid), 1471 PCI_SLOT(e->devid), 1472 PCI_FUNC(e->devid)); 1473 1474 devid = e->devid; 1475 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 1476 if (alias) { 1477 pci_seg->alias_table[dev_i] = devid_to; 1478 set_dev_entry_from_acpi(iommu, 1479 devid_to, flags, ext_flags); 1480 } 1481 set_dev_entry_from_acpi(iommu, dev_i, 1482 flags, ext_flags); 1483 } 1484 break; 1485 case IVHD_DEV_SPECIAL: { 1486 u8 handle, type; 1487 const char *var; 1488 u32 devid; 1489 int ret; 1490 1491 handle = e->ext & 0xff; 1492 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); 1493 type = (e->ext >> 24) & 0xff; 1494 1495 if (type == IVHD_SPECIAL_IOAPIC) 1496 var = "IOAPIC"; 1497 else if (type == IVHD_SPECIAL_HPET) 1498 var = "HPET"; 1499 else 1500 var = "UNKNOWN"; 1501 1502 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", 1503 var, (int)handle, 1504 seg_id, PCI_BUS_NUM(devid), 1505 PCI_SLOT(devid), 1506 PCI_FUNC(devid)); 1507 1508 ret = add_special_device(type, handle, &devid, false); 1509 if (ret) 1510 return ret; 1511 1512 /* 1513 * add_special_device might update the devid in case a 1514 * command-line override is present. So call 1515 * set_dev_entry_from_acpi after add_special_device. 1516 */ 1517 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1518 1519 break; 1520 } 1521 case IVHD_DEV_ACPI_HID: { 1522 u32 devid; 1523 u8 hid[ACPIHID_HID_LEN]; 1524 u8 uid[ACPIHID_UID_LEN]; 1525 int ret; 1526 1527 if (h->type != 0x40) { 1528 pr_err(FW_BUG "Invalid IVHD device type %#x\n", 1529 e->type); 1530 break; 1531 } 1532 1533 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1); 1534 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1); 1535 hid[ACPIHID_HID_LEN - 1] = '\0'; 1536 1537 if (!(*hid)) { 1538 pr_err(FW_BUG "Invalid HID.\n"); 1539 break; 1540 } 1541 1542 uid[0] = '\0'; 1543 switch (e->uidf) { 1544 case UID_NOT_PRESENT: 1545 1546 if (e->uidl != 0) 1547 pr_warn(FW_BUG "Invalid UID length.\n"); 1548 1549 break; 1550 case UID_IS_INTEGER: 1551 1552 sprintf(uid, "%d", e->uid); 1553 1554 break; 1555 case UID_IS_CHARACTER: 1556 1557 memcpy(uid, &e->uid, e->uidl); 1558 uid[e->uidl] = '\0'; 1559 1560 break; 1561 default: 1562 break; 1563 } 1564 1565 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); 1566 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", 1567 hid, uid, seg_id, 1568 PCI_BUS_NUM(devid), 1569 PCI_SLOT(devid), 1570 PCI_FUNC(devid)); 1571 1572 flags = e->flags; 1573 1574 ret = add_acpi_hid_device(hid, uid, &devid, false); 1575 if (ret) 1576 return ret; 1577 1578 /* 1579 * add_special_device might update the devid in case a 1580 * command-line override is present. So call 1581 * set_dev_entry_from_acpi after add_special_device. 1582 */ 1583 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1584 1585 break; 1586 } 1587 default: 1588 break; 1589 } 1590 1591 p += ivhd_entry_length(p); 1592 } 1593 1594 return 0; 1595 } 1596 1597 /* Allocate PCI segment data structure */ 1598 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, 1599 struct acpi_table_header *ivrs_base) 1600 { 1601 struct amd_iommu_pci_seg *pci_seg; 1602 int last_bdf; 1603 1604 /* 1605 * First parse ACPI tables to find the largest Bus/Dev/Func we need to 1606 * handle in this PCI segment. Upon this information the shared data 1607 * structures for the PCI segments in the system will be allocated. 1608 */ 1609 last_bdf = find_last_devid_acpi(ivrs_base, id); 1610 if (last_bdf < 0) 1611 return NULL; 1612 1613 pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); 1614 if (pci_seg == NULL) 1615 return NULL; 1616 1617 pci_seg->last_bdf = last_bdf; 1618 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); 1619 pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); 1620 pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); 1621 pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); 1622 1623 pci_seg->id = id; 1624 init_llist_head(&pci_seg->dev_data_list); 1625 INIT_LIST_HEAD(&pci_seg->unity_map); 1626 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); 1627 1628 if (alloc_dev_table(pci_seg)) 1629 return NULL; 1630 if (alloc_alias_table(pci_seg)) 1631 return NULL; 1632 if (alloc_rlookup_table(pci_seg)) 1633 return NULL; 1634 1635 return pci_seg; 1636 } 1637 1638 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, 1639 struct acpi_table_header *ivrs_base) 1640 { 1641 struct amd_iommu_pci_seg *pci_seg; 1642 1643 for_each_pci_segment(pci_seg) { 1644 if (pci_seg->id == id) 1645 return pci_seg; 1646 } 1647 1648 return alloc_pci_segment(id, ivrs_base); 1649 } 1650 1651 static void __init free_pci_segments(void) 1652 { 1653 struct amd_iommu_pci_seg *pci_seg, *next; 1654 1655 for_each_pci_segment_safe(pci_seg, next) { 1656 list_del(&pci_seg->list); 1657 free_irq_lookup_table(pci_seg); 1658 free_rlookup_table(pci_seg); 1659 free_alias_table(pci_seg); 1660 free_dev_table(pci_seg); 1661 kfree(pci_seg); 1662 } 1663 } 1664 1665 static void __init free_iommu_one(struct amd_iommu *iommu) 1666 { 1667 free_cwwb_sem(iommu); 1668 free_command_buffer(iommu); 1669 free_event_buffer(iommu); 1670 free_ppr_log(iommu); 1671 free_ga_log(iommu); 1672 iommu_unmap_mmio_space(iommu); 1673 } 1674 1675 static void __init free_iommu_all(void) 1676 { 1677 struct amd_iommu *iommu, *next; 1678 1679 for_each_iommu_safe(iommu, next) { 1680 list_del(&iommu->list); 1681 free_iommu_one(iommu); 1682 kfree(iommu); 1683 } 1684 } 1685 1686 /* 1687 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) 1688 * Workaround: 1689 * BIOS should disable L2B micellaneous clock gating by setting 1690 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b 1691 */ 1692 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) 1693 { 1694 u32 value; 1695 1696 if ((boot_cpu_data.x86 != 0x15) || 1697 (boot_cpu_data.x86_model < 0x10) || 1698 (boot_cpu_data.x86_model > 0x1f)) 1699 return; 1700 1701 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1702 pci_read_config_dword(iommu->dev, 0xf4, &value); 1703 1704 if (value & BIT(2)) 1705 return; 1706 1707 /* Select NB indirect register 0x90 and enable writing */ 1708 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1709 1710 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1711 pci_info(iommu->dev, "Applying erratum 746 workaround\n"); 1712 1713 /* Clear the enable writing bit */ 1714 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1715 } 1716 1717 /* 1718 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) 1719 * Workaround: 1720 * BIOS should enable ATS write permission check by setting 1721 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b 1722 */ 1723 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) 1724 { 1725 u32 value; 1726 1727 if ((boot_cpu_data.x86 != 0x15) || 1728 (boot_cpu_data.x86_model < 0x30) || 1729 (boot_cpu_data.x86_model > 0x3f)) 1730 return; 1731 1732 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ 1733 value = iommu_read_l2(iommu, 0x47); 1734 1735 if (value & BIT(0)) 1736 return; 1737 1738 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ 1739 iommu_write_l2(iommu, 0x47, value | BIT(0)); 1740 1741 pci_info(iommu->dev, "Applying ATS write check workaround\n"); 1742 } 1743 1744 /* 1745 * This function glues the initialization function for one IOMMU 1746 * together and also allocates the command buffer and programs the 1747 * hardware. It does NOT enable the IOMMU. This is done afterwards. 1748 */ 1749 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, 1750 struct acpi_table_header *ivrs_base) 1751 { 1752 struct amd_iommu_pci_seg *pci_seg; 1753 1754 pci_seg = get_pci_segment(h->pci_seg, ivrs_base); 1755 if (pci_seg == NULL) 1756 return -ENOMEM; 1757 iommu->pci_seg = pci_seg; 1758 1759 raw_spin_lock_init(&iommu->lock); 1760 atomic64_set(&iommu->cmd_sem_val, 0); 1761 1762 /* Add IOMMU to internal data structures */ 1763 list_add_tail(&iommu->list, &amd_iommu_list); 1764 iommu->index = amd_iommus_present++; 1765 1766 if (unlikely(iommu->index >= MAX_IOMMUS)) { 1767 WARN(1, "System has more IOMMUs than supported by this driver\n"); 1768 return -ENOSYS; 1769 } 1770 1771 /* Index is fine - add IOMMU to the array */ 1772 amd_iommus[iommu->index] = iommu; 1773 1774 /* 1775 * Copy data from ACPI table entry to the iommu struct 1776 */ 1777 iommu->devid = h->devid; 1778 iommu->cap_ptr = h->cap_ptr; 1779 iommu->mmio_phys = h->mmio_phys; 1780 1781 switch (h->type) { 1782 case 0x10: 1783 /* Check if IVHD EFR contains proper max banks/counters */ 1784 if ((h->efr_attr != 0) && 1785 ((h->efr_attr & (0xF << 13)) != 0) && 1786 ((h->efr_attr & (0x3F << 17)) != 0)) 1787 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1788 else 1789 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1790 1791 /* 1792 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. 1793 * GAM also requires GA mode. Therefore, we need to 1794 * check cmpxchg16b support before enabling it. 1795 */ 1796 if (!boot_cpu_has(X86_FEATURE_CX16) || 1797 ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) 1798 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1799 break; 1800 case 0x11: 1801 case 0x40: 1802 if (h->efr_reg & (1 << 9)) 1803 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1804 else 1805 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1806 1807 /* 1808 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. 1809 * XT, GAM also requires GA mode. Therefore, we need to 1810 * check cmpxchg16b support before enabling them. 1811 */ 1812 if (!boot_cpu_has(X86_FEATURE_CX16) || 1813 ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { 1814 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1815 break; 1816 } 1817 1818 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1819 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1820 1821 early_iommu_features_init(iommu, h); 1822 1823 break; 1824 default: 1825 return -EINVAL; 1826 } 1827 1828 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, 1829 iommu->mmio_phys_end); 1830 if (!iommu->mmio_base) 1831 return -ENOMEM; 1832 1833 return init_iommu_from_acpi(iommu, h); 1834 } 1835 1836 static int __init init_iommu_one_late(struct amd_iommu *iommu) 1837 { 1838 int ret; 1839 1840 if (alloc_cwwb_sem(iommu)) 1841 return -ENOMEM; 1842 1843 if (alloc_command_buffer(iommu)) 1844 return -ENOMEM; 1845 1846 if (alloc_event_buffer(iommu)) 1847 return -ENOMEM; 1848 1849 iommu->int_enabled = false; 1850 1851 init_translation_status(iommu); 1852 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1853 iommu_disable(iommu); 1854 clear_translation_pre_enabled(iommu); 1855 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", 1856 iommu->index); 1857 } 1858 if (amd_iommu_pre_enabled) 1859 amd_iommu_pre_enabled = translation_pre_enabled(iommu); 1860 1861 if (amd_iommu_irq_remap) { 1862 ret = amd_iommu_create_irq_domain(iommu); 1863 if (ret) 1864 return ret; 1865 } 1866 1867 /* 1868 * Make sure IOMMU is not considered to translate itself. The IVRS 1869 * table tells us so, but this is a lie! 1870 */ 1871 iommu->pci_seg->rlookup_table[iommu->devid] = NULL; 1872 1873 return 0; 1874 } 1875 1876 /** 1877 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type 1878 * @ivrs: Pointer to the IVRS header 1879 * 1880 * This function search through all IVDB of the maximum supported IVHD 1881 */ 1882 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) 1883 { 1884 u8 *base = (u8 *)ivrs; 1885 struct ivhd_header *ivhd = (struct ivhd_header *) 1886 (base + IVRS_HEADER_LENGTH); 1887 u8 last_type = ivhd->type; 1888 u16 devid = ivhd->devid; 1889 1890 while (((u8 *)ivhd - base < ivrs->length) && 1891 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { 1892 u8 *p = (u8 *) ivhd; 1893 1894 if (ivhd->devid == devid) 1895 last_type = ivhd->type; 1896 ivhd = (struct ivhd_header *)(p + ivhd->length); 1897 } 1898 1899 return last_type; 1900 } 1901 1902 /* 1903 * Iterates over all IOMMU entries in the ACPI table, allocates the 1904 * IOMMU structure and initializes it with init_iommu_one() 1905 */ 1906 static int __init init_iommu_all(struct acpi_table_header *table) 1907 { 1908 u8 *p = (u8 *)table, *end = (u8 *)table; 1909 struct ivhd_header *h; 1910 struct amd_iommu *iommu; 1911 int ret; 1912 1913 end += table->length; 1914 p += IVRS_HEADER_LENGTH; 1915 1916 /* Phase 1: Process all IVHD blocks */ 1917 while (p < end) { 1918 h = (struct ivhd_header *)p; 1919 if (*p == amd_iommu_target_ivhd_type) { 1920 1921 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " 1922 "flags: %01x info %04x\n", 1923 h->pci_seg, PCI_BUS_NUM(h->devid), 1924 PCI_SLOT(h->devid), PCI_FUNC(h->devid), 1925 h->cap_ptr, h->flags, h->info); 1926 DUMP_printk(" mmio-addr: %016llx\n", 1927 h->mmio_phys); 1928 1929 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 1930 if (iommu == NULL) 1931 return -ENOMEM; 1932 1933 ret = init_iommu_one(iommu, h, table); 1934 if (ret) 1935 return ret; 1936 } 1937 p += h->length; 1938 1939 } 1940 WARN_ON(p != end); 1941 1942 /* Phase 2 : Early feature support check */ 1943 get_global_efr(); 1944 1945 /* Phase 3 : Enabling IOMMU features */ 1946 for_each_iommu(iommu) { 1947 ret = init_iommu_one_late(iommu); 1948 if (ret) 1949 return ret; 1950 } 1951 1952 return 0; 1953 } 1954 1955 static void init_iommu_perf_ctr(struct amd_iommu *iommu) 1956 { 1957 u64 val; 1958 struct pci_dev *pdev = iommu->dev; 1959 1960 if (!iommu_feature(iommu, FEATURE_PC)) 1961 return; 1962 1963 amd_iommu_pc_present = true; 1964 1965 pci_info(pdev, "IOMMU performance counters supported\n"); 1966 1967 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 1968 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 1969 iommu->max_counters = (u8) ((val >> 7) & 0xf); 1970 1971 return; 1972 } 1973 1974 static ssize_t amd_iommu_show_cap(struct device *dev, 1975 struct device_attribute *attr, 1976 char *buf) 1977 { 1978 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 1979 return sysfs_emit(buf, "%x\n", iommu->cap); 1980 } 1981 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); 1982 1983 static ssize_t amd_iommu_show_features(struct device *dev, 1984 struct device_attribute *attr, 1985 char *buf) 1986 { 1987 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 1988 return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features); 1989 } 1990 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); 1991 1992 static struct attribute *amd_iommu_attrs[] = { 1993 &dev_attr_cap.attr, 1994 &dev_attr_features.attr, 1995 NULL, 1996 }; 1997 1998 static struct attribute_group amd_iommu_group = { 1999 .name = "amd-iommu", 2000 .attrs = amd_iommu_attrs, 2001 }; 2002 2003 static const struct attribute_group *amd_iommu_groups[] = { 2004 &amd_iommu_group, 2005 NULL, 2006 }; 2007 2008 /* 2009 * Note: IVHD 0x11 and 0x40 also contains exact copy 2010 * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. 2011 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). 2012 */ 2013 static void __init late_iommu_features_init(struct amd_iommu *iommu) 2014 { 2015 u64 features, features2; 2016 2017 if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) 2018 return; 2019 2020 /* read extended feature bits */ 2021 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); 2022 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); 2023 2024 if (!iommu->features) { 2025 iommu->features = features; 2026 iommu->features2 = features2; 2027 return; 2028 } 2029 2030 /* 2031 * Sanity check and warn if EFR values from 2032 * IVHD and MMIO conflict. 2033 */ 2034 if (features != iommu->features || 2035 features2 != iommu->features2) { 2036 pr_warn(FW_WARN 2037 "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", 2038 features, iommu->features, 2039 features2, iommu->features2); 2040 } 2041 } 2042 2043 static int __init iommu_init_pci(struct amd_iommu *iommu) 2044 { 2045 int cap_ptr = iommu->cap_ptr; 2046 int ret; 2047 2048 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2049 PCI_BUS_NUM(iommu->devid), 2050 iommu->devid & 0xff); 2051 if (!iommu->dev) 2052 return -ENODEV; 2053 2054 /* Prevent binding other PCI device drivers to IOMMU devices */ 2055 iommu->dev->match_driver = false; 2056 2057 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 2058 &iommu->cap); 2059 2060 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 2061 amd_iommu_iotlb_sup = false; 2062 2063 late_iommu_features_init(iommu); 2064 2065 if (iommu_feature(iommu, FEATURE_GT)) { 2066 int glxval; 2067 u32 max_pasid; 2068 u64 pasmax; 2069 2070 pasmax = iommu->features & FEATURE_PASID_MASK; 2071 pasmax >>= FEATURE_PASID_SHIFT; 2072 max_pasid = (1 << (pasmax + 1)) - 1; 2073 2074 amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); 2075 2076 BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); 2077 2078 glxval = iommu->features & FEATURE_GLXVAL_MASK; 2079 glxval >>= FEATURE_GLXVAL_SHIFT; 2080 2081 if (amd_iommu_max_glx_val == -1) 2082 amd_iommu_max_glx_val = glxval; 2083 else 2084 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 2085 } 2086 2087 if (iommu_feature(iommu, FEATURE_GT) && 2088 iommu_feature(iommu, FEATURE_PPR)) { 2089 iommu->is_iommu_v2 = true; 2090 amd_iommu_v2_present = true; 2091 } 2092 2093 if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) 2094 return -ENOMEM; 2095 2096 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { 2097 pr_info("Using strict mode due to virtualization\n"); 2098 iommu_set_dma_strict(); 2099 amd_iommu_np_cache = true; 2100 } 2101 2102 init_iommu_perf_ctr(iommu); 2103 2104 if (amd_iommu_pgtable == AMD_IOMMU_V2) { 2105 if (!iommu_feature(iommu, FEATURE_GIOSUP) || 2106 !iommu_feature(iommu, FEATURE_GT)) { 2107 pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); 2108 amd_iommu_pgtable = AMD_IOMMU_V1; 2109 } else if (iommu_default_passthrough()) { 2110 pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n"); 2111 amd_iommu_pgtable = AMD_IOMMU_V1; 2112 } 2113 } 2114 2115 if (is_rd890_iommu(iommu->dev)) { 2116 int i, j; 2117 2118 iommu->root_pdev = 2119 pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2120 iommu->dev->bus->number, 2121 PCI_DEVFN(0, 0)); 2122 2123 /* 2124 * Some rd890 systems may not be fully reconfigured by the 2125 * BIOS, so it's necessary for us to store this information so 2126 * it can be reprogrammed on resume 2127 */ 2128 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, 2129 &iommu->stored_addr_lo); 2130 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, 2131 &iommu->stored_addr_hi); 2132 2133 /* Low bit locks writes to configuration space */ 2134 iommu->stored_addr_lo &= ~1; 2135 2136 for (i = 0; i < 6; i++) 2137 for (j = 0; j < 0x12; j++) 2138 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); 2139 2140 for (i = 0; i < 0x83; i++) 2141 iommu->stored_l2[i] = iommu_read_l2(iommu, i); 2142 } 2143 2144 amd_iommu_erratum_746_workaround(iommu); 2145 amd_iommu_ats_write_check_workaround(iommu); 2146 2147 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, 2148 amd_iommu_groups, "ivhd%d", iommu->index); 2149 if (ret) 2150 return ret; 2151 2152 iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); 2153 2154 return pci_enable_device(iommu->dev); 2155 } 2156 2157 static void print_iommu_info(void) 2158 { 2159 static const char * const feat_str[] = { 2160 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", 2161 "IA", "GA", "HE", "PC" 2162 }; 2163 struct amd_iommu *iommu; 2164 2165 for_each_iommu(iommu) { 2166 struct pci_dev *pdev = iommu->dev; 2167 int i; 2168 2169 pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); 2170 2171 if (iommu->cap & (1 << IOMMU_CAP_EFR)) { 2172 pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2); 2173 2174 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { 2175 if (iommu_feature(iommu, (1ULL << i))) 2176 pr_cont(" %s", feat_str[i]); 2177 } 2178 2179 if (iommu->features & FEATURE_GAM_VAPIC) 2180 pr_cont(" GA_vAPIC"); 2181 2182 if (iommu->features & FEATURE_SNP) 2183 pr_cont(" SNP"); 2184 2185 pr_cont("\n"); 2186 } 2187 } 2188 if (irq_remapping_enabled) { 2189 pr_info("Interrupt remapping enabled\n"); 2190 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2191 pr_info("X2APIC enabled\n"); 2192 } 2193 if (amd_iommu_pgtable == AMD_IOMMU_V2) { 2194 pr_info("V2 page table enabled (Paging mode : %d level)\n", 2195 amd_iommu_gpt_level); 2196 } 2197 } 2198 2199 static int __init amd_iommu_init_pci(void) 2200 { 2201 struct amd_iommu *iommu; 2202 struct amd_iommu_pci_seg *pci_seg; 2203 int ret; 2204 2205 for_each_iommu(iommu) { 2206 ret = iommu_init_pci(iommu); 2207 if (ret) { 2208 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", 2209 iommu->index, ret); 2210 goto out; 2211 } 2212 /* Need to setup range after PCI init */ 2213 iommu_set_cwwb_range(iommu); 2214 } 2215 2216 /* 2217 * Order is important here to make sure any unity map requirements are 2218 * fulfilled. The unity mappings are created and written to the device 2219 * table during the iommu_init_pci() call. 2220 * 2221 * After that we call init_device_table_dma() to make sure any 2222 * uninitialized DTE will block DMA, and in the end we flush the caches 2223 * of all IOMMUs to make sure the changes to the device table are 2224 * active. 2225 */ 2226 for_each_pci_segment(pci_seg) 2227 init_device_table_dma(pci_seg); 2228 2229 for_each_iommu(iommu) 2230 iommu_flush_all_caches(iommu); 2231 2232 print_iommu_info(); 2233 2234 out: 2235 return ret; 2236 } 2237 2238 /**************************************************************************** 2239 * 2240 * The following functions initialize the MSI interrupts for all IOMMUs 2241 * in the system. It's a bit challenging because there could be multiple 2242 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per 2243 * pci_dev. 2244 * 2245 ****************************************************************************/ 2246 2247 static int iommu_setup_msi(struct amd_iommu *iommu) 2248 { 2249 int r; 2250 2251 r = pci_enable_msi(iommu->dev); 2252 if (r) 2253 return r; 2254 2255 r = request_threaded_irq(iommu->dev->irq, 2256 amd_iommu_int_handler, 2257 amd_iommu_int_thread, 2258 0, "AMD-Vi", 2259 iommu); 2260 2261 if (r) { 2262 pci_disable_msi(iommu->dev); 2263 return r; 2264 } 2265 2266 return 0; 2267 } 2268 2269 union intcapxt { 2270 u64 capxt; 2271 struct { 2272 u64 reserved_0 : 2, 2273 dest_mode_logical : 1, 2274 reserved_1 : 5, 2275 destid_0_23 : 24, 2276 vector : 8, 2277 reserved_2 : 16, 2278 destid_24_31 : 8; 2279 }; 2280 } __attribute__ ((packed)); 2281 2282 2283 static struct irq_chip intcapxt_controller; 2284 2285 static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2286 struct irq_data *irqd, bool reserve) 2287 { 2288 return 0; 2289 } 2290 2291 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2292 struct irq_data *irqd) 2293 { 2294 } 2295 2296 2297 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2298 unsigned int nr_irqs, void *arg) 2299 { 2300 struct irq_alloc_info *info = arg; 2301 int i, ret; 2302 2303 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2304 return -EINVAL; 2305 2306 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2307 if (ret < 0) 2308 return ret; 2309 2310 for (i = virq; i < virq + nr_irqs; i++) { 2311 struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2312 2313 irqd->chip = &intcapxt_controller; 2314 irqd->chip_data = info->data; 2315 __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2316 } 2317 2318 return ret; 2319 } 2320 2321 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2322 unsigned int nr_irqs) 2323 { 2324 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2325 } 2326 2327 2328 static void intcapxt_unmask_irq(struct irq_data *irqd) 2329 { 2330 struct amd_iommu *iommu = irqd->chip_data; 2331 struct irq_cfg *cfg = irqd_cfg(irqd); 2332 union intcapxt xt; 2333 2334 xt.capxt = 0ULL; 2335 xt.dest_mode_logical = apic->dest_mode_logical; 2336 xt.vector = cfg->vector; 2337 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2338 xt.destid_24_31 = cfg->dest_apicid >> 24; 2339 2340 /** 2341 * Current IOMMU implementation uses the same IRQ for all 2342 * 3 IOMMU interrupts. 2343 */ 2344 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); 2345 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); 2346 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); 2347 } 2348 2349 static void intcapxt_mask_irq(struct irq_data *irqd) 2350 { 2351 struct amd_iommu *iommu = irqd->chip_data; 2352 2353 writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); 2354 writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); 2355 writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); 2356 } 2357 2358 2359 static int intcapxt_set_affinity(struct irq_data *irqd, 2360 const struct cpumask *mask, bool force) 2361 { 2362 struct irq_data *parent = irqd->parent_data; 2363 int ret; 2364 2365 ret = parent->chip->irq_set_affinity(parent, mask, force); 2366 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2367 return ret; 2368 return 0; 2369 } 2370 2371 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on) 2372 { 2373 return on ? -EOPNOTSUPP : 0; 2374 } 2375 2376 static struct irq_chip intcapxt_controller = { 2377 .name = "IOMMU-MSI", 2378 .irq_unmask = intcapxt_unmask_irq, 2379 .irq_mask = intcapxt_mask_irq, 2380 .irq_ack = irq_chip_ack_parent, 2381 .irq_retrigger = irq_chip_retrigger_hierarchy, 2382 .irq_set_affinity = intcapxt_set_affinity, 2383 .irq_set_wake = intcapxt_set_wake, 2384 .flags = IRQCHIP_MASK_ON_SUSPEND, 2385 }; 2386 2387 static const struct irq_domain_ops intcapxt_domain_ops = { 2388 .alloc = intcapxt_irqdomain_alloc, 2389 .free = intcapxt_irqdomain_free, 2390 .activate = intcapxt_irqdomain_activate, 2391 .deactivate = intcapxt_irqdomain_deactivate, 2392 }; 2393 2394 2395 static struct irq_domain *iommu_irqdomain; 2396 2397 static struct irq_domain *iommu_get_irqdomain(void) 2398 { 2399 struct fwnode_handle *fn; 2400 2401 /* No need for locking here (yet) as the init is single-threaded */ 2402 if (iommu_irqdomain) 2403 return iommu_irqdomain; 2404 2405 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2406 if (!fn) 2407 return NULL; 2408 2409 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2410 fn, &intcapxt_domain_ops, 2411 NULL); 2412 if (!iommu_irqdomain) 2413 irq_domain_free_fwnode(fn); 2414 2415 return iommu_irqdomain; 2416 } 2417 2418 static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2419 { 2420 struct irq_domain *domain; 2421 struct irq_alloc_info info; 2422 int irq, ret; 2423 int node = dev_to_node(&iommu->dev->dev); 2424 2425 domain = iommu_get_irqdomain(); 2426 if (!domain) 2427 return -ENXIO; 2428 2429 init_irq_alloc_info(&info, NULL); 2430 info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2431 info.data = iommu; 2432 2433 irq = irq_domain_alloc_irqs(domain, 1, node, &info); 2434 if (irq < 0) { 2435 irq_domain_remove(domain); 2436 return irq; 2437 } 2438 2439 ret = request_threaded_irq(irq, amd_iommu_int_handler, 2440 amd_iommu_int_thread, 0, "AMD-Vi", iommu); 2441 if (ret) { 2442 irq_domain_free_irqs(irq, 1); 2443 irq_domain_remove(domain); 2444 return ret; 2445 } 2446 2447 return 0; 2448 } 2449 2450 static int iommu_init_irq(struct amd_iommu *iommu) 2451 { 2452 int ret; 2453 2454 if (iommu->int_enabled) 2455 goto enable_faults; 2456 2457 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2458 ret = iommu_setup_intcapxt(iommu); 2459 else if (iommu->dev->msi_cap) 2460 ret = iommu_setup_msi(iommu); 2461 else 2462 ret = -ENODEV; 2463 2464 if (ret) 2465 return ret; 2466 2467 iommu->int_enabled = true; 2468 enable_faults: 2469 2470 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2471 iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2472 2473 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2474 2475 if (iommu->ppr_log != NULL) 2476 iommu_feature_enable(iommu, CONTROL_PPRINT_EN); 2477 return 0; 2478 } 2479 2480 /**************************************************************************** 2481 * 2482 * The next functions belong to the third pass of parsing the ACPI 2483 * table. In this last pass the memory mapping requirements are 2484 * gathered (like exclusion and unity mapping ranges). 2485 * 2486 ****************************************************************************/ 2487 2488 static void __init free_unity_maps(void) 2489 { 2490 struct unity_map_entry *entry, *next; 2491 struct amd_iommu_pci_seg *p, *pci_seg; 2492 2493 for_each_pci_segment_safe(pci_seg, p) { 2494 list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { 2495 list_del(&entry->list); 2496 kfree(entry); 2497 } 2498 } 2499 } 2500 2501 /* called for unity map ACPI definition */ 2502 static int __init init_unity_map_range(struct ivmd_header *m, 2503 struct acpi_table_header *ivrs_base) 2504 { 2505 struct unity_map_entry *e = NULL; 2506 struct amd_iommu_pci_seg *pci_seg; 2507 char *s; 2508 2509 pci_seg = get_pci_segment(m->pci_seg, ivrs_base); 2510 if (pci_seg == NULL) 2511 return -ENOMEM; 2512 2513 e = kzalloc(sizeof(*e), GFP_KERNEL); 2514 if (e == NULL) 2515 return -ENOMEM; 2516 2517 switch (m->type) { 2518 default: 2519 kfree(e); 2520 return 0; 2521 case ACPI_IVMD_TYPE: 2522 s = "IVMD_TYPEi\t\t\t"; 2523 e->devid_start = e->devid_end = m->devid; 2524 break; 2525 case ACPI_IVMD_TYPE_ALL: 2526 s = "IVMD_TYPE_ALL\t\t"; 2527 e->devid_start = 0; 2528 e->devid_end = pci_seg->last_bdf; 2529 break; 2530 case ACPI_IVMD_TYPE_RANGE: 2531 s = "IVMD_TYPE_RANGE\t\t"; 2532 e->devid_start = m->devid; 2533 e->devid_end = m->aux; 2534 break; 2535 } 2536 e->address_start = PAGE_ALIGN(m->range_start); 2537 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 2538 e->prot = m->flags >> 1; 2539 2540 /* 2541 * Treat per-device exclusion ranges as r/w unity-mapped regions 2542 * since some buggy BIOSes might lead to the overwritten exclusion 2543 * range (exclusion_start and exclusion_length members). This 2544 * happens when there are multiple exclusion ranges (IVMD entries) 2545 * defined in ACPI table. 2546 */ 2547 if (m->flags & IVMD_FLAG_EXCL_RANGE) 2548 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; 2549 2550 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " 2551 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" 2552 " flags: %x\n", s, m->pci_seg, 2553 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), 2554 PCI_FUNC(e->devid_start), m->pci_seg, 2555 PCI_BUS_NUM(e->devid_end), 2556 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), 2557 e->address_start, e->address_end, m->flags); 2558 2559 list_add_tail(&e->list, &pci_seg->unity_map); 2560 2561 return 0; 2562 } 2563 2564 /* iterates over all memory definitions we find in the ACPI table */ 2565 static int __init init_memory_definitions(struct acpi_table_header *table) 2566 { 2567 u8 *p = (u8 *)table, *end = (u8 *)table; 2568 struct ivmd_header *m; 2569 2570 end += table->length; 2571 p += IVRS_HEADER_LENGTH; 2572 2573 while (p < end) { 2574 m = (struct ivmd_header *)p; 2575 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 2576 init_unity_map_range(m, table); 2577 2578 p += m->length; 2579 } 2580 2581 return 0; 2582 } 2583 2584 /* 2585 * Init the device table to not allow DMA access for devices 2586 */ 2587 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2588 { 2589 u32 devid; 2590 struct dev_table_entry *dev_table = pci_seg->dev_table; 2591 2592 if (dev_table == NULL) 2593 return; 2594 2595 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2596 __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); 2597 if (!amd_iommu_snp_en) 2598 __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); 2599 } 2600 } 2601 2602 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2603 { 2604 u32 devid; 2605 struct dev_table_entry *dev_table = pci_seg->dev_table; 2606 2607 if (dev_table == NULL) 2608 return; 2609 2610 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2611 dev_table[devid].data[0] = 0ULL; 2612 dev_table[devid].data[1] = 0ULL; 2613 } 2614 } 2615 2616 static void init_device_table(void) 2617 { 2618 struct amd_iommu_pci_seg *pci_seg; 2619 u32 devid; 2620 2621 if (!amd_iommu_irq_remap) 2622 return; 2623 2624 for_each_pci_segment(pci_seg) { 2625 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) 2626 __set_dev_entry_bit(pci_seg->dev_table, 2627 devid, DEV_ENTRY_IRQ_TBL_EN); 2628 } 2629 } 2630 2631 static void iommu_init_flags(struct amd_iommu *iommu) 2632 { 2633 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? 2634 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : 2635 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); 2636 2637 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? 2638 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : 2639 iommu_feature_disable(iommu, CONTROL_PASSPW_EN); 2640 2641 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? 2642 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : 2643 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); 2644 2645 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? 2646 iommu_feature_enable(iommu, CONTROL_ISOC_EN) : 2647 iommu_feature_disable(iommu, CONTROL_ISOC_EN); 2648 2649 /* 2650 * make IOMMU memory accesses cache coherent 2651 */ 2652 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 2653 2654 /* Set IOTLB invalidation timeout to 1s */ 2655 iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); 2656 } 2657 2658 static void iommu_apply_resume_quirks(struct amd_iommu *iommu) 2659 { 2660 int i, j; 2661 u32 ioc_feature_control; 2662 struct pci_dev *pdev = iommu->root_pdev; 2663 2664 /* RD890 BIOSes may not have completely reconfigured the iommu */ 2665 if (!is_rd890_iommu(iommu->dev) || !pdev) 2666 return; 2667 2668 /* 2669 * First, we need to ensure that the iommu is enabled. This is 2670 * controlled by a register in the northbridge 2671 */ 2672 2673 /* Select Northbridge indirect register 0x75 and enable writing */ 2674 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); 2675 pci_read_config_dword(pdev, 0x64, &ioc_feature_control); 2676 2677 /* Enable the iommu */ 2678 if (!(ioc_feature_control & 0x1)) 2679 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); 2680 2681 /* Restore the iommu BAR */ 2682 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2683 iommu->stored_addr_lo); 2684 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, 2685 iommu->stored_addr_hi); 2686 2687 /* Restore the l1 indirect regs for each of the 6 l1s */ 2688 for (i = 0; i < 6; i++) 2689 for (j = 0; j < 0x12; j++) 2690 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); 2691 2692 /* Restore the l2 indirect regs */ 2693 for (i = 0; i < 0x83; i++) 2694 iommu_write_l2(iommu, i, iommu->stored_l2[i]); 2695 2696 /* Lock PCI setup registers */ 2697 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2698 iommu->stored_addr_lo | 1); 2699 } 2700 2701 static void iommu_enable_ga(struct amd_iommu *iommu) 2702 { 2703 #ifdef CONFIG_IRQ_REMAP 2704 switch (amd_iommu_guest_ir) { 2705 case AMD_IOMMU_GUEST_IR_VAPIC: 2706 case AMD_IOMMU_GUEST_IR_LEGACY_GA: 2707 iommu_feature_enable(iommu, CONTROL_GA_EN); 2708 iommu->irte_ops = &irte_128_ops; 2709 break; 2710 default: 2711 iommu->irte_ops = &irte_32_ops; 2712 break; 2713 } 2714 #endif 2715 } 2716 2717 static void iommu_disable_irtcachedis(struct amd_iommu *iommu) 2718 { 2719 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 2720 } 2721 2722 static void iommu_enable_irtcachedis(struct amd_iommu *iommu) 2723 { 2724 u64 ctrl; 2725 2726 if (!amd_iommu_irtcachedis) 2727 return; 2728 2729 /* 2730 * Note: 2731 * The support for IRTCacheDis feature is dertermined by 2732 * checking if the bit is writable. 2733 */ 2734 iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS); 2735 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 2736 ctrl &= (1ULL << CONTROL_IRTCACHEDIS); 2737 if (ctrl) 2738 iommu->irtcachedis_enabled = true; 2739 pr_info("iommu%d (%#06x) : IRT cache is %s\n", 2740 iommu->index, iommu->devid, 2741 iommu->irtcachedis_enabled ? "disabled" : "enabled"); 2742 } 2743 2744 static void early_enable_iommu(struct amd_iommu *iommu) 2745 { 2746 iommu_disable(iommu); 2747 iommu_init_flags(iommu); 2748 iommu_set_device_table(iommu); 2749 iommu_enable_command_buffer(iommu); 2750 iommu_enable_event_buffer(iommu); 2751 iommu_set_exclusion_range(iommu); 2752 iommu_enable_ga(iommu); 2753 iommu_enable_xt(iommu); 2754 iommu_enable_irtcachedis(iommu); 2755 iommu_enable(iommu); 2756 iommu_flush_all_caches(iommu); 2757 } 2758 2759 /* 2760 * This function finally enables all IOMMUs found in the system after 2761 * they have been initialized. 2762 * 2763 * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy 2764 * the old content of device table entries. Not this case or copy failed, 2765 * just continue as normal kernel does. 2766 */ 2767 static void early_enable_iommus(void) 2768 { 2769 struct amd_iommu *iommu; 2770 struct amd_iommu_pci_seg *pci_seg; 2771 2772 if (!copy_device_table()) { 2773 /* 2774 * If come here because of failure in copying device table from old 2775 * kernel with all IOMMUs enabled, print error message and try to 2776 * free allocated old_dev_tbl_cpy. 2777 */ 2778 if (amd_iommu_pre_enabled) 2779 pr_err("Failed to copy DEV table from previous kernel.\n"); 2780 2781 for_each_pci_segment(pci_seg) { 2782 if (pci_seg->old_dev_tbl_cpy != NULL) { 2783 free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, 2784 get_order(pci_seg->dev_table_size)); 2785 pci_seg->old_dev_tbl_cpy = NULL; 2786 } 2787 } 2788 2789 for_each_iommu(iommu) { 2790 clear_translation_pre_enabled(iommu); 2791 early_enable_iommu(iommu); 2792 } 2793 } else { 2794 pr_info("Copied DEV table from previous kernel.\n"); 2795 2796 for_each_pci_segment(pci_seg) { 2797 free_pages((unsigned long)pci_seg->dev_table, 2798 get_order(pci_seg->dev_table_size)); 2799 pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; 2800 } 2801 2802 for_each_iommu(iommu) { 2803 iommu_disable_command_buffer(iommu); 2804 iommu_disable_event_buffer(iommu); 2805 iommu_disable_irtcachedis(iommu); 2806 iommu_enable_command_buffer(iommu); 2807 iommu_enable_event_buffer(iommu); 2808 iommu_enable_ga(iommu); 2809 iommu_enable_xt(iommu); 2810 iommu_enable_irtcachedis(iommu); 2811 iommu_set_device_table(iommu); 2812 iommu_flush_all_caches(iommu); 2813 } 2814 } 2815 } 2816 2817 static void enable_iommus_v2(void) 2818 { 2819 struct amd_iommu *iommu; 2820 2821 for_each_iommu(iommu) { 2822 iommu_enable_ppr_log(iommu); 2823 iommu_enable_gt(iommu); 2824 } 2825 } 2826 2827 static void enable_iommus_vapic(void) 2828 { 2829 #ifdef CONFIG_IRQ_REMAP 2830 u32 status, i; 2831 struct amd_iommu *iommu; 2832 2833 for_each_iommu(iommu) { 2834 /* 2835 * Disable GALog if already running. It could have been enabled 2836 * in the previous boot before kdump. 2837 */ 2838 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2839 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2840 continue; 2841 2842 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 2843 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 2844 2845 /* 2846 * Need to set and poll check the GALOGRun bit to zero before 2847 * we can set/ modify GA Log registers safely. 2848 */ 2849 for (i = 0; i < LOOP_TIMEOUT; ++i) { 2850 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2851 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2852 break; 2853 udelay(10); 2854 } 2855 2856 if (WARN_ON(i >= LOOP_TIMEOUT)) 2857 return; 2858 } 2859 2860 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && 2861 !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) { 2862 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2863 return; 2864 } 2865 2866 if (amd_iommu_snp_en && 2867 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { 2868 pr_warn("Force to disable Virtual APIC due to SNP\n"); 2869 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2870 return; 2871 } 2872 2873 /* Enabling GAM and SNPAVIC support */ 2874 for_each_iommu(iommu) { 2875 if (iommu_init_ga_log(iommu) || 2876 iommu_ga_log_enable(iommu)) 2877 return; 2878 2879 iommu_feature_enable(iommu, CONTROL_GAM_EN); 2880 if (amd_iommu_snp_en) 2881 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); 2882 } 2883 2884 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); 2885 pr_info("Virtual APIC enabled\n"); 2886 #endif 2887 } 2888 2889 static void enable_iommus(void) 2890 { 2891 early_enable_iommus(); 2892 enable_iommus_vapic(); 2893 enable_iommus_v2(); 2894 } 2895 2896 static void disable_iommus(void) 2897 { 2898 struct amd_iommu *iommu; 2899 2900 for_each_iommu(iommu) 2901 iommu_disable(iommu); 2902 2903 #ifdef CONFIG_IRQ_REMAP 2904 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 2905 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); 2906 #endif 2907 } 2908 2909 /* 2910 * Suspend/Resume support 2911 * disable suspend until real resume implemented 2912 */ 2913 2914 static void amd_iommu_resume(void) 2915 { 2916 struct amd_iommu *iommu; 2917 2918 for_each_iommu(iommu) 2919 iommu_apply_resume_quirks(iommu); 2920 2921 /* re-load the hardware */ 2922 enable_iommus(); 2923 2924 amd_iommu_enable_interrupts(); 2925 } 2926 2927 static int amd_iommu_suspend(void) 2928 { 2929 /* disable IOMMUs to go out of the way for BIOS */ 2930 disable_iommus(); 2931 2932 return 0; 2933 } 2934 2935 static struct syscore_ops amd_iommu_syscore_ops = { 2936 .suspend = amd_iommu_suspend, 2937 .resume = amd_iommu_resume, 2938 }; 2939 2940 static void __init free_iommu_resources(void) 2941 { 2942 kmem_cache_destroy(amd_iommu_irq_cache); 2943 amd_iommu_irq_cache = NULL; 2944 2945 free_iommu_all(); 2946 free_pci_segments(); 2947 } 2948 2949 /* SB IOAPIC is always on this device in AMD systems */ 2950 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) 2951 2952 static bool __init check_ioapic_information(void) 2953 { 2954 const char *fw_bug = FW_BUG; 2955 bool ret, has_sb_ioapic; 2956 int idx; 2957 2958 has_sb_ioapic = false; 2959 ret = false; 2960 2961 /* 2962 * If we have map overrides on the kernel command line the 2963 * messages in this function might not describe firmware bugs 2964 * anymore - so be careful 2965 */ 2966 if (cmdline_maps) 2967 fw_bug = ""; 2968 2969 for (idx = 0; idx < nr_ioapics; idx++) { 2970 int devid, id = mpc_ioapic_id(idx); 2971 2972 devid = get_ioapic_devid(id); 2973 if (devid < 0) { 2974 pr_err("%s: IOAPIC[%d] not in IVRS table\n", 2975 fw_bug, id); 2976 ret = false; 2977 } else if (devid == IOAPIC_SB_DEVID) { 2978 has_sb_ioapic = true; 2979 ret = true; 2980 } 2981 } 2982 2983 if (!has_sb_ioapic) { 2984 /* 2985 * We expect the SB IOAPIC to be listed in the IVRS 2986 * table. The system timer is connected to the SB IOAPIC 2987 * and if we don't have it in the list the system will 2988 * panic at boot time. This situation usually happens 2989 * when the BIOS is buggy and provides us the wrong 2990 * device id for the IOAPIC in the system. 2991 */ 2992 pr_err("%s: No southbridge IOAPIC found\n", fw_bug); 2993 } 2994 2995 if (!ret) 2996 pr_err("Disabling interrupt remapping\n"); 2997 2998 return ret; 2999 } 3000 3001 static void __init free_dma_resources(void) 3002 { 3003 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 3004 get_order(MAX_DOMAIN_ID/8)); 3005 amd_iommu_pd_alloc_bitmap = NULL; 3006 3007 free_unity_maps(); 3008 } 3009 3010 static void __init ivinfo_init(void *ivrs) 3011 { 3012 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); 3013 } 3014 3015 /* 3016 * This is the hardware init function for AMD IOMMU in the system. 3017 * This function is called either from amd_iommu_init or from the interrupt 3018 * remapping setup code. 3019 * 3020 * This function basically parses the ACPI table for AMD IOMMU (IVRS) 3021 * four times: 3022 * 3023 * 1 pass) Discover the most comprehensive IVHD type to use. 3024 * 3025 * 2 pass) Find the highest PCI device id the driver has to handle. 3026 * Upon this information the size of the data structures is 3027 * determined that needs to be allocated. 3028 * 3029 * 3 pass) Initialize the data structures just allocated with the 3030 * information in the ACPI table about available AMD IOMMUs 3031 * in the system. It also maps the PCI devices in the 3032 * system to specific IOMMUs 3033 * 3034 * 4 pass) After the basic data structures are allocated and 3035 * initialized we update them with information about memory 3036 * remapping requirements parsed out of the ACPI table in 3037 * this last pass. 3038 * 3039 * After everything is set up the IOMMUs are enabled and the necessary 3040 * hotplug and suspend notifiers are registered. 3041 */ 3042 static int __init early_amd_iommu_init(void) 3043 { 3044 struct acpi_table_header *ivrs_base; 3045 int remap_cache_sz, ret; 3046 acpi_status status; 3047 3048 if (!amd_iommu_detected) 3049 return -ENODEV; 3050 3051 status = acpi_get_table("IVRS", 0, &ivrs_base); 3052 if (status == AE_NOT_FOUND) 3053 return -ENODEV; 3054 else if (ACPI_FAILURE(status)) { 3055 const char *err = acpi_format_exception(status); 3056 pr_err("IVRS table error: %s\n", err); 3057 return -EINVAL; 3058 } 3059 3060 /* 3061 * Validate checksum here so we don't need to do it when 3062 * we actually parse the table 3063 */ 3064 ret = check_ivrs_checksum(ivrs_base); 3065 if (ret) 3066 goto out; 3067 3068 ivinfo_init(ivrs_base); 3069 3070 amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); 3071 DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); 3072 3073 /* Device table - directly used by all IOMMUs */ 3074 ret = -ENOMEM; 3075 3076 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( 3077 GFP_KERNEL | __GFP_ZERO, 3078 get_order(MAX_DOMAIN_ID/8)); 3079 if (amd_iommu_pd_alloc_bitmap == NULL) 3080 goto out; 3081 3082 /* 3083 * never allocate domain 0 because its used as the non-allocated and 3084 * error value placeholder 3085 */ 3086 __set_bit(0, amd_iommu_pd_alloc_bitmap); 3087 3088 /* 3089 * now the data structures are allocated and basically initialized 3090 * start the real acpi table scan 3091 */ 3092 ret = init_iommu_all(ivrs_base); 3093 if (ret) 3094 goto out; 3095 3096 /* 5 level guest page table */ 3097 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3098 check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL) 3099 amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; 3100 3101 /* Disable any previously enabled IOMMUs */ 3102 if (!is_kdump_kernel() || amd_iommu_disabled) 3103 disable_iommus(); 3104 3105 if (amd_iommu_irq_remap) 3106 amd_iommu_irq_remap = check_ioapic_information(); 3107 3108 if (amd_iommu_irq_remap) { 3109 struct amd_iommu_pci_seg *pci_seg; 3110 /* 3111 * Interrupt remapping enabled, create kmem_cache for the 3112 * remapping tables. 3113 */ 3114 ret = -ENOMEM; 3115 if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) 3116 remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); 3117 else 3118 remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); 3119 amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", 3120 remap_cache_sz, 3121 DTE_INTTAB_ALIGNMENT, 3122 0, NULL); 3123 if (!amd_iommu_irq_cache) 3124 goto out; 3125 3126 for_each_pci_segment(pci_seg) { 3127 if (alloc_irq_lookup_table(pci_seg)) 3128 goto out; 3129 } 3130 } 3131 3132 ret = init_memory_definitions(ivrs_base); 3133 if (ret) 3134 goto out; 3135 3136 /* init the device table */ 3137 init_device_table(); 3138 3139 out: 3140 /* Don't leak any ACPI memory */ 3141 acpi_put_table(ivrs_base); 3142 3143 return ret; 3144 } 3145 3146 static int amd_iommu_enable_interrupts(void) 3147 { 3148 struct amd_iommu *iommu; 3149 int ret = 0; 3150 3151 for_each_iommu(iommu) { 3152 ret = iommu_init_irq(iommu); 3153 if (ret) 3154 goto out; 3155 } 3156 3157 out: 3158 return ret; 3159 } 3160 3161 static bool __init detect_ivrs(void) 3162 { 3163 struct acpi_table_header *ivrs_base; 3164 acpi_status status; 3165 int i; 3166 3167 status = acpi_get_table("IVRS", 0, &ivrs_base); 3168 if (status == AE_NOT_FOUND) 3169 return false; 3170 else if (ACPI_FAILURE(status)) { 3171 const char *err = acpi_format_exception(status); 3172 pr_err("IVRS table error: %s\n", err); 3173 return false; 3174 } 3175 3176 acpi_put_table(ivrs_base); 3177 3178 if (amd_iommu_force_enable) 3179 goto out; 3180 3181 /* Don't use IOMMU if there is Stoney Ridge graphics */ 3182 for (i = 0; i < 32; i++) { 3183 u32 pci_id; 3184 3185 pci_id = read_pci_config(0, i, 0, 0); 3186 if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { 3187 pr_info("Disable IOMMU on Stoney Ridge\n"); 3188 return false; 3189 } 3190 } 3191 3192 out: 3193 /* Make sure ACS will be enabled during PCI probe */ 3194 pci_request_acs(); 3195 3196 return true; 3197 } 3198 3199 /**************************************************************************** 3200 * 3201 * AMD IOMMU Initialization State Machine 3202 * 3203 ****************************************************************************/ 3204 3205 static int __init state_next(void) 3206 { 3207 int ret = 0; 3208 3209 switch (init_state) { 3210 case IOMMU_START_STATE: 3211 if (!detect_ivrs()) { 3212 init_state = IOMMU_NOT_FOUND; 3213 ret = -ENODEV; 3214 } else { 3215 init_state = IOMMU_IVRS_DETECTED; 3216 } 3217 break; 3218 case IOMMU_IVRS_DETECTED: 3219 if (amd_iommu_disabled) { 3220 init_state = IOMMU_CMDLINE_DISABLED; 3221 ret = -EINVAL; 3222 } else { 3223 ret = early_amd_iommu_init(); 3224 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; 3225 } 3226 break; 3227 case IOMMU_ACPI_FINISHED: 3228 early_enable_iommus(); 3229 x86_platform.iommu_shutdown = disable_iommus; 3230 init_state = IOMMU_ENABLED; 3231 break; 3232 case IOMMU_ENABLED: 3233 register_syscore_ops(&amd_iommu_syscore_ops); 3234 ret = amd_iommu_init_pci(); 3235 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; 3236 enable_iommus_vapic(); 3237 enable_iommus_v2(); 3238 break; 3239 case IOMMU_PCI_INIT: 3240 ret = amd_iommu_enable_interrupts(); 3241 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; 3242 break; 3243 case IOMMU_INTERRUPTS_EN: 3244 init_state = IOMMU_INITIALIZED; 3245 break; 3246 case IOMMU_INITIALIZED: 3247 /* Nothing to do */ 3248 break; 3249 case IOMMU_NOT_FOUND: 3250 case IOMMU_INIT_ERROR: 3251 case IOMMU_CMDLINE_DISABLED: 3252 /* Error states => do nothing */ 3253 ret = -EINVAL; 3254 break; 3255 default: 3256 /* Unknown state */ 3257 BUG(); 3258 } 3259 3260 if (ret) { 3261 free_dma_resources(); 3262 if (!irq_remapping_enabled) { 3263 disable_iommus(); 3264 free_iommu_resources(); 3265 } else { 3266 struct amd_iommu *iommu; 3267 struct amd_iommu_pci_seg *pci_seg; 3268 3269 for_each_pci_segment(pci_seg) 3270 uninit_device_table_dma(pci_seg); 3271 3272 for_each_iommu(iommu) 3273 iommu_flush_all_caches(iommu); 3274 } 3275 } 3276 return ret; 3277 } 3278 3279 static int __init iommu_go_to_state(enum iommu_init_state state) 3280 { 3281 int ret = -EINVAL; 3282 3283 while (init_state != state) { 3284 if (init_state == IOMMU_NOT_FOUND || 3285 init_state == IOMMU_INIT_ERROR || 3286 init_state == IOMMU_CMDLINE_DISABLED) 3287 break; 3288 ret = state_next(); 3289 } 3290 3291 return ret; 3292 } 3293 3294 #ifdef CONFIG_IRQ_REMAP 3295 int __init amd_iommu_prepare(void) 3296 { 3297 int ret; 3298 3299 amd_iommu_irq_remap = true; 3300 3301 ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); 3302 if (ret) { 3303 amd_iommu_irq_remap = false; 3304 return ret; 3305 } 3306 3307 return amd_iommu_irq_remap ? 0 : -ENODEV; 3308 } 3309 3310 int __init amd_iommu_enable(void) 3311 { 3312 int ret; 3313 3314 ret = iommu_go_to_state(IOMMU_ENABLED); 3315 if (ret) 3316 return ret; 3317 3318 irq_remapping_enabled = 1; 3319 return amd_iommu_xt_mode; 3320 } 3321 3322 void amd_iommu_disable(void) 3323 { 3324 amd_iommu_suspend(); 3325 } 3326 3327 int amd_iommu_reenable(int mode) 3328 { 3329 amd_iommu_resume(); 3330 3331 return 0; 3332 } 3333 3334 int __init amd_iommu_enable_faulting(void) 3335 { 3336 /* We enable MSI later when PCI is initialized */ 3337 return 0; 3338 } 3339 #endif 3340 3341 /* 3342 * This is the core init function for AMD IOMMU hardware in the system. 3343 * This function is called from the generic x86 DMA layer initialization 3344 * code. 3345 */ 3346 static int __init amd_iommu_init(void) 3347 { 3348 struct amd_iommu *iommu; 3349 int ret; 3350 3351 ret = iommu_go_to_state(IOMMU_INITIALIZED); 3352 #ifdef CONFIG_GART_IOMMU 3353 if (ret && list_empty(&amd_iommu_list)) { 3354 /* 3355 * We failed to initialize the AMD IOMMU - try fallback 3356 * to GART if possible. 3357 */ 3358 gart_iommu_init(); 3359 } 3360 #endif 3361 3362 for_each_iommu(iommu) 3363 amd_iommu_debugfs_setup(iommu); 3364 3365 return ret; 3366 } 3367 3368 static bool amd_iommu_sme_check(void) 3369 { 3370 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) || 3371 (boot_cpu_data.x86 != 0x17)) 3372 return true; 3373 3374 /* For Fam17h, a specific level of support is required */ 3375 if (boot_cpu_data.microcode >= 0x08001205) 3376 return true; 3377 3378 if ((boot_cpu_data.microcode >= 0x08001126) && 3379 (boot_cpu_data.microcode <= 0x080011ff)) 3380 return true; 3381 3382 pr_notice("IOMMU not currently supported when SME is active\n"); 3383 3384 return false; 3385 } 3386 3387 /**************************************************************************** 3388 * 3389 * Early detect code. This code runs at IOMMU detection time in the DMA 3390 * layer. It just looks if there is an IVRS ACPI table to detect AMD 3391 * IOMMUs 3392 * 3393 ****************************************************************************/ 3394 int __init amd_iommu_detect(void) 3395 { 3396 int ret; 3397 3398 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 3399 return -ENODEV; 3400 3401 if (!amd_iommu_sme_check()) 3402 return -ENODEV; 3403 3404 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); 3405 if (ret) 3406 return ret; 3407 3408 amd_iommu_detected = true; 3409 iommu_detected = 1; 3410 x86_init.iommu.iommu_init = amd_iommu_init; 3411 3412 return 1; 3413 } 3414 3415 /**************************************************************************** 3416 * 3417 * Parsing functions for the AMD IOMMU specific kernel command line 3418 * options. 3419 * 3420 ****************************************************************************/ 3421 3422 static int __init parse_amd_iommu_dump(char *str) 3423 { 3424 amd_iommu_dump = true; 3425 3426 return 1; 3427 } 3428 3429 static int __init parse_amd_iommu_intr(char *str) 3430 { 3431 for (; *str; ++str) { 3432 if (strncmp(str, "legacy", 6) == 0) { 3433 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3434 break; 3435 } 3436 if (strncmp(str, "vapic", 5) == 0) { 3437 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 3438 break; 3439 } 3440 } 3441 return 1; 3442 } 3443 3444 static int __init parse_amd_iommu_options(char *str) 3445 { 3446 if (!str) 3447 return -EINVAL; 3448 3449 while (*str) { 3450 if (strncmp(str, "fullflush", 9) == 0) { 3451 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); 3452 iommu_set_dma_strict(); 3453 } else if (strncmp(str, "force_enable", 12) == 0) { 3454 amd_iommu_force_enable = true; 3455 } else if (strncmp(str, "off", 3) == 0) { 3456 amd_iommu_disabled = true; 3457 } else if (strncmp(str, "force_isolation", 15) == 0) { 3458 amd_iommu_force_isolation = true; 3459 } else if (strncmp(str, "pgtbl_v1", 8) == 0) { 3460 amd_iommu_pgtable = AMD_IOMMU_V1; 3461 } else if (strncmp(str, "pgtbl_v2", 8) == 0) { 3462 amd_iommu_pgtable = AMD_IOMMU_V2; 3463 } else if (strncmp(str, "irtcachedis", 11) == 0) { 3464 amd_iommu_irtcachedis = true; 3465 } else { 3466 pr_notice("Unknown option - '%s'\n", str); 3467 } 3468 3469 str += strcspn(str, ","); 3470 while (*str == ',') 3471 str++; 3472 } 3473 3474 return 1; 3475 } 3476 3477 static int __init parse_ivrs_ioapic(char *str) 3478 { 3479 u32 seg = 0, bus, dev, fn; 3480 int id, i; 3481 u32 devid; 3482 3483 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3484 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3485 goto found; 3486 3487 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3488 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3489 pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", 3490 str, id, seg, bus, dev, fn); 3491 goto found; 3492 } 3493 3494 pr_err("Invalid command line: ivrs_ioapic%s\n", str); 3495 return 1; 3496 3497 found: 3498 if (early_ioapic_map_size == EARLY_MAP_SIZE) { 3499 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", 3500 str); 3501 return 1; 3502 } 3503 3504 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3505 3506 cmdline_maps = true; 3507 i = early_ioapic_map_size++; 3508 early_ioapic_map[i].id = id; 3509 early_ioapic_map[i].devid = devid; 3510 early_ioapic_map[i].cmd_line = true; 3511 3512 return 1; 3513 } 3514 3515 static int __init parse_ivrs_hpet(char *str) 3516 { 3517 u32 seg = 0, bus, dev, fn; 3518 int id, i; 3519 u32 devid; 3520 3521 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3522 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3523 goto found; 3524 3525 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3526 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3527 pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", 3528 str, id, seg, bus, dev, fn); 3529 goto found; 3530 } 3531 3532 pr_err("Invalid command line: ivrs_hpet%s\n", str); 3533 return 1; 3534 3535 found: 3536 if (early_hpet_map_size == EARLY_MAP_SIZE) { 3537 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", 3538 str); 3539 return 1; 3540 } 3541 3542 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3543 3544 cmdline_maps = true; 3545 i = early_hpet_map_size++; 3546 early_hpet_map[i].id = id; 3547 early_hpet_map[i].devid = devid; 3548 early_hpet_map[i].cmd_line = true; 3549 3550 return 1; 3551 } 3552 3553 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) 3554 3555 static int __init parse_ivrs_acpihid(char *str) 3556 { 3557 u32 seg = 0, bus, dev, fn; 3558 char *hid, *uid, *p, *addr; 3559 char acpiid[ACPIID_LEN] = {0}; 3560 int i; 3561 3562 addr = strchr(str, '@'); 3563 if (!addr) { 3564 addr = strchr(str, '='); 3565 if (!addr) 3566 goto not_found; 3567 3568 ++addr; 3569 3570 if (strlen(addr) > ACPIID_LEN) 3571 goto not_found; 3572 3573 if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || 3574 sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { 3575 pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", 3576 str, acpiid, seg, bus, dev, fn); 3577 goto found; 3578 } 3579 goto not_found; 3580 } 3581 3582 /* We have the '@', make it the terminator to get just the acpiid */ 3583 *addr++ = 0; 3584 3585 if (strlen(str) > ACPIID_LEN + 1) 3586 goto not_found; 3587 3588 if (sscanf(str, "=%s", acpiid) != 1) 3589 goto not_found; 3590 3591 if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || 3592 sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) 3593 goto found; 3594 3595 not_found: 3596 pr_err("Invalid command line: ivrs_acpihid%s\n", str); 3597 return 1; 3598 3599 found: 3600 p = acpiid; 3601 hid = strsep(&p, ":"); 3602 uid = p; 3603 3604 if (!hid || !(*hid) || !uid) { 3605 pr_err("Invalid command line: hid or uid\n"); 3606 return 1; 3607 } 3608 3609 /* 3610 * Ignore leading zeroes after ':', so e.g., AMDI0095:00 3611 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match 3612 */ 3613 while (*uid == '0' && *(uid + 1)) 3614 uid++; 3615 3616 i = early_acpihid_map_size++; 3617 memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); 3618 memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); 3619 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3620 early_acpihid_map[i].cmd_line = true; 3621 3622 return 1; 3623 } 3624 3625 __setup("amd_iommu_dump", parse_amd_iommu_dump); 3626 __setup("amd_iommu=", parse_amd_iommu_options); 3627 __setup("amd_iommu_intr=", parse_amd_iommu_intr); 3628 __setup("ivrs_ioapic", parse_ivrs_ioapic); 3629 __setup("ivrs_hpet", parse_ivrs_hpet); 3630 __setup("ivrs_acpihid", parse_ivrs_acpihid); 3631 3632 bool amd_iommu_v2_supported(void) 3633 { 3634 /* CPU page table size should match IOMMU guest page table size */ 3635 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3636 amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) 3637 return false; 3638 3639 /* 3640 * Since DTE[Mode]=0 is prohibited on SNP-enabled system 3641 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without 3642 * setting up IOMMUv1 page table. 3643 */ 3644 return amd_iommu_v2_present && !amd_iommu_snp_en; 3645 } 3646 EXPORT_SYMBOL(amd_iommu_v2_supported); 3647 3648 struct amd_iommu *get_amd_iommu(unsigned int idx) 3649 { 3650 unsigned int i = 0; 3651 struct amd_iommu *iommu; 3652 3653 for_each_iommu(iommu) 3654 if (i++ == idx) 3655 return iommu; 3656 return NULL; 3657 } 3658 3659 /**************************************************************************** 3660 * 3661 * IOMMU EFR Performance Counter support functionality. This code allows 3662 * access to the IOMMU PC functionality. 3663 * 3664 ****************************************************************************/ 3665 3666 u8 amd_iommu_pc_get_max_banks(unsigned int idx) 3667 { 3668 struct amd_iommu *iommu = get_amd_iommu(idx); 3669 3670 if (iommu) 3671 return iommu->max_banks; 3672 3673 return 0; 3674 } 3675 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); 3676 3677 bool amd_iommu_pc_supported(void) 3678 { 3679 return amd_iommu_pc_present; 3680 } 3681 EXPORT_SYMBOL(amd_iommu_pc_supported); 3682 3683 u8 amd_iommu_pc_get_max_counters(unsigned int idx) 3684 { 3685 struct amd_iommu *iommu = get_amd_iommu(idx); 3686 3687 if (iommu) 3688 return iommu->max_counters; 3689 3690 return 0; 3691 } 3692 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); 3693 3694 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 3695 u8 fxn, u64 *value, bool is_write) 3696 { 3697 u32 offset; 3698 u32 max_offset_lim; 3699 3700 /* Make sure the IOMMU PC resource is available */ 3701 if (!amd_iommu_pc_present) 3702 return -ENODEV; 3703 3704 /* Check for valid iommu and pc register indexing */ 3705 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) 3706 return -ENODEV; 3707 3708 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); 3709 3710 /* Limit the offset to the hw defined mmio region aperture */ 3711 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | 3712 (iommu->max_counters << 8) | 0x28); 3713 if ((offset < MMIO_CNTR_REG_OFFSET) || 3714 (offset > max_offset_lim)) 3715 return -EINVAL; 3716 3717 if (is_write) { 3718 u64 val = *value & GENMASK_ULL(47, 0); 3719 3720 writel((u32)val, iommu->mmio_base + offset); 3721 writel((val >> 32), iommu->mmio_base + offset + 4); 3722 } else { 3723 *value = readl(iommu->mmio_base + offset + 4); 3724 *value <<= 32; 3725 *value |= readl(iommu->mmio_base + offset); 3726 *value &= GENMASK_ULL(47, 0); 3727 } 3728 3729 return 0; 3730 } 3731 3732 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3733 { 3734 if (!iommu) 3735 return -EINVAL; 3736 3737 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); 3738 } 3739 3740 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3741 { 3742 if (!iommu) 3743 return -EINVAL; 3744 3745 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); 3746 } 3747 3748 #ifdef CONFIG_AMD_MEM_ENCRYPT 3749 int amd_iommu_snp_enable(void) 3750 { 3751 /* 3752 * The SNP support requires that IOMMU must be enabled, and is 3753 * not configured in the passthrough mode. 3754 */ 3755 if (no_iommu || iommu_default_passthrough()) { 3756 pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported"); 3757 return -EINVAL; 3758 } 3759 3760 /* 3761 * Prevent enabling SNP after IOMMU_ENABLED state because this process 3762 * affect how IOMMU driver sets up data structures and configures 3763 * IOMMU hardware. 3764 */ 3765 if (init_state > IOMMU_ENABLED) { 3766 pr_err("SNP: Too late to enable SNP for IOMMU.\n"); 3767 return -EINVAL; 3768 } 3769 3770 amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP); 3771 if (!amd_iommu_snp_en) 3772 return -EINVAL; 3773 3774 pr_info("SNP enabled\n"); 3775 3776 /* Enforce IOMMU v1 pagetable when SNP is enabled. */ 3777 if (amd_iommu_pgtable != AMD_IOMMU_V1) { 3778 pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n"); 3779 amd_iommu_pgtable = AMD_IOMMU_V1; 3780 } 3781 3782 return 0; 3783 } 3784 #endif 3785