xref: /openbmc/linux/drivers/iommu/amd/init.c (revision 26b32974)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  *         Leo Duran <leo.duran@amd.com>
6  */
7 
8 #define pr_fmt(fmt)     "AMD-Vi: " fmt
9 #define dev_fmt(fmt)    pr_fmt(fmt)
10 
11 #include <linux/pci.h>
12 #include <linux/acpi.h>
13 #include <linux/list.h>
14 #include <linux/bitmap.h>
15 #include <linux/slab.h>
16 #include <linux/syscore_ops.h>
17 #include <linux/interrupt.h>
18 #include <linux/msi.h>
19 #include <linux/irq.h>
20 #include <linux/amd-iommu.h>
21 #include <linux/export.h>
22 #include <linux/kmemleak.h>
23 #include <linux/cc_platform.h>
24 #include <linux/iopoll.h>
25 #include <asm/pci-direct.h>
26 #include <asm/iommu.h>
27 #include <asm/apic.h>
28 #include <asm/gart.h>
29 #include <asm/x86_init.h>
30 #include <asm/io_apic.h>
31 #include <asm/irq_remapping.h>
32 #include <asm/set_memory.h>
33 
34 #include <linux/crash_dump.h>
35 
36 #include "amd_iommu.h"
37 #include "../irq_remapping.h"
38 
/*
 * Definitions for the ACPI (IVRS table) scanning code
 */

/* Offset from the start of the IVRS table to its first block header */
#define IVRS_HEADER_LENGTH 48

/*
 * Block type codes.
 * NOTE(review): the code consuming these values is outside this view.
 */
#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
#define ACPI_IVMD_TYPE_ALL              0x20
#define ACPI_IVMD_TYPE                  0x21
#define ACPI_IVMD_TYPE_RANGE            0x22

/* Values of ivhd_entry.type, i.e. the kind of an IVHD device entry */
#define IVHD_DEV_ALL                    0x01
#define IVHD_DEV_SELECT                 0x02
#define IVHD_DEV_SELECT_RANGE_START     0x03
#define IVHD_DEV_RANGE_END              0x04
#define IVHD_DEV_ALIAS                  0x42
#define IVHD_DEV_ALIAS_RANGE            0x43
#define IVHD_DEV_EXT_SELECT             0x46
#define IVHD_DEV_EXT_SELECT_RANGE       0x47
#define IVHD_DEV_SPECIAL		0x48
#define IVHD_DEV_ACPI_HID		0xf0

/* presumably the UID format of an IVHD_DEV_ACPI_HID entry - TODO confirm */
#define UID_NOT_PRESENT                 0
#define UID_IS_INTEGER                  1
#define UID_IS_CHARACTER                2

/* presumably the variety of an IVHD_DEV_SPECIAL entry - TODO confirm */
#define IVHD_SPECIAL_IOAPIC		1
#define IVHD_SPECIAL_HPET		2

/* presumably bit masks for ivhd_header.flags - TODO confirm */
#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
#define IVHD_FLAG_PASSPW_EN_MASK        0x02
#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
#define IVHD_FLAG_ISOC_EN_MASK          0x08

/* presumably bit masks for ivmd_header.flags - TODO confirm */
#define IVMD_FLAG_EXCL_RANGE            0x08
#define IVMD_FLAG_IW                    0x04
#define IVMD_FLAG_IR                    0x02
#define IVMD_FLAG_UNITY_MAP             0x01

/* presumably per-device flag bits from IVHD device entries - TODO confirm */
#define ACPI_DEVFLAG_INITPASS           0x01
#define ACPI_DEVFLAG_EXTINT             0x02
#define ACPI_DEVFLAG_NMI                0x04
#define ACPI_DEVFLAG_SYSMGT1            0x10
#define ACPI_DEVFLAG_SYSMGT2            0x20
#define ACPI_DEVFLAG_LINT0              0x40
#define ACPI_DEVFLAG_LINT1              0x80
#define ACPI_DEVFLAG_ATSDIS             0x10000000

/* Maximum number of iterations of a register polling loop */
#define LOOP_TIMEOUT	2000000
87 
/*
 * Pack a PCI segment/bus/device/function tuple into one SBDF value:
 * segment in bits 31:16, bus in bits 15:8, device in bits 7:3 and
 * function in bits 2:0. Every argument is parenthesized so that an
 * argument expression built from low-precedence operators (|, ^, ?:)
 * is masked as a whole rather than only its last operand.
 */
#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	((((seg) & 0xffff) << 16) | (((bus) & 0xff) << 8) \
						 | (((dev) & 0x1f) << 3) | ((fn) & 0x7))
90 
91 /*
92  * ACPI table definitions
93  *
94  * These data structures are laid over the table to parse the important values
95  * out of it.
96  */
97 
/*
 * Structure describing one IOMMU in the ACPI table. Typically followed by one
 * or more ivhd_entry records.
 *
 * The fields up to and including efr_attr occupy 24 bytes; together with
 * efr_reg and efr_reg2 the structure is 40 bytes long.
 */
struct ivhd_header {
	u8 type;	/* IVHD block type; 0x10, 0x11 and 0x40 are recognized */
	u8 flags;
	u16 length;	/* size in bytes of the whole block, header included */
	u16 devid;
	u16 cap_ptr;
	u64 mmio_phys;
	u16 pci_seg;	/* PCI segment the block belongs to */
	u16 info;
	u32 efr_attr;

	/* Following only valid on IVHD type 11h and 40h */
	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
	u64 efr_reg2;
} __attribute__((packed));
117 
/*
 * A device entry describing which devices a specific IOMMU translates and
 * which requestor ids they use.
 *
 * The structure is packed, so "type" sits at byte offset 0 and "uidl" at
 * byte offset 21 of an entry.
 */
struct ivhd_entry {
	u8 type;	/* one of the IVHD_DEV_* codes */
	u16 devid;	/* device id the entry refers to */
	u8 flags;
	struct_group(ext_hid,
		u32 ext;
		u32 hidh;
	);
	u64 cid;
	u8 uidf;
	u8 uidl;	/* UID length; sizes IVHD_DEV_ACPI_HID entries */
	u8 uid;
} __attribute__((packed));
135 
/*
 * An AMD IOMMU memory definition structure. It defines things like exclusion
 * ranges for devices and regions that should be unity mapped.
 *
 * NOTE(review): no code in this part of the file reads this structure; the
 * field notes below are derived from the field names only.
 */
struct ivmd_header {
	u8 type;
	u8 flags;	/* presumably a combination of IVMD_FLAG_* - TODO confirm */
	u16 length;
	u16 devid;
	u16 aux;
	u16 pci_seg;
	u8  resv[6];
	u64 range_start;	/* presumably start of the described memory range */
	u64 range_length;	/* presumably length of the described memory range */
} __attribute__((packed));
151 
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;

enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;

/* Guest interrupt remapping mode, tested via the AMD_IOMMU_GUEST_IR_*() helpers */
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
/* CONTROL_XT_EN is only enabled when this equals IRQ_REMAP_X2APIC_MODE */
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;

static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
/* Only IVHD blocks of this type are scanned when searching for device ids */
static int amd_iommu_target_ivhd_type;

/* Global EFR and EFR2 registers */
u64 amd_iommu_efr;
u64 amd_iommu_efr2;

/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);

LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
					   system */

/* Array to assign indices to IOMMUs */
struct amd_iommu *amd_iommus[MAX_IOMMUS];

/* Number of IOMMUs present in the system */
static int amd_iommus_present;

/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;

u32 amd_iommu_max_pasid __read_mostly = ~0;

bool amd_iommu_v2_present __read_mostly;
static bool amd_iommu_pc_present __read_mostly;
/* Set when the IVRS IVinfo field has IOMMU_IVINFO_DMA_REMAP set */
bool amdr_ivrs_remap_support __read_mostly;

bool amd_iommu_force_isolation __read_mostly;

/*
 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 * to know which ones are already in use.
 */
unsigned long *amd_iommu_pd_alloc_bitmap;
202 
/*
 * States of the IOMMU initialization. The file-scope variable init_state
 * holds the current one and iommu_go_to_state() takes a target state.
 * NOTE(review): the transitions themselves are implemented outside this
 * view; confirm ordering assumptions against iommu_go_to_state().
 */
enum iommu_init_state {
	IOMMU_START_STATE,
	IOMMU_IVRS_DETECTED,
	IOMMU_ACPI_FINISHED,
	IOMMU_ENABLED,
	IOMMU_PCI_INIT,
	IOMMU_INTERRUPTS_EN,
	IOMMU_INITIALIZED,
	IOMMU_NOT_FOUND,
	IOMMU_INIT_ERROR,
	IOMMU_CMDLINE_DISABLED,
};
215 
/* Early ioapic and hpet maps from kernel command line */
#define EARLY_MAP_SIZE		4	/* capacity of each early_*_map array */
static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];

/* presumably the number of used slots in the matching array - TODO confirm */
static int __initdata early_ioapic_map_size;
static int __initdata early_hpet_map_size;
static int __initdata early_acpihid_map_size;

static bool __initdata cmdline_maps;

/* Current initialization state; starts out as IOMMU_START_STATE */
static enum iommu_init_state init_state = IOMMU_START_STATE;

/* Forward declarations of functions defined later in this file */
static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);

static bool amd_iommu_pre_enabled = true;

/* Tested against the IOMMU_IVINFO_* bits in early_iommu_features_init() */
static u32 amd_iommu_ivinfo __initdata;
237 
238 bool translation_pre_enabled(struct amd_iommu *iommu)
239 {
240 	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
241 }
242 
243 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
244 {
245 	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
246 }
247 
248 static void init_translation_status(struct amd_iommu *iommu)
249 {
250 	u64 ctrl;
251 
252 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
253 	if (ctrl & (1<<CONTROL_IOMMU_EN))
254 		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
255 }
256 
257 static inline unsigned long tbl_size(int entry_size, int last_bdf)
258 {
259 	unsigned shift = PAGE_SHIFT +
260 			 get_order((last_bdf + 1) * entry_size);
261 
262 	return 1UL << shift;
263 }
264 
265 int amd_iommu_get_num_iommus(void)
266 {
267 	return amd_iommus_present;
268 }
269 
270 /*
271  * Iterate through all the IOMMUs to get common EFR
272  * masks among all IOMMUs and warn if found inconsistency.
273  */
274 static void get_global_efr(void)
275 {
276 	struct amd_iommu *iommu;
277 
278 	for_each_iommu(iommu) {
279 		u64 tmp = iommu->features;
280 		u64 tmp2 = iommu->features2;
281 
282 		if (list_is_first(&iommu->list, &amd_iommu_list)) {
283 			amd_iommu_efr = tmp;
284 			amd_iommu_efr2 = tmp2;
285 			continue;
286 		}
287 
288 		if (amd_iommu_efr == tmp &&
289 		    amd_iommu_efr2 == tmp2)
290 			continue;
291 
292 		pr_err(FW_BUG
293 		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
294 		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
295 		       iommu->index, iommu->pci_seg->id,
296 		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
297 		       PCI_FUNC(iommu->devid));
298 
299 		amd_iommu_efr &= tmp;
300 		amd_iommu_efr2 &= tmp2;
301 	}
302 
303 	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
304 }
305 
306 static bool check_feature_on_all_iommus(u64 mask)
307 {
308 	return !!(amd_iommu_efr & mask);
309 }
310 
311 static inline int check_feature_gpt_level(void)
312 {
313 	return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
314 }
315 
316 /*
317  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
318  * Default to IVHD EFR since it is available sooner
319  * (i.e. before PCI init).
320  */
321 static void __init early_iommu_features_init(struct amd_iommu *iommu,
322 					     struct ivhd_header *h)
323 {
324 	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
325 		iommu->features = h->efr_reg;
326 		iommu->features2 = h->efr_reg2;
327 	}
328 	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
329 		amdr_ivrs_remap_support = true;
330 }
331 
332 /* Access to l1 and l2 indexed register spaces */
333 
334 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
335 {
336 	u32 val;
337 
338 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
339 	pci_read_config_dword(iommu->dev, 0xfc, &val);
340 	return val;
341 }
342 
343 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
344 {
345 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
346 	pci_write_config_dword(iommu->dev, 0xfc, val);
347 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
348 }
349 
350 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
351 {
352 	u32 val;
353 
354 	pci_write_config_dword(iommu->dev, 0xf0, address);
355 	pci_read_config_dword(iommu->dev, 0xf4, &val);
356 	return val;
357 }
358 
359 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
360 {
361 	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
362 	pci_write_config_dword(iommu->dev, 0xf4, val);
363 }
364 
365 /****************************************************************************
366  *
367  * AMD IOMMU MMIO register space handling functions
368  *
369  * These functions are used to program the IOMMU device registers in
370  * MMIO space required for that driver.
371  *
372  ****************************************************************************/
373 
374 /*
375  * This function set the exclusion range in the IOMMU. DMA accesses to the
376  * exclusion range are passed through untranslated
377  */
378 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
379 {
380 	u64 start = iommu->exclusion_start & PAGE_MASK;
381 	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
382 	u64 entry;
383 
384 	if (!iommu->exclusion_start)
385 		return;
386 
387 	entry = start | MMIO_EXCL_ENABLE_MASK;
388 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
389 			&entry, sizeof(entry));
390 
391 	entry = limit;
392 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
393 			&entry, sizeof(entry));
394 }
395 
396 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
397 {
398 	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
399 	u64 entry = start & PM_ADDR_MASK;
400 
401 	if (!check_feature_on_all_iommus(FEATURE_SNP))
402 		return;
403 
404 	/* Note:
405 	 * Re-purpose Exclusion base/limit registers for Completion wait
406 	 * write-back base/limit.
407 	 */
408 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
409 		    &entry, sizeof(entry));
410 
411 	/* Note:
412 	 * Default to 4 Kbytes, which can be specified by setting base
413 	 * address equal to the limit address.
414 	 */
415 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
416 		    &entry, sizeof(entry));
417 }
418 
419 /* Programs the physical address of the device table into the IOMMU hardware */
420 static void iommu_set_device_table(struct amd_iommu *iommu)
421 {
422 	u64 entry;
423 	u32 dev_table_size = iommu->pci_seg->dev_table_size;
424 	void *dev_table = (void *)get_dev_table(iommu);
425 
426 	BUG_ON(iommu->mmio_base == NULL);
427 
428 	entry = iommu_virt_to_phys(dev_table);
429 	entry |= (dev_table_size >> 12) - 1;
430 	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
431 			&entry, sizeof(entry));
432 }
433 
434 /* Generic functions to enable/disable certain features of the IOMMU. */
435 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
436 {
437 	u64 ctrl;
438 
439 	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
440 	ctrl |= (1ULL << bit);
441 	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
442 }
443 
444 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
445 {
446 	u64 ctrl;
447 
448 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
449 	ctrl &= ~(1ULL << bit);
450 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
451 }
452 
453 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
454 {
455 	u64 ctrl;
456 
457 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
458 	ctrl &= ~CTRL_INV_TO_MASK;
459 	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
460 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
461 }
462 
463 /* Function to enable the hardware */
464 static void iommu_enable(struct amd_iommu *iommu)
465 {
466 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
467 }
468 
469 static void iommu_disable(struct amd_iommu *iommu)
470 {
471 	if (!iommu->mmio_base)
472 		return;
473 
474 	/* Disable command buffer */
475 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
476 
477 	/* Disable event logging and event interrupts */
478 	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
479 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
480 
481 	/* Disable IOMMU GA_LOG */
482 	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
483 	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
484 
485 	/* Disable IOMMU hardware itself */
486 	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
487 }
488 
489 /*
490  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
491  * the system has one.
492  */
493 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
494 {
495 	if (!request_mem_region(address, end, "amd_iommu")) {
496 		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
497 			address, end);
498 		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
499 		return NULL;
500 	}
501 
502 	return (u8 __iomem *)ioremap(address, end);
503 }
504 
505 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
506 {
507 	if (iommu->mmio_base)
508 		iounmap(iommu->mmio_base);
509 	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
510 }
511 
512 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
513 {
514 	u32 size = 0;
515 
516 	switch (h->type) {
517 	case 0x10:
518 		size = 24;
519 		break;
520 	case 0x11:
521 	case 0x40:
522 		size = 40;
523 		break;
524 	}
525 	return size;
526 }
527 
528 /****************************************************************************
529  *
530  * The functions below belong to the first pass of AMD IOMMU ACPI table
531  * parsing. In this pass we try to find out the highest device id this
532  * code has to handle. Upon this information the size of the shared data
533  * structures is determined later.
534  *
535  ****************************************************************************/
536 
537 /*
538  * This function calculates the length of a given IVHD entry
539  */
540 static inline int ivhd_entry_length(u8 *ivhd)
541 {
542 	u32 type = ((struct ivhd_entry *)ivhd)->type;
543 
544 	if (type < 0x80) {
545 		return 0x04 << (*ivhd >> 6);
546 	} else if (type == IVHD_DEV_ACPI_HID) {
547 		/* For ACPI_HID, offset 21 is uid len */
548 		return *((u8 *)ivhd + 21) + 22;
549 	}
550 	return 0;
551 }
552 
553 /*
554  * After reading the highest device id from the IOMMU PCI capability header
555  * this function looks if there is a higher device id defined in the ACPI table
556  */
557 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
558 {
559 	u8 *p = (void *)h, *end = (void *)h;
560 	struct ivhd_entry *dev;
561 	int last_devid = -EINVAL;
562 
563 	u32 ivhd_size = get_ivhd_header_size(h);
564 
565 	if (!ivhd_size) {
566 		pr_err("Unsupported IVHD type %#x\n", h->type);
567 		return -EINVAL;
568 	}
569 
570 	p += ivhd_size;
571 	end += h->length;
572 
573 	while (p < end) {
574 		dev = (struct ivhd_entry *)p;
575 		switch (dev->type) {
576 		case IVHD_DEV_ALL:
577 			/* Use maximum BDF value for DEV_ALL */
578 			return 0xffff;
579 		case IVHD_DEV_SELECT:
580 		case IVHD_DEV_RANGE_END:
581 		case IVHD_DEV_ALIAS:
582 		case IVHD_DEV_EXT_SELECT:
583 			/* all the above subfield types refer to device ids */
584 			if (dev->devid > last_devid)
585 				last_devid = dev->devid;
586 			break;
587 		default:
588 			break;
589 		}
590 		p += ivhd_entry_length(p);
591 	}
592 
593 	WARN_ON(p != end);
594 
595 	return last_devid;
596 }
597 
598 static int __init check_ivrs_checksum(struct acpi_table_header *table)
599 {
600 	int i;
601 	u8 checksum = 0, *p = (u8 *)table;
602 
603 	for (i = 0; i < table->length; ++i)
604 		checksum += p[i];
605 	if (checksum != 0) {
606 		/* ACPI table corrupt */
607 		pr_err(FW_BUG "IVRS invalid checksum\n");
608 		return -ENODEV;
609 	}
610 
611 	return 0;
612 }
613 
614 /*
615  * Iterate over all IVHD entries in the ACPI table and find the highest device
616  * id which we need to handle. This is the first of three functions which parse
617  * the ACPI table. So we check the checksum here.
618  */
619 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
620 {
621 	u8 *p = (u8 *)table, *end = (u8 *)table;
622 	struct ivhd_header *h;
623 	int last_devid, last_bdf = 0;
624 
625 	p += IVRS_HEADER_LENGTH;
626 
627 	end += table->length;
628 	while (p < end) {
629 		h = (struct ivhd_header *)p;
630 		if (h->pci_seg == pci_seg &&
631 		    h->type == amd_iommu_target_ivhd_type) {
632 			last_devid = find_last_devid_from_ivhd(h);
633 
634 			if (last_devid < 0)
635 				return -EINVAL;
636 			if (last_devid > last_bdf)
637 				last_bdf = last_devid;
638 		}
639 		p += h->length;
640 	}
641 	WARN_ON(p != end);
642 
643 	return last_bdf;
644 }
645 
646 /****************************************************************************
647  *
648  * The following functions belong to the code path which parses the ACPI table
649  * the second time. In this ACPI parsing iteration we allocate IOMMU specific
650  * data structures, initialize the per PCI segment device/alias/rlookup table
651  * and also basically initialize the hardware.
652  *
653  ****************************************************************************/
654 
655 /* Allocate per PCI segment device table */
656 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
657 {
658 	pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
659 						      get_order(pci_seg->dev_table_size));
660 	if (!pci_seg->dev_table)
661 		return -ENOMEM;
662 
663 	return 0;
664 }
665 
666 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
667 {
668 	free_pages((unsigned long)pci_seg->dev_table,
669 		    get_order(pci_seg->dev_table_size));
670 	pci_seg->dev_table = NULL;
671 }
672 
673 /* Allocate per PCI segment IOMMU rlookup table. */
674 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
675 {
676 	pci_seg->rlookup_table = (void *)__get_free_pages(
677 						GFP_KERNEL | __GFP_ZERO,
678 						get_order(pci_seg->rlookup_table_size));
679 	if (pci_seg->rlookup_table == NULL)
680 		return -ENOMEM;
681 
682 	return 0;
683 }
684 
685 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
686 {
687 	free_pages((unsigned long)pci_seg->rlookup_table,
688 		   get_order(pci_seg->rlookup_table_size));
689 	pci_seg->rlookup_table = NULL;
690 }
691 
692 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
693 {
694 	pci_seg->irq_lookup_table = (void *)__get_free_pages(
695 					     GFP_KERNEL | __GFP_ZERO,
696 					     get_order(pci_seg->rlookup_table_size));
697 	kmemleak_alloc(pci_seg->irq_lookup_table,
698 		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
699 	if (pci_seg->irq_lookup_table == NULL)
700 		return -ENOMEM;
701 
702 	return 0;
703 }
704 
705 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
706 {
707 	kmemleak_free(pci_seg->irq_lookup_table);
708 	free_pages((unsigned long)pci_seg->irq_lookup_table,
709 		   get_order(pci_seg->rlookup_table_size));
710 	pci_seg->irq_lookup_table = NULL;
711 }
712 
713 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
714 {
715 	int i;
716 
717 	pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL,
718 					get_order(pci_seg->alias_table_size));
719 	if (!pci_seg->alias_table)
720 		return -ENOMEM;
721 
722 	/*
723 	 * let all alias entries point to itself
724 	 */
725 	for (i = 0; i <= pci_seg->last_bdf; ++i)
726 		pci_seg->alias_table[i] = i;
727 
728 	return 0;
729 }
730 
731 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
732 {
733 	free_pages((unsigned long)pci_seg->alias_table,
734 		   get_order(pci_seg->alias_table_size));
735 	pci_seg->alias_table = NULL;
736 }
737 
738 /*
739  * Allocates the command buffer. This buffer is per AMD IOMMU. We can
740  * write commands to that buffer later and the IOMMU will execute them
741  * asynchronously
742  */
743 static int __init alloc_command_buffer(struct amd_iommu *iommu)
744 {
745 	iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
746 						  get_order(CMD_BUFFER_SIZE));
747 
748 	return iommu->cmd_buf ? 0 : -ENOMEM;
749 }
750 
751 /*
752  * This function restarts event logging in case the IOMMU experienced
753  * an event log buffer overflow.
754  */
755 void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
756 {
757 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
758 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
759 }
760 
761 /*
762  * This function restarts event logging in case the IOMMU experienced
763  * an GA log overflow.
764  */
765 void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
766 {
767 	u32 status;
768 
769 	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
770 	if (status & MMIO_STATUS_GALOG_RUN_MASK)
771 		return;
772 
773 	pr_info_ratelimited("IOMMU GA Log restarting\n");
774 
775 	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
776 	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
777 
778 	writel(MMIO_STATUS_GALOG_OVERFLOW_MASK,
779 	       iommu->mmio_base + MMIO_STATUS_OFFSET);
780 
781 	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
782 	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
783 }
784 
785 /*
786  * This function resets the command buffer if the IOMMU stopped fetching
787  * commands from it.
788  */
789 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
790 {
791 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
792 
793 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
794 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
795 	iommu->cmd_buf_head = 0;
796 	iommu->cmd_buf_tail = 0;
797 
798 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
799 }
800 
801 /*
802  * This function writes the command buffer address to the hardware and
803  * enables it.
804  */
805 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
806 {
807 	u64 entry;
808 
809 	BUG_ON(iommu->cmd_buf == NULL);
810 
811 	entry = iommu_virt_to_phys(iommu->cmd_buf);
812 	entry |= MMIO_CMD_SIZE_512;
813 
814 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
815 		    &entry, sizeof(entry));
816 
817 	amd_iommu_reset_cmd_buffer(iommu);
818 }
819 
820 /*
821  * This function disables the command buffer
822  */
823 static void iommu_disable_command_buffer(struct amd_iommu *iommu)
824 {
825 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
826 }
827 
828 static void __init free_command_buffer(struct amd_iommu *iommu)
829 {
830 	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
831 }
832 
833 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
834 					 gfp_t gfp, size_t size)
835 {
836 	int order = get_order(size);
837 	void *buf = (void *)__get_free_pages(gfp, order);
838 
839 	if (buf &&
840 	    check_feature_on_all_iommus(FEATURE_SNP) &&
841 	    set_memory_4k((unsigned long)buf, (1 << order))) {
842 		free_pages((unsigned long)buf, order);
843 		buf = NULL;
844 	}
845 
846 	return buf;
847 }
848 
849 /* allocates the memory where the IOMMU will log its events to */
850 static int __init alloc_event_buffer(struct amd_iommu *iommu)
851 {
852 	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
853 					      EVT_BUFFER_SIZE);
854 
855 	return iommu->evt_buf ? 0 : -ENOMEM;
856 }
857 
858 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
859 {
860 	u64 entry;
861 
862 	BUG_ON(iommu->evt_buf == NULL);
863 
864 	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
865 
866 	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
867 		    &entry, sizeof(entry));
868 
869 	/* set head and tail to zero manually */
870 	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
871 	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
872 
873 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
874 }
875 
876 /*
877  * This function disables the event log buffer
878  */
879 static void iommu_disable_event_buffer(struct amd_iommu *iommu)
880 {
881 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
882 }
883 
884 static void __init free_event_buffer(struct amd_iommu *iommu)
885 {
886 	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
887 }
888 
889 /* allocates the memory where the IOMMU will log its events to */
890 static int __init alloc_ppr_log(struct amd_iommu *iommu)
891 {
892 	iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
893 					      PPR_LOG_SIZE);
894 
895 	return iommu->ppr_log ? 0 : -ENOMEM;
896 }
897 
898 static void iommu_enable_ppr_log(struct amd_iommu *iommu)
899 {
900 	u64 entry;
901 
902 	if (iommu->ppr_log == NULL)
903 		return;
904 
905 	entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
906 
907 	memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
908 		    &entry, sizeof(entry));
909 
910 	/* set head and tail to zero manually */
911 	writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
912 	writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
913 
914 	iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
915 	iommu_feature_enable(iommu, CONTROL_PPR_EN);
916 }
917 
918 static void __init free_ppr_log(struct amd_iommu *iommu)
919 {
920 	free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
921 }
922 
/*
 * Free the GA log and the GA log tail page of one IOMMU and reset both
 * pointers to NULL. Compiles to an empty function without CONFIG_IRQ_REMAP.
 *
 * The pointers are cleared because iommu_init_ga_log() calls this function
 * on its error path: a stale iommu->ga_log would pass the NULL check in
 * iommu_ga_log_enable() and would be freed a second time by a later call.
 */
static void free_ga_log(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
	iommu->ga_log = NULL;

	free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
	iommu->ga_log_tail = NULL;
#endif
}
930 
931 #ifdef CONFIG_IRQ_REMAP
932 static int iommu_ga_log_enable(struct amd_iommu *iommu)
933 {
934 	u32 status, i;
935 	u64 entry;
936 
937 	if (!iommu->ga_log)
938 		return -EINVAL;
939 
940 	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
941 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
942 		    &entry, sizeof(entry));
943 	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
944 		 (BIT_ULL(52)-1)) & ~7ULL;
945 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
946 		    &entry, sizeof(entry));
947 	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
948 	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
949 
950 
951 	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
952 	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
953 
954 	for (i = 0; i < LOOP_TIMEOUT; ++i) {
955 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
956 		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
957 			break;
958 		udelay(10);
959 	}
960 
961 	if (WARN_ON(i >= LOOP_TIMEOUT))
962 		return -EINVAL;
963 
964 	return 0;
965 }
966 
967 static int iommu_init_ga_log(struct amd_iommu *iommu)
968 {
969 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
970 		return 0;
971 
972 	iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
973 					get_order(GA_LOG_SIZE));
974 	if (!iommu->ga_log)
975 		goto err_out;
976 
977 	iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
978 					get_order(8));
979 	if (!iommu->ga_log_tail)
980 		goto err_out;
981 
982 	return 0;
983 err_out:
984 	free_ga_log(iommu);
985 	return -EINVAL;
986 }
987 #endif /* CONFIG_IRQ_REMAP */
988 
989 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
990 {
991 	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
992 
993 	return iommu->cmd_sem ? 0 : -ENOMEM;
994 }
995 
996 static void __init free_cwwb_sem(struct amd_iommu *iommu)
997 {
998 	if (iommu->cmd_sem)
999 		free_page((unsigned long)iommu->cmd_sem);
1000 }
1001 
/*
 * Enable CONTROL_XT_EN when x2APIC mode is selected. Compiles to an empty
 * function without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_xt(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	/*
	 * XT mode (32-bit APIC destination ID) requires
	 * GA mode (128-bit IRTE support) as a prerequisite.
	 */
	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
		return;

	if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
		return;

	iommu_feature_enable(iommu, CONTROL_XT_EN);
#endif /* CONFIG_IRQ_REMAP */
}
1014 
1015 static void iommu_enable_gt(struct amd_iommu *iommu)
1016 {
1017 	if (!iommu_feature(iommu, FEATURE_GT))
1018 		return;
1019 
1020 	iommu_feature_enable(iommu, CONTROL_GT_EN);
1021 }
1022 
1023 /* sets a specific bit in the device table entry. */
1024 static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
1025 				u16 devid, u8 bit)
1026 {
1027 	int i = (bit >> 6) & 0x03;
1028 	int _bit = bit & 0x3f;
1029 
1030 	dev_table[devid].data[i] |= (1UL << _bit);
1031 }
1032 
1033 static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1034 {
1035 	struct dev_table_entry *dev_table = get_dev_table(iommu);
1036 
1037 	return __set_dev_entry_bit(dev_table, devid, bit);
1038 }
1039 
1040 static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
1041 			       u16 devid, u8 bit)
1042 {
1043 	int i = (bit >> 6) & 0x03;
1044 	int _bit = bit & 0x3f;
1045 
1046 	return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
1047 }
1048 
1049 static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1050 {
1051 	struct dev_table_entry *dev_table = get_dev_table(iommu);
1052 
1053 	return __get_dev_entry_bit(dev_table, devid, bit);
1054 }
1055 
1056 static bool __copy_device_table(struct amd_iommu *iommu)
1057 {
1058 	u64 int_ctl, int_tab_len, entry = 0;
1059 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1060 	struct dev_table_entry *old_devtb = NULL;
1061 	u32 lo, hi, devid, old_devtb_size;
1062 	phys_addr_t old_devtb_phys;
1063 	u16 dom_id, dte_v, irq_v;
1064 	gfp_t gfp_flag;
1065 	u64 tmp;
1066 
1067 	/* Each IOMMU use separate device table with the same size */
1068 	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1069 	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1070 	entry = (((u64) hi) << 32) + lo;
1071 
1072 	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1073 	if (old_devtb_size != pci_seg->dev_table_size) {
1074 		pr_err("The device table size of IOMMU:%d is not expected!\n",
1075 			iommu->index);
1076 		return false;
1077 	}
1078 
1079 	/*
1080 	 * When SME is enabled in the first kernel, the entry includes the
1081 	 * memory encryption mask(sme_me_mask), we must remove the memory
1082 	 * encryption mask to obtain the true physical address in kdump kernel.
1083 	 */
1084 	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1085 
1086 	if (old_devtb_phys >= 0x100000000ULL) {
1087 		pr_err("The address of old device table is above 4G, not trustworthy!\n");
1088 		return false;
1089 	}
1090 	old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1091 		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
1092 							pci_seg->dev_table_size)
1093 		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1094 
1095 	if (!old_devtb)
1096 		return false;
1097 
1098 	gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
1099 	pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
1100 						    get_order(pci_seg->dev_table_size));
1101 	if (pci_seg->old_dev_tbl_cpy == NULL) {
1102 		pr_err("Failed to allocate memory for copying old device table!\n");
1103 		memunmap(old_devtb);
1104 		return false;
1105 	}
1106 
1107 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1108 		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1109 		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1110 		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1111 
1112 		if (dte_v && dom_id) {
1113 			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1114 			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1115 			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
1116 			/* If gcr3 table existed, mask it out */
1117 			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1118 				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1119 				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1120 				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1121 				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1122 				tmp |= DTE_FLAG_GV;
1123 				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1124 			}
1125 		}
1126 
1127 		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1128 		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1129 		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1130 		if (irq_v && (int_ctl || int_tab_len)) {
1131 			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1132 			    (int_tab_len != DTE_INTTABLEN)) {
1133 				pr_err("Wrong old irq remapping flag: %#x\n", devid);
1134 				memunmap(old_devtb);
1135 				return false;
1136 			}
1137 
1138 			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1139 		}
1140 	}
1141 	memunmap(old_devtb);
1142 
1143 	return true;
1144 }
1145 
1146 static bool copy_device_table(void)
1147 {
1148 	struct amd_iommu *iommu;
1149 	struct amd_iommu_pci_seg *pci_seg;
1150 
1151 	if (!amd_iommu_pre_enabled)
1152 		return false;
1153 
1154 	pr_warn("Translation is already enabled - trying to copy translation structures\n");
1155 
1156 	/*
1157 	 * All IOMMUs within PCI segment shares common device table.
1158 	 * Hence copy device table only once per PCI segment.
1159 	 */
1160 	for_each_pci_segment(pci_seg) {
1161 		for_each_iommu(iommu) {
1162 			if (pci_seg->id != iommu->pci_seg->id)
1163 				continue;
1164 			if (!__copy_device_table(iommu))
1165 				return false;
1166 			break;
1167 		}
1168 	}
1169 
1170 	return true;
1171 }
1172 
1173 void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
1174 {
1175 	int sysmgt;
1176 
1177 	sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
1178 		 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
1179 
1180 	if (sysmgt == 0x01)
1181 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
1182 }
1183 
1184 /*
1185  * This function takes the device specific flags read from the ACPI
1186  * table and sets up the device table entry with that information
1187  */
1188 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1189 					   u16 devid, u32 flags, u32 ext_flags)
1190 {
1191 	if (flags & ACPI_DEVFLAG_INITPASS)
1192 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
1193 	if (flags & ACPI_DEVFLAG_EXTINT)
1194 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
1195 	if (flags & ACPI_DEVFLAG_NMI)
1196 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
1197 	if (flags & ACPI_DEVFLAG_SYSMGT1)
1198 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
1199 	if (flags & ACPI_DEVFLAG_SYSMGT2)
1200 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
1201 	if (flags & ACPI_DEVFLAG_LINT0)
1202 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
1203 	if (flags & ACPI_DEVFLAG_LINT1)
1204 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
1205 
1206 	amd_iommu_apply_erratum_63(iommu, devid);
1207 
1208 	amd_iommu_set_rlookup_table(iommu, devid);
1209 }
1210 
1211 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1212 {
1213 	struct devid_map *entry;
1214 	struct list_head *list;
1215 
1216 	if (type == IVHD_SPECIAL_IOAPIC)
1217 		list = &ioapic_map;
1218 	else if (type == IVHD_SPECIAL_HPET)
1219 		list = &hpet_map;
1220 	else
1221 		return -EINVAL;
1222 
1223 	list_for_each_entry(entry, list, list) {
1224 		if (!(entry->id == id && entry->cmd_line))
1225 			continue;
1226 
1227 		pr_info("Command-line override present for %s id %d - ignoring\n",
1228 			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1229 
1230 		*devid = entry->devid;
1231 
1232 		return 0;
1233 	}
1234 
1235 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1236 	if (!entry)
1237 		return -ENOMEM;
1238 
1239 	entry->id	= id;
1240 	entry->devid	= *devid;
1241 	entry->cmd_line	= cmd_line;
1242 
1243 	list_add_tail(&entry->list, list);
1244 
1245 	return 0;
1246 }
1247 
1248 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1249 				      bool cmd_line)
1250 {
1251 	struct acpihid_map_entry *entry;
1252 	struct list_head *list = &acpihid_map;
1253 
1254 	list_for_each_entry(entry, list, list) {
1255 		if (strcmp(entry->hid, hid) ||
1256 		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1257 		    !entry->cmd_line)
1258 			continue;
1259 
1260 		pr_info("Command-line override for hid:%s uid:%s\n",
1261 			hid, uid);
1262 		*devid = entry->devid;
1263 		return 0;
1264 	}
1265 
1266 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1267 	if (!entry)
1268 		return -ENOMEM;
1269 
1270 	memcpy(entry->uid, uid, strlen(uid));
1271 	memcpy(entry->hid, hid, strlen(hid));
1272 	entry->devid = *devid;
1273 	entry->cmd_line	= cmd_line;
1274 	entry->root_devid = (entry->devid & (~0x7));
1275 
1276 	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1277 		entry->cmd_line ? "cmd" : "ivrs",
1278 		entry->hid, entry->uid, entry->root_devid);
1279 
1280 	list_add_tail(&entry->list, list);
1281 	return 0;
1282 }
1283 
1284 static int __init add_early_maps(void)
1285 {
1286 	int i, ret;
1287 
1288 	for (i = 0; i < early_ioapic_map_size; ++i) {
1289 		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1290 					 early_ioapic_map[i].id,
1291 					 &early_ioapic_map[i].devid,
1292 					 early_ioapic_map[i].cmd_line);
1293 		if (ret)
1294 			return ret;
1295 	}
1296 
1297 	for (i = 0; i < early_hpet_map_size; ++i) {
1298 		ret = add_special_device(IVHD_SPECIAL_HPET,
1299 					 early_hpet_map[i].id,
1300 					 &early_hpet_map[i].devid,
1301 					 early_hpet_map[i].cmd_line);
1302 		if (ret)
1303 			return ret;
1304 	}
1305 
1306 	for (i = 0; i < early_acpihid_map_size; ++i) {
1307 		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1308 					  early_acpihid_map[i].uid,
1309 					  &early_acpihid_map[i].devid,
1310 					  early_acpihid_map[i].cmd_line);
1311 		if (ret)
1312 			return ret;
1313 	}
1314 
1315 	return 0;
1316 }
1317 
1318 /*
1319  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1320  * initializes the hardware and our data structures with it.
1321  */
1322 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1323 					struct ivhd_header *h)
1324 {
1325 	u8 *p = (u8 *)h;
1326 	u8 *end = p, flags = 0;
1327 	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1328 	u32 dev_i, ext_flags = 0;
1329 	bool alias = false;
1330 	struct ivhd_entry *e;
1331 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1332 	u32 ivhd_size;
1333 	int ret;
1334 
1335 
1336 	ret = add_early_maps();
1337 	if (ret)
1338 		return ret;
1339 
1340 	amd_iommu_apply_ivrs_quirks();
1341 
1342 	/*
1343 	 * First save the recommended feature enable bits from ACPI
1344 	 */
1345 	iommu->acpi_flags = h->flags;
1346 
1347 	/*
1348 	 * Done. Now parse the device entries
1349 	 */
1350 	ivhd_size = get_ivhd_header_size(h);
1351 	if (!ivhd_size) {
1352 		pr_err("Unsupported IVHD type %#x\n", h->type);
1353 		return -EINVAL;
1354 	}
1355 
1356 	p += ivhd_size;
1357 
1358 	end += h->length;
1359 
1360 
1361 	while (p < end) {
1362 		e = (struct ivhd_entry *)p;
1363 		seg_id = pci_seg->id;
1364 
1365 		switch (e->type) {
1366 		case IVHD_DEV_ALL:
1367 
1368 			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1369 
1370 			for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
1371 				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1372 			break;
1373 		case IVHD_DEV_SELECT:
1374 
1375 			DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
1376 				    "flags: %02x\n",
1377 				    seg_id, PCI_BUS_NUM(e->devid),
1378 				    PCI_SLOT(e->devid),
1379 				    PCI_FUNC(e->devid),
1380 				    e->flags);
1381 
1382 			devid = e->devid;
1383 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1384 			break;
1385 		case IVHD_DEV_SELECT_RANGE_START:
1386 
1387 			DUMP_printk("  DEV_SELECT_RANGE_START\t "
1388 				    "devid: %04x:%02x:%02x.%x flags: %02x\n",
1389 				    seg_id, PCI_BUS_NUM(e->devid),
1390 				    PCI_SLOT(e->devid),
1391 				    PCI_FUNC(e->devid),
1392 				    e->flags);
1393 
1394 			devid_start = e->devid;
1395 			flags = e->flags;
1396 			ext_flags = 0;
1397 			alias = false;
1398 			break;
1399 		case IVHD_DEV_ALIAS:
1400 
1401 			DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
1402 				    "flags: %02x devid_to: %02x:%02x.%x\n",
1403 				    seg_id, PCI_BUS_NUM(e->devid),
1404 				    PCI_SLOT(e->devid),
1405 				    PCI_FUNC(e->devid),
1406 				    e->flags,
1407 				    PCI_BUS_NUM(e->ext >> 8),
1408 				    PCI_SLOT(e->ext >> 8),
1409 				    PCI_FUNC(e->ext >> 8));
1410 
1411 			devid = e->devid;
1412 			devid_to = e->ext >> 8;
1413 			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1414 			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1415 			pci_seg->alias_table[devid] = devid_to;
1416 			break;
1417 		case IVHD_DEV_ALIAS_RANGE:
1418 
1419 			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1420 				    "devid: %04x:%02x:%02x.%x flags: %02x "
1421 				    "devid_to: %04x:%02x:%02x.%x\n",
1422 				    seg_id, PCI_BUS_NUM(e->devid),
1423 				    PCI_SLOT(e->devid),
1424 				    PCI_FUNC(e->devid),
1425 				    e->flags,
1426 				    seg_id, PCI_BUS_NUM(e->ext >> 8),
1427 				    PCI_SLOT(e->ext >> 8),
1428 				    PCI_FUNC(e->ext >> 8));
1429 
1430 			devid_start = e->devid;
1431 			flags = e->flags;
1432 			devid_to = e->ext >> 8;
1433 			ext_flags = 0;
1434 			alias = true;
1435 			break;
1436 		case IVHD_DEV_EXT_SELECT:
1437 
1438 			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
1439 				    "flags: %02x ext: %08x\n",
1440 				    seg_id, PCI_BUS_NUM(e->devid),
1441 				    PCI_SLOT(e->devid),
1442 				    PCI_FUNC(e->devid),
1443 				    e->flags, e->ext);
1444 
1445 			devid = e->devid;
1446 			set_dev_entry_from_acpi(iommu, devid, e->flags,
1447 						e->ext);
1448 			break;
1449 		case IVHD_DEV_EXT_SELECT_RANGE:
1450 
1451 			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1452 				    "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
1453 				    seg_id, PCI_BUS_NUM(e->devid),
1454 				    PCI_SLOT(e->devid),
1455 				    PCI_FUNC(e->devid),
1456 				    e->flags, e->ext);
1457 
1458 			devid_start = e->devid;
1459 			flags = e->flags;
1460 			ext_flags = e->ext;
1461 			alias = false;
1462 			break;
1463 		case IVHD_DEV_RANGE_END:
1464 
1465 			DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
1466 				    seg_id, PCI_BUS_NUM(e->devid),
1467 				    PCI_SLOT(e->devid),
1468 				    PCI_FUNC(e->devid));
1469 
1470 			devid = e->devid;
1471 			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1472 				if (alias) {
1473 					pci_seg->alias_table[dev_i] = devid_to;
1474 					set_dev_entry_from_acpi(iommu,
1475 						devid_to, flags, ext_flags);
1476 				}
1477 				set_dev_entry_from_acpi(iommu, dev_i,
1478 							flags, ext_flags);
1479 			}
1480 			break;
1481 		case IVHD_DEV_SPECIAL: {
1482 			u8 handle, type;
1483 			const char *var;
1484 			u32 devid;
1485 			int ret;
1486 
1487 			handle = e->ext & 0xff;
1488 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1489 			type   = (e->ext >> 24) & 0xff;
1490 
1491 			if (type == IVHD_SPECIAL_IOAPIC)
1492 				var = "IOAPIC";
1493 			else if (type == IVHD_SPECIAL_HPET)
1494 				var = "HPET";
1495 			else
1496 				var = "UNKNOWN";
1497 
1498 			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
1499 				    var, (int)handle,
1500 				    seg_id, PCI_BUS_NUM(devid),
1501 				    PCI_SLOT(devid),
1502 				    PCI_FUNC(devid));
1503 
1504 			ret = add_special_device(type, handle, &devid, false);
1505 			if (ret)
1506 				return ret;
1507 
1508 			/*
1509 			 * add_special_device might update the devid in case a
1510 			 * command-line override is present. So call
1511 			 * set_dev_entry_from_acpi after add_special_device.
1512 			 */
1513 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1514 
1515 			break;
1516 		}
1517 		case IVHD_DEV_ACPI_HID: {
1518 			u32 devid;
1519 			u8 hid[ACPIHID_HID_LEN];
1520 			u8 uid[ACPIHID_UID_LEN];
1521 			int ret;
1522 
1523 			if (h->type != 0x40) {
1524 				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1525 				       e->type);
1526 				break;
1527 			}
1528 
1529 			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1530 			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1531 			hid[ACPIHID_HID_LEN - 1] = '\0';
1532 
1533 			if (!(*hid)) {
1534 				pr_err(FW_BUG "Invalid HID.\n");
1535 				break;
1536 			}
1537 
1538 			uid[0] = '\0';
1539 			switch (e->uidf) {
1540 			case UID_NOT_PRESENT:
1541 
1542 				if (e->uidl != 0)
1543 					pr_warn(FW_BUG "Invalid UID length.\n");
1544 
1545 				break;
1546 			case UID_IS_INTEGER:
1547 
1548 				sprintf(uid, "%d", e->uid);
1549 
1550 				break;
1551 			case UID_IS_CHARACTER:
1552 
1553 				memcpy(uid, &e->uid, e->uidl);
1554 				uid[e->uidl] = '\0';
1555 
1556 				break;
1557 			default:
1558 				break;
1559 			}
1560 
1561 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1562 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
1563 				    hid, uid, seg_id,
1564 				    PCI_BUS_NUM(devid),
1565 				    PCI_SLOT(devid),
1566 				    PCI_FUNC(devid));
1567 
1568 			flags = e->flags;
1569 
1570 			ret = add_acpi_hid_device(hid, uid, &devid, false);
1571 			if (ret)
1572 				return ret;
1573 
1574 			/*
1575 			 * add_special_device might update the devid in case a
1576 			 * command-line override is present. So call
1577 			 * set_dev_entry_from_acpi after add_special_device.
1578 			 */
1579 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1580 
1581 			break;
1582 		}
1583 		default:
1584 			break;
1585 		}
1586 
1587 		p += ivhd_entry_length(p);
1588 	}
1589 
1590 	return 0;
1591 }
1592 
1593 /* Allocate PCI segment data structure */
1594 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1595 					  struct acpi_table_header *ivrs_base)
1596 {
1597 	struct amd_iommu_pci_seg *pci_seg;
1598 	int last_bdf;
1599 
1600 	/*
1601 	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1602 	 * handle in this PCI segment. Upon this information the shared data
1603 	 * structures for the PCI segments in the system will be allocated.
1604 	 */
1605 	last_bdf = find_last_devid_acpi(ivrs_base, id);
1606 	if (last_bdf < 0)
1607 		return NULL;
1608 
1609 	pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1610 	if (pci_seg == NULL)
1611 		return NULL;
1612 
1613 	pci_seg->last_bdf = last_bdf;
1614 	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1615 	pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1616 	pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1617 	pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1618 
1619 	pci_seg->id = id;
1620 	init_llist_head(&pci_seg->dev_data_list);
1621 	INIT_LIST_HEAD(&pci_seg->unity_map);
1622 	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1623 
1624 	if (alloc_dev_table(pci_seg))
1625 		return NULL;
1626 	if (alloc_alias_table(pci_seg))
1627 		return NULL;
1628 	if (alloc_rlookup_table(pci_seg))
1629 		return NULL;
1630 
1631 	return pci_seg;
1632 }
1633 
1634 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1635 					struct acpi_table_header *ivrs_base)
1636 {
1637 	struct amd_iommu_pci_seg *pci_seg;
1638 
1639 	for_each_pci_segment(pci_seg) {
1640 		if (pci_seg->id == id)
1641 			return pci_seg;
1642 	}
1643 
1644 	return alloc_pci_segment(id, ivrs_base);
1645 }
1646 
1647 static void __init free_pci_segments(void)
1648 {
1649 	struct amd_iommu_pci_seg *pci_seg, *next;
1650 
1651 	for_each_pci_segment_safe(pci_seg, next) {
1652 		list_del(&pci_seg->list);
1653 		free_irq_lookup_table(pci_seg);
1654 		free_rlookup_table(pci_seg);
1655 		free_alias_table(pci_seg);
1656 		free_dev_table(pci_seg);
1657 		kfree(pci_seg);
1658 	}
1659 }
1660 
1661 static void __init free_iommu_one(struct amd_iommu *iommu)
1662 {
1663 	free_cwwb_sem(iommu);
1664 	free_command_buffer(iommu);
1665 	free_event_buffer(iommu);
1666 	free_ppr_log(iommu);
1667 	free_ga_log(iommu);
1668 	iommu_unmap_mmio_space(iommu);
1669 }
1670 
1671 static void __init free_iommu_all(void)
1672 {
1673 	struct amd_iommu *iommu, *next;
1674 
1675 	for_each_iommu_safe(iommu, next) {
1676 		list_del(&iommu->list);
1677 		free_iommu_one(iommu);
1678 		kfree(iommu);
1679 	}
1680 }
1681 
1682 /*
1683  * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1684  * Workaround:
1685  *     BIOS should disable L2B micellaneous clock gating by setting
1686  *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1687  */
1688 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1689 {
1690 	u32 value;
1691 
1692 	if ((boot_cpu_data.x86 != 0x15) ||
1693 	    (boot_cpu_data.x86_model < 0x10) ||
1694 	    (boot_cpu_data.x86_model > 0x1f))
1695 		return;
1696 
1697 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1698 	pci_read_config_dword(iommu->dev, 0xf4, &value);
1699 
1700 	if (value & BIT(2))
1701 		return;
1702 
1703 	/* Select NB indirect register 0x90 and enable writing */
1704 	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1705 
1706 	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1707 	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1708 
1709 	/* Clear the enable writing bit */
1710 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1711 }
1712 
1713 /*
1714  * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1715  * Workaround:
1716  *     BIOS should enable ATS write permission check by setting
1717  *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1718  */
1719 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1720 {
1721 	u32 value;
1722 
1723 	if ((boot_cpu_data.x86 != 0x15) ||
1724 	    (boot_cpu_data.x86_model < 0x30) ||
1725 	    (boot_cpu_data.x86_model > 0x3f))
1726 		return;
1727 
1728 	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1729 	value = iommu_read_l2(iommu, 0x47);
1730 
1731 	if (value & BIT(0))
1732 		return;
1733 
1734 	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1735 	iommu_write_l2(iommu, 0x47, value | BIT(0));
1736 
1737 	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1738 }
1739 
1740 /*
1741  * This function glues the initialization function for one IOMMU
1742  * together and also allocates the command buffer and programs the
1743  * hardware. It does NOT enable the IOMMU. This is done afterwards.
1744  */
1745 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1746 				 struct acpi_table_header *ivrs_base)
1747 {
1748 	struct amd_iommu_pci_seg *pci_seg;
1749 
1750 	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1751 	if (pci_seg == NULL)
1752 		return -ENOMEM;
1753 	iommu->pci_seg = pci_seg;
1754 
1755 	raw_spin_lock_init(&iommu->lock);
1756 	iommu->cmd_sem_val = 0;
1757 
1758 	/* Add IOMMU to internal data structures */
1759 	list_add_tail(&iommu->list, &amd_iommu_list);
1760 	iommu->index = amd_iommus_present++;
1761 
1762 	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1763 		WARN(1, "System has more IOMMUs than supported by this driver\n");
1764 		return -ENOSYS;
1765 	}
1766 
1767 	/* Index is fine - add IOMMU to the array */
1768 	amd_iommus[iommu->index] = iommu;
1769 
1770 	/*
1771 	 * Copy data from ACPI table entry to the iommu struct
1772 	 */
1773 	iommu->devid   = h->devid;
1774 	iommu->cap_ptr = h->cap_ptr;
1775 	iommu->mmio_phys = h->mmio_phys;
1776 
1777 	switch (h->type) {
1778 	case 0x10:
1779 		/* Check if IVHD EFR contains proper max banks/counters */
1780 		if ((h->efr_attr != 0) &&
1781 		    ((h->efr_attr & (0xF << 13)) != 0) &&
1782 		    ((h->efr_attr & (0x3F << 17)) != 0))
1783 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1784 		else
1785 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1786 
1787 		/*
1788 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1789 		 * GAM also requires GA mode. Therefore, we need to
1790 		 * check cmpxchg16b support before enabling it.
1791 		 */
1792 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1793 		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1794 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1795 		break;
1796 	case 0x11:
1797 	case 0x40:
1798 		if (h->efr_reg & (1 << 9))
1799 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1800 		else
1801 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1802 
1803 		/*
1804 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1805 		 * XT, GAM also requires GA mode. Therefore, we need to
1806 		 * check cmpxchg16b support before enabling them.
1807 		 */
1808 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1809 		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1810 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1811 			break;
1812 		}
1813 
1814 		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1815 			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1816 
1817 		early_iommu_features_init(iommu, h);
1818 
1819 		break;
1820 	default:
1821 		return -EINVAL;
1822 	}
1823 
1824 	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1825 						iommu->mmio_phys_end);
1826 	if (!iommu->mmio_base)
1827 		return -ENOMEM;
1828 
1829 	return init_iommu_from_acpi(iommu, h);
1830 }
1831 
1832 static int __init init_iommu_one_late(struct amd_iommu *iommu)
1833 {
1834 	int ret;
1835 
1836 	if (alloc_cwwb_sem(iommu))
1837 		return -ENOMEM;
1838 
1839 	if (alloc_command_buffer(iommu))
1840 		return -ENOMEM;
1841 
1842 	if (alloc_event_buffer(iommu))
1843 		return -ENOMEM;
1844 
1845 	iommu->int_enabled = false;
1846 
1847 	init_translation_status(iommu);
1848 	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1849 		iommu_disable(iommu);
1850 		clear_translation_pre_enabled(iommu);
1851 		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1852 			iommu->index);
1853 	}
1854 	if (amd_iommu_pre_enabled)
1855 		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1856 
1857 	if (amd_iommu_irq_remap) {
1858 		ret = amd_iommu_create_irq_domain(iommu);
1859 		if (ret)
1860 			return ret;
1861 	}
1862 
1863 	/*
1864 	 * Make sure IOMMU is not considered to translate itself. The IVRS
1865 	 * table tells us so, but this is a lie!
1866 	 */
1867 	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1868 
1869 	return 0;
1870 }
1871 
1872 /**
1873  * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1874  * @ivrs: Pointer to the IVRS header
1875  *
1876  * This function search through all IVDB of the maximum supported IVHD
1877  */
1878 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1879 {
1880 	u8 *base = (u8 *)ivrs;
1881 	struct ivhd_header *ivhd = (struct ivhd_header *)
1882 					(base + IVRS_HEADER_LENGTH);
1883 	u8 last_type = ivhd->type;
1884 	u16 devid = ivhd->devid;
1885 
1886 	while (((u8 *)ivhd - base < ivrs->length) &&
1887 	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1888 		u8 *p = (u8 *) ivhd;
1889 
1890 		if (ivhd->devid == devid)
1891 			last_type = ivhd->type;
1892 		ivhd = (struct ivhd_header *)(p + ivhd->length);
1893 	}
1894 
1895 	return last_type;
1896 }
1897 
1898 /*
1899  * Iterates over all IOMMU entries in the ACPI table, allocates the
1900  * IOMMU structure and initializes it with init_iommu_one()
1901  */
1902 static int __init init_iommu_all(struct acpi_table_header *table)
1903 {
1904 	u8 *p = (u8 *)table, *end = (u8 *)table;
1905 	struct ivhd_header *h;
1906 	struct amd_iommu *iommu;
1907 	int ret;
1908 
1909 	end += table->length;
1910 	p += IVRS_HEADER_LENGTH;
1911 
1912 	/* Phase 1: Process all IVHD blocks */
1913 	while (p < end) {
1914 		h = (struct ivhd_header *)p;
1915 		if (*p == amd_iommu_target_ivhd_type) {
1916 
1917 			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1918 				    "flags: %01x info %04x\n",
1919 				    h->pci_seg, PCI_BUS_NUM(h->devid),
1920 				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1921 				    h->cap_ptr, h->flags, h->info);
1922 			DUMP_printk("       mmio-addr: %016llx\n",
1923 				    h->mmio_phys);
1924 
1925 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1926 			if (iommu == NULL)
1927 				return -ENOMEM;
1928 
1929 			ret = init_iommu_one(iommu, h, table);
1930 			if (ret)
1931 				return ret;
1932 		}
1933 		p += h->length;
1934 
1935 	}
1936 	WARN_ON(p != end);
1937 
1938 	/* Phase 2 : Early feature support check */
1939 	get_global_efr();
1940 
1941 	/* Phase 3 : Enabling IOMMU features */
1942 	for_each_iommu(iommu) {
1943 		ret = init_iommu_one_late(iommu);
1944 		if (ret)
1945 			return ret;
1946 	}
1947 
1948 	return 0;
1949 }
1950 
1951 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1952 {
1953 	u64 val;
1954 	struct pci_dev *pdev = iommu->dev;
1955 
1956 	if (!iommu_feature(iommu, FEATURE_PC))
1957 		return;
1958 
1959 	amd_iommu_pc_present = true;
1960 
1961 	pci_info(pdev, "IOMMU performance counters supported\n");
1962 
1963 	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1964 	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1965 	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1966 
1967 	return;
1968 }
1969 
1970 static ssize_t amd_iommu_show_cap(struct device *dev,
1971 				  struct device_attribute *attr,
1972 				  char *buf)
1973 {
1974 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1975 	return sysfs_emit(buf, "%x\n", iommu->cap);
1976 }
1977 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1978 
1979 static ssize_t amd_iommu_show_features(struct device *dev,
1980 				       struct device_attribute *attr,
1981 				       char *buf)
1982 {
1983 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1984 	return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features);
1985 }
1986 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1987 
1988 static struct attribute *amd_iommu_attrs[] = {
1989 	&dev_attr_cap.attr,
1990 	&dev_attr_features.attr,
1991 	NULL,
1992 };
1993 
1994 static struct attribute_group amd_iommu_group = {
1995 	.name = "amd-iommu",
1996 	.attrs = amd_iommu_attrs,
1997 };
1998 
1999 static const struct attribute_group *amd_iommu_groups[] = {
2000 	&amd_iommu_group,
2001 	NULL,
2002 };
2003 
2004 /*
2005  * Note: IVHD 0x11 and 0x40 also contains exact copy
2006  * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
2007  * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
2008  */
2009 static void __init late_iommu_features_init(struct amd_iommu *iommu)
2010 {
2011 	u64 features, features2;
2012 
2013 	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
2014 		return;
2015 
2016 	/* read extended feature bits */
2017 	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
2018 	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
2019 
2020 	if (!iommu->features) {
2021 		iommu->features = features;
2022 		iommu->features2 = features2;
2023 		return;
2024 	}
2025 
2026 	/*
2027 	 * Sanity check and warn if EFR values from
2028 	 * IVHD and MMIO conflict.
2029 	 */
2030 	if (features != iommu->features ||
2031 	    features2 != iommu->features2) {
2032 		pr_warn(FW_WARN
2033 			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2034 			features, iommu->features,
2035 			features2, iommu->features2);
2036 	}
2037 }
2038 
2039 static int __init iommu_init_pci(struct amd_iommu *iommu)
2040 {
2041 	int cap_ptr = iommu->cap_ptr;
2042 	int ret;
2043 
2044 	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2045 						 PCI_BUS_NUM(iommu->devid),
2046 						 iommu->devid & 0xff);
2047 	if (!iommu->dev)
2048 		return -ENODEV;
2049 
2050 	/* Prevent binding other PCI device drivers to IOMMU devices */
2051 	iommu->dev->match_driver = false;
2052 
2053 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2054 			      &iommu->cap);
2055 
2056 	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2057 		amd_iommu_iotlb_sup = false;
2058 
2059 	late_iommu_features_init(iommu);
2060 
2061 	if (iommu_feature(iommu, FEATURE_GT)) {
2062 		int glxval;
2063 		u32 max_pasid;
2064 		u64 pasmax;
2065 
2066 		pasmax = iommu->features & FEATURE_PASID_MASK;
2067 		pasmax >>= FEATURE_PASID_SHIFT;
2068 		max_pasid  = (1 << (pasmax + 1)) - 1;
2069 
2070 		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
2071 
2072 		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
2073 
2074 		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
2075 		glxval >>= FEATURE_GLXVAL_SHIFT;
2076 
2077 		if (amd_iommu_max_glx_val == -1)
2078 			amd_iommu_max_glx_val = glxval;
2079 		else
2080 			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2081 	}
2082 
2083 	if (iommu_feature(iommu, FEATURE_GT) &&
2084 	    iommu_feature(iommu, FEATURE_PPR)) {
2085 		iommu->is_iommu_v2   = true;
2086 		amd_iommu_v2_present = true;
2087 	}
2088 
2089 	if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
2090 		return -ENOMEM;
2091 
2092 	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2093 		pr_info("Using strict mode due to virtualization\n");
2094 		iommu_set_dma_strict();
2095 		amd_iommu_np_cache = true;
2096 	}
2097 
2098 	init_iommu_perf_ctr(iommu);
2099 
2100 	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2101 		if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
2102 		    !iommu_feature(iommu, FEATURE_GT)) {
2103 			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
2104 			amd_iommu_pgtable = AMD_IOMMU_V1;
2105 		} else if (iommu_default_passthrough()) {
2106 			pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n");
2107 			amd_iommu_pgtable = AMD_IOMMU_V1;
2108 		}
2109 	}
2110 
2111 	if (is_rd890_iommu(iommu->dev)) {
2112 		int i, j;
2113 
2114 		iommu->root_pdev =
2115 			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2116 						    iommu->dev->bus->number,
2117 						    PCI_DEVFN(0, 0));
2118 
2119 		/*
2120 		 * Some rd890 systems may not be fully reconfigured by the
2121 		 * BIOS, so it's necessary for us to store this information so
2122 		 * it can be reprogrammed on resume
2123 		 */
2124 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2125 				&iommu->stored_addr_lo);
2126 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2127 				&iommu->stored_addr_hi);
2128 
2129 		/* Low bit locks writes to configuration space */
2130 		iommu->stored_addr_lo &= ~1;
2131 
2132 		for (i = 0; i < 6; i++)
2133 			for (j = 0; j < 0x12; j++)
2134 				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2135 
2136 		for (i = 0; i < 0x83; i++)
2137 			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2138 	}
2139 
2140 	amd_iommu_erratum_746_workaround(iommu);
2141 	amd_iommu_ats_write_check_workaround(iommu);
2142 
2143 	ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2144 			       amd_iommu_groups, "ivhd%d", iommu->index);
2145 	if (ret)
2146 		return ret;
2147 
2148 	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2149 
2150 	return pci_enable_device(iommu->dev);
2151 }
2152 
2153 static void print_iommu_info(void)
2154 {
2155 	static const char * const feat_str[] = {
2156 		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2157 		"IA", "GA", "HE", "PC"
2158 	};
2159 	struct amd_iommu *iommu;
2160 
2161 	for_each_iommu(iommu) {
2162 		struct pci_dev *pdev = iommu->dev;
2163 		int i;
2164 
2165 		pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
2166 
2167 		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
2168 			pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
2169 
2170 			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2171 				if (iommu_feature(iommu, (1ULL << i)))
2172 					pr_cont(" %s", feat_str[i]);
2173 			}
2174 
2175 			if (iommu->features & FEATURE_GAM_VAPIC)
2176 				pr_cont(" GA_vAPIC");
2177 
2178 			if (iommu->features & FEATURE_SNP)
2179 				pr_cont(" SNP");
2180 
2181 			pr_cont("\n");
2182 		}
2183 	}
2184 	if (irq_remapping_enabled) {
2185 		pr_info("Interrupt remapping enabled\n");
2186 		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2187 			pr_info("X2APIC enabled\n");
2188 	}
2189 	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2190 		pr_info("V2 page table enabled (Paging mode : %d level)\n",
2191 			amd_iommu_gpt_level);
2192 	}
2193 }
2194 
2195 static int __init amd_iommu_init_pci(void)
2196 {
2197 	struct amd_iommu *iommu;
2198 	struct amd_iommu_pci_seg *pci_seg;
2199 	int ret;
2200 
2201 	for_each_iommu(iommu) {
2202 		ret = iommu_init_pci(iommu);
2203 		if (ret) {
2204 			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2205 			       iommu->index, ret);
2206 			goto out;
2207 		}
2208 		/* Need to setup range after PCI init */
2209 		iommu_set_cwwb_range(iommu);
2210 	}
2211 
2212 	/*
2213 	 * Order is important here to make sure any unity map requirements are
2214 	 * fulfilled. The unity mappings are created and written to the device
2215 	 * table during the iommu_init_pci() call.
2216 	 *
2217 	 * After that we call init_device_table_dma() to make sure any
2218 	 * uninitialized DTE will block DMA, and in the end we flush the caches
2219 	 * of all IOMMUs to make sure the changes to the device table are
2220 	 * active.
2221 	 */
2222 	for_each_pci_segment(pci_seg)
2223 		init_device_table_dma(pci_seg);
2224 
2225 	for_each_iommu(iommu)
2226 		iommu_flush_all_caches(iommu);
2227 
2228 	print_iommu_info();
2229 
2230 out:
2231 	return ret;
2232 }
2233 
2234 /****************************************************************************
2235  *
2236  * The following functions initialize the MSI interrupts for all IOMMUs
2237  * in the system. It's a bit challenging because there could be multiple
2238  * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2239  * pci_dev.
2240  *
2241  ****************************************************************************/
2242 
2243 static int iommu_setup_msi(struct amd_iommu *iommu)
2244 {
2245 	int r;
2246 
2247 	r = pci_enable_msi(iommu->dev);
2248 	if (r)
2249 		return r;
2250 
2251 	r = request_threaded_irq(iommu->dev->irq,
2252 				 amd_iommu_int_handler,
2253 				 amd_iommu_int_thread,
2254 				 0, "AMD-Vi",
2255 				 iommu);
2256 
2257 	if (r) {
2258 		pci_disable_msi(iommu->dev);
2259 		return r;
2260 	}
2261 
2262 	return 0;
2263 }
2264 
2265 union intcapxt {
2266 	u64	capxt;
2267 	struct {
2268 		u64	reserved_0		:  2,
2269 			dest_mode_logical	:  1,
2270 			reserved_1		:  5,
2271 			destid_0_23		: 24,
2272 			vector			:  8,
2273 			reserved_2		: 16,
2274 			destid_24_31		:  8;
2275 	};
2276 } __attribute__ ((packed));
2277 
2278 
2279 static struct irq_chip intcapxt_controller;
2280 
/* irq_domain_ops.activate callback: nothing to do, always reports success. */
static int intcapxt_irqdomain_activate(struct irq_domain *domain,
				       struct irq_data *irqd,
				       bool reserve)
{
	/* No activation work is done in this domain */
	return 0;
}
2286 
/* irq_domain_ops.deactivate callback: intentionally empty. */
static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
					  struct irq_data *irqd)
{
	/* No deactivation work is done in this domain */
}
2291 
2292 
2293 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2294 				    unsigned int nr_irqs, void *arg)
2295 {
2296 	struct irq_alloc_info *info = arg;
2297 	int i, ret;
2298 
2299 	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2300 		return -EINVAL;
2301 
2302 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2303 	if (ret < 0)
2304 		return ret;
2305 
2306 	for (i = virq; i < virq + nr_irqs; i++) {
2307 		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2308 
2309 		irqd->chip = &intcapxt_controller;
2310 		irqd->chip_data = info->data;
2311 		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2312 	}
2313 
2314 	return ret;
2315 }
2316 
/* irq_domain_ops.free callback: hand the range to irq_domain_free_irqs_top(). */
static void intcapxt_irqdomain_free(struct irq_domain *domain,
				    unsigned int virq,
				    unsigned int nr_irqs)
{
	irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
2322 
2323 
2324 static void intcapxt_unmask_irq(struct irq_data *irqd)
2325 {
2326 	struct amd_iommu *iommu = irqd->chip_data;
2327 	struct irq_cfg *cfg = irqd_cfg(irqd);
2328 	union intcapxt xt;
2329 
2330 	xt.capxt = 0ULL;
2331 	xt.dest_mode_logical = apic->dest_mode_logical;
2332 	xt.vector = cfg->vector;
2333 	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2334 	xt.destid_24_31 = cfg->dest_apicid >> 24;
2335 
2336 	/**
2337 	 * Current IOMMU implementation uses the same IRQ for all
2338 	 * 3 IOMMU interrupts.
2339 	 */
2340 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2341 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2342 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2343 }
2344 
2345 static void intcapxt_mask_irq(struct irq_data *irqd)
2346 {
2347 	struct amd_iommu *iommu = irqd->chip_data;
2348 
2349 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2350 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2351 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2352 }
2353 
2354 
2355 static int intcapxt_set_affinity(struct irq_data *irqd,
2356 				 const struct cpumask *mask, bool force)
2357 {
2358 	struct irq_data *parent = irqd->parent_data;
2359 	int ret;
2360 
2361 	ret = parent->chip->irq_set_affinity(parent, mask, force);
2362 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2363 		return ret;
2364 	return 0;
2365 }
2366 
/*
 * irq_chip.irq_set_wake callback: enabling wakeup is rejected with
 * -EOPNOTSUPP, disabling it always succeeds.
 */
static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
{
	if (on)
		return -EOPNOTSUPP;

	return 0;
}
2371 
2372 static struct irq_chip intcapxt_controller = {
2373 	.name			= "IOMMU-MSI",
2374 	.irq_unmask		= intcapxt_unmask_irq,
2375 	.irq_mask		= intcapxt_mask_irq,
2376 	.irq_ack		= irq_chip_ack_parent,
2377 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
2378 	.irq_set_affinity       = intcapxt_set_affinity,
2379 	.irq_set_wake		= intcapxt_set_wake,
2380 	.flags			= IRQCHIP_MASK_ON_SUSPEND,
2381 };
2382 
2383 static const struct irq_domain_ops intcapxt_domain_ops = {
2384 	.alloc			= intcapxt_irqdomain_alloc,
2385 	.free			= intcapxt_irqdomain_free,
2386 	.activate		= intcapxt_irqdomain_activate,
2387 	.deactivate		= intcapxt_irqdomain_deactivate,
2388 };
2389 
2390 
2391 static struct irq_domain *iommu_irqdomain;
2392 
2393 static struct irq_domain *iommu_get_irqdomain(void)
2394 {
2395 	struct fwnode_handle *fn;
2396 
2397 	/* No need for locking here (yet) as the init is single-threaded */
2398 	if (iommu_irqdomain)
2399 		return iommu_irqdomain;
2400 
2401 	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2402 	if (!fn)
2403 		return NULL;
2404 
2405 	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2406 						      fn, &intcapxt_domain_ops,
2407 						      NULL);
2408 	if (!iommu_irqdomain)
2409 		irq_domain_free_fwnode(fn);
2410 
2411 	return iommu_irqdomain;
2412 }
2413 
2414 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2415 {
2416 	struct irq_domain *domain;
2417 	struct irq_alloc_info info;
2418 	int irq, ret;
2419 	int node = dev_to_node(&iommu->dev->dev);
2420 
2421 	domain = iommu_get_irqdomain();
2422 	if (!domain)
2423 		return -ENXIO;
2424 
2425 	init_irq_alloc_info(&info, NULL);
2426 	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2427 	info.data = iommu;
2428 
2429 	irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2430 	if (irq < 0) {
2431 		irq_domain_remove(domain);
2432 		return irq;
2433 	}
2434 
2435 	ret = request_threaded_irq(irq, amd_iommu_int_handler,
2436 				   amd_iommu_int_thread, 0, "AMD-Vi", iommu);
2437 	if (ret) {
2438 		irq_domain_free_irqs(irq, 1);
2439 		irq_domain_remove(domain);
2440 		return ret;
2441 	}
2442 
2443 	return 0;
2444 }
2445 
2446 static int iommu_init_irq(struct amd_iommu *iommu)
2447 {
2448 	int ret;
2449 
2450 	if (iommu->int_enabled)
2451 		goto enable_faults;
2452 
2453 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2454 		ret = iommu_setup_intcapxt(iommu);
2455 	else if (iommu->dev->msi_cap)
2456 		ret = iommu_setup_msi(iommu);
2457 	else
2458 		ret = -ENODEV;
2459 
2460 	if (ret)
2461 		return ret;
2462 
2463 	iommu->int_enabled = true;
2464 enable_faults:
2465 
2466 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2467 		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2468 
2469 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2470 
2471 	if (iommu->ppr_log != NULL)
2472 		iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
2473 	return 0;
2474 }
2475 
2476 /****************************************************************************
2477  *
2478  * The next functions belong to the third pass of parsing the ACPI
2479  * table. In this last pass the memory mapping requirements are
2480  * gathered (like exclusion and unity mapping ranges).
2481  *
2482  ****************************************************************************/
2483 
2484 static void __init free_unity_maps(void)
2485 {
2486 	struct unity_map_entry *entry, *next;
2487 	struct amd_iommu_pci_seg *p, *pci_seg;
2488 
2489 	for_each_pci_segment_safe(pci_seg, p) {
2490 		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2491 			list_del(&entry->list);
2492 			kfree(entry);
2493 		}
2494 	}
2495 }
2496 
2497 /* called for unity map ACPI definition */
2498 static int __init init_unity_map_range(struct ivmd_header *m,
2499 				       struct acpi_table_header *ivrs_base)
2500 {
2501 	struct unity_map_entry *e = NULL;
2502 	struct amd_iommu_pci_seg *pci_seg;
2503 	char *s;
2504 
2505 	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2506 	if (pci_seg == NULL)
2507 		return -ENOMEM;
2508 
2509 	e = kzalloc(sizeof(*e), GFP_KERNEL);
2510 	if (e == NULL)
2511 		return -ENOMEM;
2512 
2513 	switch (m->type) {
2514 	default:
2515 		kfree(e);
2516 		return 0;
2517 	case ACPI_IVMD_TYPE:
2518 		s = "IVMD_TYPEi\t\t\t";
2519 		e->devid_start = e->devid_end = m->devid;
2520 		break;
2521 	case ACPI_IVMD_TYPE_ALL:
2522 		s = "IVMD_TYPE_ALL\t\t";
2523 		e->devid_start = 0;
2524 		e->devid_end = pci_seg->last_bdf;
2525 		break;
2526 	case ACPI_IVMD_TYPE_RANGE:
2527 		s = "IVMD_TYPE_RANGE\t\t";
2528 		e->devid_start = m->devid;
2529 		e->devid_end = m->aux;
2530 		break;
2531 	}
2532 	e->address_start = PAGE_ALIGN(m->range_start);
2533 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2534 	e->prot = m->flags >> 1;
2535 
2536 	/*
2537 	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2538 	 * since some buggy BIOSes might lead to the overwritten exclusion
2539 	 * range (exclusion_start and exclusion_length members). This
2540 	 * happens when there are multiple exclusion ranges (IVMD entries)
2541 	 * defined in ACPI table.
2542 	 */
2543 	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2544 		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2545 
2546 	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2547 		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2548 		    " flags: %x\n", s, m->pci_seg,
2549 		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2550 		    PCI_FUNC(e->devid_start), m->pci_seg,
2551 		    PCI_BUS_NUM(e->devid_end),
2552 		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2553 		    e->address_start, e->address_end, m->flags);
2554 
2555 	list_add_tail(&e->list, &pci_seg->unity_map);
2556 
2557 	return 0;
2558 }
2559 
2560 /* iterates over all memory definitions we find in the ACPI table */
2561 static int __init init_memory_definitions(struct acpi_table_header *table)
2562 {
2563 	u8 *p = (u8 *)table, *end = (u8 *)table;
2564 	struct ivmd_header *m;
2565 
2566 	end += table->length;
2567 	p += IVRS_HEADER_LENGTH;
2568 
2569 	while (p < end) {
2570 		m = (struct ivmd_header *)p;
2571 		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2572 			init_unity_map_range(m, table);
2573 
2574 		p += m->length;
2575 	}
2576 
2577 	return 0;
2578 }
2579 
2580 /*
2581  * Init the device table to not allow DMA access for devices
2582  */
2583 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2584 {
2585 	u32 devid;
2586 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2587 
2588 	if (dev_table == NULL)
2589 		return;
2590 
2591 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2592 		__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
2593 		if (!amd_iommu_snp_en)
2594 			__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
2595 	}
2596 }
2597 
2598 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2599 {
2600 	u32 devid;
2601 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2602 
2603 	if (dev_table == NULL)
2604 		return;
2605 
2606 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2607 		dev_table[devid].data[0] = 0ULL;
2608 		dev_table[devid].data[1] = 0ULL;
2609 	}
2610 }
2611 
2612 static void init_device_table(void)
2613 {
2614 	struct amd_iommu_pci_seg *pci_seg;
2615 	u32 devid;
2616 
2617 	if (!amd_iommu_irq_remap)
2618 		return;
2619 
2620 	for_each_pci_segment(pci_seg) {
2621 		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2622 			__set_dev_entry_bit(pci_seg->dev_table,
2623 					    devid, DEV_ENTRY_IRQ_TBL_EN);
2624 	}
2625 }
2626 
2627 static void iommu_init_flags(struct amd_iommu *iommu)
2628 {
2629 	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2630 		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2631 		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2632 
2633 	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2634 		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2635 		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2636 
2637 	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2638 		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2639 		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2640 
2641 	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2642 		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2643 		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2644 
2645 	/*
2646 	 * make IOMMU memory accesses cache coherent
2647 	 */
2648 	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2649 
2650 	/* Set IOTLB invalidation timeout to 1s */
2651 	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2652 }
2653 
2654 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2655 {
2656 	int i, j;
2657 	u32 ioc_feature_control;
2658 	struct pci_dev *pdev = iommu->root_pdev;
2659 
2660 	/* RD890 BIOSes may not have completely reconfigured the iommu */
2661 	if (!is_rd890_iommu(iommu->dev) || !pdev)
2662 		return;
2663 
2664 	/*
2665 	 * First, we need to ensure that the iommu is enabled. This is
2666 	 * controlled by a register in the northbridge
2667 	 */
2668 
2669 	/* Select Northbridge indirect register 0x75 and enable writing */
2670 	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2671 	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2672 
2673 	/* Enable the iommu */
2674 	if (!(ioc_feature_control & 0x1))
2675 		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2676 
2677 	/* Restore the iommu BAR */
2678 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2679 			       iommu->stored_addr_lo);
2680 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2681 			       iommu->stored_addr_hi);
2682 
2683 	/* Restore the l1 indirect regs for each of the 6 l1s */
2684 	for (i = 0; i < 6; i++)
2685 		for (j = 0; j < 0x12; j++)
2686 			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2687 
2688 	/* Restore the l2 indirect regs */
2689 	for (i = 0; i < 0x83; i++)
2690 		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2691 
2692 	/* Lock PCI setup registers */
2693 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2694 			       iommu->stored_addr_lo | 1);
2695 }
2696 
/*
 * Select the IRTE operations for @iommu according to amd_iommu_guest_ir: in
 * the vAPIC and legacy GA modes CONTROL_GA_EN is enabled and the 128-bit
 * IRTE ops are used, otherwise the 32-bit IRTE ops. Compiles to an empty
 * function without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_ga(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	if (amd_iommu_guest_ir == AMD_IOMMU_GUEST_IR_VAPIC ||
	    amd_iommu_guest_ir == AMD_IOMMU_GUEST_IR_LEGACY_GA) {
		iommu_feature_enable(iommu, CONTROL_GA_EN);
		iommu->irte_ops = &irte_128_ops;
	} else {
		iommu->irte_ops = &irte_32_ops;
	}
#endif
}
2712 
/*
 * Bring a single IOMMU from the disabled state to the enabled state. The
 * hardware is disabled first, then configured step by step, and only then
 * enabled; the caches are flushed as the last step.
 */
static void early_enable_iommu(struct amd_iommu *iommu)
{
	/* Start from a disabled IOMMU before changing its configuration */
	iommu_disable(iommu);

	/* Control flags and device table */
	iommu_init_flags(iommu);
	iommu_set_device_table(iommu);

	/* Command and event buffers */
	iommu_enable_command_buffer(iommu);
	iommu_enable_event_buffer(iommu);

	iommu_set_exclusion_range(iommu);

	/* Interrupt related modes */
	iommu_enable_ga(iommu);
	iommu_enable_xt(iommu);

	/* Everything is programmed: switch the IOMMU on and flush */
	iommu_enable(iommu);
	iommu_flush_all_caches(iommu);
}
2726 
2727 /*
2728  * This function finally enables all IOMMUs found in the system after
2729  * they have been initialized.
2730  *
2731  * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy
2732  * the old content of device table entries. Not this case or copy failed,
2733  * just continue as normal kernel does.
2734  */
2735 static void early_enable_iommus(void)
2736 {
2737 	struct amd_iommu *iommu;
2738 	struct amd_iommu_pci_seg *pci_seg;
2739 
2740 	if (!copy_device_table()) {
2741 		/*
2742 		 * If come here because of failure in copying device table from old
2743 		 * kernel with all IOMMUs enabled, print error message and try to
2744 		 * free allocated old_dev_tbl_cpy.
2745 		 */
2746 		if (amd_iommu_pre_enabled)
2747 			pr_err("Failed to copy DEV table from previous kernel.\n");
2748 
2749 		for_each_pci_segment(pci_seg) {
2750 			if (pci_seg->old_dev_tbl_cpy != NULL) {
2751 				free_pages((unsigned long)pci_seg->old_dev_tbl_cpy,
2752 						get_order(pci_seg->dev_table_size));
2753 				pci_seg->old_dev_tbl_cpy = NULL;
2754 			}
2755 		}
2756 
2757 		for_each_iommu(iommu) {
2758 			clear_translation_pre_enabled(iommu);
2759 			early_enable_iommu(iommu);
2760 		}
2761 	} else {
2762 		pr_info("Copied DEV table from previous kernel.\n");
2763 
2764 		for_each_pci_segment(pci_seg) {
2765 			free_pages((unsigned long)pci_seg->dev_table,
2766 				   get_order(pci_seg->dev_table_size));
2767 			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2768 		}
2769 
2770 		for_each_iommu(iommu) {
2771 			iommu_disable_command_buffer(iommu);
2772 			iommu_disable_event_buffer(iommu);
2773 			iommu_enable_command_buffer(iommu);
2774 			iommu_enable_event_buffer(iommu);
2775 			iommu_enable_ga(iommu);
2776 			iommu_enable_xt(iommu);
2777 			iommu_set_device_table(iommu);
2778 			iommu_flush_all_caches(iommu);
2779 		}
2780 	}
2781 }
2782 
2783 static void enable_iommus_v2(void)
2784 {
2785 	struct amd_iommu *iommu;
2786 
2787 	for_each_iommu(iommu) {
2788 		iommu_enable_ppr_log(iommu);
2789 		iommu_enable_gt(iommu);
2790 	}
2791 }
2792 
/*
 * Enable the virtual APIC (GA mode) on all IOMMUs, if possible.
 *
 * A GA log left running is stopped first. The guest IR mode is downgraded
 * to legacy GA when vAPIC mode was requested but not every IOMMU has
 * FEATURE_GAM_VAPIC, or when SNP is enabled without SNP AVIC support in
 * EFR2. Otherwise the GA log is initialized and enabled on each IOMMU and
 * the IRQ posting capability is advertised. Compiles to an empty function
 * without CONFIG_IRQ_REMAP.
 */
static void enable_iommus_vapic(void)
{
#ifdef CONFIG_IRQ_REMAP
	struct amd_iommu *iommu;
	u32 status, tries;

	for_each_iommu(iommu) {
		/*
		 * Disable GALog if already running. It could have been enabled
		 * in the previous boot before kdump.
		 */
		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
		if ((status & MMIO_STATUS_GALOG_RUN_MASK) == 0)
			continue;

		iommu_feature_disable(iommu, CONTROL_GALOG_EN);
		iommu_feature_disable(iommu, CONTROL_GAINT_EN);

		/*
		 * Need to set and poll check the GALOGRun bit to zero before
		 * we can set/ modify GA Log registers safely.
		 */
		for (tries = 0; tries < LOOP_TIMEOUT; tries++) {
			status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
			if ((status & MMIO_STATUS_GALOG_RUN_MASK) == 0)
				break;
			udelay(10);
		}

		/* The GA log did not stop in time: give up */
		if (WARN_ON(tries >= LOOP_TIMEOUT))
			return;
	}

	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
	    !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
		return;
	}

	if (amd_iommu_snp_en &&
	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
		pr_warn("Force to disable Virtual APIC due to SNP\n");
		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
		return;
	}

	/* Enabling GAM and SNPAVIC support */
	for_each_iommu(iommu) {
		if (iommu_init_ga_log(iommu))
			return;

		if (iommu_ga_log_enable(iommu))
			return;

		iommu_feature_enable(iommu, CONTROL_GAM_EN);
		if (amd_iommu_snp_en)
			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
	}

	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
	pr_info("Virtual APIC enabled\n");
#endif
}
2854 
/*
 * Run the complete enable sequence: the early enable step first, then the
 * virtual APIC setup, then the v2 (PPR log / GT) setup.
 */
static void enable_iommus(void)
{
	early_enable_iommus();

	enable_iommus_vapic();

	enable_iommus_v2();
}
2861 
2862 static void disable_iommus(void)
2863 {
2864 	struct amd_iommu *iommu;
2865 
2866 	for_each_iommu(iommu)
2867 		iommu_disable(iommu);
2868 
2869 #ifdef CONFIG_IRQ_REMAP
2870 	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2871 		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2872 #endif
2873 }
2874 
2875 /*
2876  * Suspend/Resume support
2877  * disable suspend until real resume implemented
2878  */
2879 
2880 static void amd_iommu_resume(void)
2881 {
2882 	struct amd_iommu *iommu;
2883 
2884 	for_each_iommu(iommu)
2885 		iommu_apply_resume_quirks(iommu);
2886 
2887 	/* re-load the hardware */
2888 	enable_iommus();
2889 
2890 	amd_iommu_enable_interrupts();
2891 }
2892 
/* syscore suspend callback: disable all IOMMUs; always returns 0. */
static int amd_iommu_suspend(void)
{
	/* disable IOMMUs to go out of the way for BIOS */
	disable_iommus();

	return 0;
}
2900 
2901 static struct syscore_ops amd_iommu_syscore_ops = {
2902 	.suspend = amd_iommu_suspend,
2903 	.resume = amd_iommu_resume,
2904 };
2905 
2906 static void __init free_iommu_resources(void)
2907 {
2908 	kmem_cache_destroy(amd_iommu_irq_cache);
2909 	amd_iommu_irq_cache = NULL;
2910 
2911 	free_iommu_all();
2912 	free_pci_segments();
2913 }
2914 
2915 /* SB IOAPIC is always on this device in AMD systems */
2916 #define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
2917 
2918 static bool __init check_ioapic_information(void)
2919 {
2920 	const char *fw_bug = FW_BUG;
2921 	bool ret, has_sb_ioapic;
2922 	int idx;
2923 
2924 	has_sb_ioapic = false;
2925 	ret           = false;
2926 
2927 	/*
2928 	 * If we have map overrides on the kernel command line the
2929 	 * messages in this function might not describe firmware bugs
2930 	 * anymore - so be careful
2931 	 */
2932 	if (cmdline_maps)
2933 		fw_bug = "";
2934 
2935 	for (idx = 0; idx < nr_ioapics; idx++) {
2936 		int devid, id = mpc_ioapic_id(idx);
2937 
2938 		devid = get_ioapic_devid(id);
2939 		if (devid < 0) {
2940 			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2941 				fw_bug, id);
2942 			ret = false;
2943 		} else if (devid == IOAPIC_SB_DEVID) {
2944 			has_sb_ioapic = true;
2945 			ret           = true;
2946 		}
2947 	}
2948 
2949 	if (!has_sb_ioapic) {
2950 		/*
2951 		 * We expect the SB IOAPIC to be listed in the IVRS
2952 		 * table. The system timer is connected to the SB IOAPIC
2953 		 * and if we don't have it in the list the system will
2954 		 * panic at boot time.  This situation usually happens
2955 		 * when the BIOS is buggy and provides us the wrong
2956 		 * device id for the IOAPIC in the system.
2957 		 */
2958 		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2959 	}
2960 
2961 	if (!ret)
2962 		pr_err("Disabling interrupt remapping\n");
2963 
2964 	return ret;
2965 }
2966 
2967 static void __init free_dma_resources(void)
2968 {
2969 	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2970 		   get_order(MAX_DOMAIN_ID/8));
2971 	amd_iommu_pd_alloc_bitmap = NULL;
2972 
2973 	free_unity_maps();
2974 }
2975 
2976 static void __init ivinfo_init(void *ivrs)
2977 {
2978 	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
2979 }
2980 
2981 /*
2982  * This is the hardware init function for AMD IOMMU in the system.
2983  * This function is called either from amd_iommu_init or from the interrupt
2984  * remapping setup code.
2985  *
2986  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2987  * four times:
2988  *
2989  *	1 pass) Discover the most comprehensive IVHD type to use.
2990  *
2991  *	2 pass) Find the highest PCI device id the driver has to handle.
2992  *		Upon this information the size of the data structures is
2993  *		determined that needs to be allocated.
2994  *
2995  *	3 pass) Initialize the data structures just allocated with the
2996  *		information in the ACPI table about available AMD IOMMUs
2997  *		in the system. It also maps the PCI devices in the
2998  *		system to specific IOMMUs
2999  *
3000  *	4 pass) After the basic data structures are allocated and
3001  *		initialized we update them with information about memory
3002  *		remapping requirements parsed out of the ACPI table in
3003  *		this last pass.
3004  *
3005  * After everything is set up the IOMMUs are enabled and the necessary
3006  * hotplug and suspend notifiers are registered.
3007  */
3008 static int __init early_amd_iommu_init(void)
3009 {
3010 	struct acpi_table_header *ivrs_base;
3011 	int remap_cache_sz, ret;
3012 	acpi_status status;
3013 
3014 	if (!amd_iommu_detected)
3015 		return -ENODEV;
3016 
3017 	status = acpi_get_table("IVRS", 0, &ivrs_base);
3018 	if (status == AE_NOT_FOUND)
3019 		return -ENODEV;
3020 	else if (ACPI_FAILURE(status)) {
3021 		const char *err = acpi_format_exception(status);
3022 		pr_err("IVRS table error: %s\n", err);
3023 		return -EINVAL;
3024 	}
3025 
3026 	/*
3027 	 * Validate checksum here so we don't need to do it when
3028 	 * we actually parse the table
3029 	 */
3030 	ret = check_ivrs_checksum(ivrs_base);
3031 	if (ret)
3032 		goto out;
3033 
3034 	ivinfo_init(ivrs_base);
3035 
3036 	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3037 	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3038 
3039 	/* Device table - directly used by all IOMMUs */
3040 	ret = -ENOMEM;
3041 
3042 	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
3043 					    GFP_KERNEL | __GFP_ZERO,
3044 					    get_order(MAX_DOMAIN_ID/8));
3045 	if (amd_iommu_pd_alloc_bitmap == NULL)
3046 		goto out;
3047 
3048 	/*
3049 	 * never allocate domain 0 because its used as the non-allocated and
3050 	 * error value placeholder
3051 	 */
3052 	__set_bit(0, amd_iommu_pd_alloc_bitmap);
3053 
3054 	/*
3055 	 * now the data structures are allocated and basically initialized
3056 	 * start the real acpi table scan
3057 	 */
3058 	ret = init_iommu_all(ivrs_base);
3059 	if (ret)
3060 		goto out;
3061 
3062 	/* 5 level guest page table */
3063 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3064 	    check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
3065 		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
3066 
3067 	/* Disable any previously enabled IOMMUs */
3068 	if (!is_kdump_kernel() || amd_iommu_disabled)
3069 		disable_iommus();
3070 
3071 	if (amd_iommu_irq_remap)
3072 		amd_iommu_irq_remap = check_ioapic_information();
3073 
3074 	if (amd_iommu_irq_remap) {
3075 		struct amd_iommu_pci_seg *pci_seg;
3076 		/*
3077 		 * Interrupt remapping enabled, create kmem_cache for the
3078 		 * remapping tables.
3079 		 */
3080 		ret = -ENOMEM;
3081 		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3082 			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
3083 		else
3084 			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
3085 		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
3086 							remap_cache_sz,
3087 							DTE_INTTAB_ALIGNMENT,
3088 							0, NULL);
3089 		if (!amd_iommu_irq_cache)
3090 			goto out;
3091 
3092 		for_each_pci_segment(pci_seg) {
3093 			if (alloc_irq_lookup_table(pci_seg))
3094 				goto out;
3095 		}
3096 	}
3097 
3098 	ret = init_memory_definitions(ivrs_base);
3099 	if (ret)
3100 		goto out;
3101 
3102 	/* init the device table */
3103 	init_device_table();
3104 
3105 out:
3106 	/* Don't leak any ACPI memory */
3107 	acpi_put_table(ivrs_base);
3108 
3109 	return ret;
3110 }
3111 
3112 static int amd_iommu_enable_interrupts(void)
3113 {
3114 	struct amd_iommu *iommu;
3115 	int ret = 0;
3116 
3117 	for_each_iommu(iommu) {
3118 		ret = iommu_init_irq(iommu);
3119 		if (ret)
3120 			goto out;
3121 	}
3122 
3123 out:
3124 	return ret;
3125 }
3126 
3127 static bool __init detect_ivrs(void)
3128 {
3129 	struct acpi_table_header *ivrs_base;
3130 	acpi_status status;
3131 	int i;
3132 
3133 	status = acpi_get_table("IVRS", 0, &ivrs_base);
3134 	if (status == AE_NOT_FOUND)
3135 		return false;
3136 	else if (ACPI_FAILURE(status)) {
3137 		const char *err = acpi_format_exception(status);
3138 		pr_err("IVRS table error: %s\n", err);
3139 		return false;
3140 	}
3141 
3142 	acpi_put_table(ivrs_base);
3143 
3144 	if (amd_iommu_force_enable)
3145 		goto out;
3146 
3147 	/* Don't use IOMMU if there is Stoney Ridge graphics */
3148 	for (i = 0; i < 32; i++) {
3149 		u32 pci_id;
3150 
3151 		pci_id = read_pci_config(0, i, 0, 0);
3152 		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3153 			pr_info("Disable IOMMU on Stoney Ridge\n");
3154 			return false;
3155 		}
3156 	}
3157 
3158 out:
3159 	/* Make sure ACS will be enabled during PCI probe */
3160 	pci_request_acs();
3161 
3162 	return true;
3163 }
3164 
3165 /****************************************************************************
3166  *
3167  * AMD IOMMU Initialization State Machine
3168  *
3169  ****************************************************************************/
3170 
3171 static int __init state_next(void)
3172 {
3173 	int ret = 0;
3174 
3175 	switch (init_state) {
3176 	case IOMMU_START_STATE:
3177 		if (!detect_ivrs()) {
3178 			init_state	= IOMMU_NOT_FOUND;
3179 			ret		= -ENODEV;
3180 		} else {
3181 			init_state	= IOMMU_IVRS_DETECTED;
3182 		}
3183 		break;
3184 	case IOMMU_IVRS_DETECTED:
3185 		if (amd_iommu_disabled) {
3186 			init_state = IOMMU_CMDLINE_DISABLED;
3187 			ret = -EINVAL;
3188 		} else {
3189 			ret = early_amd_iommu_init();
3190 			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3191 		}
3192 		break;
3193 	case IOMMU_ACPI_FINISHED:
3194 		early_enable_iommus();
3195 		x86_platform.iommu_shutdown = disable_iommus;
3196 		init_state = IOMMU_ENABLED;
3197 		break;
3198 	case IOMMU_ENABLED:
3199 		register_syscore_ops(&amd_iommu_syscore_ops);
3200 		ret = amd_iommu_init_pci();
3201 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3202 		enable_iommus_vapic();
3203 		enable_iommus_v2();
3204 		break;
3205 	case IOMMU_PCI_INIT:
3206 		ret = amd_iommu_enable_interrupts();
3207 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3208 		break;
3209 	case IOMMU_INTERRUPTS_EN:
3210 		init_state = IOMMU_INITIALIZED;
3211 		break;
3212 	case IOMMU_INITIALIZED:
3213 		/* Nothing to do */
3214 		break;
3215 	case IOMMU_NOT_FOUND:
3216 	case IOMMU_INIT_ERROR:
3217 	case IOMMU_CMDLINE_DISABLED:
3218 		/* Error states => do nothing */
3219 		ret = -EINVAL;
3220 		break;
3221 	default:
3222 		/* Unknown state */
3223 		BUG();
3224 	}
3225 
3226 	if (ret) {
3227 		free_dma_resources();
3228 		if (!irq_remapping_enabled) {
3229 			disable_iommus();
3230 			free_iommu_resources();
3231 		} else {
3232 			struct amd_iommu *iommu;
3233 			struct amd_iommu_pci_seg *pci_seg;
3234 
3235 			for_each_pci_segment(pci_seg)
3236 				uninit_device_table_dma(pci_seg);
3237 
3238 			for_each_iommu(iommu)
3239 				iommu_flush_all_caches(iommu);
3240 		}
3241 	}
3242 	return ret;
3243 }
3244 
3245 static int __init iommu_go_to_state(enum iommu_init_state state)
3246 {
3247 	int ret = -EINVAL;
3248 
3249 	while (init_state != state) {
3250 		if (init_state == IOMMU_NOT_FOUND         ||
3251 		    init_state == IOMMU_INIT_ERROR        ||
3252 		    init_state == IOMMU_CMDLINE_DISABLED)
3253 			break;
3254 		ret = state_next();
3255 	}
3256 
3257 	return ret;
3258 }
3259 
3260 #ifdef CONFIG_IRQ_REMAP
3261 int __init amd_iommu_prepare(void)
3262 {
3263 	int ret;
3264 
3265 	amd_iommu_irq_remap = true;
3266 
3267 	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3268 	if (ret) {
3269 		amd_iommu_irq_remap = false;
3270 		return ret;
3271 	}
3272 
3273 	return amd_iommu_irq_remap ? 0 : -ENODEV;
3274 }
3275 
3276 int __init amd_iommu_enable(void)
3277 {
3278 	int ret;
3279 
3280 	ret = iommu_go_to_state(IOMMU_ENABLED);
3281 	if (ret)
3282 		return ret;
3283 
3284 	irq_remapping_enabled = 1;
3285 	return amd_iommu_xt_mode;
3286 }
3287 
/* Disable hook: simply delegates to amd_iommu_suspend(). */
void amd_iommu_disable(void)
{
	amd_iommu_suspend();
}
3292 
/*
 * Re-enable hook: delegates to amd_iommu_resume().
 *
 * @mode is accepted but not used. Always returns 0.
 */
int amd_iommu_reenable(int mode)
{
	amd_iommu_resume();

	return 0;
}
3299 
3300 int __init amd_iommu_enable_faulting(void)
3301 {
3302 	/* We enable MSI later when PCI is initialized */
3303 	return 0;
3304 }
3305 #endif
3306 
3307 /*
3308  * This is the core init function for AMD IOMMU hardware in the system.
3309  * This function is called from the generic x86 DMA layer initialization
3310  * code.
3311  */
3312 static int __init amd_iommu_init(void)
3313 {
3314 	struct amd_iommu *iommu;
3315 	int ret;
3316 
3317 	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3318 #ifdef CONFIG_GART_IOMMU
3319 	if (ret && list_empty(&amd_iommu_list)) {
3320 		/*
3321 		 * We failed to initialize the AMD IOMMU - try fallback
3322 		 * to GART if possible.
3323 		 */
3324 		gart_iommu_init();
3325 	}
3326 #endif
3327 
3328 	for_each_iommu(iommu)
3329 		amd_iommu_debugfs_setup(iommu);
3330 
3331 	return ret;
3332 }
3333 
3334 static bool amd_iommu_sme_check(void)
3335 {
3336 	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3337 	    (boot_cpu_data.x86 != 0x17))
3338 		return true;
3339 
3340 	/* For Fam17h, a specific level of support is required */
3341 	if (boot_cpu_data.microcode >= 0x08001205)
3342 		return true;
3343 
3344 	if ((boot_cpu_data.microcode >= 0x08001126) &&
3345 	    (boot_cpu_data.microcode <= 0x080011ff))
3346 		return true;
3347 
3348 	pr_notice("IOMMU not currently supported when SME is active\n");
3349 
3350 	return false;
3351 }
3352 
3353 /****************************************************************************
3354  *
3355  * Early detect code. This code runs at IOMMU detection time in the DMA
3356  * layer. It just looks if there is an IVRS ACPI table to detect AMD
3357  * IOMMUs
3358  *
3359  ****************************************************************************/
3360 int __init amd_iommu_detect(void)
3361 {
3362 	int ret;
3363 
3364 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3365 		return -ENODEV;
3366 
3367 	if (!amd_iommu_sme_check())
3368 		return -ENODEV;
3369 
3370 	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3371 	if (ret)
3372 		return ret;
3373 
3374 	amd_iommu_detected = true;
3375 	iommu_detected = 1;
3376 	x86_init.iommu.iommu_init = amd_iommu_init;
3377 
3378 	return 1;
3379 }
3380 
3381 /****************************************************************************
3382  *
3383  * Parsing functions for the AMD IOMMU specific kernel command line
3384  * options.
3385  *
3386  ****************************************************************************/
3387 
3388 static int __init parse_amd_iommu_dump(char *str)
3389 {
3390 	amd_iommu_dump = true;
3391 
3392 	return 1;
3393 }
3394 
3395 static int __init parse_amd_iommu_intr(char *str)
3396 {
3397 	for (; *str; ++str) {
3398 		if (strncmp(str, "legacy", 6) == 0) {
3399 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3400 			break;
3401 		}
3402 		if (strncmp(str, "vapic", 5) == 0) {
3403 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3404 			break;
3405 		}
3406 	}
3407 	return 1;
3408 }
3409 
3410 static int __init parse_amd_iommu_options(char *str)
3411 {
3412 	if (!str)
3413 		return -EINVAL;
3414 
3415 	while (*str) {
3416 		if (strncmp(str, "fullflush", 9) == 0) {
3417 			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3418 			iommu_set_dma_strict();
3419 		} else if (strncmp(str, "force_enable", 12) == 0) {
3420 			amd_iommu_force_enable = true;
3421 		} else if (strncmp(str, "off", 3) == 0) {
3422 			amd_iommu_disabled = true;
3423 		} else if (strncmp(str, "force_isolation", 15) == 0) {
3424 			amd_iommu_force_isolation = true;
3425 		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3426 			amd_iommu_pgtable = AMD_IOMMU_V1;
3427 		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3428 			amd_iommu_pgtable = AMD_IOMMU_V2;
3429 		} else {
3430 			pr_notice("Unknown option - '%s'\n", str);
3431 		}
3432 
3433 		str += strcspn(str, ",");
3434 		while (*str == ',')
3435 			str++;
3436 	}
3437 
3438 	return 1;
3439 }
3440 
3441 static int __init parse_ivrs_ioapic(char *str)
3442 {
3443 	u32 seg = 0, bus, dev, fn;
3444 	int id, i;
3445 	u32 devid;
3446 
3447 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3448 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3449 		goto found;
3450 
3451 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3452 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3453 		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3454 			str, id, seg, bus, dev, fn);
3455 		goto found;
3456 	}
3457 
3458 	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3459 	return 1;
3460 
3461 found:
3462 	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3463 		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3464 			str);
3465 		return 1;
3466 	}
3467 
3468 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3469 
3470 	cmdline_maps			= true;
3471 	i				= early_ioapic_map_size++;
3472 	early_ioapic_map[i].id		= id;
3473 	early_ioapic_map[i].devid	= devid;
3474 	early_ioapic_map[i].cmd_line	= true;
3475 
3476 	return 1;
3477 }
3478 
3479 static int __init parse_ivrs_hpet(char *str)
3480 {
3481 	u32 seg = 0, bus, dev, fn;
3482 	int id, i;
3483 	u32 devid;
3484 
3485 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3486 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3487 		goto found;
3488 
3489 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3490 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3491 		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3492 			str, id, seg, bus, dev, fn);
3493 		goto found;
3494 	}
3495 
3496 	pr_err("Invalid command line: ivrs_hpet%s\n", str);
3497 	return 1;
3498 
3499 found:
3500 	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3501 		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3502 			str);
3503 		return 1;
3504 	}
3505 
3506 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3507 
3508 	cmdline_maps			= true;
3509 	i				= early_hpet_map_size++;
3510 	early_hpet_map[i].id		= id;
3511 	early_hpet_map[i].devid		= devid;
3512 	early_hpet_map[i].cmd_line	= true;
3513 
3514 	return 1;
3515 }
3516 
3517 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3518 
3519 static int __init parse_ivrs_acpihid(char *str)
3520 {
3521 	u32 seg = 0, bus, dev, fn;
3522 	char *hid, *uid, *p, *addr;
3523 	char acpiid[ACPIID_LEN] = {0};
3524 	int i;
3525 
3526 	addr = strchr(str, '@');
3527 	if (!addr) {
3528 		addr = strchr(str, '=');
3529 		if (!addr)
3530 			goto not_found;
3531 
3532 		++addr;
3533 
3534 		if (strlen(addr) > ACPIID_LEN)
3535 			goto not_found;
3536 
3537 		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3538 		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3539 			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3540 				str, acpiid, seg, bus, dev, fn);
3541 			goto found;
3542 		}
3543 		goto not_found;
3544 	}
3545 
3546 	/* We have the '@', make it the terminator to get just the acpiid */
3547 	*addr++ = 0;
3548 
3549 	if (strlen(str) > ACPIID_LEN + 1)
3550 		goto not_found;
3551 
3552 	if (sscanf(str, "=%s", acpiid) != 1)
3553 		goto not_found;
3554 
3555 	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3556 	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3557 		goto found;
3558 
3559 not_found:
3560 	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3561 	return 1;
3562 
3563 found:
3564 	p = acpiid;
3565 	hid = strsep(&p, ":");
3566 	uid = p;
3567 
3568 	if (!hid || !(*hid) || !uid) {
3569 		pr_err("Invalid command line: hid or uid\n");
3570 		return 1;
3571 	}
3572 
3573 	/*
3574 	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3575 	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3576 	 */
3577 	while (*uid == '0' && *(uid + 1))
3578 		uid++;
3579 
3580 	i = early_acpihid_map_size++;
3581 	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3582 	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3583 	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3584 	early_acpihid_map[i].cmd_line	= true;
3585 
3586 	return 1;
3587 }
3588 
/*
 * Register the command line handlers. The ivrs_* names are registered
 * without a trailing '=', so their handlers receive the text that directly
 * follows the option name and match the leading '=' or '[' themselves.
 */
__setup("amd_iommu_dump",	parse_amd_iommu_dump);
__setup("amd_iommu=",		parse_amd_iommu_options);
__setup("amd_iommu_intr=",	parse_amd_iommu_intr);
__setup("ivrs_ioapic",		parse_ivrs_ioapic);
__setup("ivrs_hpet",		parse_ivrs_hpet);
__setup("ivrs_acpihid",		parse_ivrs_acpihid);
3595 
3596 bool amd_iommu_v2_supported(void)
3597 {
3598 	/* CPU page table size should match IOMMU guest page table size */
3599 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3600 	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
3601 		return false;
3602 
3603 	/*
3604 	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
3605 	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
3606 	 * setting up IOMMUv1 page table.
3607 	 */
3608 	return amd_iommu_v2_present && !amd_iommu_snp_en;
3609 }
3610 EXPORT_SYMBOL(amd_iommu_v2_supported);
3611 
3612 struct amd_iommu *get_amd_iommu(unsigned int idx)
3613 {
3614 	unsigned int i = 0;
3615 	struct amd_iommu *iommu;
3616 
3617 	for_each_iommu(iommu)
3618 		if (i++ == idx)
3619 			return iommu;
3620 	return NULL;
3621 }
3622 
3623 /****************************************************************************
3624  *
3625  * IOMMU EFR Performance Counter support functionality. This code allows
3626  * access to the IOMMU PC functionality.
3627  *
3628  ****************************************************************************/
3629 
3630 u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3631 {
3632 	struct amd_iommu *iommu = get_amd_iommu(idx);
3633 
3634 	if (iommu)
3635 		return iommu->max_banks;
3636 
3637 	return 0;
3638 }
3639 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3640 
3641 bool amd_iommu_pc_supported(void)
3642 {
3643 	return amd_iommu_pc_present;
3644 }
3645 EXPORT_SYMBOL(amd_iommu_pc_supported);
3646 
3647 u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3648 {
3649 	struct amd_iommu *iommu = get_amd_iommu(idx);
3650 
3651 	if (iommu)
3652 		return iommu->max_counters;
3653 
3654 	return 0;
3655 }
3656 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3657 
3658 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3659 				u8 fxn, u64 *value, bool is_write)
3660 {
3661 	u32 offset;
3662 	u32 max_offset_lim;
3663 
3664 	/* Make sure the IOMMU PC resource is available */
3665 	if (!amd_iommu_pc_present)
3666 		return -ENODEV;
3667 
3668 	/* Check for valid iommu and pc register indexing */
3669 	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3670 		return -ENODEV;
3671 
3672 	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3673 
3674 	/* Limit the offset to the hw defined mmio region aperture */
3675 	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3676 				(iommu->max_counters << 8) | 0x28);
3677 	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3678 	    (offset > max_offset_lim))
3679 		return -EINVAL;
3680 
3681 	if (is_write) {
3682 		u64 val = *value & GENMASK_ULL(47, 0);
3683 
3684 		writel((u32)val, iommu->mmio_base + offset);
3685 		writel((val >> 32), iommu->mmio_base + offset + 4);
3686 	} else {
3687 		*value = readl(iommu->mmio_base + offset + 4);
3688 		*value <<= 32;
3689 		*value |= readl(iommu->mmio_base + offset);
3690 		*value &= GENMASK_ULL(47, 0);
3691 	}
3692 
3693 	return 0;
3694 }
3695 
3696 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3697 {
3698 	if (!iommu)
3699 		return -EINVAL;
3700 
3701 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3702 }
3703 
3704 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3705 {
3706 	if (!iommu)
3707 		return -EINVAL;
3708 
3709 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3710 }
3711 
3712 #ifdef CONFIG_AMD_MEM_ENCRYPT
3713 int amd_iommu_snp_enable(void)
3714 {
3715 	/*
3716 	 * The SNP support requires that IOMMU must be enabled, and is
3717 	 * not configured in the passthrough mode.
3718 	 */
3719 	if (no_iommu || iommu_default_passthrough()) {
3720 		pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
3721 		return -EINVAL;
3722 	}
3723 
3724 	/*
3725 	 * Prevent enabling SNP after IOMMU_ENABLED state because this process
3726 	 * affect how IOMMU driver sets up data structures and configures
3727 	 * IOMMU hardware.
3728 	 */
3729 	if (init_state > IOMMU_ENABLED) {
3730 		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
3731 		return -EINVAL;
3732 	}
3733 
3734 	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
3735 	if (!amd_iommu_snp_en)
3736 		return -EINVAL;
3737 
3738 	pr_info("SNP enabled\n");
3739 
3740 	/* Enforce IOMMU v1 pagetable when SNP is enabled. */
3741 	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
3742 		pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
3743 		amd_iommu_pgtable = AMD_IOMMU_V1;
3744 	}
3745 
3746 	return 0;
3747 }
3748 #endif
3749