xref: /openbmc/linux/drivers/iommu/amd/init.c (revision f5944964)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  *         Leo Duran <leo.duran@amd.com>
6  */
7 
8 #define pr_fmt(fmt)     "AMD-Vi: " fmt
9 #define dev_fmt(fmt)    pr_fmt(fmt)
10 
11 #include <linux/pci.h>
12 #include <linux/acpi.h>
13 #include <linux/list.h>
14 #include <linux/bitmap.h>
15 #include <linux/slab.h>
16 #include <linux/syscore_ops.h>
17 #include <linux/interrupt.h>
18 #include <linux/msi.h>
19 #include <linux/irq.h>
20 #include <linux/amd-iommu.h>
21 #include <linux/export.h>
22 #include <linux/kmemleak.h>
23 #include <linux/cc_platform.h>
24 #include <linux/iopoll.h>
25 #include <asm/pci-direct.h>
26 #include <asm/iommu.h>
27 #include <asm/apic.h>
28 #include <asm/gart.h>
29 #include <asm/x86_init.h>
30 #include <asm/io_apic.h>
31 #include <asm/irq_remapping.h>
32 #include <asm/set_memory.h>
33 
34 #include <linux/crash_dump.h>
35 
36 #include "amd_iommu.h"
37 #include "../irq_remapping.h"
38 
/*
 * definitions for the ACPI scanning code
 */
/* Bytes skipped at the start of the IVRS table before the first block header */
#define IVRS_HEADER_LENGTH 48

/* Block type codes (NOTE(review): presumably the 'type' byte of IVHD/IVMD blocks) */
#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
#define ACPI_IVMD_TYPE_ALL              0x20
#define ACPI_IVMD_TYPE                  0x21
#define ACPI_IVMD_TYPE_RANGE            0x22

/* Device entry type codes, compared against struct ivhd_entry.type */
#define IVHD_DEV_ALL                    0x01
#define IVHD_DEV_SELECT                 0x02
#define IVHD_DEV_SELECT_RANGE_START     0x03
#define IVHD_DEV_RANGE_END              0x04
#define IVHD_DEV_ALIAS                  0x42
#define IVHD_DEV_ALIAS_RANGE            0x43
#define IVHD_DEV_EXT_SELECT             0x46
#define IVHD_DEV_EXT_SELECT_RANGE       0x47
#define IVHD_DEV_SPECIAL		0x48
#define IVHD_DEV_ACPI_HID		0xf0

/* NOTE(review): presumably the UID format codes of an ACPI HID entry - confirm */
#define UID_NOT_PRESENT                 0
#define UID_IS_INTEGER                  1
#define UID_IS_CHARACTER                2

/* NOTE(review): presumably sub-types of an IVHD_DEV_SPECIAL entry - confirm */
#define IVHD_SPECIAL_IOAPIC		1
#define IVHD_SPECIAL_HPET		2

/* NOTE(review): presumably bit masks for struct ivhd_header.flags - confirm */
#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
#define IVHD_FLAG_PASSPW_EN_MASK        0x02
#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
#define IVHD_FLAG_ISOC_EN_MASK          0x08

/* NOTE(review): presumably bit masks for struct ivmd_header.flags - confirm */
#define IVMD_FLAG_EXCL_RANGE            0x08
#define IVMD_FLAG_IW                    0x04
#define IVMD_FLAG_IR                    0x02
#define IVMD_FLAG_UNITY_MAP             0x01

/* NOTE(review): presumably per-device flag bits from IVHD device entries - confirm */
#define ACPI_DEVFLAG_INITPASS           0x01
#define ACPI_DEVFLAG_EXTINT             0x02
#define ACPI_DEVFLAG_NMI                0x04
#define ACPI_DEVFLAG_SYSMGT1            0x10
#define ACPI_DEVFLAG_SYSMGT2            0x20
#define ACPI_DEVFLAG_LINT0              0x40
#define ACPI_DEVFLAG_LINT1              0x80
#define ACPI_DEVFLAG_ATSDIS             0x10000000

/* Upper bound on polling iterations; iommu_ga_log_enable() delays 10us per iteration */
#define LOOP_TIMEOUT	2000000
87 
/*
 * Pack a PCI segment/bus/device/function tuple into a single SBDF id:
 * bits 31:16 segment, 15:8 bus, 7:3 device, 2:0 function.
 *
 * Each argument is parenthesized so that expression arguments (for example
 * "a | b") are masked as a whole instead of being split by operator
 * precedence.
 */
#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	((((seg) & 0xffff) << 16) | (((bus) & 0xff) << 8) \
						 | (((dev) & 0x1f) << 3) | ((fn) & 0x7))
90 
91 /*
92  * ACPI table definitions
93  *
94  * These data structures are laid over the table to parse the important values
95  * out of it.
96  */
97 
/*
 * structure describing one IOMMU in the ACPI table. Typically followed by one
 * or more ivhd_entrys.
 *
 * The structure is packed because it is laid directly over the table bytes.
 */
struct ivhd_header {
	u8 type;	/* block type; 0x10, 0x11 and 0x40 are handled in this file */
	u8 flags;
	u16 length;	/* length of the whole block: this header plus its device entries */
	u16 devid;
	u16 cap_ptr;
	u64 mmio_phys;
	u16 pci_seg;	/* PCI segment the block belongs to */
	u16 info;
	u32 efr_attr;

	/* Following only valid on IVHD type 11h and 40h */
	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
	u64 efr_reg2;
} __attribute__((packed));
117 
/*
 * A device entry describing which devices a specific IOMMU translates and
 * which requestor ids they use.
 *
 * Packed overlay of the table bytes. Entries are variable length; see
 * ivhd_entry_length() for how the size is derived from the type byte.
 */
struct ivhd_entry {
	u8 type;	/* entry type, one of the IVHD_DEV_* codes */
	u16 devid;	/* device id the entry refers to */
	u8 flags;
	struct_group(ext_hid,
		u32 ext;
		u32 hidh;
	);
	u64 cid;
	u8 uidf;
	u8 uidl;	/* byte offset 21: UID length for IVHD_DEV_ACPI_HID entries */
	u8 uid;
} __attribute__((packed));
135 
/*
 * An AMD IOMMU memory definition structure. It defines things like exclusion
 * ranges for devices and regions that should be unity mapped.
 *
 * Packed so that it can be overlaid on the raw table bytes.
 */
struct ivmd_header {
	u8 type;
	u8 flags;	/* NOTE(review): presumably a combination of IVMD_FLAG_* bits - confirm */
	u16 length;
	u16 devid;
	u16 aux;
	u16 pci_seg;
	u8  resv[6];
	u64 range_start;
	u64 range_length;
} __attribute__((packed));
151 
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;

enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;

/* Guest interrupt remapping mode; checked before GA log setup and before enabling XT */
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
/* Must equal IRQ_REMAP_X2APIC_MODE for iommu_enable_xt() to set CONTROL_XT_EN */
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;

static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
/* Only IVHD blocks of this type are considered by find_last_devid_acpi() */
static int amd_iommu_target_ivhd_type;

/* Global EFR and EFR2 registers, computed by get_global_efr() */
u64 amd_iommu_efr;
u64 amd_iommu_efr2;

/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);

LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
					   system */

/* Array to assign indices to IOMMUs */
struct amd_iommu *amd_iommus[MAX_IOMMUS];

/* Number of IOMMUs present in the system */
static int amd_iommus_present;

/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;

u32 amd_iommu_max_pasid __read_mostly = ~0;

bool amd_iommu_v2_present __read_mostly;
static bool amd_iommu_pc_present __read_mostly;
/* Set by early_iommu_features_init() when IVINFO has IOMMU_IVINFO_DMA_REMAP */
bool amdr_ivrs_remap_support __read_mostly;

bool amd_iommu_force_isolation __read_mostly;

/*
 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 * to know which ones are already in use.
 */
unsigned long *amd_iommu_pd_alloc_bitmap;
202 
/*
 * States of the IOMMU initialization sequence. The file-scope variable
 * init_state starts out as IOMMU_START_STATE.
 * NOTE(review): the transitions between states are implemented elsewhere in
 * this file (iommu_go_to_state()); confirm the exact ordering there.
 */
enum iommu_init_state {
	IOMMU_START_STATE,
	IOMMU_IVRS_DETECTED,
	IOMMU_ACPI_FINISHED,
	IOMMU_ENABLED,
	IOMMU_PCI_INIT,
	IOMMU_INTERRUPTS_EN,
	IOMMU_INITIALIZED,
	IOMMU_NOT_FOUND,
	IOMMU_INIT_ERROR,
	IOMMU_CMDLINE_DISABLED,
};
215 
/* Early ioapic and hpet maps from kernel command line */
#define EARLY_MAP_SIZE		4	/* capacity of each early map array below */
static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];

/* NOTE(review): presumably the number of used slots in the arrays above - confirm */
static int __initdata early_ioapic_map_size;
static int __initdata early_hpet_map_size;
static int __initdata early_acpihid_map_size;

static bool __initdata cmdline_maps;

/* Current position in the initialization sequence (enum iommu_init_state) */
static enum iommu_init_state init_state = IOMMU_START_STATE;

/* Forward declarations for functions defined later in this file */
static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);

static bool amd_iommu_pre_enabled = true;

/* Consulted by early_iommu_features_init() for the EFRSUP and DMA_REMAP bits */
static u32 amd_iommu_ivinfo __initdata;
237 
238 bool translation_pre_enabled(struct amd_iommu *iommu)
239 {
240 	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
241 }
242 
243 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
244 {
245 	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
246 }
247 
248 static void init_translation_status(struct amd_iommu *iommu)
249 {
250 	u64 ctrl;
251 
252 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
253 	if (ctrl & (1<<CONTROL_IOMMU_EN))
254 		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
255 }
256 
257 static inline unsigned long tbl_size(int entry_size, int last_bdf)
258 {
259 	unsigned shift = PAGE_SHIFT +
260 			 get_order((last_bdf + 1) * entry_size);
261 
262 	return 1UL << shift;
263 }
264 
265 int amd_iommu_get_num_iommus(void)
266 {
267 	return amd_iommus_present;
268 }
269 
270 /*
271  * Iterate through all the IOMMUs to get common EFR
272  * masks among all IOMMUs and warn if found inconsistency.
273  */
274 static void get_global_efr(void)
275 {
276 	struct amd_iommu *iommu;
277 
278 	for_each_iommu(iommu) {
279 		u64 tmp = iommu->features;
280 		u64 tmp2 = iommu->features2;
281 
282 		if (list_is_first(&iommu->list, &amd_iommu_list)) {
283 			amd_iommu_efr = tmp;
284 			amd_iommu_efr2 = tmp2;
285 			continue;
286 		}
287 
288 		if (amd_iommu_efr == tmp &&
289 		    amd_iommu_efr2 == tmp2)
290 			continue;
291 
292 		pr_err(FW_BUG
293 		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
294 		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
295 		       iommu->index, iommu->pci_seg->id,
296 		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
297 		       PCI_FUNC(iommu->devid));
298 
299 		amd_iommu_efr &= tmp;
300 		amd_iommu_efr2 &= tmp2;
301 	}
302 
303 	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
304 }
305 
306 static bool check_feature_on_all_iommus(u64 mask)
307 {
308 	return !!(amd_iommu_efr & mask);
309 }
310 
311 static inline int check_feature_gpt_level(void)
312 {
313 	return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
314 }
315 
316 /*
317  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
318  * Default to IVHD EFR since it is available sooner
319  * (i.e. before PCI init).
320  */
321 static void __init early_iommu_features_init(struct amd_iommu *iommu,
322 					     struct ivhd_header *h)
323 {
324 	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
325 		iommu->features = h->efr_reg;
326 		iommu->features2 = h->efr_reg2;
327 	}
328 	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
329 		amdr_ivrs_remap_support = true;
330 }
331 
332 /* Access to l1 and l2 indexed register spaces */
333 
334 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
335 {
336 	u32 val;
337 
338 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
339 	pci_read_config_dword(iommu->dev, 0xfc, &val);
340 	return val;
341 }
342 
343 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
344 {
345 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
346 	pci_write_config_dword(iommu->dev, 0xfc, val);
347 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
348 }
349 
350 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
351 {
352 	u32 val;
353 
354 	pci_write_config_dword(iommu->dev, 0xf0, address);
355 	pci_read_config_dword(iommu->dev, 0xf4, &val);
356 	return val;
357 }
358 
359 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
360 {
361 	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
362 	pci_write_config_dword(iommu->dev, 0xf4, val);
363 }
364 
365 /****************************************************************************
366  *
367  * AMD IOMMU MMIO register space handling functions
368  *
369  * These functions are used to program the IOMMU device registers in
370  * MMIO space required for that driver.
371  *
372  ****************************************************************************/
373 
374 /*
375  * This function set the exclusion range in the IOMMU. DMA accesses to the
376  * exclusion range are passed through untranslated
377  */
378 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
379 {
380 	u64 start = iommu->exclusion_start & PAGE_MASK;
381 	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
382 	u64 entry;
383 
384 	if (!iommu->exclusion_start)
385 		return;
386 
387 	entry = start | MMIO_EXCL_ENABLE_MASK;
388 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
389 			&entry, sizeof(entry));
390 
391 	entry = limit;
392 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
393 			&entry, sizeof(entry));
394 }
395 
396 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
397 {
398 	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
399 	u64 entry = start & PM_ADDR_MASK;
400 
401 	if (!check_feature_on_all_iommus(FEATURE_SNP))
402 		return;
403 
404 	/* Note:
405 	 * Re-purpose Exclusion base/limit registers for Completion wait
406 	 * write-back base/limit.
407 	 */
408 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
409 		    &entry, sizeof(entry));
410 
411 	/* Note:
412 	 * Default to 4 Kbytes, which can be specified by setting base
413 	 * address equal to the limit address.
414 	 */
415 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
416 		    &entry, sizeof(entry));
417 }
418 
419 /* Programs the physical address of the device table into the IOMMU hardware */
420 static void iommu_set_device_table(struct amd_iommu *iommu)
421 {
422 	u64 entry;
423 	u32 dev_table_size = iommu->pci_seg->dev_table_size;
424 	void *dev_table = (void *)get_dev_table(iommu);
425 
426 	BUG_ON(iommu->mmio_base == NULL);
427 
428 	entry = iommu_virt_to_phys(dev_table);
429 	entry |= (dev_table_size >> 12) - 1;
430 	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
431 			&entry, sizeof(entry));
432 }
433 
434 /* Generic functions to enable/disable certain features of the IOMMU. */
435 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
436 {
437 	u64 ctrl;
438 
439 	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
440 	ctrl |= (1ULL << bit);
441 	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
442 }
443 
444 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
445 {
446 	u64 ctrl;
447 
448 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
449 	ctrl &= ~(1ULL << bit);
450 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
451 }
452 
453 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
454 {
455 	u64 ctrl;
456 
457 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
458 	ctrl &= ~CTRL_INV_TO_MASK;
459 	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
460 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
461 }
462 
463 /* Function to enable the hardware */
464 static void iommu_enable(struct amd_iommu *iommu)
465 {
466 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
467 }
468 
469 static void iommu_disable(struct amd_iommu *iommu)
470 {
471 	if (!iommu->mmio_base)
472 		return;
473 
474 	/* Disable command buffer */
475 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
476 
477 	/* Disable event logging and event interrupts */
478 	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
479 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
480 
481 	/* Disable IOMMU GA_LOG */
482 	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
483 	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
484 
485 	/* Disable IOMMU hardware itself */
486 	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
487 }
488 
489 /*
490  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
491  * the system has one.
492  */
493 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
494 {
495 	if (!request_mem_region(address, end, "amd_iommu")) {
496 		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
497 			address, end);
498 		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
499 		return NULL;
500 	}
501 
502 	return (u8 __iomem *)ioremap(address, end);
503 }
504 
505 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
506 {
507 	if (iommu->mmio_base)
508 		iounmap(iommu->mmio_base);
509 	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
510 }
511 
512 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
513 {
514 	u32 size = 0;
515 
516 	switch (h->type) {
517 	case 0x10:
518 		size = 24;
519 		break;
520 	case 0x11:
521 	case 0x40:
522 		size = 40;
523 		break;
524 	}
525 	return size;
526 }
527 
528 /****************************************************************************
529  *
530  * The functions below belong to the first pass of AMD IOMMU ACPI table
531  * parsing. In this pass we try to find out the highest device id this
532  * code has to handle. Upon this information the size of the shared data
533  * structures is determined later.
534  *
535  ****************************************************************************/
536 
537 /*
538  * This function calculates the length of a given IVHD entry
539  */
540 static inline int ivhd_entry_length(u8 *ivhd)
541 {
542 	u32 type = ((struct ivhd_entry *)ivhd)->type;
543 
544 	if (type < 0x80) {
545 		return 0x04 << (*ivhd >> 6);
546 	} else if (type == IVHD_DEV_ACPI_HID) {
547 		/* For ACPI_HID, offset 21 is uid len */
548 		return *((u8 *)ivhd + 21) + 22;
549 	}
550 	return 0;
551 }
552 
553 /*
554  * After reading the highest device id from the IOMMU PCI capability header
555  * this function looks if there is a higher device id defined in the ACPI table
556  */
557 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
558 {
559 	u8 *p = (void *)h, *end = (void *)h;
560 	struct ivhd_entry *dev;
561 	int last_devid = -EINVAL;
562 
563 	u32 ivhd_size = get_ivhd_header_size(h);
564 
565 	if (!ivhd_size) {
566 		pr_err("Unsupported IVHD type %#x\n", h->type);
567 		return -EINVAL;
568 	}
569 
570 	p += ivhd_size;
571 	end += h->length;
572 
573 	while (p < end) {
574 		dev = (struct ivhd_entry *)p;
575 		switch (dev->type) {
576 		case IVHD_DEV_ALL:
577 			/* Use maximum BDF value for DEV_ALL */
578 			return 0xffff;
579 		case IVHD_DEV_SELECT:
580 		case IVHD_DEV_RANGE_END:
581 		case IVHD_DEV_ALIAS:
582 		case IVHD_DEV_EXT_SELECT:
583 			/* all the above subfield types refer to device ids */
584 			if (dev->devid > last_devid)
585 				last_devid = dev->devid;
586 			break;
587 		default:
588 			break;
589 		}
590 		p += ivhd_entry_length(p);
591 	}
592 
593 	WARN_ON(p != end);
594 
595 	return last_devid;
596 }
597 
598 static int __init check_ivrs_checksum(struct acpi_table_header *table)
599 {
600 	int i;
601 	u8 checksum = 0, *p = (u8 *)table;
602 
603 	for (i = 0; i < table->length; ++i)
604 		checksum += p[i];
605 	if (checksum != 0) {
606 		/* ACPI table corrupt */
607 		pr_err(FW_BUG "IVRS invalid checksum\n");
608 		return -ENODEV;
609 	}
610 
611 	return 0;
612 }
613 
614 /*
615  * Iterate over all IVHD entries in the ACPI table and find the highest device
616  * id which we need to handle. This is the first of three functions which parse
617  * the ACPI table. So we check the checksum here.
618  */
619 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
620 {
621 	u8 *p = (u8 *)table, *end = (u8 *)table;
622 	struct ivhd_header *h;
623 	int last_devid, last_bdf = 0;
624 
625 	p += IVRS_HEADER_LENGTH;
626 
627 	end += table->length;
628 	while (p < end) {
629 		h = (struct ivhd_header *)p;
630 		if (h->pci_seg == pci_seg &&
631 		    h->type == amd_iommu_target_ivhd_type) {
632 			last_devid = find_last_devid_from_ivhd(h);
633 
634 			if (last_devid < 0)
635 				return -EINVAL;
636 			if (last_devid > last_bdf)
637 				last_bdf = last_devid;
638 		}
639 		p += h->length;
640 	}
641 	WARN_ON(p != end);
642 
643 	return last_bdf;
644 }
645 
646 /****************************************************************************
647  *
648  * The following functions belong to the code path which parses the ACPI table
649  * the second time. In this ACPI parsing iteration we allocate IOMMU specific
650  * data structures, initialize the per PCI segment device/alias/rlookup table
651  * and also basically initialize the hardware.
652  *
653  ****************************************************************************/
654 
655 /* Allocate per PCI segment device table */
656 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
657 {
658 	pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
659 						      get_order(pci_seg->dev_table_size));
660 	if (!pci_seg->dev_table)
661 		return -ENOMEM;
662 
663 	return 0;
664 }
665 
666 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
667 {
668 	free_pages((unsigned long)pci_seg->dev_table,
669 		    get_order(pci_seg->dev_table_size));
670 	pci_seg->dev_table = NULL;
671 }
672 
673 /* Allocate per PCI segment IOMMU rlookup table. */
674 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
675 {
676 	pci_seg->rlookup_table = (void *)__get_free_pages(
677 						GFP_KERNEL | __GFP_ZERO,
678 						get_order(pci_seg->rlookup_table_size));
679 	if (pci_seg->rlookup_table == NULL)
680 		return -ENOMEM;
681 
682 	return 0;
683 }
684 
685 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
686 {
687 	free_pages((unsigned long)pci_seg->rlookup_table,
688 		   get_order(pci_seg->rlookup_table_size));
689 	pci_seg->rlookup_table = NULL;
690 }
691 
692 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
693 {
694 	pci_seg->irq_lookup_table = (void *)__get_free_pages(
695 					     GFP_KERNEL | __GFP_ZERO,
696 					     get_order(pci_seg->rlookup_table_size));
697 	kmemleak_alloc(pci_seg->irq_lookup_table,
698 		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
699 	if (pci_seg->irq_lookup_table == NULL)
700 		return -ENOMEM;
701 
702 	return 0;
703 }
704 
705 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
706 {
707 	kmemleak_free(pci_seg->irq_lookup_table);
708 	free_pages((unsigned long)pci_seg->irq_lookup_table,
709 		   get_order(pci_seg->rlookup_table_size));
710 	pci_seg->irq_lookup_table = NULL;
711 }
712 
713 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
714 {
715 	int i;
716 
717 	pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL,
718 					get_order(pci_seg->alias_table_size));
719 	if (!pci_seg->alias_table)
720 		return -ENOMEM;
721 
722 	/*
723 	 * let all alias entries point to itself
724 	 */
725 	for (i = 0; i <= pci_seg->last_bdf; ++i)
726 		pci_seg->alias_table[i] = i;
727 
728 	return 0;
729 }
730 
731 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
732 {
733 	free_pages((unsigned long)pci_seg->alias_table,
734 		   get_order(pci_seg->alias_table_size));
735 	pci_seg->alias_table = NULL;
736 }
737 
738 /*
739  * Allocates the command buffer. This buffer is per AMD IOMMU. We can
740  * write commands to that buffer later and the IOMMU will execute them
741  * asynchronously
742  */
743 static int __init alloc_command_buffer(struct amd_iommu *iommu)
744 {
745 	iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
746 						  get_order(CMD_BUFFER_SIZE));
747 
748 	return iommu->cmd_buf ? 0 : -ENOMEM;
749 }
750 
751 /*
752  * This function restarts event logging in case the IOMMU experienced
753  * an event log buffer overflow.
754  */
755 void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
756 {
757 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
758 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
759 }
760 
761 /*
762  * This function resets the command buffer if the IOMMU stopped fetching
763  * commands from it.
764  */
765 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
766 {
767 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
768 
769 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
770 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
771 	iommu->cmd_buf_head = 0;
772 	iommu->cmd_buf_tail = 0;
773 
774 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
775 }
776 
777 /*
778  * This function writes the command buffer address to the hardware and
779  * enables it.
780  */
781 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
782 {
783 	u64 entry;
784 
785 	BUG_ON(iommu->cmd_buf == NULL);
786 
787 	entry = iommu_virt_to_phys(iommu->cmd_buf);
788 	entry |= MMIO_CMD_SIZE_512;
789 
790 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
791 		    &entry, sizeof(entry));
792 
793 	amd_iommu_reset_cmd_buffer(iommu);
794 }
795 
796 /*
797  * This function disables the command buffer
798  */
799 static void iommu_disable_command_buffer(struct amd_iommu *iommu)
800 {
801 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
802 }
803 
804 static void __init free_command_buffer(struct amd_iommu *iommu)
805 {
806 	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
807 }
808 
809 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
810 					 gfp_t gfp, size_t size)
811 {
812 	int order = get_order(size);
813 	void *buf = (void *)__get_free_pages(gfp, order);
814 
815 	if (buf &&
816 	    check_feature_on_all_iommus(FEATURE_SNP) &&
817 	    set_memory_4k((unsigned long)buf, (1 << order))) {
818 		free_pages((unsigned long)buf, order);
819 		buf = NULL;
820 	}
821 
822 	return buf;
823 }
824 
825 /* allocates the memory where the IOMMU will log its events to */
826 static int __init alloc_event_buffer(struct amd_iommu *iommu)
827 {
828 	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
829 					      EVT_BUFFER_SIZE);
830 
831 	return iommu->evt_buf ? 0 : -ENOMEM;
832 }
833 
834 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
835 {
836 	u64 entry;
837 
838 	BUG_ON(iommu->evt_buf == NULL);
839 
840 	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
841 
842 	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
843 		    &entry, sizeof(entry));
844 
845 	/* set head and tail to zero manually */
846 	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
847 	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
848 
849 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
850 }
851 
852 /*
853  * This function disables the event log buffer
854  */
855 static void iommu_disable_event_buffer(struct amd_iommu *iommu)
856 {
857 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
858 }
859 
860 static void __init free_event_buffer(struct amd_iommu *iommu)
861 {
862 	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
863 }
864 
865 /* allocates the memory where the IOMMU will log its events to */
866 static int __init alloc_ppr_log(struct amd_iommu *iommu)
867 {
868 	iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
869 					      PPR_LOG_SIZE);
870 
871 	return iommu->ppr_log ? 0 : -ENOMEM;
872 }
873 
874 static void iommu_enable_ppr_log(struct amd_iommu *iommu)
875 {
876 	u64 entry;
877 
878 	if (iommu->ppr_log == NULL)
879 		return;
880 
881 	entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
882 
883 	memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
884 		    &entry, sizeof(entry));
885 
886 	/* set head and tail to zero manually */
887 	writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
888 	writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
889 
890 	iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
891 	iommu_feature_enable(iommu, CONTROL_PPR_EN);
892 }
893 
894 static void __init free_ppr_log(struct amd_iommu *iommu)
895 {
896 	free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
897 }
898 
/*
 * Release the GA log and GA log tail pages of @iommu (CONFIG_IRQ_REMAP only;
 * otherwise this is a no-op).
 *
 * Both pointers are reset to NULL after the free. This function is called
 * from the error path of iommu_init_ga_log(); without the reset,
 * iommu->ga_log would keep pointing at freed pages, the
 * "!iommu->ga_log" check in iommu_ga_log_enable() would pass on a stale
 * pointer, and a second call to this function would free the pages twice.
 * Calling it with NULL pointers is harmless because free_pages() ignores a
 * zero address.
 */
static void free_ga_log(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
	iommu->ga_log = NULL;

	free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
	iommu->ga_log_tail = NULL;
#endif
}
906 
907 #ifdef CONFIG_IRQ_REMAP
908 static int iommu_ga_log_enable(struct amd_iommu *iommu)
909 {
910 	u32 status, i;
911 	u64 entry;
912 
913 	if (!iommu->ga_log)
914 		return -EINVAL;
915 
916 	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
917 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
918 		    &entry, sizeof(entry));
919 	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
920 		 (BIT_ULL(52)-1)) & ~7ULL;
921 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
922 		    &entry, sizeof(entry));
923 	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
924 	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
925 
926 
927 	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
928 	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
929 
930 	for (i = 0; i < LOOP_TIMEOUT; ++i) {
931 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
932 		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
933 			break;
934 		udelay(10);
935 	}
936 
937 	if (WARN_ON(i >= LOOP_TIMEOUT))
938 		return -EINVAL;
939 
940 	return 0;
941 }
942 
943 static int iommu_init_ga_log(struct amd_iommu *iommu)
944 {
945 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
946 		return 0;
947 
948 	iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
949 					get_order(GA_LOG_SIZE));
950 	if (!iommu->ga_log)
951 		goto err_out;
952 
953 	iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
954 					get_order(8));
955 	if (!iommu->ga_log_tail)
956 		goto err_out;
957 
958 	return 0;
959 err_out:
960 	free_ga_log(iommu);
961 	return -EINVAL;
962 }
963 #endif /* CONFIG_IRQ_REMAP */
964 
965 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
966 {
967 	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
968 
969 	return iommu->cmd_sem ? 0 : -ENOMEM;
970 }
971 
972 static void __init free_cwwb_sem(struct amd_iommu *iommu)
973 {
974 	if (iommu->cmd_sem)
975 		free_page((unsigned long)iommu->cmd_sem);
976 }
977 
/*
 * Set CONTROL_XT_EN when XT mode was selected (amd_iommu_xt_mode is
 * IRQ_REMAP_X2APIC_MODE) and the guest IR mode satisfies
 * AMD_IOMMU_GUEST_IR_GA(). Compiles to a no-op without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_xt(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	/*
	 * XT mode (32-bit APIC destination ID) requires
	 * GA mode (128-bit IRTE support) as a prerequisite.
	 */
	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
		return;

	if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
		return;

	iommu_feature_enable(iommu, CONTROL_XT_EN);
#endif /* CONFIG_IRQ_REMAP */
}
990 
991 static void iommu_enable_gt(struct amd_iommu *iommu)
992 {
993 	if (!iommu_feature(iommu, FEATURE_GT))
994 		return;
995 
996 	iommu_feature_enable(iommu, CONTROL_GT_EN);
997 }
998 
999 /* sets a specific bit in the device table entry. */
1000 static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
1001 				u16 devid, u8 bit)
1002 {
1003 	int i = (bit >> 6) & 0x03;
1004 	int _bit = bit & 0x3f;
1005 
1006 	dev_table[devid].data[i] |= (1UL << _bit);
1007 }
1008 
1009 static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1010 {
1011 	struct dev_table_entry *dev_table = get_dev_table(iommu);
1012 
1013 	return __set_dev_entry_bit(dev_table, devid, bit);
1014 }
1015 
1016 static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
1017 			       u16 devid, u8 bit)
1018 {
1019 	int i = (bit >> 6) & 0x03;
1020 	int _bit = bit & 0x3f;
1021 
1022 	return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
1023 }
1024 
1025 static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1026 {
1027 	struct dev_table_entry *dev_table = get_dev_table(iommu);
1028 
1029 	return __get_dev_entry_bit(dev_table, devid, bit);
1030 }
1031 
1032 static bool __copy_device_table(struct amd_iommu *iommu)
1033 {
1034 	u64 int_ctl, int_tab_len, entry = 0;
1035 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1036 	struct dev_table_entry *old_devtb = NULL;
1037 	u32 lo, hi, devid, old_devtb_size;
1038 	phys_addr_t old_devtb_phys;
1039 	u16 dom_id, dte_v, irq_v;
1040 	gfp_t gfp_flag;
1041 	u64 tmp;
1042 
1043 	/* Each IOMMU use separate device table with the same size */
1044 	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1045 	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1046 	entry = (((u64) hi) << 32) + lo;
1047 
1048 	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1049 	if (old_devtb_size != pci_seg->dev_table_size) {
1050 		pr_err("The device table size of IOMMU:%d is not expected!\n",
1051 			iommu->index);
1052 		return false;
1053 	}
1054 
1055 	/*
1056 	 * When SME is enabled in the first kernel, the entry includes the
1057 	 * memory encryption mask(sme_me_mask), we must remove the memory
1058 	 * encryption mask to obtain the true physical address in kdump kernel.
1059 	 */
1060 	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1061 
1062 	if (old_devtb_phys >= 0x100000000ULL) {
1063 		pr_err("The address of old device table is above 4G, not trustworthy!\n");
1064 		return false;
1065 	}
1066 	old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1067 		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
1068 							pci_seg->dev_table_size)
1069 		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1070 
1071 	if (!old_devtb)
1072 		return false;
1073 
1074 	gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
1075 	pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
1076 						    get_order(pci_seg->dev_table_size));
1077 	if (pci_seg->old_dev_tbl_cpy == NULL) {
1078 		pr_err("Failed to allocate memory for copying old device table!\n");
1079 		memunmap(old_devtb);
1080 		return false;
1081 	}
1082 
1083 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1084 		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1085 		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1086 		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1087 
1088 		if (dte_v && dom_id) {
1089 			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1090 			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1091 			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
1092 			/* If gcr3 table existed, mask it out */
1093 			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1094 				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1095 				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1096 				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1097 				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1098 				tmp |= DTE_FLAG_GV;
1099 				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1100 			}
1101 		}
1102 
1103 		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1104 		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1105 		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1106 		if (irq_v && (int_ctl || int_tab_len)) {
1107 			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1108 			    (int_tab_len != DTE_INTTABLEN)) {
1109 				pr_err("Wrong old irq remapping flag: %#x\n", devid);
1110 				memunmap(old_devtb);
1111 				return false;
1112 			}
1113 
1114 			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1115 		}
1116 	}
1117 	memunmap(old_devtb);
1118 
1119 	return true;
1120 }
1121 
1122 static bool copy_device_table(void)
1123 {
1124 	struct amd_iommu *iommu;
1125 	struct amd_iommu_pci_seg *pci_seg;
1126 
1127 	if (!amd_iommu_pre_enabled)
1128 		return false;
1129 
1130 	pr_warn("Translation is already enabled - trying to copy translation structures\n");
1131 
1132 	/*
1133 	 * All IOMMUs within PCI segment shares common device table.
1134 	 * Hence copy device table only once per PCI segment.
1135 	 */
1136 	for_each_pci_segment(pci_seg) {
1137 		for_each_iommu(iommu) {
1138 			if (pci_seg->id != iommu->pci_seg->id)
1139 				continue;
1140 			if (!__copy_device_table(iommu))
1141 				return false;
1142 			break;
1143 		}
1144 	}
1145 
1146 	return true;
1147 }
1148 
1149 void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
1150 {
1151 	int sysmgt;
1152 
1153 	sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
1154 		 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
1155 
1156 	if (sysmgt == 0x01)
1157 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
1158 }
1159 
1160 /*
1161  * This function takes the device specific flags read from the ACPI
1162  * table and sets up the device table entry with that information
1163  */
1164 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1165 					   u16 devid, u32 flags, u32 ext_flags)
1166 {
1167 	if (flags & ACPI_DEVFLAG_INITPASS)
1168 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
1169 	if (flags & ACPI_DEVFLAG_EXTINT)
1170 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
1171 	if (flags & ACPI_DEVFLAG_NMI)
1172 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
1173 	if (flags & ACPI_DEVFLAG_SYSMGT1)
1174 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
1175 	if (flags & ACPI_DEVFLAG_SYSMGT2)
1176 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
1177 	if (flags & ACPI_DEVFLAG_LINT0)
1178 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
1179 	if (flags & ACPI_DEVFLAG_LINT1)
1180 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
1181 
1182 	amd_iommu_apply_erratum_63(iommu, devid);
1183 
1184 	amd_iommu_set_rlookup_table(iommu, devid);
1185 }
1186 
1187 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1188 {
1189 	struct devid_map *entry;
1190 	struct list_head *list;
1191 
1192 	if (type == IVHD_SPECIAL_IOAPIC)
1193 		list = &ioapic_map;
1194 	else if (type == IVHD_SPECIAL_HPET)
1195 		list = &hpet_map;
1196 	else
1197 		return -EINVAL;
1198 
1199 	list_for_each_entry(entry, list, list) {
1200 		if (!(entry->id == id && entry->cmd_line))
1201 			continue;
1202 
1203 		pr_info("Command-line override present for %s id %d - ignoring\n",
1204 			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1205 
1206 		*devid = entry->devid;
1207 
1208 		return 0;
1209 	}
1210 
1211 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1212 	if (!entry)
1213 		return -ENOMEM;
1214 
1215 	entry->id	= id;
1216 	entry->devid	= *devid;
1217 	entry->cmd_line	= cmd_line;
1218 
1219 	list_add_tail(&entry->list, list);
1220 
1221 	return 0;
1222 }
1223 
1224 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1225 				      bool cmd_line)
1226 {
1227 	struct acpihid_map_entry *entry;
1228 	struct list_head *list = &acpihid_map;
1229 
1230 	list_for_each_entry(entry, list, list) {
1231 		if (strcmp(entry->hid, hid) ||
1232 		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1233 		    !entry->cmd_line)
1234 			continue;
1235 
1236 		pr_info("Command-line override for hid:%s uid:%s\n",
1237 			hid, uid);
1238 		*devid = entry->devid;
1239 		return 0;
1240 	}
1241 
1242 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1243 	if (!entry)
1244 		return -ENOMEM;
1245 
1246 	memcpy(entry->uid, uid, strlen(uid));
1247 	memcpy(entry->hid, hid, strlen(hid));
1248 	entry->devid = *devid;
1249 	entry->cmd_line	= cmd_line;
1250 	entry->root_devid = (entry->devid & (~0x7));
1251 
1252 	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1253 		entry->cmd_line ? "cmd" : "ivrs",
1254 		entry->hid, entry->uid, entry->root_devid);
1255 
1256 	list_add_tail(&entry->list, list);
1257 	return 0;
1258 }
1259 
1260 static int __init add_early_maps(void)
1261 {
1262 	int i, ret;
1263 
1264 	for (i = 0; i < early_ioapic_map_size; ++i) {
1265 		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1266 					 early_ioapic_map[i].id,
1267 					 &early_ioapic_map[i].devid,
1268 					 early_ioapic_map[i].cmd_line);
1269 		if (ret)
1270 			return ret;
1271 	}
1272 
1273 	for (i = 0; i < early_hpet_map_size; ++i) {
1274 		ret = add_special_device(IVHD_SPECIAL_HPET,
1275 					 early_hpet_map[i].id,
1276 					 &early_hpet_map[i].devid,
1277 					 early_hpet_map[i].cmd_line);
1278 		if (ret)
1279 			return ret;
1280 	}
1281 
1282 	for (i = 0; i < early_acpihid_map_size; ++i) {
1283 		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1284 					  early_acpihid_map[i].uid,
1285 					  &early_acpihid_map[i].devid,
1286 					  early_acpihid_map[i].cmd_line);
1287 		if (ret)
1288 			return ret;
1289 	}
1290 
1291 	return 0;
1292 }
1293 
1294 /*
1295  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1296  * initializes the hardware and our data structures with it.
1297  */
1298 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1299 					struct ivhd_header *h)
1300 {
1301 	u8 *p = (u8 *)h;
1302 	u8 *end = p, flags = 0;
1303 	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1304 	u32 dev_i, ext_flags = 0;
1305 	bool alias = false;
1306 	struct ivhd_entry *e;
1307 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1308 	u32 ivhd_size;
1309 	int ret;
1310 
1311 
1312 	ret = add_early_maps();
1313 	if (ret)
1314 		return ret;
1315 
1316 	amd_iommu_apply_ivrs_quirks();
1317 
1318 	/*
1319 	 * First save the recommended feature enable bits from ACPI
1320 	 */
1321 	iommu->acpi_flags = h->flags;
1322 
1323 	/*
1324 	 * Done. Now parse the device entries
1325 	 */
1326 	ivhd_size = get_ivhd_header_size(h);
1327 	if (!ivhd_size) {
1328 		pr_err("Unsupported IVHD type %#x\n", h->type);
1329 		return -EINVAL;
1330 	}
1331 
1332 	p += ivhd_size;
1333 
1334 	end += h->length;
1335 
1336 
1337 	while (p < end) {
1338 		e = (struct ivhd_entry *)p;
1339 		seg_id = pci_seg->id;
1340 
1341 		switch (e->type) {
1342 		case IVHD_DEV_ALL:
1343 
1344 			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1345 
1346 			for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
1347 				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1348 			break;
1349 		case IVHD_DEV_SELECT:
1350 
1351 			DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
1352 				    "flags: %02x\n",
1353 				    seg_id, PCI_BUS_NUM(e->devid),
1354 				    PCI_SLOT(e->devid),
1355 				    PCI_FUNC(e->devid),
1356 				    e->flags);
1357 
1358 			devid = e->devid;
1359 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1360 			break;
1361 		case IVHD_DEV_SELECT_RANGE_START:
1362 
1363 			DUMP_printk("  DEV_SELECT_RANGE_START\t "
1364 				    "devid: %04x:%02x:%02x.%x flags: %02x\n",
1365 				    seg_id, PCI_BUS_NUM(e->devid),
1366 				    PCI_SLOT(e->devid),
1367 				    PCI_FUNC(e->devid),
1368 				    e->flags);
1369 
1370 			devid_start = e->devid;
1371 			flags = e->flags;
1372 			ext_flags = 0;
1373 			alias = false;
1374 			break;
1375 		case IVHD_DEV_ALIAS:
1376 
1377 			DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
1378 				    "flags: %02x devid_to: %02x:%02x.%x\n",
1379 				    seg_id, PCI_BUS_NUM(e->devid),
1380 				    PCI_SLOT(e->devid),
1381 				    PCI_FUNC(e->devid),
1382 				    e->flags,
1383 				    PCI_BUS_NUM(e->ext >> 8),
1384 				    PCI_SLOT(e->ext >> 8),
1385 				    PCI_FUNC(e->ext >> 8));
1386 
1387 			devid = e->devid;
1388 			devid_to = e->ext >> 8;
1389 			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1390 			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1391 			pci_seg->alias_table[devid] = devid_to;
1392 			break;
1393 		case IVHD_DEV_ALIAS_RANGE:
1394 
1395 			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1396 				    "devid: %04x:%02x:%02x.%x flags: %02x "
1397 				    "devid_to: %04x:%02x:%02x.%x\n",
1398 				    seg_id, PCI_BUS_NUM(e->devid),
1399 				    PCI_SLOT(e->devid),
1400 				    PCI_FUNC(e->devid),
1401 				    e->flags,
1402 				    seg_id, PCI_BUS_NUM(e->ext >> 8),
1403 				    PCI_SLOT(e->ext >> 8),
1404 				    PCI_FUNC(e->ext >> 8));
1405 
1406 			devid_start = e->devid;
1407 			flags = e->flags;
1408 			devid_to = e->ext >> 8;
1409 			ext_flags = 0;
1410 			alias = true;
1411 			break;
1412 		case IVHD_DEV_EXT_SELECT:
1413 
1414 			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
1415 				    "flags: %02x ext: %08x\n",
1416 				    seg_id, PCI_BUS_NUM(e->devid),
1417 				    PCI_SLOT(e->devid),
1418 				    PCI_FUNC(e->devid),
1419 				    e->flags, e->ext);
1420 
1421 			devid = e->devid;
1422 			set_dev_entry_from_acpi(iommu, devid, e->flags,
1423 						e->ext);
1424 			break;
1425 		case IVHD_DEV_EXT_SELECT_RANGE:
1426 
1427 			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1428 				    "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
1429 				    seg_id, PCI_BUS_NUM(e->devid),
1430 				    PCI_SLOT(e->devid),
1431 				    PCI_FUNC(e->devid),
1432 				    e->flags, e->ext);
1433 
1434 			devid_start = e->devid;
1435 			flags = e->flags;
1436 			ext_flags = e->ext;
1437 			alias = false;
1438 			break;
1439 		case IVHD_DEV_RANGE_END:
1440 
1441 			DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
1442 				    seg_id, PCI_BUS_NUM(e->devid),
1443 				    PCI_SLOT(e->devid),
1444 				    PCI_FUNC(e->devid));
1445 
1446 			devid = e->devid;
1447 			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1448 				if (alias) {
1449 					pci_seg->alias_table[dev_i] = devid_to;
1450 					set_dev_entry_from_acpi(iommu,
1451 						devid_to, flags, ext_flags);
1452 				}
1453 				set_dev_entry_from_acpi(iommu, dev_i,
1454 							flags, ext_flags);
1455 			}
1456 			break;
1457 		case IVHD_DEV_SPECIAL: {
1458 			u8 handle, type;
1459 			const char *var;
1460 			u32 devid;
1461 			int ret;
1462 
1463 			handle = e->ext & 0xff;
1464 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1465 			type   = (e->ext >> 24) & 0xff;
1466 
1467 			if (type == IVHD_SPECIAL_IOAPIC)
1468 				var = "IOAPIC";
1469 			else if (type == IVHD_SPECIAL_HPET)
1470 				var = "HPET";
1471 			else
1472 				var = "UNKNOWN";
1473 
1474 			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
1475 				    var, (int)handle,
1476 				    seg_id, PCI_BUS_NUM(devid),
1477 				    PCI_SLOT(devid),
1478 				    PCI_FUNC(devid));
1479 
1480 			ret = add_special_device(type, handle, &devid, false);
1481 			if (ret)
1482 				return ret;
1483 
1484 			/*
1485 			 * add_special_device might update the devid in case a
1486 			 * command-line override is present. So call
1487 			 * set_dev_entry_from_acpi after add_special_device.
1488 			 */
1489 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1490 
1491 			break;
1492 		}
1493 		case IVHD_DEV_ACPI_HID: {
1494 			u32 devid;
1495 			u8 hid[ACPIHID_HID_LEN];
1496 			u8 uid[ACPIHID_UID_LEN];
1497 			int ret;
1498 
1499 			if (h->type != 0x40) {
1500 				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1501 				       e->type);
1502 				break;
1503 			}
1504 
1505 			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1506 			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1507 			hid[ACPIHID_HID_LEN - 1] = '\0';
1508 
1509 			if (!(*hid)) {
1510 				pr_err(FW_BUG "Invalid HID.\n");
1511 				break;
1512 			}
1513 
1514 			uid[0] = '\0';
1515 			switch (e->uidf) {
1516 			case UID_NOT_PRESENT:
1517 
1518 				if (e->uidl != 0)
1519 					pr_warn(FW_BUG "Invalid UID length.\n");
1520 
1521 				break;
1522 			case UID_IS_INTEGER:
1523 
1524 				sprintf(uid, "%d", e->uid);
1525 
1526 				break;
1527 			case UID_IS_CHARACTER:
1528 
1529 				memcpy(uid, &e->uid, e->uidl);
1530 				uid[e->uidl] = '\0';
1531 
1532 				break;
1533 			default:
1534 				break;
1535 			}
1536 
1537 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1538 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
1539 				    hid, uid, seg_id,
1540 				    PCI_BUS_NUM(devid),
1541 				    PCI_SLOT(devid),
1542 				    PCI_FUNC(devid));
1543 
1544 			flags = e->flags;
1545 
1546 			ret = add_acpi_hid_device(hid, uid, &devid, false);
1547 			if (ret)
1548 				return ret;
1549 
1550 			/*
1551 			 * add_special_device might update the devid in case a
1552 			 * command-line override is present. So call
1553 			 * set_dev_entry_from_acpi after add_special_device.
1554 			 */
1555 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1556 
1557 			break;
1558 		}
1559 		default:
1560 			break;
1561 		}
1562 
1563 		p += ivhd_entry_length(p);
1564 	}
1565 
1566 	return 0;
1567 }
1568 
1569 /* Allocate PCI segment data structure */
1570 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1571 					  struct acpi_table_header *ivrs_base)
1572 {
1573 	struct amd_iommu_pci_seg *pci_seg;
1574 	int last_bdf;
1575 
1576 	/*
1577 	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1578 	 * handle in this PCI segment. Upon this information the shared data
1579 	 * structures for the PCI segments in the system will be allocated.
1580 	 */
1581 	last_bdf = find_last_devid_acpi(ivrs_base, id);
1582 	if (last_bdf < 0)
1583 		return NULL;
1584 
1585 	pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1586 	if (pci_seg == NULL)
1587 		return NULL;
1588 
1589 	pci_seg->last_bdf = last_bdf;
1590 	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1591 	pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1592 	pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1593 	pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1594 
1595 	pci_seg->id = id;
1596 	init_llist_head(&pci_seg->dev_data_list);
1597 	INIT_LIST_HEAD(&pci_seg->unity_map);
1598 	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1599 
1600 	if (alloc_dev_table(pci_seg))
1601 		return NULL;
1602 	if (alloc_alias_table(pci_seg))
1603 		return NULL;
1604 	if (alloc_rlookup_table(pci_seg))
1605 		return NULL;
1606 
1607 	return pci_seg;
1608 }
1609 
1610 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1611 					struct acpi_table_header *ivrs_base)
1612 {
1613 	struct amd_iommu_pci_seg *pci_seg;
1614 
1615 	for_each_pci_segment(pci_seg) {
1616 		if (pci_seg->id == id)
1617 			return pci_seg;
1618 	}
1619 
1620 	return alloc_pci_segment(id, ivrs_base);
1621 }
1622 
1623 static void __init free_pci_segments(void)
1624 {
1625 	struct amd_iommu_pci_seg *pci_seg, *next;
1626 
1627 	for_each_pci_segment_safe(pci_seg, next) {
1628 		list_del(&pci_seg->list);
1629 		free_irq_lookup_table(pci_seg);
1630 		free_rlookup_table(pci_seg);
1631 		free_alias_table(pci_seg);
1632 		free_dev_table(pci_seg);
1633 		kfree(pci_seg);
1634 	}
1635 }
1636 
1637 static void __init free_iommu_one(struct amd_iommu *iommu)
1638 {
1639 	free_cwwb_sem(iommu);
1640 	free_command_buffer(iommu);
1641 	free_event_buffer(iommu);
1642 	free_ppr_log(iommu);
1643 	free_ga_log(iommu);
1644 	iommu_unmap_mmio_space(iommu);
1645 }
1646 
1647 static void __init free_iommu_all(void)
1648 {
1649 	struct amd_iommu *iommu, *next;
1650 
1651 	for_each_iommu_safe(iommu, next) {
1652 		list_del(&iommu->list);
1653 		free_iommu_one(iommu);
1654 		kfree(iommu);
1655 	}
1656 }
1657 
1658 /*
1659  * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1660  * Workaround:
1661  *     BIOS should disable L2B micellaneous clock gating by setting
1662  *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1663  */
1664 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1665 {
1666 	u32 value;
1667 
1668 	if ((boot_cpu_data.x86 != 0x15) ||
1669 	    (boot_cpu_data.x86_model < 0x10) ||
1670 	    (boot_cpu_data.x86_model > 0x1f))
1671 		return;
1672 
1673 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1674 	pci_read_config_dword(iommu->dev, 0xf4, &value);
1675 
1676 	if (value & BIT(2))
1677 		return;
1678 
1679 	/* Select NB indirect register 0x90 and enable writing */
1680 	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1681 
1682 	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1683 	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1684 
1685 	/* Clear the enable writing bit */
1686 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1687 }
1688 
1689 /*
1690  * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1691  * Workaround:
1692  *     BIOS should enable ATS write permission check by setting
1693  *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1694  */
1695 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1696 {
1697 	u32 value;
1698 
1699 	if ((boot_cpu_data.x86 != 0x15) ||
1700 	    (boot_cpu_data.x86_model < 0x30) ||
1701 	    (boot_cpu_data.x86_model > 0x3f))
1702 		return;
1703 
1704 	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1705 	value = iommu_read_l2(iommu, 0x47);
1706 
1707 	if (value & BIT(0))
1708 		return;
1709 
1710 	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1711 	iommu_write_l2(iommu, 0x47, value | BIT(0));
1712 
1713 	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1714 }
1715 
1716 /*
1717  * This function glues the initialization function for one IOMMU
1718  * together and also allocates the command buffer and programs the
1719  * hardware. It does NOT enable the IOMMU. This is done afterwards.
1720  */
1721 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1722 				 struct acpi_table_header *ivrs_base)
1723 {
1724 	struct amd_iommu_pci_seg *pci_seg;
1725 
1726 	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1727 	if (pci_seg == NULL)
1728 		return -ENOMEM;
1729 	iommu->pci_seg = pci_seg;
1730 
1731 	raw_spin_lock_init(&iommu->lock);
1732 	iommu->cmd_sem_val = 0;
1733 
1734 	/* Add IOMMU to internal data structures */
1735 	list_add_tail(&iommu->list, &amd_iommu_list);
1736 	iommu->index = amd_iommus_present++;
1737 
1738 	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1739 		WARN(1, "System has more IOMMUs than supported by this driver\n");
1740 		return -ENOSYS;
1741 	}
1742 
1743 	/* Index is fine - add IOMMU to the array */
1744 	amd_iommus[iommu->index] = iommu;
1745 
1746 	/*
1747 	 * Copy data from ACPI table entry to the iommu struct
1748 	 */
1749 	iommu->devid   = h->devid;
1750 	iommu->cap_ptr = h->cap_ptr;
1751 	iommu->mmio_phys = h->mmio_phys;
1752 
1753 	switch (h->type) {
1754 	case 0x10:
1755 		/* Check if IVHD EFR contains proper max banks/counters */
1756 		if ((h->efr_attr != 0) &&
1757 		    ((h->efr_attr & (0xF << 13)) != 0) &&
1758 		    ((h->efr_attr & (0x3F << 17)) != 0))
1759 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1760 		else
1761 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1762 
1763 		/*
1764 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1765 		 * GAM also requires GA mode. Therefore, we need to
1766 		 * check cmpxchg16b support before enabling it.
1767 		 */
1768 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1769 		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1770 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1771 		break;
1772 	case 0x11:
1773 	case 0x40:
1774 		if (h->efr_reg & (1 << 9))
1775 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1776 		else
1777 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1778 
1779 		/*
1780 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1781 		 * XT, GAM also requires GA mode. Therefore, we need to
1782 		 * check cmpxchg16b support before enabling them.
1783 		 */
1784 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1785 		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1786 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1787 			break;
1788 		}
1789 
1790 		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1791 			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1792 
1793 		early_iommu_features_init(iommu, h);
1794 
1795 		break;
1796 	default:
1797 		return -EINVAL;
1798 	}
1799 
1800 	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1801 						iommu->mmio_phys_end);
1802 	if (!iommu->mmio_base)
1803 		return -ENOMEM;
1804 
1805 	return init_iommu_from_acpi(iommu, h);
1806 }
1807 
1808 static int __init init_iommu_one_late(struct amd_iommu *iommu)
1809 {
1810 	int ret;
1811 
1812 	if (alloc_cwwb_sem(iommu))
1813 		return -ENOMEM;
1814 
1815 	if (alloc_command_buffer(iommu))
1816 		return -ENOMEM;
1817 
1818 	if (alloc_event_buffer(iommu))
1819 		return -ENOMEM;
1820 
1821 	iommu->int_enabled = false;
1822 
1823 	init_translation_status(iommu);
1824 	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1825 		iommu_disable(iommu);
1826 		clear_translation_pre_enabled(iommu);
1827 		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1828 			iommu->index);
1829 	}
1830 	if (amd_iommu_pre_enabled)
1831 		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1832 
1833 	if (amd_iommu_irq_remap) {
1834 		ret = amd_iommu_create_irq_domain(iommu);
1835 		if (ret)
1836 			return ret;
1837 	}
1838 
1839 	/*
1840 	 * Make sure IOMMU is not considered to translate itself. The IVRS
1841 	 * table tells us so, but this is a lie!
1842 	 */
1843 	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1844 
1845 	return 0;
1846 }
1847 
1848 /**
1849  * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1850  * @ivrs: Pointer to the IVRS header
1851  *
1852  * This function search through all IVDB of the maximum supported IVHD
1853  */
1854 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1855 {
1856 	u8 *base = (u8 *)ivrs;
1857 	struct ivhd_header *ivhd = (struct ivhd_header *)
1858 					(base + IVRS_HEADER_LENGTH);
1859 	u8 last_type = ivhd->type;
1860 	u16 devid = ivhd->devid;
1861 
1862 	while (((u8 *)ivhd - base < ivrs->length) &&
1863 	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1864 		u8 *p = (u8 *) ivhd;
1865 
1866 		if (ivhd->devid == devid)
1867 			last_type = ivhd->type;
1868 		ivhd = (struct ivhd_header *)(p + ivhd->length);
1869 	}
1870 
1871 	return last_type;
1872 }
1873 
1874 /*
1875  * Iterates over all IOMMU entries in the ACPI table, allocates the
1876  * IOMMU structure and initializes it with init_iommu_one()
1877  */
1878 static int __init init_iommu_all(struct acpi_table_header *table)
1879 {
1880 	u8 *p = (u8 *)table, *end = (u8 *)table;
1881 	struct ivhd_header *h;
1882 	struct amd_iommu *iommu;
1883 	int ret;
1884 
1885 	end += table->length;
1886 	p += IVRS_HEADER_LENGTH;
1887 
1888 	/* Phase 1: Process all IVHD blocks */
1889 	while (p < end) {
1890 		h = (struct ivhd_header *)p;
1891 		if (*p == amd_iommu_target_ivhd_type) {
1892 
1893 			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1894 				    "flags: %01x info %04x\n",
1895 				    h->pci_seg, PCI_BUS_NUM(h->devid),
1896 				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1897 				    h->cap_ptr, h->flags, h->info);
1898 			DUMP_printk("       mmio-addr: %016llx\n",
1899 				    h->mmio_phys);
1900 
1901 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1902 			if (iommu == NULL)
1903 				return -ENOMEM;
1904 
1905 			ret = init_iommu_one(iommu, h, table);
1906 			if (ret)
1907 				return ret;
1908 		}
1909 		p += h->length;
1910 
1911 	}
1912 	WARN_ON(p != end);
1913 
1914 	/* Phase 2 : Early feature support check */
1915 	get_global_efr();
1916 
1917 	/* Phase 3 : Enabling IOMMU features */
1918 	for_each_iommu(iommu) {
1919 		ret = init_iommu_one_late(iommu);
1920 		if (ret)
1921 			return ret;
1922 	}
1923 
1924 	return 0;
1925 }
1926 
1927 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1928 {
1929 	u64 val;
1930 	struct pci_dev *pdev = iommu->dev;
1931 
1932 	if (!iommu_feature(iommu, FEATURE_PC))
1933 		return;
1934 
1935 	amd_iommu_pc_present = true;
1936 
1937 	pci_info(pdev, "IOMMU performance counters supported\n");
1938 
1939 	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1940 	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1941 	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1942 
1943 	return;
1944 }
1945 
1946 static ssize_t amd_iommu_show_cap(struct device *dev,
1947 				  struct device_attribute *attr,
1948 				  char *buf)
1949 {
1950 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1951 	return sprintf(buf, "%x\n", iommu->cap);
1952 }
1953 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1954 
1955 static ssize_t amd_iommu_show_features(struct device *dev,
1956 				       struct device_attribute *attr,
1957 				       char *buf)
1958 {
1959 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1960 	return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features);
1961 }
1962 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1963 
1964 static struct attribute *amd_iommu_attrs[] = {
1965 	&dev_attr_cap.attr,
1966 	&dev_attr_features.attr,
1967 	NULL,
1968 };
1969 
1970 static struct attribute_group amd_iommu_group = {
1971 	.name = "amd-iommu",
1972 	.attrs = amd_iommu_attrs,
1973 };
1974 
1975 static const struct attribute_group *amd_iommu_groups[] = {
1976 	&amd_iommu_group,
1977 	NULL,
1978 };
1979 
1980 /*
1981  * Note: IVHD 0x11 and 0x40 also contains exact copy
1982  * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
1983  * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
1984  */
1985 static void __init late_iommu_features_init(struct amd_iommu *iommu)
1986 {
1987 	u64 features, features2;
1988 
1989 	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
1990 		return;
1991 
1992 	/* read extended feature bits */
1993 	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
1994 	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
1995 
1996 	if (!iommu->features) {
1997 		iommu->features = features;
1998 		iommu->features2 = features2;
1999 		return;
2000 	}
2001 
2002 	/*
2003 	 * Sanity check and warn if EFR values from
2004 	 * IVHD and MMIO conflict.
2005 	 */
2006 	if (features != iommu->features ||
2007 	    features2 != iommu->features2) {
2008 		pr_warn(FW_WARN
2009 			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2010 			features, iommu->features,
2011 			features2, iommu->features2);
2012 	}
2013 }
2014 
2015 static int __init iommu_init_pci(struct amd_iommu *iommu)
2016 {
2017 	int cap_ptr = iommu->cap_ptr;
2018 	int ret;
2019 
2020 	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2021 						 PCI_BUS_NUM(iommu->devid),
2022 						 iommu->devid & 0xff);
2023 	if (!iommu->dev)
2024 		return -ENODEV;
2025 
2026 	/* Prevent binding other PCI device drivers to IOMMU devices */
2027 	iommu->dev->match_driver = false;
2028 
2029 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2030 			      &iommu->cap);
2031 
2032 	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2033 		amd_iommu_iotlb_sup = false;
2034 
2035 	late_iommu_features_init(iommu);
2036 
2037 	if (iommu_feature(iommu, FEATURE_GT)) {
2038 		int glxval;
2039 		u32 max_pasid;
2040 		u64 pasmax;
2041 
2042 		pasmax = iommu->features & FEATURE_PASID_MASK;
2043 		pasmax >>= FEATURE_PASID_SHIFT;
2044 		max_pasid  = (1 << (pasmax + 1)) - 1;
2045 
2046 		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
2047 
2048 		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
2049 
2050 		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
2051 		glxval >>= FEATURE_GLXVAL_SHIFT;
2052 
2053 		if (amd_iommu_max_glx_val == -1)
2054 			amd_iommu_max_glx_val = glxval;
2055 		else
2056 			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2057 	}
2058 
2059 	if (iommu_feature(iommu, FEATURE_GT) &&
2060 	    iommu_feature(iommu, FEATURE_PPR)) {
2061 		iommu->is_iommu_v2   = true;
2062 		amd_iommu_v2_present = true;
2063 	}
2064 
2065 	if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
2066 		return -ENOMEM;
2067 
2068 	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2069 		pr_info("Using strict mode due to virtualization\n");
2070 		iommu_set_dma_strict();
2071 		amd_iommu_np_cache = true;
2072 	}
2073 
2074 	init_iommu_perf_ctr(iommu);
2075 
2076 	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2077 		if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
2078 		    !iommu_feature(iommu, FEATURE_GT)) {
2079 			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
2080 			amd_iommu_pgtable = AMD_IOMMU_V1;
2081 		} else if (iommu_default_passthrough()) {
2082 			pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n");
2083 			amd_iommu_pgtable = AMD_IOMMU_V1;
2084 		}
2085 	}
2086 
2087 	if (is_rd890_iommu(iommu->dev)) {
2088 		int i, j;
2089 
2090 		iommu->root_pdev =
2091 			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2092 						    iommu->dev->bus->number,
2093 						    PCI_DEVFN(0, 0));
2094 
2095 		/*
2096 		 * Some rd890 systems may not be fully reconfigured by the
2097 		 * BIOS, so it's necessary for us to store this information so
2098 		 * it can be reprogrammed on resume
2099 		 */
2100 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2101 				&iommu->stored_addr_lo);
2102 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2103 				&iommu->stored_addr_hi);
2104 
2105 		/* Low bit locks writes to configuration space */
2106 		iommu->stored_addr_lo &= ~1;
2107 
2108 		for (i = 0; i < 6; i++)
2109 			for (j = 0; j < 0x12; j++)
2110 				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2111 
2112 		for (i = 0; i < 0x83; i++)
2113 			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2114 	}
2115 
2116 	amd_iommu_erratum_746_workaround(iommu);
2117 	amd_iommu_ats_write_check_workaround(iommu);
2118 
2119 	ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2120 			       amd_iommu_groups, "ivhd%d", iommu->index);
2121 	if (ret)
2122 		return ret;
2123 
2124 	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2125 
2126 	return pci_enable_device(iommu->dev);
2127 }
2128 
2129 static void print_iommu_info(void)
2130 {
2131 	static const char * const feat_str[] = {
2132 		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2133 		"IA", "GA", "HE", "PC"
2134 	};
2135 	struct amd_iommu *iommu;
2136 
2137 	for_each_iommu(iommu) {
2138 		struct pci_dev *pdev = iommu->dev;
2139 		int i;
2140 
2141 		pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
2142 
2143 		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
2144 			pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
2145 
2146 			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2147 				if (iommu_feature(iommu, (1ULL << i)))
2148 					pr_cont(" %s", feat_str[i]);
2149 			}
2150 
2151 			if (iommu->features & FEATURE_GAM_VAPIC)
2152 				pr_cont(" GA_vAPIC");
2153 
2154 			if (iommu->features & FEATURE_SNP)
2155 				pr_cont(" SNP");
2156 
2157 			pr_cont("\n");
2158 		}
2159 	}
2160 	if (irq_remapping_enabled) {
2161 		pr_info("Interrupt remapping enabled\n");
2162 		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2163 			pr_info("X2APIC enabled\n");
2164 	}
2165 	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2166 		pr_info("V2 page table enabled (Paging mode : %d level)\n",
2167 			amd_iommu_gpt_level);
2168 	}
2169 }
2170 
2171 static int __init amd_iommu_init_pci(void)
2172 {
2173 	struct amd_iommu *iommu;
2174 	struct amd_iommu_pci_seg *pci_seg;
2175 	int ret;
2176 
2177 	for_each_iommu(iommu) {
2178 		ret = iommu_init_pci(iommu);
2179 		if (ret) {
2180 			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2181 			       iommu->index, ret);
2182 			goto out;
2183 		}
2184 		/* Need to setup range after PCI init */
2185 		iommu_set_cwwb_range(iommu);
2186 	}
2187 
2188 	/*
2189 	 * Order is important here to make sure any unity map requirements are
2190 	 * fulfilled. The unity mappings are created and written to the device
2191 	 * table during the iommu_init_pci() call.
2192 	 *
2193 	 * After that we call init_device_table_dma() to make sure any
2194 	 * uninitialized DTE will block DMA, and in the end we flush the caches
2195 	 * of all IOMMUs to make sure the changes to the device table are
2196 	 * active.
2197 	 */
2198 	for_each_pci_segment(pci_seg)
2199 		init_device_table_dma(pci_seg);
2200 
2201 	for_each_iommu(iommu)
2202 		iommu_flush_all_caches(iommu);
2203 
2204 	print_iommu_info();
2205 
2206 out:
2207 	return ret;
2208 }
2209 
2210 /****************************************************************************
2211  *
2212  * The following functions initialize the MSI interrupts for all IOMMUs
2213  * in the system. It's a bit challenging because there could be multiple
2214  * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2215  * pci_dev.
2216  *
2217  ****************************************************************************/
2218 
2219 static int iommu_setup_msi(struct amd_iommu *iommu)
2220 {
2221 	int r;
2222 
2223 	r = pci_enable_msi(iommu->dev);
2224 	if (r)
2225 		return r;
2226 
2227 	r = request_threaded_irq(iommu->dev->irq,
2228 				 amd_iommu_int_handler,
2229 				 amd_iommu_int_thread,
2230 				 0, "AMD-Vi",
2231 				 iommu);
2232 
2233 	if (r) {
2234 		pci_disable_msi(iommu->dev);
2235 		return r;
2236 	}
2237 
2238 	return 0;
2239 }
2240 
/*
 * 64-bit value written to the MMIO_INTCAPXT_* registers (see
 * intcapxt_unmask_irq()). It can be accessed either as the raw value
 * (capxt) or through the bitfields below; the field widths add up to 64.
 */
union intcapxt {
	u64	capxt;			/* raw register value */
	struct {
		u64	reserved_0		:  2,
			dest_mode_logical	:  1,	/* from apic->dest_mode_logical */
			reserved_1		:  5,
			destid_0_23		: 24,	/* destination APIC ID, bits 23:0 */
			vector			:  8,	/* interrupt vector */
			reserved_2		: 16,
			destid_24_31		:  8;	/* destination APIC ID, bits 31:24 */
	};
} __attribute__ ((packed));
2253 
2254 
/* Forward declaration; the chip is defined after its callbacks below */
static struct irq_chip intcapxt_controller;

/*
 * irq_domain .activate callback. Nothing is done here; it always reports
 * success (0).
 */
static int intcapxt_irqdomain_activate(struct irq_domain *domain,
				       struct irq_data *irqd, bool reserve)
{
	return 0;
}
2262 
/* irq_domain .deactivate callback; intentionally a no-op. */
static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
					  struct irq_data *irqd)
{
}
2267 
2268 
2269 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2270 				    unsigned int nr_irqs, void *arg)
2271 {
2272 	struct irq_alloc_info *info = arg;
2273 	int i, ret;
2274 
2275 	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2276 		return -EINVAL;
2277 
2278 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2279 	if (ret < 0)
2280 		return ret;
2281 
2282 	for (i = virq; i < virq + nr_irqs; i++) {
2283 		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2284 
2285 		irqd->chip = &intcapxt_controller;
2286 		irqd->chip_data = info->data;
2287 		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2288 	}
2289 
2290 	return ret;
2291 }
2292 
/*
 * irq_domain .free callback: hands the @nr_irqs interrupts starting at
 * @virq to irq_domain_free_irqs_top().
 */
static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
				    unsigned int nr_irqs)
{
	irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
2298 
2299 
2300 static void intcapxt_unmask_irq(struct irq_data *irqd)
2301 {
2302 	struct amd_iommu *iommu = irqd->chip_data;
2303 	struct irq_cfg *cfg = irqd_cfg(irqd);
2304 	union intcapxt xt;
2305 
2306 	xt.capxt = 0ULL;
2307 	xt.dest_mode_logical = apic->dest_mode_logical;
2308 	xt.vector = cfg->vector;
2309 	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2310 	xt.destid_24_31 = cfg->dest_apicid >> 24;
2311 
2312 	/**
2313 	 * Current IOMMU implementation uses the same IRQ for all
2314 	 * 3 IOMMU interrupts.
2315 	 */
2316 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2317 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2318 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2319 }
2320 
2321 static void intcapxt_mask_irq(struct irq_data *irqd)
2322 {
2323 	struct amd_iommu *iommu = irqd->chip_data;
2324 
2325 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2326 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2327 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2328 }
2329 
2330 
2331 static int intcapxt_set_affinity(struct irq_data *irqd,
2332 				 const struct cpumask *mask, bool force)
2333 {
2334 	struct irq_data *parent = irqd->parent_data;
2335 	int ret;
2336 
2337 	ret = parent->chip->irq_set_affinity(parent, mask, force);
2338 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2339 		return ret;
2340 	return 0;
2341 }
2342 
/*
 * irq_chip .irq_set_wake callback: enabling wake-up (@on != 0) is refused
 * with -EOPNOTSUPP, disabling it trivially succeeds.
 */
static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
{
	if (on)
		return -EOPNOTSUPP;

	return 0;
}
2347 
/*
 * irq_chip installed on every interrupt allocated by
 * intcapxt_irqdomain_alloc(). Mask/unmask program the INTCAPXT registers
 * directly; ack and retrigger are forwarded to the parent hierarchy.
 */
static struct irq_chip intcapxt_controller = {
	.name			= "IOMMU-MSI",
	.irq_unmask		= intcapxt_unmask_irq,
	.irq_mask		= intcapxt_mask_irq,
	.irq_ack		= irq_chip_ack_parent,
	.irq_retrigger		= irq_chip_retrigger_hierarchy,
	.irq_set_affinity       = intcapxt_set_affinity,
	.irq_set_wake		= intcapxt_set_wake,
	.flags			= IRQCHIP_MASK_ON_SUSPEND,
};
2358 
/* Callbacks of the IRQ domain created in iommu_get_irqdomain() */
static const struct irq_domain_ops intcapxt_domain_ops = {
	.alloc			= intcapxt_irqdomain_alloc,
	.free			= intcapxt_irqdomain_free,
	.activate		= intcapxt_irqdomain_activate,
	.deactivate		= intcapxt_irqdomain_deactivate,
};
2365 
2366 
2367 static struct irq_domain *iommu_irqdomain;
2368 
2369 static struct irq_domain *iommu_get_irqdomain(void)
2370 {
2371 	struct fwnode_handle *fn;
2372 
2373 	/* No need for locking here (yet) as the init is single-threaded */
2374 	if (iommu_irqdomain)
2375 		return iommu_irqdomain;
2376 
2377 	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2378 	if (!fn)
2379 		return NULL;
2380 
2381 	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2382 						      fn, &intcapxt_domain_ops,
2383 						      NULL);
2384 	if (!iommu_irqdomain)
2385 		irq_domain_free_fwnode(fn);
2386 
2387 	return iommu_irqdomain;
2388 }
2389 
2390 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2391 {
2392 	struct irq_domain *domain;
2393 	struct irq_alloc_info info;
2394 	int irq, ret;
2395 	int node = dev_to_node(&iommu->dev->dev);
2396 
2397 	domain = iommu_get_irqdomain();
2398 	if (!domain)
2399 		return -ENXIO;
2400 
2401 	init_irq_alloc_info(&info, NULL);
2402 	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2403 	info.data = iommu;
2404 
2405 	irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2406 	if (irq < 0) {
2407 		irq_domain_remove(domain);
2408 		return irq;
2409 	}
2410 
2411 	ret = request_threaded_irq(irq, amd_iommu_int_handler,
2412 				   amd_iommu_int_thread, 0, "AMD-Vi", iommu);
2413 	if (ret) {
2414 		irq_domain_free_irqs(irq, 1);
2415 		irq_domain_remove(domain);
2416 		return ret;
2417 	}
2418 
2419 	return 0;
2420 }
2421 
2422 static int iommu_init_irq(struct amd_iommu *iommu)
2423 {
2424 	int ret;
2425 
2426 	if (iommu->int_enabled)
2427 		goto enable_faults;
2428 
2429 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2430 		ret = iommu_setup_intcapxt(iommu);
2431 	else if (iommu->dev->msi_cap)
2432 		ret = iommu_setup_msi(iommu);
2433 	else
2434 		ret = -ENODEV;
2435 
2436 	if (ret)
2437 		return ret;
2438 
2439 	iommu->int_enabled = true;
2440 enable_faults:
2441 
2442 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2443 		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2444 
2445 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2446 
2447 	if (iommu->ppr_log != NULL)
2448 		iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
2449 	return 0;
2450 }
2451 
2452 /****************************************************************************
2453  *
2454  * The next functions belong to the third pass of parsing the ACPI
2455  * table. In this last pass the memory mapping requirements are
2456  * gathered (like exclusion and unity mapping ranges).
2457  *
2458  ****************************************************************************/
2459 
2460 static void __init free_unity_maps(void)
2461 {
2462 	struct unity_map_entry *entry, *next;
2463 	struct amd_iommu_pci_seg *p, *pci_seg;
2464 
2465 	for_each_pci_segment_safe(pci_seg, p) {
2466 		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2467 			list_del(&entry->list);
2468 			kfree(entry);
2469 		}
2470 	}
2471 }
2472 
2473 /* called for unity map ACPI definition */
2474 static int __init init_unity_map_range(struct ivmd_header *m,
2475 				       struct acpi_table_header *ivrs_base)
2476 {
2477 	struct unity_map_entry *e = NULL;
2478 	struct amd_iommu_pci_seg *pci_seg;
2479 	char *s;
2480 
2481 	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2482 	if (pci_seg == NULL)
2483 		return -ENOMEM;
2484 
2485 	e = kzalloc(sizeof(*e), GFP_KERNEL);
2486 	if (e == NULL)
2487 		return -ENOMEM;
2488 
2489 	switch (m->type) {
2490 	default:
2491 		kfree(e);
2492 		return 0;
2493 	case ACPI_IVMD_TYPE:
2494 		s = "IVMD_TYPEi\t\t\t";
2495 		e->devid_start = e->devid_end = m->devid;
2496 		break;
2497 	case ACPI_IVMD_TYPE_ALL:
2498 		s = "IVMD_TYPE_ALL\t\t";
2499 		e->devid_start = 0;
2500 		e->devid_end = pci_seg->last_bdf;
2501 		break;
2502 	case ACPI_IVMD_TYPE_RANGE:
2503 		s = "IVMD_TYPE_RANGE\t\t";
2504 		e->devid_start = m->devid;
2505 		e->devid_end = m->aux;
2506 		break;
2507 	}
2508 	e->address_start = PAGE_ALIGN(m->range_start);
2509 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2510 	e->prot = m->flags >> 1;
2511 
2512 	/*
2513 	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2514 	 * since some buggy BIOSes might lead to the overwritten exclusion
2515 	 * range (exclusion_start and exclusion_length members). This
2516 	 * happens when there are multiple exclusion ranges (IVMD entries)
2517 	 * defined in ACPI table.
2518 	 */
2519 	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2520 		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2521 
2522 	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2523 		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2524 		    " flags: %x\n", s, m->pci_seg,
2525 		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2526 		    PCI_FUNC(e->devid_start), m->pci_seg,
2527 		    PCI_BUS_NUM(e->devid_end),
2528 		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2529 		    e->address_start, e->address_end, m->flags);
2530 
2531 	list_add_tail(&e->list, &pci_seg->unity_map);
2532 
2533 	return 0;
2534 }
2535 
2536 /* iterates over all memory definitions we find in the ACPI table */
2537 static int __init init_memory_definitions(struct acpi_table_header *table)
2538 {
2539 	u8 *p = (u8 *)table, *end = (u8 *)table;
2540 	struct ivmd_header *m;
2541 
2542 	end += table->length;
2543 	p += IVRS_HEADER_LENGTH;
2544 
2545 	while (p < end) {
2546 		m = (struct ivmd_header *)p;
2547 		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2548 			init_unity_map_range(m, table);
2549 
2550 		p += m->length;
2551 	}
2552 
2553 	return 0;
2554 }
2555 
2556 /*
2557  * Init the device table to not allow DMA access for devices
2558  */
2559 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2560 {
2561 	u32 devid;
2562 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2563 
2564 	if (dev_table == NULL)
2565 		return;
2566 
2567 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2568 		__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
2569 		if (!amd_iommu_snp_en)
2570 			__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
2571 	}
2572 }
2573 
2574 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2575 {
2576 	u32 devid;
2577 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2578 
2579 	if (dev_table == NULL)
2580 		return;
2581 
2582 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2583 		dev_table[devid].data[0] = 0ULL;
2584 		dev_table[devid].data[1] = 0ULL;
2585 	}
2586 }
2587 
2588 static void init_device_table(void)
2589 {
2590 	struct amd_iommu_pci_seg *pci_seg;
2591 	u32 devid;
2592 
2593 	if (!amd_iommu_irq_remap)
2594 		return;
2595 
2596 	for_each_pci_segment(pci_seg) {
2597 		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2598 			__set_dev_entry_bit(pci_seg->dev_table,
2599 					    devid, DEV_ENTRY_IRQ_TBL_EN);
2600 	}
2601 }
2602 
2603 static void iommu_init_flags(struct amd_iommu *iommu)
2604 {
2605 	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2606 		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2607 		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2608 
2609 	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2610 		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2611 		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2612 
2613 	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2614 		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2615 		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2616 
2617 	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2618 		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2619 		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2620 
2621 	/*
2622 	 * make IOMMU memory accesses cache coherent
2623 	 */
2624 	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2625 
2626 	/* Set IOTLB invalidation timeout to 1s */
2627 	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2628 }
2629 
/*
 * Re-program an RD890 IOMMU on resume from the values saved in @iommu
 * (stored_addr_lo/hi, stored_l1, stored_l2). Does nothing for other IOMMUs
 * or when no root device (root_pdev) was recorded.
 */
static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
{
	int i, j;
	u32 ioc_feature_control;
	struct pci_dev *pdev = iommu->root_pdev;

	/* RD890 BIOSes may not have completely reconfigured the iommu */
	if (!is_rd890_iommu(iommu->dev) || !pdev)
		return;

	/*
	 * First, we need to ensure that the iommu is enabled. This is
	 * controlled by a register in the northbridge
	 */

	/* Select Northbridge indirect register 0x75 and enable writing */
	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);

	/* Enable the iommu */
	if (!(ioc_feature_control & 0x1))
		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);

	/* Restore the iommu BAR */
	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
			       iommu->stored_addr_lo);
	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
			       iommu->stored_addr_hi);

	/* Restore the l1 indirect regs for each of the 6 l1s */
	for (i = 0; i < 6; i++)
		for (j = 0; j < 0x12; j++)
			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);

	/* Restore the l2 indirect regs */
	for (i = 0; i < 0x83; i++)
		iommu_write_l2(iommu, i, iommu->stored_l2[i]);

	/*
	 * Lock PCI setup registers: rewrite the low BAR word with bit 0 set
	 * (the restore above wrote it without the lock bit).
	 */
	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
			       iommu->stored_addr_lo | 1);
}
2672 
/*
 * Select the IRTE operations for @iommu according to amd_iommu_guest_ir:
 * in the vAPIC and legacy-GA modes GA is enabled in the control register
 * and the 128-bit IRTE ops are used, in every other mode the 32-bit ones.
 * Compiles to an empty function without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_ga(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	if (amd_iommu_guest_ir == AMD_IOMMU_GUEST_IR_VAPIC ||
	    amd_iommu_guest_ir == AMD_IOMMU_GUEST_IR_LEGACY_GA) {
		iommu_feature_enable(iommu, CONTROL_GA_EN);
		iommu->irte_ops = &irte_128_ops;
	} else {
		iommu->irte_ops = &irte_32_ops;
	}
#endif
}
2688 
/*
 * Bring a single IOMMU from disabled to enabled: the unit is disabled
 * first, then flags, device table, command/event buffers, exclusion range,
 * GA and XT settings are programmed before it is switched on, and finally
 * all of its caches are flushed.
 */
static void early_enable_iommu(struct amd_iommu *iommu)
{
	iommu_disable(iommu);
	iommu_init_flags(iommu);
	iommu_set_device_table(iommu);
	iommu_enable_command_buffer(iommu);
	iommu_enable_event_buffer(iommu);
	iommu_set_exclusion_range(iommu);
	iommu_enable_ga(iommu);
	iommu_enable_xt(iommu);
	iommu_enable(iommu);
	iommu_flush_all_caches(iommu);
}
2702 
/*
 * This function finally enables all IOMMUs found in the system after
 * they have been initialized.
 *
 * In a kdump kernel where all IOMMUs are already enabled, it first tries to
 * copy the old content of the device table entries. If that is not the case,
 * or if the copy fails, it continues the way a normal kernel does.
 */
static void early_enable_iommus(void)
{
	struct amd_iommu *iommu;
	struct amd_iommu_pci_seg *pci_seg;

	if (!copy_device_table()) {
		/*
		 * If come here because of failure in copying device table from old
		 * kernel with all IOMMUs enabled, print error message and try to
		 * free allocated old_dev_tbl_cpy.
		 */
		if (amd_iommu_pre_enabled)
			pr_err("Failed to copy DEV table from previous kernel.\n");

		for_each_pci_segment(pci_seg) {
			if (pci_seg->old_dev_tbl_cpy != NULL) {
				free_pages((unsigned long)pci_seg->old_dev_tbl_cpy,
						get_order(pci_seg->dev_table_size));
				pci_seg->old_dev_tbl_cpy = NULL;
			}
		}

		/* Regular path: full re-initialization of every IOMMU */
		for_each_iommu(iommu) {
			clear_translation_pre_enabled(iommu);
			early_enable_iommu(iommu);
		}
	} else {
		pr_info("Copied DEV table from previous kernel.\n");

		/* The copied table replaces the freshly allocated one */
		for_each_pci_segment(pci_seg) {
			free_pages((unsigned long)pci_seg->dev_table,
				   get_order(pci_seg->dev_table_size));
			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
		}

		/*
		 * Unlike early_enable_iommu(), this path never calls
		 * iommu_disable(): only the command and event buffers are
		 * restarted before the new device table is installed.
		 */
		for_each_iommu(iommu) {
			iommu_disable_command_buffer(iommu);
			iommu_disable_event_buffer(iommu);
			iommu_enable_command_buffer(iommu);
			iommu_enable_event_buffer(iommu);
			iommu_enable_ga(iommu);
			iommu_enable_xt(iommu);
			iommu_set_device_table(iommu);
			iommu_flush_all_caches(iommu);
		}
	}
}
2758 
/* Enable the PPR log and guest translation (GT) on every IOMMU. */
static void enable_iommus_v2(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu) {
		iommu_enable_ppr_log(iommu);
		iommu_enable_gt(iommu);
	}
}
2768 
/*
 * Enable guest virtual APIC support on all IOMMUs, if possible.
 *
 * A GA log that is still running is stopped first. vAPIC mode is then
 * downgraded to legacy GA mode when not all IOMMUs advertise
 * FEATURE_GAM_VAPIC, or when SNP is enabled without SNP-AVIC support in
 * amd_iommu_efr2. Otherwise the GA log is set up and GAM (plus SNPAVIC with
 * SNP) is enabled on every IOMMU and IRQ posting is advertised.
 * Compiles to an empty function without CONFIG_IRQ_REMAP.
 */
static void enable_iommus_vapic(void)
{
#ifdef CONFIG_IRQ_REMAP
	u32 status, i;
	struct amd_iommu *iommu;

	for_each_iommu(iommu) {
		/*
		 * Disable GALog if already running. It could have been enabled
		 * in the previous boot before kdump.
		 */
		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
		if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
			continue;

		iommu_feature_disable(iommu, CONTROL_GALOG_EN);
		iommu_feature_disable(iommu, CONTROL_GAINT_EN);

		/*
		 * Need to set and poll check the GALOGRun bit to zero before
		 * we can set/ modify GA Log registers safely.
		 */
		for (i = 0; i < LOOP_TIMEOUT; ++i) {
			status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
			if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
				break;
			udelay(10);
		}

		/* The log never stopped: warn and give up on vAPIC entirely */
		if (WARN_ON(i >= LOOP_TIMEOUT))
			return;
	}

	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
	    !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
		return;
	}

	if (amd_iommu_snp_en &&
	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
		pr_warn("Force to disable Virtual APIC due to SNP\n");
		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
		return;
	}

	/* Enabling GAM and SNPAVIC support */
	for_each_iommu(iommu) {
		/*
		 * NOTE(review): on failure this returns without undoing the
		 * IOMMUs already switched to GAM in earlier iterations.
		 */
		if (iommu_init_ga_log(iommu) ||
		    iommu_ga_log_enable(iommu))
			return;

		iommu_feature_enable(iommu, CONTROL_GAM_EN);
		if (amd_iommu_snp_en)
			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
	}

	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
	pr_info("Virtual APIC enabled\n");
#endif
}
2830 
/*
 * Full enable sequence (used by amd_iommu_resume()): base enable first,
 * then virtual APIC support, then the v2 (PPR/GT) features.
 */
static void enable_iommus(void)
{
	early_enable_iommus();
	enable_iommus_vapic();
	enable_iommus_v2();
}
2837 
/*
 * Disable every IOMMU and, when running in vAPIC guest-IR mode, withdraw
 * the IRQ posting capability that enable_iommus_vapic() advertises.
 */
static void disable_iommus(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu)
		iommu_disable(iommu);

#ifdef CONFIG_IRQ_REMAP
	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
#endif
}
2850 
2851 /*
2852  * Suspend/Resume support
2853  * disable suspend until real resume implemented
2854  */
2855 
/*
 * syscore resume callback: apply the per-IOMMU resume quirks, re-enable all
 * IOMMUs and their interrupts.
 */
static void amd_iommu_resume(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu)
		iommu_apply_resume_quirks(iommu);

	/* re-load the hardware */
	enable_iommus();

	/* The result is not checked; this callback cannot report failure */
	amd_iommu_enable_interrupts();
}
2868 
/* syscore suspend callback: disable all IOMMUs. Always returns 0. */
static int amd_iommu_suspend(void)
{
	/* disable IOMMUs to go out of the way for BIOS */
	disable_iommus();

	return 0;
}
2876 
/* Suspend/resume hooks; registered from state_next() (IOMMU_ENABLED state) */
static struct syscore_ops amd_iommu_syscore_ops = {
	.suspend = amd_iommu_suspend,
	.resume = amd_iommu_resume,
};
2881 
/*
 * Release the interrupt-remapping table cache, all IOMMU structures and all
 * PCI segment structures.
 */
static void __init free_iommu_resources(void)
{
	kmem_cache_destroy(amd_iommu_irq_cache);
	/* Reset the pointer so the destroyed cache is not referenced again */
	amd_iommu_irq_cache = NULL;

	free_iommu_all();
	free_pci_segments();
}
2890 
/* SB IOAPIC is always on this device in AMD systems */
#define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))

/*
 * Check whether the IOAPICs known to the kernel are described in the IVRS
 * table (or by command-line overrides), logging an error for every missing
 * IOAPIC and for a missing southbridge IOAPIC.
 *
 * Returns true if interrupt remapping may stay enabled, false otherwise.
 */
static bool __init check_ioapic_information(void)
{
	const char *fw_bug = FW_BUG;
	bool ret, has_sb_ioapic;
	int idx;

	has_sb_ioapic = false;
	ret           = false;

	/*
	 * If we have map overrides on the kernel command line the
	 * messages in this function might not describe firmware bugs
	 * anymore - so be careful
	 */
	if (cmdline_maps)
		fw_bug = "";

	/*
	 * NOTE(review): the result depends on IOAPIC order. A missing IOAPIC
	 * clears ret, but a southbridge IOAPIC found in a later iteration sets
	 * it again - confirm this is intended.
	 */
	for (idx = 0; idx < nr_ioapics; idx++) {
		int devid, id = mpc_ioapic_id(idx);

		devid = get_ioapic_devid(id);
		if (devid < 0) {
			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
				fw_bug, id);
			ret = false;
		} else if (devid == IOAPIC_SB_DEVID) {
			has_sb_ioapic = true;
			ret           = true;
		}
	}

	if (!has_sb_ioapic) {
		/*
		 * We expect the SB IOAPIC to be listed in the IVRS
		 * table. The system timer is connected to the SB IOAPIC
		 * and if we don't have it in the list the system will
		 * panic at boot time.  This situation usually happens
		 * when the BIOS is buggy and provides us the wrong
		 * device id for the IOAPIC in the system.
		 */
		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
	}

	if (!ret)
		pr_err("Disabling interrupt remapping\n");

	return ret;
}
2942 
/*
 * Free the protection-domain allocation bitmap (allocated in
 * early_amd_iommu_init()) and all unity map entries.
 */
static void __init free_dma_resources(void)
{
	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
		   get_order(MAX_DOMAIN_ID/8));
	amd_iommu_pd_alloc_bitmap = NULL;

	free_unity_maps();
}
2951 
2952 static void __init ivinfo_init(void *ivrs)
2953 {
2954 	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
2955 }
2956 
2957 /*
2958  * This is the hardware init function for AMD IOMMU in the system.
2959  * This function is called either from amd_iommu_init or from the interrupt
2960  * remapping setup code.
2961  *
2962  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2963  * four times:
2964  *
2965  *	1 pass) Discover the most comprehensive IVHD type to use.
2966  *
2967  *	2 pass) Find the highest PCI device id the driver has to handle.
2968  *		Upon this information the size of the data structures is
2969  *		determined that needs to be allocated.
2970  *
2971  *	3 pass) Initialize the data structures just allocated with the
2972  *		information in the ACPI table about available AMD IOMMUs
2973  *		in the system. It also maps the PCI devices in the
2974  *		system to specific IOMMUs
2975  *
2976  *	4 pass) After the basic data structures are allocated and
2977  *		initialized we update them with information about memory
2978  *		remapping requirements parsed out of the ACPI table in
2979  *		this last pass.
2980  *
2981  * After everything is set up the IOMMUs are enabled and the necessary
2982  * hotplug and suspend notifiers are registered.
2983  */
static int __init early_amd_iommu_init(void)
{
	struct acpi_table_header *ivrs_base;
	int remap_cache_sz, ret;
	acpi_status status;

	if (!amd_iommu_detected)
		return -ENODEV;

	/* These early returns happen before a table reference is held */
	status = acpi_get_table("IVRS", 0, &ivrs_base);
	if (status == AE_NOT_FOUND)
		return -ENODEV;
	else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("IVRS table error: %s\n", err);
		return -EINVAL;
	}

	/*
	 * Validate checksum here so we don't need to do it when
	 * we actually parse the table
	 */
	ret = check_ivrs_checksum(ivrs_base);
	if (ret)
		goto out;

	ivinfo_init(ivrs_base);

	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);

	/* Protection-domain ID bitmap: one bit per domain ID */
	ret = -ENOMEM;

	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
					    GFP_KERNEL | __GFP_ZERO,
					    get_order(MAX_DOMAIN_ID/8));
	if (amd_iommu_pd_alloc_bitmap == NULL)
		goto out;

	/*
	 * never allocate domain 0 because its used as the non-allocated and
	 * error value placeholder
	 */
	__set_bit(0, amd_iommu_pd_alloc_bitmap);

	/*
	 * now the data structures are allocated and basically initialized
	 * start the real acpi table scan
	 */
	ret = init_iommu_all(ivrs_base);
	if (ret)
		goto out;

	/* 5 level guest page table */
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;

	/* Disable any previously enabled IOMMUs */
	if (!is_kdump_kernel() || amd_iommu_disabled)
		disable_iommus();

	/* Interrupt remapping is dropped if the IOAPIC information is bad */
	if (amd_iommu_irq_remap)
		amd_iommu_irq_remap = check_ioapic_information();

	if (amd_iommu_irq_remap) {
		struct amd_iommu_pci_seg *pci_seg;
		/*
		 * Interrupt remapping enabled, create kmem_cache for the
		 * remapping tables.
		 */
		ret = -ENOMEM;
		/* Table entries are 32 bits wide, or 128 bits in GA mode */
		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
		else
			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
							remap_cache_sz,
							DTE_INTTAB_ALIGNMENT,
							0, NULL);
		if (!amd_iommu_irq_cache)
			goto out;

		for_each_pci_segment(pci_seg) {
			if (alloc_irq_lookup_table(pci_seg))
				goto out;
		}
	}

	ret = init_memory_definitions(ivrs_base);
	if (ret)
		goto out;

	/* init the device table */
	init_device_table();

out:
	/* Don't leak any ACPI memory */
	acpi_put_table(ivrs_base);

	return ret;
}
3087 
3088 static int amd_iommu_enable_interrupts(void)
3089 {
3090 	struct amd_iommu *iommu;
3091 	int ret = 0;
3092 
3093 	for_each_iommu(iommu) {
3094 		ret = iommu_init_irq(iommu);
3095 		if (ret)
3096 			goto out;
3097 	}
3098 
3099 out:
3100 	return ret;
3101 }
3102 
3103 static bool __init detect_ivrs(void)
3104 {
3105 	struct acpi_table_header *ivrs_base;
3106 	acpi_status status;
3107 	int i;
3108 
3109 	status = acpi_get_table("IVRS", 0, &ivrs_base);
3110 	if (status == AE_NOT_FOUND)
3111 		return false;
3112 	else if (ACPI_FAILURE(status)) {
3113 		const char *err = acpi_format_exception(status);
3114 		pr_err("IVRS table error: %s\n", err);
3115 		return false;
3116 	}
3117 
3118 	acpi_put_table(ivrs_base);
3119 
3120 	if (amd_iommu_force_enable)
3121 		goto out;
3122 
3123 	/* Don't use IOMMU if there is Stoney Ridge graphics */
3124 	for (i = 0; i < 32; i++) {
3125 		u32 pci_id;
3126 
3127 		pci_id = read_pci_config(0, i, 0, 0);
3128 		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3129 			pr_info("Disable IOMMU on Stoney Ridge\n");
3130 			return false;
3131 		}
3132 	}
3133 
3134 out:
3135 	/* Make sure ACS will be enabled during PCI probe */
3136 	pci_request_acs();
3137 
3138 	return true;
3139 }
3140 
3141 /****************************************************************************
3142  *
3143  * AMD IOMMU Initialization State Machine
3144  *
3145  ****************************************************************************/
3146 
/*
 * Advance the initialization state machine by exactly one step, based on
 * the current value of init_state.
 *
 * Returns 0 if the step succeeded, otherwise a negative error code. On any
 * error the DMA resources are freed; in addition either the IOMMUs are
 * disabled and all IOMMU resources released (interrupt remapping not
 * enabled), or the DMA bits of the device tables are cleared and the caches
 * flushed (interrupt remapping enabled).
 */
static int __init state_next(void)
{
	int ret = 0;

	switch (init_state) {
	case IOMMU_START_STATE:
		if (!detect_ivrs()) {
			init_state	= IOMMU_NOT_FOUND;
			ret		= -ENODEV;
		} else {
			init_state	= IOMMU_IVRS_DETECTED;
		}
		break;
	case IOMMU_IVRS_DETECTED:
		if (amd_iommu_disabled) {
			init_state = IOMMU_CMDLINE_DISABLED;
			ret = -EINVAL;
		} else {
			ret = early_amd_iommu_init();
			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
		}
		break;
	case IOMMU_ACPI_FINISHED:
		early_enable_iommus();
		x86_platform.iommu_shutdown = disable_iommus;
		init_state = IOMMU_ENABLED;
		break;
	case IOMMU_ENABLED:
		register_syscore_ops(&amd_iommu_syscore_ops);
		ret = amd_iommu_init_pci();
		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
		/*
		 * NOTE(review): these run even when amd_iommu_init_pci()
		 * failed - confirm that is intended.
		 */
		enable_iommus_vapic();
		enable_iommus_v2();
		break;
	case IOMMU_PCI_INIT:
		ret = amd_iommu_enable_interrupts();
		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
		break;
	case IOMMU_INTERRUPTS_EN:
		init_state = IOMMU_INITIALIZED;
		break;
	case IOMMU_INITIALIZED:
		/* Nothing to do */
		break;
	case IOMMU_NOT_FOUND:
	case IOMMU_INIT_ERROR:
	case IOMMU_CMDLINE_DISABLED:
		/* Error states => do nothing */
		ret = -EINVAL;
		break;
	default:
		/* Unknown state */
		BUG();
	}

	if (ret) {
		free_dma_resources();
		if (!irq_remapping_enabled) {
			disable_iommus();
			free_iommu_resources();
		} else {
			struct amd_iommu *iommu;
			struct amd_iommu_pci_seg *pci_seg;

			/* Keep the IOMMUs running, but drop the DMA setup */
			for_each_pci_segment(pci_seg)
				uninit_device_table_dma(pci_seg);

			for_each_iommu(iommu)
				iommu_flush_all_caches(iommu);
		}
	}
	return ret;
}
3220 
3221 static int __init iommu_go_to_state(enum iommu_init_state state)
3222 {
3223 	int ret = -EINVAL;
3224 
3225 	while (init_state != state) {
3226 		if (init_state == IOMMU_NOT_FOUND         ||
3227 		    init_state == IOMMU_INIT_ERROR        ||
3228 		    init_state == IOMMU_CMDLINE_DISABLED)
3229 			break;
3230 		ret = state_next();
3231 	}
3232 
3233 	return ret;
3234 }
3235 
3236 #ifdef CONFIG_IRQ_REMAP
3237 int __init amd_iommu_prepare(void)
3238 {
3239 	int ret;
3240 
3241 	amd_iommu_irq_remap = true;
3242 
3243 	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3244 	if (ret) {
3245 		amd_iommu_irq_remap = false;
3246 		return ret;
3247 	}
3248 
3249 	return amd_iommu_irq_remap ? 0 : -ENODEV;
3250 }
3251 
3252 int __init amd_iommu_enable(void)
3253 {
3254 	int ret;
3255 
3256 	ret = iommu_go_to_state(IOMMU_ENABLED);
3257 	if (ret)
3258 		return ret;
3259 
3260 	irq_remapping_enabled = 1;
3261 	return amd_iommu_xt_mode;
3262 }
3263 
/* Disabling is implemented entirely by the suspend handler. */
void amd_iommu_disable(void)
{
	amd_iommu_suspend();
}
3268 
/*
 * Re-enabling is implemented entirely by the resume handler.
 * @mode is accepted but not used. Always returns 0.
 */
int amd_iommu_reenable(int mode)
{
	amd_iommu_resume();
	return 0;
}
3275 
3276 int __init amd_iommu_enable_faulting(void)
3277 {
3278 	/* We enable MSI later when PCI is initialized */
3279 	return 0;
3280 }
3281 #endif
3282 
3283 /*
3284  * This is the core init function for AMD IOMMU hardware in the system.
3285  * This function is called from the generic x86 DMA layer initialization
3286  * code.
3287  */
3288 static int __init amd_iommu_init(void)
3289 {
3290 	struct amd_iommu *iommu;
3291 	int ret;
3292 
3293 	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3294 #ifdef CONFIG_GART_IOMMU
3295 	if (ret && list_empty(&amd_iommu_list)) {
3296 		/*
3297 		 * We failed to initialize the AMD IOMMU - try fallback
3298 		 * to GART if possible.
3299 		 */
3300 		gart_iommu_init();
3301 	}
3302 #endif
3303 
3304 	for_each_iommu(iommu)
3305 		amd_iommu_debugfs_setup(iommu);
3306 
3307 	return ret;
3308 }
3309 
3310 static bool amd_iommu_sme_check(void)
3311 {
3312 	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3313 	    (boot_cpu_data.x86 != 0x17))
3314 		return true;
3315 
3316 	/* For Fam17h, a specific level of support is required */
3317 	if (boot_cpu_data.microcode >= 0x08001205)
3318 		return true;
3319 
3320 	if ((boot_cpu_data.microcode >= 0x08001126) &&
3321 	    (boot_cpu_data.microcode <= 0x080011ff))
3322 		return true;
3323 
3324 	pr_notice("IOMMU not currently supported when SME is active\n");
3325 
3326 	return false;
3327 }
3328 
3329 /****************************************************************************
3330  *
3331  * Early detect code. This code runs at IOMMU detection time in the DMA
3332  * layer. It just looks if there is an IVRS ACPI table to detect AMD
3333  * IOMMUs
3334  *
3335  ****************************************************************************/
3336 int __init amd_iommu_detect(void)
3337 {
3338 	int ret;
3339 
3340 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3341 		return -ENODEV;
3342 
3343 	if (!amd_iommu_sme_check())
3344 		return -ENODEV;
3345 
3346 	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3347 	if (ret)
3348 		return ret;
3349 
3350 	amd_iommu_detected = true;
3351 	iommu_detected = 1;
3352 	x86_init.iommu.iommu_init = amd_iommu_init;
3353 
3354 	return 1;
3355 }
3356 
3357 /****************************************************************************
3358  *
3359  * Parsing functions for the AMD IOMMU specific kernel command line
3360  * options.
3361  *
3362  ****************************************************************************/
3363 
3364 static int __init parse_amd_iommu_dump(char *str)
3365 {
3366 	amd_iommu_dump = true;
3367 
3368 	return 1;
3369 }
3370 
3371 static int __init parse_amd_iommu_intr(char *str)
3372 {
3373 	for (; *str; ++str) {
3374 		if (strncmp(str, "legacy", 6) == 0) {
3375 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3376 			break;
3377 		}
3378 		if (strncmp(str, "vapic", 5) == 0) {
3379 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3380 			break;
3381 		}
3382 	}
3383 	return 1;
3384 }
3385 
3386 static int __init parse_amd_iommu_options(char *str)
3387 {
3388 	if (!str)
3389 		return -EINVAL;
3390 
3391 	while (*str) {
3392 		if (strncmp(str, "fullflush", 9) == 0) {
3393 			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3394 			iommu_set_dma_strict();
3395 		} else if (strncmp(str, "force_enable", 12) == 0) {
3396 			amd_iommu_force_enable = true;
3397 		} else if (strncmp(str, "off", 3) == 0) {
3398 			amd_iommu_disabled = true;
3399 		} else if (strncmp(str, "force_isolation", 15) == 0) {
3400 			amd_iommu_force_isolation = true;
3401 		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3402 			amd_iommu_pgtable = AMD_IOMMU_V1;
3403 		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3404 			amd_iommu_pgtable = AMD_IOMMU_V2;
3405 		} else {
3406 			pr_notice("Unknown option - '%s'\n", str);
3407 		}
3408 
3409 		str += strcspn(str, ",");
3410 		while (*str == ',')
3411 			str++;
3412 	}
3413 
3414 	return 1;
3415 }
3416 
3417 static int __init parse_ivrs_ioapic(char *str)
3418 {
3419 	u32 seg = 0, bus, dev, fn;
3420 	int id, i;
3421 	u32 devid;
3422 
3423 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3424 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3425 		goto found;
3426 
3427 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3428 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3429 		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3430 			str, id, seg, bus, dev, fn);
3431 		goto found;
3432 	}
3433 
3434 	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3435 	return 1;
3436 
3437 found:
3438 	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3439 		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3440 			str);
3441 		return 1;
3442 	}
3443 
3444 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3445 
3446 	cmdline_maps			= true;
3447 	i				= early_ioapic_map_size++;
3448 	early_ioapic_map[i].id		= id;
3449 	early_ioapic_map[i].devid	= devid;
3450 	early_ioapic_map[i].cmd_line	= true;
3451 
3452 	return 1;
3453 }
3454 
3455 static int __init parse_ivrs_hpet(char *str)
3456 {
3457 	u32 seg = 0, bus, dev, fn;
3458 	int id, i;
3459 	u32 devid;
3460 
3461 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3462 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3463 		goto found;
3464 
3465 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3466 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3467 		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3468 			str, id, seg, bus, dev, fn);
3469 		goto found;
3470 	}
3471 
3472 	pr_err("Invalid command line: ivrs_hpet%s\n", str);
3473 	return 1;
3474 
3475 found:
3476 	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3477 		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3478 			str);
3479 		return 1;
3480 	}
3481 
3482 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3483 
3484 	cmdline_maps			= true;
3485 	i				= early_hpet_map_size++;
3486 	early_hpet_map[i].id		= id;
3487 	early_hpet_map[i].devid		= devid;
3488 	early_hpet_map[i].cmd_line	= true;
3489 
3490 	return 1;
3491 }
3492 
3493 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3494 
3495 static int __init parse_ivrs_acpihid(char *str)
3496 {
3497 	u32 seg = 0, bus, dev, fn;
3498 	char *hid, *uid, *p, *addr;
3499 	char acpiid[ACPIID_LEN] = {0};
3500 	int i;
3501 
3502 	addr = strchr(str, '@');
3503 	if (!addr) {
3504 		addr = strchr(str, '=');
3505 		if (!addr)
3506 			goto not_found;
3507 
3508 		++addr;
3509 
3510 		if (strlen(addr) > ACPIID_LEN)
3511 			goto not_found;
3512 
3513 		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3514 		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3515 			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3516 				str, acpiid, seg, bus, dev, fn);
3517 			goto found;
3518 		}
3519 		goto not_found;
3520 	}
3521 
3522 	/* We have the '@', make it the terminator to get just the acpiid */
3523 	*addr++ = 0;
3524 
3525 	if (strlen(str) > ACPIID_LEN + 1)
3526 		goto not_found;
3527 
3528 	if (sscanf(str, "=%s", acpiid) != 1)
3529 		goto not_found;
3530 
3531 	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3532 	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3533 		goto found;
3534 
3535 not_found:
3536 	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3537 	return 1;
3538 
3539 found:
3540 	p = acpiid;
3541 	hid = strsep(&p, ":");
3542 	uid = p;
3543 
3544 	if (!hid || !(*hid) || !uid) {
3545 		pr_err("Invalid command line: hid or uid\n");
3546 		return 1;
3547 	}
3548 
3549 	/*
3550 	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3551 	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3552 	 */
3553 	while (*uid == '0' && *(uid + 1))
3554 		uid++;
3555 
3556 	i = early_acpihid_map_size++;
3557 	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3558 	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3559 	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3560 	early_acpihid_map[i].cmd_line	= true;
3561 
3562 	return 1;
3563 }
3564 
3565 __setup("amd_iommu_dump",	parse_amd_iommu_dump);
3566 __setup("amd_iommu=",		parse_amd_iommu_options);
3567 __setup("amd_iommu_intr=",	parse_amd_iommu_intr);
3568 __setup("ivrs_ioapic",		parse_ivrs_ioapic);
3569 __setup("ivrs_hpet",		parse_ivrs_hpet);
3570 __setup("ivrs_acpihid",		parse_ivrs_acpihid);
3571 
3572 bool amd_iommu_v2_supported(void)
3573 {
3574 	/* CPU page table size should match IOMMU guest page table size */
3575 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3576 	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
3577 		return false;
3578 
3579 	/*
3580 	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
3581 	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
3582 	 * setting up IOMMUv1 page table.
3583 	 */
3584 	return amd_iommu_v2_present && !amd_iommu_snp_en;
3585 }
3586 EXPORT_SYMBOL(amd_iommu_v2_supported);
3587 
3588 struct amd_iommu *get_amd_iommu(unsigned int idx)
3589 {
3590 	unsigned int i = 0;
3591 	struct amd_iommu *iommu;
3592 
3593 	for_each_iommu(iommu)
3594 		if (i++ == idx)
3595 			return iommu;
3596 	return NULL;
3597 }
3598 
3599 /****************************************************************************
3600  *
3601  * IOMMU EFR Performance Counter support functionality. This code allows
3602  * access to the IOMMU PC functionality.
3603  *
3604  ****************************************************************************/
3605 
3606 u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3607 {
3608 	struct amd_iommu *iommu = get_amd_iommu(idx);
3609 
3610 	if (iommu)
3611 		return iommu->max_banks;
3612 
3613 	return 0;
3614 }
3615 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3616 
3617 bool amd_iommu_pc_supported(void)
3618 {
3619 	return amd_iommu_pc_present;
3620 }
3621 EXPORT_SYMBOL(amd_iommu_pc_supported);
3622 
3623 u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3624 {
3625 	struct amd_iommu *iommu = get_amd_iommu(idx);
3626 
3627 	if (iommu)
3628 		return iommu->max_counters;
3629 
3630 	return 0;
3631 }
3632 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3633 
3634 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3635 				u8 fxn, u64 *value, bool is_write)
3636 {
3637 	u32 offset;
3638 	u32 max_offset_lim;
3639 
3640 	/* Make sure the IOMMU PC resource is available */
3641 	if (!amd_iommu_pc_present)
3642 		return -ENODEV;
3643 
3644 	/* Check for valid iommu and pc register indexing */
3645 	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3646 		return -ENODEV;
3647 
3648 	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3649 
3650 	/* Limit the offset to the hw defined mmio region aperture */
3651 	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3652 				(iommu->max_counters << 8) | 0x28);
3653 	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3654 	    (offset > max_offset_lim))
3655 		return -EINVAL;
3656 
3657 	if (is_write) {
3658 		u64 val = *value & GENMASK_ULL(47, 0);
3659 
3660 		writel((u32)val, iommu->mmio_base + offset);
3661 		writel((val >> 32), iommu->mmio_base + offset + 4);
3662 	} else {
3663 		*value = readl(iommu->mmio_base + offset + 4);
3664 		*value <<= 32;
3665 		*value |= readl(iommu->mmio_base + offset);
3666 		*value &= GENMASK_ULL(47, 0);
3667 	}
3668 
3669 	return 0;
3670 }
3671 
3672 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3673 {
3674 	if (!iommu)
3675 		return -EINVAL;
3676 
3677 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3678 }
3679 
3680 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3681 {
3682 	if (!iommu)
3683 		return -EINVAL;
3684 
3685 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3686 }
3687 
3688 #ifdef CONFIG_AMD_MEM_ENCRYPT
3689 int amd_iommu_snp_enable(void)
3690 {
3691 	/*
3692 	 * The SNP support requires that IOMMU must be enabled, and is
3693 	 * not configured in the passthrough mode.
3694 	 */
3695 	if (no_iommu || iommu_default_passthrough()) {
3696 		pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
3697 		return -EINVAL;
3698 	}
3699 
3700 	/*
3701 	 * Prevent enabling SNP after IOMMU_ENABLED state because this process
3702 	 * affect how IOMMU driver sets up data structures and configures
3703 	 * IOMMU hardware.
3704 	 */
3705 	if (init_state > IOMMU_ENABLED) {
3706 		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
3707 		return -EINVAL;
3708 	}
3709 
3710 	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
3711 	if (!amd_iommu_snp_en)
3712 		return -EINVAL;
3713 
3714 	pr_info("SNP enabled\n");
3715 
3716 	/* Enforce IOMMU v1 pagetable when SNP is enabled. */
3717 	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
3718 		pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
3719 		amd_iommu_pgtable = AMD_IOMMU_V1;
3720 	}
3721 
3722 	return 0;
3723 }
3724 #endif
3725