xref: /openbmc/linux/drivers/iommu/amd/init.c (revision 47edc84f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  *         Leo Duran <leo.duran@amd.com>
6  */
7 
8 #define pr_fmt(fmt)     "AMD-Vi: " fmt
9 #define dev_fmt(fmt)    pr_fmt(fmt)
10 
11 #include <linux/pci.h>
12 #include <linux/acpi.h>
13 #include <linux/list.h>
14 #include <linux/bitmap.h>
15 #include <linux/delay.h>
16 #include <linux/slab.h>
17 #include <linux/syscore_ops.h>
18 #include <linux/interrupt.h>
19 #include <linux/msi.h>
20 #include <linux/irq.h>
21 #include <linux/amd-iommu.h>
22 #include <linux/export.h>
23 #include <linux/kmemleak.h>
24 #include <linux/mem_encrypt.h>
25 #include <asm/pci-direct.h>
26 #include <asm/iommu.h>
27 #include <asm/apic.h>
28 #include <asm/gart.h>
29 #include <asm/x86_init.h>
30 #include <asm/iommu_table.h>
31 #include <asm/io_apic.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/set_memory.h>
34 
35 #include <linux/crash_dump.h>
36 
37 #include "amd_iommu.h"
38 #include "../irq_remapping.h"
39 
40 /*
41  * definitions for the ACPI scanning code
42  */
43 #define IVRS_HEADER_LENGTH 48
44 
45 #define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
46 #define ACPI_IVMD_TYPE_ALL              0x20
47 #define ACPI_IVMD_TYPE                  0x21
48 #define ACPI_IVMD_TYPE_RANGE            0x22
49 
50 #define IVHD_DEV_ALL                    0x01
51 #define IVHD_DEV_SELECT                 0x02
52 #define IVHD_DEV_SELECT_RANGE_START     0x03
53 #define IVHD_DEV_RANGE_END              0x04
54 #define IVHD_DEV_ALIAS                  0x42
55 #define IVHD_DEV_ALIAS_RANGE            0x43
56 #define IVHD_DEV_EXT_SELECT             0x46
57 #define IVHD_DEV_EXT_SELECT_RANGE       0x47
58 #define IVHD_DEV_SPECIAL		0x48
59 #define IVHD_DEV_ACPI_HID		0xf0
60 
61 #define UID_NOT_PRESENT                 0
62 #define UID_IS_INTEGER                  1
63 #define UID_IS_CHARACTER                2
64 
65 #define IVHD_SPECIAL_IOAPIC		1
66 #define IVHD_SPECIAL_HPET		2
67 
68 #define IVHD_FLAG_HT_TUN_EN_MASK        0x01
69 #define IVHD_FLAG_PASSPW_EN_MASK        0x02
70 #define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
71 #define IVHD_FLAG_ISOC_EN_MASK          0x08
72 
73 #define IVMD_FLAG_EXCL_RANGE            0x08
74 #define IVMD_FLAG_IW                    0x04
75 #define IVMD_FLAG_IR                    0x02
76 #define IVMD_FLAG_UNITY_MAP             0x01
77 
78 #define ACPI_DEVFLAG_INITPASS           0x01
79 #define ACPI_DEVFLAG_EXTINT             0x02
80 #define ACPI_DEVFLAG_NMI                0x04
81 #define ACPI_DEVFLAG_SYSMGT1            0x10
82 #define ACPI_DEVFLAG_SYSMGT2            0x20
83 #define ACPI_DEVFLAG_LINT0              0x40
84 #define ACPI_DEVFLAG_LINT1              0x80
85 #define ACPI_DEVFLAG_ATSDIS             0x10000000
86 
87 #define LOOP_TIMEOUT	100000
88 /*
89  * ACPI table definitions
90  *
91  * These data structures are laid over the table to parse the important values
92  * out of it.
93  */
94 
95 extern const struct iommu_ops amd_iommu_ops;
96 
97 /*
98  * structure describing one IOMMU in the ACPI table. Typically followed by one
99  * or more ivhd_entries.
100  */
101 struct ivhd_header {
102 	u8 type;
103 	u8 flags;
104 	u16 length;
105 	u16 devid;
106 	u16 cap_ptr;
107 	u64 mmio_phys;
108 	u16 pci_seg;
109 	u16 info;
110 	u32 efr_attr;
111 
112 	/* Following only valid on IVHD type 11h and 40h */
113 	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
114 	u64 res;
115 } __attribute__((packed));
116 
117 /*
118  * A device entry describing which devices a specific IOMMU translates and
119  * which requestor ids they use.
120  */
121 struct ivhd_entry {
122 	u8 type;
123 	u16 devid;
124 	u8 flags;
125 	u32 ext;
126 	u32 hidh;
127 	u64 cid;
128 	u8 uidf;
129 	u8 uidl;
130 	u8 uid;
131 } __attribute__((packed));
132 
133 /*
134  * An AMD IOMMU memory definition structure. It defines things like exclusion
135  * ranges for devices and regions that should be unity mapped.
136  */
137 struct ivmd_header {
138 	u8 type;
139 	u8 flags;
140 	u16 length;
141 	u16 devid;
142 	u16 aux;
143 	u64 resv;
144 	u64 range_start;
145 	u64 range_length;
146 } __attribute__((packed));
147 
148 bool amd_iommu_dump;
149 bool amd_iommu_irq_remap __read_mostly;
150 
151 enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
152 
153 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
154 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
155 
156 static bool amd_iommu_detected;
157 static bool __initdata amd_iommu_disabled;
158 static int amd_iommu_target_ivhd_type;
159 
160 u16 amd_iommu_last_bdf;			/* largest PCI device id we have
161 					   to handle */
162 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
163 					   we find in ACPI */
164 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
165 
166 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
167 					   system */
168 
169 /* Array to assign indices to IOMMUs */
170 struct amd_iommu *amd_iommus[MAX_IOMMUS];
171 
172 /* Number of IOMMUs present in the system */
173 static int amd_iommus_present;
174 
175 /* IOMMUs have a non-present cache? */
176 bool amd_iommu_np_cache __read_mostly;
177 bool amd_iommu_iotlb_sup __read_mostly = true;
178 
179 u32 amd_iommu_max_pasid __read_mostly = ~0;
180 
181 bool amd_iommu_v2_present __read_mostly;
182 static bool amd_iommu_pc_present __read_mostly;
183 
184 bool amd_iommu_force_isolation __read_mostly;
185 
186 /*
187  * Pointer to the device table which is shared by all AMD IOMMUs.
188  * It is indexed by the PCI device id or the HT unit id and contains
189  * information about the domain the device belongs to as well as the
190  * page table root pointer.
191  */
192 struct dev_table_entry *amd_iommu_dev_table;
193 /*
194  * Pointer to a device table to which the content of the old device
195  * table will be copied. It is only used in the kdump kernel.
196  */
197 static struct dev_table_entry *old_dev_tbl_cpy;
198 
199 /*
200  * The alias table is a driver specific data structure which contains the
201  * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
202  * More than one device can share the same requestor id.
203  */
204 u16 *amd_iommu_alias_table;
205 
206 /*
207  * The rlookup table is used to find the IOMMU which is responsible
208  * for a specific device. It is also indexed by the PCI device id.
209  */
210 struct amd_iommu **amd_iommu_rlookup_table;
211 EXPORT_SYMBOL(amd_iommu_rlookup_table);
212 
213 /*
214  * This table is used to find the irq remapping table for a given device id
215  * quickly.
216  */
217 struct irq_remap_table **irq_lookup_table;
218 
219 /*
220  * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
221  * to know which ones are already in use.
222  */
223 unsigned long *amd_iommu_pd_alloc_bitmap;
224 
225 static u32 dev_table_size;	/* size of the device table */
226 static u32 alias_table_size;	/* size of the alias table */
227 static u32 rlookup_table_size;	/* size of the rlookup table */
228 
229 enum iommu_init_state {
230 	IOMMU_START_STATE,
231 	IOMMU_IVRS_DETECTED,
232 	IOMMU_ACPI_FINISHED,
233 	IOMMU_ENABLED,
234 	IOMMU_PCI_INIT,
235 	IOMMU_INTERRUPTS_EN,
236 	IOMMU_DMA_OPS,
237 	IOMMU_INITIALIZED,
238 	IOMMU_NOT_FOUND,
239 	IOMMU_INIT_ERROR,
240 	IOMMU_CMDLINE_DISABLED,
241 };
242 
243 /* Early ioapic and hpet maps from kernel command line */
244 #define EARLY_MAP_SIZE		4
245 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
246 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
247 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
248 
249 static int __initdata early_ioapic_map_size;
250 static int __initdata early_hpet_map_size;
251 static int __initdata early_acpihid_map_size;
252 
253 static bool __initdata cmdline_maps;
254 
255 static enum iommu_init_state init_state = IOMMU_START_STATE;
256 
257 static int amd_iommu_enable_interrupts(void);
258 static int __init iommu_go_to_state(enum iommu_init_state state);
259 static void init_device_table_dma(void);
260 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
261 				u8 fxn, u64 *value, bool is_write);
262 
263 static bool amd_iommu_pre_enabled = true;
264 
265 static u32 amd_iommu_ivinfo __initdata;
266 
267 bool translation_pre_enabled(struct amd_iommu *iommu)
268 {
269 	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
270 }
271 EXPORT_SYMBOL(translation_pre_enabled);
272 
273 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
274 {
275 	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
276 }
277 
278 static void init_translation_status(struct amd_iommu *iommu)
279 {
280 	u64 ctrl;
281 
282 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
283 	if (ctrl & (1<<CONTROL_IOMMU_EN))
284 		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
285 }
286 
287 static inline void update_last_devid(u16 devid)
288 {
289 	if (devid > amd_iommu_last_bdf)
290 		amd_iommu_last_bdf = devid;
291 }
292 
293 static inline unsigned long tbl_size(int entry_size)
294 {
295 	unsigned shift = PAGE_SHIFT +
296 			 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
297 
298 	return 1UL << shift;
299 }
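/*
 * For illustration: with the largest possible last BDF of 0xffff and an
 * entry size of 32 bytes (assumed sizeof(struct dev_table_entry)), this
 * works out to
 *
 *	get_order((0xffff + 1) * 32) = get_order(2 MiB) = 9
 *	1UL << (PAGE_SHIFT + 9)      = 2 MiB
 *
 * i.e. each table is rounded up to a power-of-two number of pages, which
 * is what the hardware base-register encodings further down expect.
 */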
300 
301 int amd_iommu_get_num_iommus(void)
302 {
303 	return amd_iommus_present;
304 }
305 
306 /*
307  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
308  * Default to IVHD EFR since it is available sooner
309  * (i.e. before PCI init).
310  */
311 static void __init early_iommu_features_init(struct amd_iommu *iommu,
312 					     struct ivhd_header *h)
313 {
314 	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP)
315 		iommu->features = h->efr_reg;
316 }
317 
318 /* Access to l1 and l2 indexed register spaces */
319 
320 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
321 {
322 	u32 val;
323 
324 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
325 	pci_read_config_dword(iommu->dev, 0xfc, &val);
326 	return val;
327 }
328 
329 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
330 {
331 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
332 	pci_write_config_dword(iommu->dev, 0xfc, val);
333 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
334 }
335 
336 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
337 {
338 	u32 val;
339 
340 	pci_write_config_dword(iommu->dev, 0xf0, address);
341 	pci_read_config_dword(iommu->dev, 0xf4, &val);
342 	return val;
343 }
344 
345 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
346 {
347 	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
348 	pci_write_config_dword(iommu->dev, 0xf4, val);
349 }
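/*
 * Sketch of the indirect access protocol the four helpers above implement
 * (derived from the code itself; offsets are in the IOMMU's PCI config
 * space):
 *
 *	L1: 0xf8 <- address | (l1 << 16) [| BIT(31) to arm a write]
 *	    0xfc <-> data window
 *	L2: 0xf0 <- address [| BIT(8) to arm a write]
 *	    0xf4 <-> data window
 *
 * For example, the ATS write-check workaround further down reads L2
 * register 0x47 simply via iommu_read_l2(iommu, 0x47).
 */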
350 
351 /****************************************************************************
352  *
353  * AMD IOMMU MMIO register space handling functions
354  *
355  * These functions are used to program the IOMMU device registers in
356  * MMIO space which are required by this driver.
357  *
358  ****************************************************************************/
359 
360 /*
361  * This function sets the exclusion range in the IOMMU. DMA accesses to the
362  * exclusion range are passed through untranslated.
363  */
364 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
365 {
366 	u64 start = iommu->exclusion_start & PAGE_MASK;
367 	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
368 	u64 entry;
369 
370 	if (!iommu->exclusion_start)
371 		return;
372 
373 	entry = start | MMIO_EXCL_ENABLE_MASK;
374 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
375 			&entry, sizeof(entry));
376 
377 	entry = limit;
378 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
379 			&entry, sizeof(entry));
380 }
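/*
 * Worked example (made-up addresses): for an exclusion range starting at
 * 0x12345000 with a length of 0x3000 bytes, the code above programs
 *
 *	base  = 0x12345000 | MMIO_EXCL_ENABLE_MASK
 *	limit = (0x12345000 + 0x3000 - 1) & PAGE_MASK = 0x12347000
 *
 * i.e. the limit register holds the page-aligned address of the last page
 * of the range, not the address one byte past it.
 */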
381 
382 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
383 {
384 	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
385 	u64 entry = start & PM_ADDR_MASK;
386 
387 	if (!iommu_feature(iommu, FEATURE_SNP))
388 		return;
389 
390 	/* Note:
391 	 * Re-purpose Exclusion base/limit registers for Completion wait
392 	 * write-back base/limit.
393 	 */
394 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
395 		    &entry, sizeof(entry));
396 
397 	/* Note:
398 	 * Default to 4 Kbytes, which can be specified by setting base
399 	 * address equal to the limit address.
400 	 */
401 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
402 		    &entry, sizeof(entry));
403 }
404 
405 /* Programs the physical address of the device table into the IOMMU hardware */
406 static void iommu_set_device_table(struct amd_iommu *iommu)
407 {
408 	u64 entry;
409 
410 	BUG_ON(iommu->mmio_base == NULL);
411 
412 	entry = iommu_virt_to_phys(amd_iommu_dev_table);
413 	entry |= (dev_table_size >> 12) - 1;
414 	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
415 			&entry, sizeof(entry));
416 }
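/*
 * The low bits of the register written above encode the table size in
 * 4 KiB pages minus one. Using the 2 MiB device table from the sizing
 * example near tbl_size():
 *
 *	entry = iommu_virt_to_phys(amd_iommu_dev_table) | ((0x200000 >> 12) - 1)
 *	      = iommu_virt_to_phys(amd_iommu_dev_table) | 0x1ff
 */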
417 
418 /* Generic functions to enable/disable certain features of the IOMMU. */
419 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
420 {
421 	u64 ctrl;
422 
423 	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
424 	ctrl |= (1ULL << bit);
425 	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
426 }
427 
428 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
429 {
430 	u64 ctrl;
431 
432 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
433 	ctrl &= ~(1ULL << bit);
434 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
435 }
436 
437 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
438 {
439 	u64 ctrl;
440 
441 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
442 	ctrl &= ~CTRL_INV_TO_MASK;
443 	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
444 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
445 }
446 
447 /* Function to enable the hardware */
448 static void iommu_enable(struct amd_iommu *iommu)
449 {
450 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
451 }
452 
453 static void iommu_disable(struct amd_iommu *iommu)
454 {
455 	if (!iommu->mmio_base)
456 		return;
457 
458 	/* Disable command buffer */
459 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
460 
461 	/* Disable event logging and event interrupts */
462 	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
463 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
464 
465 	/* Disable IOMMU GA_LOG */
466 	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
467 	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
468 
469 	/* Disable IOMMU hardware itself */
470 	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
471 }
472 
473 /*
474  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
475  * the system has one.
476  */
477 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
478 {
479 	if (!request_mem_region(address, end, "amd_iommu")) {
480 		pr_err("Cannot reserve memory region %llx-%llx for mmio\n",
481 			address, end);
482 		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
483 		return NULL;
484 	}
485 
486 	return (u8 __iomem *)ioremap(address, end);
487 }
488 
489 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
490 {
491 	if (iommu->mmio_base)
492 		iounmap(iommu->mmio_base);
493 	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
494 }
495 
496 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
497 {
498 	u32 size = 0;
499 
500 	switch (h->type) {
501 	case 0x10:
502 		size = 24;
503 		break;
504 	case 0x11:
505 	case 0x40:
506 		size = 40;
507 		break;
508 	}
509 	return size;
510 }
511 
512 /****************************************************************************
513  *
514  * The functions below belong to the first pass of AMD IOMMU ACPI table
515  * parsing. In this pass we try to find out the highest device id this
516  * code has to handle. Based on this information the size of the shared data
517  * structures is determined later.
518  *
519  ****************************************************************************/
520 
521 /*
522  * This function calculates the length of a given IVHD entry
523  */
524 static inline int ivhd_entry_length(u8 *ivhd)
525 {
526 	u32 type = ((struct ivhd_entry *)ivhd)->type;
527 
528 	if (type < 0x80) {
529 		return 0x04 << (*ivhd >> 6);
530 	} else if (type == IVHD_DEV_ACPI_HID) {
531 		/* For ACPI_HID, offset 21 is uid len */
532 		return *((u8 *)ivhd + 21) + 22;
533 	}
534 	return 0;
535 }
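/*
 * Examples of the length rule above:
 *
 *	IVHD_DEV_SELECT   (0x02): 0x04 << (0x02 >> 6) -> 4-byte entry
 *	IVHD_DEV_ALIAS    (0x42): 0x04 << (0x42 >> 6) -> 8-byte entry
 *	IVHD_DEV_ACPI_HID (0xf0): 22 fixed bytes plus the UID length taken
 *				  from the byte at offset 21 of the entry
 */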
536 
537 /*
538  * After reading the highest device id from the IOMMU PCI capability header
539  * this function checks whether a higher device id is defined in the ACPI table.
540  */
541 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
542 {
543 	u8 *p = (void *)h, *end = (void *)h;
544 	struct ivhd_entry *dev;
545 
546 	u32 ivhd_size = get_ivhd_header_size(h);
547 
548 	if (!ivhd_size) {
549 		pr_err("Unsupported IVHD type %#x\n", h->type);
550 		return -EINVAL;
551 	}
552 
553 	p += ivhd_size;
554 	end += h->length;
555 
556 	while (p < end) {
557 		dev = (struct ivhd_entry *)p;
558 		switch (dev->type) {
559 		case IVHD_DEV_ALL:
560 			/* Use maximum BDF value for DEV_ALL */
561 			update_last_devid(0xffff);
562 			break;
563 		case IVHD_DEV_SELECT:
564 		case IVHD_DEV_RANGE_END:
565 		case IVHD_DEV_ALIAS:
566 		case IVHD_DEV_EXT_SELECT:
567 			/* all the above subfield types refer to device ids */
568 			update_last_devid(dev->devid);
569 			break;
570 		default:
571 			break;
572 		}
573 		p += ivhd_entry_length(p);
574 	}
575 
576 	WARN_ON(p != end);
577 
578 	return 0;
579 }
580 
581 static int __init check_ivrs_checksum(struct acpi_table_header *table)
582 {
583 	int i;
584 	u8 checksum = 0, *p = (u8 *)table;
585 
586 	for (i = 0; i < table->length; ++i)
587 		checksum += p[i];
588 	if (checksum != 0) {
589 		/* ACPI table corrupt */
590 		pr_err(FW_BUG "IVRS invalid checksum\n");
591 		return -ENODEV;
592 	}
593 
594 	return 0;
595 }
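/*
 * As with every ACPI table, the IVRS checksum byte is chosen so that all
 * bytes of the table sum to 0 modulo 256; summing into a u8 and comparing
 * against zero, as done above, verifies exactly that invariant.
 */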
596 
597 /*
598  * Iterate over all IVHD entries in the ACPI table and find the highest device
599  * id which we need to handle. This is the first of three functions which parse
600  * the ACPI table; the IVRS checksum is verified separately in check_ivrs_checksum().
601  */
602 static int __init find_last_devid_acpi(struct acpi_table_header *table)
603 {
604 	u8 *p = (u8 *)table, *end = (u8 *)table;
605 	struct ivhd_header *h;
606 
607 	p += IVRS_HEADER_LENGTH;
608 
609 	end += table->length;
610 	while (p < end) {
611 		h = (struct ivhd_header *)p;
612 		if (h->type == amd_iommu_target_ivhd_type) {
613 			int ret = find_last_devid_from_ivhd(h);
614 
615 			if (ret)
616 				return ret;
617 		}
618 		p += h->length;
619 	}
620 	WARN_ON(p != end);
621 
622 	return 0;
623 }
624 
625 /****************************************************************************
626  *
627  * The following functions belong to the code path which parses the ACPI table
628  * the second time. In this ACPI parsing iteration we allocate IOMMU specific
629  * data structures, initialize the device/alias/rlookup tables and also
630  * do the basic hardware initialization.
631  *
632  ****************************************************************************/
633 
634 /*
635  * Allocates the command buffer. This buffer is per AMD IOMMU. We can
636  * write commands to that buffer later and the IOMMU will execute them
637  * asynchronously.
638  */
639 static int __init alloc_command_buffer(struct amd_iommu *iommu)
640 {
641 	iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
642 						  get_order(CMD_BUFFER_SIZE));
643 
644 	return iommu->cmd_buf ? 0 : -ENOMEM;
645 }
646 
647 /*
648  * This function resets the command buffer if the IOMMU stopped fetching
649  * commands from it.
650  */
651 void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
652 {
653 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
654 
655 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
656 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
657 	iommu->cmd_buf_head = 0;
658 	iommu->cmd_buf_tail = 0;
659 
660 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
661 }
662 
663 /*
664  * This function writes the command buffer address to the hardware and
665  * enables it.
666  */
667 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
668 {
669 	u64 entry;
670 
671 	BUG_ON(iommu->cmd_buf == NULL);
672 
673 	entry = iommu_virt_to_phys(iommu->cmd_buf);
674 	entry |= MMIO_CMD_SIZE_512;
675 
676 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
677 		    &entry, sizeof(entry));
678 
679 	amd_iommu_reset_cmd_buffer(iommu);
680 }
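/*
 * The register written above carries both the physical base address of
 * the command buffer and a size encoding (MMIO_CMD_SIZE_512, presumably
 * 512 command slots matching CMD_BUFFER_SIZE as allocated in
 * alloc_command_buffer()). amd_iommu_reset_cmd_buffer() then zeroes the
 * head and tail pointers before re-setting CMDBUF_EN, so the IOMMU starts
 * fetching from an empty ring.
 */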
681 
682 /*
683  * This function disables the command buffer
684  */
685 static void iommu_disable_command_buffer(struct amd_iommu *iommu)
686 {
687 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
688 }
689 
690 static void __init free_command_buffer(struct amd_iommu *iommu)
691 {
692 	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
693 }
694 
695 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
696 					 gfp_t gfp, size_t size)
697 {
698 	int order = get_order(size);
699 	void *buf = (void *)__get_free_pages(gfp, order);
700 
701 	if (buf &&
702 	    iommu_feature(iommu, FEATURE_SNP) &&
703 	    set_memory_4k((unsigned long)buf, (1 << order))) {
704 		free_pages((unsigned long)buf, order);
705 		buf = NULL;
706 	}
707 
708 	return buf;
709 }
710 
711 /* allocates the memory where the IOMMU will log its events to */
712 static int __init alloc_event_buffer(struct amd_iommu *iommu)
713 {
714 	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
715 					      EVT_BUFFER_SIZE);
716 
717 	return iommu->evt_buf ? 0 : -ENOMEM;
718 }
719 
720 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
721 {
722 	u64 entry;
723 
724 	BUG_ON(iommu->evt_buf == NULL);
725 
726 	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
727 
728 	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
729 		    &entry, sizeof(entry));
730 
731 	/* set head and tail to zero manually */
732 	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
733 	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
734 
735 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
736 }
737 
738 /*
739  * This function disables the event log buffer
740  */
741 static void iommu_disable_event_buffer(struct amd_iommu *iommu)
742 {
743 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
744 }
745 
746 static void __init free_event_buffer(struct amd_iommu *iommu)
747 {
748 	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
749 }
750 
751 /* allocates the memory where the IOMMU will log peripheral page requests to */
752 static int __init alloc_ppr_log(struct amd_iommu *iommu)
753 {
754 	iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
755 					      PPR_LOG_SIZE);
756 
757 	return iommu->ppr_log ? 0 : -ENOMEM;
758 }
759 
760 static void iommu_enable_ppr_log(struct amd_iommu *iommu)
761 {
762 	u64 entry;
763 
764 	if (iommu->ppr_log == NULL)
765 		return;
766 
767 	entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
768 
769 	memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
770 		    &entry, sizeof(entry));
771 
772 	/* set head and tail to zero manually */
773 	writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
774 	writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
775 
776 	iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
777 	iommu_feature_enable(iommu, CONTROL_PPR_EN);
778 }
779 
780 static void __init free_ppr_log(struct amd_iommu *iommu)
781 {
782 	free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
783 }
784 
785 static void free_ga_log(struct amd_iommu *iommu)
786 {
787 #ifdef CONFIG_IRQ_REMAP
788 	free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
789 	free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
790 #endif
791 }
792 
793 static int iommu_ga_log_enable(struct amd_iommu *iommu)
794 {
795 #ifdef CONFIG_IRQ_REMAP
796 	u32 status, i;
797 
798 	if (!iommu->ga_log)
799 		return -EINVAL;
800 
801 	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
802 
803 	/* Check if already running */
804 	if (status & (MMIO_STATUS_GALOG_RUN_MASK))
805 		return 0;
806 
807 	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
808 	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
809 
810 	for (i = 0; i < LOOP_TIMEOUT; ++i) {
811 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
812 		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
813 			break;
814 	}
815 
816 	if (i >= LOOP_TIMEOUT)
817 		return -EINVAL;
818 #endif /* CONFIG_IRQ_REMAP */
819 	return 0;
820 }
821 
822 #ifdef CONFIG_IRQ_REMAP
823 static int iommu_init_ga_log(struct amd_iommu *iommu)
824 {
825 	u64 entry;
826 
827 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
828 		return 0;
829 
830 	iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
831 					get_order(GA_LOG_SIZE));
832 	if (!iommu->ga_log)
833 		goto err_out;
834 
835 	iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
836 					get_order(8));
837 	if (!iommu->ga_log_tail)
838 		goto err_out;
839 
840 	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
841 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
842 		    &entry, sizeof(entry));
843 	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
844 		 (BIT_ULL(52)-1)) & ~7ULL;
845 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
846 		    &entry, sizeof(entry));
847 	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
848 	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
849 
850 	return 0;
851 err_out:
852 	free_ga_log(iommu);
853 	return -EINVAL;
854 }
855 #endif /* CONFIG_IRQ_REMAP */
856 
857 static int iommu_init_ga(struct amd_iommu *iommu)
858 {
859 	int ret = 0;
860 
861 #ifdef CONFIG_IRQ_REMAP
862 	/* Note: We have already checked GASup from IVRS table.
863 	 *       Now, we need to make sure that GAMSup is set.
864 	 */
865 	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
866 	    !iommu_feature(iommu, FEATURE_GAM_VAPIC))
867 		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
868 
869 	ret = iommu_init_ga_log(iommu);
870 #endif /* CONFIG_IRQ_REMAP */
871 
872 	return ret;
873 }
874 
875 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
876 {
877 	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
878 
879 	return iommu->cmd_sem ? 0 : -ENOMEM;
880 }
881 
882 static void __init free_cwwb_sem(struct amd_iommu *iommu)
883 {
884 	if (iommu->cmd_sem)
885 		free_page((unsigned long)iommu->cmd_sem);
886 }
887 
888 static void iommu_enable_xt(struct amd_iommu *iommu)
889 {
890 #ifdef CONFIG_IRQ_REMAP
891 	/*
892 	 * XT mode (32-bit APIC destination ID) requires
893 	 * GA mode (128-bit IRTE support) as a prerequisite.
894 	 */
895 	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
896 	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
897 		iommu_feature_enable(iommu, CONTROL_XT_EN);
898 #endif /* CONFIG_IRQ_REMAP */
899 }
900 
901 static void iommu_enable_gt(struct amd_iommu *iommu)
902 {
903 	if (!iommu_feature(iommu, FEATURE_GT))
904 		return;
905 
906 	iommu_feature_enable(iommu, CONTROL_GT_EN);
907 }
908 
909 /* sets a specific bit in the device table entry. */
910 static void set_dev_entry_bit(u16 devid, u8 bit)
911 {
912 	int i = (bit >> 6) & 0x03;
913 	int _bit = bit & 0x3f;
914 
915 	amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
916 }
917 
918 static int get_dev_entry_bit(u16 devid, u8 bit)
919 {
920 	int i = (bit >> 6) & 0x03;
921 	int _bit = bit & 0x3f;
922 
923 	return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
924 }
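/*
 * The DEV_ENTRY_* bit numbers used below are flat indices into the
 * 256-bit device table entry (four u64 words). For example, bit 97 maps
 * to
 *
 *	i    = (97 >> 6) & 0x03 = 1	(data[1])
 *	_bit = 97 & 0x3f        = 33
 */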
925 
926 
927 static bool copy_device_table(void)
928 {
929 	u64 int_ctl, int_tab_len, entry = 0, last_entry = 0;
930 	struct dev_table_entry *old_devtb = NULL;
931 	u32 lo, hi, devid, old_devtb_size;
932 	phys_addr_t old_devtb_phys;
933 	struct amd_iommu *iommu;
934 	u16 dom_id, dte_v, irq_v;
935 	gfp_t gfp_flag;
936 	u64 tmp;
937 
938 	if (!amd_iommu_pre_enabled)
939 		return false;
940 
941 	pr_warn("Translation is already enabled - trying to copy translation structures\n");
942 	for_each_iommu(iommu) {
943 		/* All IOMMUs should use the same device table with the same size */
944 		lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
945 		hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
946 		entry = (((u64) hi) << 32) + lo;
947 		if (last_entry && last_entry != entry) {
948 			pr_err("IOMMU:%d should use the same dev table as others!\n",
949 				iommu->index);
950 			return false;
951 		}
952 		last_entry = entry;
953 
954 		old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
955 		if (old_devtb_size != dev_table_size) {
956 			pr_err("The device table size of IOMMU:%d is not expected!\n",
957 				iommu->index);
958 			return false;
959 		}
960 	}
961 
962 	/*
963 	 * When SME is enabled in the first kernel, the entry includes the
964 	 * memory encryption mask (sme_me_mask), so we must remove the memory
965 	 * encryption mask to obtain the true physical address in the kdump kernel.
966 	 */
967 	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
968 
969 	if (old_devtb_phys >= 0x100000000ULL) {
970 		pr_err("The address of old device table is above 4G, not trustworthy!\n");
971 		return false;
972 	}
973 	old_devtb = (sme_active() && is_kdump_kernel())
974 		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
975 							dev_table_size)
976 		    : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
977 
978 	if (!old_devtb)
979 		return false;
980 
981 	gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
982 	old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
983 				get_order(dev_table_size));
984 	if (old_dev_tbl_cpy == NULL) {
985 		pr_err("Failed to allocate memory for copying old device table!\n");
986 		return false;
987 	}
988 
989 	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
990 		old_dev_tbl_cpy[devid] = old_devtb[devid];
991 		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
992 		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
993 
994 		if (dte_v && dom_id) {
995 			old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
996 			old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
997 			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
998 			/* If gcr3 table existed, mask it out */
999 			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1000 				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1001 				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1002 				old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1003 				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1004 				tmp |= DTE_FLAG_GV;
1005 				old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1006 			}
1007 		}
1008 
1009 		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1010 		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1011 		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1012 		if (irq_v && (int_ctl || int_tab_len)) {
1013 			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1014 			    (int_tab_len != DTE_INTTABLEN)) {
1015 				pr_err("Wrong old irq remapping flag for device id %#x\n", devid);
1016 				return false;
1017 			}
1018 
1019 			old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1020 		}
1021 	}
1022 	memunmap(old_devtb);
1023 
1024 	return true;
1025 }
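/*
 * The size decode in copy_device_table(), ((entry & ~PAGE_MASK) + 1) << 12,
 * is simply the inverse of the encoding programmed by
 * iommu_set_device_table(): it turns the "number of 4 KiB pages minus one"
 * field back into a byte count, which must match this kernel's
 * dev_table_size before the old table is trusted and copied.
 */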
1026 
1027 void amd_iommu_apply_erratum_63(u16 devid)
1028 {
1029 	int sysmgt;
1030 
1031 	sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
1032 		 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
1033 
1034 	if (sysmgt == 0x01)
1035 		set_dev_entry_bit(devid, DEV_ENTRY_IW);
1036 }
1037 
1038 /* Writes the specific IOMMU for a device into the rlookup table */
1039 static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
1040 {
1041 	amd_iommu_rlookup_table[devid] = iommu;
1042 }
1043 
1044 /*
1045  * This function takes the device specific flags read from the ACPI
1046  * table and sets up the device table entry with that information
1047  */
1048 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1049 					   u16 devid, u32 flags, u32 ext_flags)
1050 {
1051 	if (flags & ACPI_DEVFLAG_INITPASS)
1052 		set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
1053 	if (flags & ACPI_DEVFLAG_EXTINT)
1054 		set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
1055 	if (flags & ACPI_DEVFLAG_NMI)
1056 		set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
1057 	if (flags & ACPI_DEVFLAG_SYSMGT1)
1058 		set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
1059 	if (flags & ACPI_DEVFLAG_SYSMGT2)
1060 		set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
1061 	if (flags & ACPI_DEVFLAG_LINT0)
1062 		set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
1063 	if (flags & ACPI_DEVFLAG_LINT1)
1064 		set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
1065 
1066 	amd_iommu_apply_erratum_63(devid);
1067 
1068 	set_iommu_for_device(iommu, devid);
1069 }
1070 
1071 int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
1072 {
1073 	struct devid_map *entry;
1074 	struct list_head *list;
1075 
1076 	if (type == IVHD_SPECIAL_IOAPIC)
1077 		list = &ioapic_map;
1078 	else if (type == IVHD_SPECIAL_HPET)
1079 		list = &hpet_map;
1080 	else
1081 		return -EINVAL;
1082 
1083 	list_for_each_entry(entry, list, list) {
1084 		if (!(entry->id == id && entry->cmd_line))
1085 			continue;
1086 
1087 		pr_info("Command-line override present for %s id %d - ignoring\n",
1088 			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1089 
1090 		*devid = entry->devid;
1091 
1092 		return 0;
1093 	}
1094 
1095 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1096 	if (!entry)
1097 		return -ENOMEM;
1098 
1099 	entry->id	= id;
1100 	entry->devid	= *devid;
1101 	entry->cmd_line	= cmd_line;
1102 
1103 	list_add_tail(&entry->list, list);
1104 
1105 	return 0;
1106 }
1107 
1108 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
1109 				      bool cmd_line)
1110 {
1111 	struct acpihid_map_entry *entry;
1112 	struct list_head *list = &acpihid_map;
1113 
1114 	list_for_each_entry(entry, list, list) {
1115 		if (strcmp(entry->hid, hid) ||
1116 		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1117 		    !entry->cmd_line)
1118 			continue;
1119 
1120 		pr_info("Command-line override for hid:%s uid:%s\n",
1121 			hid, uid);
1122 		*devid = entry->devid;
1123 		return 0;
1124 	}
1125 
1126 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1127 	if (!entry)
1128 		return -ENOMEM;
1129 
1130 	memcpy(entry->uid, uid, strlen(uid));
1131 	memcpy(entry->hid, hid, strlen(hid));
1132 	entry->devid = *devid;
1133 	entry->cmd_line	= cmd_line;
1134 	entry->root_devid = (entry->devid & (~0x7));
1135 
1136 	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1137 		entry->cmd_line ? "cmd" : "ivrs",
1138 		entry->hid, entry->uid, entry->root_devid);
1139 
1140 	list_add_tail(&entry->list, list);
1141 	return 0;
1142 }
1143 
1144 static int __init add_early_maps(void)
1145 {
1146 	int i, ret;
1147 
1148 	for (i = 0; i < early_ioapic_map_size; ++i) {
1149 		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1150 					 early_ioapic_map[i].id,
1151 					 &early_ioapic_map[i].devid,
1152 					 early_ioapic_map[i].cmd_line);
1153 		if (ret)
1154 			return ret;
1155 	}
1156 
1157 	for (i = 0; i < early_hpet_map_size; ++i) {
1158 		ret = add_special_device(IVHD_SPECIAL_HPET,
1159 					 early_hpet_map[i].id,
1160 					 &early_hpet_map[i].devid,
1161 					 early_hpet_map[i].cmd_line);
1162 		if (ret)
1163 			return ret;
1164 	}
1165 
1166 	for (i = 0; i < early_acpihid_map_size; ++i) {
1167 		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1168 					  early_acpihid_map[i].uid,
1169 					  &early_acpihid_map[i].devid,
1170 					  early_acpihid_map[i].cmd_line);
1171 		if (ret)
1172 			return ret;
1173 	}
1174 
1175 	return 0;
1176 }
1177 
1178 /*
1179  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1180  * initializes the hardware and our data structures with it.
1181  */
1182 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1183 					struct ivhd_header *h)
1184 {
1185 	u8 *p = (u8 *)h;
1186 	u8 *end = p, flags = 0;
1187 	u16 devid = 0, devid_start = 0, devid_to = 0;
1188 	u32 dev_i, ext_flags = 0;
1189 	bool alias = false;
1190 	struct ivhd_entry *e;
1191 	u32 ivhd_size;
1192 	int ret;
1193 
1194 
1195 	ret = add_early_maps();
1196 	if (ret)
1197 		return ret;
1198 
1199 	amd_iommu_apply_ivrs_quirks();
1200 
1201 	/*
1202 	 * First save the recommended feature enable bits from ACPI
1203 	 */
1204 	iommu->acpi_flags = h->flags;
1205 
1206 	/*
1207 	 * Done. Now parse the device entries
1208 	 */
1209 	ivhd_size = get_ivhd_header_size(h);
1210 	if (!ivhd_size) {
1211 		pr_err("Unsupported IVHD type %#x\n", h->type);
1212 		return -EINVAL;
1213 	}
1214 
1215 	p += ivhd_size;
1216 
1217 	end += h->length;
1218 
1219 
1220 	while (p < end) {
1221 		e = (struct ivhd_entry *)p;
1222 		switch (e->type) {
1223 		case IVHD_DEV_ALL:
1224 
1225 			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1226 
1227 			for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
1228 				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1229 			break;
1230 		case IVHD_DEV_SELECT:
1231 
1232 			DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
1233 				    "flags: %02x\n",
1234 				    PCI_BUS_NUM(e->devid),
1235 				    PCI_SLOT(e->devid),
1236 				    PCI_FUNC(e->devid),
1237 				    e->flags);
1238 
1239 			devid = e->devid;
1240 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1241 			break;
1242 		case IVHD_DEV_SELECT_RANGE_START:
1243 
1244 			DUMP_printk("  DEV_SELECT_RANGE_START\t "
1245 				    "devid: %02x:%02x.%x flags: %02x\n",
1246 				    PCI_BUS_NUM(e->devid),
1247 				    PCI_SLOT(e->devid),
1248 				    PCI_FUNC(e->devid),
1249 				    e->flags);
1250 
1251 			devid_start = e->devid;
1252 			flags = e->flags;
1253 			ext_flags = 0;
1254 			alias = false;
1255 			break;
1256 		case IVHD_DEV_ALIAS:
1257 
1258 			DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
1259 				    "flags: %02x devid_to: %02x:%02x.%x\n",
1260 				    PCI_BUS_NUM(e->devid),
1261 				    PCI_SLOT(e->devid),
1262 				    PCI_FUNC(e->devid),
1263 				    e->flags,
1264 				    PCI_BUS_NUM(e->ext >> 8),
1265 				    PCI_SLOT(e->ext >> 8),
1266 				    PCI_FUNC(e->ext >> 8));
1267 
1268 			devid = e->devid;
1269 			devid_to = e->ext >> 8;
1270 			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1271 			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1272 			amd_iommu_alias_table[devid] = devid_to;
1273 			break;
1274 		case IVHD_DEV_ALIAS_RANGE:
1275 
1276 			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1277 				    "devid: %02x:%02x.%x flags: %02x "
1278 				    "devid_to: %02x:%02x.%x\n",
1279 				    PCI_BUS_NUM(e->devid),
1280 				    PCI_SLOT(e->devid),
1281 				    PCI_FUNC(e->devid),
1282 				    e->flags,
1283 				    PCI_BUS_NUM(e->ext >> 8),
1284 				    PCI_SLOT(e->ext >> 8),
1285 				    PCI_FUNC(e->ext >> 8));
1286 
1287 			devid_start = e->devid;
1288 			flags = e->flags;
1289 			devid_to = e->ext >> 8;
1290 			ext_flags = 0;
1291 			alias = true;
1292 			break;
1293 		case IVHD_DEV_EXT_SELECT:
1294 
1295 			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
1296 				    "flags: %02x ext: %08x\n",
1297 				    PCI_BUS_NUM(e->devid),
1298 				    PCI_SLOT(e->devid),
1299 				    PCI_FUNC(e->devid),
1300 				    e->flags, e->ext);
1301 
1302 			devid = e->devid;
1303 			set_dev_entry_from_acpi(iommu, devid, e->flags,
1304 						e->ext);
1305 			break;
1306 		case IVHD_DEV_EXT_SELECT_RANGE:
1307 
1308 			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1309 				    "%02x:%02x.%x flags: %02x ext: %08x\n",
1310 				    PCI_BUS_NUM(e->devid),
1311 				    PCI_SLOT(e->devid),
1312 				    PCI_FUNC(e->devid),
1313 				    e->flags, e->ext);
1314 
1315 			devid_start = e->devid;
1316 			flags = e->flags;
1317 			ext_flags = e->ext;
1318 			alias = false;
1319 			break;
1320 		case IVHD_DEV_RANGE_END:
1321 
1322 			DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
1323 				    PCI_BUS_NUM(e->devid),
1324 				    PCI_SLOT(e->devid),
1325 				    PCI_FUNC(e->devid));
1326 
1327 			devid = e->devid;
1328 			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1329 				if (alias) {
1330 					amd_iommu_alias_table[dev_i] = devid_to;
1331 					set_dev_entry_from_acpi(iommu,
1332 						devid_to, flags, ext_flags);
1333 				}
1334 				set_dev_entry_from_acpi(iommu, dev_i,
1335 							flags, ext_flags);
1336 			}
1337 			break;
1338 		case IVHD_DEV_SPECIAL: {
1339 			u8 handle, type;
1340 			const char *var;
1341 			u16 devid;
1342 			int ret;
1343 
1344 			handle = e->ext & 0xff;
1345 			devid  = (e->ext >>  8) & 0xffff;
1346 			type   = (e->ext >> 24) & 0xff;
1347 
1348 			if (type == IVHD_SPECIAL_IOAPIC)
1349 				var = "IOAPIC";
1350 			else if (type == IVHD_SPECIAL_HPET)
1351 				var = "HPET";
1352 			else
1353 				var = "UNKNOWN";
1354 
1355 			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n",
1356 				    var, (int)handle,
1357 				    PCI_BUS_NUM(devid),
1358 				    PCI_SLOT(devid),
1359 				    PCI_FUNC(devid));
1360 
1361 			ret = add_special_device(type, handle, &devid, false);
1362 			if (ret)
1363 				return ret;
1364 
1365 			/*
1366 			 * add_special_device might update the devid in case a
1367 			 * command-line override is present. So call
1368 			 * set_dev_entry_from_acpi after add_special_device.
1369 			 */
1370 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1371 
1372 			break;
1373 		}
1374 		case IVHD_DEV_ACPI_HID: {
1375 			u16 devid;
1376 			u8 hid[ACPIHID_HID_LEN];
1377 			u8 uid[ACPIHID_UID_LEN];
1378 			int ret;
1379 
1380 			if (h->type != 0x40) {
1381 				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1382 				       e->type);
1383 				break;
1384 			}
1385 
1386 			memcpy(hid, (u8 *)(&e->ext), ACPIHID_HID_LEN - 1);
1387 			hid[ACPIHID_HID_LEN - 1] = '\0';
1388 
1389 			if (!(*hid)) {
1390 				pr_err(FW_BUG "Invalid HID.\n");
1391 				break;
1392 			}
1393 
1394 			uid[0] = '\0';
1395 			switch (e->uidf) {
1396 			case UID_NOT_PRESENT:
1397 
1398 				if (e->uidl != 0)
1399 					pr_warn(FW_BUG "Invalid UID length.\n");
1400 
1401 				break;
1402 			case UID_IS_INTEGER:
1403 
1404 				sprintf(uid, "%d", e->uid);
1405 
1406 				break;
1407 			case UID_IS_CHARACTER:
1408 
1409 				memcpy(uid, &e->uid, e->uidl);
1410 				uid[e->uidl] = '\0';
1411 
1412 				break;
1413 			default:
1414 				break;
1415 			}
1416 
1417 			devid = e->devid;
1418 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
1419 				    hid, uid,
1420 				    PCI_BUS_NUM(devid),
1421 				    PCI_SLOT(devid),
1422 				    PCI_FUNC(devid));
1423 
1424 			flags = e->flags;
1425 
1426 			ret = add_acpi_hid_device(hid, uid, &devid, false);
1427 			if (ret)
1428 				return ret;
1429 
1430 			/*
1431 			 * add_acpi_hid_device might update the devid in case a
1432 			 * command-line override is present. So call
1433 			 * set_dev_entry_from_acpi after add_acpi_hid_device.
1434 			 */
1435 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1436 
1437 			break;
1438 		}
1439 		default:
1440 			break;
1441 		}
1442 
1443 		p += ivhd_entry_length(p);
1444 	}
1445 
1446 	return 0;
1447 }
1448 
1449 static void __init free_iommu_one(struct amd_iommu *iommu)
1450 {
1451 	free_cwwb_sem(iommu);
1452 	free_command_buffer(iommu);
1453 	free_event_buffer(iommu);
1454 	free_ppr_log(iommu);
1455 	free_ga_log(iommu);
1456 	iommu_unmap_mmio_space(iommu);
1457 }
1458 
1459 static void __init free_iommu_all(void)
1460 {
1461 	struct amd_iommu *iommu, *next;
1462 
1463 	for_each_iommu_safe(iommu, next) {
1464 		list_del(&iommu->list);
1465 		free_iommu_one(iommu);
1466 		kfree(iommu);
1467 	}
1468 }
1469 
1470 /*
1471  * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1472  * Workaround:
1473  *     BIOS should disable L2B miscellaneous clock gating by setting
1474  *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1475  */
1476 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1477 {
1478 	u32 value;
1479 
1480 	if ((boot_cpu_data.x86 != 0x15) ||
1481 	    (boot_cpu_data.x86_model < 0x10) ||
1482 	    (boot_cpu_data.x86_model > 0x1f))
1483 		return;
1484 
1485 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1486 	pci_read_config_dword(iommu->dev, 0xf4, &value);
1487 
1488 	if (value & BIT(2))
1489 		return;
1490 
1491 	/* Select NB indirect register 0x90 and enable writing */
1492 	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1493 
1494 	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1495 	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1496 
1497 	/* Clear the enable writing bit */
1498 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1499 }
1500 
1501 /*
1502  * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1503  * Workaround:
1504  *     BIOS should enable ATS write permission check by setting
1505  *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1506  */
1507 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1508 {
1509 	u32 value;
1510 
1511 	if ((boot_cpu_data.x86 != 0x15) ||
1512 	    (boot_cpu_data.x86_model < 0x30) ||
1513 	    (boot_cpu_data.x86_model > 0x3f))
1514 		return;
1515 
1516 	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1517 	value = iommu_read_l2(iommu, 0x47);
1518 
1519 	if (value & BIT(0))
1520 		return;
1521 
1522 	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1523 	iommu_write_l2(iommu, 0x47, value | BIT(0));
1524 
1525 	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1526 }
1527 
1528 /*
1529  * This function glues the initialization of one IOMMU
1530  * together and also allocates the command buffer and programs the
1531  * hardware. It does NOT enable the IOMMU. This is done afterwards.
1532  */
1533 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1534 {
1535 	int ret;
1536 
1537 	raw_spin_lock_init(&iommu->lock);
1538 	iommu->cmd_sem_val = 0;
1539 
1540 	/* Add IOMMU to internal data structures */
1541 	list_add_tail(&iommu->list, &amd_iommu_list);
1542 	iommu->index = amd_iommus_present++;
1543 
1544 	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1545 		WARN(1, "System has more IOMMUs than supported by this driver\n");
1546 		return -ENOSYS;
1547 	}
1548 
1549 	/* Index is fine - add IOMMU to the array */
1550 	amd_iommus[iommu->index] = iommu;
1551 
1552 	/*
1553 	 * Copy data from ACPI table entry to the iommu struct
1554 	 */
1555 	iommu->devid   = h->devid;
1556 	iommu->cap_ptr = h->cap_ptr;
1557 	iommu->pci_seg = h->pci_seg;
1558 	iommu->mmio_phys = h->mmio_phys;
1559 
1560 	switch (h->type) {
1561 	case 0x10:
1562 		/* Check if IVHD EFR contains proper max banks/counters */
1563 		if ((h->efr_attr != 0) &&
1564 		    ((h->efr_attr & (0xF << 13)) != 0) &&
1565 		    ((h->efr_attr & (0x3F << 17)) != 0))
1566 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1567 		else
1568 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1569 
1570 		/*
1571 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
1572 		 * GAM also requires GA mode. Therefore, we need to
1573 		 * check cmpxchg16b support before enabling it.
1574 		 */
1575 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1576 		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1577 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1578 		break;
1579 	case 0x11:
1580 	case 0x40:
1581 		if (h->efr_reg & (1 << 9))
1582 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1583 		else
1584 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1585 
1586 		/*
1587 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
1588 		 * XT and GAM also require GA mode. Therefore, we need to
1589 		 * check cmpxchg16b support before enabling them.
1590 		 */
1591 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1592 		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1593 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1594 			break;
1595 		}
1596 
1597 		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1598 			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1599 
1600 		early_iommu_features_init(iommu, h);
1601 
1602 		break;
1603 	default:
1604 		return -EINVAL;
1605 	}
1606 
1607 	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1608 						iommu->mmio_phys_end);
1609 	if (!iommu->mmio_base)
1610 		return -ENOMEM;
1611 
1612 	if (alloc_cwwb_sem(iommu))
1613 		return -ENOMEM;
1614 
1615 	if (alloc_command_buffer(iommu))
1616 		return -ENOMEM;
1617 
1618 	if (alloc_event_buffer(iommu))
1619 		return -ENOMEM;
1620 
1621 	iommu->int_enabled = false;
1622 
1623 	init_translation_status(iommu);
1624 	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1625 		iommu_disable(iommu);
1626 		clear_translation_pre_enabled(iommu);
1627 		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1628 			iommu->index);
1629 	}
1630 	if (amd_iommu_pre_enabled)
1631 		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1632 
1633 	ret = init_iommu_from_acpi(iommu, h);
1634 	if (ret)
1635 		return ret;
1636 
1637 	if (amd_iommu_irq_remap) {
1638 		ret = amd_iommu_create_irq_domain(iommu);
1639 		if (ret)
1640 			return ret;
1641 	}
1642 
1643 	/*
1644 	 * Make sure IOMMU is not considered to translate itself. The IVRS
1645 	 * table tells us so, but this is a lie!
1646 	 */
1647 	amd_iommu_rlookup_table[iommu->devid] = NULL;
1648 
1649 	return 0;
1650 }
1651 
1652 /**
1653  * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1654  * @ivrs: Pointer to the IVRS header
1655  *
1656  * This function searches through all IVHD blocks and returns the highest IVHD type this driver supports.
1657  */
1658 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1659 {
1660 	u8 *base = (u8 *)ivrs;
1661 	struct ivhd_header *ivhd = (struct ivhd_header *)
1662 					(base + IVRS_HEADER_LENGTH);
1663 	u8 last_type = ivhd->type;
1664 	u16 devid = ivhd->devid;
1665 
1666 	while (((u8 *)ivhd - base < ivrs->length) &&
1667 	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1668 		u8 *p = (u8 *) ivhd;
1669 
1670 		if (ivhd->devid == devid)
1671 			last_type = ivhd->type;
1672 		ivhd = (struct ivhd_header *)(p + ivhd->length);
1673 	}
1674 
1675 	return last_type;
1676 }
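/*
 * Example: on a platform exposing IVHD blocks of type 0x10, 0x11 and 0x40
 * for the same IOMMU (same devid), the loop above walks all three (each
 * being <= ACPI_IVHD_TYPE_MAX_SUPPORTED) and returns 0x40, the richest
 * layout this driver can parse.
 */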
1677 
1678 /*
1679  * Iterates over all IOMMU entries in the ACPI table, allocates the
1680  * IOMMU structure and initializes it with init_iommu_one()
1681  */
1682 static int __init init_iommu_all(struct acpi_table_header *table)
1683 {
1684 	u8 *p = (u8 *)table, *end = (u8 *)table;
1685 	struct ivhd_header *h;
1686 	struct amd_iommu *iommu;
1687 	int ret;
1688 
1689 	end += table->length;
1690 	p += IVRS_HEADER_LENGTH;
1691 
1692 	while (p < end) {
1693 		h = (struct ivhd_header *)p;
1694 		if (*p == amd_iommu_target_ivhd_type) {
1695 
1696 			DUMP_printk("device: %02x:%02x.%01x cap: %04x "
1697 				    "seg: %d flags: %01x info %04x\n",
1698 				    PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
1699 				    PCI_FUNC(h->devid), h->cap_ptr,
1700 				    h->pci_seg, h->flags, h->info);
1701 			DUMP_printk("       mmio-addr: %016llx\n",
1702 				    h->mmio_phys);
1703 
1704 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1705 			if (iommu == NULL)
1706 				return -ENOMEM;
1707 
1708 			ret = init_iommu_one(iommu, h);
1709 			if (ret)
1710 				return ret;
1711 		}
1712 		p += h->length;
1713 
1714 	}
1715 	WARN_ON(p != end);
1716 
1717 	return 0;
1718 }
1719 
1720 static void __init init_iommu_perf_ctr(struct amd_iommu *iommu)
1721 {
1722 	int retry;
1723 	struct pci_dev *pdev = iommu->dev;
1724 	u64 val = 0xabcd, val2 = 0, save_reg, save_src;
1725 
1726 	if (!iommu_feature(iommu, FEATURE_PC))
1727 		return;
1728 
1729 	amd_iommu_pc_present = true;
1730 
1731 	/* save the value to restore, if writable */
1732 	if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) ||
1733 	    iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false))
1734 		goto pc_false;
1735 
1736 	/*
1737 	 * Disable power gating by programming the performance counter
1738 	 * source to 20 (i.e. counting reads and writes from/to the IOMMU
1739 	 * Reserved Register [MMIO Offset 1FF8h], which are ignored), so the
1740 	 * counter never gets incremented during this init phase.
1741 	 * (Note: The event is also deprecated.)
1742 	 */
1743 	val = 20;
1744 	if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true))
1745 		goto pc_false;
1746 
1747 	/* Check if the performance counters can be written to */
1748 	val = 0xabcd;
1749 	for (retry = 5; retry; retry--) {
1750 		if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) ||
1751 		    iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) ||
1752 		    val2)
1753 			break;
1754 
1755 		/* Wait about 20 msec for power gating to disable and retry. */
1756 		msleep(20);
1757 	}
1758 
1759 	/* restore */
1760 	if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) ||
1761 	    iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true))
1762 		goto pc_false;
1763 
1764 	if (val != val2)
1765 		goto pc_false;
1766 
1767 	pci_info(pdev, "IOMMU performance counters supported\n");
1768 
1769 	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1770 	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1771 	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1772 
1773 	return;
1774 
1775 pc_false:
1776 	pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n");
1777 	amd_iommu_pc_present = false;
1778 	return;
1779 }
1780 
1781 static ssize_t amd_iommu_show_cap(struct device *dev,
1782 				  struct device_attribute *attr,
1783 				  char *buf)
1784 {
1785 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1786 	return sprintf(buf, "%x\n", iommu->cap);
1787 }
1788 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1789 
1790 static ssize_t amd_iommu_show_features(struct device *dev,
1791 				       struct device_attribute *attr,
1792 				       char *buf)
1793 {
1794 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1795 	return sprintf(buf, "%llx\n", iommu->features);
1796 }
1797 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1798 
1799 static struct attribute *amd_iommu_attrs[] = {
1800 	&dev_attr_cap.attr,
1801 	&dev_attr_features.attr,
1802 	NULL,
1803 };
1804 
1805 static struct attribute_group amd_iommu_group = {
1806 	.name = "amd-iommu",
1807 	.attrs = amd_iommu_attrs,
1808 };
1809 
1810 static const struct attribute_group *amd_iommu_groups[] = {
1811 	&amd_iommu_group,
1812 	NULL,
1813 };
1814 
1815 /*
1816  * Note: IVHD types 0x11 and 0x40 also contain an exact copy
1817  * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
1818  * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
1819  */
1820 static void __init late_iommu_features_init(struct amd_iommu *iommu)
1821 {
1822 	u64 features;
1823 
1824 	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
1825 		return;
1826 
1827 	/* read extended feature bits */
1828 	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
1829 
1830 	if (!iommu->features) {
1831 		iommu->features = features;
1832 		return;
1833 	}
1834 
1835 	/*
1836 	 * Sanity check and warn if EFR values from
1837 	 * IVHD and MMIO conflict.
1838 	 */
1839 	if (features != iommu->features)
1840 		pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n",
1841 			features, iommu->features);
1842 }
1843 
1844 static int __init iommu_init_pci(struct amd_iommu *iommu)
1845 {
1846 	int cap_ptr = iommu->cap_ptr;
1847 	int ret;
1848 
1849 	iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
1850 						 iommu->devid & 0xff);
1851 	if (!iommu->dev)
1852 		return -ENODEV;
1853 
1854 	/* Prevent binding other PCI device drivers to IOMMU devices */
1855 	iommu->dev->match_driver = false;
1856 
1857 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
1858 			      &iommu->cap);
1859 
1860 	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
1861 		amd_iommu_iotlb_sup = false;
1862 
1863 	late_iommu_features_init(iommu);
1864 
1865 	if (iommu_feature(iommu, FEATURE_GT)) {
1866 		int glxval;
1867 		u32 max_pasid;
1868 		u64 pasmax;
1869 
1870 		pasmax = iommu->features & FEATURE_PASID_MASK;
1871 		pasmax >>= FEATURE_PASID_SHIFT;
1872 		max_pasid  = (1 << (pasmax + 1)) - 1;
1873 
1874 		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
1875 
1876 		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
1877 
1878 		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
1879 		glxval >>= FEATURE_GLXVAL_SHIFT;
1880 
1881 		if (amd_iommu_max_glx_val == -1)
1882 			amd_iommu_max_glx_val = glxval;
1883 		else
1884 			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
1885 	}
1886 
1887 	if (iommu_feature(iommu, FEATURE_GT) &&
1888 	    iommu_feature(iommu, FEATURE_PPR)) {
1889 		iommu->is_iommu_v2   = true;
1890 		amd_iommu_v2_present = true;
1891 	}
1892 
1893 	if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
1894 		return -ENOMEM;
1895 
1896 	ret = iommu_init_ga(iommu);
1897 	if (ret)
1898 		return ret;
1899 
1900 	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
1901 		amd_iommu_np_cache = true;
1902 
1903 	init_iommu_perf_ctr(iommu);
1904 
1905 	if (is_rd890_iommu(iommu->dev)) {
1906 		int i, j;
1907 
1908 		iommu->root_pdev =
1909 			pci_get_domain_bus_and_slot(0, iommu->dev->bus->number,
1910 						    PCI_DEVFN(0, 0));
1911 
1912 		/*
1913 		 * Some rd890 systems may not be fully reconfigured by the
1914 		 * BIOS, so it's necessary for us to store this information so
1915 		 * it can be reprogrammed on resume
1916 		 */
1917 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
1918 				&iommu->stored_addr_lo);
1919 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
1920 				&iommu->stored_addr_hi);
1921 
1922 		/* Low bit locks writes to configuration space */
1923 		iommu->stored_addr_lo &= ~1;
1924 
1925 		for (i = 0; i < 6; i++)
1926 			for (j = 0; j < 0x12; j++)
1927 				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
1928 
1929 		for (i = 0; i < 0x83; i++)
1930 			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
1931 	}
1932 
1933 	amd_iommu_erratum_746_workaround(iommu);
1934 	amd_iommu_ats_write_check_workaround(iommu);
1935 
1936 	iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
1937 			       amd_iommu_groups, "ivhd%d", iommu->index);
1938 	iommu_device_set_ops(&iommu->iommu, &amd_iommu_ops);
1939 	iommu_device_register(&iommu->iommu);
1940 
1941 	return pci_enable_device(iommu->dev);
1942 }
1943 
1944 static void print_iommu_info(void)
1945 {
1946 	static const char * const feat_str[] = {
1947 		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
1948 		"IA", "GA", "HE", "PC"
1949 	};
1950 	struct amd_iommu *iommu;
1951 
1952 	for_each_iommu(iommu) {
1953 		struct pci_dev *pdev = iommu->dev;
1954 		int i;
1955 
1956 		pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
1957 
1958 		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
1959 			pci_info(pdev, "Extended features (%#llx):",
1960 				 iommu->features);
1961 			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
1962 				if (iommu_feature(iommu, (1ULL << i)))
1963 					pr_cont(" %s", feat_str[i]);
1964 			}
1965 
1966 			if (iommu->features & FEATURE_GAM_VAPIC)
1967 				pr_cont(" GA_vAPIC");
1968 
1969 			pr_cont("\n");
1970 		}
1971 	}
1972 	if (irq_remapping_enabled) {
1973 		pr_info("Interrupt remapping enabled\n");
1974 		if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
1975 			pr_info("Virtual APIC enabled\n");
1976 		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1977 			pr_info("X2APIC enabled\n");
1978 	}
1979 }
1980 
1981 static int __init amd_iommu_init_pci(void)
1982 {
1983 	struct amd_iommu *iommu;
1984 	int ret;
1985 
1986 	for_each_iommu(iommu) {
1987 		ret = iommu_init_pci(iommu);
1988 		if (ret)
1989 			break;
1990 
1991 		/* Need to setup range after PCI init */
1992 		iommu_set_cwwb_range(iommu);
1993 	}
1994 
1995 	/*
1996 	 * Order is important here to make sure any unity map requirements are
1997 	 * fulfilled. The unity mappings are created and written to the device
1998 	 * table during the amd_iommu_init_api() call.
1999 	 *
2000 	 * After that we call init_device_table_dma() to make sure any
2001 	 * uninitialized DTE will block DMA, and in the end we flush the caches
2002 	 * of all IOMMUs to make sure the changes to the device table are
2003 	 * active.
2004 	 */
2005 	ret = amd_iommu_init_api();
2006 
2007 	init_device_table_dma();
2008 
2009 	for_each_iommu(iommu)
2010 		iommu_flush_all_caches(iommu);
2011 
2012 	if (!ret)
2013 		print_iommu_info();
2014 
2015 	return ret;
2016 }
2017 
2018 /****************************************************************************
2019  *
2020  * The following functions initialize the MSI interrupts for all IOMMUs
2021  * in the system. It's a bit challenging because there could be multiple
2022  * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2023  * pci_dev.
2024  *
2025  ****************************************************************************/
2026 
2027 static int iommu_setup_msi(struct amd_iommu *iommu)
2028 {
2029 	int r;
2030 
2031 	r = pci_enable_msi(iommu->dev);
2032 	if (r)
2033 		return r;
2034 
2035 	r = request_threaded_irq(iommu->dev->irq,
2036 				 amd_iommu_int_handler,
2037 				 amd_iommu_int_thread,
2038 				 0, "AMD-Vi",
2039 				 iommu);
2040 
2041 	if (r) {
2042 		pci_disable_msi(iommu->dev);
2043 		return r;
2044 	}
2045 
2046 	return 0;
2047 }
2048 
2049 union intcapxt {
2050 	u64	capxt;
2051 	struct {
2052 		u64	reserved_0		:  2,
2053 			dest_mode_logical	:  1,
2054 			reserved_1		:  5,
2055 			destid_0_23		: 24,
2056 			vector			:  8,
2057 			reserved_2		: 16,
2058 			destid_24_31		:  8;
2059 	};
2060 } __attribute__ ((packed));
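/*
 * Example encoding (illustrative values only): for cfg->vector = 0x30 and
 * cfg->dest_apicid = 0x12345678, intcapxt_irqdomain_activate() below ends
 * up with destid_0_23 = 0x345678, destid_24_31 = 0x12 and vector = 0x30.
 */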
2061 
2062 /*
2063  * There isn't really any need to mask/unmask at the irqchip level because
2064  * the 64-bit INTCAPXT registers can be updated atomically without tearing
2065  * when the affinity is being updated.
2066  */
2067 static void intcapxt_unmask_irq(struct irq_data *data)
2068 {
2069 }
2070 
2071 static void intcapxt_mask_irq(struct irq_data *data)
2072 {
2073 }
2074 
2075 static struct irq_chip intcapxt_controller;
2076 
2077 static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2078 				       struct irq_data *irqd, bool reserve)
2079 {
2080 	struct amd_iommu *iommu = irqd->chip_data;
2081 	struct irq_cfg *cfg = irqd_cfg(irqd);
2082 	union intcapxt xt;
2083 
2084 	xt.capxt = 0ULL;
2085 	xt.dest_mode_logical = apic->dest_mode_logical;
2086 	xt.vector = cfg->vector;
2087 	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2088 	xt.destid_24_31 = cfg->dest_apicid >> 24;
2089 
2090 	/*
2091 	 * The current IOMMU implementation uses the same IRQ for all
2092 	 * three IOMMU interrupts (event log, PPR log and GA log).
2093 	 */
2094 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2095 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2096 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2097 	return 0;
2098 }
2099 
2100 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2101 					  struct irq_data *irqd)
2102 {
2103 	intcapxt_mask_irq(irqd);
2104 }
2105 
2106 
2107 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2108 				    unsigned int nr_irqs, void *arg)
2109 {
2110 	struct irq_alloc_info *info = arg;
2111 	int i, ret;
2112 
2113 	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2114 		return -EINVAL;
2115 
2116 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2117 	if (ret < 0)
2118 		return ret;
2119 
2120 	for (i = virq; i < virq + nr_irqs; i++) {
2121 		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2122 
2123 		irqd->chip = &intcapxt_controller;
2124 		irqd->chip_data = info->data;
2125 		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2126 	}
2127 
2128 	return ret;
2129 }
2130 
2131 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2132 				    unsigned int nr_irqs)
2133 {
2134 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
2135 }
2136 
2137 static int intcapxt_set_affinity(struct irq_data *irqd,
2138 				 const struct cpumask *mask, bool force)
2139 {
2140 	struct irq_data *parent = irqd->parent_data;
2141 	int ret;
2142 
2143 	ret = parent->chip->irq_set_affinity(parent, mask, force);
2144 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2145 		return ret;
2146 
2147 	return intcapxt_irqdomain_activate(irqd->domain, irqd, false);
2148 }
2149 
2150 static struct irq_chip intcapxt_controller = {
2151 	.name			= "IOMMU-MSI",
2152 	.irq_unmask		= intcapxt_unmask_irq,
2153 	.irq_mask		= intcapxt_mask_irq,
2154 	.irq_ack		= irq_chip_ack_parent,
2155 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
2156 	.irq_set_affinity       = intcapxt_set_affinity,
2157 	.flags			= IRQCHIP_SKIP_SET_WAKE,
2158 };
2159 
2160 static const struct irq_domain_ops intcapxt_domain_ops = {
2161 	.alloc			= intcapxt_irqdomain_alloc,
2162 	.free			= intcapxt_irqdomain_free,
2163 	.activate		= intcapxt_irqdomain_activate,
2164 	.deactivate		= intcapxt_irqdomain_deactivate,
2165 };
2166 
2167 
2168 static struct irq_domain *iommu_irqdomain;
2169 
2170 static struct irq_domain *iommu_get_irqdomain(void)
2171 {
2172 	struct fwnode_handle *fn;
2173 
2174 	/* No need for locking here (yet) as the init is single-threaded */
2175 	if (iommu_irqdomain)
2176 		return iommu_irqdomain;
2177 
2178 	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2179 	if (!fn)
2180 		return NULL;
2181 
2182 	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2183 						      fn, &intcapxt_domain_ops,
2184 						      NULL);
2185 	if (!iommu_irqdomain)
2186 		irq_domain_free_fwnode(fn);
2187 
2188 	return iommu_irqdomain;
2189 }
2190 
2191 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2192 {
2193 	struct irq_domain *domain;
2194 	struct irq_alloc_info info;
2195 	int irq, ret;
2196 
2197 	domain = iommu_get_irqdomain();
2198 	if (!domain)
2199 		return -ENXIO;
2200 
2201 	init_irq_alloc_info(&info, NULL);
2202 	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2203 	info.data = iommu;
2204 
2205 	irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
2206 	if (irq < 0) {
2207 		irq_domain_remove(domain);
2208 		return irq;
2209 	}
2210 
2211 	ret = request_threaded_irq(irq, amd_iommu_int_handler,
2212 				   amd_iommu_int_thread, 0, "AMD-Vi", iommu);
2213 	if (ret) {
2214 		irq_domain_free_irqs(irq, 1);
2215 		irq_domain_remove(domain);
2216 		return ret;
2217 	}
2218 
2219 	iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2220 	return 0;
2221 }
2222 
2223 static int iommu_init_irq(struct amd_iommu *iommu)
2224 {
2225 	int ret;
2226 
2227 	if (iommu->int_enabled)
2228 		goto enable_faults;
2229 
2230 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2231 		ret = iommu_setup_intcapxt(iommu);
2232 	else if (iommu->dev->msi_cap)
2233 		ret = iommu_setup_msi(iommu);
2234 	else
2235 		ret = -ENODEV;
2236 
2237 	if (ret)
2238 		return ret;
2239 
2240 	iommu->int_enabled = true;
2241 enable_faults:
2242 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2243 
2244 	if (iommu->ppr_log != NULL)
2245 		iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
2246 
2247 	iommu_ga_log_enable(iommu);
2248 
2249 	return 0;
2250 }
2251 
2252 /****************************************************************************
2253  *
2254  * The next functions belong to the third pass of parsing the ACPI
2255  * table. In this last pass the memory mapping requirements are
2256  * gathered (like exclusion and unity mapping ranges).
2257  *
2258  ****************************************************************************/
2259 
2260 static void __init free_unity_maps(void)
2261 {
2262 	struct unity_map_entry *entry, *next;
2263 
2264 	list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
2265 		list_del(&entry->list);
2266 		kfree(entry);
2267 	}
2268 }
2269 
2270 /* called for unity map ACPI definition */
2271 static int __init init_unity_map_range(struct ivmd_header *m)
2272 {
2273 	struct unity_map_entry *e = NULL;
2274 	char *s;
2275 
2276 	e = kzalloc(sizeof(*e), GFP_KERNEL);
2277 	if (e == NULL)
2278 		return -ENOMEM;
2279 
2280 	switch (m->type) {
2281 	default:
2282 		kfree(e);
2283 		return 0;
2284 	case ACPI_IVMD_TYPE:
2285 		s = "IVMD_TYPEi\t\t\t";
2286 		e->devid_start = e->devid_end = m->devid;
2287 		break;
2288 	case ACPI_IVMD_TYPE_ALL:
2289 		s = "IVMD_TYPE_ALL\t\t";
2290 		e->devid_start = 0;
2291 		e->devid_end = amd_iommu_last_bdf;
2292 		break;
2293 	case ACPI_IVMD_TYPE_RANGE:
2294 		s = "IVMD_TYPE_RANGE\t\t";
2295 		e->devid_start = m->devid;
2296 		e->devid_end = m->aux;
2297 		break;
2298 	}
2299 	e->address_start = PAGE_ALIGN(m->range_start);
2300 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2301 	e->prot = m->flags >> 1;
2302 
2303 	/*
2304 	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2305 	 * since some buggy BIOSes may cause the exclusion range fields
2306 	 * (the exclusion_start and exclusion_length members) to be
2307 	 * overwritten. This happens when multiple exclusion ranges (IVMD
2308 	 * entries) are defined in the ACPI table.
2309 	 */
2310 	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2311 		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2312 
2313 	DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
2314 		    " range_start: %016llx range_end: %016llx flags: %x\n", s,
2315 		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2316 		    PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end),
2317 		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2318 		    e->address_start, e->address_end, m->flags);
2319 
2320 	list_add_tail(&e->list, &amd_iommu_unity_map);
2321 
2322 	return 0;
2323 }
2324 
2325 /* iterates over all memory definitions we find in the ACPI table */
2326 static int __init init_memory_definitions(struct acpi_table_header *table)
2327 {
2328 	u8 *p = (u8 *)table, *end = (u8 *)table;
2329 	struct ivmd_header *m;
2330 
2331 	end += table->length;
2332 	p += IVRS_HEADER_LENGTH;
2333 
2334 	while (p < end) {
2335 		m = (struct ivmd_header *)p;
2336 		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2337 			init_unity_map_range(m);
2338 
2339 		p += m->length;
2340 	}
2341 
2342 	return 0;
2343 }
2344 
2345 /*
2346  * Init the device table to not allow DMA access for devices
2347  */
2348 static void init_device_table_dma(void)
2349 {
2350 	u32 devid;
2351 
2352 	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2353 		set_dev_entry_bit(devid, DEV_ENTRY_VALID);
2354 		set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
2355 	}
2356 }
2357 
2358 static void __init uninit_device_table_dma(void)
2359 {
2360 	u32 devid;
2361 
2362 	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2363 		amd_iommu_dev_table[devid].data[0] = 0ULL;
2364 		amd_iommu_dev_table[devid].data[1] = 0ULL;
2365 	}
2366 }
2367 
2368 static void init_device_table(void)
2369 {
2370 	u32 devid;
2371 
2372 	if (!amd_iommu_irq_remap)
2373 		return;
2374 
2375 	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
2376 		set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN);
2377 }
2378 
2379 static void iommu_init_flags(struct amd_iommu *iommu)
2380 {
2381 	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2382 		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2383 		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2384 
2385 	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2386 		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2387 		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2388 
2389 	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2390 		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2391 		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2392 
2393 	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2394 		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2395 		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2396 
2397 	/*
2398 	 * make IOMMU memory accesses cache coherent
2399 	 */
2400 	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2401 
2402 	/* Set IOTLB invalidation timeout to 1s */
2403 	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2404 }
2405 
2406 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2407 {
2408 	int i, j;
2409 	u32 ioc_feature_control;
2410 	struct pci_dev *pdev = iommu->root_pdev;
2411 
2412 	/* RD890 BIOSes may not have completely reconfigured the iommu */
2413 	if (!is_rd890_iommu(iommu->dev) || !pdev)
2414 		return;
2415 
2416 	/*
2417 	 * First, we need to ensure that the iommu is enabled. This is
2418 	 * controlled by a register in the northbridge
2419 	 */
2420 
2421 	/* Select Northbridge indirect register 0x75 and enable writing */
2422 	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2423 	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2424 
2425 	/* Enable the iommu */
2426 	if (!(ioc_feature_control & 0x1))
2427 		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2428 
2429 	/* Restore the iommu BAR */
2430 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2431 			       iommu->stored_addr_lo);
2432 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2433 			       iommu->stored_addr_hi);
2434 
2435 	/* Restore the l1 indirect regs for each of the 6 l1s */
2436 	for (i = 0; i < 6; i++)
2437 		for (j = 0; j < 0x12; j++)
2438 			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2439 
2440 	/* Restore the l2 indirect regs */
2441 	for (i = 0; i < 0x83; i++)
2442 		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2443 
2444 	/* Lock PCI setup registers */
2445 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2446 			       iommu->stored_addr_lo | 1);
2447 }
2448 
2449 static void iommu_enable_ga(struct amd_iommu *iommu)
2450 {
2451 #ifdef CONFIG_IRQ_REMAP
2452 	switch (amd_iommu_guest_ir) {
2453 	case AMD_IOMMU_GUEST_IR_VAPIC:
2454 		iommu_feature_enable(iommu, CONTROL_GAM_EN);
2455 		fallthrough;
2456 	case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2457 		iommu_feature_enable(iommu, CONTROL_GA_EN);
2458 		iommu->irte_ops = &irte_128_ops;
2459 		break;
2460 	default:
2461 		iommu->irte_ops = &irte_32_ops;
2462 		break;
2463 	}
2464 #endif
2465 }
2466 
2467 static void early_enable_iommu(struct amd_iommu *iommu)
2468 {
2469 	iommu_disable(iommu);
2470 	iommu_init_flags(iommu);
2471 	iommu_set_device_table(iommu);
2472 	iommu_enable_command_buffer(iommu);
2473 	iommu_enable_event_buffer(iommu);
2474 	iommu_set_exclusion_range(iommu);
2475 	iommu_enable_ga(iommu);
2476 	iommu_enable_xt(iommu);
2477 	iommu_enable(iommu);
2478 	iommu_flush_all_caches(iommu);
2479 }
2480 
2481 /*
2482  * This function finally enables all IOMMUs found in the system after
2483  * they have been initialized.
2484  *
2485  * If this is a kdump kernel and all IOMMUs are pre-enabled, try to copy
2486  * the old contents of the device table entries. If that is not the case,
2487  * or the copy fails, just continue as a normal kernel would.
2488  */
2489 static void early_enable_iommus(void)
2490 {
2491 	struct amd_iommu *iommu;
2492 
2493 
2494 	if (!copy_device_table()) {
2495 		/*
2496 		 * If we get here because copying the device table from the old
2497 		 * kernel (with all IOMMUs enabled) failed, print an error message
2498 		 * and try to free the allocated old_dev_tbl_cpy.
2499 		 */
2500 		if (amd_iommu_pre_enabled)
2501 			pr_err("Failed to copy DEV table from previous kernel.\n");
2502 		if (old_dev_tbl_cpy != NULL)
2503 			free_pages((unsigned long)old_dev_tbl_cpy,
2504 					get_order(dev_table_size));
2505 
2506 		for_each_iommu(iommu) {
2507 			clear_translation_pre_enabled(iommu);
2508 			early_enable_iommu(iommu);
2509 		}
2510 	} else {
2511 		pr_info("Copied DEV table from previous kernel.\n");
2512 		free_pages((unsigned long)amd_iommu_dev_table,
2513 				get_order(dev_table_size));
2514 		amd_iommu_dev_table = old_dev_tbl_cpy;
2515 		for_each_iommu(iommu) {
2516 			iommu_disable_command_buffer(iommu);
2517 			iommu_disable_event_buffer(iommu);
2518 			iommu_enable_command_buffer(iommu);
2519 			iommu_enable_event_buffer(iommu);
2520 			iommu_enable_ga(iommu);
2521 			iommu_enable_xt(iommu);
2522 			iommu_set_device_table(iommu);
2523 			iommu_flush_all_caches(iommu);
2524 		}
2525 	}
2526 
2527 #ifdef CONFIG_IRQ_REMAP
2528 	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2529 		amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2530 #endif
2531 }
2532 
2533 static void enable_iommus_v2(void)
2534 {
2535 	struct amd_iommu *iommu;
2536 
2537 	for_each_iommu(iommu) {
2538 		iommu_enable_ppr_log(iommu);
2539 		iommu_enable_gt(iommu);
2540 	}
2541 }
2542 
2543 static void enable_iommus(void)
2544 {
2545 	early_enable_iommus();
2546 
2547 	enable_iommus_v2();
2548 }
2549 
2550 static void disable_iommus(void)
2551 {
2552 	struct amd_iommu *iommu;
2553 
2554 	for_each_iommu(iommu)
2555 		iommu_disable(iommu);
2556 
2557 #ifdef CONFIG_IRQ_REMAP
2558 	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2559 		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2560 #endif
2561 }
2562 
2563 /*
2564  * Suspend/Resume support
2565  * disable suspend until real resume implemented
2566  */
2567 
2568 static void amd_iommu_resume(void)
2569 {
2570 	struct amd_iommu *iommu;
2571 
2572 	for_each_iommu(iommu)
2573 		iommu_apply_resume_quirks(iommu);
2574 
2575 	/* re-load the hardware */
2576 	enable_iommus();
2577 
2578 	amd_iommu_enable_interrupts();
2579 }
2580 
2581 static int amd_iommu_suspend(void)
2582 {
2583 	/* disable IOMMUs to go out of the way for BIOS */
2584 	disable_iommus();
2585 
2586 	return 0;
2587 }
2588 
2589 static struct syscore_ops amd_iommu_syscore_ops = {
2590 	.suspend = amd_iommu_suspend,
2591 	.resume = amd_iommu_resume,
2592 };
2593 
2594 static void __init free_iommu_resources(void)
2595 {
2596 	kmemleak_free(irq_lookup_table);
2597 	free_pages((unsigned long)irq_lookup_table,
2598 		   get_order(rlookup_table_size));
2599 	irq_lookup_table = NULL;
2600 
2601 	kmem_cache_destroy(amd_iommu_irq_cache);
2602 	amd_iommu_irq_cache = NULL;
2603 
2604 	free_pages((unsigned long)amd_iommu_rlookup_table,
2605 		   get_order(rlookup_table_size));
2606 	amd_iommu_rlookup_table = NULL;
2607 
2608 	free_pages((unsigned long)amd_iommu_alias_table,
2609 		   get_order(alias_table_size));
2610 	amd_iommu_alias_table = NULL;
2611 
2612 	free_pages((unsigned long)amd_iommu_dev_table,
2613 		   get_order(dev_table_size));
2614 	amd_iommu_dev_table = NULL;
2615 
2616 	free_iommu_all();
2617 }
2618 
2619 /* SB IOAPIC is always on this device in AMD systems */
2620 #define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
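/*
 * With PCI_DEVFN(0x14, 0) this evaluates to device id 0xa0, i.e. the
 * southbridge at PCI address 00:14.0 (bus 0, device 0x14, function 0).
 */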
2621 
2622 static bool __init check_ioapic_information(void)
2623 {
2624 	const char *fw_bug = FW_BUG;
2625 	bool ret, has_sb_ioapic;
2626 	int idx;
2627 
2628 	has_sb_ioapic = false;
2629 	ret           = false;
2630 
2631 	/*
2632 	 * If we have map overrides on the kernel command line, the
2633 	 * messages in this function might not describe firmware bugs
2634 	 * anymore - so be careful.
2635 	 */
2636 	if (cmdline_maps)
2637 		fw_bug = "";
2638 
2639 	for (idx = 0; idx < nr_ioapics; idx++) {
2640 		int devid, id = mpc_ioapic_id(idx);
2641 
2642 		devid = get_ioapic_devid(id);
2643 		if (devid < 0) {
2644 			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2645 				fw_bug, id);
2646 			ret = false;
2647 		} else if (devid == IOAPIC_SB_DEVID) {
2648 			has_sb_ioapic = true;
2649 			ret           = true;
2650 		}
2651 	}
2652 
2653 	if (!has_sb_ioapic) {
2654 		/*
2655 		 * We expect the SB IOAPIC to be listed in the IVRS
2656 		 * table. The system timer is connected to the SB IOAPIC
2657 		 * and if we don't have it in the list the system will
2658 		 * panic at boot time.  This situation usually happens
2659 		 * when the BIOS is buggy and provides us with the wrong
2660 		 * device id for the IOAPIC in the system.
2661 		 */
2662 		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2663 	}
2664 
2665 	if (!ret)
2666 		pr_err("Disabling interrupt remapping\n");
2667 
2668 	return ret;
2669 }
2670 
2671 static void __init free_dma_resources(void)
2672 {
2673 	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2674 		   get_order(MAX_DOMAIN_ID/8));
2675 	amd_iommu_pd_alloc_bitmap = NULL;
2676 
2677 	free_unity_maps();
2678 }
2679 
2680 static void __init ivinfo_init(void *ivrs)
2681 {
2682 	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
2683 }
2684 
2685 /*
2686  * This is the hardware init function for AMD IOMMU in the system.
2687  * This function is called either from amd_iommu_init or from the interrupt
2688  * remapping setup code.
2689  *
2690  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2691  * four times:
2692  *
2693  *	1st pass) Discover the most comprehensive IVHD type to use.
2694  *
2695  *	2nd pass) Find the highest PCI device id the driver has to handle.
2696  *		Based on this information the sizes of the data structures
2697  *		that need to be allocated are determined.
2698  *
2699  *	3rd pass) Initialize the data structures just allocated with the
2700  *		information in the ACPI table about available AMD IOMMUs
2701  *		in the system. It also maps the PCI devices in the
2702  *		system to specific IOMMUs.
2703  *
2704  *	4th pass) After the basic data structures are allocated and
2705  *		initialized, update them with information about memory
2706  *		remapping requirements parsed out of the ACPI table in
2707  *		this last pass.
2708  *
2709  * After everything is set up the IOMMUs are enabled and the necessary
2710  * hotplug and suspend notifiers are registered.
2711  */
2712 static int __init early_amd_iommu_init(void)
2713 {
2714 	struct acpi_table_header *ivrs_base;
2715 	int i, remap_cache_sz, ret;
2716 	acpi_status status;
2717 	u32 pci_id;
2718 
2719 	if (!amd_iommu_detected)
2720 		return -ENODEV;
2721 
2722 	status = acpi_get_table("IVRS", 0, &ivrs_base);
2723 	if (status == AE_NOT_FOUND)
2724 		return -ENODEV;
2725 	else if (ACPI_FAILURE(status)) {
2726 		const char *err = acpi_format_exception(status);
2727 		pr_err("IVRS table error: %s\n", err);
2728 		return -EINVAL;
2729 	}
2730 
2731 	/*
2732 	 * Validate checksum here so we don't need to do it when
2733 	 * we actually parse the table
2734 	 */
2735 	ret = check_ivrs_checksum(ivrs_base);
2736 	if (ret)
2737 		goto out;
2738 
2739 	ivinfo_init(ivrs_base);
2740 
2741 	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
2742 	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
2743 
2744 	/*
2745 	 * First parse the ACPI tables to find the largest Bus/Dev/Func
2746 	 * we need to handle. Based on this information the shared data
2747 	 * structures for the IOMMUs in the system will be allocated.
2748 	 */
2749 	ret = find_last_devid_acpi(ivrs_base);
2750 	if (ret)
2751 		goto out;
2752 
2753 	dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
2754 	alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
2755 	rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
2756 
2757 	/* Device table - directly used by all IOMMUs */
2758 	ret = -ENOMEM;
2759 	amd_iommu_dev_table = (void *)__get_free_pages(
2760 				      GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
2761 				      get_order(dev_table_size));
2762 	if (amd_iommu_dev_table == NULL)
2763 		goto out;
2764 
2765 	/*
2766 	 * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
2767 	 * IOMMU sees for that device
2768 	 */
2769 	amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
2770 			get_order(alias_table_size));
2771 	if (amd_iommu_alias_table == NULL)
2772 		goto out;
2773 
2774 	/* IOMMU rlookup table - find the IOMMU for a specific device */
2775 	amd_iommu_rlookup_table = (void *)__get_free_pages(
2776 			GFP_KERNEL | __GFP_ZERO,
2777 			get_order(rlookup_table_size));
2778 	if (amd_iommu_rlookup_table == NULL)
2779 		goto out;
2780 
2781 	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
2782 					    GFP_KERNEL | __GFP_ZERO,
2783 					    get_order(MAX_DOMAIN_ID/8));
2784 	if (amd_iommu_pd_alloc_bitmap == NULL)
2785 		goto out;
2786 
2787 	/*
2788 	 * Let all alias entries point to themselves
2789 	 */
2790 	for (i = 0; i <= amd_iommu_last_bdf; ++i)
2791 		amd_iommu_alias_table[i] = i;
2792 
2793 	/*
2794 	 * Never allocate domain 0 because it's used as the placeholder for
2795 	 * non-allocated and error values
2796 	 */
2797 	__set_bit(0, amd_iommu_pd_alloc_bitmap);
2798 
2799 	/*
2800 	 * Now that the data structures are allocated and basically
2801 	 * initialized, start the real ACPI table scan.
2802 	 */
2803 	ret = init_iommu_all(ivrs_base);
2804 	if (ret)
2805 		goto out;
2806 
2807 	/* Disable IOMMU if there's Stoney Ridge graphics */
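	/*
	 * read_pci_config(0, i, 0, 0) returns the vendor ID in the low 16
	 * bits and the device ID in the high 16 bits; 0x1002/0x98e4 is the
	 * AMD/ATI Stoney Ridge internal graphics device.
	 */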
2808 	for (i = 0; i < 32; i++) {
2809 		pci_id = read_pci_config(0, i, 0, 0);
2810 		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
2811 			pr_info("Disable IOMMU on Stoney Ridge\n");
2812 			amd_iommu_disabled = true;
2813 			break;
2814 		}
2815 	}
2816 
2817 	/* Disable any previously enabled IOMMUs */
2818 	if (!is_kdump_kernel() || amd_iommu_disabled)
2819 		disable_iommus();
2820 
2821 	if (amd_iommu_irq_remap)
2822 		amd_iommu_irq_remap = check_ioapic_information();
2823 
2824 	if (amd_iommu_irq_remap) {
2825 		/*
2826 		 * Interrupt remapping enabled, create kmem_cache for the
2827 		 * remapping tables.
2828 		 */
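		/*
		 * The entry size matches the IRTE format selected in
		 * iommu_enable_ga(): 32-bit IRTEs (irte_32_ops) for plain
		 * legacy remapping and 128-bit IRTEs (irte_128_ops) for the
		 * GA modes, hence u32 vs. two u64s per entry below.
		 */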
2829 		ret = -ENOMEM;
2830 		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
2831 			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
2832 		else
2833 			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
2834 		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
2835 							remap_cache_sz,
2836 							DTE_INTTAB_ALIGNMENT,
2837 							0, NULL);
2838 		if (!amd_iommu_irq_cache)
2839 			goto out;
2840 
2841 		irq_lookup_table = (void *)__get_free_pages(
2842 				GFP_KERNEL | __GFP_ZERO,
2843 				get_order(rlookup_table_size));
2844 		kmemleak_alloc(irq_lookup_table, rlookup_table_size,
2845 			       1, GFP_KERNEL);
2846 		if (!irq_lookup_table)
2847 			goto out;
2848 	}
2849 
2850 	ret = init_memory_definitions(ivrs_base);
2851 	if (ret)
2852 		goto out;
2853 
2854 	/* init the device table */
2855 	init_device_table();
2856 
2857 out:
2858 	/* Don't leak any ACPI memory */
2859 	acpi_put_table(ivrs_base);
2860 
2861 	return ret;
2862 }
2863 
2864 static int amd_iommu_enable_interrupts(void)
2865 {
2866 	struct amd_iommu *iommu;
2867 	int ret = 0;
2868 
2869 	for_each_iommu(iommu) {
2870 		ret = iommu_init_irq(iommu);
2871 		if (ret)
2872 			goto out;
2873 	}
2874 
2875 out:
2876 	return ret;
2877 }
2878 
2879 static bool detect_ivrs(void)
2880 {
2881 	struct acpi_table_header *ivrs_base;
2882 	acpi_status status;
2883 
2884 	status = acpi_get_table("IVRS", 0, &ivrs_base);
2885 	if (status == AE_NOT_FOUND)
2886 		return false;
2887 	else if (ACPI_FAILURE(status)) {
2888 		const char *err = acpi_format_exception(status);
2889 		pr_err("IVRS table error: %s\n", err);
2890 		return false;
2891 	}
2892 
2893 	acpi_put_table(ivrs_base);
2894 
2895 	/* Make sure ACS will be enabled during PCI probe */
2896 	pci_request_acs();
2897 
2898 	return true;
2899 }
2900 
2901 /****************************************************************************
2902  *
2903  * AMD IOMMU Initialization State Machine
2904  *
2905  ****************************************************************************/
2906 
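/*
 * On the success path the state machine below advances through
 * IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED ->
 * IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN -> IOMMU_DMA_OPS
 * -> IOMMU_INITIALIZED. IOMMU_NOT_FOUND, IOMMU_INIT_ERROR and
 * IOMMU_CMDLINE_DISABLED are terminal error states.
 */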
2907 static int __init state_next(void)
2908 {
2909 	int ret = 0;
2910 
2911 	switch (init_state) {
2912 	case IOMMU_START_STATE:
2913 		if (!detect_ivrs()) {
2914 			init_state	= IOMMU_NOT_FOUND;
2915 			ret		= -ENODEV;
2916 		} else {
2917 			init_state	= IOMMU_IVRS_DETECTED;
2918 		}
2919 		break;
2920 	case IOMMU_IVRS_DETECTED:
2921 		ret = early_amd_iommu_init();
2922 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
2923 		if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
2924 			pr_info("AMD IOMMU disabled\n");
2925 			init_state = IOMMU_CMDLINE_DISABLED;
2926 			ret = -EINVAL;
2927 		}
2928 		break;
2929 	case IOMMU_ACPI_FINISHED:
2930 		early_enable_iommus();
2931 		x86_platform.iommu_shutdown = disable_iommus;
2932 		init_state = IOMMU_ENABLED;
2933 		break;
2934 	case IOMMU_ENABLED:
2935 		register_syscore_ops(&amd_iommu_syscore_ops);
2936 		ret = amd_iommu_init_pci();
2937 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
2938 		enable_iommus_v2();
2939 		break;
2940 	case IOMMU_PCI_INIT:
2941 		ret = amd_iommu_enable_interrupts();
2942 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
2943 		break;
2944 	case IOMMU_INTERRUPTS_EN:
2945 		ret = amd_iommu_init_dma_ops();
2946 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
2947 		break;
2948 	case IOMMU_DMA_OPS:
2949 		init_state = IOMMU_INITIALIZED;
2950 		break;
2951 	case IOMMU_INITIALIZED:
2952 		/* Nothing to do */
2953 		break;
2954 	case IOMMU_NOT_FOUND:
2955 	case IOMMU_INIT_ERROR:
2956 	case IOMMU_CMDLINE_DISABLED:
2957 		/* Error states => do nothing */
2958 		ret = -EINVAL;
2959 		break;
2960 	default:
2961 		/* Unknown state */
2962 		BUG();
2963 	}
2964 
2965 	if (ret) {
2966 		free_dma_resources();
2967 		if (!irq_remapping_enabled) {
2968 			disable_iommus();
2969 			free_iommu_resources();
2970 		} else {
2971 			struct amd_iommu *iommu;
2972 
2973 			uninit_device_table_dma();
2974 			for_each_iommu(iommu)
2975 				iommu_flush_all_caches(iommu);
2976 		}
2977 	}
2978 	return ret;
2979 }
2980 
2981 static int __init iommu_go_to_state(enum iommu_init_state state)
2982 {
2983 	int ret = -EINVAL;
2984 
2985 	while (init_state != state) {
2986 		if (init_state == IOMMU_NOT_FOUND         ||
2987 		    init_state == IOMMU_INIT_ERROR        ||
2988 		    init_state == IOMMU_CMDLINE_DISABLED)
2989 			break;
2990 		ret = state_next();
2991 	}
2992 
2993 	return ret;
2994 }
2995 
2996 #ifdef CONFIG_IRQ_REMAP
2997 int __init amd_iommu_prepare(void)
2998 {
2999 	int ret;
3000 
3001 	amd_iommu_irq_remap = true;
3002 
3003 	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3004 	if (ret)
3005 		return ret;
3006 	return amd_iommu_irq_remap ? 0 : -ENODEV;
3007 }
3008 
3009 int __init amd_iommu_enable(void)
3010 {
3011 	int ret;
3012 
3013 	ret = iommu_go_to_state(IOMMU_ENABLED);
3014 	if (ret)
3015 		return ret;
3016 
3017 	irq_remapping_enabled = 1;
3018 	return amd_iommu_xt_mode;
3019 }
3020 
3021 void amd_iommu_disable(void)
3022 {
3023 	amd_iommu_suspend();
3024 }
3025 
3026 int amd_iommu_reenable(int mode)
3027 {
3028 	amd_iommu_resume();
3029 
3030 	return 0;
3031 }
3032 
3033 int __init amd_iommu_enable_faulting(void)
3034 {
3035 	/* We enable MSI later when PCI is initialized */
3036 	return 0;
3037 }
3038 #endif
3039 
3040 /*
3041  * This is the core init function for AMD IOMMU hardware in the system.
3042  * This function is called from the generic x86 DMA layer initialization
3043  * code.
3044  */
3045 static int __init amd_iommu_init(void)
3046 {
3047 	struct amd_iommu *iommu;
3048 	int ret;
3049 
3050 	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3051 #ifdef CONFIG_GART_IOMMU
3052 	if (ret && list_empty(&amd_iommu_list)) {
3053 		/*
3054 		 * We failed to initialize the AMD IOMMU - try fallback
3055 		 * to GART if possible.
3056 		 */
3057 		gart_iommu_init();
3058 	}
3059 #endif
3060 
3061 	for_each_iommu(iommu)
3062 		amd_iommu_debugfs_setup(iommu);
3063 
3064 	return ret;
3065 }
3066 
3067 static bool amd_iommu_sme_check(void)
3068 {
3069 	if (!sme_active() || (boot_cpu_data.x86 != 0x17))
3070 		return true;
3071 
3072 	/* For Fam17h, a specific level of support is required */
3073 	if (boot_cpu_data.microcode >= 0x08001205)
3074 		return true;
3075 
3076 	if ((boot_cpu_data.microcode >= 0x08001126) &&
3077 	    (boot_cpu_data.microcode <= 0x080011ff))
3078 		return true;
3079 
3080 	pr_notice("IOMMU not currently supported when SME is active\n");
3081 
3082 	return false;
3083 }
3084 
3085 /****************************************************************************
3086  *
3087  * Early detect code. This code runs at IOMMU detection time in the DMA
3088  * layer. It just looks if there is an IVRS ACPI table to detect AMD
3089  * IOMMUs
3090  *
3091  ****************************************************************************/
3092 int __init amd_iommu_detect(void)
3093 {
3094 	int ret;
3095 
3096 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3097 		return -ENODEV;
3098 
3099 	if (!amd_iommu_sme_check())
3100 		return -ENODEV;
3101 
3102 	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3103 	if (ret)
3104 		return ret;
3105 
3106 	amd_iommu_detected = true;
3107 	iommu_detected = 1;
3108 	x86_init.iommu.iommu_init = amd_iommu_init;
3109 
3110 	return 1;
3111 }
3112 
3113 /****************************************************************************
3114  *
3115  * Parsing functions for the AMD IOMMU specific kernel command line
3116  * options.
3117  *
3118  ****************************************************************************/
3119 
3120 static int __init parse_amd_iommu_dump(char *str)
3121 {
3122 	amd_iommu_dump = true;
3123 
3124 	return 1;
3125 }
3126 
3127 static int __init parse_amd_iommu_intr(char *str)
3128 {
3129 	for (; *str; ++str) {
3130 		if (strncmp(str, "legacy", 6) == 0) {
3131 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3132 			break;
3133 		}
3134 		if (strncmp(str, "vapic", 5) == 0) {
3135 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3136 			break;
3137 		}
3138 	}
3139 	return 1;
3140 }
3141 
3142 static int __init parse_amd_iommu_options(char *str)
3143 {
3144 	for (; *str; ++str) {
3145 		if (strncmp(str, "fullflush", 9) == 0)
3146 			amd_iommu_unmap_flush = true;
3147 		if (strncmp(str, "off", 3) == 0)
3148 			amd_iommu_disabled = true;
3149 		if (strncmp(str, "force_isolation", 15) == 0)
3150 			amd_iommu_force_isolation = true;
3151 	}
3152 
3153 	return 1;
3154 }
3155 
3156 static int __init parse_ivrs_ioapic(char *str)
3157 {
3158 	unsigned int bus, dev, fn;
3159 	int ret, id, i;
3160 	u16 devid;
3161 
3162 	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
3163 
3164 	if (ret != 4) {
3165 		pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3166 		return 1;
3167 	}
3168 
3169 	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3170 		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3171 			str);
3172 		return 1;
3173 	}
3174 
3175 	devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3176 
3177 	cmdline_maps			= true;
3178 	i				= early_ioapic_map_size++;
3179 	early_ioapic_map[i].id		= id;
3180 	early_ioapic_map[i].devid	= devid;
3181 	early_ioapic_map[i].cmd_line	= true;
3182 
3183 	return 1;
3184 }
3185 
3186 static int __init parse_ivrs_hpet(char *str)
3187 {
3188 	unsigned int bus, dev, fn;
3189 	int ret, id, i;
3190 	u16 devid;
3191 
3192 	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
3193 
3194 	if (ret != 4) {
3195 		pr_err("Invalid command line: ivrs_hpet%s\n", str);
3196 		return 1;
3197 	}
3198 
3199 	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3200 		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3201 			str);
3202 		return 1;
3203 	}
3204 
3205 	devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3206 
3207 	cmdline_maps			= true;
3208 	i				= early_hpet_map_size++;
3209 	early_hpet_map[i].id		= id;
3210 	early_hpet_map[i].devid		= devid;
3211 	early_hpet_map[i].cmd_line	= true;
3212 
3213 	return 1;
3214 }
3215 
3216 static int __init parse_ivrs_acpihid(char *str)
3217 {
3218 	u32 bus, dev, fn;
3219 	char *hid, *uid, *p;
3220 	char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
3221 	int ret, i;
3222 
3223 	ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
3224 	if (ret != 4) {
3225 		pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
3226 		return 1;
3227 	}
3228 
3229 	p = acpiid;
3230 	hid = strsep(&p, ":");
3231 	uid = p;
3232 
3233 	if (!hid || !(*hid) || !uid) {
3234 		pr_err("Invalid command line: hid or uid\n");
3235 		return 1;
3236 	}
3237 
3238 	i = early_acpihid_map_size++;
3239 	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3240 	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3241 	early_acpihid_map[i].devid =
3242 		((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3243 	early_acpihid_map[i].cmd_line	= true;
3244 
3245 	return 1;
3246 }
3247 
3248 __setup("amd_iommu_dump",	parse_amd_iommu_dump);
3249 __setup("amd_iommu=",		parse_amd_iommu_options);
3250 __setup("amd_iommu_intr=",	parse_amd_iommu_intr);
3251 __setup("ivrs_ioapic",		parse_ivrs_ioapic);
3252 __setup("ivrs_hpet",		parse_ivrs_hpet);
3253 __setup("ivrs_acpihid",		parse_ivrs_acpihid);
3254 
3255 IOMMU_INIT_FINISH(amd_iommu_detect,
3256 		  gart_iommu_hole_init,
3257 		  NULL,
3258 		  NULL);
3259 
3260 bool amd_iommu_v2_supported(void)
3261 {
3262 	return amd_iommu_v2_present;
3263 }
3264 EXPORT_SYMBOL(amd_iommu_v2_supported);
3265 
3266 struct amd_iommu *get_amd_iommu(unsigned int idx)
3267 {
3268 	unsigned int i = 0;
3269 	struct amd_iommu *iommu;
3270 
3271 	for_each_iommu(iommu)
3272 		if (i++ == idx)
3273 			return iommu;
3274 	return NULL;
3275 }
3276 EXPORT_SYMBOL(get_amd_iommu);
3277 
3278 /****************************************************************************
3279  *
3280  * IOMMU EFR Performance Counter support functionality. This code allows
3281  * access to the IOMMU PC functionality.
3282  *
3283  ****************************************************************************/
3284 
3285 u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3286 {
3287 	struct amd_iommu *iommu = get_amd_iommu(idx);
3288 
3289 	if (iommu)
3290 		return iommu->max_banks;
3291 
3292 	return 0;
3293 }
3294 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3295 
3296 bool amd_iommu_pc_supported(void)
3297 {
3298 	return amd_iommu_pc_present;
3299 }
3300 EXPORT_SYMBOL(amd_iommu_pc_supported);
3301 
3302 u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3303 {
3304 	struct amd_iommu *iommu = get_amd_iommu(idx);
3305 
3306 	if (iommu)
3307 		return iommu->max_counters;
3308 
3309 	return 0;
3310 }
3311 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3312 
3313 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3314 				u8 fxn, u64 *value, bool is_write)
3315 {
3316 	u32 offset;
3317 	u32 max_offset_lim;
3318 
3319 	/* Make sure the IOMMU PC resource is available */
3320 	if (!amd_iommu_pc_present)
3321 		return -ENODEV;
3322 
3323 	/* Check for valid iommu and pc register indexing */
3324 	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3325 		return -ENODEV;
3326 
3327 	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3328 
3329 	/* Limit the offset to the hw defined mmio region aperture */
3330 	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3331 				(iommu->max_counters << 8) | 0x28);
3332 	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3333 	    (offset > max_offset_lim))
3334 		return -EINVAL;
3335 
3336 	if (is_write) {
3337 		u64 val = *value & GENMASK_ULL(47, 0);
3338 
3339 		writel((u32)val, iommu->mmio_base + offset);
3340 		writel((val >> 32), iommu->mmio_base + offset + 4);
3341 	} else {
3342 		*value = readl(iommu->mmio_base + offset + 4);
3343 		*value <<= 32;
3344 		*value |= readl(iommu->mmio_base + offset);
3345 		*value &= GENMASK_ULL(47, 0);
3346 	}
3347 
3348 	return 0;
3349 }
3350 
3351 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3352 {
3353 	if (!iommu)
3354 		return -EINVAL;
3355 
3356 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3357 }
3358 EXPORT_SYMBOL(amd_iommu_pc_get_reg);
3359 
3360 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3361 {
3362 	if (!iommu)
3363 		return -EINVAL;
3364 
3365 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3366 }
3367 EXPORT_SYMBOL(amd_iommu_pc_set_reg);
3368
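
/*
 * Illustrative usage sketch (not part of this driver): a caller could read
 * the 48-bit value of bank 0 / counter 0 on the first IOMMU roughly like
 * this, assuming register offset 0x00 within a counter bank addresses the
 * counter register itself:
 *
 *	u64 val;
 *	struct amd_iommu *iommu = get_amd_iommu(0);
 *
 *	if (iommu && !amd_iommu_pc_get_reg(iommu, 0, 0, 0x00, &val))
 *		pr_info("IOMMU perf bank 0, counter 0: %llu\n", val);
 */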