xref: /openbmc/linux/drivers/iommu/intel/iommu.c (revision ecc23d0a422a3118fcf6e4f0a46e17a6c2047b02)
1672cf6dfSJoerg Roedel // SPDX-License-Identifier: GPL-2.0-only
2672cf6dfSJoerg Roedel /*
3672cf6dfSJoerg Roedel  * Copyright © 2006-2014 Intel Corporation.
4672cf6dfSJoerg Roedel  *
5672cf6dfSJoerg Roedel  * Authors: David Woodhouse <dwmw2@infradead.org>,
6672cf6dfSJoerg Roedel  *          Ashok Raj <ashok.raj@intel.com>,
7672cf6dfSJoerg Roedel  *          Shaohua Li <shaohua.li@intel.com>,
8672cf6dfSJoerg Roedel  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9672cf6dfSJoerg Roedel  *          Fenghua Yu <fenghua.yu@intel.com>
10672cf6dfSJoerg Roedel  *          Joerg Roedel <jroedel@suse.de>
11672cf6dfSJoerg Roedel  */
12672cf6dfSJoerg Roedel 
13672cf6dfSJoerg Roedel #define pr_fmt(fmt)     "DMAR: " fmt
14672cf6dfSJoerg Roedel #define dev_fmt(fmt)    pr_fmt(fmt)
15672cf6dfSJoerg Roedel 
16763e656cSLu Baolu #include <linux/crash_dump.h>
17763e656cSLu Baolu #include <linux/dma-direct.h>
18763e656cSLu Baolu #include <linux/dmi.h>
19763e656cSLu Baolu #include <linux/memory.h>
20763e656cSLu Baolu #include <linux/pci.h>
21763e656cSLu Baolu #include <linux/pci-ats.h>
22763e656cSLu Baolu #include <linux/spinlock.h>
23672cf6dfSJoerg Roedel #include <linux/syscore_ops.h>
24672cf6dfSJoerg Roedel #include <linux/tboot.h>
2555243393SYi Liu #include <uapi/linux/iommufd.h>
26672cf6dfSJoerg Roedel 
272585a279SLu Baolu #include "iommu.h"
28f2042ed2SRobin Murphy #include "../dma-iommu.h"
29672cf6dfSJoerg Roedel #include "../irq_remapping.h"
30757636edSLu Baolu #include "../iommu-sva.h"
3102f3effdSLu Baolu #include "pasid.h"
32ad3d1902SKyung Min Park #include "cap_audit.h"
33d8a7c0cfSKan Liang #include "perfmon.h"
34672cf6dfSJoerg Roedel 
35672cf6dfSJoerg Roedel #define ROOT_SIZE		VTD_PAGE_SIZE
36672cf6dfSJoerg Roedel #define CONTEXT_SIZE		VTD_PAGE_SIZE
37672cf6dfSJoerg Roedel 
38672cf6dfSJoerg Roedel #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
39672cf6dfSJoerg Roedel #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
40672cf6dfSJoerg Roedel #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
41672cf6dfSJoerg Roedel #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
42672cf6dfSJoerg Roedel 
43672cf6dfSJoerg Roedel #define IOAPIC_RANGE_START	(0xfee00000)
44672cf6dfSJoerg Roedel #define IOAPIC_RANGE_END	(0xfeefffff)
45672cf6dfSJoerg Roedel #define IOVA_START_ADDR		(0x1000)
46672cf6dfSJoerg Roedel 
47672cf6dfSJoerg Roedel #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
48672cf6dfSJoerg Roedel 
49672cf6dfSJoerg Roedel #define MAX_AGAW_WIDTH 64
50672cf6dfSJoerg Roedel #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
51672cf6dfSJoerg Roedel 
52c062db03SLu Baolu #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
53c062db03SLu Baolu #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
54672cf6dfSJoerg Roedel 
55672cf6dfSJoerg Roedel /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
56672cf6dfSJoerg Roedel    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
57672cf6dfSJoerg Roedel #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
58672cf6dfSJoerg Roedel 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
59672cf6dfSJoerg Roedel #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
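/*
 * Worked example: with gaw == 48, __DOMAIN_MAX_PFN(48) is (1ULL << 36) - 1
 * (the last 4KiB page frame reachable through a 4-level table) and
 * __DOMAIN_MAX_ADDR(48) is (1ULL << 48) - 1.
 */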
60672cf6dfSJoerg Roedel 
61672cf6dfSJoerg Roedel /* IO virtual address start page frame number */
62672cf6dfSJoerg Roedel #define IOVA_START_PFN		(1)
63672cf6dfSJoerg Roedel 
64672cf6dfSJoerg Roedel #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
65672cf6dfSJoerg Roedel 
66672cf6dfSJoerg Roedel /* page table handling */
67672cf6dfSJoerg Roedel #define LEVEL_STRIDE		(9)
68672cf6dfSJoerg Roedel #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
69672cf6dfSJoerg Roedel 
70672cf6dfSJoerg Roedel static inline int agaw_to_level(int agaw)
71672cf6dfSJoerg Roedel {
72672cf6dfSJoerg Roedel 	return agaw + 2;
73672cf6dfSJoerg Roedel }
74672cf6dfSJoerg Roedel 
75672cf6dfSJoerg Roedel static inline int agaw_to_width(int agaw)
76672cf6dfSJoerg Roedel {
77672cf6dfSJoerg Roedel 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
78672cf6dfSJoerg Roedel }
79672cf6dfSJoerg Roedel 
80672cf6dfSJoerg Roedel static inline int width_to_agaw(int width)
81672cf6dfSJoerg Roedel {
82672cf6dfSJoerg Roedel 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
83672cf6dfSJoerg Roedel }
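/*
 * Worked example of the AGAW helpers above: agaw 1, 2 and 3 map to
 * 39-, 48- and 57-bit address widths and 3-, 4- and 5-level page
 * tables respectively, matching the SAGAW bit positions reported in
 * the capability register.
 */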
84672cf6dfSJoerg Roedel 
85672cf6dfSJoerg Roedel static inline unsigned int level_to_offset_bits(int level)
86672cf6dfSJoerg Roedel {
87672cf6dfSJoerg Roedel 	return (level - 1) * LEVEL_STRIDE;
88672cf6dfSJoerg Roedel }
89672cf6dfSJoerg Roedel 
9029aaebbcSChris Wilson static inline int pfn_level_offset(u64 pfn, int level)
91672cf6dfSJoerg Roedel {
92672cf6dfSJoerg Roedel 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
93672cf6dfSJoerg Roedel }
94672cf6dfSJoerg Roedel 
9529aaebbcSChris Wilson static inline u64 level_mask(int level)
96672cf6dfSJoerg Roedel {
9729aaebbcSChris Wilson 	return -1ULL << level_to_offset_bits(level);
98672cf6dfSJoerg Roedel }
99672cf6dfSJoerg Roedel 
10029aaebbcSChris Wilson static inline u64 level_size(int level)
101672cf6dfSJoerg Roedel {
10229aaebbcSChris Wilson 	return 1ULL << level_to_offset_bits(level);
103672cf6dfSJoerg Roedel }
104672cf6dfSJoerg Roedel 
10529aaebbcSChris Wilson static inline u64 align_to_level(u64 pfn, int level)
106672cf6dfSJoerg Roedel {
107672cf6dfSJoerg Roedel 	return (pfn + level_size(level) - 1) & level_mask(level);
108672cf6dfSJoerg Roedel }
109672cf6dfSJoerg Roedel 
110672cf6dfSJoerg Roedel static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
111672cf6dfSJoerg Roedel {
11229aaebbcSChris Wilson 	return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
113672cf6dfSJoerg Roedel }
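/*
 * With the 9-bit stride, a level-1 PTE maps a single 4KiB page, a
 * level-2 PTE maps a 2MiB region (512 pages) and a level-3 PTE maps a
 * 1GiB region (262144 pages), which is what lvl_to_nr_pages() returns
 * for superpage entries.
 */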
114672cf6dfSJoerg Roedel 
115672cf6dfSJoerg Roedel /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
116672cf6dfSJoerg Roedel    are never going to work. */
117fb5f50a4SYanfei Xu static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn)
118672cf6dfSJoerg Roedel {
119672cf6dfSJoerg Roedel 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
120672cf6dfSJoerg Roedel }
121fb5f50a4SYanfei Xu static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn)
122fb5f50a4SYanfei Xu {
123fb5f50a4SYanfei Xu 	return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1;
124fb5f50a4SYanfei Xu }
125672cf6dfSJoerg Roedel static inline unsigned long page_to_dma_pfn(struct page *pg)
126672cf6dfSJoerg Roedel {
127fb5f50a4SYanfei Xu 	return mm_to_dma_pfn_start(page_to_pfn(pg));
128672cf6dfSJoerg Roedel }
129672cf6dfSJoerg Roedel static inline unsigned long virt_to_dma_pfn(void *p)
130672cf6dfSJoerg Roedel {
131672cf6dfSJoerg Roedel 	return page_to_dma_pfn(virt_to_page(p));
132672cf6dfSJoerg Roedel }
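/*
 * On x86, PAGE_SHIFT equals VTD_PAGE_SHIFT (both 12), so the shifts
 * above are no-ops and MM and DMA PFNs coincide; the helpers only do
 * real work when the MM page size exceeds the 4KiB VT-d page size.
 */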
133672cf6dfSJoerg Roedel 
134672cf6dfSJoerg Roedel static void __init check_tylersburg_isoch(void);
135672cf6dfSJoerg Roedel static int rwbf_quirk;
136672cf6dfSJoerg Roedel 
137672cf6dfSJoerg Roedel /*
138672cf6dfSJoerg Roedel  * Set to 1 to panic the kernel if VT-d cannot be enabled successfully
139672cf6dfSJoerg Roedel  * (used when the kernel is launched with TXT)
140672cf6dfSJoerg Roedel  */
141672cf6dfSJoerg Roedel static int force_on = 0;
1424d213e76SZhenzhong Duan static int intel_iommu_tboot_noforce;
143672cf6dfSJoerg Roedel static int no_platform_optin;
144672cf6dfSJoerg Roedel 
145672cf6dfSJoerg Roedel #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
146672cf6dfSJoerg Roedel 
147672cf6dfSJoerg Roedel /*
148672cf6dfSJoerg Roedel  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
149672cf6dfSJoerg Roedel  * if marked present.
150672cf6dfSJoerg Roedel  */
151672cf6dfSJoerg Roedel static phys_addr_t root_entry_lctp(struct root_entry *re)
152672cf6dfSJoerg Roedel {
153672cf6dfSJoerg Roedel 	if (!(re->lo & 1))
154672cf6dfSJoerg Roedel 		return 0;
155672cf6dfSJoerg Roedel 
156672cf6dfSJoerg Roedel 	return re->lo & VTD_PAGE_MASK;
157672cf6dfSJoerg Roedel }
158672cf6dfSJoerg Roedel 
159672cf6dfSJoerg Roedel /*
160672cf6dfSJoerg Roedel  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
161672cf6dfSJoerg Roedel  * if marked present.
162672cf6dfSJoerg Roedel  */
163672cf6dfSJoerg Roedel static phys_addr_t root_entry_uctp(struct root_entry *re)
164672cf6dfSJoerg Roedel {
165672cf6dfSJoerg Roedel 	if (!(re->hi & 1))
166672cf6dfSJoerg Roedel 		return 0;
167672cf6dfSJoerg Roedel 
168672cf6dfSJoerg Roedel 	return re->hi & VTD_PAGE_MASK;
169672cf6dfSJoerg Roedel }
170672cf6dfSJoerg Roedel 
171672cf6dfSJoerg Roedel static inline void context_set_present(struct context_entry *context)
172672cf6dfSJoerg Roedel {
173672cf6dfSJoerg Roedel 	context->lo |= 1;
174672cf6dfSJoerg Roedel }
175672cf6dfSJoerg Roedel 
176672cf6dfSJoerg Roedel static inline void context_set_fault_enable(struct context_entry *context)
177672cf6dfSJoerg Roedel {
178672cf6dfSJoerg Roedel 	context->lo &= (((u64)-1) << 2) | 1;
179672cf6dfSJoerg Roedel }
180672cf6dfSJoerg Roedel 
181672cf6dfSJoerg Roedel static inline void context_set_translation_type(struct context_entry *context,
182672cf6dfSJoerg Roedel 						unsigned long value)
183672cf6dfSJoerg Roedel {
184672cf6dfSJoerg Roedel 	context->lo &= (((u64)-1) << 4) | 3;
185672cf6dfSJoerg Roedel 	context->lo |= (value & 3) << 2;
186672cf6dfSJoerg Roedel }
187672cf6dfSJoerg Roedel 
188672cf6dfSJoerg Roedel static inline void context_set_address_root(struct context_entry *context,
189672cf6dfSJoerg Roedel 					    unsigned long value)
190672cf6dfSJoerg Roedel {
191672cf6dfSJoerg Roedel 	context->lo &= ~VTD_PAGE_MASK;
192672cf6dfSJoerg Roedel 	context->lo |= value & VTD_PAGE_MASK;
193672cf6dfSJoerg Roedel }
194672cf6dfSJoerg Roedel 
195672cf6dfSJoerg Roedel static inline void context_set_address_width(struct context_entry *context,
196672cf6dfSJoerg Roedel 					     unsigned long value)
197672cf6dfSJoerg Roedel {
198672cf6dfSJoerg Roedel 	context->hi |= value & 7;
199672cf6dfSJoerg Roedel }
200672cf6dfSJoerg Roedel 
201672cf6dfSJoerg Roedel static inline void context_set_domain_id(struct context_entry *context,
202672cf6dfSJoerg Roedel 					 unsigned long value)
203672cf6dfSJoerg Roedel {
204672cf6dfSJoerg Roedel 	context->hi |= (value & ((1 << 16) - 1)) << 8;
205672cf6dfSJoerg Roedel }
206672cf6dfSJoerg Roedel 
2070faa19a1SLu Baolu static inline void context_set_pasid(struct context_entry *context)
2080faa19a1SLu Baolu {
2090faa19a1SLu Baolu 	context->lo |= CONTEXT_PASIDE;
2100faa19a1SLu Baolu }
2110faa19a1SLu Baolu 
212672cf6dfSJoerg Roedel static inline int context_domain_id(struct context_entry *c)
213672cf6dfSJoerg Roedel {
214672cf6dfSJoerg Roedel 	return((c->hi >> 8) & 0xffff);
215672cf6dfSJoerg Roedel }
216672cf6dfSJoerg Roedel 
217672cf6dfSJoerg Roedel static inline void context_clear_entry(struct context_entry *context)
218672cf6dfSJoerg Roedel {
219672cf6dfSJoerg Roedel 	context->lo = 0;
220672cf6dfSJoerg Roedel 	context->hi = 0;
221672cf6dfSJoerg Roedel }
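/*
 * Legacy-mode context entry layout manipulated by the helpers above:
 * lo bit 0 is Present, bit 1 is Fault Processing Disable, bits 3:2
 * select the Translation Type and bits 63:12 hold the page-table
 * root; hi bits 2:0 encode the Address Width and bits 23:8 the
 * 16-bit domain id read back by context_domain_id().
 */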
222672cf6dfSJoerg Roedel 
2230c5f6c0dSLu Baolu static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
2240c5f6c0dSLu Baolu {
2250c5f6c0dSLu Baolu 	if (!iommu->copied_tables)
2260c5f6c0dSLu Baolu 		return false;
2270c5f6c0dSLu Baolu 
2280c5f6c0dSLu Baolu 	return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
2290c5f6c0dSLu Baolu }
2300c5f6c0dSLu Baolu 
2310c5f6c0dSLu Baolu static inline void
2320c5f6c0dSLu Baolu set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
2330c5f6c0dSLu Baolu {
2340c5f6c0dSLu Baolu 	set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
2350c5f6c0dSLu Baolu }
2360c5f6c0dSLu Baolu 
2370c5f6c0dSLu Baolu static inline void
2380c5f6c0dSLu Baolu clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
2390c5f6c0dSLu Baolu {
2400c5f6c0dSLu Baolu 	clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
2410c5f6c0dSLu Baolu }
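/*
 * The copied_tables bitmap is indexed by the 16-bit source-id
 * ((bus << 8) | devfn) and marks context entries inherited from a
 * previous kernel (e.g. across a kdump boot) so that they can be
 * distinguished from entries created by this kernel.
 */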
2420c5f6c0dSLu Baolu 
243672cf6dfSJoerg Roedel /*
244672cf6dfSJoerg Roedel  * This domain is a static identity mapping domain.
245672cf6dfSJoerg Roedel  *	1. This domain creates a static 1:1 mapping to all usable memory.
246672cf6dfSJoerg Roedel  * 	2. It maps to each iommu if successful.
247672cf6dfSJoerg Roedel  *	3. Each iommu maps to this domain if successful.
248672cf6dfSJoerg Roedel  */
249672cf6dfSJoerg Roedel static struct dmar_domain *si_domain;
250672cf6dfSJoerg Roedel static int hw_pass_through = 1;
251672cf6dfSJoerg Roedel 
252672cf6dfSJoerg Roedel struct dmar_rmrr_unit {
253672cf6dfSJoerg Roedel 	struct list_head list;		/* list of rmrr units	*/
254672cf6dfSJoerg Roedel 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
255672cf6dfSJoerg Roedel 	u64	base_address;		/* reserved base address*/
256672cf6dfSJoerg Roedel 	u64	end_address;		/* reserved end address */
257672cf6dfSJoerg Roedel 	struct dmar_dev_scope *devices;	/* target devices */
258672cf6dfSJoerg Roedel 	int	devices_cnt;		/* target device count */
259672cf6dfSJoerg Roedel };
260672cf6dfSJoerg Roedel 
261672cf6dfSJoerg Roedel struct dmar_atsr_unit {
262672cf6dfSJoerg Roedel 	struct list_head list;		/* list of ATSR units */
263672cf6dfSJoerg Roedel 	struct acpi_dmar_header *hdr;	/* ACPI header */
264672cf6dfSJoerg Roedel 	struct dmar_dev_scope *devices;	/* target devices */
265672cf6dfSJoerg Roedel 	int devices_cnt;		/* target device count */
266672cf6dfSJoerg Roedel 	u8 include_all:1;		/* include all ports */
267672cf6dfSJoerg Roedel };
268672cf6dfSJoerg Roedel 
26931a75cbbSYian Chen struct dmar_satc_unit {
27031a75cbbSYian Chen 	struct list_head list;		/* list of SATC units */
27131a75cbbSYian Chen 	struct acpi_dmar_header *hdr;	/* ACPI header */
27231a75cbbSYian Chen 	struct dmar_dev_scope *devices;	/* target devices */
27331a75cbbSYian Chen 	struct intel_iommu *iommu;	/* the corresponding iommu */
27431a75cbbSYian Chen 	int devices_cnt;		/* target device count */
27531a75cbbSYian Chen 	u8 atc_required:1;		/* ATS is required */
27631a75cbbSYian Chen };
27731a75cbbSYian Chen 
278672cf6dfSJoerg Roedel static LIST_HEAD(dmar_atsr_units);
279672cf6dfSJoerg Roedel static LIST_HEAD(dmar_rmrr_units);
28031a75cbbSYian Chen static LIST_HEAD(dmar_satc_units);
281672cf6dfSJoerg Roedel 
282672cf6dfSJoerg Roedel #define for_each_rmrr_units(rmrr) \
283672cf6dfSJoerg Roedel 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
284672cf6dfSJoerg Roedel 
285c7be17c2SLu Baolu static void device_block_translation(struct device *dev);
28635a99c54SLu Baolu static void intel_iommu_domain_free(struct iommu_domain *domain);
287672cf6dfSJoerg Roedel 
28801dac2d9SLu Baolu int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
28901dac2d9SLu Baolu int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
290672cf6dfSJoerg Roedel 
291672cf6dfSJoerg Roedel int intel_iommu_enabled = 0;
292672cf6dfSJoerg Roedel EXPORT_SYMBOL_GPL(intel_iommu_enabled);
293672cf6dfSJoerg Roedel 
294672cf6dfSJoerg Roedel static int dmar_map_gfx = 1;
295672cf6dfSJoerg Roedel static int intel_iommu_superpage = 1;
296672cf6dfSJoerg Roedel static int iommu_identity_mapping;
297b1012ca8SLu Baolu static int iommu_skip_te_disable;
298672cf6dfSJoerg Roedel 
299672cf6dfSJoerg Roedel #define IDENTMAP_GFX		2
300672cf6dfSJoerg Roedel #define IDENTMAP_AZALIA		4
301672cf6dfSJoerg Roedel 
302672cf6dfSJoerg Roedel const struct iommu_ops intel_iommu_ops;
303672cf6dfSJoerg Roedel 
304672cf6dfSJoerg Roedel static bool translation_pre_enabled(struct intel_iommu *iommu)
305672cf6dfSJoerg Roedel {
306672cf6dfSJoerg Roedel 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
307672cf6dfSJoerg Roedel }
308672cf6dfSJoerg Roedel 
309672cf6dfSJoerg Roedel static void clear_translation_pre_enabled(struct intel_iommu *iommu)
310672cf6dfSJoerg Roedel {
311672cf6dfSJoerg Roedel 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
312672cf6dfSJoerg Roedel }
313672cf6dfSJoerg Roedel 
314672cf6dfSJoerg Roedel static void init_translation_status(struct intel_iommu *iommu)
315672cf6dfSJoerg Roedel {
316672cf6dfSJoerg Roedel 	u32 gsts;
317672cf6dfSJoerg Roedel 
318672cf6dfSJoerg Roedel 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
319672cf6dfSJoerg Roedel 	if (gsts & DMA_GSTS_TES)
320672cf6dfSJoerg Roedel 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
321672cf6dfSJoerg Roedel }
322672cf6dfSJoerg Roedel 
323672cf6dfSJoerg Roedel static int __init intel_iommu_setup(char *str)
324672cf6dfSJoerg Roedel {
325672cf6dfSJoerg Roedel 	if (!str)
326672cf6dfSJoerg Roedel 		return -EINVAL;
3275240aed2STvrtko Ursulin 
328672cf6dfSJoerg Roedel 	while (*str) {
329672cf6dfSJoerg Roedel 		if (!strncmp(str, "on", 2)) {
330672cf6dfSJoerg Roedel 			dmar_disabled = 0;
331672cf6dfSJoerg Roedel 			pr_info("IOMMU enabled\n");
332672cf6dfSJoerg Roedel 		} else if (!strncmp(str, "off", 3)) {
333672cf6dfSJoerg Roedel 			dmar_disabled = 1;
334672cf6dfSJoerg Roedel 			no_platform_optin = 1;
335672cf6dfSJoerg Roedel 			pr_info("IOMMU disabled\n");
336672cf6dfSJoerg Roedel 		} else if (!strncmp(str, "igfx_off", 8)) {
337672cf6dfSJoerg Roedel 			dmar_map_gfx = 0;
338672cf6dfSJoerg Roedel 			pr_info("Disable GFX device mapping\n");
339672cf6dfSJoerg Roedel 		} else if (!strncmp(str, "forcedac", 8)) {
3403542dcb1SRobin Murphy 			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
3413542dcb1SRobin Murphy 			iommu_dma_forcedac = true;
342672cf6dfSJoerg Roedel 		} else if (!strncmp(str, "strict", 6)) {
3431d479f16SJohn Garry 			pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
344308723e3SJohn Garry 			iommu_set_dma_strict();
345672cf6dfSJoerg Roedel 		} else if (!strncmp(str, "sp_off", 6)) {
346672cf6dfSJoerg Roedel 			pr_info("Disable supported super page\n");
347672cf6dfSJoerg Roedel 			intel_iommu_superpage = 0;
348672cf6dfSJoerg Roedel 		} else if (!strncmp(str, "sm_on", 5)) {
349792fb43cSLu Baolu 			pr_info("Enable scalable mode if hardware supports\n");
350672cf6dfSJoerg Roedel 			intel_iommu_sm = 1;
351792fb43cSLu Baolu 		} else if (!strncmp(str, "sm_off", 6)) {
352792fb43cSLu Baolu 			pr_info("Scalable mode is disallowed\n");
353792fb43cSLu Baolu 			intel_iommu_sm = 0;
354672cf6dfSJoerg Roedel 		} else if (!strncmp(str, "tboot_noforce", 13)) {
355672cf6dfSJoerg Roedel 			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
356672cf6dfSJoerg Roedel 			intel_iommu_tboot_noforce = 1;
3575240aed2STvrtko Ursulin 		} else {
3585240aed2STvrtko Ursulin 			pr_notice("Unknown option - '%s'\n", str);
359672cf6dfSJoerg Roedel 		}
360672cf6dfSJoerg Roedel 
361672cf6dfSJoerg Roedel 		str += strcspn(str, ",");
362672cf6dfSJoerg Roedel 		while (*str == ',')
363672cf6dfSJoerg Roedel 			str++;
364672cf6dfSJoerg Roedel 	}
3655240aed2STvrtko Ursulin 
3665240aed2STvrtko Ursulin 	return 1;
367672cf6dfSJoerg Roedel }
368672cf6dfSJoerg Roedel __setup("intel_iommu=", intel_iommu_setup);
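/*
 * The parser above accepts a comma-separated option list on the kernel
 * command line, e.g. "intel_iommu=on,sm_on,igfx_off" enables the IOMMU
 * and scalable mode (where the hardware supports it) while asking for
 * the integrated graphics device to be left out of DMA remapping.
 */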
369672cf6dfSJoerg Roedel 
3702552d3a2SJason Gunthorpe void *alloc_pgtable_page(int node, gfp_t gfp)
371672cf6dfSJoerg Roedel {
372672cf6dfSJoerg Roedel 	struct page *page;
373672cf6dfSJoerg Roedel 	void *vaddr = NULL;
374672cf6dfSJoerg Roedel 
3752552d3a2SJason Gunthorpe 	page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
376672cf6dfSJoerg Roedel 	if (page)
377672cf6dfSJoerg Roedel 		vaddr = page_address(page);
378672cf6dfSJoerg Roedel 	return vaddr;
379672cf6dfSJoerg Roedel }
380672cf6dfSJoerg Roedel 
381672cf6dfSJoerg Roedel void free_pgtable_page(void *vaddr)
382672cf6dfSJoerg Roedel {
383672cf6dfSJoerg Roedel 	free_page((unsigned long)vaddr);
384672cf6dfSJoerg Roedel }
385672cf6dfSJoerg Roedel 
386672cf6dfSJoerg Roedel static inline int domain_type_is_si(struct dmar_domain *domain)
387672cf6dfSJoerg Roedel {
388b34380a6SLu Baolu 	return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
389672cf6dfSJoerg Roedel }
390672cf6dfSJoerg Roedel 
391672cf6dfSJoerg Roedel static inline int domain_pfn_supported(struct dmar_domain *domain,
392672cf6dfSJoerg Roedel 				       unsigned long pfn)
393672cf6dfSJoerg Roedel {
394672cf6dfSJoerg Roedel 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
395672cf6dfSJoerg Roedel 
396672cf6dfSJoerg Roedel 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
397672cf6dfSJoerg Roedel }
398672cf6dfSJoerg Roedel 
39953fc7ad6SLu Baolu /*
40053fc7ad6SLu Baolu  * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
40153fc7ad6SLu Baolu  * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
40253fc7ad6SLu Baolu  * the returned SAGAW.
40353fc7ad6SLu Baolu  */
40453fc7ad6SLu Baolu static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
40553fc7ad6SLu Baolu {
40653fc7ad6SLu Baolu 	unsigned long fl_sagaw, sl_sagaw;
40753fc7ad6SLu Baolu 
408b722cb32SYi Liu 	fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
40953fc7ad6SLu Baolu 	sl_sagaw = cap_sagaw(iommu->cap);
41053fc7ad6SLu Baolu 
41153fc7ad6SLu Baolu 	/* Second level only. */
41253fc7ad6SLu Baolu 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
41353fc7ad6SLu Baolu 		return sl_sagaw;
41453fc7ad6SLu Baolu 
41553fc7ad6SLu Baolu 	/* First level only. */
41653fc7ad6SLu Baolu 	if (!ecap_slts(iommu->ecap))
41753fc7ad6SLu Baolu 		return fl_sagaw;
41853fc7ad6SLu Baolu 
41953fc7ad6SLu Baolu 	return fl_sagaw & sl_sagaw;
42053fc7ad6SLu Baolu }
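/*
 * First-level translation always implies 4-level (48-bit) paging and
 * optionally 5-level (57-bit) paging, hence the unconditional BIT(2)
 * and the conditional BIT(3) above, mirroring the SAGAW bit encoding
 * used for second-level tables.
 */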
42153fc7ad6SLu Baolu 
422672cf6dfSJoerg Roedel static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
423672cf6dfSJoerg Roedel {
424672cf6dfSJoerg Roedel 	unsigned long sagaw;
42505d2cbf9SColin Ian King 	int agaw;
426672cf6dfSJoerg Roedel 
42753fc7ad6SLu Baolu 	sagaw = __iommu_calculate_sagaw(iommu);
42853fc7ad6SLu Baolu 	for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
429672cf6dfSJoerg Roedel 		if (test_bit(agaw, &sagaw))
430672cf6dfSJoerg Roedel 			break;
431672cf6dfSJoerg Roedel 	}
432672cf6dfSJoerg Roedel 
433672cf6dfSJoerg Roedel 	return agaw;
434672cf6dfSJoerg Roedel }
435672cf6dfSJoerg Roedel 
436672cf6dfSJoerg Roedel /*
437672cf6dfSJoerg Roedel  * Calculate max SAGAW for each iommu.
438672cf6dfSJoerg Roedel  */
439672cf6dfSJoerg Roedel int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
440672cf6dfSJoerg Roedel {
441672cf6dfSJoerg Roedel 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
442672cf6dfSJoerg Roedel }
443672cf6dfSJoerg Roedel 
444672cf6dfSJoerg Roedel /*
445672cf6dfSJoerg Roedel  * Calculate the agaw for each iommu.
446672cf6dfSJoerg Roedel  * "SAGAW" may differ across iommus, so use a default agaw and fall back
447672cf6dfSJoerg Roedel  * to a smaller supported agaw for iommus that don't support the default.
448672cf6dfSJoerg Roedel  */
449672cf6dfSJoerg Roedel int iommu_calculate_agaw(struct intel_iommu *iommu)
450672cf6dfSJoerg Roedel {
451672cf6dfSJoerg Roedel 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
452672cf6dfSJoerg Roedel }
453672cf6dfSJoerg Roedel 
45404c00956SLu Baolu static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
45504c00956SLu Baolu {
45604c00956SLu Baolu 	return sm_supported(iommu) ?
45704c00956SLu Baolu 			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
45804c00956SLu Baolu }
45904c00956SLu Baolu 
460672cf6dfSJoerg Roedel static void domain_update_iommu_coherency(struct dmar_domain *domain)
461672cf6dfSJoerg Roedel {
462ba949f4cSLu Baolu 	struct iommu_domain_info *info;
463672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
464672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
465672cf6dfSJoerg Roedel 	bool found = false;
466ba949f4cSLu Baolu 	unsigned long i;
467672cf6dfSJoerg Roedel 
4681f106ff0SParav Pandit 	domain->iommu_coherency = true;
469ba949f4cSLu Baolu 	xa_for_each(&domain->iommu_array, i, info) {
470672cf6dfSJoerg Roedel 		found = true;
471ba949f4cSLu Baolu 		if (!iommu_paging_structure_coherency(info->iommu)) {
4721f106ff0SParav Pandit 			domain->iommu_coherency = false;
473672cf6dfSJoerg Roedel 			break;
474672cf6dfSJoerg Roedel 		}
475672cf6dfSJoerg Roedel 	}
476672cf6dfSJoerg Roedel 	if (found)
477672cf6dfSJoerg Roedel 		return;
478672cf6dfSJoerg Roedel 
479672cf6dfSJoerg Roedel 	/* No hardware attached; use lowest common denominator */
480672cf6dfSJoerg Roedel 	rcu_read_lock();
481672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
48204c00956SLu Baolu 		if (!iommu_paging_structure_coherency(iommu)) {
4831f106ff0SParav Pandit 			domain->iommu_coherency = false;
484672cf6dfSJoerg Roedel 			break;
485672cf6dfSJoerg Roedel 		}
486672cf6dfSJoerg Roedel 	}
487672cf6dfSJoerg Roedel 	rcu_read_unlock();
488672cf6dfSJoerg Roedel }
489672cf6dfSJoerg Roedel 
490672cf6dfSJoerg Roedel static int domain_update_iommu_superpage(struct dmar_domain *domain,
491672cf6dfSJoerg Roedel 					 struct intel_iommu *skip)
492672cf6dfSJoerg Roedel {
493672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
494672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
495672cf6dfSJoerg Roedel 	int mask = 0x3;
496672cf6dfSJoerg Roedel 
497cee57d4fSParav Pandit 	if (!intel_iommu_superpage)
498672cf6dfSJoerg Roedel 		return 0;
499672cf6dfSJoerg Roedel 
500672cf6dfSJoerg Roedel 	/* set iommu_superpage to the smallest common denominator */
501672cf6dfSJoerg Roedel 	rcu_read_lock();
502672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
503672cf6dfSJoerg Roedel 		if (iommu != skip) {
504e5b0feb4SLu Baolu 			if (domain && domain->use_first_level) {
505672cf6dfSJoerg Roedel 				if (!cap_fl1gp_support(iommu->cap))
506672cf6dfSJoerg Roedel 					mask = 0x1;
507672cf6dfSJoerg Roedel 			} else {
508672cf6dfSJoerg Roedel 				mask &= cap_super_page_val(iommu->cap);
509672cf6dfSJoerg Roedel 			}
510672cf6dfSJoerg Roedel 
511672cf6dfSJoerg Roedel 			if (!mask)
512672cf6dfSJoerg Roedel 				break;
513672cf6dfSJoerg Roedel 		}
514672cf6dfSJoerg Roedel 	}
515672cf6dfSJoerg Roedel 	rcu_read_unlock();
516672cf6dfSJoerg Roedel 
517672cf6dfSJoerg Roedel 	return fls(mask);
518672cf6dfSJoerg Roedel }
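/*
 * The fls() result encodes the largest superpage level common to all
 * the IOMMUs checked: 0 means none, 1 means 2MiB only and 2 means
 * 2MiB plus 1GiB; domain_super_pgsize_bitmap() below relies on this
 * encoding.
 */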
519672cf6dfSJoerg Roedel 
520d2ef0962SLu Baolu static int domain_update_device_node(struct dmar_domain *domain)
521d2ef0962SLu Baolu {
522d2ef0962SLu Baolu 	struct device_domain_info *info;
523d2ef0962SLu Baolu 	int nid = NUMA_NO_NODE;
524a349ffcbSLu Baolu 	unsigned long flags;
525d2ef0962SLu Baolu 
526a349ffcbSLu Baolu 	spin_lock_irqsave(&domain->lock, flags);
527d2ef0962SLu Baolu 	list_for_each_entry(info, &domain->devices, link) {
528d2ef0962SLu Baolu 		/*
529d2ef0962SLu Baolu 		 * There could be multiple device NUMA nodes, as devices within
530d2ef0962SLu Baolu 		 * the same domain may sit behind different IOMMUs. There is no
531d2ef0962SLu Baolu 		 * perfect answer in such a situation, so we use a first-come,
532d2ef0962SLu Baolu 		 * first-served policy.
533d2ef0962SLu Baolu 		 */
534d2ef0962SLu Baolu 		nid = dev_to_node(info->dev);
535d2ef0962SLu Baolu 		if (nid != NUMA_NO_NODE)
536d2ef0962SLu Baolu 			break;
537d2ef0962SLu Baolu 	}
538a349ffcbSLu Baolu 	spin_unlock_irqrestore(&domain->lock, flags);
539d2ef0962SLu Baolu 
540d2ef0962SLu Baolu 	return nid;
541d2ef0962SLu Baolu }
542d2ef0962SLu Baolu 
5437c29ada5SLiu Yi L static void domain_update_iotlb(struct dmar_domain *domain);
5447c29ada5SLiu Yi L 
545a886d5a7SLu Baolu /* Return the super pagesize bitmap if supported. */
546a886d5a7SLu Baolu static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
547a886d5a7SLu Baolu {
548a886d5a7SLu Baolu 	unsigned long bitmap = 0;
549a886d5a7SLu Baolu 
550a886d5a7SLu Baolu 	/*
551a886d5a7SLu Baolu 	 * 1-level super page supports page size of 2MiB, 2-level super page
552a886d5a7SLu Baolu 	 * supports page size of both 2MiB and 1GiB.
553a886d5a7SLu Baolu 	 */
554a886d5a7SLu Baolu 	if (domain->iommu_superpage == 1)
555a886d5a7SLu Baolu 		bitmap |= SZ_2M;
556a886d5a7SLu Baolu 	else if (domain->iommu_superpage == 2)
557a886d5a7SLu Baolu 		bitmap |= SZ_2M | SZ_1G;
558a886d5a7SLu Baolu 
559a886d5a7SLu Baolu 	return bitmap;
560a886d5a7SLu Baolu }
561a886d5a7SLu Baolu 
562672cf6dfSJoerg Roedel /* Some capabilities may be different across iommus */
563672cf6dfSJoerg Roedel static void domain_update_iommu_cap(struct dmar_domain *domain)
564672cf6dfSJoerg Roedel {
565672cf6dfSJoerg Roedel 	domain_update_iommu_coherency(domain);
566672cf6dfSJoerg Roedel 	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
567d2ef0962SLu Baolu 
568d2ef0962SLu Baolu 	/*
569d2ef0962SLu Baolu 	 * If RHSA is missing, we should default to the device numa domain
570d2ef0962SLu Baolu 	 * as fall back.
571d2ef0962SLu Baolu 	 */
572d2ef0962SLu Baolu 	if (domain->nid == NUMA_NO_NODE)
573d2ef0962SLu Baolu 		domain->nid = domain_update_device_node(domain);
574c062db03SLu Baolu 
575c062db03SLu Baolu 	/*
576c062db03SLu Baolu 	 * First-level translation restricts the input-address to a
577c062db03SLu Baolu 	 * canonical address (i.e., address bits 63:N have the same
578c062db03SLu Baolu 	 * value as address bit [N-1], where N is 48-bits with 4-level
579c062db03SLu Baolu 	 * paging and 57-bits with 5-level paging). Hence, skip bit
580c062db03SLu Baolu 	 * [N-1].
581c062db03SLu Baolu 	 */
582e5b0feb4SLu Baolu 	if (domain->use_first_level)
583c062db03SLu Baolu 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
584c062db03SLu Baolu 	else
585c062db03SLu Baolu 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
5867c29ada5SLiu Yi L 
587a886d5a7SLu Baolu 	domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
5887c29ada5SLiu Yi L 	domain_update_iotlb(domain);
589672cf6dfSJoerg Roedel }
590672cf6dfSJoerg Roedel 
591672cf6dfSJoerg Roedel struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
592672cf6dfSJoerg Roedel 					 u8 devfn, int alloc)
593672cf6dfSJoerg Roedel {
594672cf6dfSJoerg Roedel 	struct root_entry *root = &iommu->root_entry[bus];
595672cf6dfSJoerg Roedel 	struct context_entry *context;
596672cf6dfSJoerg Roedel 	u64 *entry;
597672cf6dfSJoerg Roedel 
5980c5f6c0dSLu Baolu 	/*
5990c5f6c0dSLu Baolu 	 * Unless the caller requested to allocate a new entry,
6000c5f6c0dSLu Baolu 	 * returning a copied context entry makes no sense.
6010c5f6c0dSLu Baolu 	 */
6020c5f6c0dSLu Baolu 	if (!alloc && context_copied(iommu, bus, devfn))
6030c5f6c0dSLu Baolu 		return NULL;
6040c5f6c0dSLu Baolu 
605672cf6dfSJoerg Roedel 	entry = &root->lo;
606672cf6dfSJoerg Roedel 	if (sm_supported(iommu)) {
607672cf6dfSJoerg Roedel 		if (devfn >= 0x80) {
608672cf6dfSJoerg Roedel 			devfn -= 0x80;
609672cf6dfSJoerg Roedel 			entry = &root->hi;
610672cf6dfSJoerg Roedel 		}
611672cf6dfSJoerg Roedel 		devfn *= 2;
612672cf6dfSJoerg Roedel 	}
613672cf6dfSJoerg Roedel 	if (*entry & 1)
614672cf6dfSJoerg Roedel 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
615672cf6dfSJoerg Roedel 	else {
616672cf6dfSJoerg Roedel 		unsigned long phy_addr;
617672cf6dfSJoerg Roedel 		if (!alloc)
618672cf6dfSJoerg Roedel 			return NULL;
619672cf6dfSJoerg Roedel 
6202552d3a2SJason Gunthorpe 		context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
621672cf6dfSJoerg Roedel 		if (!context)
622672cf6dfSJoerg Roedel 			return NULL;
623672cf6dfSJoerg Roedel 
624672cf6dfSJoerg Roedel 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
625672cf6dfSJoerg Roedel 		phy_addr = virt_to_phys((void *)context);
626672cf6dfSJoerg Roedel 		*entry = phy_addr | 1;
627672cf6dfSJoerg Roedel 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
628672cf6dfSJoerg Roedel 	}
629672cf6dfSJoerg Roedel 	return &context[devfn];
630672cf6dfSJoerg Roedel }
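/*
 * In scalable mode each root entry points at two context tables: the
 * lower half (root->lo) covers devfn 0-127 and the upper half
 * (root->hi) covers devfn 128-255.  Scalable-mode context entries are
 * twice the size of legacy ones, which is why devfn is doubled before
 * indexing above.
 */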
631672cf6dfSJoerg Roedel 
632672cf6dfSJoerg Roedel /**
633672cf6dfSJoerg Roedel  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
634672cf6dfSJoerg Roedel  *				 sub-hierarchy of a candidate PCI-PCI bridge
635672cf6dfSJoerg Roedel  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
636672cf6dfSJoerg Roedel  * @bridge: the candidate PCI-PCI bridge
637672cf6dfSJoerg Roedel  *
638672cf6dfSJoerg Roedel  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
639672cf6dfSJoerg Roedel  */
640672cf6dfSJoerg Roedel static bool
641672cf6dfSJoerg Roedel is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
642672cf6dfSJoerg Roedel {
643672cf6dfSJoerg Roedel 	struct pci_dev *pdev, *pbridge;
644672cf6dfSJoerg Roedel 
645672cf6dfSJoerg Roedel 	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
646672cf6dfSJoerg Roedel 		return false;
647672cf6dfSJoerg Roedel 
648672cf6dfSJoerg Roedel 	pdev = to_pci_dev(dev);
649672cf6dfSJoerg Roedel 	pbridge = to_pci_dev(bridge);
650672cf6dfSJoerg Roedel 
651672cf6dfSJoerg Roedel 	if (pbridge->subordinate &&
652672cf6dfSJoerg Roedel 	    pbridge->subordinate->number <= pdev->bus->number &&
653672cf6dfSJoerg Roedel 	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
654672cf6dfSJoerg Roedel 		return true;
655672cf6dfSJoerg Roedel 
656672cf6dfSJoerg Roedel 	return false;
657672cf6dfSJoerg Roedel }
658672cf6dfSJoerg Roedel 
6592d33b7d6SLu Baolu static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
6602d33b7d6SLu Baolu {
6612d33b7d6SLu Baolu 	struct dmar_drhd_unit *drhd;
6622d33b7d6SLu Baolu 	u32 vtbar;
6632d33b7d6SLu Baolu 	int rc;
6642d33b7d6SLu Baolu 
6652d33b7d6SLu Baolu 	/* We know that this device on this chipset has its own IOMMU.
6662d33b7d6SLu Baolu 	 * If we find it under a different IOMMU, then the BIOS is lying
6672d33b7d6SLu Baolu 	 * to us. Hope that the IOMMU for this device is actually
6682d33b7d6SLu Baolu 	 * disabled, and it needs no translation...
6692d33b7d6SLu Baolu 	 */
6702d33b7d6SLu Baolu 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
6712d33b7d6SLu Baolu 	if (rc) {
6722d33b7d6SLu Baolu 		/* "can't" happen */
6732d33b7d6SLu Baolu 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
6742d33b7d6SLu Baolu 		return false;
6752d33b7d6SLu Baolu 	}
6762d33b7d6SLu Baolu 	vtbar &= 0xffff0000;
6772d33b7d6SLu Baolu 
6782d33b7d6SLu Baolu 	/* we know that this iommu should be at offset 0xa000 from vtbar */
6792d33b7d6SLu Baolu 	drhd = dmar_find_matched_drhd_unit(pdev);
6802d33b7d6SLu Baolu 	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
6812d33b7d6SLu Baolu 		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
6822d33b7d6SLu Baolu 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
6832d33b7d6SLu Baolu 		return true;
6842d33b7d6SLu Baolu 	}
6852d33b7d6SLu Baolu 
6862d33b7d6SLu Baolu 	return false;
6872d33b7d6SLu Baolu }
6882d33b7d6SLu Baolu 
6892d33b7d6SLu Baolu static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
6902d33b7d6SLu Baolu {
6912d33b7d6SLu Baolu 	if (!iommu || iommu->drhd->ignored)
6922d33b7d6SLu Baolu 		return true;
6932d33b7d6SLu Baolu 
6942d33b7d6SLu Baolu 	if (dev_is_pci(dev)) {
6952d33b7d6SLu Baolu 		struct pci_dev *pdev = to_pci_dev(dev);
6962d33b7d6SLu Baolu 
6972d33b7d6SLu Baolu 		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
6982d33b7d6SLu Baolu 		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
6992d33b7d6SLu Baolu 		    quirk_ioat_snb_local_iommu(pdev))
7002d33b7d6SLu Baolu 			return true;
7012d33b7d6SLu Baolu 	}
7022d33b7d6SLu Baolu 
7032d33b7d6SLu Baolu 	return false;
7042d33b7d6SLu Baolu }
7052d33b7d6SLu Baolu 
706dd6692f1SLu Baolu struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
707672cf6dfSJoerg Roedel {
708672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd = NULL;
709dd6692f1SLu Baolu 	struct pci_dev *pdev = NULL;
710672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
711672cf6dfSJoerg Roedel 	struct device *tmp;
712672cf6dfSJoerg Roedel 	u16 segment = 0;
713672cf6dfSJoerg Roedel 	int i;
714672cf6dfSJoerg Roedel 
7152d33b7d6SLu Baolu 	if (!dev)
716672cf6dfSJoerg Roedel 		return NULL;
717672cf6dfSJoerg Roedel 
718672cf6dfSJoerg Roedel 	if (dev_is_pci(dev)) {
719672cf6dfSJoerg Roedel 		struct pci_dev *pf_pdev;
720672cf6dfSJoerg Roedel 
721672cf6dfSJoerg Roedel 		pdev = pci_real_dma_dev(to_pci_dev(dev));
722672cf6dfSJoerg Roedel 
723672cf6dfSJoerg Roedel 		/* VFs aren't listed in scope tables; we need to look up
724672cf6dfSJoerg Roedel 		 * the PF instead to find the IOMMU. */
725672cf6dfSJoerg Roedel 		pf_pdev = pci_physfn(pdev);
726672cf6dfSJoerg Roedel 		dev = &pf_pdev->dev;
727672cf6dfSJoerg Roedel 		segment = pci_domain_nr(pdev->bus);
728672cf6dfSJoerg Roedel 	} else if (has_acpi_companion(dev))
729672cf6dfSJoerg Roedel 		dev = &ACPI_COMPANION(dev)->dev;
730672cf6dfSJoerg Roedel 
731672cf6dfSJoerg Roedel 	rcu_read_lock();
7322d33b7d6SLu Baolu 	for_each_iommu(iommu, drhd) {
733672cf6dfSJoerg Roedel 		if (pdev && segment != drhd->segment)
734672cf6dfSJoerg Roedel 			continue;
735672cf6dfSJoerg Roedel 
736672cf6dfSJoerg Roedel 		for_each_active_dev_scope(drhd->devices,
737672cf6dfSJoerg Roedel 					  drhd->devices_cnt, i, tmp) {
738672cf6dfSJoerg Roedel 			if (tmp == dev) {
739672cf6dfSJoerg Roedel 				/* For a VF use its original BDF# not that of the PF
740672cf6dfSJoerg Roedel 				 * which we used for the IOMMU lookup. Strictly speaking
741672cf6dfSJoerg Roedel 				 * we could do this for all PCI devices; we only need to
742672cf6dfSJoerg Roedel 				 * get the BDF# from the scope table for ACPI matches. */
743672cf6dfSJoerg Roedel 				if (pdev && pdev->is_virtfn)
744672cf6dfSJoerg Roedel 					goto got_pdev;
745672cf6dfSJoerg Roedel 
746dd6692f1SLu Baolu 				if (bus && devfn) {
747672cf6dfSJoerg Roedel 					*bus = drhd->devices[i].bus;
748672cf6dfSJoerg Roedel 					*devfn = drhd->devices[i].devfn;
749dd6692f1SLu Baolu 				}
750672cf6dfSJoerg Roedel 				goto out;
751672cf6dfSJoerg Roedel 			}
752672cf6dfSJoerg Roedel 
753672cf6dfSJoerg Roedel 			if (is_downstream_to_pci_bridge(dev, tmp))
754672cf6dfSJoerg Roedel 				goto got_pdev;
755672cf6dfSJoerg Roedel 		}
756672cf6dfSJoerg Roedel 
757672cf6dfSJoerg Roedel 		if (pdev && drhd->include_all) {
758672cf6dfSJoerg Roedel got_pdev:
759dd6692f1SLu Baolu 			if (bus && devfn) {
760672cf6dfSJoerg Roedel 				*bus = pdev->bus->number;
761672cf6dfSJoerg Roedel 				*devfn = pdev->devfn;
762dd6692f1SLu Baolu 			}
763672cf6dfSJoerg Roedel 			goto out;
764672cf6dfSJoerg Roedel 		}
765672cf6dfSJoerg Roedel 	}
766672cf6dfSJoerg Roedel 	iommu = NULL;
767672cf6dfSJoerg Roedel out:
7682d33b7d6SLu Baolu 	if (iommu_is_dummy(iommu, dev))
7692d33b7d6SLu Baolu 		iommu = NULL;
7702d33b7d6SLu Baolu 
771672cf6dfSJoerg Roedel 	rcu_read_unlock();
772672cf6dfSJoerg Roedel 
773672cf6dfSJoerg Roedel 	return iommu;
774672cf6dfSJoerg Roedel }
775672cf6dfSJoerg Roedel 
776672cf6dfSJoerg Roedel static void domain_flush_cache(struct dmar_domain *domain,
777672cf6dfSJoerg Roedel 			       void *addr, int size)
778672cf6dfSJoerg Roedel {
779672cf6dfSJoerg Roedel 	if (!domain->iommu_coherency)
780672cf6dfSJoerg Roedel 		clflush_cache_range(addr, size);
781672cf6dfSJoerg Roedel }
782672cf6dfSJoerg Roedel 
783672cf6dfSJoerg Roedel static void free_context_table(struct intel_iommu *iommu)
784672cf6dfSJoerg Roedel {
785672cf6dfSJoerg Roedel 	struct context_entry *context;
7862e1c8dafSLu Baolu 	int i;
787672cf6dfSJoerg Roedel 
7882e1c8dafSLu Baolu 	if (!iommu->root_entry)
7892e1c8dafSLu Baolu 		return;
7902e1c8dafSLu Baolu 
791672cf6dfSJoerg Roedel 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
792672cf6dfSJoerg Roedel 		context = iommu_context_addr(iommu, i, 0, 0);
793672cf6dfSJoerg Roedel 		if (context)
794672cf6dfSJoerg Roedel 			free_pgtable_page(context);
795672cf6dfSJoerg Roedel 
796672cf6dfSJoerg Roedel 		if (!sm_supported(iommu))
797672cf6dfSJoerg Roedel 			continue;
798672cf6dfSJoerg Roedel 
799672cf6dfSJoerg Roedel 		context = iommu_context_addr(iommu, i, 0x80, 0);
800672cf6dfSJoerg Roedel 		if (context)
801672cf6dfSJoerg Roedel 			free_pgtable_page(context);
802672cf6dfSJoerg Roedel 	}
8032e1c8dafSLu Baolu 
804672cf6dfSJoerg Roedel 	free_pgtable_page(iommu->root_entry);
805672cf6dfSJoerg Roedel 	iommu->root_entry = NULL;
806672cf6dfSJoerg Roedel }
807672cf6dfSJoerg Roedel 
808914ff771SKyung Min Park #ifdef CONFIG_DMAR_DEBUG
80935bf49e0SLu Baolu static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
81035bf49e0SLu Baolu 			 u8 bus, u8 devfn, struct dma_pte *parent, int level)
811914ff771SKyung Min Park {
81235bf49e0SLu Baolu 	struct dma_pte *pte;
81335bf49e0SLu Baolu 	int offset;
814914ff771SKyung Min Park 
815914ff771SKyung Min Park 	while (1) {
816914ff771SKyung Min Park 		offset = pfn_level_offset(pfn, level);
817914ff771SKyung Min Park 		pte = &parent[offset];
818914ff771SKyung Min Park 
819914ff771SKyung Min Park 		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
820914ff771SKyung Min Park 
821*0ac9a7fbSZhenzhong Duan 		if (!dma_pte_present(pte)) {
822*0ac9a7fbSZhenzhong Duan 			pr_info("page table not present at level %d\n", level - 1);
823*0ac9a7fbSZhenzhong Duan 			break;
824*0ac9a7fbSZhenzhong Duan 		}
825*0ac9a7fbSZhenzhong Duan 
826*0ac9a7fbSZhenzhong Duan 		if (level == 1 || dma_pte_superpage(pte))
827914ff771SKyung Min Park 			break;
828914ff771SKyung Min Park 
829914ff771SKyung Min Park 		parent = phys_to_virt(dma_pte_addr(pte));
830914ff771SKyung Min Park 		level--;
831914ff771SKyung Min Park 	}
832914ff771SKyung Min Park }
833914ff771SKyung Min Park 
834914ff771SKyung Min Park void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
835914ff771SKyung Min Park 			  unsigned long long addr, u32 pasid)
836914ff771SKyung Min Park {
837914ff771SKyung Min Park 	struct pasid_dir_entry *dir, *pde;
838914ff771SKyung Min Park 	struct pasid_entry *entries, *pte;
839914ff771SKyung Min Park 	struct context_entry *ctx_entry;
840914ff771SKyung Min Park 	struct root_entry *rt_entry;
84135bf49e0SLu Baolu 	int i, dir_index, index, level;
842914ff771SKyung Min Park 	u8 devfn = source_id & 0xff;
843914ff771SKyung Min Park 	u8 bus = source_id >> 8;
84435bf49e0SLu Baolu 	struct dma_pte *pgtable;
845914ff771SKyung Min Park 
846914ff771SKyung Min Park 	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
847914ff771SKyung Min Park 
848914ff771SKyung Min Park 	/* root entry dump */
849c801250aSZhenzhong Duan 	if (!iommu->root_entry) {
850c801250aSZhenzhong Duan 		pr_info("root table is not present\n");
851914ff771SKyung Min Park 		return;
852914ff771SKyung Min Park 	}
853c801250aSZhenzhong Duan 	rt_entry = &iommu->root_entry[bus];
854914ff771SKyung Min Park 
855914ff771SKyung Min Park 	if (sm_supported(iommu))
856914ff771SKyung Min Park 		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
857914ff771SKyung Min Park 			rt_entry->hi, rt_entry->lo);
858914ff771SKyung Min Park 	else
859914ff771SKyung Min Park 		pr_info("root entry: 0x%016llx", rt_entry->lo);
860914ff771SKyung Min Park 
861914ff771SKyung Min Park 	/* context entry dump */
862914ff771SKyung Min Park 	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
863914ff771SKyung Min Park 	if (!ctx_entry) {
864c801250aSZhenzhong Duan 		pr_info("context table is not present\n");
865914ff771SKyung Min Park 		return;
866914ff771SKyung Min Park 	}
867914ff771SKyung Min Park 
868914ff771SKyung Min Park 	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
869914ff771SKyung Min Park 		ctx_entry->hi, ctx_entry->lo);
870914ff771SKyung Min Park 
871914ff771SKyung Min Park 	/* legacy mode does not require PASID entries */
87235bf49e0SLu Baolu 	if (!sm_supported(iommu)) {
873c801250aSZhenzhong Duan 		if (!context_present(ctx_entry)) {
874c801250aSZhenzhong Duan 			pr_info("legacy mode page table is not present\n");
875c801250aSZhenzhong Duan 			return;
876c801250aSZhenzhong Duan 		}
87735bf49e0SLu Baolu 		level = agaw_to_level(ctx_entry->hi & 7);
87835bf49e0SLu Baolu 		pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
879914ff771SKyung Min Park 		goto pgtable_walk;
88035bf49e0SLu Baolu 	}
881914ff771SKyung Min Park 
882c801250aSZhenzhong Duan 	if (!context_present(ctx_entry)) {
883c801250aSZhenzhong Duan 		pr_info("pasid directory table is not present\n");
884914ff771SKyung Min Park 		return;
885914ff771SKyung Min Park 	}
886c801250aSZhenzhong Duan 
887c801250aSZhenzhong Duan 	/* get the pointer to pasid directory entry */
888c801250aSZhenzhong Duan 	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
889c801250aSZhenzhong Duan 
890914ff771SKyung Min Park 	/* For request-without-pasid, get the pasid from context entry */
891fffaed1eSJacob Pan 	if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
89242987801SJacob Pan 		pasid = IOMMU_NO_PASID;
893914ff771SKyung Min Park 
894914ff771SKyung Min Park 	dir_index = pasid >> PASID_PDE_SHIFT;
895914ff771SKyung Min Park 	pde = &dir[dir_index];
896914ff771SKyung Min Park 	pr_info("pasid dir entry: 0x%016llx\n", pde->val);
897914ff771SKyung Min Park 
898914ff771SKyung Min Park 	/* get the pointer to the pasid table entry */
899914ff771SKyung Min Park 	entries = get_pasid_table_from_pde(pde);
900914ff771SKyung Min Park 	if (!entries) {
901c801250aSZhenzhong Duan 		pr_info("pasid table is not present\n");
902914ff771SKyung Min Park 		return;
903914ff771SKyung Min Park 	}
904914ff771SKyung Min Park 	index = pasid & PASID_PTE_MASK;
905914ff771SKyung Min Park 	pte = &entries[index];
906914ff771SKyung Min Park 	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
907914ff771SKyung Min Park 		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
908914ff771SKyung Min Park 
909c801250aSZhenzhong Duan 	if (!pasid_pte_is_present(pte)) {
910c801250aSZhenzhong Duan 		pr_info("scalable mode page table is not present\n");
911c801250aSZhenzhong Duan 		return;
912c801250aSZhenzhong Duan 	}
913c801250aSZhenzhong Duan 
91435bf49e0SLu Baolu 	if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
91535bf49e0SLu Baolu 		level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
91635bf49e0SLu Baolu 		pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
91735bf49e0SLu Baolu 	} else {
91835bf49e0SLu Baolu 		level = agaw_to_level((pte->val[0] >> 2) & 0x7);
91935bf49e0SLu Baolu 		pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
92035bf49e0SLu Baolu 	}
92135bf49e0SLu Baolu 
922914ff771SKyung Min Park pgtable_walk:
92335bf49e0SLu Baolu 	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
924914ff771SKyung Min Park }
925914ff771SKyung Min Park #endif
926914ff771SKyung Min Park 
927672cf6dfSJoerg Roedel static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
9282d4d7676SJason Gunthorpe 				      unsigned long pfn, int *target_level,
9292d4d7676SJason Gunthorpe 				      gfp_t gfp)
930672cf6dfSJoerg Roedel {
931672cf6dfSJoerg Roedel 	struct dma_pte *parent, *pte;
932672cf6dfSJoerg Roedel 	int level = agaw_to_level(domain->agaw);
933672cf6dfSJoerg Roedel 	int offset;
934672cf6dfSJoerg Roedel 
935672cf6dfSJoerg Roedel 	if (!domain_pfn_supported(domain, pfn))
936672cf6dfSJoerg Roedel 		/* Address beyond IOMMU's addressing capabilities. */
937672cf6dfSJoerg Roedel 		return NULL;
938672cf6dfSJoerg Roedel 
939672cf6dfSJoerg Roedel 	parent = domain->pgd;
940672cf6dfSJoerg Roedel 
941672cf6dfSJoerg Roedel 	while (1) {
942672cf6dfSJoerg Roedel 		void *tmp_page;
943672cf6dfSJoerg Roedel 
944672cf6dfSJoerg Roedel 		offset = pfn_level_offset(pfn, level);
945672cf6dfSJoerg Roedel 		pte = &parent[offset];
946672cf6dfSJoerg Roedel 		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
947672cf6dfSJoerg Roedel 			break;
948672cf6dfSJoerg Roedel 		if (level == *target_level)
949672cf6dfSJoerg Roedel 			break;
950672cf6dfSJoerg Roedel 
951672cf6dfSJoerg Roedel 		if (!dma_pte_present(pte)) {
952672cf6dfSJoerg Roedel 			uint64_t pteval;
953672cf6dfSJoerg Roedel 
9542d4d7676SJason Gunthorpe 			tmp_page = alloc_pgtable_page(domain->nid, gfp);
955672cf6dfSJoerg Roedel 
956672cf6dfSJoerg Roedel 			if (!tmp_page)
957672cf6dfSJoerg Roedel 				return NULL;
958672cf6dfSJoerg Roedel 
959672cf6dfSJoerg Roedel 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
960672cf6dfSJoerg Roedel 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
961e5b0feb4SLu Baolu 			if (domain->use_first_level)
962242b0aaeSTina Zhang 				pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
963242b0aaeSTina Zhang 
964672cf6dfSJoerg Roedel 			if (cmpxchg64(&pte->val, 0ULL, pteval))
965672cf6dfSJoerg Roedel 				/* Someone else set it while we were thinking; use theirs. */
966672cf6dfSJoerg Roedel 				free_pgtable_page(tmp_page);
967672cf6dfSJoerg Roedel 			else
968672cf6dfSJoerg Roedel 				domain_flush_cache(domain, pte, sizeof(*pte));
969672cf6dfSJoerg Roedel 		}
970672cf6dfSJoerg Roedel 		if (level == 1)
971672cf6dfSJoerg Roedel 			break;
972672cf6dfSJoerg Roedel 
973672cf6dfSJoerg Roedel 		parent = phys_to_virt(dma_pte_addr(pte));
974672cf6dfSJoerg Roedel 		level--;
975672cf6dfSJoerg Roedel 	}
976672cf6dfSJoerg Roedel 
977672cf6dfSJoerg Roedel 	if (!*target_level)
978672cf6dfSJoerg Roedel 		*target_level = level;
979672cf6dfSJoerg Roedel 
980672cf6dfSJoerg Roedel 	return pte;
981672cf6dfSJoerg Roedel }
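/*
 * A *target_level of 0 asks for the deepest PTE that already exists
 * along the walk (no tables are allocated) and reports the level found
 * back to the caller; a non-zero value makes the walk allocate
 * intermediate tables down to that level so a leaf or superpage PTE
 * can be installed there.  The cmpxchg64() tolerates concurrent
 * walkers racing to populate the same intermediate table.
 */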
982672cf6dfSJoerg Roedel 
983672cf6dfSJoerg Roedel /* return address's pte at specific level */
984672cf6dfSJoerg Roedel static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
985672cf6dfSJoerg Roedel 					 unsigned long pfn,
986672cf6dfSJoerg Roedel 					 int level, int *large_page)
987672cf6dfSJoerg Roedel {
988672cf6dfSJoerg Roedel 	struct dma_pte *parent, *pte;
989672cf6dfSJoerg Roedel 	int total = agaw_to_level(domain->agaw);
990672cf6dfSJoerg Roedel 	int offset;
991672cf6dfSJoerg Roedel 
992672cf6dfSJoerg Roedel 	parent = domain->pgd;
993672cf6dfSJoerg Roedel 	while (level <= total) {
994672cf6dfSJoerg Roedel 		offset = pfn_level_offset(pfn, total);
995672cf6dfSJoerg Roedel 		pte = &parent[offset];
996672cf6dfSJoerg Roedel 		if (level == total)
997672cf6dfSJoerg Roedel 			return pte;
998672cf6dfSJoerg Roedel 
999672cf6dfSJoerg Roedel 		if (!dma_pte_present(pte)) {
1000672cf6dfSJoerg Roedel 			*large_page = total;
1001672cf6dfSJoerg Roedel 			break;
1002672cf6dfSJoerg Roedel 		}
1003672cf6dfSJoerg Roedel 
1004672cf6dfSJoerg Roedel 		if (dma_pte_superpage(pte)) {
1005672cf6dfSJoerg Roedel 			*large_page = total;
1006672cf6dfSJoerg Roedel 			return pte;
1007672cf6dfSJoerg Roedel 		}
1008672cf6dfSJoerg Roedel 
1009672cf6dfSJoerg Roedel 		parent = phys_to_virt(dma_pte_addr(pte));
1010672cf6dfSJoerg Roedel 		total--;
1011672cf6dfSJoerg Roedel 	}
1012672cf6dfSJoerg Roedel 	return NULL;
1013672cf6dfSJoerg Roedel }
1014672cf6dfSJoerg Roedel 
1015672cf6dfSJoerg Roedel /* clear last level pte, a tlb flush should be followed */
1016672cf6dfSJoerg Roedel static void dma_pte_clear_range(struct dmar_domain *domain,
1017672cf6dfSJoerg Roedel 				unsigned long start_pfn,
1018672cf6dfSJoerg Roedel 				unsigned long last_pfn)
1019672cf6dfSJoerg Roedel {
1020672cf6dfSJoerg Roedel 	unsigned int large_page;
1021672cf6dfSJoerg Roedel 	struct dma_pte *first_pte, *pte;
1022672cf6dfSJoerg Roedel 
102335dc5d89STina Zhang 	if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
102435dc5d89STina Zhang 	    WARN_ON(start_pfn > last_pfn))
102535dc5d89STina Zhang 		return;
1026672cf6dfSJoerg Roedel 
1027672cf6dfSJoerg Roedel 	/* we don't need lock here; nobody else touches the iova range */
1028672cf6dfSJoerg Roedel 	do {
1029672cf6dfSJoerg Roedel 		large_page = 1;
1030672cf6dfSJoerg Roedel 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1031672cf6dfSJoerg Roedel 		if (!pte) {
1032672cf6dfSJoerg Roedel 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1033672cf6dfSJoerg Roedel 			continue;
1034672cf6dfSJoerg Roedel 		}
1035672cf6dfSJoerg Roedel 		do {
1036672cf6dfSJoerg Roedel 			dma_clear_pte(pte);
1037672cf6dfSJoerg Roedel 			start_pfn += lvl_to_nr_pages(large_page);
1038672cf6dfSJoerg Roedel 			pte++;
1039672cf6dfSJoerg Roedel 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1040672cf6dfSJoerg Roedel 
1041672cf6dfSJoerg Roedel 		domain_flush_cache(domain, first_pte,
1042672cf6dfSJoerg Roedel 				   (void *)pte - (void *)first_pte);
1043672cf6dfSJoerg Roedel 
1044672cf6dfSJoerg Roedel 	} while (start_pfn && start_pfn <= last_pfn);
1045672cf6dfSJoerg Roedel }
1046672cf6dfSJoerg Roedel 
1047672cf6dfSJoerg Roedel static void dma_pte_free_level(struct dmar_domain *domain, int level,
1048672cf6dfSJoerg Roedel 			       int retain_level, struct dma_pte *pte,
1049672cf6dfSJoerg Roedel 			       unsigned long pfn, unsigned long start_pfn,
1050672cf6dfSJoerg Roedel 			       unsigned long last_pfn)
1051672cf6dfSJoerg Roedel {
1052672cf6dfSJoerg Roedel 	pfn = max(start_pfn, pfn);
1053672cf6dfSJoerg Roedel 	pte = &pte[pfn_level_offset(pfn, level)];
1054672cf6dfSJoerg Roedel 
1055672cf6dfSJoerg Roedel 	do {
1056672cf6dfSJoerg Roedel 		unsigned long level_pfn;
1057672cf6dfSJoerg Roedel 		struct dma_pte *level_pte;
1058672cf6dfSJoerg Roedel 
1059672cf6dfSJoerg Roedel 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1060672cf6dfSJoerg Roedel 			goto next;
1061672cf6dfSJoerg Roedel 
1062672cf6dfSJoerg Roedel 		level_pfn = pfn & level_mask(level);
1063672cf6dfSJoerg Roedel 		level_pte = phys_to_virt(dma_pte_addr(pte));
1064672cf6dfSJoerg Roedel 
1065672cf6dfSJoerg Roedel 		if (level > 2) {
1066672cf6dfSJoerg Roedel 			dma_pte_free_level(domain, level - 1, retain_level,
1067672cf6dfSJoerg Roedel 					   level_pte, level_pfn, start_pfn,
1068672cf6dfSJoerg Roedel 					   last_pfn);
1069672cf6dfSJoerg Roedel 		}
1070672cf6dfSJoerg Roedel 
1071672cf6dfSJoerg Roedel 		/*
1072672cf6dfSJoerg Roedel 		 * Free the page table if we're below the level we want to
1073672cf6dfSJoerg Roedel 		 * retain and the range covers the entire table.
1074672cf6dfSJoerg Roedel 		 */
1075672cf6dfSJoerg Roedel 		if (level < retain_level && !(start_pfn > level_pfn ||
1076672cf6dfSJoerg Roedel 		      last_pfn < level_pfn + level_size(level) - 1)) {
1077672cf6dfSJoerg Roedel 			dma_clear_pte(pte);
1078672cf6dfSJoerg Roedel 			domain_flush_cache(domain, pte, sizeof(*pte));
1079672cf6dfSJoerg Roedel 			free_pgtable_page(level_pte);
1080672cf6dfSJoerg Roedel 		}
1081672cf6dfSJoerg Roedel next:
1082672cf6dfSJoerg Roedel 		pfn += level_size(level);
1083672cf6dfSJoerg Roedel 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1084672cf6dfSJoerg Roedel }
1085672cf6dfSJoerg Roedel 
1086672cf6dfSJoerg Roedel /*
1087672cf6dfSJoerg Roedel  * clear last level (leaf) ptes and free page table pages below the
1088672cf6dfSJoerg Roedel  * level we wish to keep intact.
1089672cf6dfSJoerg Roedel  */
1090672cf6dfSJoerg Roedel static void dma_pte_free_pagetable(struct dmar_domain *domain,
1091672cf6dfSJoerg Roedel 				   unsigned long start_pfn,
1092672cf6dfSJoerg Roedel 				   unsigned long last_pfn,
1093672cf6dfSJoerg Roedel 				   int retain_level)
1094672cf6dfSJoerg Roedel {
1095672cf6dfSJoerg Roedel 	dma_pte_clear_range(domain, start_pfn, last_pfn);
1096672cf6dfSJoerg Roedel 
1097672cf6dfSJoerg Roedel 	/* We don't need locking here; nobody else touches the iova range */
1098672cf6dfSJoerg Roedel 	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1099672cf6dfSJoerg Roedel 			   domain->pgd, 0, start_pfn, last_pfn);
1100672cf6dfSJoerg Roedel 
1101672cf6dfSJoerg Roedel 	/* free pgd */
1102672cf6dfSJoerg Roedel 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1103672cf6dfSJoerg Roedel 		free_pgtable_page(domain->pgd);
1104672cf6dfSJoerg Roedel 		domain->pgd = NULL;
1105672cf6dfSJoerg Roedel 	}
1106672cf6dfSJoerg Roedel }
1107672cf6dfSJoerg Roedel 
1108672cf6dfSJoerg Roedel /* When a page at a given level is being unlinked from its parent, we don't
1109672cf6dfSJoerg Roedel    need to *modify* it at all. All we need to do is make a list of all the
1110672cf6dfSJoerg Roedel    pages which can be freed just as soon as we've flushed the IOTLB and we
1111672cf6dfSJoerg Roedel    know the hardware page-walk will no longer touch them.
1112672cf6dfSJoerg Roedel    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1113672cf6dfSJoerg Roedel    be freed. */
111487f60cc6SMatthew Wilcox (Oracle) static void dma_pte_list_pagetables(struct dmar_domain *domain,
1115672cf6dfSJoerg Roedel 				    int level, struct dma_pte *pte,
111687f60cc6SMatthew Wilcox (Oracle) 				    struct list_head *freelist)
1117672cf6dfSJoerg Roedel {
1118672cf6dfSJoerg Roedel 	struct page *pg;
1119672cf6dfSJoerg Roedel 
1120672cf6dfSJoerg Roedel 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
112187f60cc6SMatthew Wilcox (Oracle) 	list_add_tail(&pg->lru, freelist);
1122672cf6dfSJoerg Roedel 
1123672cf6dfSJoerg Roedel 	if (level == 1)
112487f60cc6SMatthew Wilcox (Oracle) 		return;
1125672cf6dfSJoerg Roedel 
1126672cf6dfSJoerg Roedel 	pte = page_address(pg);
1127672cf6dfSJoerg Roedel 	do {
1128672cf6dfSJoerg Roedel 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
112987f60cc6SMatthew Wilcox (Oracle) 			dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1130672cf6dfSJoerg Roedel 		pte++;
1131672cf6dfSJoerg Roedel 	} while (!first_pte_in_page(pte));
1132672cf6dfSJoerg Roedel }
1133672cf6dfSJoerg Roedel 
113487f60cc6SMatthew Wilcox (Oracle) static void dma_pte_clear_level(struct dmar_domain *domain, int level,
1135672cf6dfSJoerg Roedel 				struct dma_pte *pte, unsigned long pfn,
113687f60cc6SMatthew Wilcox (Oracle) 				unsigned long start_pfn, unsigned long last_pfn,
113787f60cc6SMatthew Wilcox (Oracle) 				struct list_head *freelist)
1138672cf6dfSJoerg Roedel {
1139672cf6dfSJoerg Roedel 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1140672cf6dfSJoerg Roedel 
1141672cf6dfSJoerg Roedel 	pfn = max(start_pfn, pfn);
1142672cf6dfSJoerg Roedel 	pte = &pte[pfn_level_offset(pfn, level)];
1143672cf6dfSJoerg Roedel 
1144672cf6dfSJoerg Roedel 	do {
114586dc40c7SAlex Williamson 		unsigned long level_pfn = pfn & level_mask(level);
1146672cf6dfSJoerg Roedel 
1147672cf6dfSJoerg Roedel 		if (!dma_pte_present(pte))
1148672cf6dfSJoerg Roedel 			goto next;
1149672cf6dfSJoerg Roedel 
1150672cf6dfSJoerg Roedel 		/* If range covers entire pagetable, free it */
1151672cf6dfSJoerg Roedel 		if (start_pfn <= level_pfn &&
1152672cf6dfSJoerg Roedel 		    last_pfn >= level_pfn + level_size(level) - 1) {
1153672cf6dfSJoerg Roedel 			/* These subordinate page tables are going away entirely. Don't
1154672cf6dfSJoerg Roedel 			   bother to clear them; we're just going to *free* them. */
1155672cf6dfSJoerg Roedel 			if (level > 1 && !dma_pte_superpage(pte))
115687f60cc6SMatthew Wilcox (Oracle) 				dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1157672cf6dfSJoerg Roedel 
1158672cf6dfSJoerg Roedel 			dma_clear_pte(pte);
1159672cf6dfSJoerg Roedel 			if (!first_pte)
1160672cf6dfSJoerg Roedel 				first_pte = pte;
1161672cf6dfSJoerg Roedel 			last_pte = pte;
1162672cf6dfSJoerg Roedel 		} else if (level > 1) {
1163672cf6dfSJoerg Roedel 			/* Recurse down into a level that isn't *entirely* obsolete */
116487f60cc6SMatthew Wilcox (Oracle) 			dma_pte_clear_level(domain, level - 1,
1165672cf6dfSJoerg Roedel 					    phys_to_virt(dma_pte_addr(pte)),
1166672cf6dfSJoerg Roedel 					    level_pfn, start_pfn, last_pfn,
1167672cf6dfSJoerg Roedel 					    freelist);
1168672cf6dfSJoerg Roedel 		}
1169672cf6dfSJoerg Roedel next:
117086dc40c7SAlex Williamson 		pfn = level_pfn + level_size(level);
1171672cf6dfSJoerg Roedel 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1172672cf6dfSJoerg Roedel 
1173672cf6dfSJoerg Roedel 	if (first_pte)
1174672cf6dfSJoerg Roedel 		domain_flush_cache(domain, first_pte,
1175672cf6dfSJoerg Roedel 				   (void *)++last_pte - (void *)first_pte);
1176672cf6dfSJoerg Roedel }
1177672cf6dfSJoerg Roedel 
1178672cf6dfSJoerg Roedel /* We can't just free the pages because the IOMMU may still be walking
1179672cf6dfSJoerg Roedel    the page tables, and may have cached the intermediate levels. The
1180672cf6dfSJoerg Roedel    pages can only be freed after the IOTLB flush has been done. */
118187f60cc6SMatthew Wilcox (Oracle) static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
118287f60cc6SMatthew Wilcox (Oracle) 			 unsigned long last_pfn, struct list_head *freelist)
1183672cf6dfSJoerg Roedel {
118435dc5d89STina Zhang 	if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
118535dc5d89STina Zhang 	    WARN_ON(start_pfn > last_pfn))
118635dc5d89STina Zhang 		return;
1187672cf6dfSJoerg Roedel 
1188672cf6dfSJoerg Roedel 	/* we don't need locking here; nobody else touches the iova range */
118987f60cc6SMatthew Wilcox (Oracle) 	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
119087f60cc6SMatthew Wilcox (Oracle) 			    domain->pgd, 0, start_pfn, last_pfn, freelist);
1191672cf6dfSJoerg Roedel 
1192672cf6dfSJoerg Roedel 	/* free pgd */
1193672cf6dfSJoerg Roedel 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1194672cf6dfSJoerg Roedel 		struct page *pgd_page = virt_to_page(domain->pgd);
119587f60cc6SMatthew Wilcox (Oracle) 		list_add_tail(&pgd_page->lru, freelist);
1196672cf6dfSJoerg Roedel 		domain->pgd = NULL;
1197672cf6dfSJoerg Roedel 	}
1198672cf6dfSJoerg Roedel }
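
/*
 * Illustrative call sequence (cf. domain_exit() below): collect the page
 * table pages on a local freelist, flush the IOTLB, and only then hand the
 * pages back to the allocator.  A sketch, assuming start_pfn/last_pfn
 * describe the range being torn down:
 *
 *	LIST_HEAD(freelist);
 *
 *	domain_unmap(domain, start_pfn, last_pfn, &freelist);
 *	... flush the IOTLB on each IOMMU this domain is attached to ...
 *	put_pages_list(&freelist);
 */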
1199672cf6dfSJoerg Roedel 
1200672cf6dfSJoerg Roedel /* iommu handling */
1201672cf6dfSJoerg Roedel static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1202672cf6dfSJoerg Roedel {
1203672cf6dfSJoerg Roedel 	struct root_entry *root;
1204672cf6dfSJoerg Roedel 
120582d9654fSSuhui 	root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
1206672cf6dfSJoerg Roedel 	if (!root) {
1207672cf6dfSJoerg Roedel 		pr_err("Allocating root entry for %s failed\n",
1208672cf6dfSJoerg Roedel 			iommu->name);
1209672cf6dfSJoerg Roedel 		return -ENOMEM;
1210672cf6dfSJoerg Roedel 	}
1211672cf6dfSJoerg Roedel 
1212672cf6dfSJoerg Roedel 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1213672cf6dfSJoerg Roedel 	iommu->root_entry = root;
1214672cf6dfSJoerg Roedel 
1215672cf6dfSJoerg Roedel 	return 0;
1216672cf6dfSJoerg Roedel }
1217672cf6dfSJoerg Roedel 
1218672cf6dfSJoerg Roedel static void iommu_set_root_entry(struct intel_iommu *iommu)
1219672cf6dfSJoerg Roedel {
1220672cf6dfSJoerg Roedel 	u64 addr;
1221672cf6dfSJoerg Roedel 	u32 sts;
1222672cf6dfSJoerg Roedel 	unsigned long flag;
1223672cf6dfSJoerg Roedel 
1224672cf6dfSJoerg Roedel 	addr = virt_to_phys(iommu->root_entry);
1225672cf6dfSJoerg Roedel 	if (sm_supported(iommu))
1226672cf6dfSJoerg Roedel 		addr |= DMA_RTADDR_SMT;
1227672cf6dfSJoerg Roedel 
1228672cf6dfSJoerg Roedel 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1229672cf6dfSJoerg Roedel 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1230672cf6dfSJoerg Roedel 
1231672cf6dfSJoerg Roedel 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1232672cf6dfSJoerg Roedel 
1233672cf6dfSJoerg Roedel 	/* Make sure hardware completes it */
1234672cf6dfSJoerg Roedel 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1235672cf6dfSJoerg Roedel 		      readl, (sts & DMA_GSTS_RTPS), sts);
1236672cf6dfSJoerg Roedel 
1237672cf6dfSJoerg Roedel 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1238c0474a60SLu Baolu 
12396ad931a2SLu Baolu 	/*
12406ad931a2SLu Baolu 	 * Hardware invalidates all DMA remapping hardware translation
12416ad931a2SLu Baolu 	 * caches as part of SRTP flow.
12426ad931a2SLu Baolu 	 */
12436ad931a2SLu Baolu 	if (cap_esrtps(iommu->cap))
12446ad931a2SLu Baolu 		return;
12456ad931a2SLu Baolu 
1246c0474a60SLu Baolu 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1247c0474a60SLu Baolu 	if (sm_supported(iommu))
1248c0474a60SLu Baolu 		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1249c0474a60SLu Baolu 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1250672cf6dfSJoerg Roedel }
1251672cf6dfSJoerg Roedel 
1252672cf6dfSJoerg Roedel void iommu_flush_write_buffer(struct intel_iommu *iommu)
1253672cf6dfSJoerg Roedel {
1254672cf6dfSJoerg Roedel 	u32 val;
1255672cf6dfSJoerg Roedel 	unsigned long flag;
1256672cf6dfSJoerg Roedel 
1257672cf6dfSJoerg Roedel 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1258672cf6dfSJoerg Roedel 		return;
1259672cf6dfSJoerg Roedel 
1260672cf6dfSJoerg Roedel 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1261672cf6dfSJoerg Roedel 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1262672cf6dfSJoerg Roedel 
1263672cf6dfSJoerg Roedel 	/* Make sure hardware completes it */
1264672cf6dfSJoerg Roedel 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1265672cf6dfSJoerg Roedel 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1266672cf6dfSJoerg Roedel 
1267672cf6dfSJoerg Roedel 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1268672cf6dfSJoerg Roedel }
1269672cf6dfSJoerg Roedel 
1270672cf6dfSJoerg Roedel /* return value determines if we need a write buffer flush */
1271672cf6dfSJoerg Roedel static void __iommu_flush_context(struct intel_iommu *iommu,
1272672cf6dfSJoerg Roedel 				  u16 did, u16 source_id, u8 function_mask,
1273672cf6dfSJoerg Roedel 				  u64 type)
1274672cf6dfSJoerg Roedel {
1275672cf6dfSJoerg Roedel 	u64 val = 0;
1276672cf6dfSJoerg Roedel 	unsigned long flag;
1277672cf6dfSJoerg Roedel 
1278672cf6dfSJoerg Roedel 	switch (type) {
1279672cf6dfSJoerg Roedel 	case DMA_CCMD_GLOBAL_INVL:
1280672cf6dfSJoerg Roedel 		val = DMA_CCMD_GLOBAL_INVL;
1281672cf6dfSJoerg Roedel 		break;
1282672cf6dfSJoerg Roedel 	case DMA_CCMD_DOMAIN_INVL:
1283672cf6dfSJoerg Roedel 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1284672cf6dfSJoerg Roedel 		break;
1285672cf6dfSJoerg Roedel 	case DMA_CCMD_DEVICE_INVL:
1286672cf6dfSJoerg Roedel 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1287672cf6dfSJoerg Roedel 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1288672cf6dfSJoerg Roedel 		break;
1289672cf6dfSJoerg Roedel 	default:
12904a627a25STina Zhang 		pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
12914a627a25STina Zhang 			iommu->name, type);
12924a627a25STina Zhang 		return;
1293672cf6dfSJoerg Roedel 	}
1294672cf6dfSJoerg Roedel 	val |= DMA_CCMD_ICC;
1295672cf6dfSJoerg Roedel 
1296672cf6dfSJoerg Roedel 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1297672cf6dfSJoerg Roedel 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1298672cf6dfSJoerg Roedel 
1299672cf6dfSJoerg Roedel 	/* Make sure hardware completes it */
1300672cf6dfSJoerg Roedel 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1301672cf6dfSJoerg Roedel 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1302672cf6dfSJoerg Roedel 
1303672cf6dfSJoerg Roedel 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1304672cf6dfSJoerg Roedel }
1305672cf6dfSJoerg Roedel 
1306672cf6dfSJoerg Roedel /* return value determines if we need a write buffer flush */
1307672cf6dfSJoerg Roedel static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1308672cf6dfSJoerg Roedel 				u64 addr, unsigned int size_order, u64 type)
1309672cf6dfSJoerg Roedel {
1310672cf6dfSJoerg Roedel 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1311672cf6dfSJoerg Roedel 	u64 val = 0, val_iva = 0;
1312672cf6dfSJoerg Roedel 	unsigned long flag;
1313672cf6dfSJoerg Roedel 
1314672cf6dfSJoerg Roedel 	switch (type) {
1315672cf6dfSJoerg Roedel 	case DMA_TLB_GLOBAL_FLUSH:
1316672cf6dfSJoerg Roedel 		/* global flush doesn't need to set IVA_REG */
1317672cf6dfSJoerg Roedel 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1318672cf6dfSJoerg Roedel 		break;
1319672cf6dfSJoerg Roedel 	case DMA_TLB_DSI_FLUSH:
1320672cf6dfSJoerg Roedel 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1321672cf6dfSJoerg Roedel 		break;
1322672cf6dfSJoerg Roedel 	case DMA_TLB_PSI_FLUSH:
1323672cf6dfSJoerg Roedel 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1324672cf6dfSJoerg Roedel 		/* IH bit is passed in as part of address */
1325672cf6dfSJoerg Roedel 		val_iva = size_order | addr;
1326672cf6dfSJoerg Roedel 		break;
1327672cf6dfSJoerg Roedel 	default:
13284a627a25STina Zhang 		pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
13294a627a25STina Zhang 			iommu->name, type);
13304a627a25STina Zhang 		return;
1331672cf6dfSJoerg Roedel 	}
1332b4da4e11SLu Baolu 
1333672cf6dfSJoerg Roedel 	if (cap_write_drain(iommu->cap))
1334672cf6dfSJoerg Roedel 		val |= DMA_TLB_WRITE_DRAIN;
1335672cf6dfSJoerg Roedel 
1336672cf6dfSJoerg Roedel 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1337672cf6dfSJoerg Roedel 	/* Note: Only uses first TLB reg currently */
1338672cf6dfSJoerg Roedel 	if (val_iva)
1339672cf6dfSJoerg Roedel 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1340672cf6dfSJoerg Roedel 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1341672cf6dfSJoerg Roedel 
1342672cf6dfSJoerg Roedel 	/* Make sure hardware completes it */
1343672cf6dfSJoerg Roedel 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1344672cf6dfSJoerg Roedel 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1345672cf6dfSJoerg Roedel 
1346672cf6dfSJoerg Roedel 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1347672cf6dfSJoerg Roedel 
1348672cf6dfSJoerg Roedel 	/* check IOTLB invalidation granularity */
1349672cf6dfSJoerg Roedel 	if (DMA_TLB_IAIG(val) == 0)
1350672cf6dfSJoerg Roedel 		pr_err("Flush IOTLB failed\n");
1351672cf6dfSJoerg Roedel 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1352672cf6dfSJoerg Roedel 		pr_debug("TLB flush request %Lx, actual %Lx\n",
1353672cf6dfSJoerg Roedel 			(unsigned long long)DMA_TLB_IIRG(type),
1354672cf6dfSJoerg Roedel 			(unsigned long long)DMA_TLB_IAIG(val));
1355672cf6dfSJoerg Roedel }
1356672cf6dfSJoerg Roedel 
1357672cf6dfSJoerg Roedel static struct device_domain_info *
13580faa19a1SLu Baolu domain_lookup_dev_info(struct dmar_domain *domain,
13590faa19a1SLu Baolu 		       struct intel_iommu *iommu, u8 bus, u8 devfn)
1360672cf6dfSJoerg Roedel {
1361672cf6dfSJoerg Roedel 	struct device_domain_info *info;
1362a349ffcbSLu Baolu 	unsigned long flags;
1363672cf6dfSJoerg Roedel 
1364a349ffcbSLu Baolu 	spin_lock_irqsave(&domain->lock, flags);
1365969aaefbSLu Baolu 	list_for_each_entry(info, &domain->devices, link) {
1366672cf6dfSJoerg Roedel 		if (info->iommu == iommu && info->bus == bus &&
1367672cf6dfSJoerg Roedel 		    info->devfn == devfn) {
1368a349ffcbSLu Baolu 			spin_unlock_irqrestore(&domain->lock, flags);
13690faa19a1SLu Baolu 			return info;
1370672cf6dfSJoerg Roedel 		}
1371969aaefbSLu Baolu 	}
1372a349ffcbSLu Baolu 	spin_unlock_irqrestore(&domain->lock, flags);
1373672cf6dfSJoerg Roedel 
1374672cf6dfSJoerg Roedel 	return NULL;
1375672cf6dfSJoerg Roedel }
1376672cf6dfSJoerg Roedel 
1377672cf6dfSJoerg Roedel static void domain_update_iotlb(struct dmar_domain *domain)
1378672cf6dfSJoerg Roedel {
13797d0c9da6SLu Baolu 	struct dev_pasid_info *dev_pasid;
1380672cf6dfSJoerg Roedel 	struct device_domain_info *info;
1381672cf6dfSJoerg Roedel 	bool has_iotlb_device = false;
1382a349ffcbSLu Baolu 	unsigned long flags;
1383672cf6dfSJoerg Roedel 
1384a349ffcbSLu Baolu 	spin_lock_irqsave(&domain->lock, flags);
1385969aaefbSLu Baolu 	list_for_each_entry(info, &domain->devices, link) {
13867c29ada5SLiu Yi L 		if (info->ats_enabled) {
1387672cf6dfSJoerg Roedel 			has_iotlb_device = true;
1388672cf6dfSJoerg Roedel 			break;
1389672cf6dfSJoerg Roedel 		}
1390969aaefbSLu Baolu 	}
13917d0c9da6SLu Baolu 
13927d0c9da6SLu Baolu 	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
13937d0c9da6SLu Baolu 		info = dev_iommu_priv_get(dev_pasid->dev);
13947d0c9da6SLu Baolu 		if (info->ats_enabled) {
13957d0c9da6SLu Baolu 			has_iotlb_device = true;
13967d0c9da6SLu Baolu 			break;
13977d0c9da6SLu Baolu 		}
13987d0c9da6SLu Baolu 	}
1399672cf6dfSJoerg Roedel 	domain->has_iotlb_device = has_iotlb_device;
1400a349ffcbSLu Baolu 	spin_unlock_irqrestore(&domain->lock, flags);
1401672cf6dfSJoerg Roedel }
1402672cf6dfSJoerg Roedel 
1403e65a6897SJacob Pan /*
1404e65a6897SJacob Pan  * The extra devTLB flush quirk impacts those QAT devices with PCI device
1405e65a6897SJacob Pan  * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
1406e65a6897SJacob Pan  * check because it applies only to the built-in QAT devices and it doesn't
1407e65a6897SJacob Pan  * grant additional privileges.
1408e65a6897SJacob Pan  */
140981c95fbaSJacob Pan #define BUGGY_QAT_DEVID_MASK 0x4940
1410e65a6897SJacob Pan static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
1411e65a6897SJacob Pan {
1412e65a6897SJacob Pan 	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
1413e65a6897SJacob Pan 		return false;
1414e65a6897SJacob Pan 
1415e65a6897SJacob Pan 	if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
1416e65a6897SJacob Pan 		return false;
1417e65a6897SJacob Pan 
1418e65a6897SJacob Pan 	return true;
1419e65a6897SJacob Pan }
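
/*
 * For example, device ID 0x4943 & 0xfffc == 0x4940 == BUGGY_QAT_DEVID_MASK,
 * so the quirk matches exactly the 0x4940-0x4943 range described above.
 */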
1420e65a6897SJacob Pan 
14210faa19a1SLu Baolu static void iommu_enable_pci_caps(struct device_domain_info *info)
1422672cf6dfSJoerg Roedel {
1423672cf6dfSJoerg Roedel 	struct pci_dev *pdev;
1424672cf6dfSJoerg Roedel 
1425c7be17c2SLu Baolu 	if (!dev_is_pci(info->dev))
1426672cf6dfSJoerg Roedel 		return;
1427672cf6dfSJoerg Roedel 
1428672cf6dfSJoerg Roedel 	pdev = to_pci_dev(info->dev);
1429672cf6dfSJoerg Roedel 
1430672cf6dfSJoerg Roedel 	/* The PCIe spec, in its wisdom, declares that the behaviour of
1431672cf6dfSJoerg Roedel 	   the device if you enable PASID support after ATS support is
1432672cf6dfSJoerg Roedel 	   undefined. So always enable PASID support on devices which
1433672cf6dfSJoerg Roedel 	   have it, even if we can't yet know if we're ever going to
1434672cf6dfSJoerg Roedel 	   use it. */
1435672cf6dfSJoerg Roedel 	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1436672cf6dfSJoerg Roedel 		info->pasid_enabled = 1;
1437672cf6dfSJoerg Roedel 
1438672cf6dfSJoerg Roedel 	if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1439672cf6dfSJoerg Roedel 	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1440672cf6dfSJoerg Roedel 		info->ats_enabled = 1;
1441672cf6dfSJoerg Roedel 		domain_update_iotlb(info->domain);
1442672cf6dfSJoerg Roedel 	}
1443672cf6dfSJoerg Roedel }
1444672cf6dfSJoerg Roedel 
1445ba502132SLu Baolu static void iommu_disable_pci_caps(struct device_domain_info *info)
1446672cf6dfSJoerg Roedel {
1447672cf6dfSJoerg Roedel 	struct pci_dev *pdev;
1448672cf6dfSJoerg Roedel 
1449672cf6dfSJoerg Roedel 	if (!dev_is_pci(info->dev))
1450672cf6dfSJoerg Roedel 		return;
1451672cf6dfSJoerg Roedel 
1452672cf6dfSJoerg Roedel 	pdev = to_pci_dev(info->dev);
1453672cf6dfSJoerg Roedel 
1454672cf6dfSJoerg Roedel 	if (info->ats_enabled) {
1455672cf6dfSJoerg Roedel 		pci_disable_ats(pdev);
1456672cf6dfSJoerg Roedel 		info->ats_enabled = 0;
1457672cf6dfSJoerg Roedel 		domain_update_iotlb(info->domain);
1458672cf6dfSJoerg Roedel 	}
14590faa19a1SLu Baolu 
1460672cf6dfSJoerg Roedel 	if (info->pasid_enabled) {
1461672cf6dfSJoerg Roedel 		pci_disable_pasid(pdev);
1462672cf6dfSJoerg Roedel 		info->pasid_enabled = 0;
1463672cf6dfSJoerg Roedel 	}
1464672cf6dfSJoerg Roedel }
1465672cf6dfSJoerg Roedel 
14667c29ada5SLiu Yi L static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
14677c29ada5SLiu Yi L 				    u64 addr, unsigned int mask)
1468672cf6dfSJoerg Roedel {
1469672cf6dfSJoerg Roedel 	u16 sid, qdep;
1470672cf6dfSJoerg Roedel 
14717c29ada5SLiu Yi L 	if (!info || !info->ats_enabled)
1472672cf6dfSJoerg Roedel 		return;
1473672cf6dfSJoerg Roedel 
1474672cf6dfSJoerg Roedel 	sid = info->bus << 8 | info->devfn;
1475672cf6dfSJoerg Roedel 	qdep = info->ats_qdep;
1476672cf6dfSJoerg Roedel 	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1477672cf6dfSJoerg Roedel 			   qdep, addr, mask);
147842987801SJacob Pan 	quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
1479672cf6dfSJoerg Roedel }
14807c29ada5SLiu Yi L 
14817c29ada5SLiu Yi L static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
14827c29ada5SLiu Yi L 				  u64 addr, unsigned mask)
14837c29ada5SLiu Yi L {
14847d0c9da6SLu Baolu 	struct dev_pasid_info *dev_pasid;
14857c29ada5SLiu Yi L 	struct device_domain_info *info;
1486a349ffcbSLu Baolu 	unsigned long flags;
14877c29ada5SLiu Yi L 
14887c29ada5SLiu Yi L 	if (!domain->has_iotlb_device)
14897c29ada5SLiu Yi L 		return;
14907c29ada5SLiu Yi L 
1491a349ffcbSLu Baolu 	spin_lock_irqsave(&domain->lock, flags);
14927c29ada5SLiu Yi L 	list_for_each_entry(info, &domain->devices, link)
14937c29ada5SLiu Yi L 		__iommu_flush_dev_iotlb(info, addr, mask);
14947d0c9da6SLu Baolu 
14957d0c9da6SLu Baolu 	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
14967d0c9da6SLu Baolu 		info = dev_iommu_priv_get(dev_pasid->dev);
14977d0c9da6SLu Baolu 
14987d0c9da6SLu Baolu 		if (!info->ats_enabled)
14997d0c9da6SLu Baolu 			continue;
15007d0c9da6SLu Baolu 
15017d0c9da6SLu Baolu 		qi_flush_dev_iotlb_pasid(info->iommu,
15027d0c9da6SLu Baolu 					 PCI_DEVID(info->bus, info->devfn),
15037d0c9da6SLu Baolu 					 info->pfsid, dev_pasid->pasid,
15047d0c9da6SLu Baolu 					 info->ats_qdep, addr,
15057d0c9da6SLu Baolu 					 mask);
15067d0c9da6SLu Baolu 	}
1507a349ffcbSLu Baolu 	spin_unlock_irqrestore(&domain->lock, flags);
1508672cf6dfSJoerg Roedel }
1509672cf6dfSJoerg Roedel 
1510ac1a3483SLu Baolu static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
1511ac1a3483SLu Baolu 				     struct dmar_domain *domain, u64 addr,
1512ac1a3483SLu Baolu 				     unsigned long npages, bool ih)
1513ac1a3483SLu Baolu {
1514ac1a3483SLu Baolu 	u16 did = domain_id_iommu(domain, iommu);
15157d0c9da6SLu Baolu 	struct dev_pasid_info *dev_pasid;
1516ac1a3483SLu Baolu 	unsigned long flags;
1517ac1a3483SLu Baolu 
1518ac1a3483SLu Baolu 	spin_lock_irqsave(&domain->lock, flags);
15197d0c9da6SLu Baolu 	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
15207d0c9da6SLu Baolu 		qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
15217d0c9da6SLu Baolu 
152237f900e7SLu Baolu 	if (!list_empty(&domain->devices))
1523ac1a3483SLu Baolu 		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);
1524672cf6dfSJoerg Roedel 	spin_unlock_irqrestore(&domain->lock, flags);
1525672cf6dfSJoerg Roedel }
1526672cf6dfSJoerg Roedel 
1527672cf6dfSJoerg Roedel static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1528672cf6dfSJoerg Roedel 				  struct dmar_domain *domain,
1529672cf6dfSJoerg Roedel 				  unsigned long pfn, unsigned int pages,
1530672cf6dfSJoerg Roedel 				  int ih, int map)
1531672cf6dfSJoerg Roedel {
153259bf3557SDavid Stevens 	unsigned int aligned_pages = __roundup_pow_of_two(pages);
153359bf3557SDavid Stevens 	unsigned int mask = ilog2(aligned_pages);
1534672cf6dfSJoerg Roedel 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1535ba949f4cSLu Baolu 	u16 did = domain_id_iommu(domain, iommu);
1536672cf6dfSJoerg Roedel 
15374a627a25STina Zhang 	if (WARN_ON(!pages))
15384a627a25STina Zhang 		return;
1539672cf6dfSJoerg Roedel 
1540672cf6dfSJoerg Roedel 	if (ih)
1541672cf6dfSJoerg Roedel 		ih = 1 << 6;
1542672cf6dfSJoerg Roedel 
1543e5b0feb4SLu Baolu 	if (domain->use_first_level) {
1544ac1a3483SLu Baolu 		domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
1545672cf6dfSJoerg Roedel 	} else {
154659bf3557SDavid Stevens 		unsigned long bitmask = aligned_pages - 1;
154759bf3557SDavid Stevens 
154859bf3557SDavid Stevens 		/*
154959bf3557SDavid Stevens 		 * PSI masks the low order bits of the base address. If the
155059bf3557SDavid Stevens 		 * address isn't aligned to the mask, then compute a mask value
155159bf3557SDavid Stevens 		 * needed to ensure the target range is flushed.
155259bf3557SDavid Stevens 		 */
155359bf3557SDavid Stevens 		if (unlikely(bitmask & pfn)) {
155459bf3557SDavid Stevens 			unsigned long end_pfn = pfn + pages - 1, shared_bits;
155559bf3557SDavid Stevens 
155659bf3557SDavid Stevens 			/*
155759bf3557SDavid Stevens 			 * Since end_pfn <= pfn + bitmask, the only way bits
155859bf3557SDavid Stevens 			 * higher than bitmask can differ in pfn and end_pfn is
155959bf3557SDavid Stevens 			 * by carrying. This means after masking out bitmask,
156059bf3557SDavid Stevens 			 * high bits starting with the first set bit in
156159bf3557SDavid Stevens 			 * shared_bits are all equal in both pfn and end_pfn.
156259bf3557SDavid Stevens 			 */
156359bf3557SDavid Stevens 			shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
156459bf3557SDavid Stevens 			mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
156559bf3557SDavid Stevens 		}
156659bf3557SDavid Stevens 
1567672cf6dfSJoerg Roedel 		/*
1568672cf6dfSJoerg Roedel 		 * Fall back to domain selective flush if no PSI support or
156959bf3557SDavid Stevens 		 * the size is too big.
1570672cf6dfSJoerg Roedel 		 */
1571672cf6dfSJoerg Roedel 		if (!cap_pgsel_inv(iommu->cap) ||
1572672cf6dfSJoerg Roedel 		    mask > cap_max_amask_val(iommu->cap))
1573672cf6dfSJoerg Roedel 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
1574672cf6dfSJoerg Roedel 							DMA_TLB_DSI_FLUSH);
1575672cf6dfSJoerg Roedel 		else
1576672cf6dfSJoerg Roedel 			iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1577672cf6dfSJoerg Roedel 							DMA_TLB_PSI_FLUSH);
1578672cf6dfSJoerg Roedel 	}
1579672cf6dfSJoerg Roedel 
1580672cf6dfSJoerg Roedel 	/*
1581672cf6dfSJoerg Roedel 	 * In caching mode, changes of pages from non-present to present require
1582672cf6dfSJoerg Roedel 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1583672cf6dfSJoerg Roedel 	 */
1584672cf6dfSJoerg Roedel 	if (!cap_caching_mode(iommu->cap) || !map)
1585672cf6dfSJoerg Roedel 		iommu_flush_dev_iotlb(domain, addr, mask);
1586672cf6dfSJoerg Roedel }
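
/*
 * Worked example for the unaligned-PSI case above: pfn = 3, pages = 2 gives
 * aligned_pages = 2 and bitmask = 1, and bitmask & pfn is non-zero.  Then
 * end_pfn = 4, pfn ^ end_pfn = 0b111, shared_bits = ~0b111 & ~0b001, whose
 * lowest set bit is bit 3, so mask = 3 and the PSI covers pfns 0-7, which
 * contains the whole [3, 4] range.
 */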
1587672cf6dfSJoerg Roedel 
1588672cf6dfSJoerg Roedel /* Notification for newly created mappings */
1589672cf6dfSJoerg Roedel static inline void __mapping_notify_one(struct intel_iommu *iommu,
1590672cf6dfSJoerg Roedel 					struct dmar_domain *domain,
1591672cf6dfSJoerg Roedel 					unsigned long pfn, unsigned int pages)
1592672cf6dfSJoerg Roedel {
1593672cf6dfSJoerg Roedel 	/*
1594672cf6dfSJoerg Roedel 	 * It's a non-present to present mapping. Only flush if caching mode
1595672cf6dfSJoerg Roedel 	 * and second level.
1596672cf6dfSJoerg Roedel 	 */
1597e5b0feb4SLu Baolu 	if (cap_caching_mode(iommu->cap) && !domain->use_first_level)
1598672cf6dfSJoerg Roedel 		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1599672cf6dfSJoerg Roedel 	else
1600672cf6dfSJoerg Roedel 		iommu_flush_write_buffer(iommu);
1601672cf6dfSJoerg Roedel }
1602672cf6dfSJoerg Roedel 
1603c588072bSTom Murphy static void intel_flush_iotlb_all(struct iommu_domain *domain)
1604672cf6dfSJoerg Roedel {
1605c588072bSTom Murphy 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1606ba949f4cSLu Baolu 	struct iommu_domain_info *info;
1607ba949f4cSLu Baolu 	unsigned long idx;
1608672cf6dfSJoerg Roedel 
1609ba949f4cSLu Baolu 	xa_for_each(&dmar_domain->iommu_array, idx, info) {
1610ba949f4cSLu Baolu 		struct intel_iommu *iommu = info->iommu;
1611ba949f4cSLu Baolu 		u16 did = domain_id_iommu(dmar_domain, iommu);
1612672cf6dfSJoerg Roedel 
1613e5b0feb4SLu Baolu 		if (dmar_domain->use_first_level)
1614ac1a3483SLu Baolu 			domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0);
1615672cf6dfSJoerg Roedel 		else
1616672cf6dfSJoerg Roedel 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
1617672cf6dfSJoerg Roedel 						 DMA_TLB_DSI_FLUSH);
1618672cf6dfSJoerg Roedel 
1619672cf6dfSJoerg Roedel 		if (!cap_caching_mode(iommu->cap))
1620402e6688SLu Baolu 			iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
1621672cf6dfSJoerg Roedel 	}
1622672cf6dfSJoerg Roedel }
1623672cf6dfSJoerg Roedel 
1624672cf6dfSJoerg Roedel static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1625672cf6dfSJoerg Roedel {
1626672cf6dfSJoerg Roedel 	u32 pmen;
1627672cf6dfSJoerg Roedel 	unsigned long flags;
1628672cf6dfSJoerg Roedel 
1629672cf6dfSJoerg Roedel 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1630672cf6dfSJoerg Roedel 		return;
1631672cf6dfSJoerg Roedel 
1632672cf6dfSJoerg Roedel 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1633672cf6dfSJoerg Roedel 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1634672cf6dfSJoerg Roedel 	pmen &= ~DMA_PMEN_EPM;
1635672cf6dfSJoerg Roedel 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1636672cf6dfSJoerg Roedel 
1637672cf6dfSJoerg Roedel 	/* wait for the protected region status bit to clear */
1638672cf6dfSJoerg Roedel 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1639672cf6dfSJoerg Roedel 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1640672cf6dfSJoerg Roedel 
1641672cf6dfSJoerg Roedel 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1642672cf6dfSJoerg Roedel }
1643672cf6dfSJoerg Roedel 
1644672cf6dfSJoerg Roedel static void iommu_enable_translation(struct intel_iommu *iommu)
1645672cf6dfSJoerg Roedel {
1646672cf6dfSJoerg Roedel 	u32 sts;
1647672cf6dfSJoerg Roedel 	unsigned long flags;
1648672cf6dfSJoerg Roedel 
1649672cf6dfSJoerg Roedel 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1650672cf6dfSJoerg Roedel 	iommu->gcmd |= DMA_GCMD_TE;
1651672cf6dfSJoerg Roedel 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1652672cf6dfSJoerg Roedel 
1653672cf6dfSJoerg Roedel 	/* Make sure hardware completes it */
1654672cf6dfSJoerg Roedel 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1655672cf6dfSJoerg Roedel 		      readl, (sts & DMA_GSTS_TES), sts);
1656672cf6dfSJoerg Roedel 
1657672cf6dfSJoerg Roedel 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1658672cf6dfSJoerg Roedel }
1659672cf6dfSJoerg Roedel 
1660672cf6dfSJoerg Roedel static void iommu_disable_translation(struct intel_iommu *iommu)
1661672cf6dfSJoerg Roedel {
1662672cf6dfSJoerg Roedel 	u32 sts;
1663672cf6dfSJoerg Roedel 	unsigned long flag;
1664672cf6dfSJoerg Roedel 
1665b1012ca8SLu Baolu 	if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1666b1012ca8SLu Baolu 	    (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1667b1012ca8SLu Baolu 		return;
1668b1012ca8SLu Baolu 
1669672cf6dfSJoerg Roedel 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1670672cf6dfSJoerg Roedel 	iommu->gcmd &= ~DMA_GCMD_TE;
1671672cf6dfSJoerg Roedel 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1672672cf6dfSJoerg Roedel 
1673672cf6dfSJoerg Roedel 	/* Make sure hardware completes it */
1674672cf6dfSJoerg Roedel 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1675672cf6dfSJoerg Roedel 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1676672cf6dfSJoerg Roedel 
1677672cf6dfSJoerg Roedel 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1678672cf6dfSJoerg Roedel }
1679672cf6dfSJoerg Roedel 
1680672cf6dfSJoerg Roedel static int iommu_init_domains(struct intel_iommu *iommu)
1681672cf6dfSJoerg Roedel {
1682bb712573SChristophe JAILLET 	u32 ndomains;
1683672cf6dfSJoerg Roedel 
1684672cf6dfSJoerg Roedel 	ndomains = cap_ndoms(iommu->cap);
1685672cf6dfSJoerg Roedel 	pr_debug("%s: Number of Domains supported <%d>\n",
1686672cf6dfSJoerg Roedel 		 iommu->name, ndomains);
1687672cf6dfSJoerg Roedel 
1688672cf6dfSJoerg Roedel 	spin_lock_init(&iommu->lock);
1689672cf6dfSJoerg Roedel 
1690bb712573SChristophe JAILLET 	iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL);
16915e41c998SZhen Lei 	if (!iommu->domain_ids)
1692672cf6dfSJoerg Roedel 		return -ENOMEM;
1693672cf6dfSJoerg Roedel 
1694672cf6dfSJoerg Roedel 	/*
1695672cf6dfSJoerg Roedel 	 * If Caching mode is set, then invalid translations are tagged
1696672cf6dfSJoerg Roedel 	 * with domain-id 0, hence we need to pre-allocate it. We also
1697672cf6dfSJoerg Roedel 	 * use domain-id 0 as a marker for non-allocated domain-id, so
1698672cf6dfSJoerg Roedel 	 * make sure it is not used for a real domain.
1699672cf6dfSJoerg Roedel 	 */
1700672cf6dfSJoerg Roedel 	set_bit(0, iommu->domain_ids);
1701672cf6dfSJoerg Roedel 
1702672cf6dfSJoerg Roedel 	/*
1703672cf6dfSJoerg Roedel 	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1704672cf6dfSJoerg Roedel 	 * entry for first-level or pass-through translation modes should
1705672cf6dfSJoerg Roedel 	 * be programmed with a domain id different from those used for
1706672cf6dfSJoerg Roedel 	 * second-level or nested translation. We reserve a domain id for
170754e86bfeSLu Baolu 	 * this purpose. This domain id is also used for identity domain
170854e86bfeSLu Baolu 	 * in legacy mode.
1709672cf6dfSJoerg Roedel 	 */
1710672cf6dfSJoerg Roedel 	set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1711672cf6dfSJoerg Roedel 
1712672cf6dfSJoerg Roedel 	return 0;
1713672cf6dfSJoerg Roedel }
1714672cf6dfSJoerg Roedel 
1715672cf6dfSJoerg Roedel static void disable_dmar_iommu(struct intel_iommu *iommu)
1716672cf6dfSJoerg Roedel {
1717402e6688SLu Baolu 	if (!iommu->domain_ids)
1718672cf6dfSJoerg Roedel 		return;
1719672cf6dfSJoerg Roedel 
172098f7b0dbSLu Baolu 	/*
172198f7b0dbSLu Baolu 	 * All iommu domains must have been detached from the devices,
172298f7b0dbSLu Baolu 	 * hence there should be no domain IDs in use.
172398f7b0dbSLu Baolu 	 */
172498f7b0dbSLu Baolu 	if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
172598f7b0dbSLu Baolu 		    > NUM_RESERVED_DID))
172698f7b0dbSLu Baolu 		return;
1727672cf6dfSJoerg Roedel 
1728672cf6dfSJoerg Roedel 	if (iommu->gcmd & DMA_GCMD_TE)
1729672cf6dfSJoerg Roedel 		iommu_disable_translation(iommu);
1730672cf6dfSJoerg Roedel }
1731672cf6dfSJoerg Roedel 
1732672cf6dfSJoerg Roedel static void free_dmar_iommu(struct intel_iommu *iommu)
1733672cf6dfSJoerg Roedel {
1734402e6688SLu Baolu 	if (iommu->domain_ids) {
1735bb712573SChristophe JAILLET 		bitmap_free(iommu->domain_ids);
1736672cf6dfSJoerg Roedel 		iommu->domain_ids = NULL;
1737672cf6dfSJoerg Roedel 	}
1738672cf6dfSJoerg Roedel 
17390c5f6c0dSLu Baolu 	if (iommu->copied_tables) {
17400c5f6c0dSLu Baolu 		bitmap_free(iommu->copied_tables);
17410c5f6c0dSLu Baolu 		iommu->copied_tables = NULL;
17420c5f6c0dSLu Baolu 	}
17430c5f6c0dSLu Baolu 
1744672cf6dfSJoerg Roedel 	/* free context mapping */
1745672cf6dfSJoerg Roedel 	free_context_table(iommu);
1746672cf6dfSJoerg Roedel 
1747672cf6dfSJoerg Roedel #ifdef CONFIG_INTEL_IOMMU_SVM
1748672cf6dfSJoerg Roedel 	if (pasid_supported(iommu)) {
1749672cf6dfSJoerg Roedel 		if (ecap_prs(iommu->ecap))
1750672cf6dfSJoerg Roedel 			intel_svm_finish_prq(iommu);
1751672cf6dfSJoerg Roedel 	}
1752672cf6dfSJoerg Roedel #endif
1753672cf6dfSJoerg Roedel }
1754672cf6dfSJoerg Roedel 
1755672cf6dfSJoerg Roedel /*
1756672cf6dfSJoerg Roedel  * Check and return whether first level is used by default for
1757672cf6dfSJoerg Roedel  * DMA translation.
1758672cf6dfSJoerg Roedel  */
1759032c5ee4SLu Baolu static bool first_level_by_default(unsigned int type)
1760672cf6dfSJoerg Roedel {
1761032c5ee4SLu Baolu 	/* Only SL is available in legacy mode */
1762032c5ee4SLu Baolu 	if (!scalable_mode_support())
1763032c5ee4SLu Baolu 		return false;
1764032c5ee4SLu Baolu 
1765032c5ee4SLu Baolu 	/* Only one level (either FL or SL) is available, just use it */
1766032c5ee4SLu Baolu 	if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
1767032c5ee4SLu Baolu 		return intel_cap_flts_sanity();
1768032c5ee4SLu Baolu 
1769032c5ee4SLu Baolu 	/* Both levels are available, decide it based on domain type */
1770032c5ee4SLu Baolu 	return type != IOMMU_DOMAIN_UNMANAGED;
1771672cf6dfSJoerg Roedel }
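
/*
 * For example, on scalable-mode hardware where both levels pass the
 * capability sanity checks, a DMA API domain uses first-level translation,
 * while an IOMMU_DOMAIN_UNMANAGED domain (e.g. one created via vfio or
 * iommufd) falls back to the second level.
 */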
1772672cf6dfSJoerg Roedel 
1773b34380a6SLu Baolu static struct dmar_domain *alloc_domain(unsigned int type)
1774672cf6dfSJoerg Roedel {
1775672cf6dfSJoerg Roedel 	struct dmar_domain *domain;
1776672cf6dfSJoerg Roedel 
1777ee2653bbSLu Baolu 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1778672cf6dfSJoerg Roedel 	if (!domain)
1779672cf6dfSJoerg Roedel 		return NULL;
1780672cf6dfSJoerg Roedel 
1781672cf6dfSJoerg Roedel 	domain->nid = NUMA_NO_NODE;
1782032c5ee4SLu Baolu 	if (first_level_by_default(type))
1783e5b0feb4SLu Baolu 		domain->use_first_level = true;
1784672cf6dfSJoerg Roedel 	domain->has_iotlb_device = false;
1785672cf6dfSJoerg Roedel 	INIT_LIST_HEAD(&domain->devices);
17867d0c9da6SLu Baolu 	INIT_LIST_HEAD(&domain->dev_pasids);
17875eaafdf0SLu Baolu 	spin_lock_init(&domain->lock);
1788ba949f4cSLu Baolu 	xa_init(&domain->iommu_array);
1789672cf6dfSJoerg Roedel 
1790672cf6dfSJoerg Roedel 	return domain;
1791672cf6dfSJoerg Roedel }
1792672cf6dfSJoerg Roedel 
1793672cf6dfSJoerg Roedel static int domain_attach_iommu(struct dmar_domain *domain,
1794672cf6dfSJoerg Roedel 			       struct intel_iommu *iommu)
1795672cf6dfSJoerg Roedel {
1796ba949f4cSLu Baolu 	struct iommu_domain_info *info, *curr;
1797672cf6dfSJoerg Roedel 	unsigned long ndomains;
1798ba949f4cSLu Baolu 	int num, ret = -ENOSPC;
1799672cf6dfSJoerg Roedel 
1800ba949f4cSLu Baolu 	info = kzalloc(sizeof(*info), GFP_KERNEL);
1801ba949f4cSLu Baolu 	if (!info)
1802ba949f4cSLu Baolu 		return -ENOMEM;
1803672cf6dfSJoerg Roedel 
18042c3262f9SLu Baolu 	spin_lock(&iommu->lock);
1805ba949f4cSLu Baolu 	curr = xa_load(&domain->iommu_array, iommu->seq_id);
1806ba949f4cSLu Baolu 	if (curr) {
1807ba949f4cSLu Baolu 		curr->refcnt++;
1808ba949f4cSLu Baolu 		spin_unlock(&iommu->lock);
1809ba949f4cSLu Baolu 		kfree(info);
1810ba949f4cSLu Baolu 		return 0;
1811ba949f4cSLu Baolu 	}
1812ba949f4cSLu Baolu 
1813672cf6dfSJoerg Roedel 	ndomains = cap_ndoms(iommu->cap);
1814672cf6dfSJoerg Roedel 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1815672cf6dfSJoerg Roedel 	if (num >= ndomains) {
1816672cf6dfSJoerg Roedel 		pr_err("%s: No free domain ids\n", iommu->name);
1817ba949f4cSLu Baolu 		goto err_unlock;
1818672cf6dfSJoerg Roedel 	}
1819672cf6dfSJoerg Roedel 
1820672cf6dfSJoerg Roedel 	set_bit(num, iommu->domain_ids);
1821ba949f4cSLu Baolu 	info->refcnt	= 1;
1822ba949f4cSLu Baolu 	info->did	= num;
1823ba949f4cSLu Baolu 	info->iommu	= iommu;
1824ba949f4cSLu Baolu 	curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1825ba949f4cSLu Baolu 			  NULL, info, GFP_ATOMIC);
1826ba949f4cSLu Baolu 	if (curr) {
1827ba949f4cSLu Baolu 		ret = xa_err(curr) ? : -EBUSY;
1828ba949f4cSLu Baolu 		goto err_clear;
1829672cf6dfSJoerg Roedel 	}
1830ba949f4cSLu Baolu 	domain_update_iommu_cap(domain);
1831672cf6dfSJoerg Roedel 
18322c3262f9SLu Baolu 	spin_unlock(&iommu->lock);
1833672cf6dfSJoerg Roedel 	return 0;
1834ba949f4cSLu Baolu 
1835ba949f4cSLu Baolu err_clear:
1836ba949f4cSLu Baolu 	clear_bit(info->did, iommu->domain_ids);
1837ba949f4cSLu Baolu err_unlock:
1838ba949f4cSLu Baolu 	spin_unlock(&iommu->lock);
1839ba949f4cSLu Baolu 	kfree(info);
18402c3262f9SLu Baolu 	return ret;
1841672cf6dfSJoerg Roedel }
1842672cf6dfSJoerg Roedel 
184374f6d776SParav Pandit static void domain_detach_iommu(struct dmar_domain *domain,
1844672cf6dfSJoerg Roedel 				struct intel_iommu *iommu)
1845672cf6dfSJoerg Roedel {
1846ba949f4cSLu Baolu 	struct iommu_domain_info *info;
1847672cf6dfSJoerg Roedel 
18482c3262f9SLu Baolu 	spin_lock(&iommu->lock);
1849ba949f4cSLu Baolu 	info = xa_load(&domain->iommu_array, iommu->seq_id);
1850ba949f4cSLu Baolu 	if (--info->refcnt == 0) {
1851ba949f4cSLu Baolu 		clear_bit(info->did, iommu->domain_ids);
1852ba949f4cSLu Baolu 		xa_erase(&domain->iommu_array, iommu->seq_id);
1853ba949f4cSLu Baolu 		domain->nid = NUMA_NO_NODE;
1854672cf6dfSJoerg Roedel 		domain_update_iommu_cap(domain);
1855ba949f4cSLu Baolu 		kfree(info);
1856672cf6dfSJoerg Roedel 	}
18572c3262f9SLu Baolu 	spin_unlock(&iommu->lock);
1858672cf6dfSJoerg Roedel }
1859672cf6dfSJoerg Roedel 
1860672cf6dfSJoerg Roedel static inline int guestwidth_to_adjustwidth(int gaw)
1861672cf6dfSJoerg Roedel {
1862672cf6dfSJoerg Roedel 	int agaw;
1863672cf6dfSJoerg Roedel 	int r = (gaw - 12) % 9;
1864672cf6dfSJoerg Roedel 
1865672cf6dfSJoerg Roedel 	if (r == 0)
1866672cf6dfSJoerg Roedel 		agaw = gaw;
1867672cf6dfSJoerg Roedel 	else
1868672cf6dfSJoerg Roedel 		agaw = gaw + 9 - r;
1869672cf6dfSJoerg Roedel 	if (agaw > 64)
1870672cf6dfSJoerg Roedel 		agaw = 64;
1871672cf6dfSJoerg Roedel 	return agaw;
1872672cf6dfSJoerg Roedel }
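
/*
 * For example, a 48-bit guest address width gives (48 - 12) % 9 == 0, so the
 * adjusted width stays 48 (a 4-level table), while gaw = 50 is rounded up to
 * the next supported width, 57.
 */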
1873672cf6dfSJoerg Roedel 
1874672cf6dfSJoerg Roedel static void domain_exit(struct dmar_domain *domain)
1875672cf6dfSJoerg Roedel {
1876672cf6dfSJoerg Roedel 	if (domain->pgd) {
187787f60cc6SMatthew Wilcox (Oracle) 		LIST_HEAD(freelist);
1878672cf6dfSJoerg Roedel 
187987f60cc6SMatthew Wilcox (Oracle) 		domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
188087f60cc6SMatthew Wilcox (Oracle) 		put_pages_list(&freelist);
1881672cf6dfSJoerg Roedel 	}
1882672cf6dfSJoerg Roedel 
188379d82ce4SLu Baolu 	if (WARN_ON(!list_empty(&domain->devices)))
188479d82ce4SLu Baolu 		return;
188579d82ce4SLu Baolu 
1886ee2653bbSLu Baolu 	kfree(domain);
1887672cf6dfSJoerg Roedel }
1888672cf6dfSJoerg Roedel 
1889672cf6dfSJoerg Roedel /*
1890672cf6dfSJoerg Roedel  * Get the PASID directory size for scalable mode context entry.
1891672cf6dfSJoerg Roedel  * Value of X in the PDTS field of a scalable mode context entry
1892672cf6dfSJoerg Roedel  * indicates PASID directory with 2^(X + 7) entries.
1893672cf6dfSJoerg Roedel  */
1894672cf6dfSJoerg Roedel static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1895672cf6dfSJoerg Roedel {
18964599d78aSKees Cook 	unsigned long pds, max_pde;
1897672cf6dfSJoerg Roedel 
1898672cf6dfSJoerg Roedel 	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
18994599d78aSKees Cook 	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
1900672cf6dfSJoerg Roedel 	if (pds < 7)
1901672cf6dfSJoerg Roedel 		return 0;
1902672cf6dfSJoerg Roedel 
1903672cf6dfSJoerg Roedel 	return pds - 7;
1904672cf6dfSJoerg Roedel }
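
/*
 * Worked example, assuming a full 20-bit PASID space (table->max_pasid ==
 * 1 << 20) and PASID_PDE_SHIFT == 6: max_pde is 1 << 14, so pds == 14 and
 * the function returns 7, i.e. a PASID directory with 2^(7 + 7) entries.
 */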
1905672cf6dfSJoerg Roedel 
1906672cf6dfSJoerg Roedel /*
1907672cf6dfSJoerg Roedel  * Set the RID_PASID field of a scalable mode context entry. The
1908672cf6dfSJoerg Roedel  * IOMMU hardware will use the PASID value set in this field for
1909672cf6dfSJoerg Roedel  * DMA translations of DMA requests without PASID.
1910672cf6dfSJoerg Roedel  */
1911672cf6dfSJoerg Roedel static inline void
1912672cf6dfSJoerg Roedel context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1913672cf6dfSJoerg Roedel {
1914672cf6dfSJoerg Roedel 	context->hi |= pasid & ((1 << 20) - 1);
1915672cf6dfSJoerg Roedel }
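
/*
 * The field is 20 bits wide; in this file it is programmed with
 * IOMMU_NO_PASID (see domain_context_mapping_one() below), so requests
 * without PASID are translated through that PASID table entry.
 */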
1916672cf6dfSJoerg Roedel 
1917672cf6dfSJoerg Roedel /*
1918672cf6dfSJoerg Roedel  * Set the DTE(Device-TLB Enable) field of a scalable mode context
1919672cf6dfSJoerg Roedel  * entry.
1920672cf6dfSJoerg Roedel  */
1921672cf6dfSJoerg Roedel static inline void context_set_sm_dte(struct context_entry *context)
1922672cf6dfSJoerg Roedel {
1923b31064f8STina Zhang 	context->lo |= BIT_ULL(2);
1924672cf6dfSJoerg Roedel }
1925672cf6dfSJoerg Roedel 
1926672cf6dfSJoerg Roedel /*
1927672cf6dfSJoerg Roedel  * Set the PRE(Page Request Enable) field of a scalable mode context
1928672cf6dfSJoerg Roedel  * entry.
1929672cf6dfSJoerg Roedel  */
1930672cf6dfSJoerg Roedel static inline void context_set_sm_pre(struct context_entry *context)
1931672cf6dfSJoerg Roedel {
1932b31064f8STina Zhang 	context->lo |= BIT_ULL(4);
1933672cf6dfSJoerg Roedel }
1934672cf6dfSJoerg Roedel 
1935672cf6dfSJoerg Roedel /* Convert value to context PASID directory size field coding. */
1936672cf6dfSJoerg Roedel #define context_pdts(pds)	(((pds) & 0x7) << 9)
1937672cf6dfSJoerg Roedel 
1938672cf6dfSJoerg Roedel static int domain_context_mapping_one(struct dmar_domain *domain,
1939672cf6dfSJoerg Roedel 				      struct intel_iommu *iommu,
1940672cf6dfSJoerg Roedel 				      struct pasid_table *table,
1941672cf6dfSJoerg Roedel 				      u8 bus, u8 devfn)
1942672cf6dfSJoerg Roedel {
1943969aaefbSLu Baolu 	struct device_domain_info *info =
19440faa19a1SLu Baolu 			domain_lookup_dev_info(domain, iommu, bus, devfn);
1945ba949f4cSLu Baolu 	u16 did = domain_id_iommu(domain, iommu);
1946672cf6dfSJoerg Roedel 	int translation = CONTEXT_TT_MULTI_LEVEL;
1947672cf6dfSJoerg Roedel 	struct context_entry *context;
1948672cf6dfSJoerg Roedel 	int ret;
1949672cf6dfSJoerg Roedel 
1950672cf6dfSJoerg Roedel 	if (hw_pass_through && domain_type_is_si(domain))
1951672cf6dfSJoerg Roedel 		translation = CONTEXT_TT_PASS_THROUGH;
1952672cf6dfSJoerg Roedel 
1953672cf6dfSJoerg Roedel 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1954672cf6dfSJoerg Roedel 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1955672cf6dfSJoerg Roedel 
1956672cf6dfSJoerg Roedel 	spin_lock(&iommu->lock);
1957672cf6dfSJoerg Roedel 	ret = -ENOMEM;
1958672cf6dfSJoerg Roedel 	context = iommu_context_addr(iommu, bus, devfn, 1);
1959672cf6dfSJoerg Roedel 	if (!context)
1960672cf6dfSJoerg Roedel 		goto out_unlock;
1961672cf6dfSJoerg Roedel 
1962672cf6dfSJoerg Roedel 	ret = 0;
19630c5f6c0dSLu Baolu 	if (context_present(context) && !context_copied(iommu, bus, devfn))
1964672cf6dfSJoerg Roedel 		goto out_unlock;
1965672cf6dfSJoerg Roedel 
1966672cf6dfSJoerg Roedel 	/*
1967672cf6dfSJoerg Roedel 	 * For kdump cases, old valid entries may be cached due to the
1968672cf6dfSJoerg Roedel 	 * in-flight DMA and copied pgtable, but there is no unmapping
1969672cf6dfSJoerg Roedel 	 * behaviour for them, thus we need an explicit cache flush for
1970672cf6dfSJoerg Roedel 	 * the newly-mapped device. For kdump, at this point, the device
1971672cf6dfSJoerg Roedel 	 * is supposed to finish reset at its driver probe stage, so no
1972672cf6dfSJoerg Roedel 	 * in-flight DMA will exist, and we don't need to worry anymore
1973672cf6dfSJoerg Roedel 	 * hereafter.
1974672cf6dfSJoerg Roedel 	 */
19750c5f6c0dSLu Baolu 	if (context_copied(iommu, bus, devfn)) {
1976672cf6dfSJoerg Roedel 		u16 did_old = context_domain_id(context);
1977672cf6dfSJoerg Roedel 
1978672cf6dfSJoerg Roedel 		if (did_old < cap_ndoms(iommu->cap)) {
1979672cf6dfSJoerg Roedel 			iommu->flush.flush_context(iommu, did_old,
1980672cf6dfSJoerg Roedel 						   (((u16)bus) << 8) | devfn,
1981672cf6dfSJoerg Roedel 						   DMA_CCMD_MASK_NOBIT,
1982672cf6dfSJoerg Roedel 						   DMA_CCMD_DEVICE_INVL);
1983672cf6dfSJoerg Roedel 			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1984672cf6dfSJoerg Roedel 						 DMA_TLB_DSI_FLUSH);
1985672cf6dfSJoerg Roedel 		}
19860c5f6c0dSLu Baolu 
19870c5f6c0dSLu Baolu 		clear_context_copied(iommu, bus, devfn);
1988672cf6dfSJoerg Roedel 	}
1989672cf6dfSJoerg Roedel 
1990672cf6dfSJoerg Roedel 	context_clear_entry(context);
1991672cf6dfSJoerg Roedel 
1992672cf6dfSJoerg Roedel 	if (sm_supported(iommu)) {
1993672cf6dfSJoerg Roedel 		unsigned long pds;
1994672cf6dfSJoerg Roedel 
1995672cf6dfSJoerg Roedel 		/* Setup the PASID DIR pointer: */
1996672cf6dfSJoerg Roedel 		pds = context_get_sm_pds(table);
1997672cf6dfSJoerg Roedel 		context->lo = (u64)virt_to_phys(table->table) |
1998672cf6dfSJoerg Roedel 				context_pdts(pds);
1999672cf6dfSJoerg Roedel 
2000672cf6dfSJoerg Roedel 		/* Setup the RID_PASID field: */
200142987801SJacob Pan 		context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
2002672cf6dfSJoerg Roedel 
2003672cf6dfSJoerg Roedel 		/*
2004672cf6dfSJoerg Roedel 		 * Setup the Device-TLB enable bit and Page request
2005672cf6dfSJoerg Roedel 		 * Enable bit:
2006672cf6dfSJoerg Roedel 		 */
2007672cf6dfSJoerg Roedel 		if (info && info->ats_supported)
2008672cf6dfSJoerg Roedel 			context_set_sm_dte(context);
2009672cf6dfSJoerg Roedel 		if (info && info->pri_supported)
2010672cf6dfSJoerg Roedel 			context_set_sm_pre(context);
20110faa19a1SLu Baolu 		if (info && info->pasid_supported)
20120faa19a1SLu Baolu 			context_set_pasid(context);
2013672cf6dfSJoerg Roedel 	} else {
2014672cf6dfSJoerg Roedel 		struct dma_pte *pgd = domain->pgd;
2015672cf6dfSJoerg Roedel 		int agaw;
2016672cf6dfSJoerg Roedel 
2017672cf6dfSJoerg Roedel 		context_set_domain_id(context, did);
2018672cf6dfSJoerg Roedel 
2019672cf6dfSJoerg Roedel 		if (translation != CONTEXT_TT_PASS_THROUGH) {
2020672cf6dfSJoerg Roedel 			/*
2021672cf6dfSJoerg Roedel 			 * Skip top levels of page tables for iommu which has
2022672cf6dfSJoerg Roedel 			 * less agaw than default. Unnecessary for PT mode.
2023672cf6dfSJoerg Roedel 			 */
2024672cf6dfSJoerg Roedel 			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2025672cf6dfSJoerg Roedel 				ret = -ENOMEM;
2026672cf6dfSJoerg Roedel 				pgd = phys_to_virt(dma_pte_addr(pgd));
2027672cf6dfSJoerg Roedel 				if (!dma_pte_present(pgd))
2028672cf6dfSJoerg Roedel 					goto out_unlock;
2029672cf6dfSJoerg Roedel 			}
2030672cf6dfSJoerg Roedel 
2031672cf6dfSJoerg Roedel 			if (info && info->ats_supported)
2032672cf6dfSJoerg Roedel 				translation = CONTEXT_TT_DEV_IOTLB;
2033672cf6dfSJoerg Roedel 			else
2034672cf6dfSJoerg Roedel 				translation = CONTEXT_TT_MULTI_LEVEL;
2035672cf6dfSJoerg Roedel 
2036672cf6dfSJoerg Roedel 			context_set_address_root(context, virt_to_phys(pgd));
2037672cf6dfSJoerg Roedel 			context_set_address_width(context, agaw);
2038672cf6dfSJoerg Roedel 		} else {
2039672cf6dfSJoerg Roedel 			/*
2040672cf6dfSJoerg Roedel 			 * In pass through mode, AW must be programmed to
2041672cf6dfSJoerg Roedel 			 * indicate the largest AGAW value supported by
2042672cf6dfSJoerg Roedel 			 * hardware. And ASR is ignored by hardware.
2043672cf6dfSJoerg Roedel 			 */
2044672cf6dfSJoerg Roedel 			context_set_address_width(context, iommu->msagaw);
2045672cf6dfSJoerg Roedel 		}
2046672cf6dfSJoerg Roedel 
2047672cf6dfSJoerg Roedel 		context_set_translation_type(context, translation);
2048672cf6dfSJoerg Roedel 	}
2049672cf6dfSJoerg Roedel 
2050672cf6dfSJoerg Roedel 	context_set_fault_enable(context);
2051672cf6dfSJoerg Roedel 	context_set_present(context);
205204c00956SLu Baolu 	if (!ecap_coherent(iommu->ecap))
205304c00956SLu Baolu 		clflush_cache_range(context, sizeof(*context));
2054672cf6dfSJoerg Roedel 
2055672cf6dfSJoerg Roedel 	/*
2056672cf6dfSJoerg Roedel 	 * It's a non-present to present mapping. If hardware doesn't cache
2057672cf6dfSJoerg Roedel 	 * non-present entry we only need to flush the write-buffer. If the
2058672cf6dfSJoerg Roedel 	 * non-present entries we only need to flush the write-buffer. If it
2059672cf6dfSJoerg Roedel 	 * domain #0, which we have to flush:
2060672cf6dfSJoerg Roedel 	 */
2061672cf6dfSJoerg Roedel 	if (cap_caching_mode(iommu->cap)) {
2062672cf6dfSJoerg Roedel 		iommu->flush.flush_context(iommu, 0,
2063672cf6dfSJoerg Roedel 					   (((u16)bus) << 8) | devfn,
2064672cf6dfSJoerg Roedel 					   DMA_CCMD_MASK_NOBIT,
2065672cf6dfSJoerg Roedel 					   DMA_CCMD_DEVICE_INVL);
2066672cf6dfSJoerg Roedel 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2067672cf6dfSJoerg Roedel 	} else {
2068672cf6dfSJoerg Roedel 		iommu_flush_write_buffer(iommu);
2069672cf6dfSJoerg Roedel 	}
2070672cf6dfSJoerg Roedel 
2071672cf6dfSJoerg Roedel 	ret = 0;
2072672cf6dfSJoerg Roedel 
2073672cf6dfSJoerg Roedel out_unlock:
2074672cf6dfSJoerg Roedel 	spin_unlock(&iommu->lock);
2075672cf6dfSJoerg Roedel 
2076672cf6dfSJoerg Roedel 	return ret;
2077672cf6dfSJoerg Roedel }
2078672cf6dfSJoerg Roedel 
2079672cf6dfSJoerg Roedel struct domain_context_mapping_data {
2080672cf6dfSJoerg Roedel 	struct dmar_domain *domain;
2081672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
2082672cf6dfSJoerg Roedel 	struct pasid_table *table;
2083672cf6dfSJoerg Roedel };
2084672cf6dfSJoerg Roedel 
2085672cf6dfSJoerg Roedel static int domain_context_mapping_cb(struct pci_dev *pdev,
2086672cf6dfSJoerg Roedel 				     u16 alias, void *opaque)
2087672cf6dfSJoerg Roedel {
2088672cf6dfSJoerg Roedel 	struct domain_context_mapping_data *data = opaque;
2089672cf6dfSJoerg Roedel 
2090672cf6dfSJoerg Roedel 	return domain_context_mapping_one(data->domain, data->iommu,
2091672cf6dfSJoerg Roedel 					  data->table, PCI_BUS_NUM(alias),
2092672cf6dfSJoerg Roedel 					  alias & 0xff);
2093672cf6dfSJoerg Roedel }
2094672cf6dfSJoerg Roedel 
2095672cf6dfSJoerg Roedel static int
2096672cf6dfSJoerg Roedel domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2097672cf6dfSJoerg Roedel {
2098672cf6dfSJoerg Roedel 	struct domain_context_mapping_data data;
2099672cf6dfSJoerg Roedel 	struct pasid_table *table;
2100672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
2101672cf6dfSJoerg Roedel 	u8 bus, devfn;
2102672cf6dfSJoerg Roedel 
2103672cf6dfSJoerg Roedel 	iommu = device_to_iommu(dev, &bus, &devfn);
2104672cf6dfSJoerg Roedel 	if (!iommu)
2105672cf6dfSJoerg Roedel 		return -ENODEV;
2106672cf6dfSJoerg Roedel 
2107672cf6dfSJoerg Roedel 	table = intel_pasid_get_table(dev);
2108672cf6dfSJoerg Roedel 
2109672cf6dfSJoerg Roedel 	if (!dev_is_pci(dev))
2110672cf6dfSJoerg Roedel 		return domain_context_mapping_one(domain, iommu, table,
2111672cf6dfSJoerg Roedel 						  bus, devfn);
2112672cf6dfSJoerg Roedel 
2113672cf6dfSJoerg Roedel 	data.domain = domain;
2114672cf6dfSJoerg Roedel 	data.iommu = iommu;
2115672cf6dfSJoerg Roedel 	data.table = table;
2116672cf6dfSJoerg Roedel 
2117672cf6dfSJoerg Roedel 	return pci_for_each_dma_alias(to_pci_dev(dev),
2118672cf6dfSJoerg Roedel 				      &domain_context_mapping_cb, &data);
2119672cf6dfSJoerg Roedel }
2120672cf6dfSJoerg Roedel 
2121672cf6dfSJoerg Roedel /* Returns the number of VTD pages, aligned to the MM page size */
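/*
 * Worked example, assuming 4 KiB MM and VT-d pages: host_addr = 0x1234 and
 * size = 0x2000 keep only the in-page offset 0x234, so
 * PAGE_ALIGN(0x234 + 0x2000) >> 12 = 3 VT-d pages are needed.
 */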
2122672cf6dfSJoerg Roedel static inline unsigned long aligned_nrpages(unsigned long host_addr,
2123672cf6dfSJoerg Roedel 					    size_t size)
2124672cf6dfSJoerg Roedel {
2125672cf6dfSJoerg Roedel 	host_addr &= ~PAGE_MASK;
2126672cf6dfSJoerg Roedel 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2127672cf6dfSJoerg Roedel }
2128672cf6dfSJoerg Roedel 
2129672cf6dfSJoerg Roedel /* Return largest possible superpage level for a given mapping */
2130672cf6dfSJoerg Roedel static inline int hardware_largepage_caps(struct dmar_domain *domain,
2131672cf6dfSJoerg Roedel 					  unsigned long iov_pfn,
2132672cf6dfSJoerg Roedel 					  unsigned long phy_pfn,
2133672cf6dfSJoerg Roedel 					  unsigned long pages)
2134672cf6dfSJoerg Roedel {
2135672cf6dfSJoerg Roedel 	int support, level = 1;
2136672cf6dfSJoerg Roedel 	unsigned long pfnmerge;
2137672cf6dfSJoerg Roedel 
2138672cf6dfSJoerg Roedel 	support = domain->iommu_superpage;
2139672cf6dfSJoerg Roedel 
2140672cf6dfSJoerg Roedel 	/* To use a large page, the virtual *and* physical addresses
2141672cf6dfSJoerg Roedel 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2142672cf6dfSJoerg Roedel 	   of them will mean we have to use smaller pages. So just
2143672cf6dfSJoerg Roedel 	   merge them and check both at once. */
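	/*
	 * With the usual 9-bit VT-d stride this means, for example, that
	 * iov_pfn and phy_pfn both being 512-page aligned with at least 512
	 * pages remaining allows level 2 (a 2 MiB superpage on 4 KiB pages).
	 */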
2144672cf6dfSJoerg Roedel 	pfnmerge = iov_pfn | phy_pfn;
2145672cf6dfSJoerg Roedel 
2146672cf6dfSJoerg Roedel 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2147672cf6dfSJoerg Roedel 		pages >>= VTD_STRIDE_SHIFT;
2148672cf6dfSJoerg Roedel 		if (!pages)
2149672cf6dfSJoerg Roedel 			break;
2150672cf6dfSJoerg Roedel 		pfnmerge >>= VTD_STRIDE_SHIFT;
2151672cf6dfSJoerg Roedel 		level++;
2152672cf6dfSJoerg Roedel 		support--;
2153672cf6dfSJoerg Roedel 	}
2154672cf6dfSJoerg Roedel 	return level;
2155672cf6dfSJoerg Roedel }
2156672cf6dfSJoerg Roedel 
215738c527aeSLongpeng(Mike) /*
215838c527aeSLongpeng(Mike)  * Ensure that old small page tables are removed to make room for superpage(s).
215938c527aeSLongpeng(Mike)  * We're going to add new large pages, so make sure we don't remove their parent
216038c527aeSLongpeng(Mike)  * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
216138c527aeSLongpeng(Mike)  */
216238c527aeSLongpeng(Mike) static void switch_to_super_page(struct dmar_domain *domain,
216338c527aeSLongpeng(Mike) 				 unsigned long start_pfn,
216438c527aeSLongpeng(Mike) 				 unsigned long end_pfn, int level)
216538c527aeSLongpeng(Mike) {
216638c527aeSLongpeng(Mike) 	unsigned long lvl_pages = lvl_to_nr_pages(level);
2167ba949f4cSLu Baolu 	struct iommu_domain_info *info;
216838c527aeSLongpeng(Mike) 	struct dma_pte *pte = NULL;
2169ba949f4cSLu Baolu 	unsigned long i;
217038c527aeSLongpeng(Mike) 
217138c527aeSLongpeng(Mike) 	while (start_pfn <= end_pfn) {
217238c527aeSLongpeng(Mike) 		if (!pte)
21732d4d7676SJason Gunthorpe 			pte = pfn_to_dma_pte(domain, start_pfn, &level,
21742d4d7676SJason Gunthorpe 					     GFP_ATOMIC);
217538c527aeSLongpeng(Mike) 
217638c527aeSLongpeng(Mike) 		if (dma_pte_present(pte)) {
217738c527aeSLongpeng(Mike) 			dma_pte_free_pagetable(domain, start_pfn,
217838c527aeSLongpeng(Mike) 					       start_pfn + lvl_pages - 1,
217938c527aeSLongpeng(Mike) 					       level + 1);
218038c527aeSLongpeng(Mike) 
2181ba949f4cSLu Baolu 			xa_for_each(&domain->iommu_array, i, info)
2182ba949f4cSLu Baolu 				iommu_flush_iotlb_psi(info->iommu, domain,
218338c527aeSLongpeng(Mike) 						      start_pfn, lvl_pages,
218438c527aeSLongpeng(Mike) 						      0, 0);
218538c527aeSLongpeng(Mike) 		}
218638c527aeSLongpeng(Mike) 
218738c527aeSLongpeng(Mike) 		pte++;
218838c527aeSLongpeng(Mike) 		start_pfn += lvl_pages;
218938c527aeSLongpeng(Mike) 		if (first_pte_in_page(pte))
219038c527aeSLongpeng(Mike) 			pte = NULL;
219138c527aeSLongpeng(Mike) 	}
219238c527aeSLongpeng(Mike) }
219338c527aeSLongpeng(Mike) 
219458a8bb39SLu Baolu static int
219558a8bb39SLu Baolu __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
21962d4d7676SJason Gunthorpe 		 unsigned long phys_pfn, unsigned long nr_pages, int prot,
21972d4d7676SJason Gunthorpe 		 gfp_t gfp)
2198672cf6dfSJoerg Roedel {
219975cc1018SLu Baolu 	struct dma_pte *first_pte = NULL, *pte = NULL;
2200672cf6dfSJoerg Roedel 	unsigned int largepage_lvl = 0;
2201672cf6dfSJoerg Roedel 	unsigned long lvl_pages = 0;
220258a8bb39SLu Baolu 	phys_addr_t pteval;
2203672cf6dfSJoerg Roedel 	u64 attr;
2204672cf6dfSJoerg Roedel 
2205cbf2f9e8STina Zhang 	if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)))
2206cbf2f9e8STina Zhang 		return -EINVAL;
2207672cf6dfSJoerg Roedel 
2208672cf6dfSJoerg Roedel 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2209672cf6dfSJoerg Roedel 		return -EINVAL;
2210672cf6dfSJoerg Roedel 
2211672cf6dfSJoerg Roedel 	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2212eea53c58SLu Baolu 	attr |= DMA_FL_PTE_PRESENT;
2213e5b0feb4SLu Baolu 	if (domain->use_first_level) {
2214289b3b00SLu Baolu 		attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
2215a8ce9ebbSLu Baolu 		if (prot & DMA_PTE_WRITE)
2216a8ce9ebbSLu Baolu 			attr |= DMA_FL_PTE_DIRTY;
2217a8ce9ebbSLu Baolu 	}
2218a8ce9ebbSLu Baolu 
22199cdfbfc6SLu Baolu 	domain->has_mappings = true;
22209cdfbfc6SLu Baolu 
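	/* Initial PTE value: physical frame address ORed with the attribute bits above. */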
2221672cf6dfSJoerg Roedel 	pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2222672cf6dfSJoerg Roedel 
2223672cf6dfSJoerg Roedel 	while (nr_pages > 0) {
2224672cf6dfSJoerg Roedel 		uint64_t tmp;
2225672cf6dfSJoerg Roedel 
2226672cf6dfSJoerg Roedel 		if (!pte) {
222758a8bb39SLu Baolu 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
222858a8bb39SLu Baolu 					phys_pfn, nr_pages);
2229672cf6dfSJoerg Roedel 
22302d4d7676SJason Gunthorpe 			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
22312d4d7676SJason Gunthorpe 					     gfp);
2232672cf6dfSJoerg Roedel 			if (!pte)
2233672cf6dfSJoerg Roedel 				return -ENOMEM;
223475cc1018SLu Baolu 			first_pte = pte;
223575cc1018SLu Baolu 
22369906b935SLongpeng(Mike) 			lvl_pages = lvl_to_nr_pages(largepage_lvl);
22379906b935SLongpeng(Mike) 
2238672cf6dfSJoerg Roedel 			/* It is a large page */
2239672cf6dfSJoerg Roedel 			if (largepage_lvl > 1) {
224038c527aeSLongpeng(Mike) 				unsigned long end_pfn;
22419906b935SLongpeng(Mike) 				unsigned long pages_to_remove;
2242672cf6dfSJoerg Roedel 
2243672cf6dfSJoerg Roedel 				pteval |= DMA_PTE_LARGE_PAGE;
22449906b935SLongpeng(Mike) 				pages_to_remove = min_t(unsigned long, nr_pages,
22459906b935SLongpeng(Mike) 							nr_pte_to_next_page(pte) * lvl_pages);
22469906b935SLongpeng(Mike) 				end_pfn = iov_pfn + pages_to_remove - 1;
224738c527aeSLongpeng(Mike) 				switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2248672cf6dfSJoerg Roedel 			} else {
2249672cf6dfSJoerg Roedel 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2250672cf6dfSJoerg Roedel 			}
2251672cf6dfSJoerg Roedel 
2252672cf6dfSJoerg Roedel 		}
2253672cf6dfSJoerg Roedel 		/* We don't need a lock here; nobody else
2254672cf6dfSJoerg Roedel 		 * touches the iova range
2255672cf6dfSJoerg Roedel 		 */
2256672cf6dfSJoerg Roedel 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2257672cf6dfSJoerg Roedel 		if (tmp) {
2258672cf6dfSJoerg Roedel 			static int dumps = 5;
2259672cf6dfSJoerg Roedel 			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2260672cf6dfSJoerg Roedel 				iov_pfn, tmp, (unsigned long long)pteval);
2261672cf6dfSJoerg Roedel 			if (dumps) {
2262672cf6dfSJoerg Roedel 				dumps--;
2263672cf6dfSJoerg Roedel 				debug_dma_dump_mappings(NULL);
2264672cf6dfSJoerg Roedel 			}
2265672cf6dfSJoerg Roedel 			WARN_ON(1);
2266672cf6dfSJoerg Roedel 		}
2267672cf6dfSJoerg Roedel 
2268672cf6dfSJoerg Roedel 		nr_pages -= lvl_pages;
2269672cf6dfSJoerg Roedel 		iov_pfn += lvl_pages;
2270672cf6dfSJoerg Roedel 		phys_pfn += lvl_pages;
2271672cf6dfSJoerg Roedel 		pteval += lvl_pages * VTD_PAGE_SIZE;
2272672cf6dfSJoerg Roedel 
2273672cf6dfSJoerg Roedel 		/* If the next PTE would be the first in a new page, then we
227458a8bb39SLu Baolu 		 * need to flush the cache on the entries we've just written.
227558a8bb39SLu Baolu 		 * And then we'll need to recalculate 'pte', so clear it and
227658a8bb39SLu Baolu 		 * let it get set again in the if (!pte) block above.
227758a8bb39SLu Baolu 		 *
227858a8bb39SLu Baolu 		 * If we're done (!nr_pages) we need to flush the cache too.
227958a8bb39SLu Baolu 		 *
228058a8bb39SLu Baolu 		 * Also if we've been setting superpages, we may need to
228158a8bb39SLu Baolu 		 * recalculate 'pte' and switch back to smaller pages for the
228258a8bb39SLu Baolu 		 * end of the mapping, if the trailing size is not enough to
228358a8bb39SLu Baolu 		 * use another superpage (i.e. nr_pages < lvl_pages).
228458a8bb39SLu Baolu 		 */
2285672cf6dfSJoerg Roedel 		pte++;
2286672cf6dfSJoerg Roedel 		if (!nr_pages || first_pte_in_page(pte) ||
228775cc1018SLu Baolu 		    (largepage_lvl > 1 && nr_pages < lvl_pages)) {
228875cc1018SLu Baolu 			domain_flush_cache(domain, first_pte,
228975cc1018SLu Baolu 					   (void *)pte - (void *)first_pte);
2290672cf6dfSJoerg Roedel 			pte = NULL;
2291672cf6dfSJoerg Roedel 		}
229275cc1018SLu Baolu 	}
2293672cf6dfSJoerg Roedel 
2294672cf6dfSJoerg Roedel 	return 0;
2295672cf6dfSJoerg Roedel }
2296672cf6dfSJoerg Roedel 
229737764b95SSanjay Kumar static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2298672cf6dfSJoerg Roedel {
229937764b95SSanjay Kumar 	struct intel_iommu *iommu = info->iommu;
2300672cf6dfSJoerg Roedel 	struct context_entry *context;
2301672cf6dfSJoerg Roedel 	u16 did_old;
2302672cf6dfSJoerg Roedel 
2303672cf6dfSJoerg Roedel 	if (!iommu)
2304672cf6dfSJoerg Roedel 		return;
2305672cf6dfSJoerg Roedel 
2306ffd5869dSLu Baolu 	spin_lock(&iommu->lock);
2307672cf6dfSJoerg Roedel 	context = iommu_context_addr(iommu, bus, devfn, 0);
2308672cf6dfSJoerg Roedel 	if (!context) {
2309ffd5869dSLu Baolu 		spin_unlock(&iommu->lock);
2310672cf6dfSJoerg Roedel 		return;
2311672cf6dfSJoerg Roedel 	}
231237764b95SSanjay Kumar 
231337764b95SSanjay Kumar 	if (sm_supported(iommu)) {
231437764b95SSanjay Kumar 		if (hw_pass_through && domain_type_is_si(info->domain))
231537764b95SSanjay Kumar 			did_old = FLPT_DEFAULT_DID;
231637764b95SSanjay Kumar 		else
2317ba949f4cSLu Baolu 			did_old = domain_id_iommu(info->domain, iommu);
231837764b95SSanjay Kumar 	} else {
2319672cf6dfSJoerg Roedel 		did_old = context_domain_id(context);
232037764b95SSanjay Kumar 	}
232137764b95SSanjay Kumar 
2322672cf6dfSJoerg Roedel 	context_clear_entry(context);
2323672cf6dfSJoerg Roedel 	__iommu_flush_cache(iommu, context, sizeof(*context));
2324ffd5869dSLu Baolu 	spin_unlock(&iommu->lock);
2325672cf6dfSJoerg Roedel 	iommu->flush.flush_context(iommu,
2326672cf6dfSJoerg Roedel 				   did_old,
2327672cf6dfSJoerg Roedel 				   (((u16)bus) << 8) | devfn,
2328672cf6dfSJoerg Roedel 				   DMA_CCMD_MASK_NOBIT,
2329672cf6dfSJoerg Roedel 				   DMA_CCMD_DEVICE_INVL);
2330c0474a60SLu Baolu 
2331c0474a60SLu Baolu 	if (sm_supported(iommu))
2332c0474a60SLu Baolu 		qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2333c0474a60SLu Baolu 
2334672cf6dfSJoerg Roedel 	iommu->flush.flush_iotlb(iommu,
2335672cf6dfSJoerg Roedel 				 did_old,
2336672cf6dfSJoerg Roedel 				 0,
2337672cf6dfSJoerg Roedel 				 0,
2338672cf6dfSJoerg Roedel 				 DMA_TLB_DSI_FLUSH);
233937764b95SSanjay Kumar 
234037764b95SSanjay Kumar 	__iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2341672cf6dfSJoerg Roedel }
2342672cf6dfSJoerg Roedel 
2343672cf6dfSJoerg Roedel static int domain_setup_first_level(struct intel_iommu *iommu,
2344672cf6dfSJoerg Roedel 				    struct dmar_domain *domain,
2345672cf6dfSJoerg Roedel 				    struct device *dev,
2346c7b6bac9SFenghua Yu 				    u32 pasid)
2347672cf6dfSJoerg Roedel {
2348672cf6dfSJoerg Roedel 	struct dma_pte *pgd = domain->pgd;
2349672cf6dfSJoerg Roedel 	int agaw, level;
235054c80d90SLu Baolu 	int flags = 0;
2351672cf6dfSJoerg Roedel 
2352672cf6dfSJoerg Roedel 	/*
2353672cf6dfSJoerg Roedel 	 * Skip the top levels of the page tables when the IOMMU supports
2354672cf6dfSJoerg Roedel 	 * a smaller agaw than the domain's default. Unnecessary for PT mode.
2355672cf6dfSJoerg Roedel 	 */
2356672cf6dfSJoerg Roedel 	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2357672cf6dfSJoerg Roedel 		pgd = phys_to_virt(dma_pte_addr(pgd));
2358672cf6dfSJoerg Roedel 		if (!dma_pte_present(pgd))
2359672cf6dfSJoerg Roedel 			return -ENOMEM;
2360672cf6dfSJoerg Roedel 	}
2361672cf6dfSJoerg Roedel 
2362672cf6dfSJoerg Roedel 	level = agaw_to_level(agaw);
2363672cf6dfSJoerg Roedel 	if (level != 4 && level != 5)
2364672cf6dfSJoerg Roedel 		return -EINVAL;
2365672cf6dfSJoerg Roedel 
236654c80d90SLu Baolu 	if (level == 5)
236754c80d90SLu Baolu 		flags |= PASID_FLAG_FL5LP;
2368672cf6dfSJoerg Roedel 
2369fc0051cbSLu Baolu 	if (domain->force_snooping)
23706c00612dSLu Baolu 		flags |= PASID_FLAG_PAGE_SNOOP;
23716c00612dSLu Baolu 
2372672cf6dfSJoerg Roedel 	return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2373ba949f4cSLu Baolu 					     domain_id_iommu(domain, iommu),
2374672cf6dfSJoerg Roedel 					     flags);
2375672cf6dfSJoerg Roedel }
2376672cf6dfSJoerg Roedel 
2377672cf6dfSJoerg Roedel static bool dev_is_real_dma_subdevice(struct device *dev)
2378672cf6dfSJoerg Roedel {
2379672cf6dfSJoerg Roedel 	return dev && dev_is_pci(dev) &&
2380672cf6dfSJoerg Roedel 	       pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2381672cf6dfSJoerg Roedel }
2382672cf6dfSJoerg Roedel 
2383672cf6dfSJoerg Roedel static int iommu_domain_identity_map(struct dmar_domain *domain,
2384672cf6dfSJoerg Roedel 				     unsigned long first_vpfn,
2385672cf6dfSJoerg Roedel 				     unsigned long last_vpfn)
2386672cf6dfSJoerg Roedel {
2387672cf6dfSJoerg Roedel 	/*
2388672cf6dfSJoerg Roedel 	 * The RMRR range might overlap with a physical memory range,
2389672cf6dfSJoerg Roedel 	 * so clear it first.
2390672cf6dfSJoerg Roedel 	 */
2391672cf6dfSJoerg Roedel 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2392672cf6dfSJoerg Roedel 
239358a8bb39SLu Baolu 	return __domain_mapping(domain, first_vpfn,
2394672cf6dfSJoerg Roedel 				first_vpfn, last_vpfn - first_vpfn + 1,
23954951eb26SJason Gunthorpe 				DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
2396672cf6dfSJoerg Roedel }
2397672cf6dfSJoerg Roedel 
2398672cf6dfSJoerg Roedel static int md_domain_init(struct dmar_domain *domain, int guest_width);
2399672cf6dfSJoerg Roedel 
2400672cf6dfSJoerg Roedel static int __init si_domain_init(int hw)
2401672cf6dfSJoerg Roedel {
2402672cf6dfSJoerg Roedel 	struct dmar_rmrr_unit *rmrr;
2403672cf6dfSJoerg Roedel 	struct device *dev;
2404672cf6dfSJoerg Roedel 	int i, nid, ret;
2405672cf6dfSJoerg Roedel 
2406b34380a6SLu Baolu 	si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
2407672cf6dfSJoerg Roedel 	if (!si_domain)
2408672cf6dfSJoerg Roedel 		return -EFAULT;
2409672cf6dfSJoerg Roedel 
2410672cf6dfSJoerg Roedel 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2411672cf6dfSJoerg Roedel 		domain_exit(si_domain);
2412620bf9f9SJerry Snitselaar 		si_domain = NULL;
2413672cf6dfSJoerg Roedel 		return -EFAULT;
2414672cf6dfSJoerg Roedel 	}
2415672cf6dfSJoerg Roedel 
2416672cf6dfSJoerg Roedel 	if (hw)
2417672cf6dfSJoerg Roedel 		return 0;
2418672cf6dfSJoerg Roedel 
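	/* Build a 1:1 mapping of all usable system RAM into the si_domain. */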
2419672cf6dfSJoerg Roedel 	for_each_online_node(nid) {
2420672cf6dfSJoerg Roedel 		unsigned long start_pfn, end_pfn;
2421672cf6dfSJoerg Roedel 		int i;
2422672cf6dfSJoerg Roedel 
2423672cf6dfSJoerg Roedel 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2424672cf6dfSJoerg Roedel 			ret = iommu_domain_identity_map(si_domain,
2425fb5f50a4SYanfei Xu 					mm_to_dma_pfn_start(start_pfn),
2426ddeff5d6SJon Pan-Doh 					mm_to_dma_pfn_end(end_pfn-1));
2427672cf6dfSJoerg Roedel 			if (ret)
2428672cf6dfSJoerg Roedel 				return ret;
2429672cf6dfSJoerg Roedel 		}
2430672cf6dfSJoerg Roedel 	}
2431672cf6dfSJoerg Roedel 
2432672cf6dfSJoerg Roedel 	/*
2433672cf6dfSJoerg Roedel 	 * Identity map the RMRRs so that devices with RMRRs can also use
2434672cf6dfSJoerg Roedel 	 * the si_domain.
2435672cf6dfSJoerg Roedel 	 */
2436672cf6dfSJoerg Roedel 	for_each_rmrr_units(rmrr) {
2437672cf6dfSJoerg Roedel 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2438672cf6dfSJoerg Roedel 					  i, dev) {
2439672cf6dfSJoerg Roedel 			unsigned long long start = rmrr->base_address;
2440672cf6dfSJoerg Roedel 			unsigned long long end = rmrr->end_address;
2441672cf6dfSJoerg Roedel 
2442672cf6dfSJoerg Roedel 			if (WARN_ON(end < start ||
2443672cf6dfSJoerg Roedel 				    end >> agaw_to_width(si_domain->agaw)))
2444672cf6dfSJoerg Roedel 				continue;
2445672cf6dfSJoerg Roedel 
244648f0bcfbSLu Baolu 			ret = iommu_domain_identity_map(si_domain,
2447fb5f50a4SYanfei Xu 					mm_to_dma_pfn_start(start >> PAGE_SHIFT),
2448fb5f50a4SYanfei Xu 					mm_to_dma_pfn_end(end >> PAGE_SHIFT));
2449672cf6dfSJoerg Roedel 			if (ret)
2450672cf6dfSJoerg Roedel 				return ret;
2451672cf6dfSJoerg Roedel 		}
2452672cf6dfSJoerg Roedel 	}
2453672cf6dfSJoerg Roedel 
2454672cf6dfSJoerg Roedel 	return 0;
2455672cf6dfSJoerg Roedel }
2456672cf6dfSJoerg Roedel 
2457a8204479SLu Baolu static int dmar_domain_attach_device(struct dmar_domain *domain,
2458a8204479SLu Baolu 				     struct device *dev)
2459672cf6dfSJoerg Roedel {
2460bac4e778SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2461672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
2462a349ffcbSLu Baolu 	unsigned long flags;
2463672cf6dfSJoerg Roedel 	u8 bus, devfn;
2464bac4e778SLu Baolu 	int ret;
2465672cf6dfSJoerg Roedel 
2466672cf6dfSJoerg Roedel 	iommu = device_to_iommu(dev, &bus, &devfn);
2467672cf6dfSJoerg Roedel 	if (!iommu)
2468672cf6dfSJoerg Roedel 		return -ENODEV;
2469672cf6dfSJoerg Roedel 
2470bac4e778SLu Baolu 	ret = domain_attach_iommu(domain, iommu);
2471969aaefbSLu Baolu 	if (ret)
2472bac4e778SLu Baolu 		return ret;
2473969aaefbSLu Baolu 	info->domain = domain;
2474a349ffcbSLu Baolu 	spin_lock_irqsave(&domain->lock, flags);
2475bac4e778SLu Baolu 	list_add(&info->link, &domain->devices);
2476a349ffcbSLu Baolu 	spin_unlock_irqrestore(&domain->lock, flags);
2477bac4e778SLu Baolu 
2478bac4e778SLu Baolu 	/* PASID table is mandatory for a PCI device in scalable mode. */
2479bac4e778SLu Baolu 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
2480bac4e778SLu Baolu 		/* Setup the PASID entry for requests without PASID: */
2481bac4e778SLu Baolu 		if (hw_pass_through && domain_type_is_si(domain))
2482bac4e778SLu Baolu 			ret = intel_pasid_setup_pass_through(iommu, domain,
248342987801SJacob Pan 					dev, IOMMU_NO_PASID);
2484e5b0feb4SLu Baolu 		else if (domain->use_first_level)
2485bac4e778SLu Baolu 			ret = domain_setup_first_level(iommu, domain, dev,
248642987801SJacob Pan 					IOMMU_NO_PASID);
2487bac4e778SLu Baolu 		else
2488bac4e778SLu Baolu 			ret = intel_pasid_setup_second_level(iommu, domain,
248942987801SJacob Pan 					dev, IOMMU_NO_PASID);
2490bac4e778SLu Baolu 		if (ret) {
2491bac4e778SLu Baolu 			dev_err(dev, "Setup RID2PASID failed\n");
2492c7be17c2SLu Baolu 			device_block_translation(dev);
2493bac4e778SLu Baolu 			return ret;
2494bac4e778SLu Baolu 		}
2495bac4e778SLu Baolu 	}
2496bac4e778SLu Baolu 
2497bac4e778SLu Baolu 	ret = domain_context_mapping(domain, dev);
2498bac4e778SLu Baolu 	if (ret) {
2499bac4e778SLu Baolu 		dev_err(dev, "Domain context map failed\n");
2500c7be17c2SLu Baolu 		device_block_translation(dev);
2501bac4e778SLu Baolu 		return ret;
2502bac4e778SLu Baolu 	}
2503672cf6dfSJoerg Roedel 
2504c0b0cfd9SLu Baolu 	if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
2505c7be17c2SLu Baolu 		iommu_enable_pci_caps(info);
2506c7be17c2SLu Baolu 
2507672cf6dfSJoerg Roedel 	return 0;
2508672cf6dfSJoerg Roedel }
2509672cf6dfSJoerg Roedel 
2510672cf6dfSJoerg Roedel /**
2511672cf6dfSJoerg Roedel  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2512672cf6dfSJoerg Roedel  * is relaxable (ie. is allowed to be not enforced under some conditions)
2513672cf6dfSJoerg Roedel  * @dev: device handle
2514672cf6dfSJoerg Roedel  *
2515672cf6dfSJoerg Roedel  * We assume that PCI USB devices with RMRRs have them largely
2516672cf6dfSJoerg Roedel  * for historical reasons and that the RMRR space is not actively used post
2517672cf6dfSJoerg Roedel  * boot.  This exclusion may change if vendors begin to abuse it.
2518672cf6dfSJoerg Roedel  *
2519672cf6dfSJoerg Roedel  * The same exception is made for graphics devices, with the requirement that
2520672cf6dfSJoerg Roedel  * any use of the RMRR regions will be torn down before assigning the device
2521672cf6dfSJoerg Roedel  * to a guest.
2522672cf6dfSJoerg Roedel  *
2523672cf6dfSJoerg Roedel  * Return: true if the RMRR is relaxable, false otherwise
2524672cf6dfSJoerg Roedel  */
2525672cf6dfSJoerg Roedel static bool device_rmrr_is_relaxable(struct device *dev)
2526672cf6dfSJoerg Roedel {
2527672cf6dfSJoerg Roedel 	struct pci_dev *pdev;
2528672cf6dfSJoerg Roedel 
2529672cf6dfSJoerg Roedel 	if (!dev_is_pci(dev))
2530672cf6dfSJoerg Roedel 		return false;
2531672cf6dfSJoerg Roedel 
2532672cf6dfSJoerg Roedel 	pdev = to_pci_dev(dev);
2533672cf6dfSJoerg Roedel 	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2534672cf6dfSJoerg Roedel 		return true;
2535672cf6dfSJoerg Roedel 	else
2536672cf6dfSJoerg Roedel 		return false;
2537672cf6dfSJoerg Roedel }
2538672cf6dfSJoerg Roedel 
2539672cf6dfSJoerg Roedel /*
2540672cf6dfSJoerg Roedel  * Return the required default domain type for a specific device.
2541672cf6dfSJoerg Roedel  *
2542672cf6dfSJoerg Roedel  * @dev: the device in query
2544672cf6dfSJoerg Roedel  *
2545672cf6dfSJoerg Roedel  * Returns:
2546672cf6dfSJoerg Roedel  *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2547672cf6dfSJoerg Roedel  *  - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2548672cf6dfSJoerg Roedel  *  - 0: both identity and dynamic domains work for this device
2549672cf6dfSJoerg Roedel  */
2550672cf6dfSJoerg Roedel static int device_def_domain_type(struct device *dev)
2551672cf6dfSJoerg Roedel {
2552672cf6dfSJoerg Roedel 	if (dev_is_pci(dev)) {
2553672cf6dfSJoerg Roedel 		struct pci_dev *pdev = to_pci_dev(dev);
2554672cf6dfSJoerg Roedel 
2555672cf6dfSJoerg Roedel 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2556672cf6dfSJoerg Roedel 			return IOMMU_DOMAIN_IDENTITY;
2557672cf6dfSJoerg Roedel 
2558672cf6dfSJoerg Roedel 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2559672cf6dfSJoerg Roedel 			return IOMMU_DOMAIN_IDENTITY;
2560672cf6dfSJoerg Roedel 	}
2561672cf6dfSJoerg Roedel 
2562672cf6dfSJoerg Roedel 	return 0;
2563672cf6dfSJoerg Roedel }
2564672cf6dfSJoerg Roedel 
2565672cf6dfSJoerg Roedel static void intel_iommu_init_qi(struct intel_iommu *iommu)
2566672cf6dfSJoerg Roedel {
2567672cf6dfSJoerg Roedel 	/*
2568672cf6dfSJoerg Roedel 	 * Start from a sane IOMMU hardware state.
2569672cf6dfSJoerg Roedel 	 * If queued invalidation was already initialized by us
2570672cf6dfSJoerg Roedel 	 * (for example, while enabling interrupt-remapping) then
2571672cf6dfSJoerg Roedel 	 * things are already rolling from a sane state.
2572672cf6dfSJoerg Roedel 	 */
2573672cf6dfSJoerg Roedel 	if (!iommu->qi) {
2574672cf6dfSJoerg Roedel 		/*
2575672cf6dfSJoerg Roedel 		 * Clear any previous faults.
2576672cf6dfSJoerg Roedel 		 */
2577672cf6dfSJoerg Roedel 		dmar_fault(-1, iommu);
2578672cf6dfSJoerg Roedel 		/*
2579672cf6dfSJoerg Roedel 		 * Disable queued invalidation if supported and already enabled
2580672cf6dfSJoerg Roedel 		 * before OS handover.
2581672cf6dfSJoerg Roedel 		 */
2582672cf6dfSJoerg Roedel 		dmar_disable_qi(iommu);
2583672cf6dfSJoerg Roedel 	}
2584672cf6dfSJoerg Roedel 
2585672cf6dfSJoerg Roedel 	if (dmar_enable_qi(iommu)) {
2586672cf6dfSJoerg Roedel 		/*
2587672cf6dfSJoerg Roedel 		 * Queued invalidation is not enabled; use register-based invalidation
2588672cf6dfSJoerg Roedel 		 */
2589672cf6dfSJoerg Roedel 		iommu->flush.flush_context = __iommu_flush_context;
2590672cf6dfSJoerg Roedel 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2591672cf6dfSJoerg Roedel 		pr_info("%s: Using Register based invalidation\n",
2592672cf6dfSJoerg Roedel 			iommu->name);
2593672cf6dfSJoerg Roedel 	} else {
2594672cf6dfSJoerg Roedel 		iommu->flush.flush_context = qi_flush_context;
2595672cf6dfSJoerg Roedel 		iommu->flush.flush_iotlb = qi_flush_iotlb;
2596672cf6dfSJoerg Roedel 		pr_info("%s: Using Queued invalidation\n", iommu->name);
2597672cf6dfSJoerg Roedel 	}
2598672cf6dfSJoerg Roedel }
2599672cf6dfSJoerg Roedel 
2600672cf6dfSJoerg Roedel static int copy_context_table(struct intel_iommu *iommu,
2601672cf6dfSJoerg Roedel 			      struct root_entry *old_re,
2602672cf6dfSJoerg Roedel 			      struct context_entry **tbl,
2603672cf6dfSJoerg Roedel 			      int bus, bool ext)
2604672cf6dfSJoerg Roedel {
2605672cf6dfSJoerg Roedel 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2606672cf6dfSJoerg Roedel 	struct context_entry *new_ce = NULL, ce;
2607672cf6dfSJoerg Roedel 	struct context_entry *old_ce = NULL;
2608672cf6dfSJoerg Roedel 	struct root_entry re;
2609672cf6dfSJoerg Roedel 	phys_addr_t old_ce_phys;
2610672cf6dfSJoerg Roedel 
2611672cf6dfSJoerg Roedel 	tbl_idx = ext ? bus * 2 : bus;
2612672cf6dfSJoerg Roedel 	memcpy(&re, old_re, sizeof(re));
2613672cf6dfSJoerg Roedel 
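	/*
	 * With extended (scalable-mode) context entries each entry is twice as
	 * large, so every bus is split across two context tables: devfn < 0x80
	 * hangs off the lower context-table pointer, the rest off the upper one.
	 */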
2614672cf6dfSJoerg Roedel 	for (devfn = 0; devfn < 256; devfn++) {
2615672cf6dfSJoerg Roedel 		/* First calculate the correct index */
2616672cf6dfSJoerg Roedel 		idx = (ext ? devfn * 2 : devfn) % 256;
2617672cf6dfSJoerg Roedel 
2618672cf6dfSJoerg Roedel 		if (idx == 0) {
2619672cf6dfSJoerg Roedel 			/* First save what we may have and clean up */
2620672cf6dfSJoerg Roedel 			if (new_ce) {
2621672cf6dfSJoerg Roedel 				tbl[tbl_idx] = new_ce;
2622672cf6dfSJoerg Roedel 				__iommu_flush_cache(iommu, new_ce,
2623672cf6dfSJoerg Roedel 						    VTD_PAGE_SIZE);
2624672cf6dfSJoerg Roedel 				pos = 1;
2625672cf6dfSJoerg Roedel 			}
2626672cf6dfSJoerg Roedel 
2627672cf6dfSJoerg Roedel 			if (old_ce)
2628672cf6dfSJoerg Roedel 				memunmap(old_ce);
2629672cf6dfSJoerg Roedel 
2630672cf6dfSJoerg Roedel 			ret = 0;
2631672cf6dfSJoerg Roedel 			if (devfn < 0x80)
2632672cf6dfSJoerg Roedel 				old_ce_phys = root_entry_lctp(&re);
2633672cf6dfSJoerg Roedel 			else
2634672cf6dfSJoerg Roedel 				old_ce_phys = root_entry_uctp(&re);
2635672cf6dfSJoerg Roedel 
2636672cf6dfSJoerg Roedel 			if (!old_ce_phys) {
2637672cf6dfSJoerg Roedel 				if (ext && devfn == 0) {
2638672cf6dfSJoerg Roedel 					/* No LCTP, try UCTP */
2639672cf6dfSJoerg Roedel 					devfn = 0x7f;
2640672cf6dfSJoerg Roedel 					continue;
2641672cf6dfSJoerg Roedel 				} else {
2642672cf6dfSJoerg Roedel 					goto out;
2643672cf6dfSJoerg Roedel 				}
2644672cf6dfSJoerg Roedel 			}
2645672cf6dfSJoerg Roedel 
2646672cf6dfSJoerg Roedel 			ret = -ENOMEM;
2647672cf6dfSJoerg Roedel 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
2648672cf6dfSJoerg Roedel 					MEMREMAP_WB);
2649672cf6dfSJoerg Roedel 			if (!old_ce)
2650672cf6dfSJoerg Roedel 				goto out;
2651672cf6dfSJoerg Roedel 
26524951eb26SJason Gunthorpe 			new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
2653672cf6dfSJoerg Roedel 			if (!new_ce)
2654672cf6dfSJoerg Roedel 				goto out_unmap;
2655672cf6dfSJoerg Roedel 
2656672cf6dfSJoerg Roedel 			ret = 0;
2657672cf6dfSJoerg Roedel 		}
2658672cf6dfSJoerg Roedel 
2659672cf6dfSJoerg Roedel 		/* Now copy the context entry */
2660672cf6dfSJoerg Roedel 		memcpy(&ce, old_ce + idx, sizeof(ce));
2661672cf6dfSJoerg Roedel 
26620c5f6c0dSLu Baolu 		if (!context_present(&ce))
2663672cf6dfSJoerg Roedel 			continue;
2664672cf6dfSJoerg Roedel 
2665672cf6dfSJoerg Roedel 		did = context_domain_id(&ce);
2666672cf6dfSJoerg Roedel 		if (did >= 0 && did < cap_ndoms(iommu->cap))
2667672cf6dfSJoerg Roedel 			set_bit(did, iommu->domain_ids);
2668672cf6dfSJoerg Roedel 
26690c5f6c0dSLu Baolu 		set_context_copied(iommu, bus, devfn);
2670672cf6dfSJoerg Roedel 		new_ce[idx] = ce;
2671672cf6dfSJoerg Roedel 	}
2672672cf6dfSJoerg Roedel 
2673672cf6dfSJoerg Roedel 	tbl[tbl_idx + pos] = new_ce;
2674672cf6dfSJoerg Roedel 
2675672cf6dfSJoerg Roedel 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2676672cf6dfSJoerg Roedel 
2677672cf6dfSJoerg Roedel out_unmap:
2678672cf6dfSJoerg Roedel 	memunmap(old_ce);
2679672cf6dfSJoerg Roedel 
2680672cf6dfSJoerg Roedel out:
2681672cf6dfSJoerg Roedel 	return ret;
2682672cf6dfSJoerg Roedel }
2683672cf6dfSJoerg Roedel 
2684672cf6dfSJoerg Roedel static int copy_translation_tables(struct intel_iommu *iommu)
2685672cf6dfSJoerg Roedel {
2686672cf6dfSJoerg Roedel 	struct context_entry **ctxt_tbls;
2687672cf6dfSJoerg Roedel 	struct root_entry *old_rt;
2688672cf6dfSJoerg Roedel 	phys_addr_t old_rt_phys;
2689672cf6dfSJoerg Roedel 	int ctxt_table_entries;
2690672cf6dfSJoerg Roedel 	u64 rtaddr_reg;
2691672cf6dfSJoerg Roedel 	int bus, ret;
2692672cf6dfSJoerg Roedel 	bool new_ext, ext;
2693672cf6dfSJoerg Roedel 
2694672cf6dfSJoerg Roedel 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
26950c5f6c0dSLu Baolu 	ext        = !!(rtaddr_reg & DMA_RTADDR_SMT);
26960c5f6c0dSLu Baolu 	new_ext    = !!sm_supported(iommu);
2697672cf6dfSJoerg Roedel 
2698672cf6dfSJoerg Roedel 	/*
2699672cf6dfSJoerg Roedel 	 * The RTT bit can only be changed when translation is disabled,
2700672cf6dfSJoerg Roedel 	 * but disabling translation means to open a window for data
2701672cf6dfSJoerg Roedel 	 * corruption. So bail out and don't copy anything if we would
2702672cf6dfSJoerg Roedel 	 * have to change the bit.
2703672cf6dfSJoerg Roedel 	 */
2704672cf6dfSJoerg Roedel 	if (new_ext != ext)
2705672cf6dfSJoerg Roedel 		return -EINVAL;
2706672cf6dfSJoerg Roedel 
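	/* One bit per (bus, devfn): tracks context entries copied from the old kernel. */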
27070c5f6c0dSLu Baolu 	iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
27080c5f6c0dSLu Baolu 	if (!iommu->copied_tables)
27090c5f6c0dSLu Baolu 		return -ENOMEM;
27100c5f6c0dSLu Baolu 
2711672cf6dfSJoerg Roedel 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2712672cf6dfSJoerg Roedel 	if (!old_rt_phys)
2713672cf6dfSJoerg Roedel 		return -EINVAL;
2714672cf6dfSJoerg Roedel 
2715672cf6dfSJoerg Roedel 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
2716672cf6dfSJoerg Roedel 	if (!old_rt)
2717672cf6dfSJoerg Roedel 		return -ENOMEM;
2718672cf6dfSJoerg Roedel 
2719672cf6dfSJoerg Roedel 	/* This is too big for the stack - allocate it from slab */
2720672cf6dfSJoerg Roedel 	ctxt_table_entries = ext ? 512 : 256;
2721672cf6dfSJoerg Roedel 	ret = -ENOMEM;
2722672cf6dfSJoerg Roedel 	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
2723672cf6dfSJoerg Roedel 	if (!ctxt_tbls)
2724672cf6dfSJoerg Roedel 		goto out_unmap;
2725672cf6dfSJoerg Roedel 
2726672cf6dfSJoerg Roedel 	for (bus = 0; bus < 256; bus++) {
2727672cf6dfSJoerg Roedel 		ret = copy_context_table(iommu, &old_rt[bus],
2728672cf6dfSJoerg Roedel 					 ctxt_tbls, bus, ext);
2729672cf6dfSJoerg Roedel 		if (ret) {
2730672cf6dfSJoerg Roedel 			pr_err("%s: Failed to copy context table for bus %d\n",
2731672cf6dfSJoerg Roedel 				iommu->name, bus);
2732672cf6dfSJoerg Roedel 			continue;
2733672cf6dfSJoerg Roedel 		}
2734672cf6dfSJoerg Roedel 	}
2735672cf6dfSJoerg Roedel 
2736ffd5869dSLu Baolu 	spin_lock(&iommu->lock);
2737672cf6dfSJoerg Roedel 
2738672cf6dfSJoerg Roedel 	/* Context tables are copied, now write them to the root_entry table */
2739672cf6dfSJoerg Roedel 	for (bus = 0; bus < 256; bus++) {
2740672cf6dfSJoerg Roedel 		int idx = ext ? bus * 2 : bus;
2741672cf6dfSJoerg Roedel 		u64 val;
2742672cf6dfSJoerg Roedel 
2743672cf6dfSJoerg Roedel 		if (ctxt_tbls[idx]) {
2744672cf6dfSJoerg Roedel 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
2745672cf6dfSJoerg Roedel 			iommu->root_entry[bus].lo = val;
2746672cf6dfSJoerg Roedel 		}
2747672cf6dfSJoerg Roedel 
2748672cf6dfSJoerg Roedel 		if (!ext || !ctxt_tbls[idx + 1])
2749672cf6dfSJoerg Roedel 			continue;
2750672cf6dfSJoerg Roedel 
2751672cf6dfSJoerg Roedel 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2752672cf6dfSJoerg Roedel 		iommu->root_entry[bus].hi = val;
2753672cf6dfSJoerg Roedel 	}
2754672cf6dfSJoerg Roedel 
2755ffd5869dSLu Baolu 	spin_unlock(&iommu->lock);
2756672cf6dfSJoerg Roedel 
2757672cf6dfSJoerg Roedel 	kfree(ctxt_tbls);
2758672cf6dfSJoerg Roedel 
2759672cf6dfSJoerg Roedel 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2760672cf6dfSJoerg Roedel 
2761672cf6dfSJoerg Roedel 	ret = 0;
2762672cf6dfSJoerg Roedel 
2763672cf6dfSJoerg Roedel out_unmap:
2764672cf6dfSJoerg Roedel 	memunmap(old_rt);
2765672cf6dfSJoerg Roedel 
2766672cf6dfSJoerg Roedel 	return ret;
2767672cf6dfSJoerg Roedel }
2768672cf6dfSJoerg Roedel 
2769672cf6dfSJoerg Roedel static int __init init_dmars(void)
2770672cf6dfSJoerg Roedel {
2771672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
2772672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
2773672cf6dfSJoerg Roedel 	int ret;
2774672cf6dfSJoerg Roedel 
2775ad3d1902SKyung Min Park 	ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
2776ad3d1902SKyung Min Park 	if (ret)
2777ad3d1902SKyung Min Park 		goto free_iommu;
2778ad3d1902SKyung Min Park 
2779672cf6dfSJoerg Roedel 	for_each_iommu(iommu, drhd) {
2780672cf6dfSJoerg Roedel 		if (drhd->ignored) {
2781672cf6dfSJoerg Roedel 			iommu_disable_translation(iommu);
2782672cf6dfSJoerg Roedel 			continue;
2783672cf6dfSJoerg Roedel 		}
2784672cf6dfSJoerg Roedel 
2785672cf6dfSJoerg Roedel 		/*
2786672cf6dfSJoerg Roedel 		 * Find the max PASID size across all IOMMUs in the system.
2787672cf6dfSJoerg Roedel 		 * We need to ensure the system PASID table is no bigger
2788672cf6dfSJoerg Roedel 		 * than the smallest size supported.
2789672cf6dfSJoerg Roedel 		 */
2790672cf6dfSJoerg Roedel 		if (pasid_supported(iommu)) {
2791672cf6dfSJoerg Roedel 			u32 temp = 2 << ecap_pss(iommu->ecap);
2792672cf6dfSJoerg Roedel 
2793672cf6dfSJoerg Roedel 			intel_pasid_max_id = min_t(u32, temp,
2794672cf6dfSJoerg Roedel 						   intel_pasid_max_id);
2795672cf6dfSJoerg Roedel 		}
2796672cf6dfSJoerg Roedel 
2797672cf6dfSJoerg Roedel 		intel_iommu_init_qi(iommu);
2798672cf6dfSJoerg Roedel 
2799672cf6dfSJoerg Roedel 		ret = iommu_init_domains(iommu);
2800672cf6dfSJoerg Roedel 		if (ret)
2801672cf6dfSJoerg Roedel 			goto free_iommu;
2802672cf6dfSJoerg Roedel 
2803672cf6dfSJoerg Roedel 		init_translation_status(iommu);
2804672cf6dfSJoerg Roedel 
2805672cf6dfSJoerg Roedel 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
2806672cf6dfSJoerg Roedel 			iommu_disable_translation(iommu);
2807672cf6dfSJoerg Roedel 			clear_translation_pre_enabled(iommu);
2808672cf6dfSJoerg Roedel 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
2809672cf6dfSJoerg Roedel 				iommu->name);
2810672cf6dfSJoerg Roedel 		}
2811672cf6dfSJoerg Roedel 
2812672cf6dfSJoerg Roedel 		/*
2813672cf6dfSJoerg Roedel 		 * TBD:
2814672cf6dfSJoerg Roedel 		 * We could share the same root & context tables
2815672cf6dfSJoerg Roedel 		 * among all IOMMUs. Needs to be split out later.
2816672cf6dfSJoerg Roedel 		 */
2817672cf6dfSJoerg Roedel 		ret = iommu_alloc_root_entry(iommu);
2818672cf6dfSJoerg Roedel 		if (ret)
2819672cf6dfSJoerg Roedel 			goto free_iommu;
2820672cf6dfSJoerg Roedel 
2821672cf6dfSJoerg Roedel 		if (translation_pre_enabled(iommu)) {
2822672cf6dfSJoerg Roedel 			pr_info("Translation already enabled - trying to copy translation structures\n");
2823672cf6dfSJoerg Roedel 
2824672cf6dfSJoerg Roedel 			ret = copy_translation_tables(iommu);
2825672cf6dfSJoerg Roedel 			if (ret) {
2826672cf6dfSJoerg Roedel 				/*
2827672cf6dfSJoerg Roedel 				 * We found the IOMMU with translation
2828672cf6dfSJoerg Roedel 				 * enabled - but failed to copy over the
2829672cf6dfSJoerg Roedel 				 * old root-entry table. Try to proceed
2830672cf6dfSJoerg Roedel 				 * by disabling translation now and
2831672cf6dfSJoerg Roedel 				 * allocating a clean root-entry table.
2832672cf6dfSJoerg Roedel 				 * This might cause DMAR faults, but
2833672cf6dfSJoerg Roedel 				 * probably the dump will still succeed.
2834672cf6dfSJoerg Roedel 				 */
2835672cf6dfSJoerg Roedel 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
2836672cf6dfSJoerg Roedel 				       iommu->name);
2837672cf6dfSJoerg Roedel 				iommu_disable_translation(iommu);
2838672cf6dfSJoerg Roedel 				clear_translation_pre_enabled(iommu);
2839672cf6dfSJoerg Roedel 			} else {
2840672cf6dfSJoerg Roedel 				pr_info("Copied translation tables from previous kernel for %s\n",
2841672cf6dfSJoerg Roedel 					iommu->name);
2842672cf6dfSJoerg Roedel 			}
2843672cf6dfSJoerg Roedel 		}
2844672cf6dfSJoerg Roedel 
2845672cf6dfSJoerg Roedel 		if (!ecap_pass_through(iommu->ecap))
2846672cf6dfSJoerg Roedel 			hw_pass_through = 0;
2847672cf6dfSJoerg Roedel 		intel_svm_check(iommu);
2848672cf6dfSJoerg Roedel 	}
2849672cf6dfSJoerg Roedel 
2850672cf6dfSJoerg Roedel 	/*
2851672cf6dfSJoerg Roedel 	 * Now that qi is enabled on all iommus, set the root entry and flush
2852672cf6dfSJoerg Roedel 	 * caches. This is required on some Intel X58 chipsets, otherwise the
2853672cf6dfSJoerg Roedel 	 * flush_context function will loop forever and the boot hangs.
2854672cf6dfSJoerg Roedel 	 */
2855672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
2856672cf6dfSJoerg Roedel 		iommu_flush_write_buffer(iommu);
2857672cf6dfSJoerg Roedel 		iommu_set_root_entry(iommu);
2858672cf6dfSJoerg Roedel 	}
2859672cf6dfSJoerg Roedel 
2860672cf6dfSJoerg Roedel #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2861672cf6dfSJoerg Roedel 	dmar_map_gfx = 0;
2862672cf6dfSJoerg Roedel #endif
2863672cf6dfSJoerg Roedel 
2864672cf6dfSJoerg Roedel 	if (!dmar_map_gfx)
2865672cf6dfSJoerg Roedel 		iommu_identity_mapping |= IDENTMAP_GFX;
2866672cf6dfSJoerg Roedel 
2867672cf6dfSJoerg Roedel 	check_tylersburg_isoch();
2868672cf6dfSJoerg Roedel 
2869672cf6dfSJoerg Roedel 	ret = si_domain_init(hw_pass_through);
2870672cf6dfSJoerg Roedel 	if (ret)
2871672cf6dfSJoerg Roedel 		goto free_iommu;
2872672cf6dfSJoerg Roedel 
2873672cf6dfSJoerg Roedel 	/*
2874672cf6dfSJoerg Roedel 	 * for each drhd
2875672cf6dfSJoerg Roedel 	 *   enable fault log
2876672cf6dfSJoerg Roedel 	 *   global invalidate context cache
2877672cf6dfSJoerg Roedel 	 *   global invalidate iotlb
2878672cf6dfSJoerg Roedel 	 *   enable translation
2879672cf6dfSJoerg Roedel 	 */
2880672cf6dfSJoerg Roedel 	for_each_iommu(iommu, drhd) {
2881672cf6dfSJoerg Roedel 		if (drhd->ignored) {
2882672cf6dfSJoerg Roedel 			/*
2883672cf6dfSJoerg Roedel 			 * we always have to disable PMRs or DMA may fail on
2884672cf6dfSJoerg Roedel 			 * this device
2885672cf6dfSJoerg Roedel 			 */
2886672cf6dfSJoerg Roedel 			if (force_on)
2887672cf6dfSJoerg Roedel 				iommu_disable_protect_mem_regions(iommu);
2888672cf6dfSJoerg Roedel 			continue;
2889672cf6dfSJoerg Roedel 		}
2890672cf6dfSJoerg Roedel 
2891672cf6dfSJoerg Roedel 		iommu_flush_write_buffer(iommu);
2892672cf6dfSJoerg Roedel 
2893672cf6dfSJoerg Roedel #ifdef CONFIG_INTEL_IOMMU_SVM
2894672cf6dfSJoerg Roedel 		if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
2895672cf6dfSJoerg Roedel 			/*
2896672cf6dfSJoerg Roedel 			 * Calling dmar_alloc_hwirq() with dmar_global_lock held
2897672cf6dfSJoerg Roedel 			 * could cause a lock race, so drop the lock around it.
2898672cf6dfSJoerg Roedel 			 */
2899672cf6dfSJoerg Roedel 			up_write(&dmar_global_lock);
2900672cf6dfSJoerg Roedel 			ret = intel_svm_enable_prq(iommu);
2901672cf6dfSJoerg Roedel 			down_write(&dmar_global_lock);
2902672cf6dfSJoerg Roedel 			if (ret)
2903672cf6dfSJoerg Roedel 				goto free_iommu;
2904672cf6dfSJoerg Roedel 		}
2905672cf6dfSJoerg Roedel #endif
2906672cf6dfSJoerg Roedel 		ret = dmar_set_interrupt(iommu);
2907672cf6dfSJoerg Roedel 		if (ret)
2908672cf6dfSJoerg Roedel 			goto free_iommu;
2909672cf6dfSJoerg Roedel 	}
2910672cf6dfSJoerg Roedel 
2911672cf6dfSJoerg Roedel 	return 0;
2912672cf6dfSJoerg Roedel 
2913672cf6dfSJoerg Roedel free_iommu:
2914672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
2915672cf6dfSJoerg Roedel 		disable_dmar_iommu(iommu);
2916672cf6dfSJoerg Roedel 		free_dmar_iommu(iommu);
2917672cf6dfSJoerg Roedel 	}
2918620bf9f9SJerry Snitselaar 	if (si_domain) {
2919620bf9f9SJerry Snitselaar 		domain_exit(si_domain);
2920620bf9f9SJerry Snitselaar 		si_domain = NULL;
2921620bf9f9SJerry Snitselaar 	}
2922672cf6dfSJoerg Roedel 
2923672cf6dfSJoerg Roedel 	return ret;
2924672cf6dfSJoerg Roedel }
2925672cf6dfSJoerg Roedel 
2926672cf6dfSJoerg Roedel static void __init init_no_remapping_devices(void)
2927672cf6dfSJoerg Roedel {
2928672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
2929672cf6dfSJoerg Roedel 	struct device *dev;
2930672cf6dfSJoerg Roedel 	int i;
2931672cf6dfSJoerg Roedel 
2932672cf6dfSJoerg Roedel 	for_each_drhd_unit(drhd) {
2933672cf6dfSJoerg Roedel 		if (!drhd->include_all) {
2934672cf6dfSJoerg Roedel 			for_each_active_dev_scope(drhd->devices,
2935672cf6dfSJoerg Roedel 						  drhd->devices_cnt, i, dev)
2936672cf6dfSJoerg Roedel 				break;
2937672cf6dfSJoerg Roedel 			/* ignore DMAR unit if no devices exist */
2938672cf6dfSJoerg Roedel 			if (i == drhd->devices_cnt)
2939672cf6dfSJoerg Roedel 				drhd->ignored = 1;
2940672cf6dfSJoerg Roedel 		}
2941672cf6dfSJoerg Roedel 	}
2942672cf6dfSJoerg Roedel 
2943672cf6dfSJoerg Roedel 	for_each_active_drhd_unit(drhd) {
2944672cf6dfSJoerg Roedel 		if (drhd->include_all)
2945672cf6dfSJoerg Roedel 			continue;
2946672cf6dfSJoerg Roedel 
2947672cf6dfSJoerg Roedel 		for_each_active_dev_scope(drhd->devices,
2948672cf6dfSJoerg Roedel 					  drhd->devices_cnt, i, dev)
2949672cf6dfSJoerg Roedel 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
2950672cf6dfSJoerg Roedel 				break;
2951672cf6dfSJoerg Roedel 		if (i < drhd->devices_cnt)
2952672cf6dfSJoerg Roedel 			continue;
2953672cf6dfSJoerg Roedel 
2954672cf6dfSJoerg Roedel 		/* This IOMMU has *only* gfx devices. Mark it as dedicated to
2955672cf6dfSJoerg Roedel 		   graphics and, unless dmar_map_gfx is set, bypass it. */
2956b1012ca8SLu Baolu 		drhd->gfx_dedicated = 1;
29572d33b7d6SLu Baolu 		if (!dmar_map_gfx)
2958672cf6dfSJoerg Roedel 			drhd->ignored = 1;
2959672cf6dfSJoerg Roedel 	}
2960672cf6dfSJoerg Roedel }
2961672cf6dfSJoerg Roedel 
2962672cf6dfSJoerg Roedel #ifdef CONFIG_SUSPEND
2963672cf6dfSJoerg Roedel static int init_iommu_hw(void)
2964672cf6dfSJoerg Roedel {
2965672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
2966672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = NULL;
2967a0e9911aSYanfei Xu 	int ret;
2968672cf6dfSJoerg Roedel 
2969a0e9911aSYanfei Xu 	for_each_active_iommu(iommu, drhd) {
2970a0e9911aSYanfei Xu 		if (iommu->qi) {
2971a0e9911aSYanfei Xu 			ret = dmar_reenable_qi(iommu);
2972a0e9911aSYanfei Xu 			if (ret)
2973a0e9911aSYanfei Xu 				return ret;
2974a0e9911aSYanfei Xu 		}
2975a0e9911aSYanfei Xu 	}
2976672cf6dfSJoerg Roedel 
2977672cf6dfSJoerg Roedel 	for_each_iommu(iommu, drhd) {
2978672cf6dfSJoerg Roedel 		if (drhd->ignored) {
2979672cf6dfSJoerg Roedel 			/*
2980672cf6dfSJoerg Roedel 			 * we always have to disable PMRs or DMA may fail on
2981672cf6dfSJoerg Roedel 			 * this device
2982672cf6dfSJoerg Roedel 			 */
2983672cf6dfSJoerg Roedel 			if (force_on)
2984672cf6dfSJoerg Roedel 				iommu_disable_protect_mem_regions(iommu);
2985672cf6dfSJoerg Roedel 			continue;
2986672cf6dfSJoerg Roedel 		}
2987672cf6dfSJoerg Roedel 
2988672cf6dfSJoerg Roedel 		iommu_flush_write_buffer(iommu);
2989672cf6dfSJoerg Roedel 		iommu_set_root_entry(iommu);
2990672cf6dfSJoerg Roedel 		iommu_enable_translation(iommu);
2991672cf6dfSJoerg Roedel 		iommu_disable_protect_mem_regions(iommu);
2992672cf6dfSJoerg Roedel 	}
2993672cf6dfSJoerg Roedel 
2994672cf6dfSJoerg Roedel 	return 0;
2995672cf6dfSJoerg Roedel }
2996672cf6dfSJoerg Roedel 
2997672cf6dfSJoerg Roedel static void iommu_flush_all(void)
2998672cf6dfSJoerg Roedel {
2999672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
3000672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
3001672cf6dfSJoerg Roedel 
3002672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
3003672cf6dfSJoerg Roedel 		iommu->flush.flush_context(iommu, 0, 0, 0,
3004672cf6dfSJoerg Roedel 					   DMA_CCMD_GLOBAL_INVL);
3005672cf6dfSJoerg Roedel 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3006672cf6dfSJoerg Roedel 					 DMA_TLB_GLOBAL_FLUSH);
3007672cf6dfSJoerg Roedel 	}
3008672cf6dfSJoerg Roedel }
3009672cf6dfSJoerg Roedel 
3010672cf6dfSJoerg Roedel static int iommu_suspend(void)
3011672cf6dfSJoerg Roedel {
3012672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
3013672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = NULL;
3014672cf6dfSJoerg Roedel 	unsigned long flag;
3015672cf6dfSJoerg Roedel 
3016672cf6dfSJoerg Roedel 	iommu_flush_all();
3017672cf6dfSJoerg Roedel 
3018672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
3019672cf6dfSJoerg Roedel 		iommu_disable_translation(iommu);
3020672cf6dfSJoerg Roedel 
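		/* Save the fault-event registers so iommu_resume() can restore them. */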
3021672cf6dfSJoerg Roedel 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3022672cf6dfSJoerg Roedel 
3023672cf6dfSJoerg Roedel 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
3024672cf6dfSJoerg Roedel 			readl(iommu->reg + DMAR_FECTL_REG);
3025672cf6dfSJoerg Roedel 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3026672cf6dfSJoerg Roedel 			readl(iommu->reg + DMAR_FEDATA_REG);
3027672cf6dfSJoerg Roedel 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3028672cf6dfSJoerg Roedel 			readl(iommu->reg + DMAR_FEADDR_REG);
3029672cf6dfSJoerg Roedel 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3030672cf6dfSJoerg Roedel 			readl(iommu->reg + DMAR_FEUADDR_REG);
3031672cf6dfSJoerg Roedel 
3032672cf6dfSJoerg Roedel 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3033672cf6dfSJoerg Roedel 	}
3034672cf6dfSJoerg Roedel 	return 0;
3035672cf6dfSJoerg Roedel }
3036672cf6dfSJoerg Roedel 
3037672cf6dfSJoerg Roedel static void iommu_resume(void)
3038672cf6dfSJoerg Roedel {
3039672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
3040672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = NULL;
3041672cf6dfSJoerg Roedel 	unsigned long flag;
3042672cf6dfSJoerg Roedel 
3043672cf6dfSJoerg Roedel 	if (init_iommu_hw()) {
3044672cf6dfSJoerg Roedel 		if (force_on)
3045672cf6dfSJoerg Roedel 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3046672cf6dfSJoerg Roedel 		else
3047672cf6dfSJoerg Roedel 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3048672cf6dfSJoerg Roedel 		return;
3049672cf6dfSJoerg Roedel 	}
3050672cf6dfSJoerg Roedel 
3051672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
3052672cf6dfSJoerg Roedel 
3053672cf6dfSJoerg Roedel 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3054672cf6dfSJoerg Roedel 
3055672cf6dfSJoerg Roedel 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3056672cf6dfSJoerg Roedel 			iommu->reg + DMAR_FECTL_REG);
3057672cf6dfSJoerg Roedel 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3058672cf6dfSJoerg Roedel 			iommu->reg + DMAR_FEDATA_REG);
3059672cf6dfSJoerg Roedel 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3060672cf6dfSJoerg Roedel 			iommu->reg + DMAR_FEADDR_REG);
3061672cf6dfSJoerg Roedel 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3062672cf6dfSJoerg Roedel 			iommu->reg + DMAR_FEUADDR_REG);
3063672cf6dfSJoerg Roedel 
3064672cf6dfSJoerg Roedel 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3065672cf6dfSJoerg Roedel 	}
3066672cf6dfSJoerg Roedel }
3067672cf6dfSJoerg Roedel 
3068672cf6dfSJoerg Roedel static struct syscore_ops iommu_syscore_ops = {
3069672cf6dfSJoerg Roedel 	.resume		= iommu_resume,
3070672cf6dfSJoerg Roedel 	.suspend	= iommu_suspend,
3071672cf6dfSJoerg Roedel };
3072672cf6dfSJoerg Roedel 
3073672cf6dfSJoerg Roedel static void __init init_iommu_pm_ops(void)
3074672cf6dfSJoerg Roedel {
3075672cf6dfSJoerg Roedel 	register_syscore_ops(&iommu_syscore_ops);
3076672cf6dfSJoerg Roedel }
3077672cf6dfSJoerg Roedel 
3078672cf6dfSJoerg Roedel #else
3079672cf6dfSJoerg Roedel static inline void init_iommu_pm_ops(void) {}
3080672cf6dfSJoerg Roedel #endif	/* CONFIG_SUSPEND */
3081672cf6dfSJoerg Roedel 
308245967ffbSMarco Bonelli static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3083672cf6dfSJoerg Roedel {
3084672cf6dfSJoerg Roedel 	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3085672cf6dfSJoerg Roedel 	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3086672cf6dfSJoerg Roedel 	    rmrr->end_address <= rmrr->base_address ||
3087672cf6dfSJoerg Roedel 	    arch_rmrr_sanity_check(rmrr))
3088672cf6dfSJoerg Roedel 		return -EINVAL;
3089672cf6dfSJoerg Roedel 
3090672cf6dfSJoerg Roedel 	return 0;
3091672cf6dfSJoerg Roedel }
3092672cf6dfSJoerg Roedel 
3093672cf6dfSJoerg Roedel int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3094672cf6dfSJoerg Roedel {
3095672cf6dfSJoerg Roedel 	struct acpi_dmar_reserved_memory *rmrr;
3096672cf6dfSJoerg Roedel 	struct dmar_rmrr_unit *rmrru;
3097672cf6dfSJoerg Roedel 
3098672cf6dfSJoerg Roedel 	rmrr = (struct acpi_dmar_reserved_memory *)header;
3099672cf6dfSJoerg Roedel 	if (rmrr_sanity_check(rmrr)) {
3100672cf6dfSJoerg Roedel 		pr_warn(FW_BUG
3101672cf6dfSJoerg Roedel 			   "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3102672cf6dfSJoerg Roedel 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3103672cf6dfSJoerg Roedel 			   rmrr->base_address, rmrr->end_address,
3104672cf6dfSJoerg Roedel 			   dmi_get_system_info(DMI_BIOS_VENDOR),
3105672cf6dfSJoerg Roedel 			   dmi_get_system_info(DMI_BIOS_VERSION),
3106672cf6dfSJoerg Roedel 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
3107672cf6dfSJoerg Roedel 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3108672cf6dfSJoerg Roedel 	}
3109672cf6dfSJoerg Roedel 
3110672cf6dfSJoerg Roedel 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3111672cf6dfSJoerg Roedel 	if (!rmrru)
3112672cf6dfSJoerg Roedel 		goto out;
3113672cf6dfSJoerg Roedel 
3114672cf6dfSJoerg Roedel 	rmrru->hdr = header;
3115672cf6dfSJoerg Roedel 
3116672cf6dfSJoerg Roedel 	rmrru->base_address = rmrr->base_address;
3117672cf6dfSJoerg Roedel 	rmrru->end_address = rmrr->end_address;
3118672cf6dfSJoerg Roedel 
3119672cf6dfSJoerg Roedel 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3120672cf6dfSJoerg Roedel 				((void *)rmrr) + rmrr->header.length,
3121672cf6dfSJoerg Roedel 				&rmrru->devices_cnt);
3122672cf6dfSJoerg Roedel 	if (rmrru->devices_cnt && rmrru->devices == NULL)
3123672cf6dfSJoerg Roedel 		goto free_rmrru;
3124672cf6dfSJoerg Roedel 
3125672cf6dfSJoerg Roedel 	list_add(&rmrru->list, &dmar_rmrr_units);
3126672cf6dfSJoerg Roedel 
3127672cf6dfSJoerg Roedel 	return 0;
3128672cf6dfSJoerg Roedel free_rmrru:
3129672cf6dfSJoerg Roedel 	kfree(rmrru);
3130672cf6dfSJoerg Roedel out:
3131672cf6dfSJoerg Roedel 	return -ENOMEM;
3132672cf6dfSJoerg Roedel }
3133672cf6dfSJoerg Roedel 
3134672cf6dfSJoerg Roedel static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3135672cf6dfSJoerg Roedel {
3136672cf6dfSJoerg Roedel 	struct dmar_atsr_unit *atsru;
3137672cf6dfSJoerg Roedel 	struct acpi_dmar_atsr *tmp;
3138672cf6dfSJoerg Roedel 
3139672cf6dfSJoerg Roedel 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3140672cf6dfSJoerg Roedel 				dmar_rcu_check()) {
3141672cf6dfSJoerg Roedel 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3142672cf6dfSJoerg Roedel 		if (atsr->segment != tmp->segment)
3143672cf6dfSJoerg Roedel 			continue;
3144672cf6dfSJoerg Roedel 		if (atsr->header.length != tmp->header.length)
3145672cf6dfSJoerg Roedel 			continue;
3146672cf6dfSJoerg Roedel 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
3147672cf6dfSJoerg Roedel 			return atsru;
3148672cf6dfSJoerg Roedel 	}
3149672cf6dfSJoerg Roedel 
3150672cf6dfSJoerg Roedel 	return NULL;
3151672cf6dfSJoerg Roedel }
3152672cf6dfSJoerg Roedel 
3153672cf6dfSJoerg Roedel int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3154672cf6dfSJoerg Roedel {
3155672cf6dfSJoerg Roedel 	struct acpi_dmar_atsr *atsr;
3156672cf6dfSJoerg Roedel 	struct dmar_atsr_unit *atsru;
3157672cf6dfSJoerg Roedel 
3158672cf6dfSJoerg Roedel 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3159672cf6dfSJoerg Roedel 		return 0;
3160672cf6dfSJoerg Roedel 
3161672cf6dfSJoerg Roedel 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3162672cf6dfSJoerg Roedel 	atsru = dmar_find_atsr(atsr);
3163672cf6dfSJoerg Roedel 	if (atsru)
3164672cf6dfSJoerg Roedel 		return 0;
3165672cf6dfSJoerg Roedel 
3166672cf6dfSJoerg Roedel 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3167672cf6dfSJoerg Roedel 	if (!atsru)
3168672cf6dfSJoerg Roedel 		return -ENOMEM;
3169672cf6dfSJoerg Roedel 
3170672cf6dfSJoerg Roedel 	/*
3171672cf6dfSJoerg Roedel 	 * If memory is allocated from slab by ACPI _DSM method, we need to
3172672cf6dfSJoerg Roedel 	 * copy the memory content because the memory buffer will be freed
3173672cf6dfSJoerg Roedel 	 * on return.
3174672cf6dfSJoerg Roedel 	 */
3175672cf6dfSJoerg Roedel 	atsru->hdr = (void *)(atsru + 1);
3176672cf6dfSJoerg Roedel 	memcpy(atsru->hdr, hdr, hdr->length);
3177672cf6dfSJoerg Roedel 	atsru->include_all = atsr->flags & 0x1;
3178672cf6dfSJoerg Roedel 	if (!atsru->include_all) {
3179672cf6dfSJoerg Roedel 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3180672cf6dfSJoerg Roedel 				(void *)atsr + atsr->header.length,
3181672cf6dfSJoerg Roedel 				&atsru->devices_cnt);
3182672cf6dfSJoerg Roedel 		if (atsru->devices_cnt && atsru->devices == NULL) {
3183672cf6dfSJoerg Roedel 			kfree(atsru);
3184672cf6dfSJoerg Roedel 			return -ENOMEM;
3185672cf6dfSJoerg Roedel 		}
3186672cf6dfSJoerg Roedel 	}
3187672cf6dfSJoerg Roedel 
3188672cf6dfSJoerg Roedel 	list_add_rcu(&atsru->list, &dmar_atsr_units);
3189672cf6dfSJoerg Roedel 
3190672cf6dfSJoerg Roedel 	return 0;
3191672cf6dfSJoerg Roedel }
3192672cf6dfSJoerg Roedel 
3193672cf6dfSJoerg Roedel static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3194672cf6dfSJoerg Roedel {
3195672cf6dfSJoerg Roedel 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3196672cf6dfSJoerg Roedel 	kfree(atsru);
3197672cf6dfSJoerg Roedel }
3198672cf6dfSJoerg Roedel 
3199672cf6dfSJoerg Roedel int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3200672cf6dfSJoerg Roedel {
3201672cf6dfSJoerg Roedel 	struct acpi_dmar_atsr *atsr;
3202672cf6dfSJoerg Roedel 	struct dmar_atsr_unit *atsru;
3203672cf6dfSJoerg Roedel 
3204672cf6dfSJoerg Roedel 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3205672cf6dfSJoerg Roedel 	atsru = dmar_find_atsr(atsr);
3206672cf6dfSJoerg Roedel 	if (atsru) {
3207672cf6dfSJoerg Roedel 		list_del_rcu(&atsru->list);
3208672cf6dfSJoerg Roedel 		synchronize_rcu();
3209672cf6dfSJoerg Roedel 		intel_iommu_free_atsr(atsru);
3210672cf6dfSJoerg Roedel 	}
3211672cf6dfSJoerg Roedel 
3212672cf6dfSJoerg Roedel 	return 0;
3213672cf6dfSJoerg Roedel }
3214672cf6dfSJoerg Roedel 
3215672cf6dfSJoerg Roedel int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3216672cf6dfSJoerg Roedel {
3217672cf6dfSJoerg Roedel 	int i;
3218672cf6dfSJoerg Roedel 	struct device *dev;
3219672cf6dfSJoerg Roedel 	struct acpi_dmar_atsr *atsr;
3220672cf6dfSJoerg Roedel 	struct dmar_atsr_unit *atsru;
3221672cf6dfSJoerg Roedel 
3222672cf6dfSJoerg Roedel 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3223672cf6dfSJoerg Roedel 	atsru = dmar_find_atsr(atsr);
3224672cf6dfSJoerg Roedel 	if (!atsru)
3225672cf6dfSJoerg Roedel 		return 0;
3226672cf6dfSJoerg Roedel 
3227672cf6dfSJoerg Roedel 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3228672cf6dfSJoerg Roedel 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3229672cf6dfSJoerg Roedel 					  i, dev)
3230672cf6dfSJoerg Roedel 			return -EBUSY;
3231672cf6dfSJoerg Roedel 	}
3232672cf6dfSJoerg Roedel 
3233672cf6dfSJoerg Roedel 	return 0;
3234672cf6dfSJoerg Roedel }
3235672cf6dfSJoerg Roedel 
323631a75cbbSYian Chen static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
323731a75cbbSYian Chen {
323831a75cbbSYian Chen 	struct dmar_satc_unit *satcu;
323931a75cbbSYian Chen 	struct acpi_dmar_satc *tmp;
324031a75cbbSYian Chen 
324131a75cbbSYian Chen 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
324231a75cbbSYian Chen 				dmar_rcu_check()) {
324331a75cbbSYian Chen 		tmp = (struct acpi_dmar_satc *)satcu->hdr;
324431a75cbbSYian Chen 		if (satc->segment != tmp->segment)
324531a75cbbSYian Chen 			continue;
324631a75cbbSYian Chen 		if (satc->header.length != tmp->header.length)
324731a75cbbSYian Chen 			continue;
324831a75cbbSYian Chen 		if (memcmp(satc, tmp, satc->header.length) == 0)
324931a75cbbSYian Chen 			return satcu;
325031a75cbbSYian Chen 	}
325131a75cbbSYian Chen 
325231a75cbbSYian Chen 	return NULL;
325331a75cbbSYian Chen }
325431a75cbbSYian Chen 
325531a75cbbSYian Chen int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
325631a75cbbSYian Chen {
325731a75cbbSYian Chen 	struct acpi_dmar_satc *satc;
325831a75cbbSYian Chen 	struct dmar_satc_unit *satcu;
325931a75cbbSYian Chen 
326031a75cbbSYian Chen 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
326131a75cbbSYian Chen 		return 0;
326231a75cbbSYian Chen 
326331a75cbbSYian Chen 	satc = container_of(hdr, struct acpi_dmar_satc, header);
326431a75cbbSYian Chen 	satcu = dmar_find_satc(satc);
326531a75cbbSYian Chen 	if (satcu)
326631a75cbbSYian Chen 		return 0;
326731a75cbbSYian Chen 
326831a75cbbSYian Chen 	satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
326931a75cbbSYian Chen 	if (!satcu)
327031a75cbbSYian Chen 		return -ENOMEM;
327131a75cbbSYian Chen 
327231a75cbbSYian Chen 	satcu->hdr = (void *)(satcu + 1);
327331a75cbbSYian Chen 	memcpy(satcu->hdr, hdr, hdr->length);
327431a75cbbSYian Chen 	satcu->atc_required = satc->flags & 0x1;
327531a75cbbSYian Chen 	satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
327631a75cbbSYian Chen 					      (void *)satc + satc->header.length,
327731a75cbbSYian Chen 					      &satcu->devices_cnt);
327831a75cbbSYian Chen 	if (satcu->devices_cnt && !satcu->devices) {
327931a75cbbSYian Chen 		kfree(satcu);
328031a75cbbSYian Chen 		return -ENOMEM;
328131a75cbbSYian Chen 	}
328231a75cbbSYian Chen 	list_add_rcu(&satcu->list, &dmar_satc_units);
328331a75cbbSYian Chen 
328431a75cbbSYian Chen 	return 0;
328531a75cbbSYian Chen }
328631a75cbbSYian Chen 
3287672cf6dfSJoerg Roedel static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3288672cf6dfSJoerg Roedel {
3289672cf6dfSJoerg Roedel 	int sp, ret;
3290672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dmaru->iommu;
3291672cf6dfSJoerg Roedel 
3292ad3d1902SKyung Min Park 	ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3293ad3d1902SKyung Min Park 	if (ret)
3294ad3d1902SKyung Min Park 		goto out;
3295ad3d1902SKyung Min Park 
3296672cf6dfSJoerg Roedel 	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3297672cf6dfSJoerg Roedel 		pr_warn("%s: Doesn't support hardware pass through.\n",
3298672cf6dfSJoerg Roedel 			iommu->name);
3299672cf6dfSJoerg Roedel 		return -ENXIO;
3300672cf6dfSJoerg Roedel 	}
3301e8055226SLu Baolu 
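	/*
	 * Reject the hot-added IOMMU if it does not support the superpage
	 * sizes already in common use by the existing IOMMUs.
	 */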
3302672cf6dfSJoerg Roedel 	sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3303672cf6dfSJoerg Roedel 	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3304672cf6dfSJoerg Roedel 		pr_warn("%s: Doesn't support large page.\n",
3305672cf6dfSJoerg Roedel 			iommu->name);
3306672cf6dfSJoerg Roedel 		return -ENXIO;
3307672cf6dfSJoerg Roedel 	}
3308672cf6dfSJoerg Roedel 
3309672cf6dfSJoerg Roedel 	/*
3310672cf6dfSJoerg Roedel 	 * Disable translation if already enabled prior to OS handover.
3311672cf6dfSJoerg Roedel 	 */
3312672cf6dfSJoerg Roedel 	if (iommu->gcmd & DMA_GCMD_TE)
3313672cf6dfSJoerg Roedel 		iommu_disable_translation(iommu);
3314672cf6dfSJoerg Roedel 
3315672cf6dfSJoerg Roedel 	ret = iommu_init_domains(iommu);
3316672cf6dfSJoerg Roedel 	if (ret == 0)
3317672cf6dfSJoerg Roedel 		ret = iommu_alloc_root_entry(iommu);
3318672cf6dfSJoerg Roedel 	if (ret)
3319672cf6dfSJoerg Roedel 		goto out;
3320672cf6dfSJoerg Roedel 
3321672cf6dfSJoerg Roedel 	intel_svm_check(iommu);
3322672cf6dfSJoerg Roedel 
3323672cf6dfSJoerg Roedel 	if (dmaru->ignored) {
3324672cf6dfSJoerg Roedel 		/*
3325672cf6dfSJoerg Roedel 		 * we always have to disable PMRs or DMA may fail on this device
3326672cf6dfSJoerg Roedel 		 */
3327672cf6dfSJoerg Roedel 		if (force_on)
3328672cf6dfSJoerg Roedel 			iommu_disable_protect_mem_regions(iommu);
3329672cf6dfSJoerg Roedel 		return 0;
3330672cf6dfSJoerg Roedel 	}
3331672cf6dfSJoerg Roedel 
3332672cf6dfSJoerg Roedel 	intel_iommu_init_qi(iommu);
3333672cf6dfSJoerg Roedel 	iommu_flush_write_buffer(iommu);
3334672cf6dfSJoerg Roedel 
3335672cf6dfSJoerg Roedel #ifdef CONFIG_INTEL_IOMMU_SVM
3336672cf6dfSJoerg Roedel 	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3337672cf6dfSJoerg Roedel 		ret = intel_svm_enable_prq(iommu);
3338672cf6dfSJoerg Roedel 		if (ret)
3339672cf6dfSJoerg Roedel 			goto disable_iommu;
3340672cf6dfSJoerg Roedel 	}
3341672cf6dfSJoerg Roedel #endif
3342672cf6dfSJoerg Roedel 	ret = dmar_set_interrupt(iommu);
3343672cf6dfSJoerg Roedel 	if (ret)
3344672cf6dfSJoerg Roedel 		goto disable_iommu;
3345672cf6dfSJoerg Roedel 
3346672cf6dfSJoerg Roedel 	iommu_set_root_entry(iommu);
3347672cf6dfSJoerg Roedel 	iommu_enable_translation(iommu);
3348672cf6dfSJoerg Roedel 
3349672cf6dfSJoerg Roedel 	iommu_disable_protect_mem_regions(iommu);
3350672cf6dfSJoerg Roedel 	return 0;
3351672cf6dfSJoerg Roedel 
3352672cf6dfSJoerg Roedel disable_iommu:
3353672cf6dfSJoerg Roedel 	disable_dmar_iommu(iommu);
3354672cf6dfSJoerg Roedel out:
3355672cf6dfSJoerg Roedel 	free_dmar_iommu(iommu);
3356672cf6dfSJoerg Roedel 	return ret;
3357672cf6dfSJoerg Roedel }
3358672cf6dfSJoerg Roedel 
3359672cf6dfSJoerg Roedel int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3360672cf6dfSJoerg Roedel {
3361672cf6dfSJoerg Roedel 	int ret = 0;
3362672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dmaru->iommu;
3363672cf6dfSJoerg Roedel 
3364672cf6dfSJoerg Roedel 	if (!intel_iommu_enabled)
3365672cf6dfSJoerg Roedel 		return 0;
3366672cf6dfSJoerg Roedel 	if (iommu == NULL)
3367672cf6dfSJoerg Roedel 		return -EINVAL;
3368672cf6dfSJoerg Roedel 
3369672cf6dfSJoerg Roedel 	if (insert) {
3370672cf6dfSJoerg Roedel 		ret = intel_iommu_add(dmaru);
3371672cf6dfSJoerg Roedel 	} else {
3372672cf6dfSJoerg Roedel 		disable_dmar_iommu(iommu);
3373672cf6dfSJoerg Roedel 		free_dmar_iommu(iommu);
3374672cf6dfSJoerg Roedel 	}
3375672cf6dfSJoerg Roedel 
3376672cf6dfSJoerg Roedel 	return ret;
3377672cf6dfSJoerg Roedel }
3378672cf6dfSJoerg Roedel 
3379672cf6dfSJoerg Roedel static void intel_iommu_free_dmars(void)
3380672cf6dfSJoerg Roedel {
3381672cf6dfSJoerg Roedel 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
3382672cf6dfSJoerg Roedel 	struct dmar_atsr_unit *atsru, *atsr_n;
338331a75cbbSYian Chen 	struct dmar_satc_unit *satcu, *satc_n;
3384672cf6dfSJoerg Roedel 
3385672cf6dfSJoerg Roedel 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3386672cf6dfSJoerg Roedel 		list_del(&rmrru->list);
3387672cf6dfSJoerg Roedel 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3388672cf6dfSJoerg Roedel 		kfree(rmrru);
3389672cf6dfSJoerg Roedel 	}
3390672cf6dfSJoerg Roedel 
3391672cf6dfSJoerg Roedel 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3392672cf6dfSJoerg Roedel 		list_del(&atsru->list);
3393672cf6dfSJoerg Roedel 		intel_iommu_free_atsr(atsru);
3394672cf6dfSJoerg Roedel 	}
339531a75cbbSYian Chen 	list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
339631a75cbbSYian Chen 		list_del(&satcu->list);
339731a75cbbSYian Chen 		dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
339831a75cbbSYian Chen 		kfree(satcu);
339931a75cbbSYian Chen 	}
3400672cf6dfSJoerg Roedel }
3401672cf6dfSJoerg Roedel 
340297f2f2c5SYian Chen static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
340397f2f2c5SYian Chen {
340497f2f2c5SYian Chen 	struct dmar_satc_unit *satcu;
340597f2f2c5SYian Chen 	struct acpi_dmar_satc *satc;
340697f2f2c5SYian Chen 	struct device *tmp;
340797f2f2c5SYian Chen 	int i;
340897f2f2c5SYian Chen 
340997f2f2c5SYian Chen 	dev = pci_physfn(dev);
341097f2f2c5SYian Chen 	rcu_read_lock();
341197f2f2c5SYian Chen 
341297f2f2c5SYian Chen 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
341397f2f2c5SYian Chen 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
341497f2f2c5SYian Chen 		if (satc->segment != pci_domain_nr(dev->bus))
341597f2f2c5SYian Chen 			continue;
341697f2f2c5SYian Chen 		for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
341797f2f2c5SYian Chen 			if (to_pci_dev(tmp) == dev)
341897f2f2c5SYian Chen 				goto out;
341997f2f2c5SYian Chen 	}
342097f2f2c5SYian Chen 	satcu = NULL;
342197f2f2c5SYian Chen out:
342297f2f2c5SYian Chen 	rcu_read_unlock();
342397f2f2c5SYian Chen 	return satcu;
342497f2f2c5SYian Chen }
342597f2f2c5SYian Chen 
342697f2f2c5SYian Chen static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
3427672cf6dfSJoerg Roedel {
3428672cf6dfSJoerg Roedel 	int i, ret = 1;
3429672cf6dfSJoerg Roedel 	struct pci_bus *bus;
3430672cf6dfSJoerg Roedel 	struct pci_dev *bridge = NULL;
3431672cf6dfSJoerg Roedel 	struct device *tmp;
3432672cf6dfSJoerg Roedel 	struct acpi_dmar_atsr *atsr;
3433672cf6dfSJoerg Roedel 	struct dmar_atsr_unit *atsru;
343497f2f2c5SYian Chen 	struct dmar_satc_unit *satcu;
3435672cf6dfSJoerg Roedel 
3436672cf6dfSJoerg Roedel 	dev = pci_physfn(dev);
343797f2f2c5SYian Chen 	satcu = dmar_find_matched_satc_unit(dev);
343897f2f2c5SYian Chen 	if (satcu)
343997f2f2c5SYian Chen 		/*
344097f2f2c5SYian Chen 		 * This device supports ATS because it is listed in the
344197f2f2c5SYian Chen 		 * SATC table. When the IOMMU is in legacy mode, hardware
344297f2f2c5SYian Chen 		 * enables ATS automatically for any device that requires
344397f2f2c5SYian Chen 		 * it, so the OS must not also enable ATS on this device,
344497f2f2c5SYian Chen 		 * which would cause duplicated TLB invalidations.
344597f2f2c5SYian Chen 		 */
344697f2f2c5SYian Chen 		return !(satcu->atc_required && !sm_supported(iommu));
344797f2f2c5SYian Chen 
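	/*
	 * No SATC entry for this device: walk up to the PCIe root port and
	 * check whether it is covered by an ATSR device scope.
	 */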
3448672cf6dfSJoerg Roedel 	for (bus = dev->bus; bus; bus = bus->parent) {
3449672cf6dfSJoerg Roedel 		bridge = bus->self;
3450672cf6dfSJoerg Roedel 		/* If it's an integrated device, allow ATS */
3451672cf6dfSJoerg Roedel 		if (!bridge)
3452672cf6dfSJoerg Roedel 			return 1;
3453672cf6dfSJoerg Roedel 		/* Connected via non-PCIe: no ATS */
3454672cf6dfSJoerg Roedel 		if (!pci_is_pcie(bridge) ||
3455672cf6dfSJoerg Roedel 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3456672cf6dfSJoerg Roedel 			return 0;
3457672cf6dfSJoerg Roedel 		/* If we found the root port, look it up in the ATSR */
3458672cf6dfSJoerg Roedel 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3459672cf6dfSJoerg Roedel 			break;
3460672cf6dfSJoerg Roedel 	}
3461672cf6dfSJoerg Roedel 
3462672cf6dfSJoerg Roedel 	rcu_read_lock();
3463672cf6dfSJoerg Roedel 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3464672cf6dfSJoerg Roedel 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3465672cf6dfSJoerg Roedel 		if (atsr->segment != pci_domain_nr(dev->bus))
3466672cf6dfSJoerg Roedel 			continue;
3467672cf6dfSJoerg Roedel 
3468672cf6dfSJoerg Roedel 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3469672cf6dfSJoerg Roedel 			if (tmp == &bridge->dev)
3470672cf6dfSJoerg Roedel 				goto out;
3471672cf6dfSJoerg Roedel 
3472672cf6dfSJoerg Roedel 		if (atsru->include_all)
3473672cf6dfSJoerg Roedel 			goto out;
3474672cf6dfSJoerg Roedel 	}
3475672cf6dfSJoerg Roedel 	ret = 0;
3476672cf6dfSJoerg Roedel out:
3477672cf6dfSJoerg Roedel 	rcu_read_unlock();
3478672cf6dfSJoerg Roedel 
3479672cf6dfSJoerg Roedel 	return ret;
3480672cf6dfSJoerg Roedel }
3481672cf6dfSJoerg Roedel 
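/*
 * Keep the RMRR, ATSR and SATC device-scope lists in sync as PCI devices
 * are hot-added or removed.
 */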
3482672cf6dfSJoerg Roedel int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3483672cf6dfSJoerg Roedel {
3484672cf6dfSJoerg Roedel 	int ret;
3485672cf6dfSJoerg Roedel 	struct dmar_rmrr_unit *rmrru;
3486672cf6dfSJoerg Roedel 	struct dmar_atsr_unit *atsru;
348731a75cbbSYian Chen 	struct dmar_satc_unit *satcu;
3488672cf6dfSJoerg Roedel 	struct acpi_dmar_atsr *atsr;
3489672cf6dfSJoerg Roedel 	struct acpi_dmar_reserved_memory *rmrr;
349031a75cbbSYian Chen 	struct acpi_dmar_satc *satc;
3491672cf6dfSJoerg Roedel 
3492672cf6dfSJoerg Roedel 	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
3493672cf6dfSJoerg Roedel 		return 0;
3494672cf6dfSJoerg Roedel 
3495672cf6dfSJoerg Roedel 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3496672cf6dfSJoerg Roedel 		rmrr = container_of(rmrru->hdr,
3497672cf6dfSJoerg Roedel 				    struct acpi_dmar_reserved_memory, header);
3498672cf6dfSJoerg Roedel 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3499672cf6dfSJoerg Roedel 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3500672cf6dfSJoerg Roedel 				((void *)rmrr) + rmrr->header.length,
3501672cf6dfSJoerg Roedel 				rmrr->segment, rmrru->devices,
3502672cf6dfSJoerg Roedel 				rmrru->devices_cnt);
3503672cf6dfSJoerg Roedel 			if (ret < 0)
3504672cf6dfSJoerg Roedel 				return ret;
3505672cf6dfSJoerg Roedel 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3506672cf6dfSJoerg Roedel 			dmar_remove_dev_scope(info, rmrr->segment,
3507672cf6dfSJoerg Roedel 				rmrru->devices, rmrru->devices_cnt);
3508672cf6dfSJoerg Roedel 		}
3509672cf6dfSJoerg Roedel 	}
3510672cf6dfSJoerg Roedel 
3511672cf6dfSJoerg Roedel 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
3512672cf6dfSJoerg Roedel 		if (atsru->include_all)
3513672cf6dfSJoerg Roedel 			continue;
3514672cf6dfSJoerg Roedel 
3515672cf6dfSJoerg Roedel 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3516672cf6dfSJoerg Roedel 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3517672cf6dfSJoerg Roedel 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3518672cf6dfSJoerg Roedel 					(void *)atsr + atsr->header.length,
3519672cf6dfSJoerg Roedel 					atsr->segment, atsru->devices,
3520672cf6dfSJoerg Roedel 					atsru->devices_cnt);
3521672cf6dfSJoerg Roedel 			if (ret > 0)
3522672cf6dfSJoerg Roedel 				break;
3523672cf6dfSJoerg Roedel 			else if (ret < 0)
3524672cf6dfSJoerg Roedel 				return ret;
3525672cf6dfSJoerg Roedel 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3526672cf6dfSJoerg Roedel 			if (dmar_remove_dev_scope(info, atsr->segment,
3527672cf6dfSJoerg Roedel 					atsru->devices, atsru->devices_cnt))
3528672cf6dfSJoerg Roedel 				break;
3529672cf6dfSJoerg Roedel 		}
3530672cf6dfSJoerg Roedel 	}
353131a75cbbSYian Chen 	list_for_each_entry(satcu, &dmar_satc_units, list) {
353231a75cbbSYian Chen 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
353331a75cbbSYian Chen 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
353431a75cbbSYian Chen 			ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
353531a75cbbSYian Chen 					(void *)satc + satc->header.length,
353631a75cbbSYian Chen 					satc->segment, satcu->devices,
353731a75cbbSYian Chen 					satcu->devices_cnt);
353831a75cbbSYian Chen 			if (ret > 0)
353931a75cbbSYian Chen 				break;
354031a75cbbSYian Chen 			else if (ret < 0)
354131a75cbbSYian Chen 				return ret;
354231a75cbbSYian Chen 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
354331a75cbbSYian Chen 			if (dmar_remove_dev_scope(info, satc->segment,
354431a75cbbSYian Chen 					satcu->devices, satcu->devices_cnt))
354531a75cbbSYian Chen 				break;
354631a75cbbSYian Chen 		}
354731a75cbbSYian Chen 	}
3548672cf6dfSJoerg Roedel 
3549672cf6dfSJoerg Roedel 	return 0;
3550672cf6dfSJoerg Roedel }
3551672cf6dfSJoerg Roedel 
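/*
 * Keep the static identity domain's mappings in sync with memory hotplug:
 * map newly onlined ranges and tear down mappings for offlined ones.
 */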
3552672cf6dfSJoerg Roedel static int intel_iommu_memory_notifier(struct notifier_block *nb,
3553672cf6dfSJoerg Roedel 				       unsigned long val, void *v)
3554672cf6dfSJoerg Roedel {
3555672cf6dfSJoerg Roedel 	struct memory_notify *mhp = v;
3556fb5f50a4SYanfei Xu 	unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn);
3557fb5f50a4SYanfei Xu 	unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn +
3558672cf6dfSJoerg Roedel 			mhp->nr_pages - 1);
3559672cf6dfSJoerg Roedel 
3560672cf6dfSJoerg Roedel 	switch (val) {
3561672cf6dfSJoerg Roedel 	case MEM_GOING_ONLINE:
3562672cf6dfSJoerg Roedel 		if (iommu_domain_identity_map(si_domain,
3563672cf6dfSJoerg Roedel 					      start_vpfn, last_vpfn)) {
3564672cf6dfSJoerg Roedel 			pr_warn("Failed to build identity map for [%lx-%lx]\n",
3565672cf6dfSJoerg Roedel 				start_vpfn, last_vpfn);
3566672cf6dfSJoerg Roedel 			return NOTIFY_BAD;
3567672cf6dfSJoerg Roedel 		}
3568672cf6dfSJoerg Roedel 		break;
3569672cf6dfSJoerg Roedel 
3570672cf6dfSJoerg Roedel 	case MEM_OFFLINE:
3571672cf6dfSJoerg Roedel 	case MEM_CANCEL_ONLINE:
3572672cf6dfSJoerg Roedel 		{
3573672cf6dfSJoerg Roedel 			struct dmar_drhd_unit *drhd;
3574672cf6dfSJoerg Roedel 			struct intel_iommu *iommu;
357587f60cc6SMatthew Wilcox (Oracle) 			LIST_HEAD(freelist);
3576672cf6dfSJoerg Roedel 
357787f60cc6SMatthew Wilcox (Oracle) 			domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
3578672cf6dfSJoerg Roedel 
3579672cf6dfSJoerg Roedel 			rcu_read_lock();
3580672cf6dfSJoerg Roedel 			for_each_active_iommu(iommu, drhd)
3581672cf6dfSJoerg Roedel 				iommu_flush_iotlb_psi(iommu, si_domain,
3582672cf6dfSJoerg Roedel 					start_vpfn, mhp->nr_pages,
358387f60cc6SMatthew Wilcox (Oracle) 					list_empty(&freelist), 0);
3584672cf6dfSJoerg Roedel 			rcu_read_unlock();
358587f60cc6SMatthew Wilcox (Oracle) 			put_pages_list(&freelist);
3586672cf6dfSJoerg Roedel 		}
3587672cf6dfSJoerg Roedel 		break;
3588672cf6dfSJoerg Roedel 	}
3589672cf6dfSJoerg Roedel 
3590672cf6dfSJoerg Roedel 	return NOTIFY_OK;
3591672cf6dfSJoerg Roedel }
3592672cf6dfSJoerg Roedel 
3593672cf6dfSJoerg Roedel static struct notifier_block intel_iommu_memory_nb = {
3594672cf6dfSJoerg Roedel 	.notifier_call = intel_iommu_memory_notifier,
3595672cf6dfSJoerg Roedel 	.priority = 0
3596672cf6dfSJoerg Roedel };
3597672cf6dfSJoerg Roedel 
3598672cf6dfSJoerg Roedel static void intel_disable_iommus(void)
3599672cf6dfSJoerg Roedel {
3600672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = NULL;
3601672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
3602672cf6dfSJoerg Roedel 
3603672cf6dfSJoerg Roedel 	for_each_iommu(iommu, drhd)
3604672cf6dfSJoerg Roedel 		iommu_disable_translation(iommu);
3605672cf6dfSJoerg Roedel }
3606672cf6dfSJoerg Roedel 
3607672cf6dfSJoerg Roedel void intel_iommu_shutdown(void)
3608672cf6dfSJoerg Roedel {
3609672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
3610672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = NULL;
3611672cf6dfSJoerg Roedel 
3612672cf6dfSJoerg Roedel 	if (no_iommu || dmar_disabled)
3613672cf6dfSJoerg Roedel 		return;
3614672cf6dfSJoerg Roedel 
3615672cf6dfSJoerg Roedel 	down_write(&dmar_global_lock);
3616672cf6dfSJoerg Roedel 
3617672cf6dfSJoerg Roedel 	/* Disable PMRs explicitly here. */
3618672cf6dfSJoerg Roedel 	for_each_iommu(iommu, drhd)
3619672cf6dfSJoerg Roedel 		iommu_disable_protect_mem_regions(iommu);
3620672cf6dfSJoerg Roedel 
3621672cf6dfSJoerg Roedel 	/* Make sure the IOMMUs are switched off */
3622672cf6dfSJoerg Roedel 	intel_disable_iommus();
3623672cf6dfSJoerg Roedel 
3624672cf6dfSJoerg Roedel 	up_write(&dmar_global_lock);
3625672cf6dfSJoerg Roedel }
3626672cf6dfSJoerg Roedel 
3627672cf6dfSJoerg Roedel static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
3628672cf6dfSJoerg Roedel {
3629672cf6dfSJoerg Roedel 	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
3630672cf6dfSJoerg Roedel 
3631672cf6dfSJoerg Roedel 	return container_of(iommu_dev, struct intel_iommu, iommu);
3632672cf6dfSJoerg Roedel }
3633672cf6dfSJoerg Roedel 
36343bc770b0SYueHaibing static ssize_t version_show(struct device *dev,
36353bc770b0SYueHaibing 			    struct device_attribute *attr, char *buf)
3636672cf6dfSJoerg Roedel {
3637672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3638672cf6dfSJoerg Roedel 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
3639c33fcc13SLu Baolu 	return sysfs_emit(buf, "%d:%d\n",
3640672cf6dfSJoerg Roedel 			  DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3641672cf6dfSJoerg Roedel }
36423bc770b0SYueHaibing static DEVICE_ATTR_RO(version);
3643672cf6dfSJoerg Roedel 
36443bc770b0SYueHaibing static ssize_t address_show(struct device *dev,
36453bc770b0SYueHaibing 			    struct device_attribute *attr, char *buf)
3646672cf6dfSJoerg Roedel {
3647672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3648c33fcc13SLu Baolu 	return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
3649672cf6dfSJoerg Roedel }
36503bc770b0SYueHaibing static DEVICE_ATTR_RO(address);
3651672cf6dfSJoerg Roedel 
36523bc770b0SYueHaibing static ssize_t cap_show(struct device *dev,
36533bc770b0SYueHaibing 			struct device_attribute *attr, char *buf)
3654672cf6dfSJoerg Roedel {
3655672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3656c33fcc13SLu Baolu 	return sysfs_emit(buf, "%llx\n", iommu->cap);
3657672cf6dfSJoerg Roedel }
36583bc770b0SYueHaibing static DEVICE_ATTR_RO(cap);
3659672cf6dfSJoerg Roedel 
36603bc770b0SYueHaibing static ssize_t ecap_show(struct device *dev,
36613bc770b0SYueHaibing 			 struct device_attribute *attr, char *buf)
3662672cf6dfSJoerg Roedel {
3663672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3664c33fcc13SLu Baolu 	return sysfs_emit(buf, "%llx\n", iommu->ecap);
3665672cf6dfSJoerg Roedel }
36663bc770b0SYueHaibing static DEVICE_ATTR_RO(ecap);
3667672cf6dfSJoerg Roedel 
36683bc770b0SYueHaibing static ssize_t domains_supported_show(struct device *dev,
36693bc770b0SYueHaibing 				      struct device_attribute *attr, char *buf)
3670672cf6dfSJoerg Roedel {
3671672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3672c33fcc13SLu Baolu 	return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
3673672cf6dfSJoerg Roedel }
36743bc770b0SYueHaibing static DEVICE_ATTR_RO(domains_supported);
3675672cf6dfSJoerg Roedel 
36763bc770b0SYueHaibing static ssize_t domains_used_show(struct device *dev,
36773bc770b0SYueHaibing 				 struct device_attribute *attr, char *buf)
3678672cf6dfSJoerg Roedel {
3679672cf6dfSJoerg Roedel 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3680c33fcc13SLu Baolu 	return sysfs_emit(buf, "%d\n",
3681c33fcc13SLu Baolu 			  bitmap_weight(iommu->domain_ids,
3682672cf6dfSJoerg Roedel 					cap_ndoms(iommu->cap)));
3683672cf6dfSJoerg Roedel }
36843bc770b0SYueHaibing static DEVICE_ATTR_RO(domains_used);
3685672cf6dfSJoerg Roedel 
3686672cf6dfSJoerg Roedel static struct attribute *intel_iommu_attrs[] = {
3687672cf6dfSJoerg Roedel 	&dev_attr_version.attr,
3688672cf6dfSJoerg Roedel 	&dev_attr_address.attr,
3689672cf6dfSJoerg Roedel 	&dev_attr_cap.attr,
3690672cf6dfSJoerg Roedel 	&dev_attr_ecap.attr,
3691672cf6dfSJoerg Roedel 	&dev_attr_domains_supported.attr,
3692672cf6dfSJoerg Roedel 	&dev_attr_domains_used.attr,
3693672cf6dfSJoerg Roedel 	NULL,
3694672cf6dfSJoerg Roedel };
3695672cf6dfSJoerg Roedel 
3696672cf6dfSJoerg Roedel static struct attribute_group intel_iommu_group = {
3697672cf6dfSJoerg Roedel 	.name = "intel-iommu",
3698672cf6dfSJoerg Roedel 	.attrs = intel_iommu_attrs,
3699672cf6dfSJoerg Roedel };
3700672cf6dfSJoerg Roedel 
3701672cf6dfSJoerg Roedel const struct attribute_group *intel_iommu_groups[] = {
3702672cf6dfSJoerg Roedel 	&intel_iommu_group,
3703672cf6dfSJoerg Roedel 	NULL,
3704672cf6dfSJoerg Roedel };
3705672cf6dfSJoerg Roedel 
370699b50be9SRajat Jain static inline bool has_external_pci(void)
3707672cf6dfSJoerg Roedel {
3708672cf6dfSJoerg Roedel 	struct pci_dev *pdev = NULL;
3709672cf6dfSJoerg Roedel 
3710672cf6dfSJoerg Roedel 	for_each_pci_dev(pdev)
3711afca9e19SXiongfeng Wang 		if (pdev->external_facing) {
3712afca9e19SXiongfeng Wang 			pci_dev_put(pdev);
3713672cf6dfSJoerg Roedel 			return true;
3714afca9e19SXiongfeng Wang 		}
3715672cf6dfSJoerg Roedel 
3716672cf6dfSJoerg Roedel 	return false;
3717672cf6dfSJoerg Roedel }
3718672cf6dfSJoerg Roedel 
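/*
 * Force the IOMMU on when the platform firmware opted in to DMA protection
 * and at least one external-facing PCI port is present.
 */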
3719672cf6dfSJoerg Roedel static int __init platform_optin_force_iommu(void)
3720672cf6dfSJoerg Roedel {
372199b50be9SRajat Jain 	if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
3722672cf6dfSJoerg Roedel 		return 0;
3723672cf6dfSJoerg Roedel 
3724672cf6dfSJoerg Roedel 	if (no_iommu || dmar_disabled)
3725672cf6dfSJoerg Roedel 		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
3726672cf6dfSJoerg Roedel 
3727672cf6dfSJoerg Roedel 	/*
3728672cf6dfSJoerg Roedel 	 * If Intel-IOMMU is disabled by default, we will apply identity
3729672cf6dfSJoerg Roedel 	 * map for all devices except those marked as being untrusted.
3730672cf6dfSJoerg Roedel 	 */
3731672cf6dfSJoerg Roedel 	if (dmar_disabled)
3732672cf6dfSJoerg Roedel 		iommu_set_default_passthrough(false);
3733672cf6dfSJoerg Roedel 
3734672cf6dfSJoerg Roedel 	dmar_disabled = 0;
3735672cf6dfSJoerg Roedel 	no_iommu = 0;
3736672cf6dfSJoerg Roedel 
3737672cf6dfSJoerg Roedel 	return 1;
3738672cf6dfSJoerg Roedel }
3739672cf6dfSJoerg Roedel 
3740672cf6dfSJoerg Roedel static int __init probe_acpi_namespace_devices(void)
3741672cf6dfSJoerg Roedel {
3742672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
3743672cf6dfSJoerg Roedel 	/* To avoid a -Wunused-but-set-variable warning. */
3744672cf6dfSJoerg Roedel 	struct intel_iommu *iommu __maybe_unused;
3745672cf6dfSJoerg Roedel 	struct device *dev;
3746672cf6dfSJoerg Roedel 	int i, ret = 0;
3747672cf6dfSJoerg Roedel 
3748672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
3749672cf6dfSJoerg Roedel 		for_each_active_dev_scope(drhd->devices,
3750672cf6dfSJoerg Roedel 					  drhd->devices_cnt, i, dev) {
3751672cf6dfSJoerg Roedel 			struct acpi_device_physical_node *pn;
3752672cf6dfSJoerg Roedel 			struct acpi_device *adev;
3753672cf6dfSJoerg Roedel 
3754672cf6dfSJoerg Roedel 			if (dev->bus != &acpi_bus_type)
3755672cf6dfSJoerg Roedel 				continue;
3756672cf6dfSJoerg Roedel 
3757672cf6dfSJoerg Roedel 			adev = to_acpi_device(dev);
3758672cf6dfSJoerg Roedel 			mutex_lock(&adev->physical_node_lock);
3759672cf6dfSJoerg Roedel 			list_for_each_entry(pn,
3760672cf6dfSJoerg Roedel 					    &adev->physical_node_list, node) {
3761672cf6dfSJoerg Roedel 				ret = iommu_probe_device(pn->dev);
3762672cf6dfSJoerg Roedel 				if (ret)
3763672cf6dfSJoerg Roedel 					break;
3764672cf6dfSJoerg Roedel 			}
3765672cf6dfSJoerg Roedel 			mutex_unlock(&adev->physical_node_lock);
3766672cf6dfSJoerg Roedel 
3767672cf6dfSJoerg Roedel 			if (ret)
3768672cf6dfSJoerg Roedel 				return ret;
3769672cf6dfSJoerg Roedel 		}
3770672cf6dfSJoerg Roedel 	}
3771672cf6dfSJoerg Roedel 
3772672cf6dfSJoerg Roedel 	return 0;
3773672cf6dfSJoerg Roedel }
3774672cf6dfSJoerg Roedel 
3775853788b9SLu Baolu static __init int tboot_force_iommu(void)
3776853788b9SLu Baolu {
3777853788b9SLu Baolu 	if (!tboot_enabled())
3778853788b9SLu Baolu 		return 0;
3779853788b9SLu Baolu 
3780853788b9SLu Baolu 	if (no_iommu || dmar_disabled)
3781853788b9SLu Baolu 		pr_warn("Forcing Intel-IOMMU to enabled\n");
3782853788b9SLu Baolu 
3783853788b9SLu Baolu 	dmar_disabled = 0;
3784853788b9SLu Baolu 	no_iommu = 0;
3785853788b9SLu Baolu 
3786853788b9SLu Baolu 	return 1;
3787853788b9SLu Baolu }
3788853788b9SLu Baolu 
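/*
 * Main initialization entry point: parse the DMAR table, initialize the
 * DMA-remapping hardware, register sysfs attributes and notifiers, and
 * finally enable translation on each IOMMU.
 */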
3789672cf6dfSJoerg Roedel int __init intel_iommu_init(void)
3790672cf6dfSJoerg Roedel {
3791672cf6dfSJoerg Roedel 	int ret = -ENODEV;
3792672cf6dfSJoerg Roedel 	struct dmar_drhd_unit *drhd;
3793672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
3794672cf6dfSJoerg Roedel 
3795672cf6dfSJoerg Roedel 	/*
3796672cf6dfSJoerg Roedel 	 * Intel IOMMU is required for a TXT/tboot launch or platform
3797672cf6dfSJoerg Roedel 	 * opt in, so enforce that.
3798672cf6dfSJoerg Roedel 	 */
37994d213e76SZhenzhong Duan 	force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
38004d213e76SZhenzhong Duan 		    platform_optin_force_iommu();
3801672cf6dfSJoerg Roedel 
3802672cf6dfSJoerg Roedel 	down_write(&dmar_global_lock);
3803672cf6dfSJoerg Roedel 	if (dmar_table_init()) {
3804672cf6dfSJoerg Roedel 		if (force_on)
3805672cf6dfSJoerg Roedel 			panic("tboot: Failed to initialize DMAR table\n");
3806672cf6dfSJoerg Roedel 		goto out_free_dmar;
3807672cf6dfSJoerg Roedel 	}
3808672cf6dfSJoerg Roedel 
3809672cf6dfSJoerg Roedel 	if (dmar_dev_scope_init() < 0) {
3810672cf6dfSJoerg Roedel 		if (force_on)
3811672cf6dfSJoerg Roedel 			panic("tboot: Failed to initialize DMAR device scope\n");
3812672cf6dfSJoerg Roedel 		goto out_free_dmar;
3813672cf6dfSJoerg Roedel 	}
3814672cf6dfSJoerg Roedel 
3815672cf6dfSJoerg Roedel 	up_write(&dmar_global_lock);
3816672cf6dfSJoerg Roedel 
3817672cf6dfSJoerg Roedel 	/*
3818672cf6dfSJoerg Roedel 	 * The bus notifier takes the dmar_global_lock, so lockdep will
3819672cf6dfSJoerg Roedel 	 * complain later when we register it under the lock.
3820672cf6dfSJoerg Roedel 	 */
3821672cf6dfSJoerg Roedel 	dmar_register_bus_notifier();
3822672cf6dfSJoerg Roedel 
3823672cf6dfSJoerg Roedel 	down_write(&dmar_global_lock);
3824672cf6dfSJoerg Roedel 
3825672cf6dfSJoerg Roedel 	if (!no_iommu)
3826672cf6dfSJoerg Roedel 		intel_iommu_debugfs_init();
3827672cf6dfSJoerg Roedel 
3828672cf6dfSJoerg Roedel 	if (no_iommu || dmar_disabled) {
3829672cf6dfSJoerg Roedel 		/*
3830672cf6dfSJoerg Roedel 		 * We exit the function here to ensure IOMMU's remapping and
3831672cf6dfSJoerg Roedel 		 * mempool aren't setup, which means that the IOMMU's PMRs
3832672cf6dfSJoerg Roedel 		 * won't be disabled via the call to init_dmars(). So disable
3833672cf6dfSJoerg Roedel 		 * it explicitly here. The PMRs were setup by tboot prior to
3834672cf6dfSJoerg Roedel 		 * calling SENTER, but the kernel is expected to reset/tear
3835672cf6dfSJoerg Roedel 		 * down the PMRs.
3836672cf6dfSJoerg Roedel 		 */
3837672cf6dfSJoerg Roedel 		if (intel_iommu_tboot_noforce) {
3838672cf6dfSJoerg Roedel 			for_each_iommu(iommu, drhd)
3839672cf6dfSJoerg Roedel 				iommu_disable_protect_mem_regions(iommu);
3840672cf6dfSJoerg Roedel 		}
3841672cf6dfSJoerg Roedel 
3842672cf6dfSJoerg Roedel 		/*
3843672cf6dfSJoerg Roedel 		 * Make sure the IOMMUs are switched off, even when we
3844672cf6dfSJoerg Roedel 		 * boot into a kexec kernel and the previous kernel left
3845672cf6dfSJoerg Roedel 		 * them enabled
3846672cf6dfSJoerg Roedel 		 */
3847672cf6dfSJoerg Roedel 		intel_disable_iommus();
3848672cf6dfSJoerg Roedel 		goto out_free_dmar;
3849672cf6dfSJoerg Roedel 	}
3850672cf6dfSJoerg Roedel 
3851672cf6dfSJoerg Roedel 	if (list_empty(&dmar_rmrr_units))
3852672cf6dfSJoerg Roedel 		pr_info("No RMRR found\n");
3853672cf6dfSJoerg Roedel 
3854672cf6dfSJoerg Roedel 	if (list_empty(&dmar_atsr_units))
3855672cf6dfSJoerg Roedel 		pr_info("No ATSR found\n");
3856672cf6dfSJoerg Roedel 
385731a75cbbSYian Chen 	if (list_empty(&dmar_satc_units))
385831a75cbbSYian Chen 		pr_info("No SATC found\n");
385931a75cbbSYian Chen 
3860672cf6dfSJoerg Roedel 	init_no_remapping_devices();
3861672cf6dfSJoerg Roedel 
3862672cf6dfSJoerg Roedel 	ret = init_dmars();
3863672cf6dfSJoerg Roedel 	if (ret) {
3864672cf6dfSJoerg Roedel 		if (force_on)
3865672cf6dfSJoerg Roedel 			panic("tboot: Failed to initialize DMARs\n");
3866672cf6dfSJoerg Roedel 		pr_err("Initialization failed\n");
3867c588072bSTom Murphy 		goto out_free_dmar;
3868672cf6dfSJoerg Roedel 	}
3869672cf6dfSJoerg Roedel 	up_write(&dmar_global_lock);
3870672cf6dfSJoerg Roedel 
3871672cf6dfSJoerg Roedel 	init_iommu_pm_ops();
3872672cf6dfSJoerg Roedel 
3873672cf6dfSJoerg Roedel 	down_read(&dmar_global_lock);
3874672cf6dfSJoerg Roedel 	for_each_active_iommu(iommu, drhd) {
3875a250c23fSRobin Murphy 		/*
3876a250c23fSRobin Murphy 		 * The flush queue implementation does not perform
3877a250c23fSRobin Murphy 		 * page-selective invalidations that are required for efficient
3878a250c23fSRobin Murphy 		 * TLB flushes in virtual environments.  The benefit of batching
3879a250c23fSRobin Murphy 		 * is likely to be much lower than the overhead of synchronizing
3880a250c23fSRobin Murphy 		 * the virtual and physical IOMMU page-tables.
3881a250c23fSRobin Murphy 		 */
3882257ec290STina Zhang 		if (cap_caching_mode(iommu->cap) &&
3883257ec290STina Zhang 		    !first_level_by_default(IOMMU_DOMAIN_DMA)) {
3884d0e108b8SZhen Lei 			pr_info_once("IOMMU batching disallowed due to virtualization\n");
3885308723e3SJohn Garry 			iommu_set_dma_strict();
3886a250c23fSRobin Murphy 		}
3887672cf6dfSJoerg Roedel 		iommu_device_sysfs_add(&iommu->iommu, NULL,
3888672cf6dfSJoerg Roedel 				       intel_iommu_groups,
3889672cf6dfSJoerg Roedel 				       "%s", iommu->name);
38902d471b20SRobin Murphy 		iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
3891d8a7c0cfSKan Liang 
3892d8a7c0cfSKan Liang 		iommu_pmu_register(iommu);
3893672cf6dfSJoerg Roedel 	}
3894672cf6dfSJoerg Roedel 	up_read(&dmar_global_lock);
3895672cf6dfSJoerg Roedel 
3896672cf6dfSJoerg Roedel 	if (si_domain && !hw_pass_through)
3897672cf6dfSJoerg Roedel 		register_memory_notifier(&intel_iommu_memory_nb);
3898672cf6dfSJoerg Roedel 
3899672cf6dfSJoerg Roedel 	down_read(&dmar_global_lock);
3900672cf6dfSJoerg Roedel 	if (probe_acpi_namespace_devices())
3901672cf6dfSJoerg Roedel 		pr_warn("ACPI name space devices didn't probe correctly\n");
3902672cf6dfSJoerg Roedel 
3903672cf6dfSJoerg Roedel 	/* Finally, we enable the DMA remapping hardware. */
3904672cf6dfSJoerg Roedel 	for_each_iommu(iommu, drhd) {
3905672cf6dfSJoerg Roedel 		if (!drhd->ignored && !translation_pre_enabled(iommu))
3906672cf6dfSJoerg Roedel 			iommu_enable_translation(iommu);
3907672cf6dfSJoerg Roedel 
3908672cf6dfSJoerg Roedel 		iommu_disable_protect_mem_regions(iommu);
3909672cf6dfSJoerg Roedel 	}
3910672cf6dfSJoerg Roedel 	up_read(&dmar_global_lock);
3911672cf6dfSJoerg Roedel 
3912672cf6dfSJoerg Roedel 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
3913672cf6dfSJoerg Roedel 
3914672cf6dfSJoerg Roedel 	intel_iommu_enabled = 1;
3915672cf6dfSJoerg Roedel 
3916672cf6dfSJoerg Roedel 	return 0;
3917672cf6dfSJoerg Roedel 
3918672cf6dfSJoerg Roedel out_free_dmar:
3919672cf6dfSJoerg Roedel 	intel_iommu_free_dmars();
3920672cf6dfSJoerg Roedel 	up_write(&dmar_global_lock);
3921672cf6dfSJoerg Roedel 	return ret;
3922672cf6dfSJoerg Roedel }
3923672cf6dfSJoerg Roedel 
3924672cf6dfSJoerg Roedel static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
3925672cf6dfSJoerg Roedel {
392637764b95SSanjay Kumar 	struct device_domain_info *info = opaque;
3927672cf6dfSJoerg Roedel 
392837764b95SSanjay Kumar 	domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
3929672cf6dfSJoerg Roedel 	return 0;
3930672cf6dfSJoerg Roedel }
3931672cf6dfSJoerg Roedel 
3932672cf6dfSJoerg Roedel /*
3933672cf6dfSJoerg Roedel  * NB - intel-iommu lacks any sort of reference counting for the users of
3934672cf6dfSJoerg Roedel  * dependent devices.  If multiple endpoints have intersecting dependent
3935672cf6dfSJoerg Roedel  * devices, unbinding the driver from any one of them will possibly leave
3936672cf6dfSJoerg Roedel  * the others unable to operate.
3937672cf6dfSJoerg Roedel  */
393837764b95SSanjay Kumar static void domain_context_clear(struct device_domain_info *info)
3939672cf6dfSJoerg Roedel {
3940fe2e0b6cSLu Baolu 	if (!dev_is_pci(info->dev)) {
394148f2183aSLu Baolu 		domain_context_clear_one(info, info->bus, info->devfn);
3942fe2e0b6cSLu Baolu 		return;
3943fe2e0b6cSLu Baolu 	}
3944672cf6dfSJoerg Roedel 
394537764b95SSanjay Kumar 	pci_for_each_dma_alias(to_pci_dev(info->dev),
394637764b95SSanjay Kumar 			       &domain_context_clear_one_cb, info);
3947672cf6dfSJoerg Roedel }
3948672cf6dfSJoerg Roedel 
3949db75c957SLu Baolu static void dmar_remove_one_dev_info(struct device *dev)
3950672cf6dfSJoerg Roedel {
3951db75c957SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
39525eaafdf0SLu Baolu 	struct dmar_domain *domain = info->domain;
3953db75c957SLu Baolu 	struct intel_iommu *iommu = info->iommu;
3954a349ffcbSLu Baolu 	unsigned long flags;
3955672cf6dfSJoerg Roedel 
3956db75c957SLu Baolu 	if (!dev_is_real_dma_subdevice(info->dev)) {
3957672cf6dfSJoerg Roedel 		if (dev_is_pci(info->dev) && sm_supported(iommu))
3958672cf6dfSJoerg Roedel 			intel_pasid_tear_down_entry(iommu, info->dev,
395942987801SJacob Pan 					IOMMU_NO_PASID, false);
3960672cf6dfSJoerg Roedel 
3961ba502132SLu Baolu 		iommu_disable_pci_caps(info);
396237764b95SSanjay Kumar 		domain_context_clear(info);
3963672cf6dfSJoerg Roedel 	}
3964672cf6dfSJoerg Roedel 
3965a349ffcbSLu Baolu 	spin_lock_irqsave(&domain->lock, flags);
3966586081d3SLu Baolu 	list_del(&info->link);
3967a349ffcbSLu Baolu 	spin_unlock_irqrestore(&domain->lock, flags);
3968672cf6dfSJoerg Roedel 
3969672cf6dfSJoerg Roedel 	domain_detach_iommu(domain, iommu);
3970db75c957SLu Baolu 	info->domain = NULL;
3971672cf6dfSJoerg Roedel }
3972672cf6dfSJoerg Roedel 
3973c7be17c2SLu Baolu /*
3974c7be17c2SLu Baolu  * Clear the page table pointer in context or pasid table entries so that
3975c7be17c2SLu Baolu  * all DMA requests without PASID from the device are blocked. If the page
3976c7be17c2SLu Baolu  * table has been set, clean up the data structures.
3977c7be17c2SLu Baolu  */
3978c7be17c2SLu Baolu static void device_block_translation(struct device *dev)
3979c7be17c2SLu Baolu {
3980c7be17c2SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3981c7be17c2SLu Baolu 	struct intel_iommu *iommu = info->iommu;
3982c7be17c2SLu Baolu 	unsigned long flags;
3983c7be17c2SLu Baolu 
3984ba502132SLu Baolu 	iommu_disable_pci_caps(info);
3985c7be17c2SLu Baolu 	if (!dev_is_real_dma_subdevice(dev)) {
3986c7be17c2SLu Baolu 		if (sm_supported(iommu))
3987c7be17c2SLu Baolu 			intel_pasid_tear_down_entry(iommu, dev,
398842987801SJacob Pan 						    IOMMU_NO_PASID, false);
3989c7be17c2SLu Baolu 		else
3990c7be17c2SLu Baolu 			domain_context_clear(info);
3991c7be17c2SLu Baolu 	}
3992c7be17c2SLu Baolu 
3993c7be17c2SLu Baolu 	if (!info->domain)
3994c7be17c2SLu Baolu 		return;
3995c7be17c2SLu Baolu 
3996c7be17c2SLu Baolu 	spin_lock_irqsave(&info->domain->lock, flags);
3997c7be17c2SLu Baolu 	list_del(&info->link);
3998c7be17c2SLu Baolu 	spin_unlock_irqrestore(&info->domain->lock, flags);
3999c7be17c2SLu Baolu 
4000c7be17c2SLu Baolu 	domain_detach_iommu(info->domain, iommu);
4001c7be17c2SLu Baolu 	info->domain = NULL;
4002c7be17c2SLu Baolu }
4003c7be17c2SLu Baolu 
4004672cf6dfSJoerg Roedel static int md_domain_init(struct dmar_domain *domain, int guest_width)
4005672cf6dfSJoerg Roedel {
4006672cf6dfSJoerg Roedel 	int adjust_width;
4007672cf6dfSJoerg Roedel 
4008672cf6dfSJoerg Roedel 	/* calculate AGAW */
4009672cf6dfSJoerg Roedel 	domain->gaw = guest_width;
4010672cf6dfSJoerg Roedel 	adjust_width = guestwidth_to_adjustwidth(guest_width);
4011672cf6dfSJoerg Roedel 	domain->agaw = width_to_agaw(adjust_width);
4012672cf6dfSJoerg Roedel 
40131f106ff0SParav Pandit 	domain->iommu_coherency = false;
4014672cf6dfSJoerg Roedel 	domain->iommu_superpage = 0;
4015672cf6dfSJoerg Roedel 	domain->max_addr = 0;
4016672cf6dfSJoerg Roedel 
4017672cf6dfSJoerg Roedel 	/* always allocate the top pgd */
40182552d3a2SJason Gunthorpe 	domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
4019672cf6dfSJoerg Roedel 	if (!domain->pgd)
4020672cf6dfSJoerg Roedel 		return -ENOMEM;
4021672cf6dfSJoerg Roedel 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4022672cf6dfSJoerg Roedel 	return 0;
4023672cf6dfSJoerg Roedel }
4024672cf6dfSJoerg Roedel 
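/*
 * The blocking domain does not maintain any page table; attaching a device
 * to it simply blocks all DMA translation for that device.
 */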
402535a99c54SLu Baolu static int blocking_domain_attach_dev(struct iommu_domain *domain,
402635a99c54SLu Baolu 				      struct device *dev)
402735a99c54SLu Baolu {
402835a99c54SLu Baolu 	device_block_translation(dev);
402935a99c54SLu Baolu 	return 0;
403035a99c54SLu Baolu }
403135a99c54SLu Baolu 
403235a99c54SLu Baolu static struct iommu_domain blocking_domain = {
403335a99c54SLu Baolu 	.ops = &(const struct iommu_domain_ops) {
403435a99c54SLu Baolu 		.attach_dev	= blocking_domain_attach_dev,
403535a99c54SLu Baolu 		.free		= intel_iommu_domain_free
403635a99c54SLu Baolu 	}
403735a99c54SLu Baolu };
403835a99c54SLu Baolu 
4039672cf6dfSJoerg Roedel static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4040672cf6dfSJoerg Roedel {
4041672cf6dfSJoerg Roedel 	struct dmar_domain *dmar_domain;
4042672cf6dfSJoerg Roedel 	struct iommu_domain *domain;
4043672cf6dfSJoerg Roedel 
4044672cf6dfSJoerg Roedel 	switch (type) {
404535a99c54SLu Baolu 	case IOMMU_DOMAIN_BLOCKED:
404635a99c54SLu Baolu 		return &blocking_domain;
4047672cf6dfSJoerg Roedel 	case IOMMU_DOMAIN_DMA:
4048672cf6dfSJoerg Roedel 	case IOMMU_DOMAIN_UNMANAGED:
4049b34380a6SLu Baolu 		dmar_domain = alloc_domain(type);
4050672cf6dfSJoerg Roedel 		if (!dmar_domain) {
4051672cf6dfSJoerg Roedel 			pr_err("Can't allocate dmar_domain\n");
4052672cf6dfSJoerg Roedel 			return NULL;
4053672cf6dfSJoerg Roedel 		}
4054672cf6dfSJoerg Roedel 		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4055672cf6dfSJoerg Roedel 			pr_err("Domain initialization failed\n");
4056672cf6dfSJoerg Roedel 			domain_exit(dmar_domain);
4057672cf6dfSJoerg Roedel 			return NULL;
4058672cf6dfSJoerg Roedel 		}
4059672cf6dfSJoerg Roedel 
4060672cf6dfSJoerg Roedel 		domain = &dmar_domain->domain;
4061672cf6dfSJoerg Roedel 		domain->geometry.aperture_start = 0;
4062672cf6dfSJoerg Roedel 		domain->geometry.aperture_end   =
4063672cf6dfSJoerg Roedel 				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
4064672cf6dfSJoerg Roedel 		domain->geometry.force_aperture = true;
4065672cf6dfSJoerg Roedel 
4066672cf6dfSJoerg Roedel 		return domain;
4067672cf6dfSJoerg Roedel 	case IOMMU_DOMAIN_IDENTITY:
4068672cf6dfSJoerg Roedel 		return &si_domain->domain;
4069eaca8889SLu Baolu 	case IOMMU_DOMAIN_SVA:
4070eaca8889SLu Baolu 		return intel_svm_domain_alloc();
4071672cf6dfSJoerg Roedel 	default:
4072672cf6dfSJoerg Roedel 		return NULL;
4073672cf6dfSJoerg Roedel 	}
4074672cf6dfSJoerg Roedel 
4075672cf6dfSJoerg Roedel 	return NULL;
4076672cf6dfSJoerg Roedel }
4077672cf6dfSJoerg Roedel 
4078672cf6dfSJoerg Roedel static void intel_iommu_domain_free(struct iommu_domain *domain)
4079672cf6dfSJoerg Roedel {
408035a99c54SLu Baolu 	if (domain != &si_domain->domain && domain != &blocking_domain)
4081672cf6dfSJoerg Roedel 		domain_exit(to_dmar_domain(domain));
4082672cf6dfSJoerg Roedel }
4083672cf6dfSJoerg Roedel 
4084672cf6dfSJoerg Roedel static int prepare_domain_attach_device(struct iommu_domain *domain,
4085672cf6dfSJoerg Roedel 					struct device *dev)
4086672cf6dfSJoerg Roedel {
4087672cf6dfSJoerg Roedel 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4088672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
4089672cf6dfSJoerg Roedel 	int addr_width;
4090672cf6dfSJoerg Roedel 
4091dd6692f1SLu Baolu 	iommu = device_to_iommu(dev, NULL, NULL);
4092672cf6dfSJoerg Roedel 	if (!iommu)
4093672cf6dfSJoerg Roedel 		return -ENODEV;
4094672cf6dfSJoerg Roedel 
40959d6ab26aSLu Baolu 	if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
4096f4a14773SNicolin Chen 		return -EINVAL;
40979d6ab26aSLu Baolu 
4098672cf6dfSJoerg Roedel 	/* check if this iommu agaw is sufficient for max mapped address */
4099672cf6dfSJoerg Roedel 	addr_width = agaw_to_width(iommu->agaw);
4100672cf6dfSJoerg Roedel 	if (addr_width > cap_mgaw(iommu->cap))
4101672cf6dfSJoerg Roedel 		addr_width = cap_mgaw(iommu->cap);
4102672cf6dfSJoerg Roedel 
4103f4a14773SNicolin Chen 	if (dmar_domain->max_addr > (1LL << addr_width))
4104f4a14773SNicolin Chen 		return -EINVAL;
4105672cf6dfSJoerg Roedel 	dmar_domain->gaw = addr_width;
4106672cf6dfSJoerg Roedel 
4107672cf6dfSJoerg Roedel 	/*
4108672cf6dfSJoerg Roedel 	 * Knock out extra levels of page tables if necessary
4109672cf6dfSJoerg Roedel 	 */
4110672cf6dfSJoerg Roedel 	while (iommu->agaw < dmar_domain->agaw) {
4111672cf6dfSJoerg Roedel 		struct dma_pte *pte;
4112672cf6dfSJoerg Roedel 
4113672cf6dfSJoerg Roedel 		pte = dmar_domain->pgd;
4114672cf6dfSJoerg Roedel 		if (dma_pte_present(pte)) {
41157a0f06c1SParav Pandit 			dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4116672cf6dfSJoerg Roedel 			free_pgtable_page(pte);
4117672cf6dfSJoerg Roedel 		}
4118672cf6dfSJoerg Roedel 		dmar_domain->agaw--;
4119672cf6dfSJoerg Roedel 	}
4120672cf6dfSJoerg Roedel 
4121672cf6dfSJoerg Roedel 	return 0;
4122672cf6dfSJoerg Roedel }
4123672cf6dfSJoerg Roedel 
4124672cf6dfSJoerg Roedel static int intel_iommu_attach_device(struct iommu_domain *domain,
4125672cf6dfSJoerg Roedel 				     struct device *dev)
4126672cf6dfSJoerg Roedel {
4127b1cf1563SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4128672cf6dfSJoerg Roedel 	int ret;
4129672cf6dfSJoerg Roedel 
4130586081d3SLu Baolu 	if (info->domain)
4131c7be17c2SLu Baolu 		device_block_translation(dev);
4132672cf6dfSJoerg Roedel 
4133672cf6dfSJoerg Roedel 	ret = prepare_domain_attach_device(domain, dev);
4134672cf6dfSJoerg Roedel 	if (ret)
4135672cf6dfSJoerg Roedel 		return ret;
4136672cf6dfSJoerg Roedel 
4137a8204479SLu Baolu 	return dmar_domain_attach_device(to_dmar_domain(domain), dev);
4138672cf6dfSJoerg Roedel }
4139672cf6dfSJoerg Roedel 
4140672cf6dfSJoerg Roedel static int intel_iommu_map(struct iommu_domain *domain,
4141672cf6dfSJoerg Roedel 			   unsigned long iova, phys_addr_t hpa,
4142672cf6dfSJoerg Roedel 			   size_t size, int iommu_prot, gfp_t gfp)
4143672cf6dfSJoerg Roedel {
4144672cf6dfSJoerg Roedel 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4145672cf6dfSJoerg Roedel 	u64 max_addr;
4146672cf6dfSJoerg Roedel 	int prot = 0;
4147672cf6dfSJoerg Roedel 
4148672cf6dfSJoerg Roedel 	if (iommu_prot & IOMMU_READ)
4149672cf6dfSJoerg Roedel 		prot |= DMA_PTE_READ;
4150672cf6dfSJoerg Roedel 	if (iommu_prot & IOMMU_WRITE)
4151672cf6dfSJoerg Roedel 		prot |= DMA_PTE_WRITE;
4152fc0051cbSLu Baolu 	if (dmar_domain->set_pte_snp)
4153672cf6dfSJoerg Roedel 		prot |= DMA_PTE_SNP;
4154672cf6dfSJoerg Roedel 
4155672cf6dfSJoerg Roedel 	max_addr = iova + size;
4156672cf6dfSJoerg Roedel 	if (dmar_domain->max_addr < max_addr) {
4157672cf6dfSJoerg Roedel 		u64 end;
4158672cf6dfSJoerg Roedel 
4159672cf6dfSJoerg Roedel 		/* check if minimum agaw is sufficient for mapped address */
4160672cf6dfSJoerg Roedel 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4161672cf6dfSJoerg Roedel 		if (end < max_addr) {
4162672cf6dfSJoerg Roedel 			pr_err("%s: iommu width (%d) is not "
4163672cf6dfSJoerg Roedel 			       "sufficient for the mapped address (%llx)\n",
4164672cf6dfSJoerg Roedel 			       __func__, dmar_domain->gaw, max_addr);
4165672cf6dfSJoerg Roedel 			return -EFAULT;
4166672cf6dfSJoerg Roedel 		}
4167672cf6dfSJoerg Roedel 		dmar_domain->max_addr = max_addr;
4168672cf6dfSJoerg Roedel 	}
4169672cf6dfSJoerg Roedel 	/* Round up size to next multiple of PAGE_SIZE, if it and
4170672cf6dfSJoerg Roedel 	   the low bits of hpa would take us onto the next page */
4171672cf6dfSJoerg Roedel 	size = aligned_nrpages(hpa, size);
4172933fcd01SLu Baolu 	return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
41732d4d7676SJason Gunthorpe 				hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
4174672cf6dfSJoerg Roedel }
4175672cf6dfSJoerg Roedel 
41763f34f125SLu Baolu static int intel_iommu_map_pages(struct iommu_domain *domain,
41773f34f125SLu Baolu 				 unsigned long iova, phys_addr_t paddr,
41783f34f125SLu Baolu 				 size_t pgsize, size_t pgcount,
41793f34f125SLu Baolu 				 int prot, gfp_t gfp, size_t *mapped)
41803f34f125SLu Baolu {
41813f34f125SLu Baolu 	unsigned long pgshift = __ffs(pgsize);
41823f34f125SLu Baolu 	size_t size = pgcount << pgshift;
41833f34f125SLu Baolu 	int ret;
41843f34f125SLu Baolu 
41853f34f125SLu Baolu 	if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
41863f34f125SLu Baolu 		return -EINVAL;
41873f34f125SLu Baolu 
41883f34f125SLu Baolu 	if (!IS_ALIGNED(iova | paddr, pgsize))
41893f34f125SLu Baolu 		return -EINVAL;
41903f34f125SLu Baolu 
41913f34f125SLu Baolu 	ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
41923f34f125SLu Baolu 	if (!ret && mapped)
41933f34f125SLu Baolu 		*mapped = size;
41943f34f125SLu Baolu 
41953f34f125SLu Baolu 	return ret;
41963f34f125SLu Baolu }
41973f34f125SLu Baolu 
4198672cf6dfSJoerg Roedel static size_t intel_iommu_unmap(struct iommu_domain *domain,
4199672cf6dfSJoerg Roedel 				unsigned long iova, size_t size,
4200672cf6dfSJoerg Roedel 				struct iommu_iotlb_gather *gather)
4201672cf6dfSJoerg Roedel {
4202672cf6dfSJoerg Roedel 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4203672cf6dfSJoerg Roedel 	unsigned long start_pfn, last_pfn;
42042a2b8eaaSTom Murphy 	int level = 0;
4205672cf6dfSJoerg Roedel 
4206672cf6dfSJoerg Roedel 	/* Cope with horrid API which requires us to unmap more than the
4207672cf6dfSJoerg Roedel 	   size argument if it happens to be a large-page mapping. */
4208cbf2f9e8STina Zhang 	if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
4209cbf2f9e8STina Zhang 				     &level, GFP_ATOMIC)))
4210cbf2f9e8STina Zhang 		return 0;
4211672cf6dfSJoerg Roedel 
4212672cf6dfSJoerg Roedel 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4213672cf6dfSJoerg Roedel 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4214672cf6dfSJoerg Roedel 
4215672cf6dfSJoerg Roedel 	start_pfn = iova >> VTD_PAGE_SHIFT;
4216672cf6dfSJoerg Roedel 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4217672cf6dfSJoerg Roedel 
421887f60cc6SMatthew Wilcox (Oracle) 	domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
4219672cf6dfSJoerg Roedel 
4220672cf6dfSJoerg Roedel 	if (dmar_domain->max_addr == iova + size)
4221672cf6dfSJoerg Roedel 		dmar_domain->max_addr = iova;
4222672cf6dfSJoerg Roedel 
422316a75bbeSJacob Pan 	/*
422416a75bbeSJacob Pan 	 * We do not use page-selective IOTLB invalidation in the flush
422516a75bbeSJacob Pan 	 * queue, so there is no need to track pages or sync the IOTLB here.
422616a75bbeSJacob Pan 	 */
422716a75bbeSJacob Pan 	if (!iommu_iotlb_gather_queued(gather))
42282a2b8eaaSTom Murphy 		iommu_iotlb_gather_add_page(domain, gather, iova, size);
42292a2b8eaaSTom Murphy 
4230672cf6dfSJoerg Roedel 	return size;
4231672cf6dfSJoerg Roedel }
4232672cf6dfSJoerg Roedel 
42333f34f125SLu Baolu static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
42343f34f125SLu Baolu 				      unsigned long iova,
42353f34f125SLu Baolu 				      size_t pgsize, size_t pgcount,
42363f34f125SLu Baolu 				      struct iommu_iotlb_gather *gather)
42373f34f125SLu Baolu {
42383f34f125SLu Baolu 	unsigned long pgshift = __ffs(pgsize);
42393f34f125SLu Baolu 	size_t size = pgcount << pgshift;
42403f34f125SLu Baolu 
42413f34f125SLu Baolu 	return intel_iommu_unmap(domain, iova, size, gather);
42423f34f125SLu Baolu }
42433f34f125SLu Baolu 
42442a2b8eaaSTom Murphy static void intel_iommu_tlb_sync(struct iommu_domain *domain,
42452a2b8eaaSTom Murphy 				 struct iommu_iotlb_gather *gather)
42462a2b8eaaSTom Murphy {
42472a2b8eaaSTom Murphy 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
42482a2b8eaaSTom Murphy 	unsigned long iova_pfn = IOVA_PFN(gather->start);
42492a2b8eaaSTom Murphy 	size_t size = gather->end - gather->start;
4250ba949f4cSLu Baolu 	struct iommu_domain_info *info;
4251405a43ccSLu Baolu 	unsigned long start_pfn;
42522a2b8eaaSTom Murphy 	unsigned long nrpages;
4253ba949f4cSLu Baolu 	unsigned long i;
42542a2b8eaaSTom Murphy 
42552a2b8eaaSTom Murphy 	nrpages = aligned_nrpages(gather->start, size);
4256fb5f50a4SYanfei Xu 	start_pfn = mm_to_dma_pfn_start(iova_pfn);
42572a2b8eaaSTom Murphy 
4258ba949f4cSLu Baolu 	xa_for_each(&dmar_domain->iommu_array, i, info)
4259ba949f4cSLu Baolu 		iommu_flush_iotlb_psi(info->iommu, dmar_domain,
426087f60cc6SMatthew Wilcox (Oracle) 				      start_pfn, nrpages,
426187f60cc6SMatthew Wilcox (Oracle) 				      list_empty(&gather->freelist), 0);
42622a2b8eaaSTom Murphy 
426387f60cc6SMatthew Wilcox (Oracle) 	put_pages_list(&gather->freelist);
42642a2b8eaaSTom Murphy }
42652a2b8eaaSTom Murphy 
4266672cf6dfSJoerg Roedel static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4267672cf6dfSJoerg Roedel 					    dma_addr_t iova)
4268672cf6dfSJoerg Roedel {
4269672cf6dfSJoerg Roedel 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4270672cf6dfSJoerg Roedel 	struct dma_pte *pte;
4271672cf6dfSJoerg Roedel 	int level = 0;
4272672cf6dfSJoerg Roedel 	u64 phys = 0;
4273672cf6dfSJoerg Roedel 
42742d4d7676SJason Gunthorpe 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
42752d4d7676SJason Gunthorpe 			     GFP_ATOMIC);
4276672cf6dfSJoerg Roedel 	if (pte && dma_pte_present(pte))
4277672cf6dfSJoerg Roedel 		phys = dma_pte_addr(pte) +
4278672cf6dfSJoerg Roedel 			(iova & (BIT_MASK(level_to_offset_bits(level) +
4279672cf6dfSJoerg Roedel 						VTD_PAGE_SHIFT) - 1));
4280672cf6dfSJoerg Roedel 
4281672cf6dfSJoerg Roedel 	return phys;
4282672cf6dfSJoerg Roedel }
4283672cf6dfSJoerg Roedel 
4284fc0051cbSLu Baolu static bool domain_support_force_snooping(struct dmar_domain *domain)
4285fc0051cbSLu Baolu {
4286fc0051cbSLu Baolu 	struct device_domain_info *info;
4287fc0051cbSLu Baolu 	bool support = true;
4288fc0051cbSLu Baolu 
42895eaafdf0SLu Baolu 	assert_spin_locked(&domain->lock);
4290fc0051cbSLu Baolu 	list_for_each_entry(info, &domain->devices, link) {
4291fc0051cbSLu Baolu 		if (!ecap_sc_support(info->iommu->ecap)) {
4292fc0051cbSLu Baolu 			support = false;
4293fc0051cbSLu Baolu 			break;
4294fc0051cbSLu Baolu 		}
4295fc0051cbSLu Baolu 	}
4296fc0051cbSLu Baolu 
4297fc0051cbSLu Baolu 	return support;
4298fc0051cbSLu Baolu }
4299fc0051cbSLu Baolu 
4300fc0051cbSLu Baolu static void domain_set_force_snooping(struct dmar_domain *domain)
4301fc0051cbSLu Baolu {
4302fc0051cbSLu Baolu 	struct device_domain_info *info;
4303fc0051cbSLu Baolu 
43045eaafdf0SLu Baolu 	assert_spin_locked(&domain->lock);
4305fc0051cbSLu Baolu 	/*
4306fc0051cbSLu Baolu 	 * Second level page table supports per-PTE snoop control. The
4307fc0051cbSLu Baolu 	 * iommu_map() interface will handle this by setting SNP bit.
4308fc0051cbSLu Baolu 	 */
4309e5b0feb4SLu Baolu 	if (!domain->use_first_level) {
4310fc0051cbSLu Baolu 		domain->set_pte_snp = true;
4311fc0051cbSLu Baolu 		return;
4312fc0051cbSLu Baolu 	}
4313fc0051cbSLu Baolu 
4314fc0051cbSLu Baolu 	list_for_each_entry(info, &domain->devices, link)
4315fc0051cbSLu Baolu 		intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
431642987801SJacob Pan 						     IOMMU_NO_PASID);
4317fc0051cbSLu Baolu }
4318fc0051cbSLu Baolu 
43196043257bSJason Gunthorpe static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
43206043257bSJason Gunthorpe {
43216043257bSJason Gunthorpe 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4322a349ffcbSLu Baolu 	unsigned long flags;
43236043257bSJason Gunthorpe 
4324fc0051cbSLu Baolu 	if (dmar_domain->force_snooping)
4325fc0051cbSLu Baolu 		return true;
4326fc0051cbSLu Baolu 
4327a349ffcbSLu Baolu 	spin_lock_irqsave(&dmar_domain->lock, flags);
43289cdfbfc6SLu Baolu 	if (!domain_support_force_snooping(dmar_domain) ||
43299cdfbfc6SLu Baolu 	    (!dmar_domain->use_first_level && dmar_domain->has_mappings)) {
4330a349ffcbSLu Baolu 		spin_unlock_irqrestore(&dmar_domain->lock, flags);
43316043257bSJason Gunthorpe 		return false;
4332fc0051cbSLu Baolu 	}
4333fc0051cbSLu Baolu 
4334fc0051cbSLu Baolu 	domain_set_force_snooping(dmar_domain);
43356043257bSJason Gunthorpe 	dmar_domain->force_snooping = true;
4336a349ffcbSLu Baolu 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
4337fc0051cbSLu Baolu 
43386043257bSJason Gunthorpe 	return true;
43396043257bSJason Gunthorpe }
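
/*
 * A sketch of the consumer side, assuming a caller such as VFIO or iommufd
 * that must guarantee no-snoop DMA remains coherent before exposing the
 * domain to a guest:
 *
 *	if (!domain->ops->enforce_cache_coherency ||
 *	    !domain->ops->enforce_cache_coherency(domain))
 *		return -EINVAL;
 *
 * Once this returns true, force_snooping is never cleared for the lifetime
 * of the domain, so subsequent mappings are created with snooping enforced.
 */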
43406043257bSJason Gunthorpe 
4341359ad157SRobin Murphy static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
4342672cf6dfSJoerg Roedel {
43434989764dSJason Gunthorpe 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4344672cf6dfSJoerg Roedel 
43454989764dSJason Gunthorpe 	switch (cap) {
43464989764dSJason Gunthorpe 	case IOMMU_CAP_CACHE_COHERENCY:
43474a20ce0fSRobin Murphy 	case IOMMU_CAP_DEFERRED_FLUSH:
43484989764dSJason Gunthorpe 		return true;
43494989764dSJason Gunthorpe 	case IOMMU_CAP_PRE_BOOT_PROTECTION:
43504989764dSJason Gunthorpe 		return dmar_platform_optin();
43514989764dSJason Gunthorpe 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
43524989764dSJason Gunthorpe 		return ecap_sc_support(info->iommu->ecap);
43534989764dSJason Gunthorpe 	default:
4354672cf6dfSJoerg Roedel 		return false;
4355672cf6dfSJoerg Roedel 	}
43564989764dSJason Gunthorpe }
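
/*
 * A sketch of the caller side: other subsystems normally query these bits
 * through the generic helper rather than this callback directly, e.g.
 *
 *	if (!device_iommu_capable(dev, IOMMU_CAP_PRE_BOOT_PROTECTION))
 *		pr_info("platform did not opt in to pre-boot DMA protection\n");
 *
 * which for VT-d resolves to the dmar_platform_optin() check above.
 */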
4357672cf6dfSJoerg Roedel 
4358672cf6dfSJoerg Roedel static struct iommu_device *intel_iommu_probe_device(struct device *dev)
4359672cf6dfSJoerg Roedel {
4360586081d3SLu Baolu 	struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
4361586081d3SLu Baolu 	struct device_domain_info *info;
4362672cf6dfSJoerg Roedel 	struct intel_iommu *iommu;
4363586081d3SLu Baolu 	u8 bus, devfn;
4364ec62b442SLu Baolu 	int ret;
4365672cf6dfSJoerg Roedel 
4366586081d3SLu Baolu 	iommu = device_to_iommu(dev, &bus, &devfn);
4367c919739cSRobin Murphy 	if (!iommu || !iommu->iommu.ops)
4368672cf6dfSJoerg Roedel 		return ERR_PTR(-ENODEV);
4369672cf6dfSJoerg Roedel 
4370586081d3SLu Baolu 	info = kzalloc(sizeof(*info), GFP_KERNEL);
4371586081d3SLu Baolu 	if (!info)
4372586081d3SLu Baolu 		return ERR_PTR(-ENOMEM);
4373586081d3SLu Baolu 
4374586081d3SLu Baolu 	if (dev_is_real_dma_subdevice(dev)) {
4375586081d3SLu Baolu 		info->bus = pdev->bus->number;
4376586081d3SLu Baolu 		info->devfn = pdev->devfn;
4377586081d3SLu Baolu 		info->segment = pci_domain_nr(pdev->bus);
4378586081d3SLu Baolu 	} else {
4379586081d3SLu Baolu 		info->bus = bus;
4380586081d3SLu Baolu 		info->devfn = devfn;
4381586081d3SLu Baolu 		info->segment = iommu->segment;
4382586081d3SLu Baolu 	}
4383586081d3SLu Baolu 
4384586081d3SLu Baolu 	info->dev = dev;
4385586081d3SLu Baolu 	info->iommu = iommu;
4386586081d3SLu Baolu 	if (dev_is_pci(dev)) {
4387586081d3SLu Baolu 		if (ecap_dev_iotlb_support(iommu->ecap) &&
4388586081d3SLu Baolu 		    pci_ats_supported(pdev) &&
4389e65a6897SJacob Pan 		    dmar_ats_supported(pdev, iommu)) {
4390586081d3SLu Baolu 			info->ats_supported = 1;
4391e65a6897SJacob Pan 			info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
43925ae40080SLu Baolu 
43935ae40080SLu Baolu 			/*
43945ae40080SLu Baolu 			 * For IOMMU that supports device IOTLB throttling
43955ae40080SLu Baolu 			 * (DIT), we assign PFSID to the invalidation desc
43965ae40080SLu Baolu 			 * of a VF such that IOMMU HW can gauge queue depth
43975ae40080SLu Baolu 			 * at PF level. If DIT is not set, PFSID will be
43985ae40080SLu Baolu 			 * treated as reserved, which should be set to 0.
43995ae40080SLu Baolu 			 */
44005ae40080SLu Baolu 			if (ecap_dit(iommu->ecap))
44015ae40080SLu Baolu 				info->pfsid = pci_dev_id(pci_physfn(pdev));
44025ae40080SLu Baolu 			info->ats_qdep = pci_ats_queue_depth(pdev);
4403e65a6897SJacob Pan 		}
4404586081d3SLu Baolu 		if (sm_supported(iommu)) {
4405586081d3SLu Baolu 			if (pasid_supported(iommu)) {
4406586081d3SLu Baolu 				int features = pci_pasid_features(pdev);
4407586081d3SLu Baolu 
4408586081d3SLu Baolu 				if (features >= 0)
4409586081d3SLu Baolu 					info->pasid_supported = features | 1;
4410586081d3SLu Baolu 			}
4411586081d3SLu Baolu 
4412586081d3SLu Baolu 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
4413586081d3SLu Baolu 			    pci_pri_supported(pdev))
4414586081d3SLu Baolu 				info->pri_supported = 1;
4415586081d3SLu Baolu 		}
4416586081d3SLu Baolu 	}
4417586081d3SLu Baolu 
4418586081d3SLu Baolu 	dev_iommu_priv_set(dev, info);
4419672cf6dfSJoerg Roedel 
4420ec62b442SLu Baolu 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
4421ec62b442SLu Baolu 		ret = intel_pasid_alloc_table(dev);
4422ec62b442SLu Baolu 		if (ret) {
4423ec62b442SLu Baolu 			dev_err(dev, "PASID table allocation failed\n");
4424ec62b442SLu Baolu 			dev_iommu_priv_set(dev, NULL);
4425ec62b442SLu Baolu 			kfree(info);
4426ec62b442SLu Baolu 			return ERR_PTR(ret);
4427ec62b442SLu Baolu 		}
4428ec62b442SLu Baolu 	}
4429ec62b442SLu Baolu 
4430672cf6dfSJoerg Roedel 	return &iommu->iommu;
4431672cf6dfSJoerg Roedel }
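
/*
 * A worked example of the PFSID assignment in intel_iommu_probe_device(),
 * assuming a hypothetical VF at 0000:3b:10.2 whose physical function is
 * 0000:3b:00.0:
 *
 *	pci_physfn(pdev)  ->  the PF's pci_dev (bus 0x3b, devfn 0x00)
 *	pci_dev_id(...)   ->  PCI_DEVID(0x3b, 0x00) = (0x3b << 8) | 0x00 = 0x3b00
 *
 * Device-IOTLB invalidation descriptors for the VF then carry the PF's
 * source-id, letting hardware with DIT gauge the queue depth per PF.
 */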
4432672cf6dfSJoerg Roedel 
4433672cf6dfSJoerg Roedel static void intel_iommu_release_device(struct device *dev)
4434672cf6dfSJoerg Roedel {
4435586081d3SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4436672cf6dfSJoerg Roedel 
4437672cf6dfSJoerg Roedel 	dmar_remove_one_dev_info(dev);
4438ec62b442SLu Baolu 	intel_pasid_free_table(dev);
4439586081d3SLu Baolu 	dev_iommu_priv_set(dev, NULL);
4440586081d3SLu Baolu 	kfree(info);
4441672cf6dfSJoerg Roedel 	set_dma_ops(dev, NULL);
4442672cf6dfSJoerg Roedel }
4443672cf6dfSJoerg Roedel 
4444672cf6dfSJoerg Roedel static void intel_iommu_probe_finalize(struct device *dev)
4445672cf6dfSJoerg Roedel {
4446672cf6dfSJoerg Roedel 	set_dma_ops(dev, NULL);
444778ca0784SRobin Murphy 	iommu_setup_dma_ops(dev, 0, U64_MAX);
4448672cf6dfSJoerg Roedel }
4449672cf6dfSJoerg Roedel 
4450672cf6dfSJoerg Roedel static void intel_iommu_get_resv_regions(struct device *device,
4451672cf6dfSJoerg Roedel 					 struct list_head *head)
4452672cf6dfSJoerg Roedel {
4453672cf6dfSJoerg Roedel 	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
4454672cf6dfSJoerg Roedel 	struct iommu_resv_region *reg;
4455672cf6dfSJoerg Roedel 	struct dmar_rmrr_unit *rmrr;
4456672cf6dfSJoerg Roedel 	struct device *i_dev;
4457672cf6dfSJoerg Roedel 	int i;
4458672cf6dfSJoerg Roedel 
4459bf638a65SLu Baolu 	rcu_read_lock();
4460672cf6dfSJoerg Roedel 	for_each_rmrr_units(rmrr) {
4461672cf6dfSJoerg Roedel 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
4462672cf6dfSJoerg Roedel 					  i, i_dev) {
4463672cf6dfSJoerg Roedel 			struct iommu_resv_region *resv;
4464672cf6dfSJoerg Roedel 			enum iommu_resv_type type;
4465672cf6dfSJoerg Roedel 			size_t length;
4466672cf6dfSJoerg Roedel 
4467672cf6dfSJoerg Roedel 			if (i_dev != device &&
4468672cf6dfSJoerg Roedel 			    !is_downstream_to_pci_bridge(device, i_dev))
4469672cf6dfSJoerg Roedel 				continue;
4470672cf6dfSJoerg Roedel 
4471672cf6dfSJoerg Roedel 			length = rmrr->end_address - rmrr->base_address + 1;
4472672cf6dfSJoerg Roedel 
4473672cf6dfSJoerg Roedel 			type = device_rmrr_is_relaxable(device) ?
4474672cf6dfSJoerg Roedel 				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
4475672cf6dfSJoerg Roedel 
4476672cf6dfSJoerg Roedel 			resv = iommu_alloc_resv_region(rmrr->base_address,
44770251d010SLu Baolu 						       length, prot, type,
4478bf638a65SLu Baolu 						       GFP_ATOMIC);
4479672cf6dfSJoerg Roedel 			if (!resv)
4480672cf6dfSJoerg Roedel 				break;
4481672cf6dfSJoerg Roedel 
4482672cf6dfSJoerg Roedel 			list_add_tail(&resv->list, head);
4483672cf6dfSJoerg Roedel 		}
4484672cf6dfSJoerg Roedel 	}
4485bf638a65SLu Baolu 	rcu_read_unlock();
4486672cf6dfSJoerg Roedel 
4487672cf6dfSJoerg Roedel #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
4488672cf6dfSJoerg Roedel 	if (dev_is_pci(device)) {
4489672cf6dfSJoerg Roedel 		struct pci_dev *pdev = to_pci_dev(device);
4490672cf6dfSJoerg Roedel 
4491672cf6dfSJoerg Roedel 		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
4492672cf6dfSJoerg Roedel 			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
44930251d010SLu Baolu 					IOMMU_RESV_DIRECT_RELAXABLE,
44940251d010SLu Baolu 					GFP_KERNEL);
4495672cf6dfSJoerg Roedel 			if (reg)
4496672cf6dfSJoerg Roedel 				list_add_tail(&reg->list, head);
4497672cf6dfSJoerg Roedel 		}
4498672cf6dfSJoerg Roedel 	}
4499672cf6dfSJoerg Roedel #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
4500672cf6dfSJoerg Roedel 
4501672cf6dfSJoerg Roedel 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
4502672cf6dfSJoerg Roedel 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
45030251d010SLu Baolu 				      0, IOMMU_RESV_MSI, GFP_KERNEL);
4504672cf6dfSJoerg Roedel 	if (!reg)
4505672cf6dfSJoerg Roedel 		return;
4506672cf6dfSJoerg Roedel 	list_add_tail(&reg->list, head);
4507672cf6dfSJoerg Roedel }
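
/*
 * A sketch of how a consumer reads these regions back through the core
 * API (assuming the caller holds a reference to the device's group):
 *
 *	LIST_HEAD(resv);
 *	struct iommu_resv_region *r;
 *
 *	if (iommu_get_group_resv_regions(group, &resv))
 *		return;
 *	list_for_each_entry(r, &resv, list)
 *		pr_info("resv [%pa + %zx] type %d\n", &r->start, r->length, r->type);
 *	iommu_put_resv_regions(dev, &resv);
 *
 * Direct (RMRR) ranges must be identity-mapped and carved out of the IOVA
 * allocator before the device is allowed to DMA.
 */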
4508672cf6dfSJoerg Roedel 
4509672cf6dfSJoerg Roedel static struct iommu_group *intel_iommu_device_group(struct device *dev)
4510672cf6dfSJoerg Roedel {
4511672cf6dfSJoerg Roedel 	if (dev_is_pci(dev))
4512672cf6dfSJoerg Roedel 		return pci_device_group(dev);
4513672cf6dfSJoerg Roedel 	return generic_device_group(dev);
4514672cf6dfSJoerg Roedel }
4515672cf6dfSJoerg Roedel 
45164c82b886SLu Baolu static int intel_iommu_enable_sva(struct device *dev)
45174c82b886SLu Baolu {
4518586081d3SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4519934ed458SColin Ian King 	struct intel_iommu *iommu;
45204c82b886SLu Baolu 
4521934ed458SColin Ian King 	if (!info || dmar_disabled)
4522934ed458SColin Ian King 		return -EINVAL;
4523934ed458SColin Ian King 
4524934ed458SColin Ian King 	iommu = info->iommu;
4525934ed458SColin Ian King 	if (!iommu)
45264c82b886SLu Baolu 		return -EINVAL;
45274c82b886SLu Baolu 
45284c82b886SLu Baolu 	if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
45294c82b886SLu Baolu 		return -ENODEV;
45304c82b886SLu Baolu 
4531a86fb771SLu Baolu 	if (!info->pasid_enabled || !info->ats_enabled)
45324c82b886SLu Baolu 		return -EINVAL;
45334c82b886SLu Baolu 
4534a86fb771SLu Baolu 	/*
4535a86fb771SLu Baolu 	 * Devices having device-specific I/O fault handling should not
4536a86fb771SLu Baolu 	 * support PCI/PRI. The IOMMU side has no means to check the
4537a86fb771SLu Baolu 	 * capability of device-specific IOPF.  Therefore, the IOMMU can only
4538a86fb771SLu Baolu 	 * assume that if the device driver enables SVA on a non-PRI
4539a86fb771SLu Baolu 	 * device, it will handle IOPF in its own way.
4540a86fb771SLu Baolu 	 */
4541a86fb771SLu Baolu 	if (!info->pri_supported)
4542a86fb771SLu Baolu 		return 0;
4543a86fb771SLu Baolu 
4544a86fb771SLu Baolu 	/* Devices supporting PRI should have it enabled. */
4545a86fb771SLu Baolu 	if (!info->pri_enabled)
45464c82b886SLu Baolu 		return -EINVAL;
45474c82b886SLu Baolu 
45483d4c7cc3SLu Baolu 	return 0;
45493d4c7cc3SLu Baolu }
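
/*
 * A minimal sketch of the consumer side, assuming a driver for an
 * SVA-capable, PRI-capable device. The IOPF feature must be enabled first,
 * since the pri_enabled check above depends on it:
 *
 *	if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF) ||
 *	    iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA))
 *		goto no_sva;
 *
 *	handle = iommu_sva_bind_device(dev, current->mm);
 *	if (IS_ERR(handle))
 *		goto disable_sva;
 *	pasid = iommu_sva_get_pasid(handle);
 *	...
 *	iommu_sva_unbind_device(handle);
 */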
45503d4c7cc3SLu Baolu 
45513d4c7cc3SLu Baolu static int intel_iommu_enable_iopf(struct device *dev)
45523d4c7cc3SLu Baolu {
4553fbcde5bbSLu Baolu 	struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
45543d4c7cc3SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
45553d4c7cc3SLu Baolu 	struct intel_iommu *iommu;
45563d4c7cc3SLu Baolu 	int ret;
45573d4c7cc3SLu Baolu 
4558fbcde5bbSLu Baolu 	if (!pdev || !info || !info->ats_enabled || !info->pri_supported)
45593d4c7cc3SLu Baolu 		return -ENODEV;
4560fbcde5bbSLu Baolu 
4561fbcde5bbSLu Baolu 	if (info->pri_enabled)
4562fbcde5bbSLu Baolu 		return -EBUSY;
4563fbcde5bbSLu Baolu 
45643d4c7cc3SLu Baolu 	iommu = info->iommu;
45653d4c7cc3SLu Baolu 	if (!iommu)
45663d4c7cc3SLu Baolu 		return -EINVAL;
45673d4c7cc3SLu Baolu 
4568fbcde5bbSLu Baolu 	/* PASID is required in PRG Response Message. */
4569fbcde5bbSLu Baolu 	if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
4570fbcde5bbSLu Baolu 		return -EINVAL;
4571fbcde5bbSLu Baolu 
4572fbcde5bbSLu Baolu 	ret = pci_reset_pri(pdev);
4573fbcde5bbSLu Baolu 	if (ret)
4574fbcde5bbSLu Baolu 		return ret;
4575fbcde5bbSLu Baolu 
4576d5b9e4bfSLu Baolu 	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
457760b1daa3SLu Baolu 	if (ret)
457860b1daa3SLu Baolu 		return ret;
457960b1daa3SLu Baolu 
4580d5b9e4bfSLu Baolu 	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
458160b1daa3SLu Baolu 	if (ret)
4582fbcde5bbSLu Baolu 		goto iopf_remove_device;
4583fbcde5bbSLu Baolu 
4584fbcde5bbSLu Baolu 	ret = pci_enable_pri(pdev, PRQ_DEPTH);
4585fbcde5bbSLu Baolu 	if (ret)
4586fbcde5bbSLu Baolu 		goto iopf_unregister_handler;
4587fbcde5bbSLu Baolu 	info->pri_enabled = 1;
4588fbcde5bbSLu Baolu 
4589fbcde5bbSLu Baolu 	return 0;
4590fbcde5bbSLu Baolu 
4591fbcde5bbSLu Baolu iopf_unregister_handler:
4592fbcde5bbSLu Baolu 	iommu_unregister_device_fault_handler(dev);
4593fbcde5bbSLu Baolu iopf_remove_device:
459460b1daa3SLu Baolu 	iopf_queue_remove_device(iommu->iopf_queue, dev);
4595d5b9e4bfSLu Baolu 
4596d5b9e4bfSLu Baolu 	return ret;
45974c82b886SLu Baolu }
45984c82b886SLu Baolu 
45993d4c7cc3SLu Baolu static int intel_iommu_disable_iopf(struct device *dev)
46004c82b886SLu Baolu {
4601586081d3SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
46024c82b886SLu Baolu 	struct intel_iommu *iommu = info->iommu;
46034c82b886SLu Baolu 
4604fbcde5bbSLu Baolu 	if (!info->pri_enabled)
4605fbcde5bbSLu Baolu 		return -EINVAL;
460660b1daa3SLu Baolu 
4607fbcde5bbSLu Baolu 	/*
4608fbcde5bbSLu Baolu 	 * PCIe spec states that by clearing PRI enable bit, the Page
4609fbcde5bbSLu Baolu 	 * Request Interface will not issue new page requests; however, page
4610fbcde5bbSLu Baolu 	 * requests already transmitted or queued for transmission remain
4611fbcde5bbSLu Baolu 	 * outstanding. This is supposed to be called after
4612fbcde5bbSLu Baolu 	 * the device driver has stopped DMA, all PASIDs have been
4613fbcde5bbSLu Baolu 	 * unbound and the outstanding PRQs have been drained.
4614fbcde5bbSLu Baolu 	 */
4615fbcde5bbSLu Baolu 	pci_disable_pri(to_pci_dev(dev));
4616fbcde5bbSLu Baolu 	info->pri_enabled = 0;
4617d5b9e4bfSLu Baolu 
46187b8aa998SLu Baolu 	/*
46197b8aa998SLu Baolu 	 * With PRI disabled and outstanding PRQs drained, unregistering
46207b8aa998SLu Baolu 	 * fault handler and removing device from iopf queue should never
46217b8aa998SLu Baolu 	 * fail.
46227b8aa998SLu Baolu 	 */
46237b8aa998SLu Baolu 	WARN_ON(iommu_unregister_device_fault_handler(dev));
46247b8aa998SLu Baolu 	WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev));
46254c82b886SLu Baolu 
462694f797adSLu Baolu 	return 0;
4627672cf6dfSJoerg Roedel }
4628672cf6dfSJoerg Roedel 
4629672cf6dfSJoerg Roedel static int
4630672cf6dfSJoerg Roedel intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
4631672cf6dfSJoerg Roedel {
46324c82b886SLu Baolu 	switch (feat) {
46334c82b886SLu Baolu 	case IOMMU_DEV_FEAT_IOPF:
463494f797adSLu Baolu 		return intel_iommu_enable_iopf(dev);
46359003351cSJean-Philippe Brucker 
46364c82b886SLu Baolu 	case IOMMU_DEV_FEAT_SVA:
46374c82b886SLu Baolu 		return intel_iommu_enable_sva(dev);
4638672cf6dfSJoerg Roedel 
46394c82b886SLu Baolu 	default:
4640672cf6dfSJoerg Roedel 		return -ENODEV;
4641672cf6dfSJoerg Roedel 	}
4642672cf6dfSJoerg Roedel }
4643672cf6dfSJoerg Roedel 
4644672cf6dfSJoerg Roedel static int
4645672cf6dfSJoerg Roedel intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
4646672cf6dfSJoerg Roedel {
46474c82b886SLu Baolu 	switch (feat) {
46484c82b886SLu Baolu 	case IOMMU_DEV_FEAT_IOPF:
46493d4c7cc3SLu Baolu 		return intel_iommu_disable_iopf(dev);
46504c82b886SLu Baolu 
46514c82b886SLu Baolu 	case IOMMU_DEV_FEAT_SVA:
46523d4c7cc3SLu Baolu 		return 0;
46534c82b886SLu Baolu 
46544c82b886SLu Baolu 	default:
4655672cf6dfSJoerg Roedel 		return -ENODEV;
4656672cf6dfSJoerg Roedel 	}
46574c82b886SLu Baolu }
4658672cf6dfSJoerg Roedel 
465941bb23e7SLu Baolu static bool intel_iommu_is_attach_deferred(struct device *dev)
4660672cf6dfSJoerg Roedel {
4661586081d3SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4662672cf6dfSJoerg Roedel 
4663586081d3SLu Baolu 	return translation_pre_enabled(info->iommu) && !info->domain;
4664672cf6dfSJoerg Roedel }
4665672cf6dfSJoerg Roedel 
466667e8a5b1SRajat Jain /*
466767e8a5b1SRajat Jain  * Check that the device does not live on an external facing PCI port that is
466867e8a5b1SRajat Jain  * marked as untrusted. Such devices should not be able to apply quirks and
466967e8a5b1SRajat Jain  * thus not be able to bypass the IOMMU restrictions.
467067e8a5b1SRajat Jain  */
467167e8a5b1SRajat Jain static bool risky_device(struct pci_dev *pdev)
467267e8a5b1SRajat Jain {
467367e8a5b1SRajat Jain 	if (pdev->untrusted) {
467467e8a5b1SRajat Jain 		pci_info(pdev,
467567e8a5b1SRajat Jain 			 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
467667e8a5b1SRajat Jain 			 pdev->vendor, pdev->device);
467767e8a5b1SRajat Jain 		pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
467867e8a5b1SRajat Jain 		return true;
467967e8a5b1SRajat Jain 	}
468067e8a5b1SRajat Jain 	return false;
468167e8a5b1SRajat Jain }
468267e8a5b1SRajat Jain 
4683933fcd01SLu Baolu static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
4684933fcd01SLu Baolu 				       unsigned long iova, size_t size)
4685933fcd01SLu Baolu {
4686933fcd01SLu Baolu 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4687933fcd01SLu Baolu 	unsigned long pages = aligned_nrpages(iova, size);
4688933fcd01SLu Baolu 	unsigned long pfn = iova >> VTD_PAGE_SHIFT;
4689ba949f4cSLu Baolu 	struct iommu_domain_info *info;
4690ba949f4cSLu Baolu 	unsigned long i;
4691933fcd01SLu Baolu 
4692ba949f4cSLu Baolu 	xa_for_each(&dmar_domain->iommu_array, i, info)
4693ba949f4cSLu Baolu 		__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
4694933fcd01SLu Baolu }
4695933fcd01SLu Baolu 
4696eaca8889SLu Baolu static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
4697eaca8889SLu Baolu {
4698eaca8889SLu Baolu 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
46997d0c9da6SLu Baolu 	struct dev_pasid_info *curr, *dev_pasid = NULL;
47007d0c9da6SLu Baolu 	struct dmar_domain *dmar_domain;
4701eaca8889SLu Baolu 	struct iommu_domain *domain;
47027d0c9da6SLu Baolu 	unsigned long flags;
4703eaca8889SLu Baolu 
4704eaca8889SLu Baolu 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
470515478623SLu Baolu 	if (WARN_ON_ONCE(!domain))
470615478623SLu Baolu 		goto out_tear_down;
470715478623SLu Baolu 
470815478623SLu Baolu 	/*
470915478623SLu Baolu 	 * The SVA implementation needs to handle its own stuffs like the mm
471015478623SLu Baolu 	 * notification. Before consolidating that code into iommu core, let
471115478623SLu Baolu 	 * the intel sva code handle it.
471215478623SLu Baolu 	 */
471315478623SLu Baolu 	if (domain->type == IOMMU_DOMAIN_SVA) {
4714eaca8889SLu Baolu 		intel_svm_remove_dev_pasid(dev, pasid);
471515478623SLu Baolu 		goto out_tear_down;
4716eaca8889SLu Baolu 	}
4717eaca8889SLu Baolu 
47187d0c9da6SLu Baolu 	dmar_domain = to_dmar_domain(domain);
47197d0c9da6SLu Baolu 	spin_lock_irqsave(&dmar_domain->lock, flags);
47207d0c9da6SLu Baolu 	list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
47217d0c9da6SLu Baolu 		if (curr->dev == dev && curr->pasid == pasid) {
47227d0c9da6SLu Baolu 			list_del(&curr->link_domain);
47237d0c9da6SLu Baolu 			dev_pasid = curr;
47247d0c9da6SLu Baolu 			break;
47257d0c9da6SLu Baolu 		}
47267d0c9da6SLu Baolu 	}
47277d0c9da6SLu Baolu 	WARN_ON_ONCE(!dev_pasid);
47287d0c9da6SLu Baolu 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
472915478623SLu Baolu 
47307d0c9da6SLu Baolu 	domain_detach_iommu(dmar_domain, iommu);
47317d0c9da6SLu Baolu 	kfree(dev_pasid);
473215478623SLu Baolu out_tear_down:
4733eaca8889SLu Baolu 	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
473415478623SLu Baolu 	intel_drain_pasid_prq(dev, pasid);
4735eaca8889SLu Baolu }
4736eaca8889SLu Baolu 
47377d0c9da6SLu Baolu static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
47387d0c9da6SLu Baolu 				     struct device *dev, ioasid_t pasid)
47397d0c9da6SLu Baolu {
47407d0c9da6SLu Baolu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
47417d0c9da6SLu Baolu 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
47427d0c9da6SLu Baolu 	struct intel_iommu *iommu = info->iommu;
47437d0c9da6SLu Baolu 	struct dev_pasid_info *dev_pasid;
47447d0c9da6SLu Baolu 	unsigned long flags;
47457d0c9da6SLu Baolu 	int ret;
47467d0c9da6SLu Baolu 
47477d0c9da6SLu Baolu 	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
47487d0c9da6SLu Baolu 		return -EOPNOTSUPP;
47497d0c9da6SLu Baolu 
47507d0c9da6SLu Baolu 	if (context_copied(iommu, info->bus, info->devfn))
47517d0c9da6SLu Baolu 		return -EBUSY;
47527d0c9da6SLu Baolu 
47537d0c9da6SLu Baolu 	ret = prepare_domain_attach_device(domain, dev);
47547d0c9da6SLu Baolu 	if (ret)
47557d0c9da6SLu Baolu 		return ret;
47567d0c9da6SLu Baolu 
47577d0c9da6SLu Baolu 	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
47587d0c9da6SLu Baolu 	if (!dev_pasid)
47597d0c9da6SLu Baolu 		return -ENOMEM;
47607d0c9da6SLu Baolu 
47617d0c9da6SLu Baolu 	ret = domain_attach_iommu(dmar_domain, iommu);
47627d0c9da6SLu Baolu 	if (ret)
47637d0c9da6SLu Baolu 		goto out_free;
47647d0c9da6SLu Baolu 
47657d0c9da6SLu Baolu 	if (domain_type_is_si(dmar_domain))
47667d0c9da6SLu Baolu 		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
47677d0c9da6SLu Baolu 						     dev, pasid);
47687d0c9da6SLu Baolu 	else if (dmar_domain->use_first_level)
47697d0c9da6SLu Baolu 		ret = domain_setup_first_level(iommu, dmar_domain,
47707d0c9da6SLu Baolu 					       dev, pasid);
47717d0c9da6SLu Baolu 	else
47727d0c9da6SLu Baolu 		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
47737d0c9da6SLu Baolu 						     dev, pasid);
47747d0c9da6SLu Baolu 	if (ret)
47757d0c9da6SLu Baolu 		goto out_detach_iommu;
47767d0c9da6SLu Baolu 
47777d0c9da6SLu Baolu 	dev_pasid->dev = dev;
47787d0c9da6SLu Baolu 	dev_pasid->pasid = pasid;
47797d0c9da6SLu Baolu 	spin_lock_irqsave(&dmar_domain->lock, flags);
47807d0c9da6SLu Baolu 	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
47817d0c9da6SLu Baolu 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
47827d0c9da6SLu Baolu 
47837d0c9da6SLu Baolu 	return 0;
47847d0c9da6SLu Baolu out_detach_iommu:
47857d0c9da6SLu Baolu 	domain_detach_iommu(dmar_domain, iommu);
47867d0c9da6SLu Baolu out_free:
47877d0c9da6SLu Baolu 	kfree(dev_pasid);
47887d0c9da6SLu Baolu 	return ret;
4789672cf6dfSJoerg Roedel }
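
/*
 * A sketch of the caller side of set_dev_pasid/remove_dev_pasid, using the
 * core PASID attach API (e.g. a driver dedicating one of its PASIDs to a
 * kernel-owned DMA domain):
 *
 *	ret = iommu_attach_device_pasid(domain, dev, pasid);
 *	if (ret)
 *		return ret;
 *	...
 *	iommu_detach_device_pasid(domain, dev, pasid);
 *
 * The dev_pasids list updated above therefore mirrors the set of
 * (device, PASID) pairs the core has attached to this domain.
 */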
4790672cf6dfSJoerg Roedel 
479155243393SYi Liu static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
479255243393SYi Liu {
479355243393SYi Liu 	struct device_domain_info *info = dev_iommu_priv_get(dev);
479455243393SYi Liu 	struct intel_iommu *iommu = info->iommu;
479555243393SYi Liu 	struct iommu_hw_info_vtd *vtd;
479655243393SYi Liu 
479755243393SYi Liu 	vtd = kzalloc(sizeof(*vtd), GFP_KERNEL);
479855243393SYi Liu 	if (!vtd)
479955243393SYi Liu 		return ERR_PTR(-ENOMEM);
480055243393SYi Liu 
480155243393SYi Liu 	vtd->cap_reg = iommu->cap;
480255243393SYi Liu 	vtd->ecap_reg = iommu->ecap;
480355243393SYi Liu 	*length = sizeof(*vtd);
480455243393SYi Liu 	*type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
480555243393SYi Liu 	return vtd;
480655243393SYi Liu }
480755243393SYi Liu 
4808672cf6dfSJoerg Roedel const struct iommu_ops intel_iommu_ops = {
4809672cf6dfSJoerg Roedel 	.capable		= intel_iommu_capable,
481055243393SYi Liu 	.hw_info		= intel_iommu_hw_info,
4811672cf6dfSJoerg Roedel 	.domain_alloc		= intel_iommu_domain_alloc,
4812672cf6dfSJoerg Roedel 	.probe_device		= intel_iommu_probe_device,
4813672cf6dfSJoerg Roedel 	.probe_finalize		= intel_iommu_probe_finalize,
4814672cf6dfSJoerg Roedel 	.release_device		= intel_iommu_release_device,
4815672cf6dfSJoerg Roedel 	.get_resv_regions	= intel_iommu_get_resv_regions,
4816672cf6dfSJoerg Roedel 	.device_group		= intel_iommu_device_group,
4817672cf6dfSJoerg Roedel 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
4818672cf6dfSJoerg Roedel 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
4819672cf6dfSJoerg Roedel 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
4820672cf6dfSJoerg Roedel 	.def_domain_type	= device_def_domain_type,
4821eaca8889SLu Baolu 	.remove_dev_pasid	= intel_iommu_remove_dev_pasid,
4822a886d5a7SLu Baolu 	.pgsize_bitmap		= SZ_4K,
4823672cf6dfSJoerg Roedel #ifdef CONFIG_INTEL_IOMMU_SVM
48248b737121SLu Baolu 	.page_response		= intel_svm_page_response,
4825672cf6dfSJoerg Roedel #endif
48269a630a4bSLu Baolu 	.default_domain_ops = &(const struct iommu_domain_ops) {
48279a630a4bSLu Baolu 		.attach_dev		= intel_iommu_attach_device,
48287d0c9da6SLu Baolu 		.set_dev_pasid		= intel_iommu_set_dev_pasid,
48299a630a4bSLu Baolu 		.map_pages		= intel_iommu_map_pages,
48309a630a4bSLu Baolu 		.unmap_pages		= intel_iommu_unmap_pages,
48319a630a4bSLu Baolu 		.iotlb_sync_map		= intel_iommu_iotlb_sync_map,
48329a630a4bSLu Baolu 		.flush_iotlb_all        = intel_flush_iotlb_all,
48339a630a4bSLu Baolu 		.iotlb_sync		= intel_iommu_tlb_sync,
48349a630a4bSLu Baolu 		.iova_to_phys		= intel_iommu_iova_to_phys,
48359a630a4bSLu Baolu 		.free			= intel_iommu_domain_free,
48366043257bSJason Gunthorpe 		.enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
48379a630a4bSLu Baolu 	}
4838672cf6dfSJoerg Roedel };
4839672cf6dfSJoerg Roedel 
4840672cf6dfSJoerg Roedel static void quirk_iommu_igfx(struct pci_dev *dev)
4841672cf6dfSJoerg Roedel {
484267e8a5b1SRajat Jain 	if (risky_device(dev))
484367e8a5b1SRajat Jain 		return;
484467e8a5b1SRajat Jain 
4845672cf6dfSJoerg Roedel 	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
4846672cf6dfSJoerg Roedel 	dmar_map_gfx = 0;
4847672cf6dfSJoerg Roedel }
4848672cf6dfSJoerg Roedel 
4849672cf6dfSJoerg Roedel /* G4x/GM45 integrated gfx dmar support is totally busted. */
4850672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
4851672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
4852672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
4853672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
4854672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
4855672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
4856672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
4857672cf6dfSJoerg Roedel 
4858672cf6dfSJoerg Roedel /* Broadwell igfx malfunctions with dmar */
4859672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
4860672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
4861672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
4862672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
4863672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
4864672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
4865672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
4866672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
4867672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
4868672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
4869672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
4870672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
4871672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
4872672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
4873672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
4874672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
4875672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
4876672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
4877672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
4878672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
4879672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
4880672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
4881672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
4882672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
4883672cf6dfSJoerg Roedel 
4884672cf6dfSJoerg Roedel static void quirk_iommu_rwbf(struct pci_dev *dev)
4885672cf6dfSJoerg Roedel {
488667e8a5b1SRajat Jain 	if (risky_device(dev))
488767e8a5b1SRajat Jain 		return;
488867e8a5b1SRajat Jain 
4889672cf6dfSJoerg Roedel 	/*
4890672cf6dfSJoerg Roedel 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
4891672cf6dfSJoerg Roedel 	 * but needs it. Same seems to hold for the desktop versions.
4892672cf6dfSJoerg Roedel 	 */
4893672cf6dfSJoerg Roedel 	pci_info(dev, "Forcing write-buffer flush capability\n");
4894672cf6dfSJoerg Roedel 	rwbf_quirk = 1;
4895672cf6dfSJoerg Roedel }
4896672cf6dfSJoerg Roedel 
4897672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4898672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4899672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4900672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4901672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4902672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4903672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4904672cf6dfSJoerg Roedel 
4905672cf6dfSJoerg Roedel #define GGC 0x52
4906672cf6dfSJoerg Roedel #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4907672cf6dfSJoerg Roedel #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4908672cf6dfSJoerg Roedel #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4909672cf6dfSJoerg Roedel #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4910672cf6dfSJoerg Roedel #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4911672cf6dfSJoerg Roedel #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4912672cf6dfSJoerg Roedel #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4913672cf6dfSJoerg Roedel #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4914672cf6dfSJoerg Roedel 
4915672cf6dfSJoerg Roedel static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4916672cf6dfSJoerg Roedel {
4917672cf6dfSJoerg Roedel 	unsigned short ggc;
4918672cf6dfSJoerg Roedel 
491967e8a5b1SRajat Jain 	if (risky_device(dev))
492067e8a5b1SRajat Jain 		return;
492167e8a5b1SRajat Jain 
4922672cf6dfSJoerg Roedel 	if (pci_read_config_word(dev, GGC, &ggc))
4923672cf6dfSJoerg Roedel 		return;
4924672cf6dfSJoerg Roedel 
4925672cf6dfSJoerg Roedel 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4926672cf6dfSJoerg Roedel 		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4927672cf6dfSJoerg Roedel 		dmar_map_gfx = 0;
4928672cf6dfSJoerg Roedel 	} else if (dmar_map_gfx) {
4929672cf6dfSJoerg Roedel 		/* we have to ensure the gfx device is idle before we flush */
4930672cf6dfSJoerg Roedel 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
4931308723e3SJohn Garry 		iommu_set_dma_strict();
4932672cf6dfSJoerg Roedel 	}
4933672cf6dfSJoerg Roedel }
4934672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4935672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4936672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4937672cf6dfSJoerg Roedel DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4938672cf6dfSJoerg Roedel 
4939b1012ca8SLu Baolu static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
4940b1012ca8SLu Baolu {
4941b1012ca8SLu Baolu 	unsigned short ver;
4942b1012ca8SLu Baolu 
4943b1012ca8SLu Baolu 	if (!IS_GFX_DEVICE(dev))
4944b1012ca8SLu Baolu 		return;
4945b1012ca8SLu Baolu 
4946b1012ca8SLu Baolu 	ver = (dev->device >> 8) & 0xff;
4947b1012ca8SLu Baolu 	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
4948b1012ca8SLu Baolu 	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
494938b35423SAbdul Halim, Mohd Syazwan 	    ver != 0x9a && ver != 0xa7 && ver != 0x7d)
4950b1012ca8SLu Baolu 		return;
4951b1012ca8SLu Baolu 
4952b1012ca8SLu Baolu 	if (risky_device(dev))
4953b1012ca8SLu Baolu 		return;
4954b1012ca8SLu Baolu 
4955b1012ca8SLu Baolu 	pci_info(dev, "Skip IOMMU disabling for graphics\n");
4956b1012ca8SLu Baolu 	iommu_skip_te_disable = 1;
4957b1012ca8SLu Baolu }
4958b1012ca8SLu Baolu DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
4959b1012ca8SLu Baolu 
4960672cf6dfSJoerg Roedel /* On Tylersburg chipsets, some BIOSes have been known to enable the
4961672cf6dfSJoerg Roedel    ISOCH DMAR unit for the Azalia sound device, but not give it any
4962672cf6dfSJoerg Roedel    TLB entries, which causes it to deadlock. Check for that.  We do
4963672cf6dfSJoerg Roedel    this in a function called from init_dmars(), instead of in a PCI
4964672cf6dfSJoerg Roedel    quirk, because we don't want to print the obnoxious "BIOS broken"
4965672cf6dfSJoerg Roedel    message if VT-d is actually disabled.
4966672cf6dfSJoerg Roedel */
4967672cf6dfSJoerg Roedel static void __init check_tylersburg_isoch(void)
4968672cf6dfSJoerg Roedel {
4969672cf6dfSJoerg Roedel 	struct pci_dev *pdev;
4970672cf6dfSJoerg Roedel 	uint32_t vtisochctrl;
4971672cf6dfSJoerg Roedel 
4972672cf6dfSJoerg Roedel 	/* If there's no Azalia in the system anyway, forget it. */
4973672cf6dfSJoerg Roedel 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4974672cf6dfSJoerg Roedel 	if (!pdev)
4975672cf6dfSJoerg Roedel 		return;
497667e8a5b1SRajat Jain 
497767e8a5b1SRajat Jain 	if (risky_device(pdev)) {
497867e8a5b1SRajat Jain 		pci_dev_put(pdev);
497967e8a5b1SRajat Jain 		return;
498067e8a5b1SRajat Jain 	}
498167e8a5b1SRajat Jain 
4982672cf6dfSJoerg Roedel 	pci_dev_put(pdev);
4983672cf6dfSJoerg Roedel 
4984672cf6dfSJoerg Roedel 	/* System Management Registers. Might be hidden, in which case
4985672cf6dfSJoerg Roedel 	   we can't do the sanity check. But that's OK, because the
4986672cf6dfSJoerg Roedel 	   known-broken BIOSes _don't_ actually hide it, so far. */
4987672cf6dfSJoerg Roedel 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4988672cf6dfSJoerg Roedel 	if (!pdev)
4989672cf6dfSJoerg Roedel 		return;
4990672cf6dfSJoerg Roedel 
499167e8a5b1SRajat Jain 	if (risky_device(pdev)) {
499267e8a5b1SRajat Jain 		pci_dev_put(pdev);
499367e8a5b1SRajat Jain 		return;
499467e8a5b1SRajat Jain 	}
499567e8a5b1SRajat Jain 
4996672cf6dfSJoerg Roedel 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4997672cf6dfSJoerg Roedel 		pci_dev_put(pdev);
4998672cf6dfSJoerg Roedel 		return;
4999672cf6dfSJoerg Roedel 	}
5000672cf6dfSJoerg Roedel 
5001672cf6dfSJoerg Roedel 	pci_dev_put(pdev);
5002672cf6dfSJoerg Roedel 
5003672cf6dfSJoerg Roedel 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5004672cf6dfSJoerg Roedel 	if (vtisochctrl & 1)
5005672cf6dfSJoerg Roedel 		return;
5006672cf6dfSJoerg Roedel 
5007672cf6dfSJoerg Roedel 	/* Drop all bits other than the number of TLB entries */
5008672cf6dfSJoerg Roedel 	vtisochctrl &= 0x1c;
5009672cf6dfSJoerg Roedel 
5010672cf6dfSJoerg Roedel 	/* If we have the recommended number of TLB entries (16), fine. */
5011672cf6dfSJoerg Roedel 	if (vtisochctrl == 0x10)
5012672cf6dfSJoerg Roedel 		return;
5013672cf6dfSJoerg Roedel 
5014672cf6dfSJoerg Roedel 	/* Zero TLB entries? You get to ride the short bus to school. */
5015672cf6dfSJoerg Roedel 	if (!vtisochctrl) {
5016672cf6dfSJoerg Roedel 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5017672cf6dfSJoerg Roedel 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5018672cf6dfSJoerg Roedel 		     dmi_get_system_info(DMI_BIOS_VENDOR),
5019672cf6dfSJoerg Roedel 		     dmi_get_system_info(DMI_BIOS_VERSION),
5020672cf6dfSJoerg Roedel 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
5021672cf6dfSJoerg Roedel 		iommu_identity_mapping |= IDENTMAP_AZALIA;
5022672cf6dfSJoerg Roedel 		return;
5023672cf6dfSJoerg Roedel 	}
5024672cf6dfSJoerg Roedel 
5025672cf6dfSJoerg Roedel 	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5026672cf6dfSJoerg Roedel 	       vtisochctrl);
5027672cf6dfSJoerg Roedel }
5028e65a6897SJacob Pan 
5029e65a6897SJacob Pan /*
5030e65a6897SJacob Pan  * Here we deal with a device TLB defect where device may inadvertently issue ATS
5031e65a6897SJacob Pan  * invalidation completion before posted writes initiated with translated address
5032e65a6897SJacob Pan  * that utilized translations matching the invalidation address range, violating
5033e65a6897SJacob Pan  * the invalidation completion ordering.
5034e65a6897SJacob Pan  * Therefore, any use cases that cannot guarantee DMA is stopped before unmap are
5035e65a6897SJacob Pan  * vulnerable to this defect. In other words, any dTLB invalidation initiated not
5036e65a6897SJacob Pan  * under the control of the trusted/privileged host device driver must use this
5037e65a6897SJacob Pan  * quirk.
5038e65a6897SJacob Pan  * Device TLBs are invalidated under the following six conditions:
5039e65a6897SJacob Pan  * 1. Device driver does DMA API unmap IOVA
5040e65a6897SJacob Pan  * 2. Device driver unbind a PASID from a process, sva_unbind_device()
5041e65a6897SJacob Pan  * 3. PASID is torn down, after PASID cache is flushed. e.g. process
5042e65a6897SJacob Pan  *    exit_mmap() due to crash
5043e65a6897SJacob Pan  * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
5044e65a6897SJacob Pan  *    VM has to free pages that were unmapped
5045e65a6897SJacob Pan  * 5. Userspace driver unmaps a DMA buffer
5046e65a6897SJacob Pan  * 6. Cache invalidation in vSVA usage (upcoming)
5047e65a6897SJacob Pan  *
5048e65a6897SJacob Pan  * For #1 and #2, device drivers are responsible for stopping DMA traffic
5049e65a6897SJacob Pan  * before unmap/unbind. For #3, iommu driver gets mmu_notifier to
5050e65a6897SJacob Pan  * invalidate TLB the same way as normal user unmap which will use this quirk.
5051e65a6897SJacob Pan  * The dTLB invalidation after PASID cache flush does not need this quirk.
5052e65a6897SJacob Pan  *
5053e65a6897SJacob Pan  * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
5054e65a6897SJacob Pan  */
5055e65a6897SJacob Pan void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
5056e65a6897SJacob Pan 			       unsigned long address, unsigned long mask,
5057e65a6897SJacob Pan 			       u32 pasid, u16 qdep)
5058e65a6897SJacob Pan {
5059e65a6897SJacob Pan 	u16 sid;
5060e65a6897SJacob Pan 
5061e65a6897SJacob Pan 	if (likely(!info->dtlb_extra_inval))
5062e65a6897SJacob Pan 		return;
5063e65a6897SJacob Pan 
5064e65a6897SJacob Pan 	sid = PCI_DEVID(info->bus, info->devfn);
506542987801SJacob Pan 	if (pasid == IOMMU_NO_PASID) {
5066e65a6897SJacob Pan 		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
5067e65a6897SJacob Pan 				   qdep, address, mask);
5068e65a6897SJacob Pan 	} else {
5069e65a6897SJacob Pan 		qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
5070e65a6897SJacob Pan 					 pasid, qdep, address, mask);
5071e65a6897SJacob Pan 	}
5072e65a6897SJacob Pan }
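
/*
 * The mask argument follows the usual device-IOTLB size encoding: it is the
 * base-2 log of the number of 4 KiB pages covered. For example, draining a
 * 32 KiB range (eight pages) at a 32 KiB-aligned address uses mask = 3.
 * Callers invoke this helper right after the normal device-IOTLB flush, so
 * on affected hardware the extra, identical invalidation restores the
 * required completion ordering.
 */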
5073dc578758SKan Liang 
5074dc578758SKan Liang #define ecmd_get_status_code(res)	(((res) & 0xff) >> 1)
5075dc578758SKan Liang 
5076dc578758SKan Liang /*
5077dc578758SKan Liang  * Function to submit a command to the enhanced command interface. The
5078dc578758SKan Liang  * valid enhanced command descriptions are defined in Table 47 of the
5079dc578758SKan Liang  * VT-d spec. The VT-d hardware implementation may support some but not
5080dc578758SKan Liang  * all commands, which can be determined by checking the Enhanced
5081dc578758SKan Liang  * Command Capability Register.
5082dc578758SKan Liang  *
5083dc578758SKan Liang  * Return values:
5084dc578758SKan Liang  *  - 0: Command successful without any error;
5085dc578758SKan Liang  *  - Negative: software error value;
5086dc578758SKan Liang  *  - Nonzero positive: failure status code defined in Table 48.
5087dc578758SKan Liang  */
5088dc578758SKan Liang int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
5089dc578758SKan Liang {
5090dc578758SKan Liang 	unsigned long flags;
5091dc578758SKan Liang 	u64 res;
5092dc578758SKan Liang 	int ret;
5093dc578758SKan Liang 
5094dc578758SKan Liang 	if (!cap_ecmds(iommu->cap))
5095dc578758SKan Liang 		return -ENODEV;
5096dc578758SKan Liang 
5097dc578758SKan Liang 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
5098dc578758SKan Liang 
5099dc578758SKan Liang 	res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
5100dc578758SKan Liang 	if (res & DMA_ECMD_ECRSP_IP) {
5101dc578758SKan Liang 		ret = -EBUSY;
5102dc578758SKan Liang 		goto err;
5103dc578758SKan Liang 	}
5104dc578758SKan Liang 
5105dc578758SKan Liang 	/*
5106dc578758SKan Liang 	 * Unconditionally write the operand B, because
5107dc578758SKan Liang 	 * - There is no side effect if an ecmd doesn't require an
5108dc578758SKan Liang 	 *   operand B, but we set the register to some value.
5109dc578758SKan Liang 	 * - It's not invoked in any critical path. The extra MMIO
5110dc578758SKan Liang 	 *   write doesn't bring any performance concerns.
5111dc578758SKan Liang 	 */
5112dc578758SKan Liang 	dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
5113dc578758SKan Liang 	dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));
5114dc578758SKan Liang 
5115dc578758SKan Liang 	IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
5116dc578758SKan Liang 		      !(res & DMA_ECMD_ECRSP_IP), res);
5117dc578758SKan Liang 
5118dc578758SKan Liang 	if (res & DMA_ECMD_ECRSP_IP) {
5119dc578758SKan Liang 		ret = -ETIMEDOUT;
5120dc578758SKan Liang 		goto err;
5121dc578758SKan Liang 	}
5122dc578758SKan Liang 
5123dc578758SKan Liang 	ret = ecmd_get_status_code(res);
5124dc578758SKan Liang err:
5125dc578758SKan Liang 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
5126dc578758SKan Liang 
5127dc578758SKan Liang 	return ret;
5128dc578758SKan Liang }
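
/*
 * A sketch of the three-way return handling expected from callers, using
 * ECMD_EXAMPLE_OP as a placeholder opcode (real opcodes are defined by the
 * VT-d spec and advertised through the Enhanced Command Capability
 * Register):
 *
 *	ret = ecmd_submit_sync(iommu, ECMD_EXAMPLE_OP, oa, 0);
 *	if (ret < 0)
 *		return ret;		(software failure, e.g. -EBUSY/-ETIMEDOUT)
 *	if (ret)
 *		return -EIO;		(hardware status code from Table 48)
 *
 * A return of 0 means the command completed without error.
 */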
5129