xref: /openbmc/linux/drivers/iommu/intel/iommu.c (revision 81464192839de0b5bc84c5739381101e04d94f62)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2014 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>,
6  *          Ashok Raj <ashok.raj@intel.com>,
7  *          Shaohua Li <shaohua.li@intel.com>,
8  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9  *          Fenghua Yu <fenghua.yu@intel.com>
10  *          Joerg Roedel <jroedel@suse.de>
11  */
12 
13 #define pr_fmt(fmt)     "DMAR: " fmt
14 #define dev_fmt(fmt)    pr_fmt(fmt)
15 
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
31 #include <linux/io.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/syscore_ops.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/pci-ats.h>
39 #include <linux/memblock.h>
40 #include <linux/dma-contiguous.h>
41 #include <linux/dma-direct.h>
42 #include <linux/crash_dump.h>
43 #include <linux/numa.h>
44 #include <linux/swiotlb.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48 #include <trace/events/intel_iommu.h>
49 
50 #include "../irq_remapping.h"
51 #include "intel-pasid.h"
52 
53 #define ROOT_SIZE		VTD_PAGE_SIZE
54 #define CONTEXT_SIZE		VTD_PAGE_SIZE
55 
/*
 * PCI class / device-ID predicates.  The argument is parenthesised at every
 * use so that any pointer-valued expression (not just a plain identifier)
 * can be passed safely.
 */
#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60 
61 #define IOAPIC_RANGE_START	(0xfee00000)
62 #define IOAPIC_RANGE_END	(0xfeefffff)
63 #define IOVA_START_ADDR		(0x1000)
64 
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
66 
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69 
/*
 * Highest page frame number / highest address representable with a guest
 * address width of @gaw bits.  @gaw is parenthesised so that expression
 * arguments (e.g. a conditional) are evaluated as a unit.
 */
#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
72 
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
76 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
78 
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN		(1)
81 
82 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
83 
84 /* page table handling */
85 #define LEVEL_STRIDE		(9)
86 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
87 
88 /*
89  * This bitmap is used to advertise the page sizes our hardware supports
90  * to the IOMMU core, which will then use this information to split
91  * physically contiguous memory regions it is mapping into page sizes
92  * that we support.
93  *
94  * Traditionally the IOMMU core just handed us the mappings directly,
95  * after making sure the size is an order of a 4KiB page and that the
96  * mapping has natural alignment.
97  *
98  * To retain this behavior, we currently advertise that we support
99  * all page sizes that are an order of 4KiB.
100  *
101  * If at some point we'd like to utilize the IOMMU core's new behavior,
102  * we could change this to advertise the real page sizes we support.
103  */
104 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
105 
/* Translate an AGAW index into a page-table depth: the depth is AGAW + 2. */
static inline int agaw_to_level(int agaw)
{
	const int base_levels = 2;

	return base_levels + agaw;
}
110 
111 static inline int agaw_to_width(int agaw)
112 {
113 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
114 }
115 
116 static inline int width_to_agaw(int width)
117 {
118 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
119 }
120 
121 static inline unsigned int level_to_offset_bits(int level)
122 {
123 	return (level - 1) * LEVEL_STRIDE;
124 }
125 
126 static inline int pfn_level_offset(unsigned long pfn, int level)
127 {
128 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
129 }
130 
/* PFN mask that clears the bits below the index field of @level. */
static inline unsigned long level_mask(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return ~0UL << shift;
}
135 
/* Number of PFNs spanned by a single entry at @level. */
static inline unsigned long level_size(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return 1UL << shift;
}
140 
/* Round @pfn up to the next boundary of an entry at @level. */
static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	unsigned long span = level_size(level);

	return (pfn + span - 1) & level_mask(level);
}
145 
146 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
147 {
148 	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
149 }
150 
151 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152    are never going to work. */
153 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
154 {
155 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
156 }
157 
158 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
159 {
160 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
161 }
/* VT-d (DMA) page frame number of the start of @pg. */
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	unsigned long mm_pfn = page_to_pfn(pg);

	return mm_to_dma_pfn(mm_pfn);
}
/* VT-d (DMA) page frame number of the page backing kernel address @p. */
static inline unsigned long virt_to_dma_pfn(void *p)
{
	struct page *pg = virt_to_page(p);

	return page_to_dma_pfn(pg);
}
170 
171 /* global iommu list, set NULL for ignored DMAR units */
172 static struct intel_iommu **g_iommus;
173 
174 static void __init check_tylersburg_isoch(void);
175 static int rwbf_quirk;
176 
177 /*
178  * set to 1 to panic kernel if can't successfully enable VT-d
179  * (used when kernel is launched w/ TXT)
180  */
181 static int force_on = 0;
182 int intel_iommu_tboot_noforce;
183 static int no_platform_optin;
184 
185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
186 
187 /*
188  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
189  * if marked present.
190  */
191 static phys_addr_t root_entry_lctp(struct root_entry *re)
192 {
193 	if (!(re->lo & 1))
194 		return 0;
195 
196 	return re->lo & VTD_PAGE_MASK;
197 }
198 
199 /*
200  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
201  * if marked present.
202  */
203 static phys_addr_t root_entry_uctp(struct root_entry *re)
204 {
205 	if (!(re->hi & 1))
206 		return 0;
207 
208 	return re->hi & VTD_PAGE_MASK;
209 }
210 
211 static inline void context_clear_pasid_enable(struct context_entry *context)
212 {
213 	context->lo &= ~(1ULL << 11);
214 }
215 
216 static inline bool context_pasid_enabled(struct context_entry *context)
217 {
218 	return !!(context->lo & (1ULL << 11));
219 }
220 
221 static inline void context_set_copied(struct context_entry *context)
222 {
223 	context->hi |= (1ull << 3);
224 }
225 
226 static inline bool context_copied(struct context_entry *context)
227 {
228 	return !!(context->hi & (1ULL << 3));
229 }
230 
231 static inline bool __context_present(struct context_entry *context)
232 {
233 	return (context->lo & 1);
234 }
235 
236 bool context_present(struct context_entry *context)
237 {
238 	return context_pasid_enabled(context) ?
239 	     __context_present(context) :
240 	     __context_present(context) && !context_copied(context);
241 }
242 
243 static inline void context_set_present(struct context_entry *context)
244 {
245 	context->lo |= 1;
246 }
247 
248 static inline void context_set_fault_enable(struct context_entry *context)
249 {
250 	context->lo &= (((u64)-1) << 2) | 1;
251 }
252 
253 static inline void context_set_translation_type(struct context_entry *context,
254 						unsigned long value)
255 {
256 	context->lo &= (((u64)-1) << 4) | 3;
257 	context->lo |= (value & 3) << 2;
258 }
259 
260 static inline void context_set_address_root(struct context_entry *context,
261 					    unsigned long value)
262 {
263 	context->lo &= ~VTD_PAGE_MASK;
264 	context->lo |= value & VTD_PAGE_MASK;
265 }
266 
267 static inline void context_set_address_width(struct context_entry *context,
268 					     unsigned long value)
269 {
270 	context->hi |= value & 7;
271 }
272 
273 static inline void context_set_domain_id(struct context_entry *context,
274 					 unsigned long value)
275 {
276 	context->hi |= (value & ((1 << 16) - 1)) << 8;
277 }
278 
279 static inline int context_domain_id(struct context_entry *c)
280 {
281 	return((c->hi >> 8) & 0xffff);
282 }
283 
284 static inline void context_clear_entry(struct context_entry *context)
285 {
286 	context->lo = 0;
287 	context->hi = 0;
288 }
289 
290 /*
291  * This domain is a statically identity mapping domain.
292  *	1. This domain creates a static 1:1 mapping to all usable memory.
293  * 	2. It maps to each iommu if successful.
294  *	3. Each iommu maps to this domain if successful.
295  */
296 static struct dmar_domain *si_domain;
297 static int hw_pass_through = 1;
298 
/*
 * Iterate @idx over every IOMMU index in [0, g_num_of_iommus) for which
 * @domain holds a non-zero reference count.  Both arguments are
 * parenthesised so that expression arguments expand correctly.
 *
 * Note: the expansion ends in an unbraced "if", so the statement that
 * follows the macro is its body; "break" leaves the enclosing for loop.
 */
#define for_each_domain_iommu(idx, domain)			\
	for ((idx) = 0; (idx) < g_num_of_iommus; (idx)++)	\
		if ((domain)->iommu_refcnt[(idx)])
302 
/*
 * One RMRR (reserved memory region) unit.  Instances are linked on the
 * file-scope dmar_rmrr_units list and walked with for_each_rmrr_units().
 */
struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};
311 
/*
 * One ATSR unit.  Instances are linked on the file-scope dmar_atsr_units
 * list.
 */
struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};
319 
320 static LIST_HEAD(dmar_atsr_units);
321 static LIST_HEAD(dmar_rmrr_units);
322 
323 #define for_each_rmrr_units(rmrr) \
324 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
325 
326 /* bitmap for indexing intel_iommus */
327 static int g_num_of_iommus;
328 
329 static void domain_exit(struct dmar_domain *domain);
330 static void domain_remove_dev_info(struct dmar_domain *domain);
331 static void dmar_remove_one_dev_info(struct device *dev);
332 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
333 static int intel_iommu_attach_device(struct iommu_domain *domain,
334 				     struct device *dev);
335 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
336 					    dma_addr_t iova);
337 
/*
 * Non-zero when the IOMMU is disabled.  The build-time default comes from
 * CONFIG_INTEL_IOMMU_DEFAULT_ON; intel_iommu_setup() overrides it for the
 * "on" and "off" boot options.
 */
#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */

/*
 * Non-zero when scalable mode is requested.  The build-time default comes
 * from CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON; intel_iommu_setup()
 * sets it for the "sm_on" boot option.
 */
#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
int intel_iommu_sm = 1;
#else
int intel_iommu_sm;
#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */

/*
 * Exported to GPL modules; starts out 0.
 * NOTE(review): the code that sets it is not in this part of the file.
 */
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
352 
353 static int dmar_map_gfx = 1;
354 static int dmar_forcedac;
355 static int intel_iommu_strict;
356 static int intel_iommu_superpage = 1;
357 static int iommu_identity_mapping;
358 static int intel_no_bounce;
359 
360 #define IDENTMAP_GFX		2
361 #define IDENTMAP_AZALIA		4
362 
363 int intel_iommu_gfx_mapped;
364 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
365 
366 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
367 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
368 struct device_domain_info *get_domain_info(struct device *dev)
369 {
370 	struct device_domain_info *info;
371 
372 	if (!dev)
373 		return NULL;
374 
375 	info = dev->archdata.iommu;
376 	if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
377 		     info == DEFER_DEVICE_DOMAIN_INFO))
378 		return NULL;
379 
380 	return info;
381 }
382 
383 DEFINE_SPINLOCK(device_domain_lock);
384 static LIST_HEAD(device_domain_list);
385 
386 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
387 				to_pci_dev(d)->untrusted)
388 
389 /*
390  * Iterate over elements in device_domain_list and call the specified
391  * callback @fn against each element.
392  */
393 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
394 				     void *data), void *data)
395 {
396 	int ret = 0;
397 	unsigned long flags;
398 	struct device_domain_info *info;
399 
400 	spin_lock_irqsave(&device_domain_lock, flags);
401 	list_for_each_entry(info, &device_domain_list, global) {
402 		ret = fn(info, data);
403 		if (ret) {
404 			spin_unlock_irqrestore(&device_domain_lock, flags);
405 			return ret;
406 		}
407 	}
408 	spin_unlock_irqrestore(&device_domain_lock, flags);
409 
410 	return 0;
411 }
412 
413 const struct iommu_ops intel_iommu_ops;
414 
415 static bool translation_pre_enabled(struct intel_iommu *iommu)
416 {
417 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
418 }
419 
420 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
421 {
422 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
423 }
424 
425 static void init_translation_status(struct intel_iommu *iommu)
426 {
427 	u32 gsts;
428 
429 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
430 	if (gsts & DMA_GSTS_TES)
431 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
432 }
433 
434 static int __init intel_iommu_setup(char *str)
435 {
436 	if (!str)
437 		return -EINVAL;
438 	while (*str) {
439 		if (!strncmp(str, "on", 2)) {
440 			dmar_disabled = 0;
441 			pr_info("IOMMU enabled\n");
442 		} else if (!strncmp(str, "off", 3)) {
443 			dmar_disabled = 1;
444 			no_platform_optin = 1;
445 			pr_info("IOMMU disabled\n");
446 		} else if (!strncmp(str, "igfx_off", 8)) {
447 			dmar_map_gfx = 0;
448 			pr_info("Disable GFX device mapping\n");
449 		} else if (!strncmp(str, "forcedac", 8)) {
450 			pr_info("Forcing DAC for PCI devices\n");
451 			dmar_forcedac = 1;
452 		} else if (!strncmp(str, "strict", 6)) {
453 			pr_info("Disable batched IOTLB flush\n");
454 			intel_iommu_strict = 1;
455 		} else if (!strncmp(str, "sp_off", 6)) {
456 			pr_info("Disable supported super page\n");
457 			intel_iommu_superpage = 0;
458 		} else if (!strncmp(str, "sm_on", 5)) {
459 			pr_info("Intel-IOMMU: scalable mode supported\n");
460 			intel_iommu_sm = 1;
461 		} else if (!strncmp(str, "tboot_noforce", 13)) {
462 			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
463 			intel_iommu_tboot_noforce = 1;
464 		} else if (!strncmp(str, "nobounce", 8)) {
465 			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
466 			intel_no_bounce = 1;
467 		}
468 
469 		str += strcspn(str, ",");
470 		while (*str == ',')
471 			str++;
472 	}
473 	return 0;
474 }
475 __setup("intel_iommu=", intel_iommu_setup);
476 
477 static struct kmem_cache *iommu_domain_cache;
478 static struct kmem_cache *iommu_devinfo_cache;
479 
480 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
481 {
482 	struct dmar_domain **domains;
483 	int idx = did >> 8;
484 
485 	domains = iommu->domains[idx];
486 	if (!domains)
487 		return NULL;
488 
489 	return domains[did & 0xff];
490 }
491 
492 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
493 			     struct dmar_domain *domain)
494 {
495 	struct dmar_domain **domains;
496 	int idx = did >> 8;
497 
498 	if (!iommu->domains[idx]) {
499 		size_t size = 256 * sizeof(struct dmar_domain *);
500 		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
501 	}
502 
503 	domains = iommu->domains[idx];
504 	if (WARN_ON(!domains))
505 		return;
506 	else
507 		domains[did & 0xff] = domain;
508 }
509 
510 void *alloc_pgtable_page(int node)
511 {
512 	struct page *page;
513 	void *vaddr = NULL;
514 
515 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
516 	if (page)
517 		vaddr = page_address(page);
518 	return vaddr;
519 }
520 
/* Release a page previously handed out as a kernel virtual address. */
void free_pgtable_page(void *vaddr)
{
	unsigned long addr = (unsigned long)vaddr;

	free_page(addr);
}
525 
526 static inline void *alloc_domain_mem(void)
527 {
528 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
529 }
530 
531 static void free_domain_mem(void *vaddr)
532 {
533 	kmem_cache_free(iommu_domain_cache, vaddr);
534 }
535 
536 static inline void * alloc_devinfo_mem(void)
537 {
538 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
539 }
540 
541 static inline void free_devinfo_mem(void *vaddr)
542 {
543 	kmem_cache_free(iommu_devinfo_cache, vaddr);
544 }
545 
546 static inline int domain_type_is_si(struct dmar_domain *domain)
547 {
548 	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
549 }
550 
551 static inline bool domain_use_first_level(struct dmar_domain *domain)
552 {
553 	return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
554 }
555 
556 static inline int domain_pfn_supported(struct dmar_domain *domain,
557 				       unsigned long pfn)
558 {
559 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
560 
561 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
562 }
563 
564 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
565 {
566 	unsigned long sagaw;
567 	int agaw = -1;
568 
569 	sagaw = cap_sagaw(iommu->cap);
570 	for (agaw = width_to_agaw(max_gaw);
571 	     agaw >= 0; agaw--) {
572 		if (test_bit(agaw, &sagaw))
573 			break;
574 	}
575 
576 	return agaw;
577 }
578 
579 /*
580  * Calculate max SAGAW for each iommu.
581  */
582 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
583 {
584 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
585 }
586 
587 /*
588  * calculate agaw for each iommu.
589  * "SAGAW" may be different across iommus, use a default agaw, and
590  * get a supported less agaw for iommus that don't support the default agaw.
591  */
592 int iommu_calculate_agaw(struct intel_iommu *iommu)
593 {
594 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
595 }
596 
597 /* This functionin only returns single iommu in a domain */
598 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
599 {
600 	int iommu_id;
601 
602 	/* si_domain and vm domain should not get here. */
603 	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
604 		return NULL;
605 
606 	for_each_domain_iommu(iommu_id, domain)
607 		break;
608 
609 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
610 		return NULL;
611 
612 	return g_iommus[iommu_id];
613 }
614 
615 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
616 {
617 	return sm_supported(iommu) ?
618 			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
619 }
620 
621 static void domain_update_iommu_coherency(struct dmar_domain *domain)
622 {
623 	struct dmar_drhd_unit *drhd;
624 	struct intel_iommu *iommu;
625 	bool found = false;
626 	int i;
627 
628 	domain->iommu_coherency = 1;
629 
630 	for_each_domain_iommu(i, domain) {
631 		found = true;
632 		if (!iommu_paging_structure_coherency(g_iommus[i])) {
633 			domain->iommu_coherency = 0;
634 			break;
635 		}
636 	}
637 	if (found)
638 		return;
639 
640 	/* No hardware attached; use lowest common denominator */
641 	rcu_read_lock();
642 	for_each_active_iommu(iommu, drhd) {
643 		if (!iommu_paging_structure_coherency(iommu)) {
644 			domain->iommu_coherency = 0;
645 			break;
646 		}
647 	}
648 	rcu_read_unlock();
649 }
650 
651 static int domain_update_iommu_snooping(struct intel_iommu *skip)
652 {
653 	struct dmar_drhd_unit *drhd;
654 	struct intel_iommu *iommu;
655 	int ret = 1;
656 
657 	rcu_read_lock();
658 	for_each_active_iommu(iommu, drhd) {
659 		if (iommu != skip) {
660 			if (!ecap_sc_support(iommu->ecap)) {
661 				ret = 0;
662 				break;
663 			}
664 		}
665 	}
666 	rcu_read_unlock();
667 
668 	return ret;
669 }
670 
671 static int domain_update_iommu_superpage(struct dmar_domain *domain,
672 					 struct intel_iommu *skip)
673 {
674 	struct dmar_drhd_unit *drhd;
675 	struct intel_iommu *iommu;
676 	int mask = 0x3;
677 
678 	if (!intel_iommu_superpage) {
679 		return 0;
680 	}
681 
682 	/* set iommu_superpage to the smallest common denominator */
683 	rcu_read_lock();
684 	for_each_active_iommu(iommu, drhd) {
685 		if (iommu != skip) {
686 			if (domain && domain_use_first_level(domain)) {
687 				if (!cap_fl1gp_support(iommu->cap))
688 					mask = 0x1;
689 			} else {
690 				mask &= cap_super_page_val(iommu->cap);
691 			}
692 
693 			if (!mask)
694 				break;
695 		}
696 	}
697 	rcu_read_unlock();
698 
699 	return fls(mask);
700 }
701 
702 /* Some capabilities may be different across iommus */
703 static void domain_update_iommu_cap(struct dmar_domain *domain)
704 {
705 	domain_update_iommu_coherency(domain);
706 	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
707 	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
708 }
709 
710 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
711 					 u8 devfn, int alloc)
712 {
713 	struct root_entry *root = &iommu->root_entry[bus];
714 	struct context_entry *context;
715 	u64 *entry;
716 
717 	entry = &root->lo;
718 	if (sm_supported(iommu)) {
719 		if (devfn >= 0x80) {
720 			devfn -= 0x80;
721 			entry = &root->hi;
722 		}
723 		devfn *= 2;
724 	}
725 	if (*entry & 1)
726 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
727 	else {
728 		unsigned long phy_addr;
729 		if (!alloc)
730 			return NULL;
731 
732 		context = alloc_pgtable_page(iommu->node);
733 		if (!context)
734 			return NULL;
735 
736 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
737 		phy_addr = virt_to_phys((void *)context);
738 		*entry = phy_addr | 1;
739 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
740 	}
741 	return &context[devfn];
742 }
743 
744 static int iommu_dummy(struct device *dev)
745 {
746 	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
747 }
748 
749 static bool attach_deferred(struct device *dev)
750 {
751 	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
752 }
753 
754 /**
755  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
756  *				 sub-hierarchy of a candidate PCI-PCI bridge
757  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
758  * @bridge: the candidate PCI-PCI bridge
759  *
760  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
761  */
762 static bool
763 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
764 {
765 	struct pci_dev *pdev, *pbridge;
766 
767 	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
768 		return false;
769 
770 	pdev = to_pci_dev(dev);
771 	pbridge = to_pci_dev(bridge);
772 
773 	if (pbridge->subordinate &&
774 	    pbridge->subordinate->number <= pdev->bus->number &&
775 	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
776 		return true;
777 
778 	return false;
779 }
780 
/*
 * Find the IOMMU that covers @dev and report the bus/devfn to use for it.
 *
 * @dev:   device to look up; devices carrying the dummy sentinel are
 *         rejected up front.
 * @bus:   written with the bus number on success.
 * @devfn: written with the device/function number on success.
 *
 * Every active DRHD unit is scanned under rcu_read_lock().  A unit matches
 * when @dev (or its PF / ACPI companion) appears in the unit's device
 * scope, when @dev sits below a bridge listed in the scope, or when the
 * unit is marked include_all and @dev is a PCI device.
 *
 * Returns the matching IOMMU, or NULL if none is found; @bus and @devfn
 * are left untouched in the NULL case.
 */
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = pci_real_dma_dev(to_pci_dev(dev));

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		/* PCI devices can only match a unit in their own segment. */
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (is_downstream_to_pci_bridge(dev, tmp))
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		/*
		 * Also entered by goto from the scope loop above; every path
		 * that lands here reports the BDF taken from pdev itself.
		 */
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	/* Loop finished without a match. */
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}
843 
844 static void domain_flush_cache(struct dmar_domain *domain,
845 			       void *addr, int size)
846 {
847 	if (!domain->iommu_coherency)
848 		clflush_cache_range(addr, size);
849 }
850 
851 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
852 {
853 	struct context_entry *context;
854 	int ret = 0;
855 	unsigned long flags;
856 
857 	spin_lock_irqsave(&iommu->lock, flags);
858 	context = iommu_context_addr(iommu, bus, devfn, 0);
859 	if (context)
860 		ret = context_present(context);
861 	spin_unlock_irqrestore(&iommu->lock, flags);
862 	return ret;
863 }
864 
865 static void free_context_table(struct intel_iommu *iommu)
866 {
867 	int i;
868 	unsigned long flags;
869 	struct context_entry *context;
870 
871 	spin_lock_irqsave(&iommu->lock, flags);
872 	if (!iommu->root_entry) {
873 		goto out;
874 	}
875 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
876 		context = iommu_context_addr(iommu, i, 0, 0);
877 		if (context)
878 			free_pgtable_page(context);
879 
880 		if (!sm_supported(iommu))
881 			continue;
882 
883 		context = iommu_context_addr(iommu, i, 0x80, 0);
884 		if (context)
885 			free_pgtable_page(context);
886 
887 	}
888 	free_pgtable_page(iommu->root_entry);
889 	iommu->root_entry = NULL;
890 out:
891 	spin_unlock_irqrestore(&iommu->lock, flags);
892 }
893 
/*
 * Walk @domain's page table from the top level down and return the PTE
 * covering @pfn.
 *
 * @target_level is both input and output:
 *  - non-zero on entry: descend to exactly that level, allocating any
 *    missing intermediate tables on the way;
 *  - zero on entry: stop at the first superpage or non-present entry (or
 *    at level 1) without allocating, and store the level reached.
 *
 * Returns NULL when @pfn is beyond the domain's address width or when a
 * table page cannot be allocated.
 */
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		/* Lookup mode: never descend past a leaf or a hole. */
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			/* Flush the new table before it is linked in. */
			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (domain_use_first_level(domain))
				pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
			/* Install only if the slot is still empty. */
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		/* Descend into the table this entry points at. */
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	/* Lookup mode: tell the caller which level we stopped at. */
	if (!*target_level)
		*target_level = level;

	return pte;
}
949 
950 /* return address's pte at specific level */
951 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
952 					 unsigned long pfn,
953 					 int level, int *large_page)
954 {
955 	struct dma_pte *parent, *pte;
956 	int total = agaw_to_level(domain->agaw);
957 	int offset;
958 
959 	parent = domain->pgd;
960 	while (level <= total) {
961 		offset = pfn_level_offset(pfn, total);
962 		pte = &parent[offset];
963 		if (level == total)
964 			return pte;
965 
966 		if (!dma_pte_present(pte)) {
967 			*large_page = total;
968 			break;
969 		}
970 
971 		if (dma_pte_superpage(pte)) {
972 			*large_page = total;
973 			return pte;
974 		}
975 
976 		parent = phys_to_virt(dma_pte_addr(pte));
977 		total--;
978 	}
979 	return NULL;
980 }
981 
/*
 * Clear the leaf PTEs covering [@start_pfn, @last_pfn] in @domain.
 * Page-table pages themselves are not freed here.  The caller must follow
 * up with a TLB flush.
 */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		/* Reset per pass; the lookup overwrites it for holes and superpages. */
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			/*
			 * Hole in the table: skip ahead to the next boundary
			 * of the level above the one where the hole was found.
			 */
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		/* Clear consecutive entries until the range or the table page ends. */
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		/* One flush for the whole run of entries just cleared. */
		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	/* A zero start_pfn also ends the loop - presumably it means the PFN wrapped. */
	} while (start_pfn && start_pfn <= last_pfn);
}
1013 
/*
 * Recursively free lower-level page-table pages under the table @pte.
 *
 * @level:        level of the table @pte belongs to.
 * @retain_level: tables referenced from entries at this level or above are
 *                kept; only those referenced from lower levels are freed.
 * @pte:          first entry of the table being scanned.
 * @pfn:          first PFN covered by that table.
 * @start_pfn, @last_pfn: inclusive PFN range being torn down.
 *
 * A child table is freed only when the range covers it completely.
 */
static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	/* Start at the first entry of this table that intersects the range. */
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		/* Holes and superpage leaves have no child table to free. */
		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		/* Children of a level-2 entry are leaf tables; nothing below them. */
		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}
1052 
1053 /*
1054  * clear last level (leaf) ptes and free page table pages below the
1055  * level we wish to keep intact.
1056  */
1057 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1058 				   unsigned long start_pfn,
1059 				   unsigned long last_pfn,
1060 				   int retain_level)
1061 {
1062 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1063 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1064 	BUG_ON(start_pfn > last_pfn);
1065 
1066 	dma_pte_clear_range(domain, start_pfn, last_pfn);
1067 
1068 	/* We don't need lock here; nobody else touches the iova range */
1069 	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1070 			   domain->pgd, 0, start_pfn, last_pfn);
1071 
1072 	/* free pgd */
1073 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1074 		free_pgtable_page(domain->pgd);
1075 		domain->pgd = NULL;
1076 	}
1077 }
1078 
1079 /* When a page at a given level is being unlinked from its parent, we don't
1080    need to *modify* it at all. All we need to do is make a list of all the
1081    pages which can be freed just as soon as we've flushed the IOTLB and we
1082    know the hardware page-walk will no longer touch them.
1083    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1084    be freed. */
1085 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1086 					    int level, struct dma_pte *pte,
1087 					    struct page *freelist)
1088 {
1089 	struct page *pg;
1090 
1091 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1092 	pg->freelist = freelist;
1093 	freelist = pg;
1094 
1095 	if (level == 1)
1096 		return freelist;
1097 
1098 	pte = page_address(pg);
1099 	do {
1100 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1101 			freelist = dma_pte_list_pagetables(domain, level - 1,
1102 							   pte, freelist);
1103 		pte++;
1104 	} while (!first_pte_in_page(pte));
1105 
1106 	return freelist;
1107 }
1108 
1109 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1110 					struct dma_pte *pte, unsigned long pfn,
1111 					unsigned long start_pfn,
1112 					unsigned long last_pfn,
1113 					struct page *freelist)
1114 {
1115 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1116 
1117 	pfn = max(start_pfn, pfn);
1118 	pte = &pte[pfn_level_offset(pfn, level)];
1119 
1120 	do {
1121 		unsigned long level_pfn;
1122 
1123 		if (!dma_pte_present(pte))
1124 			goto next;
1125 
1126 		level_pfn = pfn & level_mask(level);
1127 
1128 		/* If range covers entire pagetable, free it */
1129 		if (start_pfn <= level_pfn &&
1130 		    last_pfn >= level_pfn + level_size(level) - 1) {
1131 			/* These suborbinate page tables are going away entirely. Don't
1132 			   bother to clear them; we're just going to *free* them. */
1133 			if (level > 1 && !dma_pte_superpage(pte))
1134 				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1135 
1136 			dma_clear_pte(pte);
1137 			if (!first_pte)
1138 				first_pte = pte;
1139 			last_pte = pte;
1140 		} else if (level > 1) {
1141 			/* Recurse down into a level that isn't *entirely* obsolete */
1142 			freelist = dma_pte_clear_level(domain, level - 1,
1143 						       phys_to_virt(dma_pte_addr(pte)),
1144 						       level_pfn, start_pfn, last_pfn,
1145 						       freelist);
1146 		}
1147 next:
1148 		pfn += level_size(level);
1149 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1150 
1151 	if (first_pte)
1152 		domain_flush_cache(domain, first_pte,
1153 				   (void *)++last_pte - (void *)first_pte);
1154 
1155 	return freelist;
1156 }
1157 
1158 /* We can't just free the pages because the IOMMU may still be walking
1159    the page tables, and may have cached the intermediate levels. The
1160    pages can only be freed after the IOTLB flush has been done. */
1161 static struct page *domain_unmap(struct dmar_domain *domain,
1162 				 unsigned long start_pfn,
1163 				 unsigned long last_pfn)
1164 {
1165 	struct page *freelist;
1166 
1167 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1168 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1169 	BUG_ON(start_pfn > last_pfn);
1170 
1171 	/* we don't need lock here; nobody else touches the iova range */
1172 	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1173 				       domain->pgd, 0, start_pfn, last_pfn, NULL);
1174 
1175 	/* free pgd */
1176 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1177 		struct page *pgd_page = virt_to_page(domain->pgd);
1178 		pgd_page->freelist = freelist;
1179 		freelist = pgd_page;
1180 
1181 		domain->pgd = NULL;
1182 	}
1183 
1184 	return freelist;
1185 }
1186 
1187 static void dma_free_pagelist(struct page *freelist)
1188 {
1189 	struct page *pg;
1190 
1191 	while ((pg = freelist)) {
1192 		freelist = pg->freelist;
1193 		free_pgtable_page(page_address(pg));
1194 	}
1195 }
1196 
/*
 * Callback taking an opaque word: @data is a struct page pointer heading
 * a freelist, which is released in full.
 */
static void iova_entry_free(unsigned long data)
{
	dma_free_pagelist((struct page *)data);
}
1203 
1204 /* iommu handling */
1205 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1206 {
1207 	struct root_entry *root;
1208 	unsigned long flags;
1209 
1210 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1211 	if (!root) {
1212 		pr_err("Allocating root entry for %s failed\n",
1213 			iommu->name);
1214 		return -ENOMEM;
1215 	}
1216 
1217 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1218 
1219 	spin_lock_irqsave(&iommu->lock, flags);
1220 	iommu->root_entry = root;
1221 	spin_unlock_irqrestore(&iommu->lock, flags);
1222 
1223 	return 0;
1224 }
1225 
1226 static void iommu_set_root_entry(struct intel_iommu *iommu)
1227 {
1228 	u64 addr;
1229 	u32 sts;
1230 	unsigned long flag;
1231 
1232 	addr = virt_to_phys(iommu->root_entry);
1233 	if (sm_supported(iommu))
1234 		addr |= DMA_RTADDR_SMT;
1235 
1236 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1237 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1238 
1239 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1240 
1241 	/* Make sure hardware complete it */
1242 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1243 		      readl, (sts & DMA_GSTS_RTPS), sts);
1244 
1245 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1246 }
1247 
1248 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1249 {
1250 	u32 val;
1251 	unsigned long flag;
1252 
1253 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1254 		return;
1255 
1256 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1257 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1258 
1259 	/* Make sure hardware complete it */
1260 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1261 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1262 
1263 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1264 }
1265 
1266 /* return value determine if we need a write buffer flush */
1267 static void __iommu_flush_context(struct intel_iommu *iommu,
1268 				  u16 did, u16 source_id, u8 function_mask,
1269 				  u64 type)
1270 {
1271 	u64 val = 0;
1272 	unsigned long flag;
1273 
1274 	switch (type) {
1275 	case DMA_CCMD_GLOBAL_INVL:
1276 		val = DMA_CCMD_GLOBAL_INVL;
1277 		break;
1278 	case DMA_CCMD_DOMAIN_INVL:
1279 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1280 		break;
1281 	case DMA_CCMD_DEVICE_INVL:
1282 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1283 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1284 		break;
1285 	default:
1286 		BUG();
1287 	}
1288 	val |= DMA_CCMD_ICC;
1289 
1290 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1291 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1292 
1293 	/* Make sure hardware complete it */
1294 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1295 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1296 
1297 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1298 }
1299 
1300 /* return value determine if we need a write buffer flush */
1301 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1302 				u64 addr, unsigned int size_order, u64 type)
1303 {
1304 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1305 	u64 val = 0, val_iva = 0;
1306 	unsigned long flag;
1307 
1308 	switch (type) {
1309 	case DMA_TLB_GLOBAL_FLUSH:
1310 		/* global flush doesn't need set IVA_REG */
1311 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1312 		break;
1313 	case DMA_TLB_DSI_FLUSH:
1314 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1315 		break;
1316 	case DMA_TLB_PSI_FLUSH:
1317 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1318 		/* IH bit is passed in as part of address */
1319 		val_iva = size_order | addr;
1320 		break;
1321 	default:
1322 		BUG();
1323 	}
1324 	/* Note: set drain read/write */
1325 #if 0
1326 	/*
1327 	 * This is probably to be super secure.. Looks like we can
1328 	 * ignore it without any impact.
1329 	 */
1330 	if (cap_read_drain(iommu->cap))
1331 		val |= DMA_TLB_READ_DRAIN;
1332 #endif
1333 	if (cap_write_drain(iommu->cap))
1334 		val |= DMA_TLB_WRITE_DRAIN;
1335 
1336 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1337 	/* Note: Only uses first TLB reg currently */
1338 	if (val_iva)
1339 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1340 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1341 
1342 	/* Make sure hardware complete it */
1343 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1344 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1345 
1346 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1347 
1348 	/* check IOTLB invalidation granularity */
1349 	if (DMA_TLB_IAIG(val) == 0)
1350 		pr_err("Flush IOTLB failed\n");
1351 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1352 		pr_debug("TLB flush request %Lx, actual %Lx\n",
1353 			(unsigned long long)DMA_TLB_IIRG(type),
1354 			(unsigned long long)DMA_TLB_IAIG(val));
1355 }
1356 
1357 static struct device_domain_info *
1358 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1359 			 u8 bus, u8 devfn)
1360 {
1361 	struct device_domain_info *info;
1362 
1363 	assert_spin_locked(&device_domain_lock);
1364 
1365 	if (!iommu->qi)
1366 		return NULL;
1367 
1368 	list_for_each_entry(info, &domain->devices, link)
1369 		if (info->iommu == iommu && info->bus == bus &&
1370 		    info->devfn == devfn) {
1371 			if (info->ats_supported && info->dev)
1372 				return info;
1373 			break;
1374 		}
1375 
1376 	return NULL;
1377 }
1378 
1379 static void domain_update_iotlb(struct dmar_domain *domain)
1380 {
1381 	struct device_domain_info *info;
1382 	bool has_iotlb_device = false;
1383 
1384 	assert_spin_locked(&device_domain_lock);
1385 
1386 	list_for_each_entry(info, &domain->devices, link) {
1387 		struct pci_dev *pdev;
1388 
1389 		if (!info->dev || !dev_is_pci(info->dev))
1390 			continue;
1391 
1392 		pdev = to_pci_dev(info->dev);
1393 		if (pdev->ats_enabled) {
1394 			has_iotlb_device = true;
1395 			break;
1396 		}
1397 	}
1398 
1399 	domain->has_iotlb_device = has_iotlb_device;
1400 }
1401 
1402 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1403 {
1404 	struct pci_dev *pdev;
1405 
1406 	assert_spin_locked(&device_domain_lock);
1407 
1408 	if (!info || !dev_is_pci(info->dev))
1409 		return;
1410 
1411 	pdev = to_pci_dev(info->dev);
1412 	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
1413 	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1414 	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1415 	 * reserved, which should be set to 0.
1416 	 */
1417 	if (!ecap_dit(info->iommu->ecap))
1418 		info->pfsid = 0;
1419 	else {
1420 		struct pci_dev *pf_pdev;
1421 
1422 		/* pdev will be returned if device is not a vf */
1423 		pf_pdev = pci_physfn(pdev);
1424 		info->pfsid = pci_dev_id(pf_pdev);
1425 	}
1426 
1427 #ifdef CONFIG_INTEL_IOMMU_SVM
1428 	/* The PCIe spec, in its wisdom, declares that the behaviour of
1429 	   the device if you enable PASID support after ATS support is
1430 	   undefined. So always enable PASID support on devices which
1431 	   have it, even if we can't yet know if we're ever going to
1432 	   use it. */
1433 	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1434 		info->pasid_enabled = 1;
1435 
1436 	if (info->pri_supported &&
1437 	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1438 	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1439 		info->pri_enabled = 1;
1440 #endif
1441 	if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1442 	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1443 		info->ats_enabled = 1;
1444 		domain_update_iotlb(info->domain);
1445 		info->ats_qdep = pci_ats_queue_depth(pdev);
1446 	}
1447 }
1448 
1449 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1450 {
1451 	struct pci_dev *pdev;
1452 
1453 	assert_spin_locked(&device_domain_lock);
1454 
1455 	if (!dev_is_pci(info->dev))
1456 		return;
1457 
1458 	pdev = to_pci_dev(info->dev);
1459 
1460 	if (info->ats_enabled) {
1461 		pci_disable_ats(pdev);
1462 		info->ats_enabled = 0;
1463 		domain_update_iotlb(info->domain);
1464 	}
1465 #ifdef CONFIG_INTEL_IOMMU_SVM
1466 	if (info->pri_enabled) {
1467 		pci_disable_pri(pdev);
1468 		info->pri_enabled = 0;
1469 	}
1470 	if (info->pasid_enabled) {
1471 		pci_disable_pasid(pdev);
1472 		info->pasid_enabled = 0;
1473 	}
1474 #endif
1475 }
1476 
1477 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1478 				  u64 addr, unsigned mask)
1479 {
1480 	u16 sid, qdep;
1481 	unsigned long flags;
1482 	struct device_domain_info *info;
1483 
1484 	if (!domain->has_iotlb_device)
1485 		return;
1486 
1487 	spin_lock_irqsave(&device_domain_lock, flags);
1488 	list_for_each_entry(info, &domain->devices, link) {
1489 		if (!info->ats_enabled)
1490 			continue;
1491 
1492 		sid = info->bus << 8 | info->devfn;
1493 		qdep = info->ats_qdep;
1494 		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1495 				qdep, addr, mask);
1496 	}
1497 	spin_unlock_irqrestore(&device_domain_lock, flags);
1498 }
1499 
1500 static void domain_flush_piotlb(struct intel_iommu *iommu,
1501 				struct dmar_domain *domain,
1502 				u64 addr, unsigned long npages, bool ih)
1503 {
1504 	u16 did = domain->iommu_did[iommu->seq_id];
1505 
1506 	if (domain->default_pasid)
1507 		qi_flush_piotlb(iommu, did, domain->default_pasid,
1508 				addr, npages, ih);
1509 
1510 	if (!list_empty(&domain->devices))
1511 		qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
1512 }
1513 
1514 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1515 				  struct dmar_domain *domain,
1516 				  unsigned long pfn, unsigned int pages,
1517 				  int ih, int map)
1518 {
1519 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1520 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1521 	u16 did = domain->iommu_did[iommu->seq_id];
1522 
1523 	BUG_ON(pages == 0);
1524 
1525 	if (ih)
1526 		ih = 1 << 6;
1527 
1528 	if (domain_use_first_level(domain)) {
1529 		domain_flush_piotlb(iommu, domain, addr, pages, ih);
1530 	} else {
1531 		/*
1532 		 * Fallback to domain selective flush if no PSI support or
1533 		 * the size is too big. PSI requires page size to be 2 ^ x,
1534 		 * and the base address is naturally aligned to the size.
1535 		 */
1536 		if (!cap_pgsel_inv(iommu->cap) ||
1537 		    mask > cap_max_amask_val(iommu->cap))
1538 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
1539 							DMA_TLB_DSI_FLUSH);
1540 		else
1541 			iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1542 							DMA_TLB_PSI_FLUSH);
1543 	}
1544 
1545 	/*
1546 	 * In caching mode, changes of pages from non-present to present require
1547 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1548 	 */
1549 	if (!cap_caching_mode(iommu->cap) || !map)
1550 		iommu_flush_dev_iotlb(domain, addr, mask);
1551 }
1552 
1553 /* Notification for newly created mappings */
1554 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1555 					struct dmar_domain *domain,
1556 					unsigned long pfn, unsigned int pages)
1557 {
1558 	/*
1559 	 * It's a non-present to present mapping. Only flush if caching mode
1560 	 * and second level.
1561 	 */
1562 	if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1563 		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1564 	else
1565 		iommu_flush_write_buffer(iommu);
1566 }
1567 
1568 static void iommu_flush_iova(struct iova_domain *iovad)
1569 {
1570 	struct dmar_domain *domain;
1571 	int idx;
1572 
1573 	domain = container_of(iovad, struct dmar_domain, iovad);
1574 
1575 	for_each_domain_iommu(idx, domain) {
1576 		struct intel_iommu *iommu = g_iommus[idx];
1577 		u16 did = domain->iommu_did[iommu->seq_id];
1578 
1579 		if (domain_use_first_level(domain))
1580 			domain_flush_piotlb(iommu, domain, 0, -1, 0);
1581 		else
1582 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
1583 						 DMA_TLB_DSI_FLUSH);
1584 
1585 		if (!cap_caching_mode(iommu->cap))
1586 			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1587 					      0, MAX_AGAW_PFN_WIDTH);
1588 	}
1589 }
1590 
1591 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1592 {
1593 	u32 pmen;
1594 	unsigned long flags;
1595 
1596 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1597 		return;
1598 
1599 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1600 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1601 	pmen &= ~DMA_PMEN_EPM;
1602 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1603 
1604 	/* wait for the protected region status bit to clear */
1605 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1606 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1607 
1608 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1609 }
1610 
1611 static void iommu_enable_translation(struct intel_iommu *iommu)
1612 {
1613 	u32 sts;
1614 	unsigned long flags;
1615 
1616 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1617 	iommu->gcmd |= DMA_GCMD_TE;
1618 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1619 
1620 	/* Make sure hardware complete it */
1621 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1622 		      readl, (sts & DMA_GSTS_TES), sts);
1623 
1624 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1625 }
1626 
1627 static void iommu_disable_translation(struct intel_iommu *iommu)
1628 {
1629 	u32 sts;
1630 	unsigned long flag;
1631 
1632 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1633 	iommu->gcmd &= ~DMA_GCMD_TE;
1634 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1635 
1636 	/* Make sure hardware complete it */
1637 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1638 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1639 
1640 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1641 }
1642 
1643 static int iommu_init_domains(struct intel_iommu *iommu)
1644 {
1645 	u32 ndomains, nlongs;
1646 	size_t size;
1647 
1648 	ndomains = cap_ndoms(iommu->cap);
1649 	pr_debug("%s: Number of Domains supported <%d>\n",
1650 		 iommu->name, ndomains);
1651 	nlongs = BITS_TO_LONGS(ndomains);
1652 
1653 	spin_lock_init(&iommu->lock);
1654 
1655 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1656 	if (!iommu->domain_ids) {
1657 		pr_err("%s: Allocating domain id array failed\n",
1658 		       iommu->name);
1659 		return -ENOMEM;
1660 	}
1661 
1662 	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1663 	iommu->domains = kzalloc(size, GFP_KERNEL);
1664 
1665 	if (iommu->domains) {
1666 		size = 256 * sizeof(struct dmar_domain *);
1667 		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1668 	}
1669 
1670 	if (!iommu->domains || !iommu->domains[0]) {
1671 		pr_err("%s: Allocating domain array failed\n",
1672 		       iommu->name);
1673 		kfree(iommu->domain_ids);
1674 		kfree(iommu->domains);
1675 		iommu->domain_ids = NULL;
1676 		iommu->domains    = NULL;
1677 		return -ENOMEM;
1678 	}
1679 
1680 	/*
1681 	 * If Caching mode is set, then invalid translations are tagged
1682 	 * with domain-id 0, hence we need to pre-allocate it. We also
1683 	 * use domain-id 0 as a marker for non-allocated domain-id, so
1684 	 * make sure it is not used for a real domain.
1685 	 */
1686 	set_bit(0, iommu->domain_ids);
1687 
1688 	/*
1689 	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1690 	 * entry for first-level or pass-through translation modes should
1691 	 * be programmed with a domain id different from those used for
1692 	 * second-level or nested translation. We reserve a domain id for
1693 	 * this purpose.
1694 	 */
1695 	if (sm_supported(iommu))
1696 		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1697 
1698 	return 0;
1699 }
1700 
1701 static void disable_dmar_iommu(struct intel_iommu *iommu)
1702 {
1703 	struct device_domain_info *info, *tmp;
1704 	unsigned long flags;
1705 
1706 	if (!iommu->domains || !iommu->domain_ids)
1707 		return;
1708 
1709 	spin_lock_irqsave(&device_domain_lock, flags);
1710 	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1711 		if (info->iommu != iommu)
1712 			continue;
1713 
1714 		if (!info->dev || !info->domain)
1715 			continue;
1716 
1717 		__dmar_remove_one_dev_info(info);
1718 	}
1719 	spin_unlock_irqrestore(&device_domain_lock, flags);
1720 
1721 	if (iommu->gcmd & DMA_GCMD_TE)
1722 		iommu_disable_translation(iommu);
1723 }
1724 
1725 static void free_dmar_iommu(struct intel_iommu *iommu)
1726 {
1727 	if ((iommu->domains) && (iommu->domain_ids)) {
1728 		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1729 		int i;
1730 
1731 		for (i = 0; i < elems; i++)
1732 			kfree(iommu->domains[i]);
1733 		kfree(iommu->domains);
1734 		kfree(iommu->domain_ids);
1735 		iommu->domains = NULL;
1736 		iommu->domain_ids = NULL;
1737 	}
1738 
1739 	g_iommus[iommu->seq_id] = NULL;
1740 
1741 	/* free context mapping */
1742 	free_context_table(iommu);
1743 
1744 #ifdef CONFIG_INTEL_IOMMU_SVM
1745 	if (pasid_supported(iommu)) {
1746 		if (ecap_prs(iommu->ecap))
1747 			intel_svm_finish_prq(iommu);
1748 	}
1749 	if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
1750 		ioasid_unregister_allocator(&iommu->pasid_allocator);
1751 
1752 #endif
1753 }
1754 
1755 /*
1756  * Check and return whether first level is used by default for
1757  * DMA translation.
1758  */
1759 static bool first_level_by_default(void)
1760 {
1761 	struct dmar_drhd_unit *drhd;
1762 	struct intel_iommu *iommu;
1763 	static int first_level_support = -1;
1764 
1765 	if (likely(first_level_support != -1))
1766 		return first_level_support;
1767 
1768 	first_level_support = 1;
1769 
1770 	rcu_read_lock();
1771 	for_each_active_iommu(iommu, drhd) {
1772 		if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
1773 			first_level_support = 0;
1774 			break;
1775 		}
1776 	}
1777 	rcu_read_unlock();
1778 
1779 	return first_level_support;
1780 }
1781 
1782 static struct dmar_domain *alloc_domain(int flags)
1783 {
1784 	struct dmar_domain *domain;
1785 
1786 	domain = alloc_domain_mem();
1787 	if (!domain)
1788 		return NULL;
1789 
1790 	memset(domain, 0, sizeof(*domain));
1791 	domain->nid = NUMA_NO_NODE;
1792 	domain->flags = flags;
1793 	if (first_level_by_default())
1794 		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1795 	domain->has_iotlb_device = false;
1796 	INIT_LIST_HEAD(&domain->devices);
1797 
1798 	return domain;
1799 }
1800 
1801 /* Must be called with iommu->lock */
1802 static int domain_attach_iommu(struct dmar_domain *domain,
1803 			       struct intel_iommu *iommu)
1804 {
1805 	unsigned long ndomains;
1806 	int num;
1807 
1808 	assert_spin_locked(&device_domain_lock);
1809 	assert_spin_locked(&iommu->lock);
1810 
1811 	domain->iommu_refcnt[iommu->seq_id] += 1;
1812 	domain->iommu_count += 1;
1813 	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1814 		ndomains = cap_ndoms(iommu->cap);
1815 		num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1816 
1817 		if (num >= ndomains) {
1818 			pr_err("%s: No free domain ids\n", iommu->name);
1819 			domain->iommu_refcnt[iommu->seq_id] -= 1;
1820 			domain->iommu_count -= 1;
1821 			return -ENOSPC;
1822 		}
1823 
1824 		set_bit(num, iommu->domain_ids);
1825 		set_iommu_domain(iommu, num, domain);
1826 
1827 		domain->iommu_did[iommu->seq_id] = num;
1828 		domain->nid			 = iommu->node;
1829 
1830 		domain_update_iommu_cap(domain);
1831 	}
1832 
1833 	return 0;
1834 }
1835 
1836 static int domain_detach_iommu(struct dmar_domain *domain,
1837 			       struct intel_iommu *iommu)
1838 {
1839 	int num, count;
1840 
1841 	assert_spin_locked(&device_domain_lock);
1842 	assert_spin_locked(&iommu->lock);
1843 
1844 	domain->iommu_refcnt[iommu->seq_id] -= 1;
1845 	count = --domain->iommu_count;
1846 	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1847 		num = domain->iommu_did[iommu->seq_id];
1848 		clear_bit(num, iommu->domain_ids);
1849 		set_iommu_domain(iommu, num, NULL);
1850 
1851 		domain_update_iommu_cap(domain);
1852 		domain->iommu_did[iommu->seq_id] = 0;
1853 	}
1854 
1855 	return count;
1856 }
1857 
1858 static struct iova_domain reserved_iova_list;
1859 static struct lock_class_key reserved_rbtree_key;
1860 
1861 static int dmar_init_reserved_ranges(void)
1862 {
1863 	struct pci_dev *pdev = NULL;
1864 	struct iova *iova;
1865 	int i;
1866 
1867 	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1868 
1869 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1870 		&reserved_rbtree_key);
1871 
1872 	/* IOAPIC ranges shouldn't be accessed by DMA */
1873 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1874 		IOVA_PFN(IOAPIC_RANGE_END));
1875 	if (!iova) {
1876 		pr_err("Reserve IOAPIC range failed\n");
1877 		return -ENODEV;
1878 	}
1879 
1880 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1881 	for_each_pci_dev(pdev) {
1882 		struct resource *r;
1883 
1884 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1885 			r = &pdev->resource[i];
1886 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1887 				continue;
1888 			iova = reserve_iova(&reserved_iova_list,
1889 					    IOVA_PFN(r->start),
1890 					    IOVA_PFN(r->end));
1891 			if (!iova) {
1892 				pci_err(pdev, "Reserve iova for %pR failed\n", r);
1893 				return -ENODEV;
1894 			}
1895 		}
1896 	}
1897 	return 0;
1898 }
1899 
/*
 * Round a guest address width up to the next value of the form
 * 12 + 9 * n (a 12-bit page offset plus 9 bits per page-table level),
 * capped at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int width = rem ? gaw + 9 - rem : gaw;

	return width > 64 ? 64 : width;
}
1913 
1914 static void domain_exit(struct dmar_domain *domain)
1915 {
1916 
1917 	/* Remove associated devices and clear attached or cached domains */
1918 	domain_remove_dev_info(domain);
1919 
1920 	/* destroy iovas */
1921 	if (domain->domain.type == IOMMU_DOMAIN_DMA)
1922 		put_iova_domain(&domain->iovad);
1923 
1924 	if (domain->pgd) {
1925 		struct page *freelist;
1926 
1927 		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1928 		dma_free_pagelist(freelist);
1929 	}
1930 
1931 	free_domain_mem(domain);
1932 }
1933 
1934 /*
1935  * Get the PASID directory size for scalable mode context entry.
1936  * Value of X in the PDTS field of a scalable mode context entry
1937  * indicates PASID directory with 2^(X + 7) entries.
1938  */
1939 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1940 {
1941 	int pds, max_pde;
1942 
1943 	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1944 	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1945 	if (pds < 7)
1946 		return 0;
1947 
1948 	return pds - 7;
1949 }
1950 
1951 /*
1952  * Set the RID_PASID field of a scalable mode context entry. The
1953  * IOMMU hardware will use the PASID value set in this field for
1954  * DMA translations of DMA requests without PASID.
1955  */
1956 static inline void
1957 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1958 {
1959 	context->hi |= pasid & ((1 << 20) - 1);
1960 }
1961 
1962 /*
1963  * Set the DTE(Device-TLB Enable) field of a scalable mode context
1964  * entry.
1965  */
1966 static inline void context_set_sm_dte(struct context_entry *context)
1967 {
1968 	context->lo |= (1 << 2);
1969 }
1970 
1971 /*
1972  * Set the PRE(Page Request Enable) field of a scalable mode context
1973  * entry.
1974  */
1975 static inline void context_set_sm_pre(struct context_entry *context)
1976 {
1977 	context->lo |= (1 << 4);
1978 }
1979 
/*
 * Encode a PASID directory size value for the context entry: the low
 * three bits of @pds placed at bit position 9.
 */
#define context_pdts(pds)	(((pds) & 7) << 9)
1982 
1983 static int domain_context_mapping_one(struct dmar_domain *domain,
1984 				      struct intel_iommu *iommu,
1985 				      struct pasid_table *table,
1986 				      u8 bus, u8 devfn)
1987 {
1988 	u16 did = domain->iommu_did[iommu->seq_id];
1989 	int translation = CONTEXT_TT_MULTI_LEVEL;
1990 	struct device_domain_info *info = NULL;
1991 	struct context_entry *context;
1992 	unsigned long flags;
1993 	int ret;
1994 
1995 	WARN_ON(did == 0);
1996 
1997 	if (hw_pass_through && domain_type_is_si(domain))
1998 		translation = CONTEXT_TT_PASS_THROUGH;
1999 
2000 	pr_debug("Set context mapping for %02x:%02x.%d\n",
2001 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2002 
2003 	BUG_ON(!domain->pgd);
2004 
2005 	spin_lock_irqsave(&device_domain_lock, flags);
2006 	spin_lock(&iommu->lock);
2007 
2008 	ret = -ENOMEM;
2009 	context = iommu_context_addr(iommu, bus, devfn, 1);
2010 	if (!context)
2011 		goto out_unlock;
2012 
2013 	ret = 0;
2014 	if (context_present(context))
2015 		goto out_unlock;
2016 
2017 	/*
2018 	 * For kdump cases, old valid entries may be cached due to the
2019 	 * in-flight DMA and copied pgtable, but there is no unmapping
2020 	 * behaviour for them, thus we need an explicit cache flush for
2021 	 * the newly-mapped device. For kdump, at this point, the device
2022 	 * is supposed to finish reset at its driver probe stage, so no
2023 	 * in-flight DMA will exist, and we don't need to worry anymore
2024 	 * hereafter.
2025 	 */
2026 	if (context_copied(context)) {
2027 		u16 did_old = context_domain_id(context);
2028 
2029 		if (did_old < cap_ndoms(iommu->cap)) {
2030 			iommu->flush.flush_context(iommu, did_old,
2031 						   (((u16)bus) << 8) | devfn,
2032 						   DMA_CCMD_MASK_NOBIT,
2033 						   DMA_CCMD_DEVICE_INVL);
2034 			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2035 						 DMA_TLB_DSI_FLUSH);
2036 		}
2037 	}
2038 
2039 	context_clear_entry(context);
2040 
2041 	if (sm_supported(iommu)) {
2042 		unsigned long pds;
2043 
2044 		WARN_ON(!table);
2045 
2046 		/* Setup the PASID DIR pointer: */
2047 		pds = context_get_sm_pds(table);
2048 		context->lo = (u64)virt_to_phys(table->table) |
2049 				context_pdts(pds);
2050 
2051 		/* Setup the RID_PASID field: */
2052 		context_set_sm_rid2pasid(context, PASID_RID2PASID);
2053 
2054 		/*
2055 		 * Setup the Device-TLB enable bit and Page request
2056 		 * Enable bit:
2057 		 */
2058 		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2059 		if (info && info->ats_supported)
2060 			context_set_sm_dte(context);
2061 		if (info && info->pri_supported)
2062 			context_set_sm_pre(context);
2063 	} else {
2064 		struct dma_pte *pgd = domain->pgd;
2065 		int agaw;
2066 
2067 		context_set_domain_id(context, did);
2068 
2069 		if (translation != CONTEXT_TT_PASS_THROUGH) {
2070 			/*
2071 			 * Skip top levels of page tables for iommu which has
2072 			 * less agaw than default. Unnecessary for PT mode.
2073 			 */
2074 			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2075 				ret = -ENOMEM;
2076 				pgd = phys_to_virt(dma_pte_addr(pgd));
2077 				if (!dma_pte_present(pgd))
2078 					goto out_unlock;
2079 			}
2080 
2081 			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2082 			if (info && info->ats_supported)
2083 				translation = CONTEXT_TT_DEV_IOTLB;
2084 			else
2085 				translation = CONTEXT_TT_MULTI_LEVEL;
2086 
2087 			context_set_address_root(context, virt_to_phys(pgd));
2088 			context_set_address_width(context, agaw);
2089 		} else {
2090 			/*
2091 			 * In pass through mode, AW must be programmed to
2092 			 * indicate the largest AGAW value supported by
2093 			 * hardware. And ASR is ignored by hardware.
2094 			 */
2095 			context_set_address_width(context, iommu->msagaw);
2096 		}
2097 
2098 		context_set_translation_type(context, translation);
2099 	}
2100 
2101 	context_set_fault_enable(context);
2102 	context_set_present(context);
2103 	if (!ecap_coherent(iommu->ecap))
2104 		clflush_cache_range(context, sizeof(*context));
2105 
2106 	/*
2107 	 * It's a non-present to present mapping. If hardware doesn't cache
2108 	 * non-present entry we only need to flush the write-buffer. If the
2109 	 * _does_ cache non-present entries, then it does so in the special
2110 	 * domain #0, which we have to flush:
2111 	 */
2112 	if (cap_caching_mode(iommu->cap)) {
2113 		iommu->flush.flush_context(iommu, 0,
2114 					   (((u16)bus) << 8) | devfn,
2115 					   DMA_CCMD_MASK_NOBIT,
2116 					   DMA_CCMD_DEVICE_INVL);
2117 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2118 	} else {
2119 		iommu_flush_write_buffer(iommu);
2120 	}
2121 	iommu_enable_dev_iotlb(info);
2122 
2123 	ret = 0;
2124 
2125 out_unlock:
2126 	spin_unlock(&iommu->lock);
2127 	spin_unlock_irqrestore(&device_domain_lock, flags);
2128 
2129 	return ret;
2130 }
2131 
/*
 * Argument bundle handed to domain_context_mapping_cb() through the opaque
 * pointer of pci_for_each_dma_alias(). Each member is forwarded unchanged
 * to domain_context_mapping_one().
 */
struct domain_context_mapping_data {
	struct dmar_domain *domain;	/* domain whose context entry is installed */
	struct intel_iommu *iommu;	/* IOMMU returned by device_to_iommu() */
	struct pasid_table *table;	/* result of intel_pasid_get_table() for the device */
};
2137 
2138 static int domain_context_mapping_cb(struct pci_dev *pdev,
2139 				     u16 alias, void *opaque)
2140 {
2141 	struct domain_context_mapping_data *data = opaque;
2142 
2143 	return domain_context_mapping_one(data->domain, data->iommu,
2144 					  data->table, PCI_BUS_NUM(alias),
2145 					  alias & 0xff);
2146 }
2147 
2148 static int
2149 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2150 {
2151 	struct domain_context_mapping_data data;
2152 	struct pasid_table *table;
2153 	struct intel_iommu *iommu;
2154 	u8 bus, devfn;
2155 
2156 	iommu = device_to_iommu(dev, &bus, &devfn);
2157 	if (!iommu)
2158 		return -ENODEV;
2159 
2160 	table = intel_pasid_get_table(dev);
2161 
2162 	if (!dev_is_pci(dev))
2163 		return domain_context_mapping_one(domain, iommu, table,
2164 						  bus, devfn);
2165 
2166 	data.domain = domain;
2167 	data.iommu = iommu;
2168 	data.table = table;
2169 
2170 	return pci_for_each_dma_alias(to_pci_dev(dev),
2171 				      &domain_context_mapping_cb, &data);
2172 }
2173 
2174 static int domain_context_mapped_cb(struct pci_dev *pdev,
2175 				    u16 alias, void *opaque)
2176 {
2177 	struct intel_iommu *iommu = opaque;
2178 
2179 	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2180 }
2181 
2182 static int domain_context_mapped(struct device *dev)
2183 {
2184 	struct intel_iommu *iommu;
2185 	u8 bus, devfn;
2186 
2187 	iommu = device_to_iommu(dev, &bus, &devfn);
2188 	if (!iommu)
2189 		return -ENODEV;
2190 
2191 	if (!dev_is_pci(dev))
2192 		return device_context_mapped(iommu, bus, devfn);
2193 
2194 	return !pci_for_each_dma_alias(to_pci_dev(dev),
2195 				       domain_context_mapped_cb, iommu);
2196 }
2197 
2198 /* Returns a number of VTD pages, but aligned to MM page size */
2199 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2200 					    size_t size)
2201 {
2202 	host_addr &= ~PAGE_MASK;
2203 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2204 }
2205 
2206 /* Return largest possible superpage level for a given mapping */
2207 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2208 					  unsigned long iov_pfn,
2209 					  unsigned long phy_pfn,
2210 					  unsigned long pages)
2211 {
2212 	int support, level = 1;
2213 	unsigned long pfnmerge;
2214 
2215 	support = domain->iommu_superpage;
2216 
2217 	/* To use a large page, the virtual *and* physical addresses
2218 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2219 	   of them will mean we have to use smaller pages. So just
2220 	   merge them and check both at once. */
2221 	pfnmerge = iov_pfn | phy_pfn;
2222 
2223 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2224 		pages >>= VTD_STRIDE_SHIFT;
2225 		if (!pages)
2226 			break;
2227 		pfnmerge >>= VTD_STRIDE_SHIFT;
2228 		level++;
2229 		support--;
2230 	}
2231 	return level;
2232 }
2233 
/*
 * Write the page-table entries that map @nr_pages VT-d pages starting at
 * IOVA pfn @iov_pfn in @domain, with the access bits given by @prot.
 *
 * When @sg is NULL the target is the physically contiguous range starting
 * at @phys_pfn. Otherwise the physical addresses are taken from successive
 * scatterlist entries (the incoming @phys_pfn is overwritten before use),
 * and each entry's dma_address/dma_length is filled in as it is consumed.
 *
 * Superpages are used where hardware_largepage_caps() allows it. The CPU
 * cache is flushed for written PTEs via domain_flush_cache() whenever a
 * page-table page is finished, the mapping is complete, or the code has to
 * fall back from superpages to smaller pages.
 *
 * Returns 0 on success, -EINVAL when @prot grants neither read nor write,
 * and -ENOMEM when pfn_to_dma_pte() returns NULL. BUGs when the last pfn
 * of the range is not supported by the domain.
 */
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	unsigned long sg_res = 0;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;
	u64 attr;

	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	/* Attribute bits OR-ed into every PTE value written below. */
	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
	if (domain_use_first_level(domain))
		attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;

	if (!sg) {
		/* Contiguous case: treat the whole range as one segment. */
		sg_res = nr_pages;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		if (!sg_res) {
			/*
			 * Current scatterlist entry is exhausted (or this is
			 * the first one): load the next segment.
			 */
			unsigned int pgoff = sg->offset & ~PAGE_MASK;

			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
			sg->dma_length = sg->length;
			pteval = (sg_phys(sg) - pgoff) | attr;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			/* (Re)locate the PTE slot and decide the page size. */
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
			if (!pte)
				return -ENOMEM;
			/* It is large page*/
			if (largepage_lvl > 1) {
				unsigned long nr_superpages, end_pfn;

				pteval |= DMA_PTE_LARGE_PAGE;
				lvl_pages = lvl_to_nr_pages(largepage_lvl);

				nr_superpages = sg_res / lvl_pages;
				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;

				/*
				 * Ensure that old small page tables are
				 * removed to make room for superpage(s).
				 * We're adding new large pages, so make sure
				 * we don't remove their parent tables.
				 */
				dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
						       largepage_lvl + 1);
			} else {
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
			}

		}
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			/*
			 * The slot was not empty. Report it; the mapping dump
			 * is limited to the first five occurrences.
			 */
			static int dumps = 5;
			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
				iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		/* Advance every cursor by the number of pages just mapped. */
		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		   need to flush the cache on the entries we've just written.
		   And then we'll need to recalculate 'pte', so clear it and
		   let it get set again in the if (!pte) block above.

		   If we're done (!nr_pages) we need to flush the cache too.

		   Also if we've been setting superpages, we may need to
		   recalculate 'pte' and switch back to smaller pages for the
		   end of the mapping, if the trailing size is not enough to
		   use another superpage (i.e. sg_res < lvl_pages). */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		/* Segment finished but more pages remain: next sg entry. */
		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}
2351 
2352 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2353 			  struct scatterlist *sg, unsigned long phys_pfn,
2354 			  unsigned long nr_pages, int prot)
2355 {
2356 	int iommu_id, ret;
2357 	struct intel_iommu *iommu;
2358 
2359 	/* Do the real mapping first */
2360 	ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2361 	if (ret)
2362 		return ret;
2363 
2364 	for_each_domain_iommu(iommu_id, domain) {
2365 		iommu = g_iommus[iommu_id];
2366 		__mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2367 	}
2368 
2369 	return 0;
2370 }
2371 
/*
 * Map @nr_pages pages at @iov_pfn using the physical addresses described
 * by scatterlist @sg. The physical pfn argument of domain_mapping() is
 * passed as 0 here.
 */
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	const unsigned long no_phys_pfn = 0;

	return domain_mapping(domain, iov_pfn, sg, no_phys_pfn, nr_pages, prot);
}
2378 
2379 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2380 				     unsigned long phys_pfn, unsigned long nr_pages,
2381 				     int prot)
2382 {
2383 	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2384 }
2385 
2386 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2387 {
2388 	unsigned long flags;
2389 	struct context_entry *context;
2390 	u16 did_old;
2391 
2392 	if (!iommu)
2393 		return;
2394 
2395 	spin_lock_irqsave(&iommu->lock, flags);
2396 	context = iommu_context_addr(iommu, bus, devfn, 0);
2397 	if (!context) {
2398 		spin_unlock_irqrestore(&iommu->lock, flags);
2399 		return;
2400 	}
2401 	did_old = context_domain_id(context);
2402 	context_clear_entry(context);
2403 	__iommu_flush_cache(iommu, context, sizeof(*context));
2404 	spin_unlock_irqrestore(&iommu->lock, flags);
2405 	iommu->flush.flush_context(iommu,
2406 				   did_old,
2407 				   (((u16)bus) << 8) | devfn,
2408 				   DMA_CCMD_MASK_NOBIT,
2409 				   DMA_CCMD_DEVICE_INVL);
2410 	iommu->flush.flush_iotlb(iommu,
2411 				 did_old,
2412 				 0,
2413 				 0,
2414 				 DMA_TLB_DSI_FLUSH);
2415 }
2416 
2417 static inline void unlink_domain_info(struct device_domain_info *info)
2418 {
2419 	assert_spin_locked(&device_domain_lock);
2420 	list_del(&info->link);
2421 	list_del(&info->global);
2422 	if (info->dev)
2423 		info->dev->archdata.iommu = NULL;
2424 }
2425 
2426 static void domain_remove_dev_info(struct dmar_domain *domain)
2427 {
2428 	struct device_domain_info *info, *tmp;
2429 	unsigned long flags;
2430 
2431 	spin_lock_irqsave(&device_domain_lock, flags);
2432 	list_for_each_entry_safe(info, tmp, &domain->devices, link)
2433 		__dmar_remove_one_dev_info(info);
2434 	spin_unlock_irqrestore(&device_domain_lock, flags);
2435 }
2436 
2437 struct dmar_domain *find_domain(struct device *dev)
2438 {
2439 	struct device_domain_info *info;
2440 
2441 	if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
2442 		return NULL;
2443 
2444 	/* No lock here, assumes no domain exit in normal case */
2445 	info = get_domain_info(dev);
2446 	if (likely(info))
2447 		return info->domain;
2448 
2449 	return NULL;
2450 }
2451 
2452 static void do_deferred_attach(struct device *dev)
2453 {
2454 	struct iommu_domain *domain;
2455 
2456 	dev->archdata.iommu = NULL;
2457 	domain = iommu_get_domain_for_dev(dev);
2458 	if (domain)
2459 		intel_iommu_attach_device(domain, dev);
2460 }
2461 
2462 static inline struct device_domain_info *
2463 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2464 {
2465 	struct device_domain_info *info;
2466 
2467 	list_for_each_entry(info, &device_domain_list, global)
2468 		if (info->segment == segment && info->bus == bus &&
2469 		    info->devfn == devfn)
2470 			return info;
2471 
2472 	return NULL;
2473 }
2474 
2475 static int domain_setup_first_level(struct intel_iommu *iommu,
2476 				    struct dmar_domain *domain,
2477 				    struct device *dev,
2478 				    int pasid)
2479 {
2480 	int flags = PASID_FLAG_SUPERVISOR_MODE;
2481 	struct dma_pte *pgd = domain->pgd;
2482 	int agaw, level;
2483 
2484 	/*
2485 	 * Skip top levels of page tables for iommu which has
2486 	 * less agaw than default. Unnecessary for PT mode.
2487 	 */
2488 	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2489 		pgd = phys_to_virt(dma_pte_addr(pgd));
2490 		if (!dma_pte_present(pgd))
2491 			return -ENOMEM;
2492 	}
2493 
2494 	level = agaw_to_level(agaw);
2495 	if (level != 4 && level != 5)
2496 		return -EINVAL;
2497 
2498 	flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
2499 
2500 	return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2501 					     domain->iommu_did[iommu->seq_id],
2502 					     flags);
2503 }
2504 
2505 static bool dev_is_real_dma_subdevice(struct device *dev)
2506 {
2507 	return dev && dev_is_pci(dev) &&
2508 	       pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2509 }
2510 
/*
 * Create the device_domain_info for @dev (identified by @bus/@devfn behind
 * @iommu), link it to @domain and set up the translation structures.
 *
 * Returns:
 *  - @domain on success;
 *  - a different, already existing domain when @dev (or an entry with the
 *    same segment/bus/devfn) is already tracked; in that case nothing is
 *    inserted and the caller must free the domain it passed in;
 *  - NULL on allocation failure, when domain_attach_iommu() fails, or when
 *    PASID table / RID2PASID / context setup fails (the freshly inserted
 *    info is removed again via dmar_remove_one_dev_info()).
 */
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
						    int bus, int devfn,
						    struct device *dev,
						    struct dmar_domain *domain)
{
	struct dmar_domain *found = NULL;
	struct device_domain_info *info;
	unsigned long flags;
	int ret;

	info = alloc_devinfo_mem();
	if (!info)
		return NULL;

	/*
	 * For a real-DMA sub-device the info records the device's own PCI
	 * address rather than the bus/devfn passed in by the caller.
	 */
	if (!dev_is_real_dma_subdevice(dev)) {
		info->bus = bus;
		info->devfn = devfn;
		info->segment = iommu->segment;
	} else {
		struct pci_dev *pdev = to_pci_dev(dev);

		info->bus = pdev->bus->number;
		info->devfn = pdev->devfn;
		info->segment = pci_domain_nr(pdev->bus);
	}

	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
	info->ats_qdep = 0;
	info->dev = dev;
	info->domain = domain;
	info->iommu = iommu;
	info->pasid_table = NULL;
	info->auxd_enabled = 0;
	INIT_LIST_HEAD(&info->auxiliary_domains);

	/* Probe the optional PCI capabilities: ATS, PASID and PRI. */
	if (dev && dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(info->dev);

		if (ecap_dev_iotlb_support(iommu->ecap) &&
		    pci_ats_supported(pdev) &&
		    dmar_find_matched_atsr_unit(pdev))
			info->ats_supported = 1;

		if (sm_supported(iommu)) {
			if (pasid_supported(iommu)) {
				int features = pci_pasid_features(pdev);
				/* Bit 0 marks "supported"; the rest holds the feature bits. */
				if (features >= 0)
					info->pasid_supported = features | 1;
			}

			if (info->ats_supported && ecap_prs(iommu->ecap) &&
			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
				info->pri_supported = 1;
		}
	}

	spin_lock_irqsave(&device_domain_lock, flags);
	if (dev)
		found = find_domain(dev);

	if (!found) {
		/* Not found by device: look for the same PCI address. */
		struct device_domain_info *info2;
		info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
						       info->devfn);
		if (info2) {
			found      = info2->domain;
			info2->dev = dev;
		}
	}

	if (found) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		free_devinfo_mem(info);
		/* Caller must free the original domain */
		return found;
	}

	/* iommu->lock nests inside device_domain_lock here. */
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);

	if (ret) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		free_devinfo_mem(info);
		return NULL;
	}

	/* Publish the info; from here on failures must undo the insertion. */
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	if (dev)
		dev->archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	/* PASID table is mandatory for a PCI device in scalable mode. */
	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
		ret = intel_pasid_alloc_table(dev);
		if (ret) {
			dev_err(dev, "PASID table allocation failed\n");
			dmar_remove_one_dev_info(dev);
			return NULL;
		}

		/* Setup the PASID entry for requests without PASID: */
		spin_lock(&iommu->lock);
		if (hw_pass_through && domain_type_is_si(domain))
			ret = intel_pasid_setup_pass_through(iommu, domain,
					dev, PASID_RID2PASID);
		else if (domain_use_first_level(domain))
			ret = domain_setup_first_level(iommu, domain, dev,
					PASID_RID2PASID);
		else
			ret = intel_pasid_setup_second_level(iommu, domain,
					dev, PASID_RID2PASID);
		spin_unlock(&iommu->lock);
		if (ret) {
			dev_err(dev, "Setup RID2PASID failed\n");
			dmar_remove_one_dev_info(dev);
			return NULL;
		}
	}

	if (dev && domain_context_mapping(domain, dev)) {
		dev_err(dev, "Domain context map failed\n");
		dmar_remove_one_dev_info(dev);
		return NULL;
	}

	return domain;
}
2641 
2642 static int iommu_domain_identity_map(struct dmar_domain *domain,
2643 				     unsigned long first_vpfn,
2644 				     unsigned long last_vpfn)
2645 {
2646 	/*
2647 	 * RMRR range might have overlap with physical memory range,
2648 	 * clear it first
2649 	 */
2650 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2651 
2652 	return __domain_mapping(domain, first_vpfn, NULL,
2653 				first_vpfn, last_vpfn - first_vpfn + 1,
2654 				DMA_PTE_READ|DMA_PTE_WRITE);
2655 }
2656 
2657 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2658 
2659 static int __init si_domain_init(int hw)
2660 {
2661 	struct dmar_rmrr_unit *rmrr;
2662 	struct device *dev;
2663 	int i, nid, ret;
2664 
2665 	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2666 	if (!si_domain)
2667 		return -EFAULT;
2668 
2669 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2670 		domain_exit(si_domain);
2671 		return -EFAULT;
2672 	}
2673 
2674 	if (hw)
2675 		return 0;
2676 
2677 	for_each_online_node(nid) {
2678 		unsigned long start_pfn, end_pfn;
2679 		int i;
2680 
2681 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2682 			ret = iommu_domain_identity_map(si_domain,
2683 					mm_to_dma_pfn(start_pfn),
2684 					mm_to_dma_pfn(end_pfn));
2685 			if (ret)
2686 				return ret;
2687 		}
2688 	}
2689 
2690 	/*
2691 	 * Identity map the RMRRs so that devices with RMRRs could also use
2692 	 * the si_domain.
2693 	 */
2694 	for_each_rmrr_units(rmrr) {
2695 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2696 					  i, dev) {
2697 			unsigned long long start = rmrr->base_address;
2698 			unsigned long long end = rmrr->end_address;
2699 
2700 			if (WARN_ON(end < start ||
2701 				    end >> agaw_to_width(si_domain->agaw)))
2702 				continue;
2703 
2704 			ret = iommu_domain_identity_map(si_domain,
2705 					mm_to_dma_pfn(start >> PAGE_SHIFT),
2706 					mm_to_dma_pfn(end >> PAGE_SHIFT));
2707 			if (ret)
2708 				return ret;
2709 		}
2710 	}
2711 
2712 	return 0;
2713 }
2714 
2715 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2716 {
2717 	struct dmar_domain *ndomain;
2718 	struct intel_iommu *iommu;
2719 	u8 bus, devfn;
2720 
2721 	iommu = device_to_iommu(dev, &bus, &devfn);
2722 	if (!iommu)
2723 		return -ENODEV;
2724 
2725 	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2726 	if (ndomain != domain)
2727 		return -EBUSY;
2728 
2729 	return 0;
2730 }
2731 
2732 static bool device_has_rmrr(struct device *dev)
2733 {
2734 	struct dmar_rmrr_unit *rmrr;
2735 	struct device *tmp;
2736 	int i;
2737 
2738 	rcu_read_lock();
2739 	for_each_rmrr_units(rmrr) {
2740 		/*
2741 		 * Return TRUE if this RMRR contains the device that
2742 		 * is passed in.
2743 		 */
2744 		for_each_active_dev_scope(rmrr->devices,
2745 					  rmrr->devices_cnt, i, tmp)
2746 			if (tmp == dev ||
2747 			    is_downstream_to_pci_bridge(dev, tmp)) {
2748 				rcu_read_unlock();
2749 				return true;
2750 			}
2751 	}
2752 	rcu_read_unlock();
2753 	return false;
2754 }
2755 
2756 /**
2757  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2758  * is relaxable (ie. is allowed to be not enforced under some conditions)
2759  * @dev: device handle
2760  *
2761  * We assume that PCI USB devices with RMRRs have them largely
2762  * for historical reasons and that the RMRR space is not actively used post
2763  * boot.  This exclusion may change if vendors begin to abuse it.
2764  *
2765  * The same exception is made for graphics devices, with the requirement that
2766  * any use of the RMRR regions will be torn down before assigning the device
2767  * to a guest.
2768  *
2769  * Return: true if the RMRR is relaxable, false otherwise
2770  */
2771 static bool device_rmrr_is_relaxable(struct device *dev)
2772 {
2773 	struct pci_dev *pdev;
2774 
2775 	if (!dev_is_pci(dev))
2776 		return false;
2777 
2778 	pdev = to_pci_dev(dev);
2779 	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2780 		return true;
2781 	else
2782 		return false;
2783 }
2784 
2785 /*
2786  * There are a couple cases where we need to restrict the functionality of
2787  * devices associated with RMRRs.  The first is when evaluating a device for
2788  * identity mapping because problems exist when devices are moved in and out
2789  * of domains and their respective RMRR information is lost.  This means that
2790  * a device with associated RMRRs will never be in a "passthrough" domain.
2791  * The second is use of the device through the IOMMU API.  This interface
2792  * expects to have full control of the IOVA space for the device.  We cannot
2793  * satisfy both the requirement that RMRR access is maintained and have an
2794  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2795  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2796  * We therefore prevent devices associated with an RMRR from participating in
2797  * the IOMMU API, which eliminates them from device assignment.
2798  *
2799  * In both cases, devices which have relaxable RMRRs are not concerned by this
2800  * restriction. See device_rmrr_is_relaxable comment.
2801  */
2802 static bool device_is_rmrr_locked(struct device *dev)
2803 {
2804 	if (!device_has_rmrr(dev))
2805 		return false;
2806 
2807 	if (device_rmrr_is_relaxable(dev))
2808 		return false;
2809 
2810 	return true;
2811 }
2812 
2813 /*
2814  * Return the required default domain type for a specific device.
2815  *
2816  * @dev: the device in query
2817  * @startup: true if this is during early boot
2818  *
2819  * Returns:
2820  *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2821  *  - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2822  *  - 0: both identity and dynamic domains work for this device
2823  */
2824 static int device_def_domain_type(struct device *dev)
2825 {
2826 	if (dev_is_pci(dev)) {
2827 		struct pci_dev *pdev = to_pci_dev(dev);
2828 
2829 		/*
2830 		 * Prevent any device marked as untrusted from getting
2831 		 * placed into the statically identity mapping domain.
2832 		 */
2833 		if (pdev->untrusted)
2834 			return IOMMU_DOMAIN_DMA;
2835 
2836 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2837 			return IOMMU_DOMAIN_IDENTITY;
2838 
2839 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2840 			return IOMMU_DOMAIN_IDENTITY;
2841 	}
2842 
2843 	return 0;
2844 }
2845 
2846 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2847 {
2848 	/*
2849 	 * Start from the sane iommu hardware state.
2850 	 * If the queued invalidation is already initialized by us
2851 	 * (for example, while enabling interrupt-remapping) then
2852 	 * we got the things already rolling from a sane state.
2853 	 */
2854 	if (!iommu->qi) {
2855 		/*
2856 		 * Clear any previous faults.
2857 		 */
2858 		dmar_fault(-1, iommu);
2859 		/*
2860 		 * Disable queued invalidation if supported and already enabled
2861 		 * before OS handover.
2862 		 */
2863 		dmar_disable_qi(iommu);
2864 	}
2865 
2866 	if (dmar_enable_qi(iommu)) {
2867 		/*
2868 		 * Queued Invalidate not enabled, use Register Based Invalidate
2869 		 */
2870 		iommu->flush.flush_context = __iommu_flush_context;
2871 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2872 		pr_info("%s: Using Register based invalidation\n",
2873 			iommu->name);
2874 	} else {
2875 		iommu->flush.flush_context = qi_flush_context;
2876 		iommu->flush.flush_iotlb = qi_flush_iotlb;
2877 		pr_info("%s: Using Queued invalidation\n", iommu->name);
2878 	}
2879 }
2880 
2881 static int copy_context_table(struct intel_iommu *iommu,
2882 			      struct root_entry *old_re,
2883 			      struct context_entry **tbl,
2884 			      int bus, bool ext)
2885 {
2886 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2887 	struct context_entry *new_ce = NULL, ce;
2888 	struct context_entry *old_ce = NULL;
2889 	struct root_entry re;
2890 	phys_addr_t old_ce_phys;
2891 
2892 	tbl_idx = ext ? bus * 2 : bus;
2893 	memcpy(&re, old_re, sizeof(re));
2894 
2895 	for (devfn = 0; devfn < 256; devfn++) {
2896 		/* First calculate the correct index */
2897 		idx = (ext ? devfn * 2 : devfn) % 256;
2898 
2899 		if (idx == 0) {
2900 			/* First save what we may have and clean up */
2901 			if (new_ce) {
2902 				tbl[tbl_idx] = new_ce;
2903 				__iommu_flush_cache(iommu, new_ce,
2904 						    VTD_PAGE_SIZE);
2905 				pos = 1;
2906 			}
2907 
2908 			if (old_ce)
2909 				memunmap(old_ce);
2910 
2911 			ret = 0;
2912 			if (devfn < 0x80)
2913 				old_ce_phys = root_entry_lctp(&re);
2914 			else
2915 				old_ce_phys = root_entry_uctp(&re);
2916 
2917 			if (!old_ce_phys) {
2918 				if (ext && devfn == 0) {
2919 					/* No LCTP, try UCTP */
2920 					devfn = 0x7f;
2921 					continue;
2922 				} else {
2923 					goto out;
2924 				}
2925 			}
2926 
2927 			ret = -ENOMEM;
2928 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
2929 					MEMREMAP_WB);
2930 			if (!old_ce)
2931 				goto out;
2932 
2933 			new_ce = alloc_pgtable_page(iommu->node);
2934 			if (!new_ce)
2935 				goto out_unmap;
2936 
2937 			ret = 0;
2938 		}
2939 
2940 		/* Now copy the context entry */
2941 		memcpy(&ce, old_ce + idx, sizeof(ce));
2942 
2943 		if (!__context_present(&ce))
2944 			continue;
2945 
2946 		did = context_domain_id(&ce);
2947 		if (did >= 0 && did < cap_ndoms(iommu->cap))
2948 			set_bit(did, iommu->domain_ids);
2949 
2950 		/*
2951 		 * We need a marker for copied context entries. This
2952 		 * marker needs to work for the old format as well as
2953 		 * for extended context entries.
2954 		 *
2955 		 * Bit 67 of the context entry is used. In the old
2956 		 * format this bit is available to software, in the
2957 		 * extended format it is the PGE bit, but PGE is ignored
2958 		 * by HW if PASIDs are disabled (and thus still
2959 		 * available).
2960 		 *
2961 		 * So disable PASIDs first and then mark the entry
2962 		 * copied. This means that we don't copy PASID
2963 		 * translations from the old kernel, but this is fine as
2964 		 * faults there are not fatal.
2965 		 */
2966 		context_clear_pasid_enable(&ce);
2967 		context_set_copied(&ce);
2968 
2969 		new_ce[idx] = ce;
2970 	}
2971 
2972 	tbl[tbl_idx + pos] = new_ce;
2973 
2974 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2975 
2976 out_unmap:
2977 	memunmap(old_ce);
2978 
2979 out:
2980 	return ret;
2981 }
2982 
2983 static int copy_translation_tables(struct intel_iommu *iommu)
2984 {
2985 	struct context_entry **ctxt_tbls;
2986 	struct root_entry *old_rt;
2987 	phys_addr_t old_rt_phys;
2988 	int ctxt_table_entries;
2989 	unsigned long flags;
2990 	u64 rtaddr_reg;
2991 	int bus, ret;
2992 	bool new_ext, ext;
2993 
2994 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2995 	ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
2996 	new_ext    = !!ecap_ecs(iommu->ecap);
2997 
2998 	/*
2999 	 * The RTT bit can only be changed when translation is disabled,
3000 	 * but disabling translation means to open a window for data
3001 	 * corruption. So bail out and don't copy anything if we would
3002 	 * have to change the bit.
3003 	 */
3004 	if (new_ext != ext)
3005 		return -EINVAL;
3006 
3007 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3008 	if (!old_rt_phys)
3009 		return -EINVAL;
3010 
3011 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3012 	if (!old_rt)
3013 		return -ENOMEM;
3014 
3015 	/* This is too big for the stack - allocate it from slab */
3016 	ctxt_table_entries = ext ? 512 : 256;
3017 	ret = -ENOMEM;
3018 	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3019 	if (!ctxt_tbls)
3020 		goto out_unmap;
3021 
3022 	for (bus = 0; bus < 256; bus++) {
3023 		ret = copy_context_table(iommu, &old_rt[bus],
3024 					 ctxt_tbls, bus, ext);
3025 		if (ret) {
3026 			pr_err("%s: Failed to copy context table for bus %d\n",
3027 				iommu->name, bus);
3028 			continue;
3029 		}
3030 	}
3031 
3032 	spin_lock_irqsave(&iommu->lock, flags);
3033 
3034 	/* Context tables are copied, now write them to the root_entry table */
3035 	for (bus = 0; bus < 256; bus++) {
3036 		int idx = ext ? bus * 2 : bus;
3037 		u64 val;
3038 
3039 		if (ctxt_tbls[idx]) {
3040 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
3041 			iommu->root_entry[bus].lo = val;
3042 		}
3043 
3044 		if (!ext || !ctxt_tbls[idx + 1])
3045 			continue;
3046 
3047 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3048 		iommu->root_entry[bus].hi = val;
3049 	}
3050 
3051 	spin_unlock_irqrestore(&iommu->lock, flags);
3052 
3053 	kfree(ctxt_tbls);
3054 
3055 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3056 
3057 	ret = 0;
3058 
3059 out_unmap:
3060 	memunmap(old_rt);
3061 
3062 	return ret;
3063 }
3064 
3065 #ifdef CONFIG_INTEL_IOMMU_SVM
3066 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
3067 {
3068 	struct intel_iommu *iommu = data;
3069 	ioasid_t ioasid;
3070 
3071 	if (!iommu)
3072 		return INVALID_IOASID;
3073 	/*
3074 	 * VT-d virtual command interface always uses the full 20 bit
3075 	 * PASID range. Host can partition guest PASID range based on
3076 	 * policies but it is out of guest's control.
3077 	 */
3078 	if (min < PASID_MIN || max > intel_pasid_max_id)
3079 		return INVALID_IOASID;
3080 
3081 	if (vcmd_alloc_pasid(iommu, &ioasid))
3082 		return INVALID_IOASID;
3083 
3084 	return ioasid;
3085 }
3086 
3087 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
3088 {
3089 	struct intel_iommu *iommu = data;
3090 
3091 	if (!iommu)
3092 		return;
3093 	/*
3094 	 * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
3095 	 * We can only free the PASID when all the devices are unbound.
3096 	 */
3097 	if (ioasid_find(NULL, ioasid, NULL)) {
3098 		pr_alert("Cannot free active IOASID %d\n", ioasid);
3099 		return;
3100 	}
3101 	vcmd_free_pasid(iommu, ioasid);
3102 }
3103 
3104 static void register_pasid_allocator(struct intel_iommu *iommu)
3105 {
3106 	/*
3107 	 * If we are running in the host, no need for custom allocator
3108 	 * in that PASIDs are allocated from the host system-wide.
3109 	 */
3110 	if (!cap_caching_mode(iommu->cap))
3111 		return;
3112 
3113 	if (!sm_supported(iommu)) {
3114 		pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
3115 		return;
3116 	}
3117 
3118 	/*
3119 	 * Register a custom PASID allocator if we are running in a guest,
3120 	 * guest PASID must be obtained via virtual command interface.
3121 	 * There can be multiple vIOMMUs in each guest but only one allocator
3122 	 * is active. All vIOMMU allocators will eventually be calling the same
3123 	 * host allocator.
3124 	 */
3125 	if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap))
3126 		return;
3127 
3128 	pr_info("Register custom PASID allocator\n");
3129 	iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
3130 	iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
3131 	iommu->pasid_allocator.pdata = (void *)iommu;
3132 	if (ioasid_register_allocator(&iommu->pasid_allocator)) {
3133 		pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
3134 		/*
3135 		 * Disable scalable mode on this IOMMU if there
3136 		 * is no custom allocator. Mixing SM capable vIOMMU
3137 		 * and non-SM vIOMMU are not supported.
3138 		 */
3139 		intel_iommu_sm = 0;
3140 	}
3141 }
3142 #endif
3143 
3144 static int __init init_dmars(void)
3145 {
3146 	struct dmar_drhd_unit *drhd;
3147 	struct intel_iommu *iommu;
3148 	int ret;
3149 
3150 	/*
3151 	 * for each drhd
3152 	 *    allocate root
3153 	 *    initialize and program root entry to not present
3154 	 * endfor
3155 	 */
3156 	for_each_drhd_unit(drhd) {
3157 		/*
3158 		 * lock not needed as this is only incremented in the single
3159 		 * threaded kernel __init code path all other access are read
3160 		 * only
3161 		 */
3162 		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3163 			g_num_of_iommus++;
3164 			continue;
3165 		}
3166 		pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3167 	}
3168 
3169 	/* Preallocate enough resources for IOMMU hot-addition */
3170 	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3171 		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3172 
3173 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3174 			GFP_KERNEL);
3175 	if (!g_iommus) {
3176 		pr_err("Allocating global iommu array failed\n");
3177 		ret = -ENOMEM;
3178 		goto error;
3179 	}
3180 
3181 	for_each_iommu(iommu, drhd) {
3182 		if (drhd->ignored) {
3183 			iommu_disable_translation(iommu);
3184 			continue;
3185 		}
3186 
3187 		/*
3188 		 * Find the max pasid size of all IOMMU's in the system.
3189 		 * We need to ensure the system pasid table is no bigger
3190 		 * than the smallest supported.
3191 		 */
3192 		if (pasid_supported(iommu)) {
3193 			u32 temp = 2 << ecap_pss(iommu->ecap);
3194 
3195 			intel_pasid_max_id = min_t(u32, temp,
3196 						   intel_pasid_max_id);
3197 		}
3198 
3199 		g_iommus[iommu->seq_id] = iommu;
3200 
3201 		intel_iommu_init_qi(iommu);
3202 
3203 		ret = iommu_init_domains(iommu);
3204 		if (ret)
3205 			goto free_iommu;
3206 
3207 		init_translation_status(iommu);
3208 
3209 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3210 			iommu_disable_translation(iommu);
3211 			clear_translation_pre_enabled(iommu);
3212 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3213 				iommu->name);
3214 		}
3215 
3216 		/*
3217 		 * TBD:
3218 		 * we could share the same root & context tables
3219 		 * among all IOMMU's. Need to Split it later.
3220 		 */
3221 		ret = iommu_alloc_root_entry(iommu);
3222 		if (ret)
3223 			goto free_iommu;
3224 
3225 		if (translation_pre_enabled(iommu)) {
3226 			pr_info("Translation already enabled - trying to copy translation structures\n");
3227 
3228 			ret = copy_translation_tables(iommu);
3229 			if (ret) {
3230 				/*
3231 				 * We found the IOMMU with translation
3232 				 * enabled - but failed to copy over the
3233 				 * old root-entry table. Try to proceed
3234 				 * by disabling translation now and
3235 				 * allocating a clean root-entry table.
3236 				 * This might cause DMAR faults, but
3237 				 * probably the dump will still succeed.
3238 				 */
3239 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
3240 				       iommu->name);
3241 				iommu_disable_translation(iommu);
3242 				clear_translation_pre_enabled(iommu);
3243 			} else {
3244 				pr_info("Copied translation tables from previous kernel for %s\n",
3245 					iommu->name);
3246 			}
3247 		}
3248 
3249 		if (!ecap_pass_through(iommu->ecap))
3250 			hw_pass_through = 0;
3251 		intel_svm_check(iommu);
3252 	}
3253 
3254 	/*
3255 	 * Now that qi is enabled on all iommus, set the root entry and flush
3256 	 * caches. This is required on some Intel X58 chipsets, otherwise the
3257 	 * flush_context function will loop forever and the boot hangs.
3258 	 */
3259 	for_each_active_iommu(iommu, drhd) {
3260 		iommu_flush_write_buffer(iommu);
3261 #ifdef CONFIG_INTEL_IOMMU_SVM
3262 		register_pasid_allocator(iommu);
3263 #endif
3264 		iommu_set_root_entry(iommu);
3265 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3266 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3267 	}
3268 
3269 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3270 	dmar_map_gfx = 0;
3271 #endif
3272 
3273 	if (!dmar_map_gfx)
3274 		iommu_identity_mapping |= IDENTMAP_GFX;
3275 
3276 	check_tylersburg_isoch();
3277 
3278 	ret = si_domain_init(hw_pass_through);
3279 	if (ret)
3280 		goto free_iommu;
3281 
3282 	/*
3283 	 * for each drhd
3284 	 *   enable fault log
3285 	 *   global invalidate context cache
3286 	 *   global invalidate iotlb
3287 	 *   enable translation
3288 	 */
3289 	for_each_iommu(iommu, drhd) {
3290 		if (drhd->ignored) {
3291 			/*
3292 			 * we always have to disable PMRs or DMA may fail on
3293 			 * this device
3294 			 */
3295 			if (force_on)
3296 				iommu_disable_protect_mem_regions(iommu);
3297 			continue;
3298 		}
3299 
3300 		iommu_flush_write_buffer(iommu);
3301 
3302 #ifdef CONFIG_INTEL_IOMMU_SVM
3303 		if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3304 			/*
3305 			 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3306 			 * could cause possible lock race condition.
3307 			 */
3308 			up_write(&dmar_global_lock);
3309 			ret = intel_svm_enable_prq(iommu);
3310 			down_write(&dmar_global_lock);
3311 			if (ret)
3312 				goto free_iommu;
3313 		}
3314 #endif
3315 		ret = dmar_set_interrupt(iommu);
3316 		if (ret)
3317 			goto free_iommu;
3318 	}
3319 
3320 	return 0;
3321 
3322 free_iommu:
3323 	for_each_active_iommu(iommu, drhd) {
3324 		disable_dmar_iommu(iommu);
3325 		free_dmar_iommu(iommu);
3326 	}
3327 
3328 	kfree(g_iommus);
3329 
3330 error:
3331 	return ret;
3332 }
3333 
3334 /* This takes a number of _MM_ pages, not VTD pages */
3335 static unsigned long intel_alloc_iova(struct device *dev,
3336 				     struct dmar_domain *domain,
3337 				     unsigned long nrpages, uint64_t dma_mask)
3338 {
3339 	unsigned long iova_pfn;
3340 
3341 	/*
3342 	 * Restrict dma_mask to the width that the iommu can handle.
3343 	 * First-level translation restricts the input-address to a
3344 	 * canonical address (i.e., address bits 63:N have the same
3345 	 * value as address bit [N-1], where N is 48-bits with 4-level
3346 	 * paging and 57-bits with 5-level paging). Hence, skip bit
3347 	 * [N-1].
3348 	 */
3349 	if (domain_use_first_level(domain))
3350 		dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
3351 				 dma_mask);
3352 	else
3353 		dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
3354 				 dma_mask);
3355 
3356 	/* Ensure we reserve the whole size-aligned region */
3357 	nrpages = __roundup_pow_of_two(nrpages);
3358 
3359 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3360 		/*
3361 		 * First try to allocate an io virtual address in
3362 		 * DMA_BIT_MASK(32) and if that fails then try allocating
3363 		 * from higher range
3364 		 */
3365 		iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3366 					   IOVA_PFN(DMA_BIT_MASK(32)), false);
3367 		if (iova_pfn)
3368 			return iova_pfn;
3369 	}
3370 	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3371 				   IOVA_PFN(dma_mask), true);
3372 	if (unlikely(!iova_pfn)) {
3373 		dev_err_once(dev, "Allocating %ld-page iova failed\n",
3374 			     nrpages);
3375 		return 0;
3376 	}
3377 
3378 	return iova_pfn;
3379 }
3380 
3381 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3382 				     size_t size, int dir, u64 dma_mask)
3383 {
3384 	struct dmar_domain *domain;
3385 	phys_addr_t start_paddr;
3386 	unsigned long iova_pfn;
3387 	int prot = 0;
3388 	int ret;
3389 	struct intel_iommu *iommu;
3390 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3391 
3392 	BUG_ON(dir == DMA_NONE);
3393 
3394 	if (unlikely(attach_deferred(dev)))
3395 		do_deferred_attach(dev);
3396 
3397 	domain = find_domain(dev);
3398 	if (!domain)
3399 		return DMA_MAPPING_ERROR;
3400 
3401 	iommu = domain_get_iommu(domain);
3402 	size = aligned_nrpages(paddr, size);
3403 
3404 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3405 	if (!iova_pfn)
3406 		goto error;
3407 
3408 	/*
3409 	 * Check if DMAR supports zero-length reads on write only
3410 	 * mappings..
3411 	 */
3412 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3413 			!cap_zlr(iommu->cap))
3414 		prot |= DMA_PTE_READ;
3415 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3416 		prot |= DMA_PTE_WRITE;
3417 	/*
3418 	 * paddr - (paddr + size) might be partial page, we should map the whole
3419 	 * page.  Note: if two part of one page are separately mapped, we
3420 	 * might have two guest_addr mapping to the same host paddr, but this
3421 	 * is not a big problem
3422 	 */
3423 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3424 				 mm_to_dma_pfn(paddr_pfn), size, prot);
3425 	if (ret)
3426 		goto error;
3427 
3428 	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3429 	start_paddr += paddr & ~PAGE_MASK;
3430 
3431 	trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3432 
3433 	return start_paddr;
3434 
3435 error:
3436 	if (iova_pfn)
3437 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3438 	dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3439 		size, (unsigned long long)paddr, dir);
3440 	return DMA_MAPPING_ERROR;
3441 }
3442 
3443 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3444 				 unsigned long offset, size_t size,
3445 				 enum dma_data_direction dir,
3446 				 unsigned long attrs)
3447 {
3448 	return __intel_map_single(dev, page_to_phys(page) + offset,
3449 				  size, dir, *dev->dma_mask);
3450 }
3451 
3452 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3453 				     size_t size, enum dma_data_direction dir,
3454 				     unsigned long attrs)
3455 {
3456 	return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
3457 }
3458 
3459 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3460 {
3461 	struct dmar_domain *domain;
3462 	unsigned long start_pfn, last_pfn;
3463 	unsigned long nrpages;
3464 	unsigned long iova_pfn;
3465 	struct intel_iommu *iommu;
3466 	struct page *freelist;
3467 	struct pci_dev *pdev = NULL;
3468 
3469 	domain = find_domain(dev);
3470 	BUG_ON(!domain);
3471 
3472 	iommu = domain_get_iommu(domain);
3473 
3474 	iova_pfn = IOVA_PFN(dev_addr);
3475 
3476 	nrpages = aligned_nrpages(dev_addr, size);
3477 	start_pfn = mm_to_dma_pfn(iova_pfn);
3478 	last_pfn = start_pfn + nrpages - 1;
3479 
3480 	if (dev_is_pci(dev))
3481 		pdev = to_pci_dev(dev);
3482 
3483 	freelist = domain_unmap(domain, start_pfn, last_pfn);
3484 	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3485 			!has_iova_flush_queue(&domain->iovad)) {
3486 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3487 				      nrpages, !freelist, 0);
3488 		/* free iova */
3489 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3490 		dma_free_pagelist(freelist);
3491 	} else {
3492 		queue_iova(&domain->iovad, iova_pfn, nrpages,
3493 			   (unsigned long)freelist);
3494 		/*
3495 		 * queue up the release of the unmap to save the 1/6th of the
3496 		 * cpu used up by the iotlb flush operation...
3497 		 */
3498 	}
3499 
3500 	trace_unmap_single(dev, dev_addr, size);
3501 }
3502 
3503 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3504 			     size_t size, enum dma_data_direction dir,
3505 			     unsigned long attrs)
3506 {
3507 	intel_unmap(dev, dev_addr, size);
3508 }
3509 
3510 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3511 		size_t size, enum dma_data_direction dir, unsigned long attrs)
3512 {
3513 	intel_unmap(dev, dev_addr, size);
3514 }
3515 
3516 static void *intel_alloc_coherent(struct device *dev, size_t size,
3517 				  dma_addr_t *dma_handle, gfp_t flags,
3518 				  unsigned long attrs)
3519 {
3520 	struct page *page = NULL;
3521 	int order;
3522 
3523 	if (unlikely(attach_deferred(dev)))
3524 		do_deferred_attach(dev);
3525 
3526 	size = PAGE_ALIGN(size);
3527 	order = get_order(size);
3528 
3529 	if (gfpflags_allow_blocking(flags)) {
3530 		unsigned int count = size >> PAGE_SHIFT;
3531 
3532 		page = dma_alloc_from_contiguous(dev, count, order,
3533 						 flags & __GFP_NOWARN);
3534 	}
3535 
3536 	if (!page)
3537 		page = alloc_pages(flags, order);
3538 	if (!page)
3539 		return NULL;
3540 	memset(page_address(page), 0, size);
3541 
3542 	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3543 					 DMA_BIDIRECTIONAL,
3544 					 dev->coherent_dma_mask);
3545 	if (*dma_handle != DMA_MAPPING_ERROR)
3546 		return page_address(page);
3547 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3548 		__free_pages(page, order);
3549 
3550 	return NULL;
3551 }
3552 
3553 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3554 				dma_addr_t dma_handle, unsigned long attrs)
3555 {
3556 	int order;
3557 	struct page *page = virt_to_page(vaddr);
3558 
3559 	size = PAGE_ALIGN(size);
3560 	order = get_order(size);
3561 
3562 	intel_unmap(dev, dma_handle, size);
3563 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3564 		__free_pages(page, order);
3565 }
3566 
3567 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3568 			   int nelems, enum dma_data_direction dir,
3569 			   unsigned long attrs)
3570 {
3571 	dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3572 	unsigned long nrpages = 0;
3573 	struct scatterlist *sg;
3574 	int i;
3575 
3576 	for_each_sg(sglist, sg, nelems, i) {
3577 		nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3578 	}
3579 
3580 	intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3581 
3582 	trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3583 }
3584 
3585 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3586 			enum dma_data_direction dir, unsigned long attrs)
3587 {
3588 	int i;
3589 	struct dmar_domain *domain;
3590 	size_t size = 0;
3591 	int prot = 0;
3592 	unsigned long iova_pfn;
3593 	int ret;
3594 	struct scatterlist *sg;
3595 	unsigned long start_vpfn;
3596 	struct intel_iommu *iommu;
3597 
3598 	BUG_ON(dir == DMA_NONE);
3599 
3600 	if (unlikely(attach_deferred(dev)))
3601 		do_deferred_attach(dev);
3602 
3603 	domain = find_domain(dev);
3604 	if (!domain)
3605 		return 0;
3606 
3607 	iommu = domain_get_iommu(domain);
3608 
3609 	for_each_sg(sglist, sg, nelems, i)
3610 		size += aligned_nrpages(sg->offset, sg->length);
3611 
3612 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3613 				*dev->dma_mask);
3614 	if (!iova_pfn) {
3615 		sglist->dma_length = 0;
3616 		return 0;
3617 	}
3618 
3619 	/*
3620 	 * Check if DMAR supports zero-length reads on write only
3621 	 * mappings..
3622 	 */
3623 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3624 			!cap_zlr(iommu->cap))
3625 		prot |= DMA_PTE_READ;
3626 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3627 		prot |= DMA_PTE_WRITE;
3628 
3629 	start_vpfn = mm_to_dma_pfn(iova_pfn);
3630 
3631 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3632 	if (unlikely(ret)) {
3633 		dma_pte_free_pagetable(domain, start_vpfn,
3634 				       start_vpfn + size - 1,
3635 				       agaw_to_level(domain->agaw) + 1);
3636 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3637 		return 0;
3638 	}
3639 
3640 	for_each_sg(sglist, sg, nelems, i)
3641 		trace_map_sg(dev, i + 1, nelems, sg);
3642 
3643 	return nelems;
3644 }
3645 
3646 static u64 intel_get_required_mask(struct device *dev)
3647 {
3648 	return DMA_BIT_MASK(32);
3649 }
3650 
3651 static const struct dma_map_ops intel_dma_ops = {
3652 	.alloc = intel_alloc_coherent,
3653 	.free = intel_free_coherent,
3654 	.map_sg = intel_map_sg,
3655 	.unmap_sg = intel_unmap_sg,
3656 	.map_page = intel_map_page,
3657 	.unmap_page = intel_unmap_page,
3658 	.map_resource = intel_map_resource,
3659 	.unmap_resource = intel_unmap_resource,
3660 	.dma_supported = dma_direct_supported,
3661 	.mmap = dma_common_mmap,
3662 	.get_sgtable = dma_common_get_sgtable,
3663 	.get_required_mask = intel_get_required_mask,
3664 };
3665 
3666 static void
3667 bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3668 		   enum dma_data_direction dir, enum dma_sync_target target)
3669 {
3670 	struct dmar_domain *domain;
3671 	phys_addr_t tlb_addr;
3672 
3673 	domain = find_domain(dev);
3674 	if (WARN_ON(!domain))
3675 		return;
3676 
3677 	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3678 	if (is_swiotlb_buffer(tlb_addr))
3679 		swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3680 }
3681 
3682 static dma_addr_t
3683 bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3684 		  enum dma_data_direction dir, unsigned long attrs,
3685 		  u64 dma_mask)
3686 {
3687 	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3688 	struct dmar_domain *domain;
3689 	struct intel_iommu *iommu;
3690 	unsigned long iova_pfn;
3691 	unsigned long nrpages;
3692 	phys_addr_t tlb_addr;
3693 	int prot = 0;
3694 	int ret;
3695 
3696 	if (unlikely(attach_deferred(dev)))
3697 		do_deferred_attach(dev);
3698 
3699 	domain = find_domain(dev);
3700 
3701 	if (WARN_ON(dir == DMA_NONE || !domain))
3702 		return DMA_MAPPING_ERROR;
3703 
3704 	iommu = domain_get_iommu(domain);
3705 	if (WARN_ON(!iommu))
3706 		return DMA_MAPPING_ERROR;
3707 
3708 	nrpages = aligned_nrpages(0, size);
3709 	iova_pfn = intel_alloc_iova(dev, domain,
3710 				    dma_to_mm_pfn(nrpages), dma_mask);
3711 	if (!iova_pfn)
3712 		return DMA_MAPPING_ERROR;
3713 
3714 	/*
3715 	 * Check if DMAR supports zero-length reads on write only
3716 	 * mappings..
3717 	 */
3718 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3719 			!cap_zlr(iommu->cap))
3720 		prot |= DMA_PTE_READ;
3721 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3722 		prot |= DMA_PTE_WRITE;
3723 
3724 	/*
3725 	 * If both the physical buffer start address and size are
3726 	 * page aligned, we don't need to use a bounce page.
3727 	 */
3728 	if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3729 		tlb_addr = swiotlb_tbl_map_single(dev,
3730 				__phys_to_dma(dev, io_tlb_start),
3731 				paddr, size, aligned_size, dir, attrs);
3732 		if (tlb_addr == DMA_MAPPING_ERROR) {
3733 			goto swiotlb_error;
3734 		} else {
3735 			/* Cleanup the padding area. */
3736 			void *padding_start = phys_to_virt(tlb_addr);
3737 			size_t padding_size = aligned_size;
3738 
3739 			if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3740 			    (dir == DMA_TO_DEVICE ||
3741 			     dir == DMA_BIDIRECTIONAL)) {
3742 				padding_start += size;
3743 				padding_size -= size;
3744 			}
3745 
3746 			memset(padding_start, 0, padding_size);
3747 		}
3748 	} else {
3749 		tlb_addr = paddr;
3750 	}
3751 
3752 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3753 				 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3754 	if (ret)
3755 		goto mapping_error;
3756 
3757 	trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3758 
3759 	return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3760 
3761 mapping_error:
3762 	if (is_swiotlb_buffer(tlb_addr))
3763 		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3764 					 aligned_size, dir, attrs);
3765 swiotlb_error:
3766 	free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3767 	dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3768 		size, (unsigned long long)paddr, dir);
3769 
3770 	return DMA_MAPPING_ERROR;
3771 }
3772 
3773 static void
3774 bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3775 		    enum dma_data_direction dir, unsigned long attrs)
3776 {
3777 	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3778 	struct dmar_domain *domain;
3779 	phys_addr_t tlb_addr;
3780 
3781 	domain = find_domain(dev);
3782 	if (WARN_ON(!domain))
3783 		return;
3784 
3785 	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3786 	if (WARN_ON(!tlb_addr))
3787 		return;
3788 
3789 	intel_unmap(dev, dev_addr, size);
3790 	if (is_swiotlb_buffer(tlb_addr))
3791 		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3792 					 aligned_size, dir, attrs);
3793 
3794 	trace_bounce_unmap_single(dev, dev_addr, size);
3795 }
3796 
3797 static dma_addr_t
3798 bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3799 		size_t size, enum dma_data_direction dir, unsigned long attrs)
3800 {
3801 	return bounce_map_single(dev, page_to_phys(page) + offset,
3802 				 size, dir, attrs, *dev->dma_mask);
3803 }
3804 
3805 static dma_addr_t
3806 bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3807 		    enum dma_data_direction dir, unsigned long attrs)
3808 {
3809 	return bounce_map_single(dev, phys_addr, size,
3810 				 dir, attrs, *dev->dma_mask);
3811 }
3812 
3813 static void
3814 bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3815 		  enum dma_data_direction dir, unsigned long attrs)
3816 {
3817 	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3818 }
3819 
3820 static void
3821 bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3822 		      enum dma_data_direction dir, unsigned long attrs)
3823 {
3824 	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3825 }
3826 
3827 static void
3828 bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3829 		enum dma_data_direction dir, unsigned long attrs)
3830 {
3831 	struct scatterlist *sg;
3832 	int i;
3833 
3834 	for_each_sg(sglist, sg, nelems, i)
3835 		bounce_unmap_page(dev, sg->dma_address,
3836 				  sg_dma_len(sg), dir, attrs);
3837 }
3838 
3839 static int
3840 bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3841 	      enum dma_data_direction dir, unsigned long attrs)
3842 {
3843 	int i;
3844 	struct scatterlist *sg;
3845 
3846 	for_each_sg(sglist, sg, nelems, i) {
3847 		sg->dma_address = bounce_map_page(dev, sg_page(sg),
3848 						  sg->offset, sg->length,
3849 						  dir, attrs);
3850 		if (sg->dma_address == DMA_MAPPING_ERROR)
3851 			goto out_unmap;
3852 		sg_dma_len(sg) = sg->length;
3853 	}
3854 
3855 	for_each_sg(sglist, sg, nelems, i)
3856 		trace_bounce_map_sg(dev, i + 1, nelems, sg);
3857 
3858 	return nelems;
3859 
3860 out_unmap:
3861 	bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3862 	return 0;
3863 }
3864 
3865 static void
3866 bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3867 			   size_t size, enum dma_data_direction dir)
3868 {
3869 	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
3870 }
3871 
3872 static void
3873 bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
3874 			      size_t size, enum dma_data_direction dir)
3875 {
3876 	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
3877 }
3878 
3879 static void
3880 bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
3881 		       int nelems, enum dma_data_direction dir)
3882 {
3883 	struct scatterlist *sg;
3884 	int i;
3885 
3886 	for_each_sg(sglist, sg, nelems, i)
3887 		bounce_sync_single(dev, sg_dma_address(sg),
3888 				   sg_dma_len(sg), dir, SYNC_FOR_CPU);
3889 }
3890 
3891 static void
3892 bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
3893 			  int nelems, enum dma_data_direction dir)
3894 {
3895 	struct scatterlist *sg;
3896 	int i;
3897 
3898 	for_each_sg(sglist, sg, nelems, i)
3899 		bounce_sync_single(dev, sg_dma_address(sg),
3900 				   sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
3901 }
3902 
3903 static const struct dma_map_ops bounce_dma_ops = {
3904 	.alloc			= intel_alloc_coherent,
3905 	.free			= intel_free_coherent,
3906 	.map_sg			= bounce_map_sg,
3907 	.unmap_sg		= bounce_unmap_sg,
3908 	.map_page		= bounce_map_page,
3909 	.unmap_page		= bounce_unmap_page,
3910 	.sync_single_for_cpu	= bounce_sync_single_for_cpu,
3911 	.sync_single_for_device	= bounce_sync_single_for_device,
3912 	.sync_sg_for_cpu	= bounce_sync_sg_for_cpu,
3913 	.sync_sg_for_device	= bounce_sync_sg_for_device,
3914 	.map_resource		= bounce_map_resource,
3915 	.unmap_resource		= bounce_unmap_resource,
3916 	.dma_supported		= dma_direct_supported,
3917 };
3918 
3919 static inline int iommu_domain_cache_init(void)
3920 {
3921 	int ret = 0;
3922 
3923 	iommu_domain_cache = kmem_cache_create("iommu_domain",
3924 					 sizeof(struct dmar_domain),
3925 					 0,
3926 					 SLAB_HWCACHE_ALIGN,
3927 
3928 					 NULL);
3929 	if (!iommu_domain_cache) {
3930 		pr_err("Couldn't create iommu_domain cache\n");
3931 		ret = -ENOMEM;
3932 	}
3933 
3934 	return ret;
3935 }
3936 
3937 static inline int iommu_devinfo_cache_init(void)
3938 {
3939 	int ret = 0;
3940 
3941 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3942 					 sizeof(struct device_domain_info),
3943 					 0,
3944 					 SLAB_HWCACHE_ALIGN,
3945 					 NULL);
3946 	if (!iommu_devinfo_cache) {
3947 		pr_err("Couldn't create devinfo cache\n");
3948 		ret = -ENOMEM;
3949 	}
3950 
3951 	return ret;
3952 }
3953 
3954 static int __init iommu_init_mempool(void)
3955 {
3956 	int ret;
3957 	ret = iova_cache_get();
3958 	if (ret)
3959 		return ret;
3960 
3961 	ret = iommu_domain_cache_init();
3962 	if (ret)
3963 		goto domain_error;
3964 
3965 	ret = iommu_devinfo_cache_init();
3966 	if (!ret)
3967 		return ret;
3968 
3969 	kmem_cache_destroy(iommu_domain_cache);
3970 domain_error:
3971 	iova_cache_put();
3972 
3973 	return -ENOMEM;
3974 }
3975 
3976 static void __init iommu_exit_mempool(void)
3977 {
3978 	kmem_cache_destroy(iommu_devinfo_cache);
3979 	kmem_cache_destroy(iommu_domain_cache);
3980 	iova_cache_put();
3981 }
3982 
3983 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3984 {
3985 	struct dmar_drhd_unit *drhd;
3986 	u32 vtbar;
3987 	int rc;
3988 
3989 	/* We know that this device on this chipset has its own IOMMU.
3990 	 * If we find it under a different IOMMU, then the BIOS is lying
3991 	 * to us. Hope that the IOMMU for this device is actually
3992 	 * disabled, and it needs no translation...
3993 	 */
3994 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3995 	if (rc) {
3996 		/* "can't" happen */
3997 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3998 		return;
3999 	}
4000 	vtbar &= 0xffff0000;
4001 
4002 	/* we know that the this iommu should be at offset 0xa000 from vtbar */
4003 	drhd = dmar_find_matched_drhd_unit(pdev);
4004 	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
4005 		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
4006 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
4007 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4008 	}
4009 }
4010 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4011 
4012 static void __init init_no_remapping_devices(void)
4013 {
4014 	struct dmar_drhd_unit *drhd;
4015 	struct device *dev;
4016 	int i;
4017 
4018 	for_each_drhd_unit(drhd) {
4019 		if (!drhd->include_all) {
4020 			for_each_active_dev_scope(drhd->devices,
4021 						  drhd->devices_cnt, i, dev)
4022 				break;
4023 			/* ignore DMAR unit if no devices exist */
4024 			if (i == drhd->devices_cnt)
4025 				drhd->ignored = 1;
4026 		}
4027 	}
4028 
4029 	for_each_active_drhd_unit(drhd) {
4030 		if (drhd->include_all)
4031 			continue;
4032 
4033 		for_each_active_dev_scope(drhd->devices,
4034 					  drhd->devices_cnt, i, dev)
4035 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4036 				break;
4037 		if (i < drhd->devices_cnt)
4038 			continue;
4039 
4040 		/* This IOMMU has *only* gfx devices. Either bypass it or
4041 		   set the gfx_mapped flag, as appropriate */
4042 		if (!dmar_map_gfx) {
4043 			drhd->ignored = 1;
4044 			for_each_active_dev_scope(drhd->devices,
4045 						  drhd->devices_cnt, i, dev)
4046 				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4047 		}
4048 	}
4049 }
4050 
4051 #ifdef CONFIG_SUSPEND
4052 static int init_iommu_hw(void)
4053 {
4054 	struct dmar_drhd_unit *drhd;
4055 	struct intel_iommu *iommu = NULL;
4056 
4057 	for_each_active_iommu(iommu, drhd)
4058 		if (iommu->qi)
4059 			dmar_reenable_qi(iommu);
4060 
4061 	for_each_iommu(iommu, drhd) {
4062 		if (drhd->ignored) {
4063 			/*
4064 			 * we always have to disable PMRs or DMA may fail on
4065 			 * this device
4066 			 */
4067 			if (force_on)
4068 				iommu_disable_protect_mem_regions(iommu);
4069 			continue;
4070 		}
4071 
4072 		iommu_flush_write_buffer(iommu);
4073 
4074 		iommu_set_root_entry(iommu);
4075 
4076 		iommu->flush.flush_context(iommu, 0, 0, 0,
4077 					   DMA_CCMD_GLOBAL_INVL);
4078 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4079 		iommu_enable_translation(iommu);
4080 		iommu_disable_protect_mem_regions(iommu);
4081 	}
4082 
4083 	return 0;
4084 }
4085 
4086 static void iommu_flush_all(void)
4087 {
4088 	struct dmar_drhd_unit *drhd;
4089 	struct intel_iommu *iommu;
4090 
4091 	for_each_active_iommu(iommu, drhd) {
4092 		iommu->flush.flush_context(iommu, 0, 0, 0,
4093 					   DMA_CCMD_GLOBAL_INVL);
4094 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4095 					 DMA_TLB_GLOBAL_FLUSH);
4096 	}
4097 }
4098 
4099 static int iommu_suspend(void)
4100 {
4101 	struct dmar_drhd_unit *drhd;
4102 	struct intel_iommu *iommu = NULL;
4103 	unsigned long flag;
4104 
4105 	for_each_active_iommu(iommu, drhd) {
4106 		iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4107 						 GFP_ATOMIC);
4108 		if (!iommu->iommu_state)
4109 			goto nomem;
4110 	}
4111 
4112 	iommu_flush_all();
4113 
4114 	for_each_active_iommu(iommu, drhd) {
4115 		iommu_disable_translation(iommu);
4116 
4117 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4118 
4119 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
4120 			readl(iommu->reg + DMAR_FECTL_REG);
4121 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4122 			readl(iommu->reg + DMAR_FEDATA_REG);
4123 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4124 			readl(iommu->reg + DMAR_FEADDR_REG);
4125 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4126 			readl(iommu->reg + DMAR_FEUADDR_REG);
4127 
4128 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4129 	}
4130 	return 0;
4131 
4132 nomem:
4133 	for_each_active_iommu(iommu, drhd)
4134 		kfree(iommu->iommu_state);
4135 
4136 	return -ENOMEM;
4137 }
4138 
4139 static void iommu_resume(void)
4140 {
4141 	struct dmar_drhd_unit *drhd;
4142 	struct intel_iommu *iommu = NULL;
4143 	unsigned long flag;
4144 
4145 	if (init_iommu_hw()) {
4146 		if (force_on)
4147 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4148 		else
4149 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4150 		return;
4151 	}
4152 
4153 	for_each_active_iommu(iommu, drhd) {
4154 
4155 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4156 
4157 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4158 			iommu->reg + DMAR_FECTL_REG);
4159 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4160 			iommu->reg + DMAR_FEDATA_REG);
4161 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4162 			iommu->reg + DMAR_FEADDR_REG);
4163 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4164 			iommu->reg + DMAR_FEUADDR_REG);
4165 
4166 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4167 	}
4168 
4169 	for_each_active_iommu(iommu, drhd)
4170 		kfree(iommu->iommu_state);
4171 }
4172 
4173 static struct syscore_ops iommu_syscore_ops = {
4174 	.resume		= iommu_resume,
4175 	.suspend	= iommu_suspend,
4176 };
4177 
4178 static void __init init_iommu_pm_ops(void)
4179 {
4180 	register_syscore_ops(&iommu_syscore_ops);
4181 }
4182 
4183 #else
/* Without CONFIG_SUSPEND there is nothing to register. */
static inline void init_iommu_pm_ops(void)
{
}
#endif	/* CONFIG_SUSPEND */
4186 
4187 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
4188 {
4189 	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
4190 	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
4191 	    rmrr->end_address <= rmrr->base_address ||
4192 	    arch_rmrr_sanity_check(rmrr))
4193 		return -EINVAL;
4194 
4195 	return 0;
4196 }
4197 
4198 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4199 {
4200 	struct acpi_dmar_reserved_memory *rmrr;
4201 	struct dmar_rmrr_unit *rmrru;
4202 
4203 	rmrr = (struct acpi_dmar_reserved_memory *)header;
4204 	if (rmrr_sanity_check(rmrr)) {
4205 		pr_warn(FW_BUG
4206 			   "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
4207 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4208 			   rmrr->base_address, rmrr->end_address,
4209 			   dmi_get_system_info(DMI_BIOS_VENDOR),
4210 			   dmi_get_system_info(DMI_BIOS_VERSION),
4211 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
4212 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
4213 	}
4214 
4215 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4216 	if (!rmrru)
4217 		goto out;
4218 
4219 	rmrru->hdr = header;
4220 
4221 	rmrru->base_address = rmrr->base_address;
4222 	rmrru->end_address = rmrr->end_address;
4223 
4224 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4225 				((void *)rmrr) + rmrr->header.length,
4226 				&rmrru->devices_cnt);
4227 	if (rmrru->devices_cnt && rmrru->devices == NULL)
4228 		goto free_rmrru;
4229 
4230 	list_add(&rmrru->list, &dmar_rmrr_units);
4231 
4232 	return 0;
4233 free_rmrru:
4234 	kfree(rmrru);
4235 out:
4236 	return -ENOMEM;
4237 }
4238 
4239 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4240 {
4241 	struct dmar_atsr_unit *atsru;
4242 	struct acpi_dmar_atsr *tmp;
4243 
4244 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
4245 				dmar_rcu_check()) {
4246 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4247 		if (atsr->segment != tmp->segment)
4248 			continue;
4249 		if (atsr->header.length != tmp->header.length)
4250 			continue;
4251 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
4252 			return atsru;
4253 	}
4254 
4255 	return NULL;
4256 }
4257 
4258 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4259 {
4260 	struct acpi_dmar_atsr *atsr;
4261 	struct dmar_atsr_unit *atsru;
4262 
4263 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4264 		return 0;
4265 
4266 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4267 	atsru = dmar_find_atsr(atsr);
4268 	if (atsru)
4269 		return 0;
4270 
4271 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4272 	if (!atsru)
4273 		return -ENOMEM;
4274 
4275 	/*
4276 	 * If memory is allocated from slab by ACPI _DSM method, we need to
4277 	 * copy the memory content because the memory buffer will be freed
4278 	 * on return.
4279 	 */
4280 	atsru->hdr = (void *)(atsru + 1);
4281 	memcpy(atsru->hdr, hdr, hdr->length);
4282 	atsru->include_all = atsr->flags & 0x1;
4283 	if (!atsru->include_all) {
4284 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4285 				(void *)atsr + atsr->header.length,
4286 				&atsru->devices_cnt);
4287 		if (atsru->devices_cnt && atsru->devices == NULL) {
4288 			kfree(atsru);
4289 			return -ENOMEM;
4290 		}
4291 	}
4292 
4293 	list_add_rcu(&atsru->list, &dmar_atsr_units);
4294 
4295 	return 0;
4296 }
4297 
4298 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4299 {
4300 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4301 	kfree(atsru);
4302 }
4303 
4304 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4305 {
4306 	struct acpi_dmar_atsr *atsr;
4307 	struct dmar_atsr_unit *atsru;
4308 
4309 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4310 	atsru = dmar_find_atsr(atsr);
4311 	if (atsru) {
4312 		list_del_rcu(&atsru->list);
4313 		synchronize_rcu();
4314 		intel_iommu_free_atsr(atsru);
4315 	}
4316 
4317 	return 0;
4318 }
4319 
4320 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4321 {
4322 	int i;
4323 	struct device *dev;
4324 	struct acpi_dmar_atsr *atsr;
4325 	struct dmar_atsr_unit *atsru;
4326 
4327 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4328 	atsru = dmar_find_atsr(atsr);
4329 	if (!atsru)
4330 		return 0;
4331 
4332 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4333 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4334 					  i, dev)
4335 			return -EBUSY;
4336 	}
4337 
4338 	return 0;
4339 }
4340 
/*
 * Set up the IOMMU behind @dmaru and, unless the unit is marked ignored,
 * enable translation on it.
 *
 * Returns 0 on success or if the IOMMU is already recorded in g_iommus[];
 * -ENXIO if the IOMMU fails one of the capability checks below; otherwise
 * the error from the failing initialization step, after the IOMMU has been
 * torn down again.
 */
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
	int sp, ret;
	struct intel_iommu *iommu = dmaru->iommu;

	/* Already recorded in the global table: nothing to do. */
	if (g_iommus[iommu->seq_id])
		return 0;

	/* Capability checks: reject the unit before touching any state. */
	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
		pr_warn("%s: Doesn't support hardware pass through.\n",
			iommu->name);
		return -ENXIO;
	}
	if (!ecap_sc_support(iommu->ecap) &&
	    domain_update_iommu_snooping(iommu)) {
		pr_warn("%s: Doesn't support snooping.\n",
			iommu->name);
		return -ENXIO;
	}
	sp = domain_update_iommu_superpage(NULL, iommu) - 1;
	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
		pr_warn("%s: Doesn't support large page.\n",
			iommu->name);
		return -ENXIO;
	}

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	g_iommus[iommu->seq_id] = iommu;
	ret = iommu_init_domains(iommu);
	if (ret == 0)
		ret = iommu_alloc_root_entry(iommu);
	if (ret)
		goto out;

	intel_svm_check(iommu);

	if (dmaru->ignored) {
		/*
		 * we always have to disable PMRs or DMA may fail on this device
		 */
		if (force_on)
			iommu_disable_protect_mem_regions(iommu);
		return 0;
	}

	intel_iommu_init_qi(iommu);
	iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
		ret = intel_svm_enable_prq(iommu);
		if (ret)
			goto disable_iommu;
	}
#endif
	ret = dmar_set_interrupt(iommu);
	if (ret)
		goto disable_iommu;

	/*
	 * Install the root entry, globally invalidate the context cache and
	 * IOTLB, and only then turn translation on.
	 */
	iommu_set_root_entry(iommu);
	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
	iommu_enable_translation(iommu);

	iommu_disable_protect_mem_regions(iommu);
	return 0;

	/* Error unwind: disable_iommu falls through into out. */
disable_iommu:
	disable_dmar_iommu(iommu);
out:
	free_dmar_iommu(iommu);
	return ret;
}
4419 
4420 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4421 {
4422 	int ret = 0;
4423 	struct intel_iommu *iommu = dmaru->iommu;
4424 
4425 	if (!intel_iommu_enabled)
4426 		return 0;
4427 	if (iommu == NULL)
4428 		return -EINVAL;
4429 
4430 	if (insert) {
4431 		ret = intel_iommu_add(dmaru);
4432 	} else {
4433 		disable_dmar_iommu(iommu);
4434 		free_dmar_iommu(iommu);
4435 	}
4436 
4437 	return ret;
4438 }
4439 
4440 static void intel_iommu_free_dmars(void)
4441 {
4442 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
4443 	struct dmar_atsr_unit *atsru, *atsr_n;
4444 
4445 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4446 		list_del(&rmrru->list);
4447 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4448 		kfree(rmrru);
4449 	}
4450 
4451 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4452 		list_del(&atsru->list);
4453 		intel_iommu_free_atsr(atsru);
4454 	}
4455 }
4456 
4457 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4458 {
4459 	int i, ret = 1;
4460 	struct pci_bus *bus;
4461 	struct pci_dev *bridge = NULL;
4462 	struct device *tmp;
4463 	struct acpi_dmar_atsr *atsr;
4464 	struct dmar_atsr_unit *atsru;
4465 
4466 	dev = pci_physfn(dev);
4467 	for (bus = dev->bus; bus; bus = bus->parent) {
4468 		bridge = bus->self;
4469 		/* If it's an integrated device, allow ATS */
4470 		if (!bridge)
4471 			return 1;
4472 		/* Connected via non-PCIe: no ATS */
4473 		if (!pci_is_pcie(bridge) ||
4474 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4475 			return 0;
4476 		/* If we found the root port, look it up in the ATSR */
4477 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4478 			break;
4479 	}
4480 
4481 	rcu_read_lock();
4482 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4483 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4484 		if (atsr->segment != pci_domain_nr(dev->bus))
4485 			continue;
4486 
4487 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4488 			if (tmp == &bridge->dev)
4489 				goto out;
4490 
4491 		if (atsru->include_all)
4492 			goto out;
4493 	}
4494 	ret = 0;
4495 out:
4496 	rcu_read_unlock();
4497 
4498 	return ret;
4499 }
4500 
4501 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4502 {
4503 	int ret;
4504 	struct dmar_rmrr_unit *rmrru;
4505 	struct dmar_atsr_unit *atsru;
4506 	struct acpi_dmar_atsr *atsr;
4507 	struct acpi_dmar_reserved_memory *rmrr;
4508 
4509 	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4510 		return 0;
4511 
4512 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4513 		rmrr = container_of(rmrru->hdr,
4514 				    struct acpi_dmar_reserved_memory, header);
4515 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4516 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4517 				((void *)rmrr) + rmrr->header.length,
4518 				rmrr->segment, rmrru->devices,
4519 				rmrru->devices_cnt);
4520 			if (ret < 0)
4521 				return ret;
4522 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4523 			dmar_remove_dev_scope(info, rmrr->segment,
4524 				rmrru->devices, rmrru->devices_cnt);
4525 		}
4526 	}
4527 
4528 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
4529 		if (atsru->include_all)
4530 			continue;
4531 
4532 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4533 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4534 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4535 					(void *)atsr + atsr->header.length,
4536 					atsr->segment, atsru->devices,
4537 					atsru->devices_cnt);
4538 			if (ret > 0)
4539 				break;
4540 			else if (ret < 0)
4541 				return ret;
4542 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4543 			if (dmar_remove_dev_scope(info, atsr->segment,
4544 					atsru->devices, atsru->devices_cnt))
4545 				break;
4546 		}
4547 	}
4548 
4549 	return 0;
4550 }
4551 
4552 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4553 				       unsigned long val, void *v)
4554 {
4555 	struct memory_notify *mhp = v;
4556 	unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4557 	unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
4558 			mhp->nr_pages - 1);
4559 
4560 	switch (val) {
4561 	case MEM_GOING_ONLINE:
4562 		if (iommu_domain_identity_map(si_domain,
4563 					      start_vpfn, last_vpfn)) {
4564 			pr_warn("Failed to build identity map for [%lx-%lx]\n",
4565 				start_vpfn, last_vpfn);
4566 			return NOTIFY_BAD;
4567 		}
4568 		break;
4569 
4570 	case MEM_OFFLINE:
4571 	case MEM_CANCEL_ONLINE:
4572 		{
4573 			struct dmar_drhd_unit *drhd;
4574 			struct intel_iommu *iommu;
4575 			struct page *freelist;
4576 
4577 			freelist = domain_unmap(si_domain,
4578 						start_vpfn, last_vpfn);
4579 
4580 			rcu_read_lock();
4581 			for_each_active_iommu(iommu, drhd)
4582 				iommu_flush_iotlb_psi(iommu, si_domain,
4583 					start_vpfn, mhp->nr_pages,
4584 					!freelist, 0);
4585 			rcu_read_unlock();
4586 			dma_free_pagelist(freelist);
4587 		}
4588 		break;
4589 	}
4590 
4591 	return NOTIFY_OK;
4592 }
4593 
4594 static struct notifier_block intel_iommu_memory_nb = {
4595 	.notifier_call = intel_iommu_memory_notifier,
4596 	.priority = 0
4597 };
4598 
4599 static void free_all_cpu_cached_iovas(unsigned int cpu)
4600 {
4601 	int i;
4602 
4603 	for (i = 0; i < g_num_of_iommus; i++) {
4604 		struct intel_iommu *iommu = g_iommus[i];
4605 		struct dmar_domain *domain;
4606 		int did;
4607 
4608 		if (!iommu)
4609 			continue;
4610 
4611 		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4612 			domain = get_iommu_domain(iommu, (u16)did);
4613 
4614 			if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
4615 				continue;
4616 
4617 			free_cpu_cached_iovas(cpu, &domain->iovad);
4618 		}
4619 	}
4620 }
4621 
/* CPU hotplug callback: drop the IOVAs cached for @cpu. Always returns 0. */
static int intel_iommu_cpu_dead(unsigned int cpu)
{
	free_all_cpu_cached_iovas(cpu);

	return 0;
}
4627 
4628 static void intel_disable_iommus(void)
4629 {
4630 	struct intel_iommu *iommu = NULL;
4631 	struct dmar_drhd_unit *drhd;
4632 
4633 	for_each_iommu(iommu, drhd)
4634 		iommu_disable_translation(iommu);
4635 }
4636 
4637 void intel_iommu_shutdown(void)
4638 {
4639 	struct dmar_drhd_unit *drhd;
4640 	struct intel_iommu *iommu = NULL;
4641 
4642 	if (no_iommu || dmar_disabled)
4643 		return;
4644 
4645 	down_write(&dmar_global_lock);
4646 
4647 	/* Disable PMRs explicitly here. */
4648 	for_each_iommu(iommu, drhd)
4649 		iommu_disable_protect_mem_regions(iommu);
4650 
4651 	/* Make sure the IOMMUs are switched off */
4652 	intel_disable_iommus();
4653 
4654 	up_write(&dmar_global_lock);
4655 }
4656 
4657 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4658 {
4659 	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4660 
4661 	return container_of(iommu_dev, struct intel_iommu, iommu);
4662 }
4663 
4664 static ssize_t intel_iommu_show_version(struct device *dev,
4665 					struct device_attribute *attr,
4666 					char *buf)
4667 {
4668 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4669 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
4670 	return sprintf(buf, "%d:%d\n",
4671 		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4672 }
4673 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4674 
4675 static ssize_t intel_iommu_show_address(struct device *dev,
4676 					struct device_attribute *attr,
4677 					char *buf)
4678 {
4679 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4680 	return sprintf(buf, "%llx\n", iommu->reg_phys);
4681 }
4682 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4683 
4684 static ssize_t intel_iommu_show_cap(struct device *dev,
4685 				    struct device_attribute *attr,
4686 				    char *buf)
4687 {
4688 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4689 	return sprintf(buf, "%llx\n", iommu->cap);
4690 }
4691 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4692 
4693 static ssize_t intel_iommu_show_ecap(struct device *dev,
4694 				    struct device_attribute *attr,
4695 				    char *buf)
4696 {
4697 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4698 	return sprintf(buf, "%llx\n", iommu->ecap);
4699 }
4700 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4701 
4702 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4703 				      struct device_attribute *attr,
4704 				      char *buf)
4705 {
4706 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4707 	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4708 }
4709 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4710 
4711 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4712 					   struct device_attribute *attr,
4713 					   char *buf)
4714 {
4715 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4716 	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4717 						  cap_ndoms(iommu->cap)));
4718 }
4719 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4720 
4721 static struct attribute *intel_iommu_attrs[] = {
4722 	&dev_attr_version.attr,
4723 	&dev_attr_address.attr,
4724 	&dev_attr_cap.attr,
4725 	&dev_attr_ecap.attr,
4726 	&dev_attr_domains_supported.attr,
4727 	&dev_attr_domains_used.attr,
4728 	NULL,
4729 };
4730 
4731 static struct attribute_group intel_iommu_group = {
4732 	.name = "intel-iommu",
4733 	.attrs = intel_iommu_attrs,
4734 };
4735 
4736 const struct attribute_group *intel_iommu_groups[] = {
4737 	&intel_iommu_group,
4738 	NULL,
4739 };
4740 
4741 static inline bool has_untrusted_dev(void)
4742 {
4743 	struct pci_dev *pdev = NULL;
4744 
4745 	for_each_pci_dev(pdev)
4746 		if (pdev->untrusted)
4747 			return true;
4748 
4749 	return false;
4750 }
4751 
4752 static int __init platform_optin_force_iommu(void)
4753 {
4754 	if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
4755 		return 0;
4756 
4757 	if (no_iommu || dmar_disabled)
4758 		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4759 
4760 	/*
4761 	 * If Intel-IOMMU is disabled by default, we will apply identity
4762 	 * map for all devices except those marked as being untrusted.
4763 	 */
4764 	if (dmar_disabled)
4765 		iommu_set_default_passthrough(false);
4766 
4767 	dmar_disabled = 0;
4768 	no_iommu = 0;
4769 
4770 	return 1;
4771 }
4772 
4773 static int __init probe_acpi_namespace_devices(void)
4774 {
4775 	struct dmar_drhd_unit *drhd;
4776 	/* To avoid a -Wunused-but-set-variable warning. */
4777 	struct intel_iommu *iommu __maybe_unused;
4778 	struct device *dev;
4779 	int i, ret = 0;
4780 
4781 	for_each_active_iommu(iommu, drhd) {
4782 		for_each_active_dev_scope(drhd->devices,
4783 					  drhd->devices_cnt, i, dev) {
4784 			struct acpi_device_physical_node *pn;
4785 			struct iommu_group *group;
4786 			struct acpi_device *adev;
4787 
4788 			if (dev->bus != &acpi_bus_type)
4789 				continue;
4790 
4791 			adev = to_acpi_device(dev);
4792 			mutex_lock(&adev->physical_node_lock);
4793 			list_for_each_entry(pn,
4794 					    &adev->physical_node_list, node) {
4795 				group = iommu_group_get(pn->dev);
4796 				if (group) {
4797 					iommu_group_put(group);
4798 					continue;
4799 				}
4800 
4801 				pn->dev->bus->iommu_ops = &intel_iommu_ops;
4802 				ret = iommu_probe_device(pn->dev);
4803 				if (ret)
4804 					break;
4805 			}
4806 			mutex_unlock(&adev->physical_node_lock);
4807 
4808 			if (ret)
4809 				return ret;
4810 		}
4811 	}
4812 
4813 	return 0;
4814 }
4815 
/*
 * Driver entry point: parse the DMAR table, initialize every DMAR unit,
 * register the IOMMUs with the IOMMU core and finally enable translation.
 *
 * When force_on is set (tboot or platform opt-in), failures in the early
 * steps panic instead of returning.
 *
 * Returns 0 on success, -ENOMEM if the mempool cannot be set up, the error
 * from init_dmars(), or -ENODEV for the other failure and "IOMMU disabled"
 * exits.
 */
int __init intel_iommu_init(void)
{
	int ret = -ENODEV;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Intel IOMMU is required for a TXT/tboot launch or platform
	 * opt in, so enforce that.
	 */
	force_on = tboot_force_iommu() || platform_optin_force_iommu();

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENOMEM;
	}

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		goto out_free_dmar;
	}

	if (dmar_dev_scope_init() < 0) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		goto out_free_dmar;
	}

	up_write(&dmar_global_lock);

	/*
	 * The bus notifier takes the dmar_global_lock, so lockdep will
	 * complain later when we register it under the lock.
	 */
	dmar_register_bus_notifier();

	down_write(&dmar_global_lock);

	if (!no_iommu)
		intel_iommu_debugfs_init();

	if (no_iommu || dmar_disabled) {
		/*
		 * We exit the function here to ensure IOMMU's remapping and
		 * mempool aren't setup, which means that the IOMMU's PMRs
		 * won't be disabled via the call to init_dmars(). So disable
		 * it explicitly here. The PMRs were setup by tboot prior to
		 * calling SENTER, but the kernel is expected to reset/tear
		 * down the PMRs.
		 */
		if (intel_iommu_tboot_noforce) {
			for_each_iommu(iommu, drhd)
				iommu_disable_protect_mem_regions(iommu);
		}

		/*
		 * Make sure the IOMMUs are switched off, even when we
		 * boot into a kexec kernel and the previous kernel left
		 * them enabled
		 */
		intel_disable_iommus();
		/* ret is still -ENODEV on this path. */
		goto out_free_dmar;
	}

	if (list_empty(&dmar_rmrr_units))
		pr_info("No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		pr_info("No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;
	}

	if (dmar_map_gfx)
		intel_iommu_gfx_mapped = 1;

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		pr_err("Initialization failed\n");
		goto out_free_reserved_range;
	}
	up_write(&dmar_global_lock);

	init_iommu_pm_ops();

	/* Expose each active IOMMU in sysfs and register it with the core. */
	down_read(&dmar_global_lock);
	for_each_active_iommu(iommu, drhd) {
		iommu_device_sysfs_add(&iommu->iommu, NULL,
				       intel_iommu_groups,
				       "%s", iommu->name);
		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
		iommu_device_register(&iommu->iommu);
	}
	up_read(&dmar_global_lock);

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	/* The memory notifier is only registered for a software si_domain. */
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);
	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
			  intel_iommu_cpu_dead);

	down_read(&dmar_global_lock);
	if (probe_acpi_namespace_devices())
		pr_warn("ACPI name space devices didn't probe correctly\n");

	/* Finally, we enable the DMA remapping hardware. */
	for_each_iommu(iommu, drhd) {
		if (!drhd->ignored && !translation_pre_enabled(iommu))
			iommu_enable_translation(iommu);

		iommu_disable_protect_mem_regions(iommu);
	}
	up_read(&dmar_global_lock);

	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");

	intel_iommu_enabled = 1;

	return 0;

	/*
	 * Error unwind. Both labels are reached with dmar_global_lock held
	 * for writing; out_free_reserved_range falls through into
	 * out_free_dmar.
	 */
out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
out_free_dmar:
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
	return ret;
}
4954 
4955 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4956 {
4957 	struct intel_iommu *iommu = opaque;
4958 
4959 	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4960 	return 0;
4961 }
4962 
4963 /*
4964  * NB - intel-iommu lacks any sort of reference counting for the users of
4965  * dependent devices.  If multiple endpoints have intersecting dependent
4966  * devices, unbinding the driver from any one of them will possibly leave
4967  * the others unable to operate.
4968  */
4969 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4970 {
4971 	if (!iommu || !dev || !dev_is_pci(dev))
4972 		return;
4973 
4974 	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4975 }
4976 
4977 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4978 {
4979 	struct dmar_domain *domain;
4980 	struct intel_iommu *iommu;
4981 	unsigned long flags;
4982 
4983 	assert_spin_locked(&device_domain_lock);
4984 
4985 	if (WARN_ON(!info))
4986 		return;
4987 
4988 	iommu = info->iommu;
4989 	domain = info->domain;
4990 
4991 	if (info->dev) {
4992 		if (dev_is_pci(info->dev) && sm_supported(iommu))
4993 			intel_pasid_tear_down_entry(iommu, info->dev,
4994 					PASID_RID2PASID, false);
4995 
4996 		iommu_disable_dev_iotlb(info);
4997 		if (!dev_is_real_dma_subdevice(info->dev))
4998 			domain_context_clear(iommu, info->dev);
4999 		intel_pasid_free_table(info->dev);
5000 	}
5001 
5002 	unlink_domain_info(info);
5003 
5004 	spin_lock_irqsave(&iommu->lock, flags);
5005 	domain_detach_iommu(domain, iommu);
5006 	spin_unlock_irqrestore(&iommu->lock, flags);
5007 
5008 	free_devinfo_mem(info);
5009 }
5010 
5011 static void dmar_remove_one_dev_info(struct device *dev)
5012 {
5013 	struct device_domain_info *info;
5014 	unsigned long flags;
5015 
5016 	spin_lock_irqsave(&device_domain_lock, flags);
5017 	info = get_domain_info(dev);
5018 	if (info)
5019 		__dmar_remove_one_dev_info(info);
5020 	spin_unlock_irqrestore(&device_domain_lock, flags);
5021 }
5022 
5023 static int md_domain_init(struct dmar_domain *domain, int guest_width)
5024 {
5025 	int adjust_width;
5026 
5027 	/* calculate AGAW */
5028 	domain->gaw = guest_width;
5029 	adjust_width = guestwidth_to_adjustwidth(guest_width);
5030 	domain->agaw = width_to_agaw(adjust_width);
5031 
5032 	domain->iommu_coherency = 0;
5033 	domain->iommu_snooping = 0;
5034 	domain->iommu_superpage = 0;
5035 	domain->max_addr = 0;
5036 
5037 	/* always allocate the top pgd */
5038 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5039 	if (!domain->pgd)
5040 		return -ENOMEM;
5041 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5042 	return 0;
5043 }
5044 
5045 static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
5046 {
5047 	init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5048 	copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
5049 
5050 	if (!intel_iommu_strict &&
5051 	    init_iova_flush_queue(&dmar_domain->iovad,
5052 				  iommu_flush_iova, iova_entry_free))
5053 		pr_info("iova flush queue initialization failed\n");
5054 }
5055 
5056 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
5057 {
5058 	struct dmar_domain *dmar_domain;
5059 	struct iommu_domain *domain;
5060 
5061 	switch (type) {
5062 	case IOMMU_DOMAIN_DMA:
5063 	/* fallthrough */
5064 	case IOMMU_DOMAIN_UNMANAGED:
5065 		dmar_domain = alloc_domain(0);
5066 		if (!dmar_domain) {
5067 			pr_err("Can't allocate dmar_domain\n");
5068 			return NULL;
5069 		}
5070 		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5071 			pr_err("Domain initialization failed\n");
5072 			domain_exit(dmar_domain);
5073 			return NULL;
5074 		}
5075 
5076 		if (type == IOMMU_DOMAIN_DMA)
5077 			intel_init_iova_domain(dmar_domain);
5078 
5079 		domain_update_iommu_cap(dmar_domain);
5080 
5081 		domain = &dmar_domain->domain;
5082 		domain->geometry.aperture_start = 0;
5083 		domain->geometry.aperture_end   =
5084 				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
5085 		domain->geometry.force_aperture = true;
5086 
5087 		return domain;
5088 	case IOMMU_DOMAIN_IDENTITY:
5089 		return &si_domain->domain;
5090 	default:
5091 		return NULL;
5092 	}
5093 
5094 	return NULL;
5095 }
5096 
5097 static void intel_iommu_domain_free(struct iommu_domain *domain)
5098 {
5099 	if (domain != &si_domain->domain)
5100 		domain_exit(to_dmar_domain(domain));
5101 }
5102 
5103 /*
5104  * Check whether a @domain could be attached to the @dev through the
5105  * aux-domain attach/detach APIs.
5106  */
5107 static inline bool
5108 is_aux_domain(struct device *dev, struct iommu_domain *domain)
5109 {
5110 	struct device_domain_info *info = get_domain_info(dev);
5111 
5112 	return info && info->auxd_enabled &&
5113 			domain->type == IOMMU_DOMAIN_UNMANAGED;
5114 }
5115 
5116 static void auxiliary_link_device(struct dmar_domain *domain,
5117 				  struct device *dev)
5118 {
5119 	struct device_domain_info *info = get_domain_info(dev);
5120 
5121 	assert_spin_locked(&device_domain_lock);
5122 	if (WARN_ON(!info))
5123 		return;
5124 
5125 	domain->auxd_refcnt++;
5126 	list_add(&domain->auxd, &info->auxiliary_domains);
5127 }
5128 
5129 static void auxiliary_unlink_device(struct dmar_domain *domain,
5130 				    struct device *dev)
5131 {
5132 	struct device_domain_info *info = get_domain_info(dev);
5133 
5134 	assert_spin_locked(&device_domain_lock);
5135 	if (WARN_ON(!info))
5136 		return;
5137 
5138 	list_del(&domain->auxd);
5139 	domain->auxd_refcnt--;
5140 
5141 	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5142 		ioasid_free(domain->default_pasid);
5143 }
5144 
5145 static int aux_domain_add_dev(struct dmar_domain *domain,
5146 			      struct device *dev)
5147 {
5148 	int ret;
5149 	u8 bus, devfn;
5150 	unsigned long flags;
5151 	struct intel_iommu *iommu;
5152 
5153 	iommu = device_to_iommu(dev, &bus, &devfn);
5154 	if (!iommu)
5155 		return -ENODEV;
5156 
5157 	if (domain->default_pasid <= 0) {
5158 		int pasid;
5159 
5160 		/* No private data needed for the default pasid */
5161 		pasid = ioasid_alloc(NULL, PASID_MIN,
5162 				     pci_max_pasids(to_pci_dev(dev)) - 1,
5163 				     NULL);
5164 		if (pasid == INVALID_IOASID) {
5165 			pr_err("Can't allocate default pasid\n");
5166 			return -ENODEV;
5167 		}
5168 		domain->default_pasid = pasid;
5169 	}
5170 
5171 	spin_lock_irqsave(&device_domain_lock, flags);
5172 	/*
5173 	 * iommu->lock must be held to attach domain to iommu and setup the
5174 	 * pasid entry for second level translation.
5175 	 */
5176 	spin_lock(&iommu->lock);
5177 	ret = domain_attach_iommu(domain, iommu);
5178 	if (ret)
5179 		goto attach_failed;
5180 
5181 	/* Setup the PASID entry for mediated devices: */
5182 	if (domain_use_first_level(domain))
5183 		ret = domain_setup_first_level(iommu, domain, dev,
5184 					       domain->default_pasid);
5185 	else
5186 		ret = intel_pasid_setup_second_level(iommu, domain, dev,
5187 						     domain->default_pasid);
5188 	if (ret)
5189 		goto table_failed;
5190 	spin_unlock(&iommu->lock);
5191 
5192 	auxiliary_link_device(domain, dev);
5193 
5194 	spin_unlock_irqrestore(&device_domain_lock, flags);
5195 
5196 	return 0;
5197 
5198 table_failed:
5199 	domain_detach_iommu(domain, iommu);
5200 attach_failed:
5201 	spin_unlock(&iommu->lock);
5202 	spin_unlock_irqrestore(&device_domain_lock, flags);
5203 	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5204 		ioasid_free(domain->default_pasid);
5205 
5206 	return ret;
5207 }
5208 
5209 static void aux_domain_remove_dev(struct dmar_domain *domain,
5210 				  struct device *dev)
5211 {
5212 	struct device_domain_info *info;
5213 	struct intel_iommu *iommu;
5214 	unsigned long flags;
5215 
5216 	if (!is_aux_domain(dev, &domain->domain))
5217 		return;
5218 
5219 	spin_lock_irqsave(&device_domain_lock, flags);
5220 	info = get_domain_info(dev);
5221 	iommu = info->iommu;
5222 
5223 	auxiliary_unlink_device(domain, dev);
5224 
5225 	spin_lock(&iommu->lock);
5226 	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
5227 	domain_detach_iommu(domain, iommu);
5228 	spin_unlock(&iommu->lock);
5229 
5230 	spin_unlock_irqrestore(&device_domain_lock, flags);
5231 }
5232 
5233 static int prepare_domain_attach_device(struct iommu_domain *domain,
5234 					struct device *dev)
5235 {
5236 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5237 	struct intel_iommu *iommu;
5238 	int addr_width;
5239 	u8 bus, devfn;
5240 
5241 	iommu = device_to_iommu(dev, &bus, &devfn);
5242 	if (!iommu)
5243 		return -ENODEV;
5244 
5245 	/* check if this iommu agaw is sufficient for max mapped address */
5246 	addr_width = agaw_to_width(iommu->agaw);
5247 	if (addr_width > cap_mgaw(iommu->cap))
5248 		addr_width = cap_mgaw(iommu->cap);
5249 
5250 	if (dmar_domain->max_addr > (1LL << addr_width)) {
5251 		dev_err(dev, "%s: iommu width (%d) is not "
5252 		        "sufficient for the mapped address (%llx)\n",
5253 		        __func__, addr_width, dmar_domain->max_addr);
5254 		return -EFAULT;
5255 	}
5256 	dmar_domain->gaw = addr_width;
5257 
5258 	/*
5259 	 * Knock out extra levels of page tables if necessary
5260 	 */
5261 	while (iommu->agaw < dmar_domain->agaw) {
5262 		struct dma_pte *pte;
5263 
5264 		pte = dmar_domain->pgd;
5265 		if (dma_pte_present(pte)) {
5266 			dmar_domain->pgd = (struct dma_pte *)
5267 				phys_to_virt(dma_pte_addr(pte));
5268 			free_pgtable_page(pte);
5269 		}
5270 		dmar_domain->agaw--;
5271 	}
5272 
5273 	return 0;
5274 }
5275 
5276 static int intel_iommu_attach_device(struct iommu_domain *domain,
5277 				     struct device *dev)
5278 {
5279 	int ret;
5280 
5281 	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5282 	    device_is_rmrr_locked(dev)) {
5283 		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
5284 		return -EPERM;
5285 	}
5286 
5287 	if (is_aux_domain(dev, domain))
5288 		return -EPERM;
5289 
5290 	/* normally dev is not mapped */
5291 	if (unlikely(domain_context_mapped(dev))) {
5292 		struct dmar_domain *old_domain;
5293 
5294 		old_domain = find_domain(dev);
5295 		if (old_domain)
5296 			dmar_remove_one_dev_info(dev);
5297 	}
5298 
5299 	ret = prepare_domain_attach_device(domain, dev);
5300 	if (ret)
5301 		return ret;
5302 
5303 	return domain_add_dev_info(to_dmar_domain(domain), dev);
5304 }
5305 
5306 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5307 					 struct device *dev)
5308 {
5309 	int ret;
5310 
5311 	if (!is_aux_domain(dev, domain))
5312 		return -EPERM;
5313 
5314 	ret = prepare_domain_attach_device(domain, dev);
5315 	if (ret)
5316 		return ret;
5317 
5318 	return aux_domain_add_dev(to_dmar_domain(domain), dev);
5319 }
5320 
/*
 * Detach callback for regular domains. Only @dev is consulted; @domain is
 * not used here.
 */
static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	dmar_remove_one_dev_info(dev);
}
5326 
/* Detach callback for auxiliary domains: remove @dev from @domain. */
static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
					  struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	aux_domain_remove_dev(dmar_domain, dev);
}
5332 
5333 /*
5334  * 2D array for converting and sanitizing IOMMU generic TLB granularity to
5335  * VT-d granularity. Invalidation is typically included in the unmap operation
5336  * as a result of DMA or VFIO unmap. However, for assigned devices guest
5337  * owns the first level page tables. Invalidations of translation caches in the
5338  * guest are trapped and passed down to the host.
5339  *
5340  * vIOMMU in the guest will only expose first level page tables, therefore
5341  * we do not support IOTLB granularity for request without PASID (second level).
5342  *
 * For example, to find the VT-d granularity encoding for IOTLB
 * type and page selective granularity within PASID:
 * X: indexed by the bit position of the iommu cache type
 * Y: indexed by enum iommu_inv_granularity
 * [bit number of IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
5348  */
5349 
5350 static const int
5351 inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
5352 	/*
5353 	 * PASID based IOTLB invalidation: PASID selective (per PASID),
5354 	 * page selective (address granularity)
5355 	 */
5356 	{-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
5357 	/* PASID based dev TLBs */
5358 	{-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
5359 	/* PASID cache */
5360 	{-EINVAL, -EINVAL, -EINVAL}
5361 };
5362 
5363 static inline int to_vtd_granularity(int type, int granu)
5364 {
5365 	return inv_type_granu_table[type][granu];
5366 }
5367 
5368 static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
5369 {
5370 	u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
5371 
5372 	/* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
5373 	 * IOMMU cache invalidate API passes granu_size in bytes, and number of
5374 	 * granu size in contiguous memory.
5375 	 */
5376 	return order_base_2(nr_pages);
5377 }
5378 
5379 #ifdef CONFIG_INTEL_IOMMU_SVM
5380 static int
5381 intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
5382 			   struct iommu_cache_invalidate_info *inv_info)
5383 {
5384 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5385 	struct device_domain_info *info;
5386 	struct intel_iommu *iommu;
5387 	unsigned long flags;
5388 	int cache_type;
5389 	u8 bus, devfn;
5390 	u16 did, sid;
5391 	int ret = 0;
5392 	u64 size = 0;
5393 
5394 	if (!inv_info || !dmar_domain ||
5395 	    inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
5396 		return -EINVAL;
5397 
5398 	if (!dev || !dev_is_pci(dev))
5399 		return -ENODEV;
5400 
5401 	iommu = device_to_iommu(dev, &bus, &devfn);
5402 	if (!iommu)
5403 		return -ENODEV;
5404 
5405 	if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
5406 		return -EINVAL;
5407 
5408 	spin_lock_irqsave(&device_domain_lock, flags);
5409 	spin_lock(&iommu->lock);
5410 	info = get_domain_info(dev);
5411 	if (!info) {
5412 		ret = -EINVAL;
5413 		goto out_unlock;
5414 	}
5415 	did = dmar_domain->iommu_did[iommu->seq_id];
5416 	sid = PCI_DEVID(bus, devfn);
5417 
5418 	/* Size is only valid in address selective invalidation */
5419 	if (inv_info->granularity != IOMMU_INV_GRANU_PASID)
5420 		size = to_vtd_size(inv_info->addr_info.granule_size,
5421 				   inv_info->addr_info.nb_granules);
5422 
5423 	for_each_set_bit(cache_type,
5424 			 (unsigned long *)&inv_info->cache,
5425 			 IOMMU_CACHE_INV_TYPE_NR) {
5426 		int granu = 0;
5427 		u64 pasid = 0;
5428 
5429 		granu = to_vtd_granularity(cache_type, inv_info->granularity);
5430 		if (granu == -EINVAL) {
5431 			pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
5432 					   cache_type, inv_info->granularity);
5433 			break;
5434 		}
5435 
5436 		/*
5437 		 * PASID is stored in different locations based on the
5438 		 * granularity.
5439 		 */
5440 		if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
5441 		    (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
5442 			pasid = inv_info->pasid_info.pasid;
5443 		else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5444 			 (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
5445 			pasid = inv_info->addr_info.pasid;
5446 
5447 		switch (BIT(cache_type)) {
5448 		case IOMMU_CACHE_INV_TYPE_IOTLB:
5449 			if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5450 			    size &&
5451 			    (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
5452 				pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
5453 						   inv_info->addr_info.addr, size);
5454 				ret = -ERANGE;
5455 				goto out_unlock;
5456 			}
5457 
5458 			/*
5459 			 * If granu is PASID-selective, address is ignored.
5460 			 * We use npages = -1 to indicate that.
5461 			 */
5462 			qi_flush_piotlb(iommu, did, pasid,
5463 					mm_to_dma_pfn(inv_info->addr_info.addr),
5464 					(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
5465 					inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
5466 
5467 			/*
5468 			 * Always flush device IOTLB if ATS is enabled. vIOMMU
5469 			 * in the guest may assume IOTLB flush is inclusive,
5470 			 * which is more efficient.
5471 			 */
5472 			if (info->ats_enabled)
5473 				qi_flush_dev_iotlb_pasid(iommu, sid,
5474 						info->pfsid, pasid,
5475 						info->ats_qdep,
5476 						inv_info->addr_info.addr,
5477 						size, granu);
5478 			break;
5479 		case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
5480 			if (info->ats_enabled)
5481 				qi_flush_dev_iotlb_pasid(iommu, sid,
5482 						info->pfsid, pasid,
5483 						info->ats_qdep,
5484 						inv_info->addr_info.addr,
5485 						size, granu);
5486 			else
5487 				pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
5488 			break;
5489 		default:
5490 			dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
5491 					    cache_type);
5492 			ret = -EINVAL;
5493 		}
5494 	}
5495 out_unlock:
5496 	spin_unlock(&iommu->lock);
5497 	spin_unlock_irqrestore(&device_domain_lock, flags);
5498 
5499 	return ret;
5500 }
5501 #endif
5502 
5503 static int intel_iommu_map(struct iommu_domain *domain,
5504 			   unsigned long iova, phys_addr_t hpa,
5505 			   size_t size, int iommu_prot, gfp_t gfp)
5506 {
5507 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5508 	u64 max_addr;
5509 	int prot = 0;
5510 	int ret;
5511 
5512 	if (iommu_prot & IOMMU_READ)
5513 		prot |= DMA_PTE_READ;
5514 	if (iommu_prot & IOMMU_WRITE)
5515 		prot |= DMA_PTE_WRITE;
5516 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5517 		prot |= DMA_PTE_SNP;
5518 
5519 	max_addr = iova + size;
5520 	if (dmar_domain->max_addr < max_addr) {
5521 		u64 end;
5522 
5523 		/* check if minimum agaw is sufficient for mapped address */
5524 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5525 		if (end < max_addr) {
5526 			pr_err("%s: iommu width (%d) is not "
5527 			       "sufficient for the mapped address (%llx)\n",
5528 			       __func__, dmar_domain->gaw, max_addr);
5529 			return -EFAULT;
5530 		}
5531 		dmar_domain->max_addr = max_addr;
5532 	}
5533 	/* Round up size to next multiple of PAGE_SIZE, if it and
5534 	   the low bits of hpa would take us onto the next page */
5535 	size = aligned_nrpages(hpa, size);
5536 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5537 				 hpa >> VTD_PAGE_SHIFT, size, prot);
5538 	return ret;
5539 }
5540 
5541 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5542 				unsigned long iova, size_t size,
5543 				struct iommu_iotlb_gather *gather)
5544 {
5545 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5546 	struct page *freelist = NULL;
5547 	unsigned long start_pfn, last_pfn;
5548 	unsigned int npages;
5549 	int iommu_id, level = 0;
5550 
5551 	/* Cope with horrid API which requires us to unmap more than the
5552 	   size argument if it happens to be a large-page mapping. */
5553 	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5554 
5555 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5556 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5557 
5558 	start_pfn = iova >> VTD_PAGE_SHIFT;
5559 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5560 
5561 	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5562 
5563 	npages = last_pfn - start_pfn + 1;
5564 
5565 	for_each_domain_iommu(iommu_id, dmar_domain)
5566 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5567 				      start_pfn, npages, !freelist, 0);
5568 
5569 	dma_free_pagelist(freelist);
5570 
5571 	if (dmar_domain->max_addr == iova + size)
5572 		dmar_domain->max_addr = iova;
5573 
5574 	return size;
5575 }
5576 
5577 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5578 					    dma_addr_t iova)
5579 {
5580 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5581 	struct dma_pte *pte;
5582 	int level = 0;
5583 	u64 phys = 0;
5584 
5585 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5586 	if (pte && dma_pte_present(pte))
5587 		phys = dma_pte_addr(pte) +
5588 			(iova & (BIT_MASK(level_to_offset_bits(level) +
5589 						VTD_PAGE_SHIFT) - 1));
5590 
5591 	return phys;
5592 }
5593 
5594 static inline bool scalable_mode_support(void)
5595 {
5596 	struct dmar_drhd_unit *drhd;
5597 	struct intel_iommu *iommu;
5598 	bool ret = true;
5599 
5600 	rcu_read_lock();
5601 	for_each_active_iommu(iommu, drhd) {
5602 		if (!sm_supported(iommu)) {
5603 			ret = false;
5604 			break;
5605 		}
5606 	}
5607 	rcu_read_unlock();
5608 
5609 	return ret;
5610 }
5611 
5612 static inline bool iommu_pasid_support(void)
5613 {
5614 	struct dmar_drhd_unit *drhd;
5615 	struct intel_iommu *iommu;
5616 	bool ret = true;
5617 
5618 	rcu_read_lock();
5619 	for_each_active_iommu(iommu, drhd) {
5620 		if (!pasid_supported(iommu)) {
5621 			ret = false;
5622 			break;
5623 		}
5624 	}
5625 	rcu_read_unlock();
5626 
5627 	return ret;
5628 }
5629 
5630 static inline bool nested_mode_support(void)
5631 {
5632 	struct dmar_drhd_unit *drhd;
5633 	struct intel_iommu *iommu;
5634 	bool ret = true;
5635 
5636 	rcu_read_lock();
5637 	for_each_active_iommu(iommu, drhd) {
5638 		if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
5639 			ret = false;
5640 			break;
5641 		}
5642 	}
5643 	rcu_read_unlock();
5644 
5645 	return ret;
5646 }
5647 
5648 static bool intel_iommu_capable(enum iommu_cap cap)
5649 {
5650 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
5651 		return domain_update_iommu_snooping(NULL) == 1;
5652 	if (cap == IOMMU_CAP_INTR_REMAP)
5653 		return irq_remapping_enabled == 1;
5654 
5655 	return false;
5656 }
5657 
5658 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
5659 {
5660 	struct intel_iommu *iommu;
5661 	u8 bus, devfn;
5662 
5663 	iommu = device_to_iommu(dev, &bus, &devfn);
5664 	if (!iommu)
5665 		return ERR_PTR(-ENODEV);
5666 
5667 	if (translation_pre_enabled(iommu))
5668 		dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5669 
5670 	return &iommu->iommu;
5671 }
5672 
5673 static void intel_iommu_release_device(struct device *dev)
5674 {
5675 	struct intel_iommu *iommu;
5676 	u8 bus, devfn;
5677 
5678 	iommu = device_to_iommu(dev, &bus, &devfn);
5679 	if (!iommu)
5680 		return;
5681 
5682 	dmar_remove_one_dev_info(dev);
5683 
5684 	set_dma_ops(dev, NULL);
5685 }
5686 
5687 static void intel_iommu_probe_finalize(struct device *dev)
5688 {
5689 	struct iommu_domain *domain;
5690 
5691 	domain = iommu_get_domain_for_dev(dev);
5692 	if (device_needs_bounce(dev))
5693 		set_dma_ops(dev, &bounce_dma_ops);
5694 	else if (domain && domain->type == IOMMU_DOMAIN_DMA)
5695 		set_dma_ops(dev, &intel_dma_ops);
5696 	else
5697 		set_dma_ops(dev, NULL);
5698 }
5699 
5700 static void intel_iommu_get_resv_regions(struct device *device,
5701 					 struct list_head *head)
5702 {
5703 	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5704 	struct iommu_resv_region *reg;
5705 	struct dmar_rmrr_unit *rmrr;
5706 	struct device *i_dev;
5707 	int i;
5708 
5709 	down_read(&dmar_global_lock);
5710 	for_each_rmrr_units(rmrr) {
5711 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5712 					  i, i_dev) {
5713 			struct iommu_resv_region *resv;
5714 			enum iommu_resv_type type;
5715 			size_t length;
5716 
5717 			if (i_dev != device &&
5718 			    !is_downstream_to_pci_bridge(device, i_dev))
5719 				continue;
5720 
5721 			length = rmrr->end_address - rmrr->base_address + 1;
5722 
5723 			type = device_rmrr_is_relaxable(device) ?
5724 				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5725 
5726 			resv = iommu_alloc_resv_region(rmrr->base_address,
5727 						       length, prot, type);
5728 			if (!resv)
5729 				break;
5730 
5731 			list_add_tail(&resv->list, head);
5732 		}
5733 	}
5734 	up_read(&dmar_global_lock);
5735 
5736 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5737 	if (dev_is_pci(device)) {
5738 		struct pci_dev *pdev = to_pci_dev(device);
5739 
5740 		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5741 			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5742 						   IOMMU_RESV_DIRECT_RELAXABLE);
5743 			if (reg)
5744 				list_add_tail(&reg->list, head);
5745 		}
5746 	}
5747 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5748 
5749 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5750 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5751 				      0, IOMMU_RESV_MSI);
5752 	if (!reg)
5753 		return;
5754 	list_add_tail(&reg->list, head);
5755 }
5756 
5757 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5758 {
5759 	struct device_domain_info *info;
5760 	struct context_entry *context;
5761 	struct dmar_domain *domain;
5762 	unsigned long flags;
5763 	u64 ctx_lo;
5764 	int ret;
5765 
5766 	domain = find_domain(dev);
5767 	if (!domain)
5768 		return -EINVAL;
5769 
5770 	spin_lock_irqsave(&device_domain_lock, flags);
5771 	spin_lock(&iommu->lock);
5772 
5773 	ret = -EINVAL;
5774 	info = get_domain_info(dev);
5775 	if (!info || !info->pasid_supported)
5776 		goto out;
5777 
5778 	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5779 	if (WARN_ON(!context))
5780 		goto out;
5781 
5782 	ctx_lo = context[0].lo;
5783 
5784 	if (!(ctx_lo & CONTEXT_PASIDE)) {
5785 		ctx_lo |= CONTEXT_PASIDE;
5786 		context[0].lo = ctx_lo;
5787 		wmb();
5788 		iommu->flush.flush_context(iommu,
5789 					   domain->iommu_did[iommu->seq_id],
5790 					   PCI_DEVID(info->bus, info->devfn),
5791 					   DMA_CCMD_MASK_NOBIT,
5792 					   DMA_CCMD_DEVICE_INVL);
5793 	}
5794 
5795 	/* Enable PASID support in the device, if it wasn't already */
5796 	if (!info->pasid_enabled)
5797 		iommu_enable_dev_iotlb(info);
5798 
5799 	ret = 0;
5800 
5801  out:
5802 	spin_unlock(&iommu->lock);
5803 	spin_unlock_irqrestore(&device_domain_lock, flags);
5804 
5805 	return ret;
5806 }
5807 
5808 static void intel_iommu_apply_resv_region(struct device *dev,
5809 					  struct iommu_domain *domain,
5810 					  struct iommu_resv_region *region)
5811 {
5812 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5813 	unsigned long start, end;
5814 
5815 	start = IOVA_PFN(region->start);
5816 	end   = IOVA_PFN(region->start + region->length - 1);
5817 
5818 	WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5819 }
5820 
/* PCI devices use the PCI grouping helper; everything else the generic one. */
static struct iommu_group *intel_iommu_device_group(struct device *dev)
{
	return dev_is_pci(dev) ? pci_device_group(dev)
			       : generic_device_group(dev);
}
5827 
5828 #ifdef CONFIG_INTEL_IOMMU_SVM
5829 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5830 {
5831 	struct intel_iommu *iommu;
5832 	u8 bus, devfn;
5833 
5834 	if (iommu_dummy(dev)) {
5835 		dev_warn(dev,
5836 			 "No IOMMU translation for device; cannot enable SVM\n");
5837 		return NULL;
5838 	}
5839 
5840 	iommu = device_to_iommu(dev, &bus, &devfn);
5841 	if ((!iommu)) {
5842 		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5843 		return NULL;
5844 	}
5845 
5846 	return iommu;
5847 }
5848 #endif /* CONFIG_INTEL_IOMMU_SVM */
5849 
5850 static int intel_iommu_enable_auxd(struct device *dev)
5851 {
5852 	struct device_domain_info *info;
5853 	struct intel_iommu *iommu;
5854 	unsigned long flags;
5855 	u8 bus, devfn;
5856 	int ret;
5857 
5858 	iommu = device_to_iommu(dev, &bus, &devfn);
5859 	if (!iommu || dmar_disabled)
5860 		return -EINVAL;
5861 
5862 	if (!sm_supported(iommu) || !pasid_supported(iommu))
5863 		return -EINVAL;
5864 
5865 	ret = intel_iommu_enable_pasid(iommu, dev);
5866 	if (ret)
5867 		return -ENODEV;
5868 
5869 	spin_lock_irqsave(&device_domain_lock, flags);
5870 	info = get_domain_info(dev);
5871 	info->auxd_enabled = 1;
5872 	spin_unlock_irqrestore(&device_domain_lock, flags);
5873 
5874 	return 0;
5875 }
5876 
5877 static int intel_iommu_disable_auxd(struct device *dev)
5878 {
5879 	struct device_domain_info *info;
5880 	unsigned long flags;
5881 
5882 	spin_lock_irqsave(&device_domain_lock, flags);
5883 	info = get_domain_info(dev);
5884 	if (!WARN_ON(!info))
5885 		info->auxd_enabled = 0;
5886 	spin_unlock_irqrestore(&device_domain_lock, flags);
5887 
5888 	return 0;
5889 }
5890 
5891 /*
5892  * A PCI express designated vendor specific extended capability is defined
5893  * in the section 3.7 of Intel scalable I/O virtualization technical spec
5894  * for system software and tools to detect endpoint devices supporting the
5895  * Intel scalable IO virtualization without host driver dependency.
5896  *
5897  * Returns the address of the matching extended capability structure within
5898  * the device's PCI configuration space or 0 if the device does not support
5899  * it.
5900  */
5901 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5902 {
5903 	int pos;
5904 	u16 vendor, id;
5905 
5906 	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5907 	while (pos) {
5908 		pci_read_config_word(pdev, pos + 4, &vendor);
5909 		pci_read_config_word(pdev, pos + 8, &id);
5910 		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5911 			return pos;
5912 
5913 		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5914 	}
5915 
5916 	return 0;
5917 }
5918 
5919 static bool
5920 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5921 {
5922 	if (feat == IOMMU_DEV_FEAT_AUX) {
5923 		int ret;
5924 
5925 		if (!dev_is_pci(dev) || dmar_disabled ||
5926 		    !scalable_mode_support() || !iommu_pasid_support())
5927 			return false;
5928 
5929 		ret = pci_pasid_features(to_pci_dev(dev));
5930 		if (ret < 0)
5931 			return false;
5932 
5933 		return !!siov_find_pci_dvsec(to_pci_dev(dev));
5934 	}
5935 
5936 	if (feat == IOMMU_DEV_FEAT_SVA) {
5937 		struct device_domain_info *info = get_domain_info(dev);
5938 
5939 		return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
5940 			info->pasid_supported && info->pri_supported &&
5941 			info->ats_supported;
5942 	}
5943 
5944 	return false;
5945 }
5946 
5947 static int
5948 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5949 {
5950 	if (feat == IOMMU_DEV_FEAT_AUX)
5951 		return intel_iommu_enable_auxd(dev);
5952 
5953 	if (feat == IOMMU_DEV_FEAT_SVA) {
5954 		struct device_domain_info *info = get_domain_info(dev);
5955 
5956 		if (!info)
5957 			return -EINVAL;
5958 
5959 		if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
5960 			return 0;
5961 	}
5962 
5963 	return -ENODEV;
5964 }
5965 
5966 static int
5967 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5968 {
5969 	if (feat == IOMMU_DEV_FEAT_AUX)
5970 		return intel_iommu_disable_auxd(dev);
5971 
5972 	return -ENODEV;
5973 }
5974 
5975 static bool
5976 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5977 {
5978 	struct device_domain_info *info = get_domain_info(dev);
5979 
5980 	if (feat == IOMMU_DEV_FEAT_AUX)
5981 		return scalable_mode_support() && info && info->auxd_enabled;
5982 
5983 	return false;
5984 }
5985 
5986 static int
5987 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5988 {
5989 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5990 
5991 	return dmar_domain->default_pasid > 0 ?
5992 			dmar_domain->default_pasid : -EINVAL;
5993 }
5994 
5995 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5996 					   struct device *dev)
5997 {
5998 	return attach_deferred(dev);
5999 }
6000 
6001 static int
6002 intel_iommu_domain_set_attr(struct iommu_domain *domain,
6003 			    enum iommu_attr attr, void *data)
6004 {
6005 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
6006 	unsigned long flags;
6007 	int ret = 0;
6008 
6009 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
6010 		return -EINVAL;
6011 
6012 	switch (attr) {
6013 	case DOMAIN_ATTR_NESTING:
6014 		spin_lock_irqsave(&device_domain_lock, flags);
6015 		if (nested_mode_support() &&
6016 		    list_empty(&dmar_domain->devices)) {
6017 			dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
6018 			dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
6019 		} else {
6020 			ret = -ENODEV;
6021 		}
6022 		spin_unlock_irqrestore(&device_domain_lock, flags);
6023 		break;
6024 	default:
6025 		ret = -EINVAL;
6026 		break;
6027 	}
6028 
6029 	return ret;
6030 }
6031 
6032 /*
6033  * Check that the device does not live on an external facing PCI port that is
6034  * marked as untrusted. Such devices should not be able to apply quirks and
6035  * thus not be able to bypass the IOMMU restrictions.
6036  */
6037 static bool risky_device(struct pci_dev *pdev)
6038 {
6039 	if (pdev->untrusted) {
6040 		pci_info(pdev,
6041 			 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
6042 			 pdev->vendor, pdev->device);
6043 		pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
6044 		return true;
6045 	}
6046 	return false;
6047 }
6048 
6049 const struct iommu_ops intel_iommu_ops = {
6050 	.capable		= intel_iommu_capable,
6051 	.domain_alloc		= intel_iommu_domain_alloc,
6052 	.domain_free		= intel_iommu_domain_free,
6053 	.domain_set_attr	= intel_iommu_domain_set_attr,
6054 	.attach_dev		= intel_iommu_attach_device,
6055 	.detach_dev		= intel_iommu_detach_device,
6056 	.aux_attach_dev		= intel_iommu_aux_attach_device,
6057 	.aux_detach_dev		= intel_iommu_aux_detach_device,
6058 	.aux_get_pasid		= intel_iommu_aux_get_pasid,
6059 	.map			= intel_iommu_map,
6060 	.unmap			= intel_iommu_unmap,
6061 	.iova_to_phys		= intel_iommu_iova_to_phys,
6062 	.probe_device		= intel_iommu_probe_device,
6063 	.probe_finalize		= intel_iommu_probe_finalize,
6064 	.release_device		= intel_iommu_release_device,
6065 	.get_resv_regions	= intel_iommu_get_resv_regions,
6066 	.put_resv_regions	= generic_iommu_put_resv_regions,
6067 	.apply_resv_region	= intel_iommu_apply_resv_region,
6068 	.device_group		= intel_iommu_device_group,
6069 	.dev_has_feat		= intel_iommu_dev_has_feat,
6070 	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
6071 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
6072 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
6073 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
6074 	.def_domain_type	= device_def_domain_type,
6075 	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
6076 #ifdef CONFIG_INTEL_IOMMU_SVM
6077 	.cache_invalidate	= intel_iommu_sva_invalidate,
6078 	.sva_bind_gpasid	= intel_svm_bind_gpasid,
6079 	.sva_unbind_gpasid	= intel_svm_unbind_gpasid,
6080 	.sva_bind		= intel_svm_bind,
6081 	.sva_unbind		= intel_svm_unbind,
6082 	.sva_get_pasid		= intel_svm_get_pasid,
6083 #endif
6084 };
6085 
6086 static void quirk_iommu_igfx(struct pci_dev *dev)
6087 {
6088 	if (risky_device(dev))
6089 		return;
6090 
6091 	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
6092 	dmar_map_gfx = 0;
6093 }
6094 
6095 /* G4x/GM45 integrated gfx dmar support is totally busted. */
6096 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
6097 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
6098 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
6099 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
6100 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
6101 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
6102 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
6103 
6104 /* Broadwell igfx malfunctions with dmar */
6105 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6106 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6107 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6108 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6109 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6110 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6111 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6112 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6113 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6114 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6115 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6116 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6117 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6118 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6119 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6120 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6121 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6122 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6123 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6124 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6125 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6126 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6127 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6128 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
6129 
6130 static void quirk_iommu_rwbf(struct pci_dev *dev)
6131 {
6132 	if (risky_device(dev))
6133 		return;
6134 
6135 	/*
6136 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
6137 	 * but needs it. Same seems to hold for the desktop versions.
6138 	 */
6139 	pci_info(dev, "Forcing write-buffer flush capability\n");
6140 	rwbf_quirk = 1;
6141 }
6142 
6143 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
6144 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6145 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6146 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6147 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6148 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6149 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
6150 
6151 #define GGC 0x52
6152 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
6153 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
6154 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
6155 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
6156 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
6157 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
6158 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
6159 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
6160 
6161 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
6162 {
6163 	unsigned short ggc;
6164 
6165 	if (risky_device(dev))
6166 		return;
6167 
6168 	if (pci_read_config_word(dev, GGC, &ggc))
6169 		return;
6170 
6171 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
6172 		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
6173 		dmar_map_gfx = 0;
6174 	} else if (dmar_map_gfx) {
6175 		/* we have to ensure the gfx device is idle before we flush */
6176 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6177 		intel_iommu_strict = 1;
6178        }
6179 }
6180 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6181 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6182 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6183 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6184 
6185 /* On Tylersburg chipsets, some BIOSes have been known to enable the
6186    ISOCH DMAR unit for the Azalia sound device, but not give it any
6187    TLB entries, which causes it to deadlock. Check for that.  We do
6188    this in a function called from init_dmars(), instead of in a PCI
6189    quirk, because we don't want to print the obnoxious "BIOS broken"
6190    message if VT-d is actually disabled.
6191 */
6192 static void __init check_tylersburg_isoch(void)
6193 {
6194 	struct pci_dev *pdev;
6195 	uint32_t vtisochctrl;
6196 
6197 	/* If there's no Azalia in the system anyway, forget it. */
6198 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6199 	if (!pdev)
6200 		return;
6201 
6202 	if (risky_device(pdev)) {
6203 		pci_dev_put(pdev);
6204 		return;
6205 	}
6206 
6207 	pci_dev_put(pdev);
6208 
6209 	/* System Management Registers. Might be hidden, in which case
6210 	   we can't do the sanity check. But that's OK, because the
6211 	   known-broken BIOSes _don't_ actually hide it, so far. */
6212 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6213 	if (!pdev)
6214 		return;
6215 
6216 	if (risky_device(pdev)) {
6217 		pci_dev_put(pdev);
6218 		return;
6219 	}
6220 
6221 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6222 		pci_dev_put(pdev);
6223 		return;
6224 	}
6225 
6226 	pci_dev_put(pdev);
6227 
6228 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6229 	if (vtisochctrl & 1)
6230 		return;
6231 
6232 	/* Drop all bits other than the number of TLB entries */
6233 	vtisochctrl &= 0x1c;
6234 
6235 	/* If we have the recommended number of TLB entries (16), fine. */
6236 	if (vtisochctrl == 0x10)
6237 		return;
6238 
6239 	/* Zero TLB entries? You get to ride the short bus to school. */
6240 	if (!vtisochctrl) {
6241 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6242 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6243 		     dmi_get_system_info(DMI_BIOS_VENDOR),
6244 		     dmi_get_system_info(DMI_BIOS_VERSION),
6245 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
6246 		iommu_identity_mapping |= IDENTMAP_AZALIA;
6247 		return;
6248 	}
6249 
6250 	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
6251 	       vtisochctrl);
6252 }
6253