xref: /openbmc/linux/drivers/iommu/intel/dmar.c (revision 4daedf7a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16 
17 #define pr_fmt(fmt)     "DMAR: " fmt
18 
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 
35 #include "../irq_remapping.h"
36 
/*
 * Handler invoked for a single DMAR remapping structure.  It receives the
 * structure header and the per-type argument registered next to it; a
 * non-zero return value aborts the walk in dmar_walk_remapping_entries().
 */
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
/* Per-structure-type dispatch table used while walking DMAR entries. */
struct dmar_res_callback {
	/* handler for each structure type, indexed by header->type */
	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
	/* opaque argument handed to cb[type] */
	void			*arg[ACPI_DMAR_TYPE_RESERVED];
	/* when false, a known type without a handler fails the walk */
	bool			ignore_unhandled;
	/* when true, each entry is printed before it is dispatched */
	bool			print_entry;
};
44 
/*
 * Assumptions:
 * 1) The hotplug framework guarantees that DMAR unit will be hot-added
 *    before IO devices managed by that unit.
 * 2) The hotplug framework guarantees that DMAR unit will be hot-removed
 *    after IO devices managed by that unit.
 * 3) Hotplug events are rare.
 *
 * Locking rules for DMA and interrupt remapping related global data structures:
 * 1) Use dmar_global_lock in process context
 * 2) Use RCU in interrupt context
 */
DECLARE_RWSEM(dmar_global_lock);
/* All registered DRHD units; INCLUDE_ALL units are added at the tail. */
LIST_HEAD(dmar_drhd_units);

/* ACPI DMAR table as obtained by dmar_table_detect() / parse_dmar_table(). */
struct acpi_table_header * __initdata dmar_tbl;
/*
 * 1 until dmar_dev_scope_init() has run; afterwards 0 on success or the
 * first negative errno recorded while building device scopes.
 */
static int dmar_dev_scope_status = 1;
/* Bitmap of IOMMU sequence ids currently in use. */
static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];

/* Defined further down; needed by dmar_parse_one_drhd()/dmar_free_drhd(). */
static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);

/* Ops table installed on each IOMMU device in alloc_iommu(). */
extern const struct iommu_ops intel_iommu_ops;
68 
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71 	/*
72 	 * add INCLUDE_ALL at the tail, so scan the list will find it at
73 	 * the very end.
74 	 */
75 	if (drhd->include_all)
76 		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 	else
78 		list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80 
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83 	struct acpi_dmar_device_scope *scope;
84 
85 	*cnt = 0;
86 	while (start < end) {
87 		scope = start;
88 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 			(*cnt)++;
92 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93 			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94 			pr_warn("Unsupported device scope\n");
95 		}
96 		start += scope->length;
97 	}
98 	if (*cnt == 0)
99 		return NULL;
100 
101 	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103 
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106 	int i;
107 	struct device *tmp_dev;
108 
109 	if (*devices && *cnt) {
110 		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111 			put_device(tmp_dev);
112 		kfree(*devices);
113 	}
114 
115 	*devices = NULL;
116 	*cnt = 0;
117 }
118 
/*
 * Optimize out kzalloc()/kfree() for normal cases: static scratch buffer
 * used by dmar_alloc_pci_notify_info() whenever the notify info fits in
 * it.  dmar_free_pci_notify_info() recognises this buffer by address and
 * does not kfree() it.
 */
static char dmar_pci_notify_info_buf[64];
121 
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125 	int level = 0;
126 	size_t size;
127 	struct pci_dev *tmp;
128 	struct dmar_pci_notify_info *info;
129 
130 	BUG_ON(dev->is_virtfn);
131 
132 	/*
133 	 * Ignore devices that have a domain number higher than what can
134 	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135 	 */
136 	if (pci_domain_nr(dev->bus) > U16_MAX)
137 		return NULL;
138 
139 	/* Only generate path[] for device addition event */
140 	if (event == BUS_NOTIFY_ADD_DEVICE)
141 		for (tmp = dev; tmp; tmp = tmp->bus->self)
142 			level++;
143 
144 	size = struct_size(info, path, level);
145 	if (size <= sizeof(dmar_pci_notify_info_buf)) {
146 		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147 	} else {
148 		info = kzalloc(size, GFP_KERNEL);
149 		if (!info) {
150 			pr_warn("Out of memory when allocating notify_info "
151 				"for %s.\n", pci_name(dev));
152 			if (dmar_dev_scope_status == 0)
153 				dmar_dev_scope_status = -ENOMEM;
154 			return NULL;
155 		}
156 	}
157 
158 	info->event = event;
159 	info->dev = dev;
160 	info->seg = pci_domain_nr(dev->bus);
161 	info->level = level;
162 	if (event == BUS_NOTIFY_ADD_DEVICE) {
163 		for (tmp = dev; tmp; tmp = tmp->bus->self) {
164 			level--;
165 			info->path[level].bus = tmp->bus->number;
166 			info->path[level].device = PCI_SLOT(tmp->devfn);
167 			info->path[level].function = PCI_FUNC(tmp->devfn);
168 			if (pci_is_root_bus(tmp->bus))
169 				info->bus = tmp->bus->number;
170 		}
171 	}
172 
173 	return info;
174 }
175 
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178 	if ((void *)info != dmar_pci_notify_info_buf)
179 		kfree(info);
180 }
181 
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183 				struct acpi_dmar_pci_path *path, int count)
184 {
185 	int i;
186 
187 	if (info->bus != bus)
188 		goto fallback;
189 	if (info->level != count)
190 		goto fallback;
191 
192 	for (i = 0; i < count; i++) {
193 		if (path[i].device != info->path[i].device ||
194 		    path[i].function != info->path[i].function)
195 			goto fallback;
196 	}
197 
198 	return true;
199 
200 fallback:
201 
202 	if (count != 1)
203 		return false;
204 
205 	i = info->level - 1;
206 	if (bus              == info->path[i].bus &&
207 	    path[0].device   == info->path[i].device &&
208 	    path[0].function == info->path[i].function) {
209 		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210 			bus, path[0].device, path[0].function);
211 		return true;
212 	}
213 
214 	return false;
215 }
216 
/*
 * Record the device described by @info in @devices if one of the scope
 * entries in [start, end) names it.
 *
 * @info:        notify info for the device being added
 * @start, @end: bounds of the device scope entries to search
 * @segment:     PCI segment the scope entries belong to
 * @devices:     device scope array to fill
 * @devices_cnt: number of slots in @devices
 *
 * Return: > 0 if match found, 0 if no match found, < 0 if error happens
 */
int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
			  void *start, void*end, u16 segment,
			  struct dmar_dev_scope *devices,
			  int devices_cnt)
{
	int i, level;
	struct device *tmp, *dev = &info->dev->dev;
	struct acpi_dmar_device_scope *scope;
	struct acpi_dmar_pci_path *path;

	/* Scope entries of another segment can never match this device */
	if (segment != info->seg)
		return 0;

	for (; start < end; start += scope->length) {
		scope = start;
		/* Only endpoint and bridge scopes are matched here */
		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
			continue;

		/* The path entries follow the scope header directly */
		path = (struct acpi_dmar_pci_path *)(scope + 1);
		level = (scope->length - sizeof(*scope)) / sizeof(*path);
		if (!dmar_match_pci_path(info, scope->bus, path, level))
			continue;

		/*
		 * We expect devices with endpoint scope to have normal PCI
		 * headers, and devices with bridge scope to have bridge PCI
		 * headers.  However PCI NTB devices may be listed in the
		 * DMAR table with bridge scope, even though they have a
		 * normal PCI header.  NTB devices are identified by class
		 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
		 * for this special case.
		 */
		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
			pr_warn("Device scope type does not match for %s\n",
				pci_name(info->dev));
			return -EINVAL;
		}

		/* Take the first free slot and store a referenced device */
		for_each_dev_scope(devices, devices_cnt, i, tmp)
			if (tmp == NULL) {
				devices[i].bus = info->dev->bus->number;
				devices[i].devfn = info->dev->devfn;
				rcu_assign_pointer(devices[i].dev,
						   get_device(dev));
				return 1;
			}
		/* A matching scope entry with no free slot left is fatal */
		BUG_ON(i >= devices_cnt);
	}

	return 0;
}
274 
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276 			  struct dmar_dev_scope *devices, int count)
277 {
278 	int index;
279 	struct device *tmp;
280 
281 	if (info->seg != segment)
282 		return 0;
283 
284 	for_each_active_dev_scope(devices, count, index, tmp)
285 		if (tmp == &info->dev->dev) {
286 			RCU_INIT_POINTER(devices[index].dev, NULL);
287 			synchronize_rcu();
288 			put_device(tmp);
289 			return 1;
290 		}
291 
292 	return 0;
293 }
294 
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297 	int ret = 0;
298 	struct dmar_drhd_unit *dmaru;
299 	struct acpi_dmar_hardware_unit *drhd;
300 
301 	for_each_drhd_unit(dmaru) {
302 		if (dmaru->include_all)
303 			continue;
304 
305 		drhd = container_of(dmaru->hdr,
306 				    struct acpi_dmar_hardware_unit, header);
307 		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308 				((void *)drhd) + drhd->header.length,
309 				dmaru->segment,
310 				dmaru->devices, dmaru->devices_cnt);
311 		if (ret)
312 			break;
313 	}
314 	if (ret >= 0)
315 		ret = dmar_iommu_notify_scope_dev(info);
316 	if (ret < 0 && dmar_dev_scope_status == 0)
317 		dmar_dev_scope_status = ret;
318 
319 	return ret;
320 }
321 
322 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
323 {
324 	struct dmar_drhd_unit *dmaru;
325 
326 	for_each_drhd_unit(dmaru)
327 		if (dmar_remove_dev_scope(info, dmaru->segment,
328 			dmaru->devices, dmaru->devices_cnt))
329 			break;
330 	dmar_iommu_notify_scope_dev(info);
331 }
332 
333 static int dmar_pci_bus_notifier(struct notifier_block *nb,
334 				 unsigned long action, void *data)
335 {
336 	struct pci_dev *pdev = to_pci_dev(data);
337 	struct dmar_pci_notify_info *info;
338 
339 	/* Only care about add/remove events for physical functions.
340 	 * For VFs we actually do the lookup based on the corresponding
341 	 * PF in device_to_iommu() anyway. */
342 	if (pdev->is_virtfn)
343 		return NOTIFY_DONE;
344 	if (action != BUS_NOTIFY_ADD_DEVICE &&
345 	    action != BUS_NOTIFY_REMOVED_DEVICE)
346 		return NOTIFY_DONE;
347 
348 	info = dmar_alloc_pci_notify_info(pdev, action);
349 	if (!info)
350 		return NOTIFY_DONE;
351 
352 	down_write(&dmar_global_lock);
353 	if (action == BUS_NOTIFY_ADD_DEVICE)
354 		dmar_pci_bus_add_dev(info);
355 	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
356 		dmar_pci_bus_del_dev(info);
357 	up_write(&dmar_global_lock);
358 
359 	dmar_free_pci_notify_info(info);
360 
361 	return NOTIFY_OK;
362 }
363 
/*
 * Notifier block registered on the PCI bus by dmar_register_bus_notifier().
 * NOTE(review): INT_MIN is the smallest possible priority value -
 * presumably chosen so this callback runs after all other bus notifiers;
 * confirm against the notifier chain ordering rules.
 */
static struct notifier_block dmar_pci_bus_nb = {
	.notifier_call = dmar_pci_bus_notifier,
	.priority = INT_MIN,
};
368 
369 static struct dmar_drhd_unit *
370 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
371 {
372 	struct dmar_drhd_unit *dmaru;
373 
374 	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
375 				dmar_rcu_check())
376 		if (dmaru->segment == drhd->segment &&
377 		    dmaru->reg_base_addr == drhd->address)
378 			return dmaru;
379 
380 	return NULL;
381 }
382 
383 /**
384  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
385  * structure which uniquely represent one DMA remapping hardware unit
386  * present in the platform
387  */
388 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
389 {
390 	struct acpi_dmar_hardware_unit *drhd;
391 	struct dmar_drhd_unit *dmaru;
392 	int ret;
393 
394 	drhd = (struct acpi_dmar_hardware_unit *)header;
395 	dmaru = dmar_find_dmaru(drhd);
396 	if (dmaru)
397 		goto out;
398 
399 	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
400 	if (!dmaru)
401 		return -ENOMEM;
402 
403 	/*
404 	 * If header is allocated from slab by ACPI _DSM method, we need to
405 	 * copy the content because the memory buffer will be freed on return.
406 	 */
407 	dmaru->hdr = (void *)(dmaru + 1);
408 	memcpy(dmaru->hdr, header, header->length);
409 	dmaru->reg_base_addr = drhd->address;
410 	dmaru->segment = drhd->segment;
411 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
412 	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
413 					      ((void *)drhd) + drhd->header.length,
414 					      &dmaru->devices_cnt);
415 	if (dmaru->devices_cnt && dmaru->devices == NULL) {
416 		kfree(dmaru);
417 		return -ENOMEM;
418 	}
419 
420 	ret = alloc_iommu(dmaru);
421 	if (ret) {
422 		dmar_free_dev_scope(&dmaru->devices,
423 				    &dmaru->devices_cnt);
424 		kfree(dmaru);
425 		return ret;
426 	}
427 	dmar_register_drhd_unit(dmaru);
428 
429 out:
430 	if (arg)
431 		(*(int *)arg)++;
432 
433 	return 0;
434 }
435 
436 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
437 {
438 	if (dmaru->devices && dmaru->devices_cnt)
439 		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
440 	if (dmaru->iommu)
441 		free_iommu(dmaru->iommu);
442 	kfree(dmaru);
443 }
444 
445 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
446 				      void *arg)
447 {
448 	struct acpi_dmar_andd *andd = (void *)header;
449 
450 	/* Check for NUL termination within the designated length */
451 	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
452 		pr_warn(FW_BUG
453 			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
454 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
455 			   dmi_get_system_info(DMI_BIOS_VENDOR),
456 			   dmi_get_system_info(DMI_BIOS_VERSION),
457 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
458 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
459 		return -EINVAL;
460 	}
461 	pr_info("ANDD device: %x name: %s\n", andd->device_number,
462 		andd->device_name);
463 
464 	return 0;
465 }
466 
#ifdef CONFIG_ACPI_NUMA
/*
 * Apply one RHSA (remapping hardware static affinity) structure: find the
 * DRHD unit with the same register base address and set the NUMA node of
 * its IOMMU from the structure's proximity domain.
 *
 * If the proximity domain cannot be mapped to a node, or maps to a node
 * that is not online, the IOMMU node is set to NUMA_NO_NODE.  An RHSA
 * that refers to no known DRHD unit is reported as a firmware bug and
 * taints the kernel, but is not treated as an error.
 *
 * Return: always 0.
 */
static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_rhsa *rhsa;
	struct dmar_drhd_unit *drhd;

	rhsa = (struct acpi_dmar_rhsa *)header;
	for_each_drhd_unit(drhd) {
		if (drhd->reg_base_addr == rhsa->base_address) {
			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

			/*
			 * The mapping may already be NUMA_NO_NODE (-1);
			 * node_online() must not be asked about that value
			 * because it indexes the node state bitmap with it.
			 */
			if (node != NUMA_NO_NODE && !node_online(node))
				node = NUMA_NO_NODE;
			drhd->iommu->node = node;
			return 0;
		}
	}
	pr_warn(FW_BUG
		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		rhsa->base_address,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);

	return 0;
}
#else
/* Without ACPI NUMA support RHSA structures are accepted and ignored. */
#define	dmar_parse_one_rhsa		dmar_res_noop
#endif
498 
499 static void
500 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
501 {
502 	struct acpi_dmar_hardware_unit *drhd;
503 	struct acpi_dmar_reserved_memory *rmrr;
504 	struct acpi_dmar_atsr *atsr;
505 	struct acpi_dmar_rhsa *rhsa;
506 
507 	switch (header->type) {
508 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
509 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
510 				    header);
511 		pr_info("DRHD base: %#016Lx flags: %#x\n",
512 			(unsigned long long)drhd->address, drhd->flags);
513 		break;
514 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
515 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
516 				    header);
517 		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
518 			(unsigned long long)rmrr->base_address,
519 			(unsigned long long)rmrr->end_address);
520 		break;
521 	case ACPI_DMAR_TYPE_ROOT_ATS:
522 		atsr = container_of(header, struct acpi_dmar_atsr, header);
523 		pr_info("ATSR flags: %#x\n", atsr->flags);
524 		break;
525 	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
526 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
527 		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
528 		       (unsigned long long)rhsa->base_address,
529 		       rhsa->proximity_domain);
530 		break;
531 	case ACPI_DMAR_TYPE_NAMESPACE:
532 		/* We don't print this here because we need to sanity-check
533 		   it first. So print it in dmar_parse_one_andd() instead. */
534 		break;
535 	}
536 }
537 
538 /**
539  * dmar_table_detect - checks to see if the platform supports DMAR devices
540  */
541 static int __init dmar_table_detect(void)
542 {
543 	acpi_status status = AE_OK;
544 
545 	/* if we could find DMAR table, then there are DMAR devices */
546 	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
547 
548 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
549 		pr_warn("Unable to map DMAR\n");
550 		status = AE_NOT_FOUND;
551 	}
552 
553 	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
554 }
555 
556 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
557 				       size_t len, struct dmar_res_callback *cb)
558 {
559 	struct acpi_dmar_header *iter, *next;
560 	struct acpi_dmar_header *end = ((void *)start) + len;
561 
562 	for (iter = start; iter < end; iter = next) {
563 		next = (void *)iter + iter->length;
564 		if (iter->length == 0) {
565 			/* Avoid looping forever on bad ACPI tables */
566 			pr_debug(FW_BUG "Invalid 0-length structure\n");
567 			break;
568 		} else if (next > end) {
569 			/* Avoid passing table end */
570 			pr_warn(FW_BUG "Record passes table end\n");
571 			return -EINVAL;
572 		}
573 
574 		if (cb->print_entry)
575 			dmar_table_print_dmar_entry(iter);
576 
577 		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
578 			/* continue for forward compatibility */
579 			pr_debug("Unknown DMAR structure type %d\n",
580 				 iter->type);
581 		} else if (cb->cb[iter->type]) {
582 			int ret;
583 
584 			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
585 			if (ret)
586 				return ret;
587 		} else if (!cb->ignore_unhandled) {
588 			pr_warn("No handler for DMAR structure type %d\n",
589 				iter->type);
590 			return -EINVAL;
591 		}
592 	}
593 
594 	return 0;
595 }
596 
597 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
598 				       struct dmar_res_callback *cb)
599 {
600 	return dmar_walk_remapping_entries((void *)(dmar + 1),
601 			dmar->header.length - sizeof(*dmar), cb);
602 }
603 
604 /**
605  * parse_dmar_table - parses the DMA reporting table
606  */
607 static int __init
608 parse_dmar_table(void)
609 {
610 	struct acpi_table_dmar *dmar;
611 	int drhd_count = 0;
612 	int ret;
613 	struct dmar_res_callback cb = {
614 		.print_entry = true,
615 		.ignore_unhandled = true,
616 		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
617 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
618 		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
619 		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
620 		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
621 		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
622 	};
623 
624 	/*
625 	 * Do it again, earlier dmar_tbl mapping could be mapped with
626 	 * fixed map.
627 	 */
628 	dmar_table_detect();
629 
630 	/*
631 	 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
632 	 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
633 	 */
634 	dmar_tbl = tboot_get_dmar_table(dmar_tbl);
635 
636 	dmar = (struct acpi_table_dmar *)dmar_tbl;
637 	if (!dmar)
638 		return -ENODEV;
639 
640 	if (dmar->width < PAGE_SHIFT - 1) {
641 		pr_warn("Invalid DMAR haw\n");
642 		return -EINVAL;
643 	}
644 
645 	pr_info("Host address width %d\n", dmar->width + 1);
646 	ret = dmar_walk_dmar_table(dmar, &cb);
647 	if (ret == 0 && drhd_count == 0)
648 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
649 
650 	return ret;
651 }
652 
653 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
654 				 int cnt, struct pci_dev *dev)
655 {
656 	int index;
657 	struct device *tmp;
658 
659 	while (dev) {
660 		for_each_active_dev_scope(devices, cnt, index, tmp)
661 			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
662 				return 1;
663 
664 		/* Check our parent */
665 		dev = dev->bus->self;
666 	}
667 
668 	return 0;
669 }
670 
671 struct dmar_drhd_unit *
672 dmar_find_matched_drhd_unit(struct pci_dev *dev)
673 {
674 	struct dmar_drhd_unit *dmaru;
675 	struct acpi_dmar_hardware_unit *drhd;
676 
677 	dev = pci_physfn(dev);
678 
679 	rcu_read_lock();
680 	for_each_drhd_unit(dmaru) {
681 		drhd = container_of(dmaru->hdr,
682 				    struct acpi_dmar_hardware_unit,
683 				    header);
684 
685 		if (dmaru->include_all &&
686 		    drhd->segment == pci_domain_nr(dev->bus))
687 			goto out;
688 
689 		if (dmar_pci_device_match(dmaru->devices,
690 					  dmaru->devices_cnt, dev))
691 			goto out;
692 	}
693 	dmaru = NULL;
694 out:
695 	rcu_read_unlock();
696 
697 	return dmaru;
698 }
699 
/*
 * Record an ACPI namespace device in the device scope of the DRHD unit
 * that lists it.
 *
 * @device_number: enumeration ID taken from the ANDD structure
 * @adev:          the ACPI device the ANDD object name resolved to
 *
 * Searches the NAMESPACE scope entries of every DRHD unit for one whose
 * enumeration_id equals @device_number and stores the device, with a
 * reference taken, in the first free slot of that unit's scope array.
 * A warning is printed if no unit lists the device.
 */
static void __init dmar_acpi_insert_dev_scope(u8 device_number,
					      struct acpi_device *adev)
{
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;
	struct acpi_dmar_device_scope *scope;
	struct device *tmp;
	int i;
	struct acpi_dmar_pci_path *path;

	for_each_drhd_unit(dmaru) {
		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit,
				    header);

		/* Scope entries follow the fixed DRHD part up to its length */
		for (scope = (void *)(drhd + 1);
		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
		     scope = ((void *)scope) + scope->length) {
			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
				continue;
			if (scope->enumeration_id != device_number)
				continue;

			/* Only the first path entry after the scope header is used */
			path = (void *)(scope + 1);
			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
				dev_name(&adev->dev), dmaru->reg_base_addr,
				scope->bus, path->device, path->function);
			/* Take the first free slot and store a referenced device */
			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
				if (tmp == NULL) {
					dmaru->devices[i].bus = scope->bus;
					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
									    path->function);
					rcu_assign_pointer(dmaru->devices[i].dev,
							   get_device(&adev->dev));
					return;
				}
			/* A matching scope entry with no free slot left is fatal */
			BUG_ON(i >= dmaru->devices_cnt);
		}
	}
	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
		device_number, dev_name(&adev->dev));
}
742 
743 static int __init dmar_acpi_dev_scope_init(void)
744 {
745 	struct acpi_dmar_andd *andd;
746 
747 	if (dmar_tbl == NULL)
748 		return -ENODEV;
749 
750 	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
751 	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
752 	     andd = ((void *)andd) + andd->header.length) {
753 		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
754 			acpi_handle h;
755 			struct acpi_device *adev;
756 
757 			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
758 							  andd->device_name,
759 							  &h))) {
760 				pr_err("Failed to find handle for ACPI object %s\n",
761 				       andd->device_name);
762 				continue;
763 			}
764 			if (acpi_bus_get_device(h, &adev)) {
765 				pr_err("Failed to get device for ACPI object %s\n",
766 				       andd->device_name);
767 				continue;
768 			}
769 			dmar_acpi_insert_dev_scope(andd->device_number, adev);
770 		}
771 	}
772 	return 0;
773 }
774 
775 int __init dmar_dev_scope_init(void)
776 {
777 	struct pci_dev *dev = NULL;
778 	struct dmar_pci_notify_info *info;
779 
780 	if (dmar_dev_scope_status != 1)
781 		return dmar_dev_scope_status;
782 
783 	if (list_empty(&dmar_drhd_units)) {
784 		dmar_dev_scope_status = -ENODEV;
785 	} else {
786 		dmar_dev_scope_status = 0;
787 
788 		dmar_acpi_dev_scope_init();
789 
790 		for_each_pci_dev(dev) {
791 			if (dev->is_virtfn)
792 				continue;
793 
794 			info = dmar_alloc_pci_notify_info(dev,
795 					BUS_NOTIFY_ADD_DEVICE);
796 			if (!info) {
797 				return dmar_dev_scope_status;
798 			} else {
799 				dmar_pci_bus_add_dev(info);
800 				dmar_free_pci_notify_info(info);
801 			}
802 		}
803 	}
804 
805 	return dmar_dev_scope_status;
806 }
807 
/*
 * Register dmar_pci_bus_nb on the PCI bus type so that
 * dmar_pci_bus_notifier() is called for PCI bus events.
 */
void __init dmar_register_bus_notifier(void)
{
	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
}
812 
813 
814 int __init dmar_table_init(void)
815 {
816 	static int dmar_table_initialized;
817 	int ret;
818 
819 	if (dmar_table_initialized == 0) {
820 		ret = parse_dmar_table();
821 		if (ret < 0) {
822 			if (ret != -ENODEV)
823 				pr_info("Parse DMAR table failure.\n");
824 		} else  if (list_empty(&dmar_drhd_units)) {
825 			pr_info("No DMAR devices found\n");
826 			ret = -ENODEV;
827 		}
828 
829 		if (ret < 0)
830 			dmar_table_initialized = ret;
831 		else
832 			dmar_table_initialized = 1;
833 	}
834 
835 	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
836 }
837 
/*
 * Report a DMAR unit whose description by the firmware is unusable.
 *
 * @addr:    register base address reported for the unit
 * @message: text appended directly after the address in the warning
 *
 * The warning, which includes the DMI BIOS vendor, BIOS version and
 * product version, is printed only once; the firmware-workaround taint
 * is added on every call.
 */
static void warn_invalid_dmar(u64 addr, const char *message)
{
	pr_warn_once(FW_BUG
		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		addr, message,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
}
849 
850 static int __ref
851 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
852 {
853 	struct acpi_dmar_hardware_unit *drhd;
854 	void __iomem *addr;
855 	u64 cap, ecap;
856 
857 	drhd = (void *)entry;
858 	if (!drhd->address) {
859 		warn_invalid_dmar(0, "");
860 		return -EINVAL;
861 	}
862 
863 	if (arg)
864 		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
865 	else
866 		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
867 	if (!addr) {
868 		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
869 		return -EINVAL;
870 	}
871 
872 	cap = dmar_readq(addr + DMAR_CAP_REG);
873 	ecap = dmar_readq(addr + DMAR_ECAP_REG);
874 
875 	if (arg)
876 		iounmap(addr);
877 	else
878 		early_iounmap(addr, VTD_PAGE_SIZE);
879 
880 	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
881 		warn_invalid_dmar(drhd->address, " returns all ones");
882 		return -EINVAL;
883 	}
884 
885 	return 0;
886 }
887 
888 int __init detect_intel_iommu(void)
889 {
890 	int ret;
891 	struct dmar_res_callback validate_drhd_cb = {
892 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
893 		.ignore_unhandled = true,
894 	};
895 
896 	down_write(&dmar_global_lock);
897 	ret = dmar_table_detect();
898 	if (!ret)
899 		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
900 					   &validate_drhd_cb);
901 	if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) {
902 		iommu_detected = 1;
903 		/* Make sure ACS will be enabled */
904 		pci_request_acs();
905 	}
906 
907 #ifdef CONFIG_X86
908 	if (!ret) {
909 		x86_init.iommu.iommu_init = intel_iommu_init;
910 		x86_platform.iommu_shutdown = intel_iommu_shutdown;
911 	}
912 
913 #endif
914 
915 	if (dmar_tbl) {
916 		acpi_put_table(dmar_tbl);
917 		dmar_tbl = NULL;
918 	}
919 	up_write(&dmar_global_lock);
920 
921 	return ret ? ret : 1;
922 }
923 
/*
 * Undo map_iommu(): unmap the IOMMU registers and release the memory
 * region that was reserved for them.
 */
static void unmap_iommu(struct intel_iommu *iommu)
{
	iounmap(iommu->reg);
	release_mem_region(iommu->reg_phys, iommu->reg_size);
}
929 
/**
 * map_iommu: map the iommu's registers
 * @iommu: the iommu to map
 * @phys_addr: the physical address of the base register
 *
 * Memory map the iommu's registers.  Start w/ a single page, and
 * possibly expand if that turns out to be insufficient.
 *
 * Return: 0 on success; -EBUSY if the memory region cannot be reserved,
 * -ENOMEM if it cannot be mapped, -EINVAL if the capability registers
 * read back as all ones.
 */
static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
{
	int map_size, err=0;

	iommu->reg_phys = phys_addr;
	iommu->reg_size = VTD_PAGE_SIZE;

	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
		pr_err("Can't reserve memory\n");
		err = -EBUSY;
		goto out;
	}

	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
	if (!iommu->reg) {
		pr_err("Can't map the region\n");
		err = -ENOMEM;
		goto release;
	}

	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

	/* Both registers reading all ones means no usable unit is there */
	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
		err = -EINVAL;
		warn_invalid_dmar(phys_addr, " returns all ones");
		goto unmap;
	}
	iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);

	/* the registers might be more than one page */
	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
			 cap_max_fault_reg_offset(iommu->cap));
	map_size = VTD_PAGE_ALIGN(map_size);
	if (map_size > iommu->reg_size) {
		/* Drop the one-page mapping and redo it with the full size */
		iounmap(iommu->reg);
		release_mem_region(iommu->reg_phys, iommu->reg_size);
		iommu->reg_size = map_size;
		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
					iommu->name)) {
			pr_err("Can't reserve memory\n");
			err = -EBUSY;
			goto out;
		}
		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
		if (!iommu->reg) {
			pr_err("Can't map the region\n");
			err = -ENOMEM;
			goto release;
		}
	}
	err = 0;
	goto out;

unmap:
	iounmap(iommu->reg);
release:
	release_mem_region(iommu->reg_phys, iommu->reg_size);
out:
	return err;
}
999 
1000 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1001 {
1002 	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1003 					    DMAR_UNITS_SUPPORTED);
1004 	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1005 		iommu->seq_id = -1;
1006 	} else {
1007 		set_bit(iommu->seq_id, dmar_seq_ids);
1008 		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1009 	}
1010 
1011 	return iommu->seq_id;
1012 }
1013 
1014 static void dmar_free_seq_id(struct intel_iommu *iommu)
1015 {
1016 	if (iommu->seq_id >= 0) {
1017 		clear_bit(iommu->seq_id, dmar_seq_ids);
1018 		iommu->seq_id = -1;
1019 	}
1020 }
1021 
1022 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1023 {
1024 	struct intel_iommu *iommu;
1025 	u32 ver, sts;
1026 	int agaw = 0;
1027 	int msagaw = 0;
1028 	int err;
1029 
1030 	if (!drhd->reg_base_addr) {
1031 		warn_invalid_dmar(0, "");
1032 		return -EINVAL;
1033 	}
1034 
1035 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1036 	if (!iommu)
1037 		return -ENOMEM;
1038 
1039 	if (dmar_alloc_seq_id(iommu) < 0) {
1040 		pr_err("Failed to allocate seq_id\n");
1041 		err = -ENOSPC;
1042 		goto error;
1043 	}
1044 
1045 	err = map_iommu(iommu, drhd->reg_base_addr);
1046 	if (err) {
1047 		pr_err("Failed to map %s\n", iommu->name);
1048 		goto error_free_seq_id;
1049 	}
1050 
1051 	err = -EINVAL;
1052 	agaw = iommu_calculate_agaw(iommu);
1053 	if (agaw < 0) {
1054 		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1055 			iommu->seq_id);
1056 		goto err_unmap;
1057 	}
1058 	msagaw = iommu_calculate_max_sagaw(iommu);
1059 	if (msagaw < 0) {
1060 		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1061 			iommu->seq_id);
1062 		goto err_unmap;
1063 	}
1064 	iommu->agaw = agaw;
1065 	iommu->msagaw = msagaw;
1066 	iommu->segment = drhd->segment;
1067 
1068 	iommu->node = NUMA_NO_NODE;
1069 
1070 	ver = readl(iommu->reg + DMAR_VER_REG);
1071 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1072 		iommu->name,
1073 		(unsigned long long)drhd->reg_base_addr,
1074 		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1075 		(unsigned long long)iommu->cap,
1076 		(unsigned long long)iommu->ecap);
1077 
1078 	/* Reflect status in gcmd */
1079 	sts = readl(iommu->reg + DMAR_GSTS_REG);
1080 	if (sts & DMA_GSTS_IRES)
1081 		iommu->gcmd |= DMA_GCMD_IRE;
1082 	if (sts & DMA_GSTS_TES)
1083 		iommu->gcmd |= DMA_GCMD_TE;
1084 	if (sts & DMA_GSTS_QIES)
1085 		iommu->gcmd |= DMA_GCMD_QIE;
1086 
1087 	raw_spin_lock_init(&iommu->register_lock);
1088 
1089 	if (intel_iommu_enabled) {
1090 		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1091 					     intel_iommu_groups,
1092 					     "%s", iommu->name);
1093 		if (err)
1094 			goto err_unmap;
1095 
1096 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1097 
1098 		err = iommu_device_register(&iommu->iommu);
1099 		if (err)
1100 			goto err_unmap;
1101 	}
1102 
1103 	drhd->iommu = iommu;
1104 
1105 	return 0;
1106 
1107 err_unmap:
1108 	unmap_iommu(iommu);
1109 error_free_seq_id:
1110 	dmar_free_seq_id(iommu);
1111 error:
1112 	kfree(iommu);
1113 	return err;
1114 }
1115 
1116 static void free_iommu(struct intel_iommu *iommu)
1117 {
1118 	if (intel_iommu_enabled) {
1119 		iommu_device_unregister(&iommu->iommu);
1120 		iommu_device_sysfs_remove(&iommu->iommu);
1121 	}
1122 
1123 	if (iommu->irq) {
1124 		if (iommu->pr_irq) {
1125 			free_irq(iommu->pr_irq, iommu);
1126 			dmar_free_hwirq(iommu->pr_irq);
1127 			iommu->pr_irq = 0;
1128 		}
1129 		free_irq(iommu->irq, iommu);
1130 		dmar_free_hwirq(iommu->irq);
1131 		iommu->irq = 0;
1132 	}
1133 
1134 	if (iommu->qi) {
1135 		free_page((unsigned long)iommu->qi->desc);
1136 		kfree(iommu->qi->desc_status);
1137 		kfree(iommu->qi);
1138 	}
1139 
1140 	if (iommu->reg)
1141 		unmap_iommu(iommu);
1142 
1143 	dmar_free_seq_id(iommu);
1144 	kfree(iommu);
1145 }
1146 
1147 /*
1148  * Reclaim all the submitted descriptors which have completed its work.
1149  */
1150 static inline void reclaim_free_desc(struct q_inval *qi)
1151 {
1152 	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1153 	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1154 		qi->desc_status[qi->free_tail] = QI_FREE;
1155 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1156 		qi->free_cnt++;
1157 	}
1158 }
1159 
/*
 * Check the fault status register while a queued-invalidation submission
 * is being polled for completion.
 *
 * @iommu:      unit whose invalidation queue is in use
 * @index:      queue slot of the first descriptor of the submission
 * @wait_index: queue slot of the submission's wait descriptor
 *
 * Returns -EAGAIN if the wait descriptor's status is QI_ABORT (either on
 * entry or after handling an ITE), -EINVAL if an IQE is flagged and the
 * hardware head points at @index, and 0 otherwise.
 */
static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
	u32 fault;
	int head, tail;
	struct q_inval *qi = iommu->qi;
	int shift = qi_shift(iommu);

	/* Already marked aborted by an earlier ITE pass: ask the caller to retry. */
	if (qi->desc_status[wait_index] == QI_ABORT)
		return -EAGAIN;

	fault = readl(iommu->reg + DMAR_FSTS_REG);

	/*
	 * If IQE happens, the head points to the descriptor associated
	 * with the error. No new descriptors are fetched until the IQE
	 * is cleared.
	 */
	if (fault & DMA_FSTS_IQE) {
		head = readl(iommu->reg + DMAR_IQH_REG);
		/* head is used as a byte offset; shifting yields the slot index. */
		if ((head >> shift) == index) {
			struct qi_desc *desc = qi->desc + head;

			/*
			 * desc->qw2 and desc->qw3 are either reserved or
			 * used by software as private data. We won't print
			 * out these two qw's for security consideration.
			 */
			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
			       (unsigned long long)desc->qw0,
			       (unsigned long long)desc->qw1);
			/*
			 * Overwrite the offending descriptor with this
			 * submission's wait descriptor, then write IQE back
			 * to the fault status register.
			 */
			memcpy(desc, qi->desc + (wait_index << shift),
			       1 << shift);
			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
			return -EINVAL;
		}
	}

	/*
	 * If ITE happens, all pending wait_desc commands are aborted.
	 * No new descriptors are fetched until the ITE is cleared.
	 */
	if (fault & DMA_FSTS_ITE) {
		/* Slot just before the hardware head, forced to an odd index. */
		head = readl(iommu->reg + DMAR_IQH_REG);
		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
		head |= 1;
		/* Slot just before the hardware tail. */
		tail = readl(iommu->reg + DMAR_IQT_REG);
		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;

		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

		/*
		 * Walk backwards two slots at a time, from head until tail is
		 * reached, turning every QI_IN_USE slot visited into QI_ABORT.
		 */
		do {
			if (qi->desc_status[head] == QI_IN_USE)
				qi->desc_status[head] = QI_ABORT;
			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
		} while (head != tail);

		if (qi->desc_status[wait_index] == QI_ABORT)
			return -EAGAIN;
	}

	/* ICE is only written back to the status register; no error is returned. */
	if (fault & DMA_FSTS_ICE)
		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

	return 0;
}
1225 
/*
 * Function to submit invalidation descriptors of all types to the queued
 * invalidation interface(QI). Multiple descriptors can be submitted at a
 * time, a wait descriptor will be appended to each submission to ensure
 * hardware has completed the invalidation before return. Wait descriptors
 * can be part of the submission but it will not be polled for completion.
 *
 * @iommu:   unit to submit to; if it has no queue (iommu->qi is NULL) the
 *           call returns 0 without doing anything
 * @desc:    array of @count descriptors, copied into the queue
 * @count:   number of descriptors in @desc
 * @options: QI_OPT_WAIT_DRAIN adds QI_IWD_PRQ_DRAIN to the wait descriptor
 *
 * Returns 0 on completion or the error reported by qi_check_fault(). An
 * -EAGAIN from qi_check_fault() is never returned: the whole submission is
 * restarted instead.
 */
int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
		   unsigned int count, unsigned long options)
{
	struct q_inval *qi = iommu->qi;
	struct qi_desc wait_desc;
	int wait_index, index;
	unsigned long flags;
	int offset, shift;
	int rc, i;

	if (!qi)
		return 0;

restart:
	rc = 0;

	raw_spin_lock_irqsave(&qi->q_lock, flags);
	/*
	 * Check if we have enough empty slots in the queue to submit,
	 * the calculation is based on:
	 * # of desc + 1 wait desc + 1 space between head and tail
	 */
	while (qi->free_cnt < count + 2) {
		/* Drop the lock while spinning so slots can be reclaimed. */
		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
		cpu_relax();
		raw_spin_lock_irqsave(&qi->q_lock, flags);
	}

	/* The submission occupies slots index..index+count-1, then the wait slot. */
	index = qi->free_head;
	wait_index = (index + count) % QI_LENGTH;
	shift = qi_shift(iommu);

	for (i = 0; i < count; i++) {
		offset = ((index + i) % QI_LENGTH) << shift;
		memcpy(qi->desc + offset, &desc[i], 1 << shift);
		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
	}
	qi->desc_status[wait_index] = QI_IN_USE;

	/*
	 * Build the wait descriptor: it carries QI_DONE as status data and
	 * the physical address of this submission's desc_status slot, which
	 * is the location polled below.
	 */
	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
	if (options & QI_OPT_WAIT_DRAIN)
		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
	wait_desc.qw2 = 0;
	wait_desc.qw3 = 0;

	offset = wait_index << shift;
	memcpy(qi->desc + offset, &wait_desc, 1 << shift);

	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
	qi->free_cnt -= count + 1;

	/*
	 * update the HW tail register indicating the presence of
	 * new descriptors.
	 */
	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);

	while (qi->desc_status[wait_index] != QI_DONE) {
		/*
		 * We will leave the interrupts disabled, to prevent interrupt
		 * context to queue another cmd while a cmd is already submitted
		 * and waiting for completion on this cpu. This is to avoid
		 * a deadlock where the interrupt context can wait indefinitely
		 * for free slots in the queue.
		 */
		rc = qi_check_fault(iommu, index, wait_index);
		if (rc)
			break;

		/* Only the lock is released here; interrupts stay disabled. */
		raw_spin_unlock(&qi->q_lock);
		cpu_relax();
		raw_spin_lock(&qi->q_lock);
	}

	/*
	 * Mark the submitted descriptors done (also on error) so that
	 * reclaim_free_desc() can hand their slots back.
	 */
	for (i = 0; i < count; i++)
		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;

	reclaim_free_desc(qi);
	raw_spin_unlock_irqrestore(&qi->q_lock, flags);

	if (rc == -EAGAIN)
		goto restart;

	return rc;
}
1320 
1321 /*
1322  * Flush the global interrupt entry cache.
1323  */
1324 void qi_global_iec(struct intel_iommu *iommu)
1325 {
1326 	struct qi_desc desc;
1327 
1328 	desc.qw0 = QI_IEC_TYPE;
1329 	desc.qw1 = 0;
1330 	desc.qw2 = 0;
1331 	desc.qw3 = 0;
1332 
1333 	/* should never fail */
1334 	qi_submit_sync(iommu, &desc, 1, 0);
1335 }
1336 
1337 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1338 		      u64 type)
1339 {
1340 	struct qi_desc desc;
1341 
1342 	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1343 			| QI_CC_GRAN(type) | QI_CC_TYPE;
1344 	desc.qw1 = 0;
1345 	desc.qw2 = 0;
1346 	desc.qw3 = 0;
1347 
1348 	qi_submit_sync(iommu, &desc, 1, 0);
1349 }
1350 
1351 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1352 		    unsigned int size_order, u64 type)
1353 {
1354 	u8 dw = 0, dr = 0;
1355 
1356 	struct qi_desc desc;
1357 	int ih = 0;
1358 
1359 	if (cap_write_drain(iommu->cap))
1360 		dw = 1;
1361 
1362 	if (cap_read_drain(iommu->cap))
1363 		dr = 1;
1364 
1365 	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1366 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1367 	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1368 		| QI_IOTLB_AM(size_order);
1369 	desc.qw2 = 0;
1370 	desc.qw3 = 0;
1371 
1372 	qi_submit_sync(iommu, &desc, 1, 0);
1373 }
1374 
1375 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1376 			u16 qdep, u64 addr, unsigned mask)
1377 {
1378 	struct qi_desc desc;
1379 
1380 	if (mask) {
1381 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1382 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1383 	} else
1384 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1385 
1386 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1387 		qdep = 0;
1388 
1389 	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1390 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1391 	desc.qw2 = 0;
1392 	desc.qw3 = 0;
1393 
1394 	qi_submit_sync(iommu, &desc, 1, 0);
1395 }
1396 
1397 /* PASID-based IOTLB invalidation */
1398 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1399 		     unsigned long npages, bool ih)
1400 {
1401 	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1402 
1403 	/*
1404 	 * npages == -1 means a PASID-selective invalidation, otherwise,
1405 	 * a positive value for Page-selective-within-PASID invalidation.
1406 	 * 0 is not a valid input.
1407 	 */
1408 	if (WARN_ON(!npages)) {
1409 		pr_err("Invalid input npages = %ld\n", npages);
1410 		return;
1411 	}
1412 
1413 	if (npages == -1) {
1414 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1415 				QI_EIOTLB_DID(did) |
1416 				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1417 				QI_EIOTLB_TYPE;
1418 		desc.qw1 = 0;
1419 	} else {
1420 		int mask = ilog2(__roundup_pow_of_two(npages));
1421 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1422 
1423 		if (WARN_ON_ONCE(!ALIGN(addr, align)))
1424 			addr &= ~(align - 1);
1425 
1426 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1427 				QI_EIOTLB_DID(did) |
1428 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1429 				QI_EIOTLB_TYPE;
1430 		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1431 				QI_EIOTLB_IH(ih) |
1432 				QI_EIOTLB_AM(mask);
1433 	}
1434 
1435 	qi_submit_sync(iommu, &desc, 1, 0);
1436 }
1437 
1438 /* PASID-based device IOTLB Invalidate */
1439 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1440 			      u32 pasid,  u16 qdep, u64 addr,
1441 			      unsigned int size_order, u64 granu)
1442 {
1443 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1444 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1445 
1446 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1447 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1448 		QI_DEV_IOTLB_PFSID(pfsid);
1449 	desc.qw1 = QI_DEV_EIOTLB_GLOB(granu);
1450 
1451 	/*
1452 	 * If S bit is 0, we only flush a single page. If S bit is set,
1453 	 * The least significant zero bit indicates the invalidation address
1454 	 * range. VT-d spec 6.5.2.6.
1455 	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1456 	 * size order = 0 is PAGE_SIZE 4KB
1457 	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1458 	 * ECAP.
1459 	 */
1460 	desc.qw1 |= addr & ~mask;
1461 	if (size_order)
1462 		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1463 
1464 	qi_submit_sync(iommu, &desc, 1, 0);
1465 }
1466 
1467 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1468 			  u64 granu, int pasid)
1469 {
1470 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1471 
1472 	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1473 			QI_PC_GRAN(granu) | QI_PC_TYPE;
1474 	qi_submit_sync(iommu, &desc, 1, 0);
1475 }
1476 
1477 /*
1478  * Disable Queued Invalidation interface.
1479  */
1480 void dmar_disable_qi(struct intel_iommu *iommu)
1481 {
1482 	unsigned long flags;
1483 	u32 sts;
1484 	cycles_t start_time = get_cycles();
1485 
1486 	if (!ecap_qis(iommu->ecap))
1487 		return;
1488 
1489 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1490 
1491 	sts =  readl(iommu->reg + DMAR_GSTS_REG);
1492 	if (!(sts & DMA_GSTS_QIES))
1493 		goto end;
1494 
1495 	/*
1496 	 * Give a chance to HW to complete the pending invalidation requests.
1497 	 */
1498 	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1499 		readl(iommu->reg + DMAR_IQH_REG)) &&
1500 		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1501 		cpu_relax();
1502 
1503 	iommu->gcmd &= ~DMA_GCMD_QIE;
1504 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1505 
1506 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1507 		      !(sts & DMA_GSTS_QIES), sts);
1508 end:
1509 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1510 }
1511 
1512 /*
1513  * Enable queued invalidation.
1514  */
1515 static void __dmar_enable_qi(struct intel_iommu *iommu)
1516 {
1517 	u32 sts;
1518 	unsigned long flags;
1519 	struct q_inval *qi = iommu->qi;
1520 	u64 val = virt_to_phys(qi->desc);
1521 
1522 	qi->free_head = qi->free_tail = 0;
1523 	qi->free_cnt = QI_LENGTH;
1524 
1525 	/*
1526 	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1527 	 * is present.
1528 	 */
1529 	if (ecap_smts(iommu->ecap))
1530 		val |= (1 << 11) | 1;
1531 
1532 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1533 
1534 	/* write zero to the tail reg */
1535 	writel(0, iommu->reg + DMAR_IQT_REG);
1536 
1537 	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1538 
1539 	iommu->gcmd |= DMA_GCMD_QIE;
1540 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1541 
1542 	/* Make sure hardware complete it */
1543 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1544 
1545 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1546 }
1547 
1548 /*
1549  * Enable Queued Invalidation interface. This is a must to support
1550  * interrupt-remapping. Also used by DMA-remapping, which replaces
1551  * register based IOTLB invalidation.
1552  */
1553 int dmar_enable_qi(struct intel_iommu *iommu)
1554 {
1555 	struct q_inval *qi;
1556 	struct page *desc_page;
1557 
1558 	if (!ecap_qis(iommu->ecap))
1559 		return -ENOENT;
1560 
1561 	/*
1562 	 * queued invalidation is already setup and enabled.
1563 	 */
1564 	if (iommu->qi)
1565 		return 0;
1566 
1567 	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1568 	if (!iommu->qi)
1569 		return -ENOMEM;
1570 
1571 	qi = iommu->qi;
1572 
1573 	/*
1574 	 * Need two pages to accommodate 256 descriptors of 256 bits each
1575 	 * if the remapping hardware supports scalable mode translation.
1576 	 */
1577 	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1578 				     !!ecap_smts(iommu->ecap));
1579 	if (!desc_page) {
1580 		kfree(qi);
1581 		iommu->qi = NULL;
1582 		return -ENOMEM;
1583 	}
1584 
1585 	qi->desc = page_address(desc_page);
1586 
1587 	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1588 	if (!qi->desc_status) {
1589 		free_page((unsigned long) qi->desc);
1590 		kfree(qi);
1591 		iommu->qi = NULL;
1592 		return -ENOMEM;
1593 	}
1594 
1595 	raw_spin_lock_init(&qi->q_lock);
1596 
1597 	__dmar_enable_qi(iommu);
1598 
1599 	return 0;
1600 }
1601 
/* IOMMU interrupt handling. Most of it is MSI-like. */
1603 
/*
 * Fault category reported by dmar_get_fault_reason() through its
 * fault_type output parameter.
 */
enum faulttype {
	DMA_REMAP,	/* reason found in one of the DMA remapping tables */
	INTR_REMAP,	/* reason found in the interrupt remapping table */
	UNKNOWN,	/* reason code not covered by any table */
};
1609 
/*
 * DMA remapping fault reason strings, indexed directly by the fault
 * reason code. Both the strings and the pointer table are read-only,
 * matching dma_remap_sm_fault_reasons.
 */
static const char * const dma_remap_fault_reasons[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
	"PCE for translation request specifies blocking",
};
1627 
/*
 * Scalable-mode (SM) DMA remapping fault reason strings.
 *
 * The table is indexed by (fault reason - 0x30): dmar_get_fault_reason()
 * consults it for reason codes starting at 0x30. The position of every
 * entry therefore matters; codes without a message are padded with
 * "Unknown", and the trailing comments give the code range each padding
 * run covers.
 */
static const char * const dma_remap_sm_fault_reasons[] = {
	"SM: Invalid Root Table Address",
	"SM: TTM 0 for request with PASID",
	"SM: TTM 0 for page group request",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
	"SM: Error attempting to access Root Entry",
	"SM: Present bit in Root Entry is clear",
	"SM: Non-zero reserved field set in Root Entry",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
	"SM: Error attempting to access Context Entry",
	"SM: Present bit in Context Entry is clear",
	"SM: Non-zero reserved field set in the Context Entry",
	"SM: Invalid Context Entry",
	"SM: DTE field in Context Entry is clear",
	"SM: PASID Enable field in Context Entry is clear",
	"SM: PASID is larger than the max in Context Entry",
	"SM: PRE field in Context-Entry is clear",
	"SM: RID_PASID field error in Context-Entry",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
	"SM: Error attempting to access the PASID Directory Entry",
	"SM: Present bit in Directory Entry is clear",
	"SM: Non-zero reserved field set in PASID Directory Entry",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
	"SM: Error attempting to access PASID Table Entry",
	"SM: Present bit in PASID Table Entry is clear",
	"SM: Non-zero reserved field set in PASID Table Entry",
	"SM: Invalid Scalable-Mode PASID Table Entry",
	"SM: ERE field is clear in PASID Table Entry",
	"SM: SRE field is clear in PASID Table Entry",
	"Unknown", "Unknown",/* 0x5E-0x5F */
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
	"SM: Error attempting to access first-level paging entry",
	"SM: Present bit in first-level paging entry is clear",
	"SM: Non-zero reserved field set in first-level paging entry",
	"SM: Error attempting to access FL-PML4 entry",
	"SM: First-level entry address beyond MGAW in Nested translation",
	"SM: Read permission error in FL-PML4 entry in Nested translation",
	"SM: Read permission error in first-level paging entry in Nested translation",
	"SM: Write permission error in first-level paging entry in Nested translation",
	"SM: Error attempting to access second-level paging entry",
	"SM: Read/Write permission error in second-level paging entry",
	"SM: Non-zero reserved field set in second-level paging entry",
	"SM: Invalid second-level page table pointer",
	"SM: A/D bit update needed in second-level entry when set up in no snoop",
	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
	"SM: Address in first-level translation is not canonical",
	"SM: U/S set 0 for first-level translation with user privilege",
	"SM: No execute permission for request with PASID and ER=1",
	"SM: Address beyond the DMA hardware max",
	"SM: Second-level entry address beyond the max",
	"SM: No write permission for Write/AtomicOp request",
	"SM: No read permission for Read/AtomicOp request",
	"SM: Invalid address-interrupt address",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
	"SM: A/D bit update needed in first-level entry when set up in no snoop",
};
1685 
/*
 * Interrupt remapping fault reason strings, indexed by
 * (fault reason - 0x20). Both the strings and the pointer table are
 * read-only, matching dma_remap_sm_fault_reasons.
 */
static const char * const irq_remap_fault_reasons[] =
{
	"Detected reserved fields in the decoded interrupt-remapped request",
	"Interrupt index exceeded the interrupt-remapping table size",
	"Present field in the IRTE entry is clear",
	"Error accessing interrupt-remapping table pointed by IRTA_REG",
	"Detected reserved fields in the IRTE entry",
	"Blocked a compatibility format interrupt request",
	"Blocked an interrupt request due to source-id verification failure",
};
1696 
1697 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1698 {
1699 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1700 					ARRAY_SIZE(irq_remap_fault_reasons))) {
1701 		*fault_type = INTR_REMAP;
1702 		return irq_remap_fault_reasons[fault_reason - 0x20];
1703 	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1704 			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1705 		*fault_type = DMA_REMAP;
1706 		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1707 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1708 		*fault_type = DMA_REMAP;
1709 		return dma_remap_fault_reasons[fault_reason];
1710 	} else {
1711 		*fault_type = UNKNOWN;
1712 		return "Unknown";
1713 	}
1714 }
1715 
1716 
1717 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1718 {
1719 	if (iommu->irq == irq)
1720 		return DMAR_FECTL_REG;
1721 	else if (iommu->pr_irq == irq)
1722 		return DMAR_PECTL_REG;
1723 	else
1724 		BUG();
1725 }
1726 
1727 void dmar_msi_unmask(struct irq_data *data)
1728 {
1729 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1730 	int reg = dmar_msi_reg(iommu, data->irq);
1731 	unsigned long flag;
1732 
1733 	/* unmask it */
1734 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1735 	writel(0, iommu->reg + reg);
1736 	/* Read a reg to force flush the post write */
1737 	readl(iommu->reg + reg);
1738 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1739 }
1740 
1741 void dmar_msi_mask(struct irq_data *data)
1742 {
1743 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1744 	int reg = dmar_msi_reg(iommu, data->irq);
1745 	unsigned long flag;
1746 
1747 	/* mask it */
1748 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1749 	writel(DMA_FECTL_IM, iommu->reg + reg);
1750 	/* Read a reg to force flush the post write */
1751 	readl(iommu->reg + reg);
1752 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1753 }
1754 
1755 void dmar_msi_write(int irq, struct msi_msg *msg)
1756 {
1757 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1758 	int reg = dmar_msi_reg(iommu, irq);
1759 	unsigned long flag;
1760 
1761 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1762 	writel(msg->data, iommu->reg + reg + 4);
1763 	writel(msg->address_lo, iommu->reg + reg + 8);
1764 	writel(msg->address_hi, iommu->reg + reg + 12);
1765 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1766 }
1767 
1768 void dmar_msi_read(int irq, struct msi_msg *msg)
1769 {
1770 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1771 	int reg = dmar_msi_reg(iommu, irq);
1772 	unsigned long flag;
1773 
1774 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1775 	msg->data = readl(iommu->reg + reg + 4);
1776 	msg->address_lo = readl(iommu->reg + reg + 8);
1777 	msg->address_hi = readl(iommu->reg + reg + 12);
1778 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1779 }
1780 
1781 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1782 		u8 fault_reason, int pasid, u16 source_id,
1783 		unsigned long long addr)
1784 {
1785 	const char *reason;
1786 	int fault_type;
1787 
1788 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1789 
1790 	if (fault_type == INTR_REMAP)
1791 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1792 			source_id >> 8, PCI_SLOT(source_id & 0xFF),
1793 			PCI_FUNC(source_id & 0xFF), addr >> 48,
1794 			fault_reason, reason);
1795 	else
1796 		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1797 		       type ? "DMA Read" : "DMA Write",
1798 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1799 		       PCI_FUNC(source_id & 0xFF), pasid, addr,
1800 		       fault_reason, reason);
1801 	return 0;
1802 }
1803 
/* Size in bytes of one primary fault recording register. */
#define PRIMARY_FAULT_REG_LEN (16)
/*
 * Fault interrupt handler for one DMAR unit.
 *
 * @irq:    interrupt number (not used by the body)
 * @dev_id: the struct intel_iommu the interrupt belongs to
 *
 * Reads the fault status register and, if a primary pending fault is
 * flagged, walks the fault recording registers starting at the index the
 * status register reports, logging (rate limited) and clearing each
 * record until one without the F bit is found. Finally the PFO, PPF and
 * PRO status bits are written back. Always returns IRQ_HANDLED.
 */
irqreturn_t dmar_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;
	/* One rate-limit state shared by every unit and every invocation. */
	static DEFINE_RATELIMIT_STATE(rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status && __ratelimit(&rs))
		pr_err("DRHD: handling fault status reg %x\n", fault_status);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto unlock_exit;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		/* Disable printing, simply clear the fault when ratelimited */
		bool ratelimited = !__ratelimit(&rs);
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		int type, pasid;
		u32 data;
		bool pasid_present;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		/* No fault recorded in this register: the walk is finished. */
		if (!(data & DMA_FRCD_F))
			break;

		/*
		 * The decoded fields below are only filled in, and only
		 * consumed, when the record is going to be printed.
		 */
		if (!ratelimited) {
			fault_reason = dma_frcd_fault_reason(data);
			type = dma_frcd_type(data);

			pasid = dma_frcd_pasid_value(data);
			data = readl(iommu->reg + reg +
				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
			source_id = dma_frcd_source_id(data);

			pasid_present = dma_frcd_pasid_present(data);
			guest_addr = dmar_readq(iommu->reg + reg +
					fault_index * PRIMARY_FAULT_REG_LEN);
			guest_addr = dma_frcd_page_addr(guest_addr);
		}

		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		/* Logging is done with the register lock dropped. */
		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

		if (!ratelimited)
			/* Using pasid -1 if pasid is not present */
			dmar_fault_do_one(iommu, type, fault_reason,
					  pasid_present ? pasid : -1,
					  source_id, guest_addr);

		/* Advance to the next record, wrapping at the register count. */
		fault_index++;
		if (fault_index >= cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		raw_spin_lock_irqsave(&iommu->register_lock, flag);
	}

	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
	       iommu->reg + DMAR_FSTS_REG);

unlock_exit:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}
1882 
1883 int dmar_set_interrupt(struct intel_iommu *iommu)
1884 {
1885 	int irq, ret;
1886 
1887 	/*
1888 	 * Check if the fault interrupt is already initialized.
1889 	 */
1890 	if (iommu->irq)
1891 		return 0;
1892 
1893 	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1894 	if (irq > 0) {
1895 		iommu->irq = irq;
1896 	} else {
1897 		pr_err("No free IRQ vectors\n");
1898 		return -EINVAL;
1899 	}
1900 
1901 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1902 	if (ret)
1903 		pr_err("Can't request irq\n");
1904 	return ret;
1905 }
1906 
1907 int __init enable_drhd_fault_handling(void)
1908 {
1909 	struct dmar_drhd_unit *drhd;
1910 	struct intel_iommu *iommu;
1911 
1912 	/*
1913 	 * Enable fault control interrupt.
1914 	 */
1915 	for_each_iommu(iommu, drhd) {
1916 		u32 fault_status;
1917 		int ret = dmar_set_interrupt(iommu);
1918 
1919 		if (ret) {
1920 			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1921 			       (unsigned long long)drhd->reg_base_addr, ret);
1922 			return -1;
1923 		}
1924 
1925 		/*
1926 		 * Clear any previous faults.
1927 		 */
1928 		dmar_fault(iommu->irq, iommu);
1929 		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1930 		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1931 	}
1932 
1933 	return 0;
1934 }
1935 
1936 /*
1937  * Re-enable Queued Invalidation interface.
1938  */
1939 int dmar_reenable_qi(struct intel_iommu *iommu)
1940 {
1941 	if (!ecap_qis(iommu->ecap))
1942 		return -ENOENT;
1943 
1944 	if (!iommu->qi)
1945 		return -ENOENT;
1946 
1947 	/*
1948 	 * First disable queued invalidation.
1949 	 */
1950 	dmar_disable_qi(iommu);
1951 	/*
1952 	 * Then enable queued invalidation again. Since there is no pending
1953 	 * invalidation requests now, it's safe to re-enable queued
1954 	 * invalidation.
1955 	 */
1956 	__dmar_enable_qi(iommu);
1957 
1958 	return 0;
1959 }
1960 
1961 /*
1962  * Check interrupt remapping support in DMAR table description.
1963  */
1964 int __init dmar_ir_support(void)
1965 {
1966 	struct acpi_table_dmar *dmar;
1967 	dmar = (struct acpi_table_dmar *)dmar_tbl;
1968 	if (!dmar)
1969 		return 0;
1970 	return dmar->flags & 0x1;
1971 }
1972 
1973 /* Check whether DMAR units are in use */
1974 static inline bool dmar_in_use(void)
1975 {
1976 	return irq_remapping_enabled || intel_iommu_enabled;
1977 }
1978 
1979 static int __init dmar_free_unused_resources(void)
1980 {
1981 	struct dmar_drhd_unit *dmaru, *dmaru_n;
1982 
1983 	if (dmar_in_use())
1984 		return 0;
1985 
1986 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
1987 		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
1988 
1989 	down_write(&dmar_global_lock);
1990 	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
1991 		list_del(&dmaru->list);
1992 		dmar_free_drhd(dmaru);
1993 	}
1994 	up_write(&dmar_global_lock);
1995 
1996 	return 0;
1997 }
1998 
/* Run dmar_free_unused_resources() as a late initcall. */
late_initcall(dmar_free_unused_resources);
/*
 * NOTE(review): presumably registers detect_intel_iommu() with the IOMMU
 * init table declared in <asm/iommu_table.h> - confirm against that header.
 */
IOMMU_INIT_POST(detect_intel_iommu);
2001 
/*
 * DMAR Hotplug Support
 * For more details, please refer to Intel(R) Virtualization Technology
 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
 * "Remapping Hardware Unit Hot Plug".
 *
 * dmar_hp_guid is the GUID passed to the ACPI _DSM helpers below.
 */
static guid_t dmar_hp_guid =
	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2011 
/*
 * Currently there's only one revision and BIOS will not check the revision id,
 * so use 0 for safety.
 */
#define	DMAR_DSM_REV_ID			0
/*
 * _DSM function indices. dmar_walk_dsm_resource() maps each of them to
 * the ACPI DMAR structure type its returned buffer is parsed as.
 */
#define	DMAR_DSM_FUNC_DRHD		1	/* hardware unit (DRHD) structures */
#define	DMAR_DSM_FUNC_ATSR		2	/* root ATS (ATSR) structures */
#define	DMAR_DSM_FUNC_RHSA		3	/* hardware affinity (RHSA) structures */
2020 
2021 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2022 {
2023 	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2024 }
2025 
2026 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2027 				  dmar_res_handler_t handler, void *arg)
2028 {
2029 	int ret = -ENODEV;
2030 	union acpi_object *obj;
2031 	struct acpi_dmar_header *start;
2032 	struct dmar_res_callback callback;
2033 	static int res_type[] = {
2034 		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2035 		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2036 		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2037 	};
2038 
2039 	if (!dmar_detect_dsm(handle, func))
2040 		return 0;
2041 
2042 	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2043 				      func, NULL, ACPI_TYPE_BUFFER);
2044 	if (!obj)
2045 		return -ENODEV;
2046 
2047 	memset(&callback, 0, sizeof(callback));
2048 	callback.cb[res_type[func]] = handler;
2049 	callback.arg[res_type[func]] = arg;
2050 	start = (struct acpi_dmar_header *)obj->buffer.pointer;
2051 	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2052 
2053 	ACPI_FREE(obj);
2054 
2055 	return ret;
2056 }
2057 
2058 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2059 {
2060 	int ret;
2061 	struct dmar_drhd_unit *dmaru;
2062 
2063 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2064 	if (!dmaru)
2065 		return -ENODEV;
2066 
2067 	ret = dmar_ir_hotplug(dmaru, true);
2068 	if (ret == 0)
2069 		ret = dmar_iommu_hotplug(dmaru, true);
2070 
2071 	return ret;
2072 }
2073 
2074 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2075 {
2076 	int i, ret;
2077 	struct device *dev;
2078 	struct dmar_drhd_unit *dmaru;
2079 
2080 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2081 	if (!dmaru)
2082 		return 0;
2083 
2084 	/*
2085 	 * All PCI devices managed by this unit should have been destroyed.
2086 	 */
2087 	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2088 		for_each_active_dev_scope(dmaru->devices,
2089 					  dmaru->devices_cnt, i, dev)
2090 			return -EBUSY;
2091 	}
2092 
2093 	ret = dmar_ir_hotplug(dmaru, false);
2094 	if (ret == 0)
2095 		ret = dmar_iommu_hotplug(dmaru, false);
2096 
2097 	return ret;
2098 }
2099 
2100 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2101 {
2102 	struct dmar_drhd_unit *dmaru;
2103 
2104 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2105 	if (dmaru) {
2106 		list_del_rcu(&dmaru->list);
2107 		synchronize_rcu();
2108 		dmar_free_drhd(dmaru);
2109 	}
2110 
2111 	return 0;
2112 }
2113 
2114 static int dmar_hotplug_insert(acpi_handle handle)
2115 {
2116 	int ret;
2117 	int drhd_count = 0;
2118 
2119 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2120 				     &dmar_validate_one_drhd, (void *)1);
2121 	if (ret)
2122 		goto out;
2123 
2124 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2125 				     &dmar_parse_one_drhd, (void *)&drhd_count);
2126 	if (ret == 0 && drhd_count == 0) {
2127 		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2128 		goto out;
2129 	} else if (ret) {
2130 		goto release_drhd;
2131 	}
2132 
2133 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2134 				     &dmar_parse_one_rhsa, NULL);
2135 	if (ret)
2136 		goto release_drhd;
2137 
2138 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2139 				     &dmar_parse_one_atsr, NULL);
2140 	if (ret)
2141 		goto release_atsr;
2142 
2143 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2144 				     &dmar_hp_add_drhd, NULL);
2145 	if (!ret)
2146 		return 0;
2147 
2148 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2149 			       &dmar_hp_remove_drhd, NULL);
2150 release_atsr:
2151 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2152 			       &dmar_release_one_atsr, NULL);
2153 release_drhd:
2154 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2155 			       &dmar_hp_release_drhd, NULL);
2156 out:
2157 	return ret;
2158 }
2159 
2160 static int dmar_hotplug_remove(acpi_handle handle)
2161 {
2162 	int ret;
2163 
2164 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2165 				     &dmar_check_one_atsr, NULL);
2166 	if (ret)
2167 		return ret;
2168 
2169 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2170 				     &dmar_hp_remove_drhd, NULL);
2171 	if (ret == 0) {
2172 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2173 					       &dmar_release_one_atsr, NULL));
2174 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2175 					       &dmar_hp_release_drhd, NULL));
2176 	} else {
2177 		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2178 				       &dmar_hp_add_drhd, NULL);
2179 	}
2180 
2181 	return ret;
2182 }
2183 
2184 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2185 				       void *context, void **retval)
2186 {
2187 	acpi_handle *phdl = retval;
2188 
2189 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2190 		*phdl = handle;
2191 		return AE_CTRL_TERMINATE;
2192 	}
2193 
2194 	return AE_OK;
2195 }
2196 
2197 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2198 {
2199 	int ret;
2200 	acpi_handle tmp = NULL;
2201 	acpi_status status;
2202 
2203 	if (!dmar_in_use())
2204 		return 0;
2205 
2206 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2207 		tmp = handle;
2208 	} else {
2209 		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2210 					     ACPI_UINT32_MAX,
2211 					     dmar_get_dsm_handle,
2212 					     NULL, NULL, &tmp);
2213 		if (ACPI_FAILURE(status)) {
2214 			pr_warn("Failed to locate _DSM method.\n");
2215 			return -ENXIO;
2216 		}
2217 	}
2218 	if (tmp == NULL)
2219 		return 0;
2220 
2221 	down_write(&dmar_global_lock);
2222 	if (insert)
2223 		ret = dmar_hotplug_insert(tmp);
2224 	else
2225 		ret = dmar_hotplug_remove(tmp);
2226 	up_write(&dmar_global_lock);
2227 
2228 	return ret;
2229 }
2230 
2231 int dmar_device_add(acpi_handle handle)
2232 {
2233 	return dmar_device_hotplug(handle, true);
2234 }
2235 
2236 int dmar_device_remove(acpi_handle handle)
2237 {
2238 	return dmar_device_hotplug(handle, false);
2239 }
2240 
2241 /*
2242  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2243  *
2244  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2245  * the ACPI DMAR table. This means that the platform boot firmware has made
2246  * sure no device can issue DMA outside of RMRR regions.
2247  */
2248 bool dmar_platform_optin(void)
2249 {
2250 	struct acpi_table_dmar *dmar;
2251 	acpi_status status;
2252 	bool ret;
2253 
2254 	status = acpi_get_table(ACPI_SIG_DMAR, 0,
2255 				(struct acpi_table_header **)&dmar);
2256 	if (ACPI_FAILURE(status))
2257 		return false;
2258 
2259 	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2260 	acpi_put_table((struct acpi_table_header *)dmar);
2261 
2262 	return ret;
2263 }
2264 EXPORT_SYMBOL_GPL(dmar_platform_optin);
2265