xref: /openbmc/linux/drivers/iommu/intel/dmar.c (revision 55fd7e02)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16 
17 #define pr_fmt(fmt)     "DMAR: " fmt
18 
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 
35 #include "../irq_remapping.h"
36 
/*
 * Handler for a single DMAR remapping structure. It receives the structure
 * header and the per-type argument registered in struct dmar_res_callback,
 * and returns 0 to continue the walk or non-zero to abort it.
 */
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
/*
 * Dispatch table used by dmar_walk_remapping_entries().
 * cb[] and arg[] are indexed by the structure type found in the header.
 */
struct dmar_res_callback {
	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];	/* handler per type, may be NULL */
	void			*arg[ACPI_DMAR_TYPE_RESERVED];	/* opaque argument per type */
	bool			ignore_unhandled;	/* skip types with no handler instead of failing */
	bool			print_entry;		/* log each entry while walking */
};
44 
45 /*
46  * Assumptions:
 * 1) The hotplug framework guarantees that DMAR unit will be hot-added
48  *    before IO devices managed by that unit.
49  * 2) The hotplug framework guarantees that DMAR unit will be hot-removed
50  *    after IO devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
/* Protects the DMAR global data structures in process context. */
DECLARE_RWSEM(dmar_global_lock);
/* List of registered DRHD units; INCLUDE_ALL units are added at the tail. */
LIST_HEAD(dmar_drhd_units);

/* DMAR ACPI table as returned by acpi_get_table(); NULL when not held. */
struct acpi_table_header * __initdata dmar_tbl;
/*
 * Device scope initialization state: stays 1 until dmar_dev_scope_init()
 * runs, then holds 0 or the first negative errno that was recorded.
 */
static int dmar_dev_scope_status = 1;
/* Bitmap of sequence ids handed out by dmar_alloc_seq_id(). */
static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63 
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66 
67 extern const struct iommu_ops intel_iommu_ops;
68 
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71 	/*
72 	 * add INCLUDE_ALL at the tail, so scan the list will find it at
73 	 * the very end.
74 	 */
75 	if (drhd->include_all)
76 		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 	else
78 		list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80 
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83 	struct acpi_dmar_device_scope *scope;
84 
85 	*cnt = 0;
86 	while (start < end) {
87 		scope = start;
88 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 			(*cnt)++;
92 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93 			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94 			pr_warn("Unsupported device scope\n");
95 		}
96 		start += scope->length;
97 	}
98 	if (*cnt == 0)
99 		return NULL;
100 
101 	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103 
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106 	int i;
107 	struct device *tmp_dev;
108 
109 	if (*devices && *cnt) {
110 		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111 			put_device(tmp_dev);
112 		kfree(*devices);
113 	}
114 
115 	*devices = NULL;
116 	*cnt = 0;
117 }
118 
/* Optimize out kzalloc()/kfree() for normal cases */
static char dmar_pci_notify_info_buf[64];

/*
 * Build a dmar_pci_notify_info describing @dev for bus event @event.
 *
 * For BUS_NOTIFY_ADD_DEVICE the PCI path from the root bus down to @dev is
 * recorded in info->path[]; for other events no path is generated and
 * info->level is 0.
 *
 * The static buffer dmar_pci_notify_info_buf is used when the structure
 * fits into it; otherwise the structure is allocated with kzalloc().
 * The result must be released with dmar_free_pci_notify_info().
 *
 * Returns NULL when the device's PCI domain number exceeds U16_MAX or when
 * the allocation fails; in the latter case -ENOMEM is recorded in
 * dmar_dev_scope_status if it was still 0.
 */
static struct dmar_pci_notify_info *
dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
{
	int level = 0;
	size_t size;
	struct pci_dev *tmp;
	struct dmar_pci_notify_info *info;

	/* Callers are expected to filter out virtual functions. */
	BUG_ON(dev->is_virtfn);

	/*
	 * Ignore devices that have a domain number higher than what can
	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
	 */
	if (pci_domain_nr(dev->bus) > U16_MAX)
		return NULL;

	/* Only generate path[] for device addition event */
	if (event == BUS_NOTIFY_ADD_DEVICE)
		for (tmp = dev; tmp; tmp = tmp->bus->self)
			level++;

	/* First pass counted the hierarchy depth; size the path[] array to it. */
	size = struct_size(info, path, level);
	if (size <= sizeof(dmar_pci_notify_info_buf)) {
		/* Note: the static buffer is reused as-is, it is not cleared here. */
		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
	} else {
		info = kzalloc(size, GFP_KERNEL);
		if (!info) {
			pr_warn("Out of memory when allocating notify_info "
				"for %s.\n", pci_name(dev));
			if (dmar_dev_scope_status == 0)
				dmar_dev_scope_status = -ENOMEM;
			return NULL;
		}
	}

	info->event = event;
	info->dev = dev;
	info->seg = pci_domain_nr(dev->bus);
	info->level = level;
	if (event == BUS_NOTIFY_ADD_DEVICE) {
		/*
		 * Second pass: walk from the device up to the root, filling
		 * path[] back to front so that path[0] is the topmost device.
		 */
		for (tmp = dev; tmp; tmp = tmp->bus->self) {
			level--;
			info->path[level].bus = tmp->bus->number;
			info->path[level].device = PCI_SLOT(tmp->devfn);
			info->path[level].function = PCI_FUNC(tmp->devfn);
			if (pci_is_root_bus(tmp->bus))
				info->bus = tmp->bus->number;
		}
	}

	return info;
}
175 
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178 	if ((void *)info != dmar_pci_notify_info_buf)
179 		kfree(info);
180 }
181 
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183 				struct acpi_dmar_pci_path *path, int count)
184 {
185 	int i;
186 
187 	if (info->bus != bus)
188 		goto fallback;
189 	if (info->level != count)
190 		goto fallback;
191 
192 	for (i = 0; i < count; i++) {
193 		if (path[i].device != info->path[i].device ||
194 		    path[i].function != info->path[i].function)
195 			goto fallback;
196 	}
197 
198 	return true;
199 
200 fallback:
201 
202 	if (count != 1)
203 		return false;
204 
205 	i = info->level - 1;
206 	if (bus              == info->path[i].bus &&
207 	    path[0].device   == info->path[i].device &&
208 	    path[0].function == info->path[i].function) {
209 		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210 			bus, path[0].device, path[0].function);
211 		return true;
212 	}
213 
214 	return false;
215 }
216 
/*
 * Try to bind the PCI device described by @info to a slot of @devices.
 *
 * The device scope entries in [@start, @end) are scanned for an ENDPOINT or
 * BRIDGE entry whose PCI path matches @info. On a match the first empty
 * slot of @devices is filled with the device's bus/devfn and a reference to
 * the device.
 *
 * Return: > 0 if match found, 0 if no match found, < 0 if error happens
 */
int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
			  void *start, void*end, u16 segment,
			  struct dmar_dev_scope *devices,
			  int devices_cnt)
{
	int i, level;
	struct device *tmp, *dev = &info->dev->dev;
	struct acpi_dmar_device_scope *scope;
	struct acpi_dmar_pci_path *path;

	/* A device on a different PCI segment can never match. */
	if (segment != info->seg)
		return 0;

	for (; start < end; start += scope->length) {
		scope = start;
		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
			continue;

		/* The path elements immediately follow the scope header. */
		path = (struct acpi_dmar_pci_path *)(scope + 1);
		level = (scope->length - sizeof(*scope)) / sizeof(*path);
		if (!dmar_match_pci_path(info, scope->bus, path, level))
			continue;

		/*
		 * We expect devices with endpoint scope to have normal PCI
		 * headers, and devices with bridge scope to have bridge PCI
		 * headers.  However PCI NTB devices may be listed in the
		 * DMAR table with bridge scope, even though they have a
		 * normal PCI header.  NTB devices are identified by class
		 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
		 * for this special case.
		 */
		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
			pr_warn("Device scope type does not match for %s\n",
				pci_name(info->dev));
			return -EINVAL;
		}

		/* Claim the first unused slot and take a device reference. */
		for_each_dev_scope(devices, devices_cnt, i, tmp)
			if (tmp == NULL) {
				devices[i].bus = info->dev->bus->number;
				devices[i].devfn = info->dev->devfn;
				rcu_assign_pointer(devices[i].dev,
						   get_device(dev));
				return 1;
			}
		/* A matching scope entry with no free slot is treated as fatal. */
		BUG_ON(i >= devices_cnt);
	}

	return 0;
}
274 
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276 			  struct dmar_dev_scope *devices, int count)
277 {
278 	int index;
279 	struct device *tmp;
280 
281 	if (info->seg != segment)
282 		return 0;
283 
284 	for_each_active_dev_scope(devices, count, index, tmp)
285 		if (tmp == &info->dev->dev) {
286 			RCU_INIT_POINTER(devices[index].dev, NULL);
287 			synchronize_rcu();
288 			put_device(tmp);
289 			return 1;
290 		}
291 
292 	return 0;
293 }
294 
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297 	int ret = 0;
298 	struct dmar_drhd_unit *dmaru;
299 	struct acpi_dmar_hardware_unit *drhd;
300 
301 	for_each_drhd_unit(dmaru) {
302 		if (dmaru->include_all)
303 			continue;
304 
305 		drhd = container_of(dmaru->hdr,
306 				    struct acpi_dmar_hardware_unit, header);
307 		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308 				((void *)drhd) + drhd->header.length,
309 				dmaru->segment,
310 				dmaru->devices, dmaru->devices_cnt);
311 		if (ret)
312 			break;
313 	}
314 	if (ret >= 0)
315 		ret = dmar_iommu_notify_scope_dev(info);
316 	if (ret < 0 && dmar_dev_scope_status == 0)
317 		dmar_dev_scope_status = ret;
318 
319 	return ret;
320 }
321 
322 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
323 {
324 	struct dmar_drhd_unit *dmaru;
325 
326 	for_each_drhd_unit(dmaru)
327 		if (dmar_remove_dev_scope(info, dmaru->segment,
328 			dmaru->devices, dmaru->devices_cnt))
329 			break;
330 	dmar_iommu_notify_scope_dev(info);
331 }
332 
333 static int dmar_pci_bus_notifier(struct notifier_block *nb,
334 				 unsigned long action, void *data)
335 {
336 	struct pci_dev *pdev = to_pci_dev(data);
337 	struct dmar_pci_notify_info *info;
338 
339 	/* Only care about add/remove events for physical functions.
340 	 * For VFs we actually do the lookup based on the corresponding
341 	 * PF in device_to_iommu() anyway. */
342 	if (pdev->is_virtfn)
343 		return NOTIFY_DONE;
344 	if (action != BUS_NOTIFY_ADD_DEVICE &&
345 	    action != BUS_NOTIFY_REMOVED_DEVICE)
346 		return NOTIFY_DONE;
347 
348 	info = dmar_alloc_pci_notify_info(pdev, action);
349 	if (!info)
350 		return NOTIFY_DONE;
351 
352 	down_write(&dmar_global_lock);
353 	if (action == BUS_NOTIFY_ADD_DEVICE)
354 		dmar_pci_bus_add_dev(info);
355 	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
356 		dmar_pci_bus_del_dev(info);
357 	up_write(&dmar_global_lock);
358 
359 	dmar_free_pci_notify_info(info);
360 
361 	return NOTIFY_OK;
362 }
363 
/*
 * Notifier block registered on the PCI bus by dmar_register_bus_notifier().
 * The priority is set to the smallest possible value.
 * NOTE(review): presumably so this runs after all other bus notifiers - confirm.
 */
static struct notifier_block dmar_pci_bus_nb = {
	.notifier_call = dmar_pci_bus_notifier,
	.priority = INT_MIN,
};
368 
369 static struct dmar_drhd_unit *
370 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
371 {
372 	struct dmar_drhd_unit *dmaru;
373 
374 	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
375 				dmar_rcu_check())
376 		if (dmaru->segment == drhd->segment &&
377 		    dmaru->reg_base_addr == drhd->address)
378 			return dmaru;
379 
380 	return NULL;
381 }
382 
383 /**
384  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
385  * structure which uniquely represent one DMA remapping hardware unit
386  * present in the platform
387  */
388 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
389 {
390 	struct acpi_dmar_hardware_unit *drhd;
391 	struct dmar_drhd_unit *dmaru;
392 	int ret;
393 
394 	drhd = (struct acpi_dmar_hardware_unit *)header;
395 	dmaru = dmar_find_dmaru(drhd);
396 	if (dmaru)
397 		goto out;
398 
399 	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
400 	if (!dmaru)
401 		return -ENOMEM;
402 
403 	/*
404 	 * If header is allocated from slab by ACPI _DSM method, we need to
405 	 * copy the content because the memory buffer will be freed on return.
406 	 */
407 	dmaru->hdr = (void *)(dmaru + 1);
408 	memcpy(dmaru->hdr, header, header->length);
409 	dmaru->reg_base_addr = drhd->address;
410 	dmaru->segment = drhd->segment;
411 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
412 	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
413 					      ((void *)drhd) + drhd->header.length,
414 					      &dmaru->devices_cnt);
415 	if (dmaru->devices_cnt && dmaru->devices == NULL) {
416 		kfree(dmaru);
417 		return -ENOMEM;
418 	}
419 
420 	ret = alloc_iommu(dmaru);
421 	if (ret) {
422 		dmar_free_dev_scope(&dmaru->devices,
423 				    &dmaru->devices_cnt);
424 		kfree(dmaru);
425 		return ret;
426 	}
427 	dmar_register_drhd_unit(dmaru);
428 
429 out:
430 	if (arg)
431 		(*(int *)arg)++;
432 
433 	return 0;
434 }
435 
436 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
437 {
438 	if (dmaru->devices && dmaru->devices_cnt)
439 		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
440 	if (dmaru->iommu)
441 		free_iommu(dmaru->iommu);
442 	kfree(dmaru);
443 }
444 
445 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
446 				      void *arg)
447 {
448 	struct acpi_dmar_andd *andd = (void *)header;
449 
450 	/* Check for NUL termination within the designated length */
451 	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
452 		pr_warn(FW_BUG
453 			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
454 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
455 			   dmi_get_system_info(DMI_BIOS_VENDOR),
456 			   dmi_get_system_info(DMI_BIOS_VERSION),
457 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
458 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
459 		return -EINVAL;
460 	}
461 	pr_info("ANDD device: %x name: %s\n", andd->device_number,
462 		andd->device_name);
463 
464 	return 0;
465 }
466 
#ifdef CONFIG_ACPI_NUMA
/*
 * Apply one RHSA (remapping hardware static affinity) structure.
 *
 * The DRHD unit whose register base matches the structure gets its IOMMU
 * node set from the proximity domain; NUMA_NO_NODE is used when the domain
 * cannot be mapped or the node is not online. An RHSA that refers to no
 * known unit is reported as a firmware bug and taints the kernel.
 *
 * Always returns 0 so that a bad RHSA does not abort the table walk.
 * @arg is unused.
 */
static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_rhsa *rhsa;
	struct dmar_drhd_unit *drhd;

	rhsa = (struct acpi_dmar_rhsa *)header;
	for_each_drhd_unit(drhd) {
		if (drhd->reg_base_addr == rhsa->base_address) {
			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

			/*
			 * The mapping may yield NUMA_NO_NODE (-1); that value
			 * must not be handed to node_online(), which tests a
			 * bit indexed by the node number.
			 */
			if (node != NUMA_NO_NODE && !node_online(node))
				node = NUMA_NO_NODE;
			drhd->iommu->node = node;
			return 0;
		}
	}
	pr_warn(FW_BUG
		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		rhsa->base_address,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);

	return 0;
}
#else
#define	dmar_parse_one_rhsa		dmar_res_noop
#endif
498 
499 static void
500 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
501 {
502 	struct acpi_dmar_hardware_unit *drhd;
503 	struct acpi_dmar_reserved_memory *rmrr;
504 	struct acpi_dmar_atsr *atsr;
505 	struct acpi_dmar_rhsa *rhsa;
506 
507 	switch (header->type) {
508 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
509 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
510 				    header);
511 		pr_info("DRHD base: %#016Lx flags: %#x\n",
512 			(unsigned long long)drhd->address, drhd->flags);
513 		break;
514 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
515 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
516 				    header);
517 		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
518 			(unsigned long long)rmrr->base_address,
519 			(unsigned long long)rmrr->end_address);
520 		break;
521 	case ACPI_DMAR_TYPE_ROOT_ATS:
522 		atsr = container_of(header, struct acpi_dmar_atsr, header);
523 		pr_info("ATSR flags: %#x\n", atsr->flags);
524 		break;
525 	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
526 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
527 		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
528 		       (unsigned long long)rhsa->base_address,
529 		       rhsa->proximity_domain);
530 		break;
531 	case ACPI_DMAR_TYPE_NAMESPACE:
532 		/* We don't print this here because we need to sanity-check
533 		   it first. So print it in dmar_parse_one_andd() instead. */
534 		break;
535 	}
536 }
537 
538 /**
539  * dmar_table_detect - checks to see if the platform supports DMAR devices
540  */
541 static int __init dmar_table_detect(void)
542 {
543 	acpi_status status = AE_OK;
544 
545 	/* if we could find DMAR table, then there are DMAR devices */
546 	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
547 
548 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
549 		pr_warn("Unable to map DMAR\n");
550 		status = AE_NOT_FOUND;
551 	}
552 
553 	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
554 }
555 
/*
 * Walk the remapping structures in the @len bytes starting at @start and
 * dispatch each one to the handler registered for its type in @cb.
 *
 * - A zero-length structure stops the walk; 0 is still returned.
 * - A structure extending past the end of the buffer yields -EINVAL.
 * - Types at or beyond ACPI_DMAR_TYPE_RESERVED are skipped.
 * - A known type without a handler yields -EINVAL unless
 *   cb->ignore_unhandled is set.
 * - A non-zero handler return value aborts the walk and is returned.
 *
 * Returns 0 when the walk completes.
 */
static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
				       size_t len, struct dmar_res_callback *cb)
{
	struct acpi_dmar_header *iter, *next;
	struct acpi_dmar_header *end = ((void *)start) + len;

	for (iter = start; iter < end; iter = next) {
		next = (void *)iter + iter->length;
		if (iter->length == 0) {
			/* Avoid looping forever on bad ACPI tables */
			pr_debug(FW_BUG "Invalid 0-length structure\n");
			break;
		} else if (next > end) {
			/* Avoid passing table end */
			pr_warn(FW_BUG "Record passes table end\n");
			return -EINVAL;
		}

		if (cb->print_entry)
			dmar_table_print_dmar_entry(iter);

		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
			/* continue for forward compatibility */
			pr_debug("Unknown DMAR structure type %d\n",
				 iter->type);
		} else if (cb->cb[iter->type]) {
			int ret;

			/* Hand the entry to its handler with the per-type argument. */
			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
			if (ret)
				return ret;
		} else if (!cb->ignore_unhandled) {
			pr_warn("No handler for DMAR structure type %d\n",
				iter->type);
			return -EINVAL;
		}
	}

	return 0;
}
596 
597 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
598 				       struct dmar_res_callback *cb)
599 {
600 	return dmar_walk_remapping_entries((void *)(dmar + 1),
601 			dmar->header.length - sizeof(*dmar), cb);
602 }
603 
/**
 * parse_dmar_table - parses the DMA reporting table
 *
 * Re-detects the DMAR table, substitutes the copy returned by
 * tboot_get_dmar_table(), validates the host address width and walks all
 * remapping structures with the DRHD/RMRR/ATSR/RHSA/ANDD handlers.
 *
 * Return: 0 on success, -ENODEV if no table is available, -EINVAL for an
 * invalid host address width, or the error returned by the table walk.
 */
static int __init
parse_dmar_table(void)
{
	struct acpi_table_dmar *dmar;
	int drhd_count = 0;
	int ret;
	/* Entries are printed while walking; types without handler are skipped. */
	struct dmar_res_callback cb = {
		.print_entry = true,
		.ignore_unhandled = true,
		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
	};

	/*
	 * Do it again, earlier dmar_tbl mapping could be mapped with
	 * fixed map.
	 */
	dmar_table_detect();

	/*
	 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
	 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
	 */
	dmar_tbl = tboot_get_dmar_table(dmar_tbl);

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar)
		return -ENODEV;

	/* The table stores the host address width minus one. */
	if (dmar->width < PAGE_SHIFT - 1) {
		pr_warn("Invalid DMAR haw\n");
		return -EINVAL;
	}

	pr_info("Host address width %d\n", dmar->width + 1);
	ret = dmar_walk_dmar_table(dmar, &cb);
	/* drhd_count is incremented by dmar_parse_one_drhd() through cb.arg[]. */
	if (ret == 0 && drhd_count == 0)
		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");

	return ret;
}
652 
653 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
654 				 int cnt, struct pci_dev *dev)
655 {
656 	int index;
657 	struct device *tmp;
658 
659 	while (dev) {
660 		for_each_active_dev_scope(devices, cnt, index, tmp)
661 			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
662 				return 1;
663 
664 		/* Check our parent */
665 		dev = dev->bus->self;
666 	}
667 
668 	return 0;
669 }
670 
671 struct dmar_drhd_unit *
672 dmar_find_matched_drhd_unit(struct pci_dev *dev)
673 {
674 	struct dmar_drhd_unit *dmaru;
675 	struct acpi_dmar_hardware_unit *drhd;
676 
677 	dev = pci_physfn(dev);
678 
679 	rcu_read_lock();
680 	for_each_drhd_unit(dmaru) {
681 		drhd = container_of(dmaru->hdr,
682 				    struct acpi_dmar_hardware_unit,
683 				    header);
684 
685 		if (dmaru->include_all &&
686 		    drhd->segment == pci_domain_nr(dev->bus))
687 			goto out;
688 
689 		if (dmar_pci_device_match(dmaru->devices,
690 					  dmaru->devices_cnt, dev))
691 			goto out;
692 	}
693 	dmaru = NULL;
694 out:
695 	rcu_read_unlock();
696 
697 	return dmaru;
698 }
699 
/*
 * Bind the ACPI device @adev to the device scope of the DRHD unit that
 * lists a NAMESPACE scope entry with enumeration id @device_number.
 *
 * The first empty slot of that unit's device array is filled with the bus
 * and devfn taken from the scope entry, plus a reference to the ACPI
 * device. A warning is logged if no unit lists the enumeration id.
 */
static void __init dmar_acpi_insert_dev_scope(u8 device_number,
					      struct acpi_device *adev)
{
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;
	struct acpi_dmar_device_scope *scope;
	struct device *tmp;
	int i;
	struct acpi_dmar_pci_path *path;

	for_each_drhd_unit(dmaru) {
		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit,
				    header);

		/* Scan the device scope entries that follow the DRHD structure. */
		for (scope = (void *)(drhd + 1);
		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
		     scope = ((void *)scope) + scope->length) {
			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
				continue;
			if (scope->enumeration_id != device_number)
				continue;

			/* Only the first path element is used for the bus/devfn. */
			path = (void *)(scope + 1);
			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
				dev_name(&adev->dev), dmaru->reg_base_addr,
				scope->bus, path->device, path->function);
			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
				if (tmp == NULL) {
					dmaru->devices[i].bus = scope->bus;
					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
									    path->function);
					rcu_assign_pointer(dmaru->devices[i].dev,
							   get_device(&adev->dev));
					return;
				}
			/* A matching scope entry with no free slot is treated as fatal. */
			BUG_ON(i >= dmaru->devices_cnt);
		}
	}
	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
		device_number, dev_name(&adev->dev));
}
742 
743 static int __init dmar_acpi_dev_scope_init(void)
744 {
745 	struct acpi_dmar_andd *andd;
746 
747 	if (dmar_tbl == NULL)
748 		return -ENODEV;
749 
750 	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
751 	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
752 	     andd = ((void *)andd) + andd->header.length) {
753 		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
754 			acpi_handle h;
755 			struct acpi_device *adev;
756 
757 			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
758 							  andd->device_name,
759 							  &h))) {
760 				pr_err("Failed to find handle for ACPI object %s\n",
761 				       andd->device_name);
762 				continue;
763 			}
764 			if (acpi_bus_get_device(h, &adev)) {
765 				pr_err("Failed to get device for ACPI object %s\n",
766 				       andd->device_name);
767 				continue;
768 			}
769 			dmar_acpi_insert_dev_scope(andd->device_number, adev);
770 		}
771 	}
772 	return 0;
773 }
774 
775 int __init dmar_dev_scope_init(void)
776 {
777 	struct pci_dev *dev = NULL;
778 	struct dmar_pci_notify_info *info;
779 
780 	if (dmar_dev_scope_status != 1)
781 		return dmar_dev_scope_status;
782 
783 	if (list_empty(&dmar_drhd_units)) {
784 		dmar_dev_scope_status = -ENODEV;
785 	} else {
786 		dmar_dev_scope_status = 0;
787 
788 		dmar_acpi_dev_scope_init();
789 
790 		for_each_pci_dev(dev) {
791 			if (dev->is_virtfn)
792 				continue;
793 
794 			info = dmar_alloc_pci_notify_info(dev,
795 					BUS_NOTIFY_ADD_DEVICE);
796 			if (!info) {
797 				return dmar_dev_scope_status;
798 			} else {
799 				dmar_pci_bus_add_dev(info);
800 				dmar_free_pci_notify_info(info);
801 			}
802 		}
803 	}
804 
805 	return dmar_dev_scope_status;
806 }
807 
/* Register dmar_pci_bus_nb so PCI bus events reach dmar_pci_bus_notifier(). */
void __init dmar_register_bus_notifier(void)
{
	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
}
812 
813 
814 int __init dmar_table_init(void)
815 {
816 	static int dmar_table_initialized;
817 	int ret;
818 
819 	if (dmar_table_initialized == 0) {
820 		ret = parse_dmar_table();
821 		if (ret < 0) {
822 			if (ret != -ENODEV)
823 				pr_info("Parse DMAR table failure.\n");
824 		} else  if (list_empty(&dmar_drhd_units)) {
825 			pr_info("No DMAR devices found\n");
826 			ret = -ENODEV;
827 		}
828 
829 		if (ret < 0)
830 			dmar_table_initialized = ret;
831 		else
832 			dmar_table_initialized = 1;
833 	}
834 
835 	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
836 }
837 
/*
 * Report a firmware bug about the DMAR unit at @addr. @message is appended
 * directly after the address in the log line. The message uses
 * pr_warn_once(), while the firmware-workaround taint is applied on every
 * call.
 */
static void warn_invalid_dmar(u64 addr, const char *message)
{
	pr_warn_once(FW_BUG
		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		addr, message,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
}
849 
850 static int __ref
851 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
852 {
853 	struct acpi_dmar_hardware_unit *drhd;
854 	void __iomem *addr;
855 	u64 cap, ecap;
856 
857 	drhd = (void *)entry;
858 	if (!drhd->address) {
859 		warn_invalid_dmar(0, "");
860 		return -EINVAL;
861 	}
862 
863 	if (arg)
864 		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
865 	else
866 		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
867 	if (!addr) {
868 		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
869 		return -EINVAL;
870 	}
871 
872 	cap = dmar_readq(addr + DMAR_CAP_REG);
873 	ecap = dmar_readq(addr + DMAR_ECAP_REG);
874 
875 	if (arg)
876 		iounmap(addr);
877 	else
878 		early_iounmap(addr, VTD_PAGE_SIZE);
879 
880 	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
881 		warn_invalid_dmar(drhd->address, " returns all ones");
882 		return -EINVAL;
883 	}
884 
885 	return 0;
886 }
887 
/*
 * Early detection of Intel IOMMUs.
 *
 * Under dmar_global_lock: obtains the DMAR table, validates every DRHD
 * structure, marks the IOMMU as detected when it is not disabled (and
 * requests ACS), installs the x86 init/shutdown hooks and finally releases
 * the table again.
 *
 * Returns 1 on success or the negative errno from detection/validation.
 */
int __init detect_intel_iommu(void)
{
	int ret;
	/* Only DRHD structures are validated; every other type is skipped. */
	struct dmar_res_callback validate_drhd_cb = {
		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
		.ignore_unhandled = true,
	};

	down_write(&dmar_global_lock);
	ret = dmar_table_detect();
	if (!ret)
		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
					   &validate_drhd_cb);
	if (!ret && !no_iommu && !iommu_detected &&
	    (!dmar_disabled || dmar_platform_optin())) {
		iommu_detected = 1;
		/* Make sure ACS will be enabled */
		pci_request_acs();
	}

#ifdef CONFIG_X86
	if (!ret) {
		x86_init.iommu.iommu_init = intel_iommu_init;
		x86_platform.iommu_shutdown = intel_iommu_shutdown;
	}

#endif

	/* Drop the table reference; parse_dmar_table() fetches it again. */
	if (dmar_tbl) {
		acpi_put_table(dmar_tbl);
		dmar_tbl = NULL;
	}
	up_write(&dmar_global_lock);

	return ret ? ret : 1;
}
924 
/*
 * Undo map_iommu(): unmap the register window of @iommu and release the
 * memory region that was reserved for it.
 */
static void unmap_iommu(struct intel_iommu *iommu)
{
	iounmap(iommu->reg);
	release_mem_region(iommu->reg_phys, iommu->reg_size);
}
930 
/**
 * map_iommu: map the iommu's registers
 * @iommu: the iommu to map
 * @phys_addr: the physical address of the base register
 *
 * Memory map the iommu's registers.  Start w/ a single page, and
 * possibly expand if that turns out to be insufficient.
 *
 * Returns 0 on success, -EBUSY if the region cannot be reserved, -ENOMEM
 * if it cannot be mapped, or -EINVAL if the capability registers read all
 * ones. On failure no region or mapping is left behind.
 */
static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
{
	int map_size, err=0;

	iommu->reg_phys = phys_addr;
	iommu->reg_size = VTD_PAGE_SIZE;

	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
		pr_err("Can't reserve memory\n");
		err = -EBUSY;
		goto out;
	}

	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
	if (!iommu->reg) {
		pr_err("Can't map the region\n");
		err = -ENOMEM;
		goto release;
	}

	/* The capability registers determine how large the window must be. */
	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
		err = -EINVAL;
		warn_invalid_dmar(phys_addr, " returns all ones");
		goto unmap;
	}
	iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);

	/* the registers might be more than one page */
	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
			 cap_max_fault_reg_offset(iommu->cap));
	map_size = VTD_PAGE_ALIGN(map_size);
	if (map_size > iommu->reg_size) {
		/* Drop the single-page mapping and redo it with the full size. */
		iounmap(iommu->reg);
		release_mem_region(iommu->reg_phys, iommu->reg_size);
		iommu->reg_size = map_size;
		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
					iommu->name)) {
			pr_err("Can't reserve memory\n");
			err = -EBUSY;
			goto out;
		}
		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
		if (!iommu->reg) {
			pr_err("Can't map the region\n");
			err = -ENOMEM;
			goto release;
		}
	}
	err = 0;
	goto out;

unmap:
	iounmap(iommu->reg);
release:
	release_mem_region(iommu->reg_phys, iommu->reg_size);
out:
	return err;
}
1000 
1001 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1002 {
1003 	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1004 					    DMAR_UNITS_SUPPORTED);
1005 	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1006 		iommu->seq_id = -1;
1007 	} else {
1008 		set_bit(iommu->seq_id, dmar_seq_ids);
1009 		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1010 	}
1011 
1012 	return iommu->seq_id;
1013 }
1014 
1015 static void dmar_free_seq_id(struct intel_iommu *iommu)
1016 {
1017 	if (iommu->seq_id >= 0) {
1018 		clear_bit(iommu->seq_id, dmar_seq_ids);
1019 		iommu->seq_id = -1;
1020 	}
1021 }
1022 
1023 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1024 {
1025 	struct intel_iommu *iommu;
1026 	u32 ver, sts;
1027 	int agaw = 0;
1028 	int msagaw = 0;
1029 	int err;
1030 
1031 	if (!drhd->reg_base_addr) {
1032 		warn_invalid_dmar(0, "");
1033 		return -EINVAL;
1034 	}
1035 
1036 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1037 	if (!iommu)
1038 		return -ENOMEM;
1039 
1040 	if (dmar_alloc_seq_id(iommu) < 0) {
1041 		pr_err("Failed to allocate seq_id\n");
1042 		err = -ENOSPC;
1043 		goto error;
1044 	}
1045 
1046 	err = map_iommu(iommu, drhd->reg_base_addr);
1047 	if (err) {
1048 		pr_err("Failed to map %s\n", iommu->name);
1049 		goto error_free_seq_id;
1050 	}
1051 
1052 	err = -EINVAL;
1053 	agaw = iommu_calculate_agaw(iommu);
1054 	if (agaw < 0) {
1055 		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1056 			iommu->seq_id);
1057 		goto err_unmap;
1058 	}
1059 	msagaw = iommu_calculate_max_sagaw(iommu);
1060 	if (msagaw < 0) {
1061 		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1062 			iommu->seq_id);
1063 		goto err_unmap;
1064 	}
1065 	iommu->agaw = agaw;
1066 	iommu->msagaw = msagaw;
1067 	iommu->segment = drhd->segment;
1068 
1069 	iommu->node = NUMA_NO_NODE;
1070 
1071 	ver = readl(iommu->reg + DMAR_VER_REG);
1072 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1073 		iommu->name,
1074 		(unsigned long long)drhd->reg_base_addr,
1075 		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1076 		(unsigned long long)iommu->cap,
1077 		(unsigned long long)iommu->ecap);
1078 
1079 	/* Reflect status in gcmd */
1080 	sts = readl(iommu->reg + DMAR_GSTS_REG);
1081 	if (sts & DMA_GSTS_IRES)
1082 		iommu->gcmd |= DMA_GCMD_IRE;
1083 	if (sts & DMA_GSTS_TES)
1084 		iommu->gcmd |= DMA_GCMD_TE;
1085 	if (sts & DMA_GSTS_QIES)
1086 		iommu->gcmd |= DMA_GCMD_QIE;
1087 
1088 	raw_spin_lock_init(&iommu->register_lock);
1089 
1090 	if (intel_iommu_enabled) {
1091 		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1092 					     intel_iommu_groups,
1093 					     "%s", iommu->name);
1094 		if (err)
1095 			goto err_unmap;
1096 
1097 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1098 
1099 		err = iommu_device_register(&iommu->iommu);
1100 		if (err)
1101 			goto err_unmap;
1102 	}
1103 
1104 	drhd->iommu = iommu;
1105 
1106 	return 0;
1107 
1108 err_unmap:
1109 	unmap_iommu(iommu);
1110 error_free_seq_id:
1111 	dmar_free_seq_id(iommu);
1112 error:
1113 	kfree(iommu);
1114 	return err;
1115 }
1116 
1117 static void free_iommu(struct intel_iommu *iommu)
1118 {
1119 	if (intel_iommu_enabled) {
1120 		iommu_device_unregister(&iommu->iommu);
1121 		iommu_device_sysfs_remove(&iommu->iommu);
1122 	}
1123 
1124 	if (iommu->irq) {
1125 		if (iommu->pr_irq) {
1126 			free_irq(iommu->pr_irq, iommu);
1127 			dmar_free_hwirq(iommu->pr_irq);
1128 			iommu->pr_irq = 0;
1129 		}
1130 		free_irq(iommu->irq, iommu);
1131 		dmar_free_hwirq(iommu->irq);
1132 		iommu->irq = 0;
1133 	}
1134 
1135 	if (iommu->qi) {
1136 		free_page((unsigned long)iommu->qi->desc);
1137 		kfree(iommu->qi->desc_status);
1138 		kfree(iommu->qi);
1139 	}
1140 
1141 	if (iommu->reg)
1142 		unmap_iommu(iommu);
1143 
1144 	dmar_free_seq_id(iommu);
1145 	kfree(iommu);
1146 }
1147 
1148 /*
1149  * Reclaim all the submitted descriptors which have completed its work.
1150  */
1151 static inline void reclaim_free_desc(struct q_inval *qi)
1152 {
1153 	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1154 	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1155 		qi->desc_status[qi->free_tail] = QI_FREE;
1156 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1157 		qi->free_cnt++;
1158 	}
1159 }
1160 
/*
 * qi_check_fault - inspect the fault status while polling a submission
 * @iommu:      unit whose invalidation queue is being polled
 * @index:      queue slot of the first descriptor of the submission
 * @wait_index: queue slot of the submission's wait descriptor
 *
 * Return: -EAGAIN when the wait descriptor's status is QI_ABORT (either on
 * entry or after handling ITE), -EINVAL when IQE is set and the hardware
 * head register points at @index, 0 otherwise.
 */
1161 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1162 {
1163 	u32 fault;
1164 	int head, tail;
1165 	struct q_inval *qi = iommu->qi;
1166 	int shift = qi_shift(iommu);
1167 
1168 	if (qi->desc_status[wait_index] == QI_ABORT)
1169 		return -EAGAIN;
1170 
1171 	fault = readl(iommu->reg + DMAR_FSTS_REG);
1172 
1173 	/*
1174 	 * If IQE happens, the head points to the descriptor associated
1175 	 * with the error. No new descriptors are fetched until the IQE
1176 	 * is cleared.
1177 	 */
1178 	if (fault & DMA_FSTS_IQE) {
1179 		head = readl(iommu->reg + DMAR_IQH_REG);
1180 		if ((head >> shift) == index) {
			/* head is the raw register value, used as a byte offset */
1181 			struct qi_desc *desc = qi->desc + head;
1182 
1183 			/*
1184 			 * desc->qw2 and desc->qw3 are either reserved or
1185 			 * used by software as private data. We won't print
1186 			 * out these two qw's for security consideration.
1187 			 */
1188 			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1189 			       (unsigned long long)desc->qw0,
1190 			       (unsigned long long)desc->qw1);
			/*
			 * Overwrite the offending descriptor with a copy of
			 * this submission's wait descriptor, then clear IQE.
			 */
1191 			memcpy(desc, qi->desc + (wait_index << shift),
1192 			       1 << shift);
1193 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1194 			return -EINVAL;
1195 		}
1196 	}
1197 
1198 	/*
1199 	 * If ITE happens, all pending wait_desc commands are aborted.
1200 	 * No new descriptors are fetched until the ITE is cleared.
1201 	 */
1202 	if (fault & DMA_FSTS_ITE) {
		/* Slot just before the hardware head, forced to an odd index. */
		/* NOTE(review): the walk below only visits odd slots - confirm
		 * this still matches where wait descriptors can be placed. */
1203 		head = readl(iommu->reg + DMAR_IQH_REG);
1204 		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1205 		head |= 1;
1206 		tail = readl(iommu->reg + DMAR_IQT_REG);
1207 		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1208 
1209 		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1210 
		/* Walk backwards two slots at a time, aborting in-use slots. */
1211 		do {
1212 			if (qi->desc_status[head] == QI_IN_USE)
1213 				qi->desc_status[head] = QI_ABORT;
1214 			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1215 		} while (head != tail);
1216 
1217 		if (qi->desc_status[wait_index] == QI_ABORT)
1218 			return -EAGAIN;
1219 	}
1220 
	/* ICE is only acknowledged; it does not change the return value. */
1221 	if (fault & DMA_FSTS_ICE)
1222 		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1223 
1224 	return 0;
1225 }
1226 
1227 /*
1228  * Function to submit invalidation descriptors of all types to the queued
1229  * invalidation interface(QI). Multiple descriptors can be submitted at a
1230  * time, a wait descriptor will be appended to each submission to ensure
1231  * hardware has completed the invalidation before return. Wait descriptors
1232  * can be part of the submission but it will not be polled for completion.
 *
 * @iommu:   unit to submit to; returns 0 at once when it has no queue
 * @desc:    array of @count descriptors to copy into the queue
 * @count:   number of descriptors in @desc
 * @options: QI_OPT_WAIT_DRAIN adds QI_IWD_PRQ_DRAIN to the wait descriptor
 *
 * Return: 0 on success or the non-zero result of qi_check_fault(); a
 * result of -EAGAIN is not returned, the whole submission is restarted.
1233  */
1234 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1235 		   unsigned int count, unsigned long options)
1236 {
1237 	struct q_inval *qi = iommu->qi;
1238 	struct qi_desc wait_desc;
1239 	int wait_index, index;
1240 	unsigned long flags;
1241 	int offset, shift;
1242 	int rc, i;
1243 
1244 	if (!qi)
1245 		return 0;
1246 
1247 restart:
1248 	rc = 0;
1249 
1250 	raw_spin_lock_irqsave(&qi->q_lock, flags);
1251 	/*
1252 	 * Check if we have enough empty slots in the queue to submit,
1253 	 * the calculation is based on:
1254 	 * # of desc + 1 wait desc + 1 space between head and tail
1255 	 */
1256 	while (qi->free_cnt < count + 2) {
1257 		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1258 		cpu_relax();
1259 		raw_spin_lock_irqsave(&qi->q_lock, flags);
1260 	}
1261 
	/* The wait descriptor goes in the slot right after the last one. */
1262 	index = qi->free_head;
1263 	wait_index = (index + count) % QI_LENGTH;
1264 	shift = qi_shift(iommu);
1265 
	/* Copy each descriptor into its slot and mark the slot in use. */
1266 	for (i = 0; i < count; i++) {
1267 		offset = ((index + i) % QI_LENGTH) << shift;
1268 		memcpy(qi->desc + offset, &desc[i], 1 << shift);
1269 		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1270 	}
1271 	qi->desc_status[wait_index] = QI_IN_USE;
1272 
	/*
	 * The wait descriptor carries QI_DONE as status data and the
	 * physical address of desc_status[wait_index]; the poll loop
	 * below watches that word.
	 */
1273 	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1274 			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1275 	if (options & QI_OPT_WAIT_DRAIN)
1276 		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1277 	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1278 	wait_desc.qw2 = 0;
1279 	wait_desc.qw3 = 0;
1280 
1281 	offset = wait_index << shift;
1282 	memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1283 
1284 	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1285 	qi->free_cnt -= count + 1;
1286 
1287 	/*
1288 	 * update the HW tail register indicating the presence of
1289 	 * new descriptors.
1290 	 */
1291 	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1292 
1293 	while (qi->desc_status[wait_index] != QI_DONE) {
1294 		/*
1295 		 * We will leave the interrupts disabled, to prevent interrupt
1296 		 * context to queue another cmd while a cmd is already submitted
1297 		 * and waiting for completion on this cpu. This is to avoid
1298 		 * a deadlock where the interrupt context can wait indefinitely
1299 		 * for free slots in the queue.
1300 		 */
1301 		rc = qi_check_fault(iommu, index, wait_index);
1302 		if (rc)
1303 			break;
1304 
		/* Drop only the lock (interrupts stay off) while spinning. */
1305 		raw_spin_unlock(&qi->q_lock);
1306 		cpu_relax();
1307 		raw_spin_lock(&qi->q_lock);
1308 	}
1309 
	/* Mark the submitted slots done so reclaim_free_desc() can free them. */
1310 	for (i = 0; i < count; i++)
1311 		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1312 
1313 	reclaim_free_desc(qi);
1314 	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1315 
1316 	if (rc == -EAGAIN)
1317 		goto restart;
1318 
1319 	return rc;
1320 }
1321 
1322 /*
1323  * Flush the global interrupt entry cache.
1324  */
1325 void qi_global_iec(struct intel_iommu *iommu)
1326 {
1327 	struct qi_desc desc;
1328 
1329 	desc.qw0 = QI_IEC_TYPE;
1330 	desc.qw1 = 0;
1331 	desc.qw2 = 0;
1332 	desc.qw3 = 0;
1333 
1334 	/* should never fail */
1335 	qi_submit_sync(iommu, &desc, 1, 0);
1336 }
1337 
1338 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1339 		      u64 type)
1340 {
1341 	struct qi_desc desc;
1342 
1343 	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1344 			| QI_CC_GRAN(type) | QI_CC_TYPE;
1345 	desc.qw1 = 0;
1346 	desc.qw2 = 0;
1347 	desc.qw3 = 0;
1348 
1349 	qi_submit_sync(iommu, &desc, 1, 0);
1350 }
1351 
1352 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1353 		    unsigned int size_order, u64 type)
1354 {
1355 	u8 dw = 0, dr = 0;
1356 
1357 	struct qi_desc desc;
1358 	int ih = 0;
1359 
1360 	if (cap_write_drain(iommu->cap))
1361 		dw = 1;
1362 
1363 	if (cap_read_drain(iommu->cap))
1364 		dr = 1;
1365 
1366 	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1367 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1368 	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1369 		| QI_IOTLB_AM(size_order);
1370 	desc.qw2 = 0;
1371 	desc.qw3 = 0;
1372 
1373 	qi_submit_sync(iommu, &desc, 1, 0);
1374 }
1375 
1376 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1377 			u16 qdep, u64 addr, unsigned mask)
1378 {
1379 	struct qi_desc desc;
1380 
1381 	if (mask) {
1382 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1383 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1384 	} else
1385 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1386 
1387 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1388 		qdep = 0;
1389 
1390 	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1391 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1392 	desc.qw2 = 0;
1393 	desc.qw3 = 0;
1394 
1395 	qi_submit_sync(iommu, &desc, 1, 0);
1396 }
1397 
1398 /* PASID-based IOTLB invalidation */
1399 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1400 		     unsigned long npages, bool ih)
1401 {
1402 	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1403 
1404 	/*
1405 	 * npages == -1 means a PASID-selective invalidation, otherwise,
1406 	 * a positive value for Page-selective-within-PASID invalidation.
1407 	 * 0 is not a valid input.
1408 	 */
1409 	if (WARN_ON(!npages)) {
1410 		pr_err("Invalid input npages = %ld\n", npages);
1411 		return;
1412 	}
1413 
1414 	if (npages == -1) {
1415 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1416 				QI_EIOTLB_DID(did) |
1417 				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1418 				QI_EIOTLB_TYPE;
1419 		desc.qw1 = 0;
1420 	} else {
1421 		int mask = ilog2(__roundup_pow_of_two(npages));
1422 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1423 
1424 		if (WARN_ON_ONCE(!ALIGN(addr, align)))
1425 			addr &= ~(align - 1);
1426 
1427 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1428 				QI_EIOTLB_DID(did) |
1429 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1430 				QI_EIOTLB_TYPE;
1431 		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1432 				QI_EIOTLB_IH(ih) |
1433 				QI_EIOTLB_AM(mask);
1434 	}
1435 
1436 	qi_submit_sync(iommu, &desc, 1, 0);
1437 }
1438 
1439 /* PASID-based device IOTLB Invalidate */
1440 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1441 			      u32 pasid,  u16 qdep, u64 addr,
1442 			      unsigned int size_order, u64 granu)
1443 {
1444 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1445 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1446 
1447 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1448 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1449 		QI_DEV_IOTLB_PFSID(pfsid);
1450 	desc.qw1 = QI_DEV_EIOTLB_GLOB(granu);
1451 
1452 	/*
1453 	 * If S bit is 0, we only flush a single page. If S bit is set,
1454 	 * The least significant zero bit indicates the invalidation address
1455 	 * range. VT-d spec 6.5.2.6.
1456 	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1457 	 * size order = 0 is PAGE_SIZE 4KB
1458 	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1459 	 * ECAP.
1460 	 */
1461 	desc.qw1 |= addr & ~mask;
1462 	if (size_order)
1463 		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1464 
1465 	qi_submit_sync(iommu, &desc, 1, 0);
1466 }
1467 
1468 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1469 			  u64 granu, int pasid)
1470 {
1471 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1472 
1473 	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1474 			QI_PC_GRAN(granu) | QI_PC_TYPE;
1475 	qi_submit_sync(iommu, &desc, 1, 0);
1476 }
1477 
1478 /*
1479  * Disable Queued Invalidation interface.
 *
 * Does nothing when the unit lacks the QI capability or when the status
 * register shows QI already disabled.  Otherwise, under register_lock,
 * waits until the queue tail and head registers match (bounded by
 * DMAR_OPERATION_TIMEOUT cycles measured from function entry), clears
 * DMA_GCMD_QIE in the cached gcmd, writes it out and waits for the
 * status bit to clear.
1480  */
1481 void dmar_disable_qi(struct intel_iommu *iommu)
1482 {
1483 	unsigned long flags;
1484 	u32 sts;
	/* Taken before the capability check and before the lock is held. */
1485 	cycles_t start_time = get_cycles();
1486 
1487 	if (!ecap_qis(iommu->ecap))
1488 		return;
1489 
1490 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1491 
1492 	sts =  readl(iommu->reg + DMAR_GSTS_REG);
1493 	if (!(sts & DMA_GSTS_QIES))
1494 		goto end;
1495 
1496 	/*
1497 	 * Give a chance to HW to complete the pending invalidation requests.
1498 	 */
1499 	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1500 		readl(iommu->reg + DMAR_IQH_REG)) &&
1501 		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1502 		cpu_relax();
1503 
1504 	iommu->gcmd &= ~DMA_GCMD_QIE;
1505 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1506 
	/* Poll the status register until QIES reads back as clear. */
1507 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1508 		      !(sts & DMA_GSTS_QIES), sts);
1509 end:
1510 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1511 }
1512 
1513 /*
1514  * Enable queued invalidation.
 *
 * Expects iommu->qi and its descriptor memory to be set up already (both
 * are dereferenced unconditionally).  Resets the software queue state,
 * programs the tail and queue-address registers and sets DMA_GCMD_QIE,
 * then waits for the status register to report QI enabled.
1515  */
1516 static void __dmar_enable_qi(struct intel_iommu *iommu)
1517 {
1518 	u32 sts;
1519 	unsigned long flags;
1520 	struct q_inval *qi = iommu->qi;
1521 	u64 val = virt_to_phys(qi->desc);
1522 
	/* Start from an empty queue: every slot is free. */
1523 	qi->free_head = qi->free_tail = 0;
1524 	qi->free_cnt = QI_LENGTH;
1525 
1526 	/*
1527 	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1528 	 * is present.
1529 	 */
1530 	if (ecap_smts(iommu->ecap))
1531 		val |= (1 << 11) | 1;
1532 
1533 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1534 
1535 	/* write zero to the tail reg */
1536 	writel(0, iommu->reg + DMAR_IQT_REG);
1537 
1538 	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1539 
1540 	iommu->gcmd |= DMA_GCMD_QIE;
1541 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1542 
1543 	/* Make sure hardware complete it */
1544 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1545 
1546 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1547 }
1548 
1549 /*
1550  * Enable Queued Invalidation interface. This is a must to support
1551  * interrupt-remapping. Also used by DMA-remapping, which replaces
1552  * register based IOTLB invalidation.
1553  */
1554 int dmar_enable_qi(struct intel_iommu *iommu)
1555 {
1556 	struct q_inval *qi;
1557 	struct page *desc_page;
1558 
1559 	if (!ecap_qis(iommu->ecap))
1560 		return -ENOENT;
1561 
1562 	/*
1563 	 * queued invalidation is already setup and enabled.
1564 	 */
1565 	if (iommu->qi)
1566 		return 0;
1567 
1568 	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1569 	if (!iommu->qi)
1570 		return -ENOMEM;
1571 
1572 	qi = iommu->qi;
1573 
1574 	/*
1575 	 * Need two pages to accommodate 256 descriptors of 256 bits each
1576 	 * if the remapping hardware supports scalable mode translation.
1577 	 */
1578 	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1579 				     !!ecap_smts(iommu->ecap));
1580 	if (!desc_page) {
1581 		kfree(qi);
1582 		iommu->qi = NULL;
1583 		return -ENOMEM;
1584 	}
1585 
1586 	qi->desc = page_address(desc_page);
1587 
1588 	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1589 	if (!qi->desc_status) {
1590 		free_page((unsigned long) qi->desc);
1591 		kfree(qi);
1592 		iommu->qi = NULL;
1593 		return -ENOMEM;
1594 	}
1595 
1596 	raw_spin_lock_init(&qi->q_lock);
1597 
1598 	__dmar_enable_qi(iommu);
1599 
1600 	return 0;
1601 }
1602 
1603 /* IOMMU interrupt handling. Most of it is MSI-like. */
1604 
/* Fault class reported by dmar_get_fault_reason() through *fault_type. */
1605 enum faulttype {
1606 	DMA_REMAP,	/* reason found in one of the DMA remapping tables */
1607 	INTR_REMAP,	/* reason found in the interrupt remapping table */
1608 	UNKNOWN,	/* reason code not covered by any table */
1609 };
1610 
/*
 * Descriptions of the DMA remapping fault reasons, indexed directly by
 * the fault reason code (see dmar_get_fault_reason()).  Both the strings
 * and the pointer array are read-only, matching
 * dma_remap_sm_fault_reasons.
 */
static const char * const dma_remap_fault_reasons[] = {
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
	"PCE for translation request specifies blocking",
};
1628 
/*
 * Descriptions of the scalable-mode ("SM") DMA remapping fault reasons.
 * Entry 0 corresponds to reason code 0x30: dmar_get_fault_reason()
 * indexes this table with (fault_reason - 0x30).  Codes without a
 * description are filled with "Unknown" so the indices stay aligned
 * with the code ranges noted in the trailing comments.
 */
1629 static const char * const dma_remap_sm_fault_reasons[] = {
1630 	"SM: Invalid Root Table Address",
1631 	"SM: TTM 0 for request with PASID",
1632 	"SM: TTM 0 for page group request",
1633 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1634 	"SM: Error attempting to access Root Entry",
1635 	"SM: Present bit in Root Entry is clear",
1636 	"SM: Non-zero reserved field set in Root Entry",
1637 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1638 	"SM: Error attempting to access Context Entry",
1639 	"SM: Present bit in Context Entry is clear",
1640 	"SM: Non-zero reserved field set in the Context Entry",
1641 	"SM: Invalid Context Entry",
1642 	"SM: DTE field in Context Entry is clear",
1643 	"SM: PASID Enable field in Context Entry is clear",
1644 	"SM: PASID is larger than the max in Context Entry",
1645 	"SM: PRE field in Context-Entry is clear",
1646 	"SM: RID_PASID field error in Context-Entry",
1647 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1648 	"SM: Error attempting to access the PASID Directory Entry",
1649 	"SM: Present bit in Directory Entry is clear",
1650 	"SM: Non-zero reserved field set in PASID Directory Entry",
1651 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1652 	"SM: Error attempting to access PASID Table Entry",
1653 	"SM: Present bit in PASID Table Entry is clear",
1654 	"SM: Non-zero reserved field set in PASID Table Entry",
1655 	"SM: Invalid Scalable-Mode PASID Table Entry",
1656 	"SM: ERE field is clear in PASID Table Entry",
1657 	"SM: SRE field is clear in PASID Table Entry",
1658 	"Unknown", "Unknown",/* 0x5E-0x5F */
1659 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1660 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1661 	"SM: Error attempting to access first-level paging entry",
1662 	"SM: Present bit in first-level paging entry is clear",
1663 	"SM: Non-zero reserved field set in first-level paging entry",
1664 	"SM: Error attempting to access FL-PML4 entry",
1665 	"SM: First-level entry address beyond MGAW in Nested translation",
1666 	"SM: Read permission error in FL-PML4 entry in Nested translation",
1667 	"SM: Read permission error in first-level paging entry in Nested translation",
1668 	"SM: Write permission error in first-level paging entry in Nested translation",
1669 	"SM: Error attempting to access second-level paging entry",
1670 	"SM: Read/Write permission error in second-level paging entry",
1671 	"SM: Non-zero reserved field set in second-level paging entry",
1672 	"SM: Invalid second-level page table pointer",
1673 	"SM: A/D bit update needed in second-level entry when set up in no snoop",
1674 	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1675 	"SM: Address in first-level translation is not canonical",
1676 	"SM: U/S set 0 for first-level translation with user privilege",
1677 	"SM: No execute permission for request with PASID and ER=1",
1678 	"SM: Address beyond the DMA hardware max",
1679 	"SM: Second-level entry address beyond the max",
1680 	"SM: No write permission for Write/AtomicOp request",
1681 	"SM: No read permission for Read/AtomicOp request",
1682 	"SM: Invalid address-interrupt address",
1683 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1684 	"SM: A/D bit update needed in first-level entry when set up in no snoop",
1685 };
1686 
/*
 * Descriptions of the interrupt remapping fault reasons.  Entry 0
 * corresponds to reason code 0x20: dmar_get_fault_reason() indexes this
 * table with (fault_reason - 0x20).  Both the strings and the pointer
 * array are read-only, matching dma_remap_sm_fault_reasons.
 */
static const char * const irq_remap_fault_reasons[] = {
	"Detected reserved fields in the decoded interrupt-remapped request",
	"Interrupt index exceeded the interrupt-remapping table size",
	"Present field in the IRTE entry is clear",
	"Error accessing interrupt-remapping table pointed by IRTA_REG",
	"Detected reserved fields in the IRTE entry",
	"Blocked a compatibility format interrupt request",
	"Blocked an interrupt request due to source-id verification failure",
};
1697 
1698 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1699 {
1700 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1701 					ARRAY_SIZE(irq_remap_fault_reasons))) {
1702 		*fault_type = INTR_REMAP;
1703 		return irq_remap_fault_reasons[fault_reason - 0x20];
1704 	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1705 			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1706 		*fault_type = DMA_REMAP;
1707 		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1708 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1709 		*fault_type = DMA_REMAP;
1710 		return dma_remap_fault_reasons[fault_reason];
1711 	} else {
1712 		*fault_type = UNKNOWN;
1713 		return "Unknown";
1714 	}
1715 }
1716 
1717 
1718 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1719 {
1720 	if (iommu->irq == irq)
1721 		return DMAR_FECTL_REG;
1722 	else if (iommu->pr_irq == irq)
1723 		return DMAR_PECTL_REG;
1724 	else
1725 		BUG();
1726 }
1727 
1728 void dmar_msi_unmask(struct irq_data *data)
1729 {
1730 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1731 	int reg = dmar_msi_reg(iommu, data->irq);
1732 	unsigned long flag;
1733 
1734 	/* unmask it */
1735 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1736 	writel(0, iommu->reg + reg);
1737 	/* Read a reg to force flush the post write */
1738 	readl(iommu->reg + reg);
1739 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1740 }
1741 
1742 void dmar_msi_mask(struct irq_data *data)
1743 {
1744 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1745 	int reg = dmar_msi_reg(iommu, data->irq);
1746 	unsigned long flag;
1747 
1748 	/* mask it */
1749 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1750 	writel(DMA_FECTL_IM, iommu->reg + reg);
1751 	/* Read a reg to force flush the post write */
1752 	readl(iommu->reg + reg);
1753 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1754 }
1755 
1756 void dmar_msi_write(int irq, struct msi_msg *msg)
1757 {
1758 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1759 	int reg = dmar_msi_reg(iommu, irq);
1760 	unsigned long flag;
1761 
1762 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1763 	writel(msg->data, iommu->reg + reg + 4);
1764 	writel(msg->address_lo, iommu->reg + reg + 8);
1765 	writel(msg->address_hi, iommu->reg + reg + 12);
1766 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1767 }
1768 
1769 void dmar_msi_read(int irq, struct msi_msg *msg)
1770 {
1771 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1772 	int reg = dmar_msi_reg(iommu, irq);
1773 	unsigned long flag;
1774 
1775 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1776 	msg->data = readl(iommu->reg + reg + 4);
1777 	msg->address_lo = readl(iommu->reg + reg + 8);
1778 	msg->address_hi = readl(iommu->reg + reg + 12);
1779 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1780 }
1781 
1782 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1783 		u8 fault_reason, int pasid, u16 source_id,
1784 		unsigned long long addr)
1785 {
1786 	const char *reason;
1787 	int fault_type;
1788 
1789 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1790 
1791 	if (fault_type == INTR_REMAP)
1792 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1793 			source_id >> 8, PCI_SLOT(source_id & 0xFF),
1794 			PCI_FUNC(source_id & 0xFF), addr >> 48,
1795 			fault_reason, reason);
1796 	else
1797 		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1798 		       type ? "DMA Read" : "DMA Write",
1799 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1800 		       PCI_FUNC(source_id & 0xFF), pasid, addr,
1801 		       fault_reason, reason);
1802 	return 0;
1803 }
1804 
/* Size in bytes of one primary fault recording register. */
1805 #define PRIMARY_FAULT_REG_LEN (16)
/*
 * dmar_fault - fault event interrupt handler
 * @irq:    interrupt number (not used by the handler body)
 * @dev_id: the struct intel_iommu the interrupt was requested for
 *
 * Reads the fault status register and, when a primary fault is pending,
 * walks the fault recording registers starting at the index reported in
 * the status.  Each record with the F bit set is decoded (unless output
 * is rate limited), cleared and then reported with register_lock
 * dropped.  The walk stops at the first record whose F bit is clear,
 * after which the PFO/PPF/PRO status bits are written back.
 *
 * Return: always IRQ_HANDLED.
 */
1806 irqreturn_t dmar_fault(int irq, void *dev_id)
1807 {
1808 	struct intel_iommu *iommu = dev_id;
1809 	int reg, fault_index;
1810 	u32 fault_status;
1811 	unsigned long flag;
	/* One rate limit state shared by every unit and every call. */
1812 	static DEFINE_RATELIMIT_STATE(rs,
1813 				      DEFAULT_RATELIMIT_INTERVAL,
1814 				      DEFAULT_RATELIMIT_BURST);
1815 
1816 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1817 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1818 	if (fault_status && __ratelimit(&rs))
1819 		pr_err("DRHD: handling fault status reg %x\n", fault_status);
1820 
1821 	/* TBD: ignore advanced fault log currently */
1822 	if (!(fault_status & DMA_FSTS_PPF))
1823 		goto unlock_exit;
1824 
1825 	fault_index = dma_fsts_fault_record_index(fault_status);
1826 	reg = cap_fault_reg_offset(iommu->cap);
1827 	while (1) {
1828 		/* Disable printing, simply clear the fault when ratelimited */
1829 		bool ratelimited = !__ratelimit(&rs);
		/* The decoded fields below are only set and used when !ratelimited. */
1830 		u8 fault_reason;
1831 		u16 source_id;
1832 		u64 guest_addr;
1833 		int type, pasid;
1834 		u32 data;
1835 		bool pasid_present;
1836 
1837 		/* highest 32 bits */
1838 		data = readl(iommu->reg + reg +
1839 				fault_index * PRIMARY_FAULT_REG_LEN + 12);
1840 		if (!(data & DMA_FRCD_F))
1841 			break;
1842 
1843 		if (!ratelimited) {
1844 			fault_reason = dma_frcd_fault_reason(data);
1845 			type = dma_frcd_type(data);
1846 
1847 			pasid = dma_frcd_pasid_value(data);
			/* data is reused for the dword at offset 8 from here on */
1848 			data = readl(iommu->reg + reg +
1849 				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
1850 			source_id = dma_frcd_source_id(data);
1851 
1852 			pasid_present = dma_frcd_pasid_present(data);
1853 			guest_addr = dmar_readq(iommu->reg + reg +
1854 					fault_index * PRIMARY_FAULT_REG_LEN);
1855 			guest_addr = dma_frcd_page_addr(guest_addr);
1856 		}
1857 
1858 		/* clear the fault */
1859 		writel(DMA_FRCD_F, iommu->reg + reg +
1860 			fault_index * PRIMARY_FAULT_REG_LEN + 12);
1861 
		/* Report with the lock dropped; it is retaken before the next record. */
1862 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1863 
1864 		if (!ratelimited)
1865 			/* Using pasid -1 if pasid is not present */
1866 			dmar_fault_do_one(iommu, type, fault_reason,
1867 					  pasid_present ? pasid : -1,
1868 					  source_id, guest_addr);
1869 
		/* Advance to the next record, wrapping after the last one. */
1870 		fault_index++;
1871 		if (fault_index >= cap_num_fault_regs(iommu->cap))
1872 			fault_index = 0;
1873 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1874 	}
1875 
1876 	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1877 	       iommu->reg + DMAR_FSTS_REG);
1878 
1879 unlock_exit:
1880 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1881 	return IRQ_HANDLED;
1882 }
1883 
1884 int dmar_set_interrupt(struct intel_iommu *iommu)
1885 {
1886 	int irq, ret;
1887 
1888 	/*
1889 	 * Check if the fault interrupt is already initialized.
1890 	 */
1891 	if (iommu->irq)
1892 		return 0;
1893 
1894 	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1895 	if (irq > 0) {
1896 		iommu->irq = irq;
1897 	} else {
1898 		pr_err("No free IRQ vectors\n");
1899 		return -EINVAL;
1900 	}
1901 
1902 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1903 	if (ret)
1904 		pr_err("Can't request irq\n");
1905 	return ret;
1906 }
1907 
1908 int __init enable_drhd_fault_handling(void)
1909 {
1910 	struct dmar_drhd_unit *drhd;
1911 	struct intel_iommu *iommu;
1912 
1913 	/*
1914 	 * Enable fault control interrupt.
1915 	 */
1916 	for_each_iommu(iommu, drhd) {
1917 		u32 fault_status;
1918 		int ret = dmar_set_interrupt(iommu);
1919 
1920 		if (ret) {
1921 			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1922 			       (unsigned long long)drhd->reg_base_addr, ret);
1923 			return -1;
1924 		}
1925 
1926 		/*
1927 		 * Clear any previous faults.
1928 		 */
1929 		dmar_fault(iommu->irq, iommu);
1930 		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1931 		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1932 	}
1933 
1934 	return 0;
1935 }
1936 
1937 /*
1938  * Re-enable Queued Invalidation interface.
1939  */
1940 int dmar_reenable_qi(struct intel_iommu *iommu)
1941 {
1942 	if (!ecap_qis(iommu->ecap))
1943 		return -ENOENT;
1944 
1945 	if (!iommu->qi)
1946 		return -ENOENT;
1947 
1948 	/*
1949 	 * First disable queued invalidation.
1950 	 */
1951 	dmar_disable_qi(iommu);
1952 	/*
1953 	 * Then enable queued invalidation again. Since there is no pending
1954 	 * invalidation requests now, it's safe to re-enable queued
1955 	 * invalidation.
1956 	 */
1957 	__dmar_enable_qi(iommu);
1958 
1959 	return 0;
1960 }
1961 
1962 /*
1963  * Check interrupt remapping support in DMAR table description.
1964  */
1965 int __init dmar_ir_support(void)
1966 {
1967 	struct acpi_table_dmar *dmar;
1968 	dmar = (struct acpi_table_dmar *)dmar_tbl;
1969 	if (!dmar)
1970 		return 0;
1971 	return dmar->flags & 0x1;
1972 }
1973 
1974 /* Check whether DMAR units are in use */
1975 static inline bool dmar_in_use(void)
1976 {
1977 	return irq_remapping_enabled || intel_iommu_enabled;
1978 }
1979 
1980 static int __init dmar_free_unused_resources(void)
1981 {
1982 	struct dmar_drhd_unit *dmaru, *dmaru_n;
1983 
1984 	if (dmar_in_use())
1985 		return 0;
1986 
1987 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
1988 		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
1989 
1990 	down_write(&dmar_global_lock);
1991 	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
1992 		list_del(&dmaru->list);
1993 		dmar_free_drhd(dmaru);
1994 	}
1995 	up_write(&dmar_global_lock);
1996 
1997 	return 0;
1998 }
1999 
/* Run dmar_free_unused_resources() as a late initcall. */
2000 late_initcall(dmar_free_unused_resources);
/* NOTE(review): presumably hooks detect_intel_iommu() into the x86 IOMMU
 * init table (asm/iommu_table.h) - confirm against the macro definition. */
2001 IOMMU_INIT_POST(detect_intel_iommu);
2002 
2003 /*
2004  * DMAR Hotplug Support
2005  * For more details, please refer to Intel(R) Virtualization Technology
2006  * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
2007  * "Remapping Hardware Unit Hot Plug".
2008  */
/* GUID passed to the ACPI _DSM calls made by the hotplug code below. */
2009 static guid_t dmar_hp_guid =
2010 	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2011 		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2012 
2013 /*
2014  * Currently there's only one revision and BIOS will not check the revision id,
2015  * so use 0 for safety.
2016  */
2017 #define	DMAR_DSM_REV_ID			0
/*
 * _DSM function indices.  dmar_walk_dsm_resource() also uses them to
 * index its res_type[] table, which maps each one to the ACPI DMAR
 * structure type it returns.
 */
2018 #define	DMAR_DSM_FUNC_DRHD		1
2019 #define	DMAR_DSM_FUNC_ATSR		2
2020 #define	DMAR_DSM_FUNC_RHSA		3
2021 
2022 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2023 {
2024 	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2025 }
2026 
2027 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2028 				  dmar_res_handler_t handler, void *arg)
2029 {
2030 	int ret = -ENODEV;
2031 	union acpi_object *obj;
2032 	struct acpi_dmar_header *start;
2033 	struct dmar_res_callback callback;
2034 	static int res_type[] = {
2035 		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2036 		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2037 		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2038 	};
2039 
2040 	if (!dmar_detect_dsm(handle, func))
2041 		return 0;
2042 
2043 	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2044 				      func, NULL, ACPI_TYPE_BUFFER);
2045 	if (!obj)
2046 		return -ENODEV;
2047 
2048 	memset(&callback, 0, sizeof(callback));
2049 	callback.cb[res_type[func]] = handler;
2050 	callback.arg[res_type[func]] = arg;
2051 	start = (struct acpi_dmar_header *)obj->buffer.pointer;
2052 	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2053 
2054 	ACPI_FREE(obj);
2055 
2056 	return ret;
2057 }
2058 
2059 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2060 {
2061 	int ret;
2062 	struct dmar_drhd_unit *dmaru;
2063 
2064 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2065 	if (!dmaru)
2066 		return -ENODEV;
2067 
2068 	ret = dmar_ir_hotplug(dmaru, true);
2069 	if (ret == 0)
2070 		ret = dmar_iommu_hotplug(dmaru, true);
2071 
2072 	return ret;
2073 }
2074 
2075 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2076 {
2077 	int i, ret;
2078 	struct device *dev;
2079 	struct dmar_drhd_unit *dmaru;
2080 
2081 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2082 	if (!dmaru)
2083 		return 0;
2084 
2085 	/*
2086 	 * All PCI devices managed by this unit should have been destroyed.
2087 	 */
2088 	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2089 		for_each_active_dev_scope(dmaru->devices,
2090 					  dmaru->devices_cnt, i, dev)
2091 			return -EBUSY;
2092 	}
2093 
2094 	ret = dmar_ir_hotplug(dmaru, false);
2095 	if (ret == 0)
2096 		ret = dmar_iommu_hotplug(dmaru, false);
2097 
2098 	return ret;
2099 }
2100 
2101 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2102 {
2103 	struct dmar_drhd_unit *dmaru;
2104 
2105 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2106 	if (dmaru) {
2107 		list_del_rcu(&dmaru->list);
2108 		synchronize_rcu();
2109 		dmar_free_drhd(dmaru);
2110 	}
2111 
2112 	return 0;
2113 }
2114 
2115 static int dmar_hotplug_insert(acpi_handle handle)
2116 {
2117 	int ret;
2118 	int drhd_count = 0;
2119 
2120 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2121 				     &dmar_validate_one_drhd, (void *)1);
2122 	if (ret)
2123 		goto out;
2124 
2125 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2126 				     &dmar_parse_one_drhd, (void *)&drhd_count);
2127 	if (ret == 0 && drhd_count == 0) {
2128 		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2129 		goto out;
2130 	} else if (ret) {
2131 		goto release_drhd;
2132 	}
2133 
2134 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2135 				     &dmar_parse_one_rhsa, NULL);
2136 	if (ret)
2137 		goto release_drhd;
2138 
2139 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2140 				     &dmar_parse_one_atsr, NULL);
2141 	if (ret)
2142 		goto release_atsr;
2143 
2144 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2145 				     &dmar_hp_add_drhd, NULL);
2146 	if (!ret)
2147 		return 0;
2148 
2149 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2150 			       &dmar_hp_remove_drhd, NULL);
2151 release_atsr:
2152 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2153 			       &dmar_release_one_atsr, NULL);
2154 release_drhd:
2155 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2156 			       &dmar_hp_release_drhd, NULL);
2157 out:
2158 	return ret;
2159 }
2160 
2161 static int dmar_hotplug_remove(acpi_handle handle)
2162 {
2163 	int ret;
2164 
2165 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2166 				     &dmar_check_one_atsr, NULL);
2167 	if (ret)
2168 		return ret;
2169 
2170 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2171 				     &dmar_hp_remove_drhd, NULL);
2172 	if (ret == 0) {
2173 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2174 					       &dmar_release_one_atsr, NULL));
2175 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2176 					       &dmar_hp_release_drhd, NULL));
2177 	} else {
2178 		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2179 				       &dmar_hp_add_drhd, NULL);
2180 	}
2181 
2182 	return ret;
2183 }
2184 
2185 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2186 				       void *context, void **retval)
2187 {
2188 	acpi_handle *phdl = retval;
2189 
2190 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2191 		*phdl = handle;
2192 		return AE_CTRL_TERMINATE;
2193 	}
2194 
2195 	return AE_OK;
2196 }
2197 
2198 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2199 {
2200 	int ret;
2201 	acpi_handle tmp = NULL;
2202 	acpi_status status;
2203 
2204 	if (!dmar_in_use())
2205 		return 0;
2206 
2207 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2208 		tmp = handle;
2209 	} else {
2210 		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2211 					     ACPI_UINT32_MAX,
2212 					     dmar_get_dsm_handle,
2213 					     NULL, NULL, &tmp);
2214 		if (ACPI_FAILURE(status)) {
2215 			pr_warn("Failed to locate _DSM method.\n");
2216 			return -ENXIO;
2217 		}
2218 	}
2219 	if (tmp == NULL)
2220 		return 0;
2221 
2222 	down_write(&dmar_global_lock);
2223 	if (insert)
2224 		ret = dmar_hotplug_insert(tmp);
2225 	else
2226 		ret = dmar_hotplug_remove(tmp);
2227 	up_write(&dmar_global_lock);
2228 
2229 	return ret;
2230 }
2231 
2232 int dmar_device_add(acpi_handle handle)
2233 {
2234 	return dmar_device_hotplug(handle, true);
2235 }
2236 
2237 int dmar_device_remove(acpi_handle handle)
2238 {
2239 	return dmar_device_hotplug(handle, false);
2240 }
2241 
2242 /*
2243  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2244  *
2245  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2246  * the ACPI DMAR table. This means that the platform boot firmware has made
2247  * sure no device can issue DMA outside of RMRR regions.
2248  */
2249 bool dmar_platform_optin(void)
2250 {
2251 	struct acpi_table_dmar *dmar;
2252 	acpi_status status;
2253 	bool ret;
2254 
2255 	status = acpi_get_table(ACPI_SIG_DMAR, 0,
2256 				(struct acpi_table_header **)&dmar);
2257 	if (ACPI_FAILURE(status))
2258 		return false;
2259 
2260 	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2261 	acpi_put_table((struct acpi_table_header *)dmar);
2262 
2263 	return ret;
2264 }
2265 EXPORT_SYMBOL_GPL(dmar_platform_optin);
2266