xref: /openbmc/linux/drivers/iommu/intel/dmar.c (revision 4c1ca831)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to the OS through the BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16 
17 #define pr_fmt(fmt)     "DMAR: " fmt
18 
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 
35 #include "../irq_remapping.h"
36 
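/*
 * Callback table used while walking the remapping structures of the DMAR
 * table: one handler and one argument slot per ACPI_DMAR_TYPE_* value.
 * ignore_unhandled suppresses the error for types without a handler, and
 * print_entry logs each structure as it is visited.
 */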
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39 	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
40 	void			*arg[ACPI_DMAR_TYPE_RESERVED];
41 	bool			ignore_unhandled;
42 	bool			print_entry;
43 };
44 
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
48  *    before the IO devices managed by that unit.
49  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
50  *    after the IO devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
59 
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63 
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66 
67 extern const struct iommu_ops intel_iommu_ops;
68 
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71 	/*
72 	 * add INCLUDE_ALL at the tail, so a scan of the list will find it
73 	 * at the very end.
74 	 */
75 	if (drhd->include_all)
76 		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 	else
78 		list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80 
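/*
 * Count the PCI endpoint, bridge and ACPI namespace entries in a device
 * scope list and allocate a matching array of struct dmar_dev_scope.
 * Returns NULL if no countable entries are found (or on allocation
 * failure); *cnt is set to the number of entries found.
 */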
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83 	struct acpi_dmar_device_scope *scope;
84 
85 	*cnt = 0;
86 	while (start < end) {
87 		scope = start;
88 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 			(*cnt)++;
92 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93 			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94 			pr_warn("Unsupported device scope\n");
95 		}
96 		start += scope->length;
97 	}
98 	if (*cnt == 0)
99 		return NULL;
100 
101 	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103 
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106 	int i;
107 	struct device *tmp_dev;
108 
109 	if (*devices && *cnt) {
110 		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111 			put_device(tmp_dev);
112 		kfree(*devices);
113 	}
114 
115 	*devices = NULL;
116 	*cnt = 0;
117 }
118 
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121 
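/*
 * Build a notification record for a PCI bus add/remove event. For device
 * addition, the bus/device/function path from the root bus down to @dev is
 * recorded so that it can later be matched against ACPI device scope entries.
 */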
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125 	int level = 0;
126 	size_t size;
127 	struct pci_dev *tmp;
128 	struct dmar_pci_notify_info *info;
129 
130 	BUG_ON(dev->is_virtfn);
131 
132 	/*
133 	 * Ignore devices that have a domain number higher than what can
134 	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135 	 */
136 	if (pci_domain_nr(dev->bus) > U16_MAX)
137 		return NULL;
138 
139 	/* Only generate path[] for device addition event */
140 	if (event == BUS_NOTIFY_ADD_DEVICE)
141 		for (tmp = dev; tmp; tmp = tmp->bus->self)
142 			level++;
143 
144 	size = struct_size(info, path, level);
145 	if (size <= sizeof(dmar_pci_notify_info_buf)) {
146 		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147 	} else {
148 		info = kzalloc(size, GFP_KERNEL);
149 		if (!info) {
150 			pr_warn("Out of memory when allocating notify_info for %s.\n",
151 				pci_name(dev));
152 			if (dmar_dev_scope_status == 0)
153 				dmar_dev_scope_status = -ENOMEM;
154 			return NULL;
155 		}
156 	}
157 
158 	info->event = event;
159 	info->dev = dev;
160 	info->seg = pci_domain_nr(dev->bus);
161 	info->level = level;
162 	if (event == BUS_NOTIFY_ADD_DEVICE) {
163 		for (tmp = dev; tmp; tmp = tmp->bus->self) {
164 			level--;
165 			info->path[level].bus = tmp->bus->number;
166 			info->path[level].device = PCI_SLOT(tmp->devfn);
167 			info->path[level].function = PCI_FUNC(tmp->devfn);
168 			if (pci_is_root_bus(tmp->bus))
169 				info->bus = tmp->bus->number;
170 		}
171 	}
172 
173 	return info;
174 }
175 
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178 	if ((void *)info != dmar_pci_notify_info_buf)
179 		kfree(info);
180 }
181 
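/*
 * Match the PCI path recorded in @info against an ACPI DMAR device scope
 * path. As a workaround for broken firmware, an RMRR path of length one
 * that names the leaf device directly is also accepted (with a FW_BUG
 * warning).
 */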
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183 				struct acpi_dmar_pci_path *path, int count)
184 {
185 	int i;
186 
187 	if (info->bus != bus)
188 		goto fallback;
189 	if (info->level != count)
190 		goto fallback;
191 
192 	for (i = 0; i < count; i++) {
193 		if (path[i].device != info->path[i].device ||
194 		    path[i].function != info->path[i].function)
195 			goto fallback;
196 	}
197 
198 	return true;
199 
200 fallback:
201 
202 	if (count != 1)
203 		return false;
204 
205 	i = info->level - 1;
206 	if (bus              == info->path[i].bus &&
207 	    path[0].device   == info->path[i].device &&
208 	    path[0].function == info->path[i].function) {
209 		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210 			bus, path[0].device, path[0].function);
211 		return true;
212 	}
213 
214 	return false;
215 }
216 
217 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219 			  void *start, void*end, u16 segment,
220 			  struct dmar_dev_scope *devices,
221 			  int devices_cnt)
222 {
223 	int i, level;
224 	struct device *tmp, *dev = &info->dev->dev;
225 	struct acpi_dmar_device_scope *scope;
226 	struct acpi_dmar_pci_path *path;
227 
228 	if (segment != info->seg)
229 		return 0;
230 
231 	for (; start < end; start += scope->length) {
232 		scope = start;
233 		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234 		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235 			continue;
236 
237 		path = (struct acpi_dmar_pci_path *)(scope + 1);
238 		level = (scope->length - sizeof(*scope)) / sizeof(*path);
239 		if (!dmar_match_pci_path(info, scope->bus, path, level))
240 			continue;
241 
242 		/*
243 		 * We expect devices with endpoint scope to have normal PCI
244 		 * headers, and devices with bridge scope to have bridge PCI
245 		 * headers.  However PCI NTB devices may be listed in the
246 		 * DMAR table with bridge scope, even though they have a
247 		 * normal PCI header.  NTB devices are identified by class
248 		 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
249 		 * for this special case.
250 		 */
251 		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252 		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253 		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254 		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255 		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256 			pr_warn("Device scope type does not match for %s\n",
257 				pci_name(info->dev));
258 			return -EINVAL;
259 		}
260 
261 		for_each_dev_scope(devices, devices_cnt, i, tmp)
262 			if (tmp == NULL) {
263 				devices[i].bus = info->dev->bus->number;
264 				devices[i].devfn = info->dev->devfn;
265 				rcu_assign_pointer(devices[i].dev,
266 						   get_device(dev));
267 				return 1;
268 			}
269 		BUG_ON(i >= devices_cnt);
270 	}
271 
272 	return 0;
273 }
274 
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276 			  struct dmar_dev_scope *devices, int count)
277 {
278 	int index;
279 	struct device *tmp;
280 
281 	if (info->seg != segment)
282 		return 0;
283 
284 	for_each_active_dev_scope(devices, count, index, tmp)
285 		if (tmp == &info->dev->dev) {
286 			RCU_INIT_POINTER(devices[index].dev, NULL);
287 			synchronize_rcu();
288 			put_device(tmp);
289 			return 1;
290 		}
291 
292 	return 0;
293 }
294 
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297 	int ret = 0;
298 	struct dmar_drhd_unit *dmaru;
299 	struct acpi_dmar_hardware_unit *drhd;
300 
301 	for_each_drhd_unit(dmaru) {
302 		if (dmaru->include_all)
303 			continue;
304 
305 		drhd = container_of(dmaru->hdr,
306 				    struct acpi_dmar_hardware_unit, header);
307 		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308 				((void *)drhd) + drhd->header.length,
309 				dmaru->segment,
310 				dmaru->devices, dmaru->devices_cnt);
311 		if (ret)
312 			break;
313 	}
314 	if (ret >= 0)
315 		ret = dmar_iommu_notify_scope_dev(info);
316 	if (ret < 0 && dmar_dev_scope_status == 0)
317 		dmar_dev_scope_status = ret;
318 
319 	if (ret >= 0)
320 		intel_irq_remap_add_device(info);
321 
322 	return ret;
323 }
324 
325 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326 {
327 	struct dmar_drhd_unit *dmaru;
328 
329 	for_each_drhd_unit(dmaru)
330 		if (dmar_remove_dev_scope(info, dmaru->segment,
331 			dmaru->devices, dmaru->devices_cnt))
332 			break;
333 	dmar_iommu_notify_scope_dev(info);
334 }
335 
336 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
337 {
338 	dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev));
339 }
340 
341 static int dmar_pci_bus_notifier(struct notifier_block *nb,
342 				 unsigned long action, void *data)
343 {
344 	struct pci_dev *pdev = to_pci_dev(data);
345 	struct dmar_pci_notify_info *info;
346 
347 	/* Only care about add/remove events for physical functions.
348 	 * For VFs we actually do the lookup based on the corresponding
349 	 * PF in device_to_iommu() anyway. */
350 	if (pdev->is_virtfn) {
351 		/*
352 		 * Ensure that the VF device inherits the irq domain of the
353 		 * PF device. Ideally the device would inherit the domain
354 		 * from the bus, but DMAR can have multiple units per bus
355 		 * which makes this impossible. The VF 'bus' could inherit
356 		 * from the PF device, but that's yet another x86-ism to
357 		 * inflict on everybody else.
358 		 */
359 		if (action == BUS_NOTIFY_ADD_DEVICE)
360 			vf_inherit_msi_domain(pdev);
361 		return NOTIFY_DONE;
362 	}
363 
364 	if (action != BUS_NOTIFY_ADD_DEVICE &&
365 	    action != BUS_NOTIFY_REMOVED_DEVICE)
366 		return NOTIFY_DONE;
367 
368 	info = dmar_alloc_pci_notify_info(pdev, action);
369 	if (!info)
370 		return NOTIFY_DONE;
371 
372 	down_write(&dmar_global_lock);
373 	if (action == BUS_NOTIFY_ADD_DEVICE)
374 		dmar_pci_bus_add_dev(info);
375 	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
376 		dmar_pci_bus_del_dev(info);
377 	up_write(&dmar_global_lock);
378 
379 	dmar_free_pci_notify_info(info);
380 
381 	return NOTIFY_OK;
382 }
383 
384 static struct notifier_block dmar_pci_bus_nb = {
385 	.notifier_call = dmar_pci_bus_notifier,
386 	.priority = INT_MIN,
387 };
388 
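/* Look up an already registered DRHD unit by segment and register base address. */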
389 static struct dmar_drhd_unit *
390 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
391 {
392 	struct dmar_drhd_unit *dmaru;
393 
394 	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
395 				dmar_rcu_check())
396 		if (dmaru->segment == drhd->segment &&
397 		    dmaru->reg_base_addr == drhd->address)
398 			return dmaru;
399 
400 	return NULL;
401 }
402 
403 /*
404  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
405  * structure which uniquely represents one DMA remapping hardware unit
406  * present in the platform
407  */
408 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
409 {
410 	struct acpi_dmar_hardware_unit *drhd;
411 	struct dmar_drhd_unit *dmaru;
412 	int ret;
413 
414 	drhd = (struct acpi_dmar_hardware_unit *)header;
415 	dmaru = dmar_find_dmaru(drhd);
416 	if (dmaru)
417 		goto out;
418 
419 	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
420 	if (!dmaru)
421 		return -ENOMEM;
422 
423 	/*
424 	 * If the header is allocated from slab by the ACPI _DSM method, we need
425 	 * to copy the content because the memory buffer will be freed on return.
426 	 */
427 	dmaru->hdr = (void *)(dmaru + 1);
428 	memcpy(dmaru->hdr, header, header->length);
429 	dmaru->reg_base_addr = drhd->address;
430 	dmaru->segment = drhd->segment;
431 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
432 	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
433 					      ((void *)drhd) + drhd->header.length,
434 					      &dmaru->devices_cnt);
435 	if (dmaru->devices_cnt && dmaru->devices == NULL) {
436 		kfree(dmaru);
437 		return -ENOMEM;
438 	}
439 
440 	ret = alloc_iommu(dmaru);
441 	if (ret) {
442 		dmar_free_dev_scope(&dmaru->devices,
443 				    &dmaru->devices_cnt);
444 		kfree(dmaru);
445 		return ret;
446 	}
447 	dmar_register_drhd_unit(dmaru);
448 
449 out:
450 	if (arg)
451 		(*(int *)arg)++;
452 
453 	return 0;
454 }
455 
456 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
457 {
458 	if (dmaru->devices && dmaru->devices_cnt)
459 		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
460 	if (dmaru->iommu)
461 		free_iommu(dmaru->iommu);
462 	kfree(dmaru);
463 }
464 
465 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
466 				      void *arg)
467 {
468 	struct acpi_dmar_andd *andd = (void *)header;
469 
470 	/* Check for NUL termination within the designated length */
471 	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
472 		pr_warn(FW_BUG
473 			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
474 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
475 			   dmi_get_system_info(DMI_BIOS_VENDOR),
476 			   dmi_get_system_info(DMI_BIOS_VERSION),
477 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
478 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
479 		return -EINVAL;
480 	}
481 	pr_info("ANDD device: %x name: %s\n", andd->device_number,
482 		andd->device_name);
483 
484 	return 0;
485 }
486 
487 #ifdef CONFIG_ACPI_NUMA
488 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
489 {
490 	struct acpi_dmar_rhsa *rhsa;
491 	struct dmar_drhd_unit *drhd;
492 
493 	rhsa = (struct acpi_dmar_rhsa *)header;
494 	for_each_drhd_unit(drhd) {
495 		if (drhd->reg_base_addr == rhsa->base_address) {
496 			int node = pxm_to_node(rhsa->proximity_domain);
497 
498 			if (!node_online(node))
499 				node = NUMA_NO_NODE;
500 			drhd->iommu->node = node;
501 			return 0;
502 		}
503 	}
504 	pr_warn(FW_BUG
505 		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
506 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
507 		rhsa->base_address,
508 		dmi_get_system_info(DMI_BIOS_VENDOR),
509 		dmi_get_system_info(DMI_BIOS_VERSION),
510 		dmi_get_system_info(DMI_PRODUCT_VERSION));
511 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
512 
513 	return 0;
514 }
515 #else
516 #define	dmar_parse_one_rhsa		dmar_res_noop
517 #endif
518 
519 static void
520 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
521 {
522 	struct acpi_dmar_hardware_unit *drhd;
523 	struct acpi_dmar_reserved_memory *rmrr;
524 	struct acpi_dmar_atsr *atsr;
525 	struct acpi_dmar_rhsa *rhsa;
526 
527 	switch (header->type) {
528 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
529 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
530 				    header);
531 		pr_info("DRHD base: %#016Lx flags: %#x\n",
532 			(unsigned long long)drhd->address, drhd->flags);
533 		break;
534 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
535 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
536 				    header);
537 		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
538 			(unsigned long long)rmrr->base_address,
539 			(unsigned long long)rmrr->end_address);
540 		break;
541 	case ACPI_DMAR_TYPE_ROOT_ATS:
542 		atsr = container_of(header, struct acpi_dmar_atsr, header);
543 		pr_info("ATSR flags: %#x\n", atsr->flags);
544 		break;
545 	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
546 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
547 		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
548 		       (unsigned long long)rhsa->base_address,
549 		       rhsa->proximity_domain);
550 		break;
551 	case ACPI_DMAR_TYPE_NAMESPACE:
552 		/* We don't print this here because we need to sanity-check
553 		   it first. So print it in dmar_parse_one_andd() instead. */
554 		break;
555 	}
556 }
557 
558 /**
559  * dmar_table_detect - checks to see if the platform supports DMAR devices
560  */
561 static int __init dmar_table_detect(void)
562 {
563 	acpi_status status = AE_OK;
564 
565 	/* if we can find the DMAR table, then there are DMAR devices */
566 	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
567 
568 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
569 		pr_warn("Unable to map DMAR\n");
570 		status = AE_NOT_FOUND;
571 	}
572 
573 	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
574 }
575 
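/*
 * Walk the remapping structures that follow the DMAR table header, calling
 * the registered callback for each known type and bailing out on malformed
 * (zero-length or overlong) records.
 */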
576 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
577 				       size_t len, struct dmar_res_callback *cb)
578 {
579 	struct acpi_dmar_header *iter, *next;
580 	struct acpi_dmar_header *end = ((void *)start) + len;
581 
582 	for (iter = start; iter < end; iter = next) {
583 		next = (void *)iter + iter->length;
584 		if (iter->length == 0) {
585 			/* Avoid looping forever on bad ACPI tables */
586 			pr_debug(FW_BUG "Invalid 0-length structure\n");
587 			break;
588 		} else if (next > end) {
589 			/* Avoid running past the table end */
590 			pr_warn(FW_BUG "Record passes table end\n");
591 			return -EINVAL;
592 		}
593 
594 		if (cb->print_entry)
595 			dmar_table_print_dmar_entry(iter);
596 
597 		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
598 			/* continue for forward compatibility */
599 			pr_debug("Unknown DMAR structure type %d\n",
600 				 iter->type);
601 		} else if (cb->cb[iter->type]) {
602 			int ret;
603 
604 			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
605 			if (ret)
606 				return ret;
607 		} else if (!cb->ignore_unhandled) {
608 			pr_warn("No handler for DMAR structure type %d\n",
609 				iter->type);
610 			return -EINVAL;
611 		}
612 	}
613 
614 	return 0;
615 }
616 
617 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
618 				       struct dmar_res_callback *cb)
619 {
620 	return dmar_walk_remapping_entries((void *)(dmar + 1),
621 			dmar->header.length - sizeof(*dmar), cb);
622 }
623 
624 /**
625  * parse_dmar_table - parses the DMA reporting table
626  */
627 static int __init
628 parse_dmar_table(void)
629 {
630 	struct acpi_table_dmar *dmar;
631 	int drhd_count = 0;
632 	int ret;
633 	struct dmar_res_callback cb = {
634 		.print_entry = true,
635 		.ignore_unhandled = true,
636 		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
637 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
638 		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
639 		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
640 		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
641 		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
642 	};
643 
644 	/*
645 	 * Do it again; the earlier dmar_tbl mapping could have been done
646 	 * with a fixed map.
647 	 */
648 	dmar_table_detect();
649 
650 	/*
651 	 * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
652 	 * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
653 	 */
654 	dmar_tbl = tboot_get_dmar_table(dmar_tbl);
655 
656 	dmar = (struct acpi_table_dmar *)dmar_tbl;
657 	if (!dmar)
658 		return -ENODEV;
659 
660 	if (dmar->width < PAGE_SHIFT - 1) {
661 		pr_warn("Invalid DMAR haw\n");
662 		return -EINVAL;
663 	}
664 
665 	pr_info("Host address width %d\n", dmar->width + 1);
666 	ret = dmar_walk_dmar_table(dmar, &cb);
667 	if (ret == 0 && drhd_count == 0)
668 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
669 
670 	return ret;
671 }
672 
673 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
674 				 int cnt, struct pci_dev *dev)
675 {
676 	int index;
677 	struct device *tmp;
678 
679 	while (dev) {
680 		for_each_active_dev_scope(devices, cnt, index, tmp)
681 			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
682 				return 1;
683 
684 		/* Check our parent */
685 		dev = dev->bus->self;
686 	}
687 
688 	return 0;
689 }
690 
691 struct dmar_drhd_unit *
692 dmar_find_matched_drhd_unit(struct pci_dev *dev)
693 {
694 	struct dmar_drhd_unit *dmaru;
695 	struct acpi_dmar_hardware_unit *drhd;
696 
697 	dev = pci_physfn(dev);
698 
699 	rcu_read_lock();
700 	for_each_drhd_unit(dmaru) {
701 		drhd = container_of(dmaru->hdr,
702 				    struct acpi_dmar_hardware_unit,
703 				    header);
704 
705 		if (dmaru->include_all &&
706 		    drhd->segment == pci_domain_nr(dev->bus))
707 			goto out;
708 
709 		if (dmar_pci_device_match(dmaru->devices,
710 					  dmaru->devices_cnt, dev))
711 			goto out;
712 	}
713 	dmaru = NULL;
714 out:
715 	rcu_read_unlock();
716 
717 	return dmaru;
718 }
719 
720 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
721 					      struct acpi_device *adev)
722 {
723 	struct dmar_drhd_unit *dmaru;
724 	struct acpi_dmar_hardware_unit *drhd;
725 	struct acpi_dmar_device_scope *scope;
726 	struct device *tmp;
727 	int i;
728 	struct acpi_dmar_pci_path *path;
729 
730 	for_each_drhd_unit(dmaru) {
731 		drhd = container_of(dmaru->hdr,
732 				    struct acpi_dmar_hardware_unit,
733 				    header);
734 
735 		for (scope = (void *)(drhd + 1);
736 		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
737 		     scope = ((void *)scope) + scope->length) {
738 			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
739 				continue;
740 			if (scope->enumeration_id != device_number)
741 				continue;
742 
743 			path = (void *)(scope + 1);
744 			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
745 				dev_name(&adev->dev), dmaru->reg_base_addr,
746 				scope->bus, path->device, path->function);
747 			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
748 				if (tmp == NULL) {
749 					dmaru->devices[i].bus = scope->bus;
750 					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
751 									    path->function);
752 					rcu_assign_pointer(dmaru->devices[i].dev,
753 							   get_device(&adev->dev));
754 					return;
755 				}
756 			BUG_ON(i >= dmaru->devices_cnt);
757 		}
758 	}
759 	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
760 		device_number, dev_name(&adev->dev));
761 }
762 
763 static int __init dmar_acpi_dev_scope_init(void)
764 {
765 	struct acpi_dmar_andd *andd;
766 
767 	if (dmar_tbl == NULL)
768 		return -ENODEV;
769 
770 	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
771 	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
772 	     andd = ((void *)andd) + andd->header.length) {
773 		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
774 			acpi_handle h;
775 			struct acpi_device *adev;
776 
777 			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
778 							  andd->device_name,
779 							  &h))) {
780 				pr_err("Failed to find handle for ACPI object %s\n",
781 				       andd->device_name);
782 				continue;
783 			}
784 			if (acpi_bus_get_device(h, &adev)) {
785 				pr_err("Failed to get device for ACPI object %s\n",
786 				       andd->device_name);
787 				continue;
788 			}
789 			dmar_acpi_insert_dev_scope(andd->device_number, adev);
790 		}
791 	}
792 	return 0;
793 }
794 
795 int __init dmar_dev_scope_init(void)
796 {
797 	struct pci_dev *dev = NULL;
798 	struct dmar_pci_notify_info *info;
799 
800 	if (dmar_dev_scope_status != 1)
801 		return dmar_dev_scope_status;
802 
803 	if (list_empty(&dmar_drhd_units)) {
804 		dmar_dev_scope_status = -ENODEV;
805 	} else {
806 		dmar_dev_scope_status = 0;
807 
808 		dmar_acpi_dev_scope_init();
809 
810 		for_each_pci_dev(dev) {
811 			if (dev->is_virtfn)
812 				continue;
813 
814 			info = dmar_alloc_pci_notify_info(dev,
815 					BUS_NOTIFY_ADD_DEVICE);
816 			if (!info) {
817 				return dmar_dev_scope_status;
818 			} else {
819 				dmar_pci_bus_add_dev(info);
820 				dmar_free_pci_notify_info(info);
821 			}
822 		}
823 	}
824 
825 	return dmar_dev_scope_status;
826 }
827 
828 void __init dmar_register_bus_notifier(void)
829 {
830 	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
831 }
832 
833 
834 int __init dmar_table_init(void)
835 {
836 	static int dmar_table_initialized;
837 	int ret;
838 
839 	if (dmar_table_initialized == 0) {
840 		ret = parse_dmar_table();
841 		if (ret < 0) {
842 			if (ret != -ENODEV)
843 				pr_info("Parse DMAR table failure.\n");
844 		} else  if (list_empty(&dmar_drhd_units)) {
845 			pr_info("No DMAR devices found\n");
846 			ret = -ENODEV;
847 		}
848 
849 		if (ret < 0)
850 			dmar_table_initialized = ret;
851 		else
852 			dmar_table_initialized = 1;
853 	}
854 
855 	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
856 }
857 
858 static void warn_invalid_dmar(u64 addr, const char *message)
859 {
860 	pr_warn_once(FW_BUG
861 		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
862 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
863 		addr, message,
864 		dmi_get_system_info(DMI_BIOS_VENDOR),
865 		dmi_get_system_info(DMI_BIOS_VERSION),
866 		dmi_get_system_info(DMI_PRODUCT_VERSION));
867 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
868 }
869 
870 static int __ref
871 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
872 {
873 	struct acpi_dmar_hardware_unit *drhd;
874 	void __iomem *addr;
875 	u64 cap, ecap;
876 
877 	drhd = (void *)entry;
878 	if (!drhd->address) {
879 		warn_invalid_dmar(0, "");
880 		return -EINVAL;
881 	}
882 
883 	if (arg)
884 		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
885 	else
886 		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
887 	if (!addr) {
888 		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
889 		return -EINVAL;
890 	}
891 
892 	cap = dmar_readq(addr + DMAR_CAP_REG);
893 	ecap = dmar_readq(addr + DMAR_ECAP_REG);
894 
895 	if (arg)
896 		iounmap(addr);
897 	else
898 		early_iounmap(addr, VTD_PAGE_SIZE);
899 
900 	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
901 		warn_invalid_dmar(drhd->address, " returns all ones");
902 		return -EINVAL;
903 	}
904 
905 	return 0;
906 }
907 
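/*
 * Early (boot time) detection: map the DMAR table, validate each DRHD's
 * register block and, if remapping is not disabled, mark the IOMMU as
 * detected, request PCI ACS and hook up the x86 init/shutdown callbacks.
 * The table mapping is dropped again here; parse_dmar_table() re-maps it
 * later.
 */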
908 int __init detect_intel_iommu(void)
909 {
910 	int ret;
911 	struct dmar_res_callback validate_drhd_cb = {
912 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
913 		.ignore_unhandled = true,
914 	};
915 
916 	down_write(&dmar_global_lock);
917 	ret = dmar_table_detect();
918 	if (!ret)
919 		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
920 					   &validate_drhd_cb);
921 	if (!ret && !no_iommu && !iommu_detected &&
922 	    (!dmar_disabled || dmar_platform_optin())) {
923 		iommu_detected = 1;
924 		/* Make sure ACS will be enabled */
925 		pci_request_acs();
926 	}
927 
928 #ifdef CONFIG_X86
929 	if (!ret) {
930 		x86_init.iommu.iommu_init = intel_iommu_init;
931 		x86_platform.iommu_shutdown = intel_iommu_shutdown;
932 	}
933 
934 #endif
935 
936 	if (dmar_tbl) {
937 		acpi_put_table(dmar_tbl);
938 		dmar_tbl = NULL;
939 	}
940 	up_write(&dmar_global_lock);
941 
942 	return ret ? ret : 1;
943 }
944 
945 static void unmap_iommu(struct intel_iommu *iommu)
946 {
947 	iounmap(iommu->reg);
948 	release_mem_region(iommu->reg_phys, iommu->reg_size);
949 }
950 
951 /**
952  * map_iommu: map the iommu's registers
953  * @iommu: the iommu to map
954  * @phys_addr: the physical address of the base register
955  *
956  * Memory map the iommu's registers.  Start w/ a single page, and
957  * possibly expand if that turns out to be insufficient.
958  */
959 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
960 {
961 	int map_size, err = 0;
962 
963 	iommu->reg_phys = phys_addr;
964 	iommu->reg_size = VTD_PAGE_SIZE;
965 
966 	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
967 		pr_err("Can't reserve memory\n");
968 		err = -EBUSY;
969 		goto out;
970 	}
971 
972 	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
973 	if (!iommu->reg) {
974 		pr_err("Can't map the region\n");
975 		err = -ENOMEM;
976 		goto release;
977 	}
978 
979 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
980 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
981 
982 	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
983 		err = -EINVAL;
984 		warn_invalid_dmar(phys_addr, " returns all ones");
985 		goto unmap;
986 	}
987 	iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
988 
989 	/* the registers might span more than one page */
990 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
991 			 cap_max_fault_reg_offset(iommu->cap));
992 	map_size = VTD_PAGE_ALIGN(map_size);
993 	if (map_size > iommu->reg_size) {
994 		iounmap(iommu->reg);
995 		release_mem_region(iommu->reg_phys, iommu->reg_size);
996 		iommu->reg_size = map_size;
997 		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
998 					iommu->name)) {
999 			pr_err("Can't reserve memory\n");
1000 			err = -EBUSY;
1001 			goto out;
1002 		}
1003 		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1004 		if (!iommu->reg) {
1005 			pr_err("Can't map the region\n");
1006 			err = -ENOMEM;
1007 			goto release;
1008 		}
1009 	}
1010 	err = 0;
1011 	goto out;
1012 
1013 unmap:
1014 	iounmap(iommu->reg);
1015 release:
1016 	release_mem_region(iommu->reg_phys, iommu->reg_size);
1017 out:
1018 	return err;
1019 }
1020 
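/*
 * Allocate a unique sequence id from the dmar_seq_ids bitmap and name the
 * unit "dmar<seq_id>"; returns -1 if all DMAR_UNITS_SUPPORTED ids are taken.
 */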
1021 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1022 {
1023 	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1024 					    DMAR_UNITS_SUPPORTED);
1025 	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1026 		iommu->seq_id = -1;
1027 	} else {
1028 		set_bit(iommu->seq_id, dmar_seq_ids);
1029 		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1030 	}
1031 
1032 	return iommu->seq_id;
1033 }
1034 
1035 static void dmar_free_seq_id(struct intel_iommu *iommu)
1036 {
1037 	if (iommu->seq_id >= 0) {
1038 		clear_bit(iommu->seq_id, dmar_seq_ids);
1039 		iommu->seq_id = -1;
1040 	}
1041 }
1042 
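/*
 * Allocate and initialize the intel_iommu instance for a DRHD: map its
 * register set, read the capability registers, compute the (max) supported
 * AGAW, mirror the current hardware status into gcmd, and, on hotplug,
 * register the unit with sysfs and the IOMMU core.
 */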
1043 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1044 {
1045 	struct intel_iommu *iommu;
1046 	u32 ver, sts;
1047 	int agaw = -1;
1048 	int msagaw = -1;
1049 	int err;
1050 
1051 	if (!drhd->reg_base_addr) {
1052 		warn_invalid_dmar(0, "");
1053 		return -EINVAL;
1054 	}
1055 
1056 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1057 	if (!iommu)
1058 		return -ENOMEM;
1059 
1060 	if (dmar_alloc_seq_id(iommu) < 0) {
1061 		pr_err("Failed to allocate seq_id\n");
1062 		err = -ENOSPC;
1063 		goto error;
1064 	}
1065 
1066 	err = map_iommu(iommu, drhd->reg_base_addr);
1067 	if (err) {
1068 		pr_err("Failed to map %s\n", iommu->name);
1069 		goto error_free_seq_id;
1070 	}
1071 
1072 	err = -EINVAL;
1073 	if (cap_sagaw(iommu->cap) == 0) {
1074 		pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1075 			iommu->name);
1076 		drhd->ignored = 1;
1077 	}
1078 
1079 	if (!drhd->ignored) {
1080 		agaw = iommu_calculate_agaw(iommu);
1081 		if (agaw < 0) {
1082 			pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1083 			       iommu->seq_id);
1084 			drhd->ignored = 1;
1085 		}
1086 	}
1087 	if (!drhd->ignored) {
1088 		msagaw = iommu_calculate_max_sagaw(iommu);
1089 		if (msagaw < 0) {
1090 			pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1091 			       iommu->seq_id);
1092 			drhd->ignored = 1;
1093 			agaw = -1;
1094 		}
1095 	}
1096 	iommu->agaw = agaw;
1097 	iommu->msagaw = msagaw;
1098 	iommu->segment = drhd->segment;
1099 
1100 	iommu->node = NUMA_NO_NODE;
1101 
1102 	ver = readl(iommu->reg + DMAR_VER_REG);
1103 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1104 		iommu->name,
1105 		(unsigned long long)drhd->reg_base_addr,
1106 		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1107 		(unsigned long long)iommu->cap,
1108 		(unsigned long long)iommu->ecap);
1109 
1110 	/* Reflect status in gcmd */
1111 	sts = readl(iommu->reg + DMAR_GSTS_REG);
1112 	if (sts & DMA_GSTS_IRES)
1113 		iommu->gcmd |= DMA_GCMD_IRE;
1114 	if (sts & DMA_GSTS_TES)
1115 		iommu->gcmd |= DMA_GCMD_TE;
1116 	if (sts & DMA_GSTS_QIES)
1117 		iommu->gcmd |= DMA_GCMD_QIE;
1118 
1119 	raw_spin_lock_init(&iommu->register_lock);
1120 
1121 	/*
1122 	 * This is only for hotplug; at boot time intel_iommu_enabled won't
1123 	 * be set yet. When intel_iommu_init() runs, it registers the units
1124 	 * present at boot time, then sets intel_iommu_enabled.
1125 	 */
1126 	if (intel_iommu_enabled && !drhd->ignored) {
1127 		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1128 					     intel_iommu_groups,
1129 					     "%s", iommu->name);
1130 		if (err)
1131 			goto err_unmap;
1132 
1133 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1134 
1135 		err = iommu_device_register(&iommu->iommu);
1136 		if (err)
1137 			goto err_unmap;
1138 	}
1139 
1140 	drhd->iommu = iommu;
1141 	iommu->drhd = drhd;
1142 
1143 	return 0;
1144 
1145 err_unmap:
1146 	unmap_iommu(iommu);
1147 error_free_seq_id:
1148 	dmar_free_seq_id(iommu);
1149 error:
1150 	kfree(iommu);
1151 	return err;
1152 }
1153 
1154 static void free_iommu(struct intel_iommu *iommu)
1155 {
1156 	if (intel_iommu_enabled && !iommu->drhd->ignored) {
1157 		iommu_device_unregister(&iommu->iommu);
1158 		iommu_device_sysfs_remove(&iommu->iommu);
1159 	}
1160 
1161 	if (iommu->irq) {
1162 		if (iommu->pr_irq) {
1163 			free_irq(iommu->pr_irq, iommu);
1164 			dmar_free_hwirq(iommu->pr_irq);
1165 			iommu->pr_irq = 0;
1166 		}
1167 		free_irq(iommu->irq, iommu);
1168 		dmar_free_hwirq(iommu->irq);
1169 		iommu->irq = 0;
1170 	}
1171 
1172 	if (iommu->qi) {
1173 		free_page((unsigned long)iommu->qi->desc);
1174 		kfree(iommu->qi->desc_status);
1175 		kfree(iommu->qi);
1176 	}
1177 
1178 	if (iommu->reg)
1179 		unmap_iommu(iommu);
1180 
1181 	dmar_free_seq_id(iommu);
1182 	kfree(iommu);
1183 }
1184 
1185 /*
1186  * Reclaim all the submitted descriptors which have completed their work.
1187  */
1188 static inline void reclaim_free_desc(struct q_inval *qi)
1189 {
1190 	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1191 	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1192 		qi->desc_status[qi->free_tail] = QI_FREE;
1193 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1194 		qi->free_cnt++;
1195 	}
1196 }
1197 
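/*
 * Check the fault status register for errors affecting a pending queued
 * invalidation. On an IQE the offending descriptor (if it is ours) is
 * replaced with a copy of the wait descriptor and -EINVAL is returned; on
 * an ITE the pending wait descriptors are marked aborted and -EAGAIN is
 * returned if ours was among them; an ICE is simply acknowledged.
 */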
1198 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1199 {
1200 	u32 fault;
1201 	int head, tail;
1202 	struct q_inval *qi = iommu->qi;
1203 	int shift = qi_shift(iommu);
1204 
1205 	if (qi->desc_status[wait_index] == QI_ABORT)
1206 		return -EAGAIN;
1207 
1208 	fault = readl(iommu->reg + DMAR_FSTS_REG);
1209 
1210 	/*
1211 	 * If IQE happens, the head points to the descriptor associated
1212 	 * with the error. No new descriptors are fetched until the IQE
1213 	 * is cleared.
1214 	 */
1215 	if (fault & DMA_FSTS_IQE) {
1216 		head = readl(iommu->reg + DMAR_IQH_REG);
1217 		if ((head >> shift) == index) {
1218 			struct qi_desc *desc = qi->desc + head;
1219 
1220 			/*
1221 			 * desc->qw2 and desc->qw3 are either reserved or
1222 			 * used by software as private data. We won't print
1223 			 * out these two qw's for security considerations.
1224 			 */
1225 			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1226 			       (unsigned long long)desc->qw0,
1227 			       (unsigned long long)desc->qw1);
1228 			memcpy(desc, qi->desc + (wait_index << shift),
1229 			       1 << shift);
1230 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1231 			return -EINVAL;
1232 		}
1233 	}
1234 
1235 	/*
1236 	 * If ITE happens, all pending wait_desc commands are aborted.
1237 	 * No new descriptors are fetched until the ITE is cleared.
1238 	 */
1239 	if (fault & DMA_FSTS_ITE) {
1240 		head = readl(iommu->reg + DMAR_IQH_REG);
1241 		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1242 		head |= 1;
1243 		tail = readl(iommu->reg + DMAR_IQT_REG);
1244 		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1245 
1246 		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1247 
1248 		do {
1249 			if (qi->desc_status[head] == QI_IN_USE)
1250 				qi->desc_status[head] = QI_ABORT;
1251 			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1252 		} while (head != tail);
1253 
1254 		if (qi->desc_status[wait_index] == QI_ABORT)
1255 			return -EAGAIN;
1256 	}
1257 
1258 	if (fault & DMA_FSTS_ICE)
1259 		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1260 
1261 	return 0;
1262 }
1263 
1264 /*
1265  * Function to submit invalidation descriptors of all types to the queued
1266  * invalidation interface (QI). Multiple descriptors can be submitted at a
1267  * time; a wait descriptor is appended to each submission to ensure that
1268  * hardware has completed the invalidation before returning. Wait descriptors
1269  * can be part of the submission, but they will not be polled for completion.
1270  */
1271 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1272 		   unsigned int count, unsigned long options)
1273 {
1274 	struct q_inval *qi = iommu->qi;
1275 	struct qi_desc wait_desc;
1276 	int wait_index, index;
1277 	unsigned long flags;
1278 	int offset, shift;
1279 	int rc, i;
1280 
1281 	if (!qi)
1282 		return 0;
1283 
1284 restart:
1285 	rc = 0;
1286 
1287 	raw_spin_lock_irqsave(&qi->q_lock, flags);
1288 	/*
1289 	 * Check if we have enough empty slots in the queue to submit;
1290 	 * the calculation is based on:
1291 	 * # of desc + 1 wait desc + 1 space between head and tail
1292 	 */
1293 	while (qi->free_cnt < count + 2) {
1294 		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1295 		cpu_relax();
1296 		raw_spin_lock_irqsave(&qi->q_lock, flags);
1297 	}
1298 
1299 	index = qi->free_head;
1300 	wait_index = (index + count) % QI_LENGTH;
1301 	shift = qi_shift(iommu);
1302 
1303 	for (i = 0; i < count; i++) {
1304 		offset = ((index + i) % QI_LENGTH) << shift;
1305 		memcpy(qi->desc + offset, &desc[i], 1 << shift);
1306 		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1307 	}
1308 	qi->desc_status[wait_index] = QI_IN_USE;
1309 
1310 	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1311 			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1312 	if (options & QI_OPT_WAIT_DRAIN)
1313 		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1314 	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1315 	wait_desc.qw2 = 0;
1316 	wait_desc.qw3 = 0;
1317 
1318 	offset = wait_index << shift;
1319 	memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1320 
1321 	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1322 	qi->free_cnt -= count + 1;
1323 
1324 	/*
1325 	 * update the HW tail register indicating the presence of
1326 	 * new descriptors.
1327 	 */
1328 	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1329 
1330 	while (qi->desc_status[wait_index] != QI_DONE) {
1331 		/*
1332 		 * We will leave the interrupts disabled, to prevent the interrupt
1333 		 * context from queueing another cmd while a cmd is already submitted
1334 		 * and waiting for completion on this cpu. This is to avoid
1335 		 * a deadlock where the interrupt context can wait indefinitely
1336 		 * for free slots in the queue.
1337 		 */
1338 		rc = qi_check_fault(iommu, index, wait_index);
1339 		if (rc)
1340 			break;
1341 
1342 		raw_spin_unlock(&qi->q_lock);
1343 		cpu_relax();
1344 		raw_spin_lock(&qi->q_lock);
1345 	}
1346 
1347 	for (i = 0; i < count; i++)
1348 		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1349 
1350 	reclaim_free_desc(qi);
1351 	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1352 
1353 	if (rc == -EAGAIN)
1354 		goto restart;
1355 
1356 	return rc;
1357 }
1358 
1359 /*
1360  * Flush the global interrupt entry cache.
1361  */
1362 void qi_global_iec(struct intel_iommu *iommu)
1363 {
1364 	struct qi_desc desc;
1365 
1366 	desc.qw0 = QI_IEC_TYPE;
1367 	desc.qw1 = 0;
1368 	desc.qw2 = 0;
1369 	desc.qw3 = 0;
1370 
1371 	/* should never fail */
1372 	qi_submit_sync(iommu, &desc, 1, 0);
1373 }
1374 
1375 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1376 		      u64 type)
1377 {
1378 	struct qi_desc desc;
1379 
1380 	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1381 			| QI_CC_GRAN(type) | QI_CC_TYPE;
1382 	desc.qw1 = 0;
1383 	desc.qw2 = 0;
1384 	desc.qw3 = 0;
1385 
1386 	qi_submit_sync(iommu, &desc, 1, 0);
1387 }
1388 
1389 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1390 		    unsigned int size_order, u64 type)
1391 {
1392 	u8 dw = 0, dr = 0;
1393 
1394 	struct qi_desc desc;
1395 	int ih = 0;
1396 
1397 	if (cap_write_drain(iommu->cap))
1398 		dw = 1;
1399 
1400 	if (cap_read_drain(iommu->cap))
1401 		dr = 1;
1402 
1403 	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1404 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1405 	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1406 		| QI_IOTLB_AM(size_order);
1407 	desc.qw2 = 0;
1408 	desc.qw3 = 0;
1409 
1410 	qi_submit_sync(iommu, &desc, 1, 0);
1411 }
1412 
1413 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1414 			u16 qdep, u64 addr, unsigned mask)
1415 {
1416 	struct qi_desc desc;
1417 
1418 	if (mask) {
1419 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1420 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1421 	} else
1422 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1423 
1424 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1425 		qdep = 0;
1426 
1427 	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1428 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1429 	desc.qw2 = 0;
1430 	desc.qw3 = 0;
1431 
1432 	qi_submit_sync(iommu, &desc, 1, 0);
1433 }
1434 
1435 /* PASID-based IOTLB invalidation */
1436 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1437 		     unsigned long npages, bool ih)
1438 {
1439 	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1440 
1441 	/*
1442 	 * npages == -1 means a PASID-selective invalidation; otherwise,
1443 	 * a positive value means a Page-selective-within-PASID invalidation.
1444 	 * 0 is not a valid input.
1445 	 */
1446 	if (WARN_ON(!npages)) {
1447 		pr_err("Invalid input npages = %ld\n", npages);
1448 		return;
1449 	}
1450 
1451 	if (npages == -1) {
1452 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1453 				QI_EIOTLB_DID(did) |
1454 				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1455 				QI_EIOTLB_TYPE;
1456 		desc.qw1 = 0;
1457 	} else {
1458 		int mask = ilog2(__roundup_pow_of_two(npages));
1459 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1460 
1461 		if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1462 			addr &= ~(align - 1);
1463 
1464 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1465 				QI_EIOTLB_DID(did) |
1466 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1467 				QI_EIOTLB_TYPE;
1468 		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1469 				QI_EIOTLB_IH(ih) |
1470 				QI_EIOTLB_AM(mask);
1471 	}
1472 
1473 	qi_submit_sync(iommu, &desc, 1, 0);
1474 }
1475 
1476 /* PASID-based device IOTLB Invalidate */
1477 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1478 			      u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1479 {
1480 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1481 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1482 
1483 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1484 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1485 		QI_DEV_IOTLB_PFSID(pfsid);
1486 
1487 	/*
1488 	 * If S bit is 0, we only flush a single page. If S bit is set,
1489 	 * the least significant zero bit indicates the invalidation address
1490 	 * range. VT-d spec 6.5.2.6.
1491 	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1492 	 * size order = 0 is PAGE_SIZE 4KB
1493 	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1494 	 * ECAP.
1495 	 */
1496 	if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0))
1497 		pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1498 				    addr, size_order);
1499 
1500 	/* Take page address */
1501 	desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1502 
1503 	if (size_order) {
1504 		/*
1505 		 * Existing 0s in the address below size_order may be taken as
1506 		 * the least significant zero bit; we must set them to 1s to
1507 		 * avoid encoding a smaller size than desired.
1508 		 */
1509 		desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1510 					VTD_PAGE_SHIFT);
1511 		/* Clear size_order bit to indicate size */
1512 		desc.qw1 &= ~mask;
1513 		/* Set the S bit to indicate flushing more than 1 page */
1514 		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1515 	}
1516 
1517 	qi_submit_sync(iommu, &desc, 1, 0);
1518 }
1519 
1520 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1521 			  u64 granu, u32 pasid)
1522 {
1523 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1524 
1525 	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1526 			QI_PC_GRAN(granu) | QI_PC_TYPE;
1527 	qi_submit_sync(iommu, &desc, 1, 0);
1528 }
1529 
1530 /*
1531  * Disable Queued Invalidation interface.
1532  */
1533 void dmar_disable_qi(struct intel_iommu *iommu)
1534 {
1535 	unsigned long flags;
1536 	u32 sts;
1537 	cycles_t start_time = get_cycles();
1538 
1539 	if (!ecap_qis(iommu->ecap))
1540 		return;
1541 
1542 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1543 
1544 	sts =  readl(iommu->reg + DMAR_GSTS_REG);
1545 	if (!(sts & DMA_GSTS_QIES))
1546 		goto end;
1547 
1548 	/*
1549 	 * Give the HW a chance to complete the pending invalidation requests.
1550 	 */
1551 	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1552 		readl(iommu->reg + DMAR_IQH_REG)) &&
1553 		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1554 		cpu_relax();
1555 
1556 	iommu->gcmd &= ~DMA_GCMD_QIE;
1557 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1558 
1559 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1560 		      !(sts & DMA_GSTS_QIES), sts);
1561 end:
1562 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1563 }
1564 
1565 /*
1566  * Enable queued invalidation.
1567  */
1568 static void __dmar_enable_qi(struct intel_iommu *iommu)
1569 {
1570 	u32 sts;
1571 	unsigned long flags;
1572 	struct q_inval *qi = iommu->qi;
1573 	u64 val = virt_to_phys(qi->desc);
1574 
1575 	qi->free_head = qi->free_tail = 0;
1576 	qi->free_cnt = QI_LENGTH;
1577 
1578 	/*
1579 	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1580 	 * is present.
1581 	 */
1582 	if (ecap_smts(iommu->ecap))
1583 		val |= (1 << 11) | 1;
1584 
1585 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1586 
1587 	/* write zero to the tail reg */
1588 	writel(0, iommu->reg + DMAR_IQT_REG);
1589 
1590 	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1591 
1592 	iommu->gcmd |= DMA_GCMD_QIE;
1593 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1594 
1595 	/* Make sure hardware complete it */
1596 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1597 
1598 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1599 }
1600 
1601 /*
1602  * Enable Queued Invalidation interface. This is a must to support
1603  * interrupt-remapping. Also used by DMA-remapping, which replaces
1604  * register-based IOTLB invalidation.
1605  */
1606 int dmar_enable_qi(struct intel_iommu *iommu)
1607 {
1608 	struct q_inval *qi;
1609 	struct page *desc_page;
1610 
1611 	if (!ecap_qis(iommu->ecap))
1612 		return -ENOENT;
1613 
1614 	/*
1615 	 * queued invalidation is already set up and enabled.
1616 	 */
1617 	if (iommu->qi)
1618 		return 0;
1619 
1620 	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1621 	if (!iommu->qi)
1622 		return -ENOMEM;
1623 
1624 	qi = iommu->qi;
1625 
1626 	/*
1627 	 * Need two pages to accommodate 256 descriptors of 256 bits each
1628 	 * if the remapping hardware supports scalable mode translation.
1629 	 */
1630 	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1631 				     !!ecap_smts(iommu->ecap));
1632 	if (!desc_page) {
1633 		kfree(qi);
1634 		iommu->qi = NULL;
1635 		return -ENOMEM;
1636 	}
1637 
1638 	qi->desc = page_address(desc_page);
1639 
1640 	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1641 	if (!qi->desc_status) {
1642 		free_page((unsigned long) qi->desc);
1643 		kfree(qi);
1644 		iommu->qi = NULL;
1645 		return -ENOMEM;
1646 	}
1647 
1648 	raw_spin_lock_init(&qi->q_lock);
1649 
1650 	__dmar_enable_qi(iommu);
1651 
1652 	return 0;
1653 }
1654 
1655 /* iommu interrupt handling. Most of it is MSI-like. */
1656 
1657 enum faulttype {
1658 	DMA_REMAP,
1659 	INTR_REMAP,
1660 	UNKNOWN,
1661 };
1662 
1663 static const char *dma_remap_fault_reasons[] =
1664 {
1665 	"Software",
1666 	"Present bit in root entry is clear",
1667 	"Present bit in context entry is clear",
1668 	"Invalid context entry",
1669 	"Access beyond MGAW",
1670 	"PTE Write access is not set",
1671 	"PTE Read access is not set",
1672 	"Next page table ptr is invalid",
1673 	"Root table address invalid",
1674 	"Context table ptr is invalid",
1675 	"non-zero reserved fields in RTP",
1676 	"non-zero reserved fields in CTP",
1677 	"non-zero reserved fields in PTE",
1678 	"PCE for translation request specifies blocking",
1679 };
1680 
1681 static const char * const dma_remap_sm_fault_reasons[] = {
1682 	"SM: Invalid Root Table Address",
1683 	"SM: TTM 0 for request with PASID",
1684 	"SM: TTM 0 for page group request",
1685 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1686 	"SM: Error attempting to access Root Entry",
1687 	"SM: Present bit in Root Entry is clear",
1688 	"SM: Non-zero reserved field set in Root Entry",
1689 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1690 	"SM: Error attempting to access Context Entry",
1691 	"SM: Present bit in Context Entry is clear",
1692 	"SM: Non-zero reserved field set in the Context Entry",
1693 	"SM: Invalid Context Entry",
1694 	"SM: DTE field in Context Entry is clear",
1695 	"SM: PASID Enable field in Context Entry is clear",
1696 	"SM: PASID is larger than the max in Context Entry",
1697 	"SM: PRE field in Context-Entry is clear",
1698 	"SM: RID_PASID field error in Context-Entry",
1699 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1700 	"SM: Error attempting to access the PASID Directory Entry",
1701 	"SM: Present bit in Directory Entry is clear",
1702 	"SM: Non-zero reserved field set in PASID Directory Entry",
1703 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1704 	"SM: Error attempting to access PASID Table Entry",
1705 	"SM: Present bit in PASID Table Entry is clear",
1706 	"SM: Non-zero reserved field set in PASID Table Entry",
1707 	"SM: Invalid Scalable-Mode PASID Table Entry",
1708 	"SM: ERE field is clear in PASID Table Entry",
1709 	"SM: SRE field is clear in PASID Table Entry",
1710 	"Unknown", "Unknown",/* 0x5E-0x5F */
1711 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1712 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1713 	"SM: Error attempting to access first-level paging entry",
1714 	"SM: Present bit in first-level paging entry is clear",
1715 	"SM: Non-zero reserved field set in first-level paging entry",
1716 	"SM: Error attempting to access FL-PML4 entry",
1717 	"SM: First-level entry address beyond MGAW in Nested translation",
1718 	"SM: Read permission error in FL-PML4 entry in Nested translation",
1719 	"SM: Read permission error in first-level paging entry in Nested translation",
1720 	"SM: Write permission error in first-level paging entry in Nested translation",
1721 	"SM: Error attempting to access second-level paging entry",
1722 	"SM: Read/Write permission error in second-level paging entry",
1723 	"SM: Non-zero reserved field set in second-level paging entry",
1724 	"SM: Invalid second-level page table pointer",
1725 	"SM: A/D bit update needed in second-level entry when set up in no snoop",
1726 	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1727 	"SM: Address in first-level translation is not canonical",
1728 	"SM: U/S set 0 for first-level translation with user privilege",
1729 	"SM: No execute permission for request with PASID and ER=1",
1730 	"SM: Address beyond the DMA hardware max",
1731 	"SM: Second-level entry address beyond the max",
1732 	"SM: No write permission for Write/AtomicOp request",
1733 	"SM: No read permission for Read/AtomicOp request",
1734 	"SM: Invalid address-interrupt address",
1735 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1736 	"SM: A/D bit update needed in first-level entry when set up in no snoop",
1737 };
1738 
1739 static const char *irq_remap_fault_reasons[] =
1740 {
1741 	"Detected reserved fields in the decoded interrupt-remapped request",
1742 	"Interrupt index exceeded the interrupt-remapping table size",
1743 	"Present field in the IRTE entry is clear",
1744 	"Error accessing interrupt-remapping table pointed by IRTA_REG",
1745 	"Detected reserved fields in the IRTE entry",
1746 	"Blocked a compatibility format interrupt request",
1747 	"Blocked an interrupt request due to source-id verification failure",
1748 };
1749 
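/*
 * Translate a raw fault reason code into a human-readable string and
 * classify it as a DMA-remapping, interrupt-remapping or unknown fault.
 * Codes 0x20+ index the interrupt remapping table and 0x30+ the scalable
 * mode table.
 */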
1750 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1751 {
1752 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1753 					ARRAY_SIZE(irq_remap_fault_reasons))) {
1754 		*fault_type = INTR_REMAP;
1755 		return irq_remap_fault_reasons[fault_reason - 0x20];
1756 	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1757 			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1758 		*fault_type = DMA_REMAP;
1759 		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1760 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1761 		*fault_type = DMA_REMAP;
1762 		return dma_remap_fault_reasons[fault_reason];
1763 	} else {
1764 		*fault_type = UNKNOWN;
1765 		return "Unknown";
1766 	}
1767 }
1768 
1769 
1770 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1771 {
1772 	if (iommu->irq == irq)
1773 		return DMAR_FECTL_REG;
1774 	else if (iommu->pr_irq == irq)
1775 		return DMAR_PECTL_REG;
1776 	else
1777 		BUG();
1778 }
1779 
1780 void dmar_msi_unmask(struct irq_data *data)
1781 {
1782 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1783 	int reg = dmar_msi_reg(iommu, data->irq);
1784 	unsigned long flag;
1785 
1786 	/* unmask it */
1787 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1788 	writel(0, iommu->reg + reg);
1789 	/* Read a reg to force flush the posted write */
1790 	readl(iommu->reg + reg);
1791 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1792 }
1793 
1794 void dmar_msi_mask(struct irq_data *data)
1795 {
1796 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1797 	int reg = dmar_msi_reg(iommu, data->irq);
1798 	unsigned long flag;
1799 
1800 	/* mask it */
1801 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1802 	writel(DMA_FECTL_IM, iommu->reg + reg);
1803 	/* Read a reg to force flush the post write */
1804 	/* Read a reg to force flush the posted write */
1805 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1806 }
1807 
1808 void dmar_msi_write(int irq, struct msi_msg *msg)
1809 {
1810 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1811 	int reg = dmar_msi_reg(iommu, irq);
1812 	unsigned long flag;
1813 
1814 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1815 	writel(msg->data, iommu->reg + reg + 4);
1816 	writel(msg->address_lo, iommu->reg + reg + 8);
1817 	writel(msg->address_hi, iommu->reg + reg + 12);
1818 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1819 }
1820 
1821 void dmar_msi_read(int irq, struct msi_msg *msg)
1822 {
1823 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1824 	int reg = dmar_msi_reg(iommu, irq);
1825 	unsigned long flag;
1826 
1827 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1828 	msg->data = readl(iommu->reg + reg + 4);
1829 	msg->address_lo = readl(iommu->reg + reg + 8);
1830 	msg->address_hi = readl(iommu->reg + reg + 12);
1831 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1832 }
1833 
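/* Decode and log one fault record collected by dmar_fault(). */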
1834 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1835 		u8 fault_reason, u32 pasid, u16 source_id,
1836 		unsigned long long addr)
1837 {
1838 	const char *reason;
1839 	int fault_type;
1840 
1841 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1842 
1843 	if (fault_type == INTR_REMAP)
1844 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1845 			source_id >> 8, PCI_SLOT(source_id & 0xFF),
1846 			PCI_FUNC(source_id & 0xFF), addr >> 48,
1847 			fault_reason, reason);
1848 	else
1849 		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1850 		       type ? "DMA Read" : "DMA Write",
1851 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1852 		       PCI_FUNC(source_id & 0xFF), pasid, addr,
1853 		       fault_reason, reason);
1854 	return 0;
1855 }
1856 
1857 #define PRIMARY_FAULT_REG_LEN (16)
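/*
 * Fault event interrupt handler: walk the primary fault recording registers
 * starting at the index reported in DMAR_FSTS_REG, log and clear each
 * pending record, then clear the fault overflow/pending status bits.
 */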
1858 irqreturn_t dmar_fault(int irq, void *dev_id)
1859 {
1860 	struct intel_iommu *iommu = dev_id;
1861 	int reg, fault_index;
1862 	u32 fault_status;
1863 	unsigned long flag;
1864 	static DEFINE_RATELIMIT_STATE(rs,
1865 				      DEFAULT_RATELIMIT_INTERVAL,
1866 				      DEFAULT_RATELIMIT_BURST);
1867 
1868 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1869 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1870 	if (fault_status && __ratelimit(&rs))
1871 		pr_err("DRHD: handling fault status reg %x\n", fault_status);
1872 
1873 	/* TBD: ignore advanced fault log currently */
1874 	if (!(fault_status & DMA_FSTS_PPF))
1875 		goto unlock_exit;
1876 
1877 	fault_index = dma_fsts_fault_record_index(fault_status);
1878 	reg = cap_fault_reg_offset(iommu->cap);
1879 	while (1) {
1880 		/* Disable printing, simply clear the fault when ratelimited */
1881 		bool ratelimited = !__ratelimit(&rs);
1882 		u8 fault_reason;
1883 		u16 source_id;
1884 		u64 guest_addr;
1885 		u32 pasid;
1886 		int type;
1887 		u32 data;
1888 		bool pasid_present;
1889 
1890 		/* highest 32 bits */
1891 		data = readl(iommu->reg + reg +
1892 				fault_index * PRIMARY_FAULT_REG_LEN + 12);
1893 		if (!(data & DMA_FRCD_F))
1894 			break;
1895 
1896 		if (!ratelimited) {
1897 			fault_reason = dma_frcd_fault_reason(data);
1898 			type = dma_frcd_type(data);
1899 
1900 			pasid = dma_frcd_pasid_value(data);
1901 			data = readl(iommu->reg + reg +
1902 				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
1903 			source_id = dma_frcd_source_id(data);
1904 
1905 			pasid_present = dma_frcd_pasid_present(data);
1906 			guest_addr = dmar_readq(iommu->reg + reg +
1907 					fault_index * PRIMARY_FAULT_REG_LEN);
1908 			guest_addr = dma_frcd_page_addr(guest_addr);
1909 		}
1910 
1911 		/* clear the fault */
1912 		writel(DMA_FRCD_F, iommu->reg + reg +
1913 			fault_index * PRIMARY_FAULT_REG_LEN + 12);
1914 
1915 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1916 
1917 		if (!ratelimited)
1918 			/* Use pasid -1 if pasid is not present */
1919 			dmar_fault_do_one(iommu, type, fault_reason,
1920 					  pasid_present ? pasid : -1,
1921 					  source_id, guest_addr);
1922 
1923 		fault_index++;
1924 		if (fault_index >= cap_num_fault_regs(iommu->cap))
1925 			fault_index = 0;
1926 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1927 	}
1928 
1929 	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1930 	       iommu->reg + DMAR_FSTS_REG);
1931 
1932 unlock_exit:
1933 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1934 	return IRQ_HANDLED;
1935 }
1936 
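/*
 * Allocate the fault event interrupt for @iommu (if not done already) and
 * install dmar_fault() as its handler.
 */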
1937 int dmar_set_interrupt(struct intel_iommu *iommu)
1938 {
1939 	int irq, ret;
1940 
1941 	/*
1942 	 * Check if the fault interrupt is already initialized.
1943 	 */
1944 	if (iommu->irq)
1945 		return 0;
1946 
1947 	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1948 	if (irq > 0) {
1949 		iommu->irq = irq;
1950 	} else {
1951 		pr_err("No free IRQ vectors\n");
1952 		return -EINVAL;
1953 	}
1954 
1955 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1956 	if (ret)
1957 		pr_err("Can't request irq\n");
1958 	return ret;
1959 }
1960 
1961 int __init enable_drhd_fault_handling(void)
1962 {
1963 	struct dmar_drhd_unit *drhd;
1964 	struct intel_iommu *iommu;
1965 
1966 	/*
1967 	 * Enable fault control interrupt.
1968 	 */
1969 	for_each_iommu(iommu, drhd) {
1970 		u32 fault_status;
1971 		int ret = dmar_set_interrupt(iommu);
1972 
1973 		if (ret) {
1974 			pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1975 			       (unsigned long long)drhd->reg_base_addr, ret);
1976 			return -1;
1977 		}
1978 
1979 		/*
1980 		 * Clear any previous faults.
1981 		 */
1982 		dmar_fault(iommu->irq, iommu);
1983 		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1984 		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1985 	}
1986 
1987 	return 0;
1988 }
1989 
1990 /*
1991  * Re-enable Queued Invalidation interface.
1992  */
1993 int dmar_reenable_qi(struct intel_iommu *iommu)
1994 {
1995 	if (!ecap_qis(iommu->ecap))
1996 		return -ENOENT;
1997 
1998 	if (!iommu->qi)
1999 		return -ENOENT;
2000 
2001 	/*
2002 	 * First disable queued invalidation.
2003 	 */
2004 	dmar_disable_qi(iommu);
2005 	/*
2006 	 * Then enable queued invalidation again. Since there are no pending
2007 	 * invalidation requests now, it's safe to re-enable queued
2008 	 * invalidation.
2009 	 */
2010 	__dmar_enable_qi(iommu);
2011 
2012 	return 0;
2013 }
2014 
2015 /*
2016  * Check interrupt remapping support in DMAR table description.
2017  */
2018 int __init dmar_ir_support(void)
2019 {
2020 	struct acpi_table_dmar *dmar;
2021 	dmar = (struct acpi_table_dmar *)dmar_tbl;
2022 	if (!dmar)
2023 		return 0;
2024 	return dmar->flags & 0x1;
2025 }
2026 
2027 /* Check whether DMAR units are in use */
2028 static inline bool dmar_in_use(void)
2029 {
2030 	return irq_remapping_enabled || intel_iommu_enabled;
2031 }
2032 
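/*
 * Late initcall: if neither interrupt remapping nor the Intel IOMMU ended
 * up being used, release the DRHD structures that were parsed at boot.
 */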
2033 static int __init dmar_free_unused_resources(void)
2034 {
2035 	struct dmar_drhd_unit *dmaru, *dmaru_n;
2036 
2037 	if (dmar_in_use())
2038 		return 0;
2039 
2040 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2041 		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2042 
2043 	down_write(&dmar_global_lock);
2044 	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2045 		list_del(&dmaru->list);
2046 		dmar_free_drhd(dmaru);
2047 	}
2048 	up_write(&dmar_global_lock);
2049 
2050 	return 0;
2051 }
2052 
2053 late_initcall(dmar_free_unused_resources);
2054 IOMMU_INIT_POST(detect_intel_iommu);
2055 
2056 /*
2057  * DMAR Hotplug Support
2058  * For more details, please refer to Intel(R) Virtualization Technology
2059  * for Directed I/O Architecture Specification, Rev 2.2, Section 8.8
2060  * "Remapping Hardware Unit Hot Plug".
2061  */
2062 static guid_t dmar_hp_guid =
2063 	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2064 		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2065 
2066 /*
2067  * Currently there's only one revision and BIOS will not check the revision id,
2068  * so use 0 for safety.
2069  */
2070 #define	DMAR_DSM_REV_ID			0
2071 #define	DMAR_DSM_FUNC_DRHD		1
2072 #define	DMAR_DSM_FUNC_ATSR		2
2073 #define	DMAR_DSM_FUNC_RHSA		3
2074 
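/*
 * Check whether the ACPI object behind @handle implements the given DMAR
 * hotplug _DSM function.
 */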
2075 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2076 {
2077 	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2078 }
2079 
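/*
 * Evaluate the requested DMAR _DSM function, which returns a buffer of
 * remapping structures, and dispatch each entry of the matching type
 * to @handler.
 */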
2080 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2081 				  dmar_res_handler_t handler, void *arg)
2082 {
2083 	int ret = -ENODEV;
2084 	union acpi_object *obj;
2085 	struct acpi_dmar_header *start;
2086 	struct dmar_res_callback callback;
2087 	static int res_type[] = {
2088 		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2089 		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2090 		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2091 	};
2092 
2093 	if (!dmar_detect_dsm(handle, func))
2094 		return 0;
2095 
2096 	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2097 				      func, NULL, ACPI_TYPE_BUFFER);
2098 	if (!obj)
2099 		return -ENODEV;
2100 
2101 	memset(&callback, 0, sizeof(callback));
2102 	callback.cb[res_type[func]] = handler;
2103 	callback.arg[res_type[func]] = arg;
2104 	start = (struct acpi_dmar_header *)obj->buffer.pointer;
2105 	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2106 
2107 	ACPI_FREE(obj);
2108 
2109 	return ret;
2110 }
2111 
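/* Bring up interrupt remapping and DMA remapping on a hot-added DRHD unit. */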
2112 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2113 {
2114 	int ret;
2115 	struct dmar_drhd_unit *dmaru;
2116 
2117 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2118 	if (!dmaru)
2119 		return -ENODEV;
2120 
2121 	ret = dmar_ir_hotplug(dmaru, true);
2122 	if (ret == 0)
2123 		ret = dmar_iommu_hotplug(dmaru, true);
2124 
2125 	return ret;
2126 }
2127 
2128 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2129 {
2130 	int i, ret;
2131 	struct device *dev;
2132 	struct dmar_drhd_unit *dmaru;
2133 
2134 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2135 	if (!dmaru)
2136 		return 0;
2137 
2138 	/*
2139 	 * All PCI devices managed by this unit should have been destroyed.
2140 	 */
2141 	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2142 		for_each_active_dev_scope(dmaru->devices,
2143 					  dmaru->devices_cnt, i, dev)
2144 			return -EBUSY;
2145 	}
2146 
2147 	ret = dmar_ir_hotplug(dmaru, false);
2148 	if (ret == 0)
2149 		ret = dmar_iommu_hotplug(dmaru, false);
2150 
2151 	return ret;
2152 }
2153 
2154 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2155 {
2156 	struct dmar_drhd_unit *dmaru;
2157 
2158 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2159 	if (dmaru) {
2160 		list_del_rcu(&dmaru->list);
2161 		synchronize_rcu();
2162 		dmar_free_drhd(dmaru);
2163 	}
2164 
2165 	return 0;
2166 }
2167 
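/*
 * Hot-add path: validate and parse the DRHD, RHSA and ATSR structures
 * reported by the _DSM method, then start the new remapping units,
 * unwinding the completed steps on failure.
 */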
2168 static int dmar_hotplug_insert(acpi_handle handle)
2169 {
2170 	int ret;
2171 	int drhd_count = 0;
2172 
2173 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2174 				     &dmar_validate_one_drhd, (void *)1);
2175 	if (ret)
2176 		goto out;
2177 
2178 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2179 				     &dmar_parse_one_drhd, (void *)&drhd_count);
2180 	if (ret == 0 && drhd_count == 0) {
2181 		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2182 		goto out;
2183 	} else if (ret) {
2184 		goto release_drhd;
2185 	}
2186 
2187 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2188 				     &dmar_parse_one_rhsa, NULL);
2189 	if (ret)
2190 		goto release_drhd;
2191 
2192 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2193 				     &dmar_parse_one_atsr, NULL);
2194 	if (ret)
2195 		goto release_atsr;
2196 
2197 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2198 				     &dmar_hp_add_drhd, NULL);
2199 	if (!ret)
2200 		return 0;
2201 
2202 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2203 			       &dmar_hp_remove_drhd, NULL);
2204 release_atsr:
2205 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2206 			       &dmar_release_one_atsr, NULL);
2207 release_drhd:
2208 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2209 			       &dmar_hp_release_drhd, NULL);
2210 out:
2211 	return ret;
2212 }
2213 
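/*
 * Hot-remove path: check the ATSR structures first, then shut down each
 * remapping unit and release its resources; if shutdown fails part way,
 * bring the units back up.
 */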
2214 static int dmar_hotplug_remove(acpi_handle handle)
2215 {
2216 	int ret;
2217 
2218 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2219 				     &dmar_check_one_atsr, NULL);
2220 	if (ret)
2221 		return ret;
2222 
2223 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2224 				     &dmar_hp_remove_drhd, NULL);
2225 	if (ret == 0) {
2226 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2227 					       &dmar_release_one_atsr, NULL));
2228 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2229 					       &dmar_hp_release_drhd, NULL));
2230 	} else {
2231 		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2232 				       &dmar_hp_add_drhd, NULL);
2233 	}
2234 
2235 	return ret;
2236 }
2237 
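/*
 * acpi_walk_namespace() callback: stop at the first device that implements
 * the DRHD _DSM function and return its handle through @retval.
 */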
2238 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2239 				       void *context, void **retval)
2240 {
2241 	acpi_handle *phdl = retval;
2242 
2243 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2244 		*phdl = handle;
2245 		return AE_CTRL_TERMINATE;
2246 	}
2247 
2248 	return AE_OK;
2249 }
2250 
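/*
 * Common helper for dmar_device_add()/dmar_device_remove(): locate the ACPI
 * handle carrying the DMAR _DSM (the handle itself or one of its
 * descendants) and perform the hot-add or hot-remove under dmar_global_lock.
 */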
2251 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2252 {
2253 	int ret;
2254 	acpi_handle tmp = NULL;
2255 	acpi_status status;
2256 
2257 	if (!dmar_in_use())
2258 		return 0;
2259 
2260 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2261 		tmp = handle;
2262 	} else {
2263 		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2264 					     ACPI_UINT32_MAX,
2265 					     dmar_get_dsm_handle,
2266 					     NULL, NULL, &tmp);
2267 		if (ACPI_FAILURE(status)) {
2268 			pr_warn("Failed to locate _DSM method.\n");
2269 			return -ENXIO;
2270 		}
2271 	}
2272 	if (tmp == NULL)
2273 		return 0;
2274 
2275 	down_write(&dmar_global_lock);
2276 	if (insert)
2277 		ret = dmar_hotplug_insert(tmp);
2278 	else
2279 		ret = dmar_hotplug_remove(tmp);
2280 	up_write(&dmar_global_lock);
2281 
2282 	return ret;
2283 }
2284 
2285 int dmar_device_add(acpi_handle handle)
2286 {
2287 	return dmar_device_hotplug(handle, true);
2288 }
2289 
2290 int dmar_device_remove(acpi_handle handle)
2291 {
2292 	return dmar_device_hotplug(handle, false);
2293 }
2294 
2295 /*
2296  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2297  *
2298  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2299  * the ACPI DMAR table. This means that the platform boot firmware has made
2300  * sure no device can issue DMA outside of RMRR regions.
2301  */
2302 bool dmar_platform_optin(void)
2303 {
2304 	struct acpi_table_dmar *dmar;
2305 	acpi_status status;
2306 	bool ret;
2307 
2308 	status = acpi_get_table(ACPI_SIG_DMAR, 0,
2309 				(struct acpi_table_header **)&dmar);
2310 	if (ACPI_FAILURE(status))
2311 		return false;
2312 
2313 	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2314 	acpi_put_table((struct acpi_table_header *)dmar);
2315 
2316 	return ret;
2317 }
2318 EXPORT_SYMBOL_GPL(dmar_platform_optin);
2319