xref: /openbmc/linux/drivers/iommu/intel/dmar.c (revision fa538f7cf05aab61cd91e01c160d4a09c81b8ffe)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16 
17 #define pr_fmt(fmt)     "DMAR: " fmt
18 
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 
35 #include "../irq_remapping.h"
36 
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39 	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
40 	void			*arg[ACPI_DMAR_TYPE_RESERVED];
41 	bool			ignore_unhandled;
42 	bool			print_entry;
43 };
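/*
 * A dmar_res_callback is a small dispatch table for walking the DMAR table:
 * dmar_walk_remapping_entries() invokes cb[type] with arg[type] for every
 * remapping structure whose type has a registered handler.  See
 * parse_dmar_table() below for a fully populated example.
 */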
44 
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
48  *    before the I/O devices managed by that unit.
49  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
50  *    after the I/O devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
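/*
 * For example, dmar_pci_bus_notifier() below takes dmar_global_lock for
 * write before updating the device scope lists, while lookups such as
 * dmar_find_matched_drhd_unit() walk dmar_drhd_units under rcu_read_lock()
 * only.
 */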
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
59 
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63 
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66 
67 extern const struct iommu_ops intel_iommu_ops;
68 
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71 	/*
72 	 * Add the INCLUDE_ALL unit at the tail, so a scan of the list will
73 	 * find it at the very end.
74 	 */
75 	if (drhd->include_all)
76 		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 	else
78 		list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80 
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83 	struct acpi_dmar_device_scope *scope;
84 
85 	*cnt = 0;
86 	while (start < end) {
87 		scope = start;
88 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 			(*cnt)++;
92 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93 			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94 			pr_warn("Unsupported device scope\n");
95 		}
96 		start += scope->length;
97 	}
98 	if (*cnt == 0)
99 		return NULL;
100 
101 	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103 
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106 	int i;
107 	struct device *tmp_dev;
108 
109 	if (*devices && *cnt) {
110 		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111 			put_device(tmp_dev);
112 		kfree(*devices);
113 	}
114 
115 	*devices = NULL;
116 	*cnt = 0;
117 }
118 
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
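/*
 * dmar_alloc_pci_notify_info() uses this static buffer whenever
 * struct_size(info, path, level) fits into it (the common case of a short
 * PCI path) and only falls back to kzalloc() for deeper hierarchies;
 * dmar_free_pci_notify_info() only kfree()s the dynamically allocated case.
 */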
121 
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125 	int level = 0;
126 	size_t size;
127 	struct pci_dev *tmp;
128 	struct dmar_pci_notify_info *info;
129 
130 	BUG_ON(dev->is_virtfn);
131 
132 	/*
133 	 * Ignore devices that have a domain number higher than what can
134 	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135 	 */
136 	if (pci_domain_nr(dev->bus) > U16_MAX)
137 		return NULL;
138 
139 	/* Only generate path[] for device addition event */
140 	if (event == BUS_NOTIFY_ADD_DEVICE)
141 		for (tmp = dev; tmp; tmp = tmp->bus->self)
142 			level++;
143 
144 	size = struct_size(info, path, level);
145 	if (size <= sizeof(dmar_pci_notify_info_buf)) {
146 		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147 	} else {
148 		info = kzalloc(size, GFP_KERNEL);
149 		if (!info) {
150 			pr_warn("Out of memory when allocating notify_info "
151 				"for %s.\n", pci_name(dev));
152 			if (dmar_dev_scope_status == 0)
153 				dmar_dev_scope_status = -ENOMEM;
154 			return NULL;
155 		}
156 	}
157 
158 	info->event = event;
159 	info->dev = dev;
160 	info->seg = pci_domain_nr(dev->bus);
161 	info->level = level;
162 	if (event == BUS_NOTIFY_ADD_DEVICE) {
163 		for (tmp = dev; tmp; tmp = tmp->bus->self) {
164 			level--;
165 			info->path[level].bus = tmp->bus->number;
166 			info->path[level].device = PCI_SLOT(tmp->devfn);
167 			info->path[level].function = PCI_FUNC(tmp->devfn);
168 			if (pci_is_root_bus(tmp->bus))
169 				info->bus = tmp->bus->number;
170 		}
171 	}
172 
173 	return info;
174 }
175 
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178 	if ((void *)info != dmar_pci_notify_info_buf)
179 		kfree(info);
180 }
181 
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183 				struct acpi_dmar_pci_path *path, int count)
184 {
185 	int i;
186 
187 	if (info->bus != bus)
188 		goto fallback;
189 	if (info->level != count)
190 		goto fallback;
191 
192 	for (i = 0; i < count; i++) {
193 		if (path[i].device != info->path[i].device ||
194 		    path[i].function != info->path[i].function)
195 			goto fallback;
196 	}
197 
198 	return true;
199 
200 fallback:
201 
202 	if (count != 1)
203 		return false;
204 
205 	i = info->level - 1;
206 	if (bus              == info->path[i].bus &&
207 	    path[0].device   == info->path[i].device &&
208 	    path[0].function == info->path[i].function) {
209 		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210 			bus, path[0].device, path[0].function);
211 		return true;
212 	}
213 
214 	return false;
215 }
216 
217 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219 			  void *start, void *end, u16 segment,
220 			  struct dmar_dev_scope *devices,
221 			  int devices_cnt)
222 {
223 	int i, level;
224 	struct device *tmp, *dev = &info->dev->dev;
225 	struct acpi_dmar_device_scope *scope;
226 	struct acpi_dmar_pci_path *path;
227 
228 	if (segment != info->seg)
229 		return 0;
230 
231 	for (; start < end; start += scope->length) {
232 		scope = start;
233 		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234 		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235 			continue;
236 
237 		path = (struct acpi_dmar_pci_path *)(scope + 1);
238 		level = (scope->length - sizeof(*scope)) / sizeof(*path);
239 		if (!dmar_match_pci_path(info, scope->bus, path, level))
240 			continue;
241 
242 		/*
243 		 * We expect devices with endpoint scope to have normal PCI
244 		 * headers, and devices with bridge scope to have bridge PCI
245 		 * headers.  However PCI NTB devices may be listed in the
246 		 * DMAR table with bridge scope, even though they have a
247 		 * normal PCI header.  NTB devices are identified by class
248 		 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
249 		 * for this special case.
250 		 */
251 		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252 		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253 		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254 		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255 		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256 			pr_warn("Device scope type does not match for %s\n",
257 				pci_name(info->dev));
258 			return -EINVAL;
259 		}
260 
261 		for_each_dev_scope(devices, devices_cnt, i, tmp)
262 			if (tmp == NULL) {
263 				devices[i].bus = info->dev->bus->number;
264 				devices[i].devfn = info->dev->devfn;
265 				rcu_assign_pointer(devices[i].dev,
266 						   get_device(dev));
267 				return 1;
268 			}
269 		BUG_ON(i >= devices_cnt);
270 	}
271 
272 	return 0;
273 }
274 
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276 			  struct dmar_dev_scope *devices, int count)
277 {
278 	int index;
279 	struct device *tmp;
280 
281 	if (info->seg != segment)
282 		return 0;
283 
284 	for_each_active_dev_scope(devices, count, index, tmp)
285 		if (tmp == &info->dev->dev) {
286 			RCU_INIT_POINTER(devices[index].dev, NULL);
287 			synchronize_rcu();
288 			put_device(tmp);
289 			return 1;
290 		}
291 
292 	return 0;
293 }
294 
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297 	int ret = 0;
298 	struct dmar_drhd_unit *dmaru;
299 	struct acpi_dmar_hardware_unit *drhd;
300 
301 	for_each_drhd_unit(dmaru) {
302 		if (dmaru->include_all)
303 			continue;
304 
305 		drhd = container_of(dmaru->hdr,
306 				    struct acpi_dmar_hardware_unit, header);
307 		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308 				((void *)drhd) + drhd->header.length,
309 				dmaru->segment,
310 				dmaru->devices, dmaru->devices_cnt);
311 		if (ret)
312 			break;
313 	}
314 	if (ret >= 0)
315 		ret = dmar_iommu_notify_scope_dev(info);
316 	if (ret < 0 && dmar_dev_scope_status == 0)
317 		dmar_dev_scope_status = ret;
318 
319 	if (ret >= 0)
320 		intel_irq_remap_add_device(info);
321 
322 	return ret;
323 }
324 
325 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326 {
327 	struct dmar_drhd_unit *dmaru;
328 
329 	for_each_drhd_unit(dmaru)
330 		if (dmar_remove_dev_scope(info, dmaru->segment,
331 			dmaru->devices, dmaru->devices_cnt))
332 			break;
333 	dmar_iommu_notify_scope_dev(info);
334 }
335 
336 static int dmar_pci_bus_notifier(struct notifier_block *nb,
337 				 unsigned long action, void *data)
338 {
339 	struct pci_dev *pdev = to_pci_dev(data);
340 	struct dmar_pci_notify_info *info;
341 
342 	/* Only care about add/remove events for physical functions.
343 	 * For VFs we actually do the lookup based on the corresponding
344 	 * PF in device_to_iommu() anyway. */
345 	if (pdev->is_virtfn)
346 		return NOTIFY_DONE;
347 	if (action != BUS_NOTIFY_ADD_DEVICE &&
348 	    action != BUS_NOTIFY_REMOVED_DEVICE)
349 		return NOTIFY_DONE;
350 
351 	info = dmar_alloc_pci_notify_info(pdev, action);
352 	if (!info)
353 		return NOTIFY_DONE;
354 
355 	down_write(&dmar_global_lock);
356 	if (action == BUS_NOTIFY_ADD_DEVICE)
357 		dmar_pci_bus_add_dev(info);
358 	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
359 		dmar_pci_bus_del_dev(info);
360 	up_write(&dmar_global_lock);
361 
362 	dmar_free_pci_notify_info(info);
363 
364 	return NOTIFY_OK;
365 }
366 
367 static struct notifier_block dmar_pci_bus_nb = {
368 	.notifier_call = dmar_pci_bus_notifier,
369 	.priority = INT_MIN,
370 };
371 
372 static struct dmar_drhd_unit *
373 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
374 {
375 	struct dmar_drhd_unit *dmaru;
376 
377 	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
378 				dmar_rcu_check())
379 		if (dmaru->segment == drhd->segment &&
380 		    dmaru->reg_base_addr == drhd->address)
381 			return dmaru;
382 
383 	return NULL;
384 }
385 
386 /*
387  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
388  * structure, which uniquely represents one DMA remapping hardware unit
389  * present in the platform
390  */
391 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
392 {
393 	struct acpi_dmar_hardware_unit *drhd;
394 	struct dmar_drhd_unit *dmaru;
395 	int ret;
396 
397 	drhd = (struct acpi_dmar_hardware_unit *)header;
398 	dmaru = dmar_find_dmaru(drhd);
399 	if (dmaru)
400 		goto out;
401 
402 	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
403 	if (!dmaru)
404 		return -ENOMEM;
405 
406 	/*
407 	 * If the header was allocated from the slab by an ACPI _DSM method, we
408 	 * need to copy the content because the memory buffer will be freed on return.
409 	 */
410 	dmaru->hdr = (void *)(dmaru + 1);
411 	memcpy(dmaru->hdr, header, header->length);
412 	dmaru->reg_base_addr = drhd->address;
413 	dmaru->segment = drhd->segment;
414 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
415 	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
416 					      ((void *)drhd) + drhd->header.length,
417 					      &dmaru->devices_cnt);
418 	if (dmaru->devices_cnt && dmaru->devices == NULL) {
419 		kfree(dmaru);
420 		return -ENOMEM;
421 	}
422 
423 	ret = alloc_iommu(dmaru);
424 	if (ret) {
425 		dmar_free_dev_scope(&dmaru->devices,
426 				    &dmaru->devices_cnt);
427 		kfree(dmaru);
428 		return ret;
429 	}
430 	dmar_register_drhd_unit(dmaru);
431 
432 out:
433 	if (arg)
434 		(*(int *)arg)++;
435 
436 	return 0;
437 }
438 
439 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
440 {
441 	if (dmaru->devices && dmaru->devices_cnt)
442 		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
443 	if (dmaru->iommu)
444 		free_iommu(dmaru->iommu);
445 	kfree(dmaru);
446 }
447 
448 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
449 				      void *arg)
450 {
451 	struct acpi_dmar_andd *andd = (void *)header;
452 
453 	/* Check for NUL termination within the designated length */
454 	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
455 		pr_warn(FW_BUG
456 			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
457 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
458 			   dmi_get_system_info(DMI_BIOS_VENDOR),
459 			   dmi_get_system_info(DMI_BIOS_VERSION),
460 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
461 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
462 		return -EINVAL;
463 	}
464 	pr_info("ANDD device: %x name: %s\n", andd->device_number,
465 		andd->device_name);
466 
467 	return 0;
468 }
469 
470 #ifdef CONFIG_ACPI_NUMA
471 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
472 {
473 	struct acpi_dmar_rhsa *rhsa;
474 	struct dmar_drhd_unit *drhd;
475 
476 	rhsa = (struct acpi_dmar_rhsa *)header;
477 	for_each_drhd_unit(drhd) {
478 		if (drhd->reg_base_addr == rhsa->base_address) {
479 			int node = pxm_to_node(rhsa->proximity_domain);
480 
481 			if (!node_online(node))
482 				node = NUMA_NO_NODE;
483 			drhd->iommu->node = node;
484 			return 0;
485 		}
486 	}
487 	pr_warn(FW_BUG
488 		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
489 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
490 		rhsa->base_address,
491 		dmi_get_system_info(DMI_BIOS_VENDOR),
492 		dmi_get_system_info(DMI_BIOS_VERSION),
493 		dmi_get_system_info(DMI_PRODUCT_VERSION));
494 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
495 
496 	return 0;
497 }
498 #else
499 #define	dmar_parse_one_rhsa		dmar_res_noop
500 #endif
501 
502 static void
503 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
504 {
505 	struct acpi_dmar_hardware_unit *drhd;
506 	struct acpi_dmar_reserved_memory *rmrr;
507 	struct acpi_dmar_atsr *atsr;
508 	struct acpi_dmar_rhsa *rhsa;
509 
510 	switch (header->type) {
511 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
512 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
513 				    header);
514 		pr_info("DRHD base: %#016Lx flags: %#x\n",
515 			(unsigned long long)drhd->address, drhd->flags);
516 		break;
517 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
518 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
519 				    header);
520 		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
521 			(unsigned long long)rmrr->base_address,
522 			(unsigned long long)rmrr->end_address);
523 		break;
524 	case ACPI_DMAR_TYPE_ROOT_ATS:
525 		atsr = container_of(header, struct acpi_dmar_atsr, header);
526 		pr_info("ATSR flags: %#x\n", atsr->flags);
527 		break;
528 	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
529 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
530 		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
531 		       (unsigned long long)rhsa->base_address,
532 		       rhsa->proximity_domain);
533 		break;
534 	case ACPI_DMAR_TYPE_NAMESPACE:
535 		/* We don't print this here because we need to sanity-check
536 		   it first. So print it in dmar_parse_one_andd() instead. */
537 		break;
538 	}
539 }
540 
541 /**
542  * dmar_table_detect - checks to see if the platform supports DMAR devices
543  */
544 static int __init dmar_table_detect(void)
545 {
546 	acpi_status status = AE_OK;
547 
548 	/* If we can find the DMAR table, then there are DMAR devices. */
549 	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
550 
551 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
552 		pr_warn("Unable to map DMAR\n");
553 		status = AE_NOT_FOUND;
554 	}
555 
556 	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
557 }
558 
559 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
560 				       size_t len, struct dmar_res_callback *cb)
561 {
562 	struct acpi_dmar_header *iter, *next;
563 	struct acpi_dmar_header *end = ((void *)start) + len;
564 
565 	for (iter = start; iter < end; iter = next) {
566 		next = (void *)iter + iter->length;
567 		if (iter->length == 0) {
568 			/* Avoid looping forever on bad ACPI tables */
569 			pr_debug(FW_BUG "Invalid 0-length structure\n");
570 			break;
571 		} else if (next > end) {
572 			/* Avoid running past the table end */
573 			pr_warn(FW_BUG "Record passes table end\n");
574 			return -EINVAL;
575 		}
576 
577 		if (cb->print_entry)
578 			dmar_table_print_dmar_entry(iter);
579 
580 		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
581 			/* continue for forward compatibility */
582 			pr_debug("Unknown DMAR structure type %d\n",
583 				 iter->type);
584 		} else if (cb->cb[iter->type]) {
585 			int ret;
586 
587 			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
588 			if (ret)
589 				return ret;
590 		} else if (!cb->ignore_unhandled) {
591 			pr_warn("No handler for DMAR structure type %d\n",
592 				iter->type);
593 			return -EINVAL;
594 		}
595 	}
596 
597 	return 0;
598 }
599 
600 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
601 				       struct dmar_res_callback *cb)
602 {
603 	return dmar_walk_remapping_entries((void *)(dmar + 1),
604 			dmar->header.length - sizeof(*dmar), cb);
605 }
606 
607 /**
608  * parse_dmar_table - parses the DMA Remapping Reporting (DMAR) table
609  */
610 static int __init
611 parse_dmar_table(void)
612 {
613 	struct acpi_table_dmar *dmar;
614 	int drhd_count = 0;
615 	int ret;
616 	struct dmar_res_callback cb = {
617 		.print_entry = true,
618 		.ignore_unhandled = true,
619 		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
620 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
621 		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
622 		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
623 		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
624 		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
625 	};
626 
627 	/*
628 	 * Do the detection again; the earlier dmar_tbl mapping may have been
629 	 * done with a fixed map.
630 	 */
631 	dmar_table_detect();
632 
633 	/*
634 	 * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
635 	 * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected).
636 	 */
637 	dmar_tbl = tboot_get_dmar_table(dmar_tbl);
638 
639 	dmar = (struct acpi_table_dmar *)dmar_tbl;
640 	if (!dmar)
641 		return -ENODEV;
642 
643 	if (dmar->width < PAGE_SHIFT - 1) {
644 		pr_warn("Invalid DMAR haw\n");
645 		return -EINVAL;
646 	}
647 
648 	pr_info("Host address width %d\n", dmar->width + 1);
649 	ret = dmar_walk_dmar_table(dmar, &cb);
650 	if (ret == 0 && drhd_count == 0)
651 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
652 
653 	return ret;
654 }
655 
656 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
657 				 int cnt, struct pci_dev *dev)
658 {
659 	int index;
660 	struct device *tmp;
661 
662 	while (dev) {
663 		for_each_active_dev_scope(devices, cnt, index, tmp)
664 			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
665 				return 1;
666 
667 		/* Check our parent */
668 		dev = dev->bus->self;
669 	}
670 
671 	return 0;
672 }
673 
674 struct dmar_drhd_unit *
675 dmar_find_matched_drhd_unit(struct pci_dev *dev)
676 {
677 	struct dmar_drhd_unit *dmaru;
678 	struct acpi_dmar_hardware_unit *drhd;
679 
680 	dev = pci_physfn(dev);
681 
682 	rcu_read_lock();
683 	for_each_drhd_unit(dmaru) {
684 		drhd = container_of(dmaru->hdr,
685 				    struct acpi_dmar_hardware_unit,
686 				    header);
687 
688 		if (dmaru->include_all &&
689 		    drhd->segment == pci_domain_nr(dev->bus))
690 			goto out;
691 
692 		if (dmar_pci_device_match(dmaru->devices,
693 					  dmaru->devices_cnt, dev))
694 			goto out;
695 	}
696 	dmaru = NULL;
697 out:
698 	rcu_read_unlock();
699 
700 	return dmaru;
701 }
702 
703 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
704 					      struct acpi_device *adev)
705 {
706 	struct dmar_drhd_unit *dmaru;
707 	struct acpi_dmar_hardware_unit *drhd;
708 	struct acpi_dmar_device_scope *scope;
709 	struct device *tmp;
710 	int i;
711 	struct acpi_dmar_pci_path *path;
712 
713 	for_each_drhd_unit(dmaru) {
714 		drhd = container_of(dmaru->hdr,
715 				    struct acpi_dmar_hardware_unit,
716 				    header);
717 
718 		for (scope = (void *)(drhd + 1);
719 		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
720 		     scope = ((void *)scope) + scope->length) {
721 			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
722 				continue;
723 			if (scope->enumeration_id != device_number)
724 				continue;
725 
726 			path = (void *)(scope + 1);
727 			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
728 				dev_name(&adev->dev), dmaru->reg_base_addr,
729 				scope->bus, path->device, path->function);
730 			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
731 				if (tmp == NULL) {
732 					dmaru->devices[i].bus = scope->bus;
733 					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
734 									    path->function);
735 					rcu_assign_pointer(dmaru->devices[i].dev,
736 							   get_device(&adev->dev));
737 					return;
738 				}
739 			BUG_ON(i >= dmaru->devices_cnt);
740 		}
741 	}
742 	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
743 		device_number, dev_name(&adev->dev));
744 }
745 
746 static int __init dmar_acpi_dev_scope_init(void)
747 {
748 	struct acpi_dmar_andd *andd;
749 
750 	if (dmar_tbl == NULL)
751 		return -ENODEV;
752 
753 	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
754 	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
755 	     andd = ((void *)andd) + andd->header.length) {
756 		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
757 			acpi_handle h;
758 			struct acpi_device *adev;
759 
760 			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
761 							  andd->device_name,
762 							  &h))) {
763 				pr_err("Failed to find handle for ACPI object %s\n",
764 				       andd->device_name);
765 				continue;
766 			}
767 			if (acpi_bus_get_device(h, &adev)) {
768 				pr_err("Failed to get device for ACPI object %s\n",
769 				       andd->device_name);
770 				continue;
771 			}
772 			dmar_acpi_insert_dev_scope(andd->device_number, adev);
773 		}
774 	}
775 	return 0;
776 }
777 
778 int __init dmar_dev_scope_init(void)
779 {
780 	struct pci_dev *dev = NULL;
781 	struct dmar_pci_notify_info *info;
782 
783 	if (dmar_dev_scope_status != 1)
784 		return dmar_dev_scope_status;
785 
786 	if (list_empty(&dmar_drhd_units)) {
787 		dmar_dev_scope_status = -ENODEV;
788 	} else {
789 		dmar_dev_scope_status = 0;
790 
791 		dmar_acpi_dev_scope_init();
792 
793 		for_each_pci_dev(dev) {
794 			if (dev->is_virtfn)
795 				continue;
796 
797 			info = dmar_alloc_pci_notify_info(dev,
798 					BUS_NOTIFY_ADD_DEVICE);
799 			if (!info) {
800 				return dmar_dev_scope_status;
801 			} else {
802 				dmar_pci_bus_add_dev(info);
803 				dmar_free_pci_notify_info(info);
804 			}
805 		}
806 	}
807 
808 	return dmar_dev_scope_status;
809 }
810 
811 void __init dmar_register_bus_notifier(void)
812 {
813 	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
814 }
815 
816 
817 int __init dmar_table_init(void)
818 {
819 	static int dmar_table_initialized;
820 	int ret;
821 
822 	if (dmar_table_initialized == 0) {
823 		ret = parse_dmar_table();
824 		if (ret < 0) {
825 			if (ret != -ENODEV)
826 				pr_info("Parse DMAR table failure.\n");
827 		} else  if (list_empty(&dmar_drhd_units)) {
828 			pr_info("No DMAR devices found\n");
829 			ret = -ENODEV;
830 		}
831 
832 		if (ret < 0)
833 			dmar_table_initialized = ret;
834 		else
835 			dmar_table_initialized = 1;
836 	}
837 
838 	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
839 }
840 
841 static void warn_invalid_dmar(u64 addr, const char *message)
842 {
843 	pr_warn_once(FW_BUG
844 		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
845 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
846 		addr, message,
847 		dmi_get_system_info(DMI_BIOS_VENDOR),
848 		dmi_get_system_info(DMI_BIOS_VERSION),
849 		dmi_get_system_info(DMI_PRODUCT_VERSION));
850 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
851 }
852 
853 static int __ref
854 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
855 {
856 	struct acpi_dmar_hardware_unit *drhd;
857 	void __iomem *addr;
858 	u64 cap, ecap;
859 
860 	drhd = (void *)entry;
861 	if (!drhd->address) {
862 		warn_invalid_dmar(0, "");
863 		return -EINVAL;
864 	}
865 
866 	if (arg)
867 		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
868 	else
869 		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
870 	if (!addr) {
871 		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
872 		return -EINVAL;
873 	}
874 
875 	cap = dmar_readq(addr + DMAR_CAP_REG);
876 	ecap = dmar_readq(addr + DMAR_ECAP_REG);
877 
878 	if (arg)
879 		iounmap(addr);
880 	else
881 		early_iounmap(addr, VTD_PAGE_SIZE);
882 
883 	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
884 		warn_invalid_dmar(drhd->address, " returns all ones");
885 		return -EINVAL;
886 	}
887 
888 	return 0;
889 }
890 
891 int __init detect_intel_iommu(void)
892 {
893 	int ret;
894 	struct dmar_res_callback validate_drhd_cb = {
895 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
896 		.ignore_unhandled = true,
897 	};
898 
899 	down_write(&dmar_global_lock);
900 	ret = dmar_table_detect();
901 	if (!ret)
902 		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
903 					   &validate_drhd_cb);
904 	if (!ret && !no_iommu && !iommu_detected &&
905 	    (!dmar_disabled || dmar_platform_optin())) {
906 		iommu_detected = 1;
907 		/* Make sure ACS will be enabled */
908 		pci_request_acs();
909 	}
910 
911 #ifdef CONFIG_X86
912 	if (!ret) {
913 		x86_init.iommu.iommu_init = intel_iommu_init;
914 		x86_platform.iommu_shutdown = intel_iommu_shutdown;
915 	}
916 
917 #endif
918 
919 	if (dmar_tbl) {
920 		acpi_put_table(dmar_tbl);
921 		dmar_tbl = NULL;
922 	}
923 	up_write(&dmar_global_lock);
924 
925 	return ret ? ret : 1;
926 }
927 
928 static void unmap_iommu(struct intel_iommu *iommu)
929 {
930 	iounmap(iommu->reg);
931 	release_mem_region(iommu->reg_phys, iommu->reg_size);
932 }
933 
934 /**
935  * map_iommu: map the iommu's registers
936  * @iommu: the iommu to map
937  * @phys_addr: the physical address of the base register
938  *
939  * Memory map the iommu's registers.  Start with a single page, and
940  * possibly expand if that turns out to be insufficient.
941  */
942 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
943 {
944 	int map_size, err = 0;
945 
946 	iommu->reg_phys = phys_addr;
947 	iommu->reg_size = VTD_PAGE_SIZE;
948 
949 	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
950 		pr_err("Can't reserve memory\n");
951 		err = -EBUSY;
952 		goto out;
953 	}
954 
955 	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
956 	if (!iommu->reg) {
957 		pr_err("Can't map the region\n");
958 		err = -ENOMEM;
959 		goto release;
960 	}
961 
962 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
963 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
964 
965 	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
966 		err = -EINVAL;
967 		warn_invalid_dmar(phys_addr, " returns all ones");
968 		goto unmap;
969 	}
970 	iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
971 
972 	/* the registers might be more than one page */
973 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
974 			 cap_max_fault_reg_offset(iommu->cap));
975 	map_size = VTD_PAGE_ALIGN(map_size);
976 	if (map_size > iommu->reg_size) {
977 		iounmap(iommu->reg);
978 		release_mem_region(iommu->reg_phys, iommu->reg_size);
979 		iommu->reg_size = map_size;
980 		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
981 					iommu->name)) {
982 			pr_err("Can't reserve memory\n");
983 			err = -EBUSY;
984 			goto out;
985 		}
986 		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
987 		if (!iommu->reg) {
988 			pr_err("Can't map the region\n");
989 			err = -ENOMEM;
990 			goto release;
991 		}
992 	}
993 	err = 0;
994 	goto out;
995 
996 unmap:
997 	iounmap(iommu->reg);
998 release:
999 	release_mem_region(iommu->reg_phys, iommu->reg_size);
1000 out:
1001 	return err;
1002 }
1003 
1004 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1005 {
1006 	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1007 					    DMAR_UNITS_SUPPORTED);
1008 	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1009 		iommu->seq_id = -1;
1010 	} else {
1011 		set_bit(iommu->seq_id, dmar_seq_ids);
1012 		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1013 	}
1014 
1015 	return iommu->seq_id;
1016 }
1017 
1018 static void dmar_free_seq_id(struct intel_iommu *iommu)
1019 {
1020 	if (iommu->seq_id >= 0) {
1021 		clear_bit(iommu->seq_id, dmar_seq_ids);
1022 		iommu->seq_id = -1;
1023 	}
1024 }
1025 
1026 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1027 {
1028 	struct intel_iommu *iommu;
1029 	u32 ver, sts;
1030 	int agaw = -1;
1031 	int msagaw = -1;
1032 	int err;
1033 
1034 	if (!drhd->reg_base_addr) {
1035 		warn_invalid_dmar(0, "");
1036 		return -EINVAL;
1037 	}
1038 
1039 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1040 	if (!iommu)
1041 		return -ENOMEM;
1042 
1043 	if (dmar_alloc_seq_id(iommu) < 0) {
1044 		pr_err("Failed to allocate seq_id\n");
1045 		err = -ENOSPC;
1046 		goto error;
1047 	}
1048 
1049 	err = map_iommu(iommu, drhd->reg_base_addr);
1050 	if (err) {
1051 		pr_err("Failed to map %s\n", iommu->name);
1052 		goto error_free_seq_id;
1053 	}
1054 
1055 	err = -EINVAL;
1056 	if (cap_sagaw(iommu->cap) == 0) {
1057 		pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1058 			iommu->name);
1059 		drhd->ignored = 1;
1060 	}
1061 
1062 	if (!drhd->ignored) {
1063 		agaw = iommu_calculate_agaw(iommu);
1064 		if (agaw < 0) {
1065 			pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1066 			       iommu->seq_id);
1067 			drhd->ignored = 1;
1068 		}
1069 	}
1070 	if (!drhd->ignored) {
1071 		msagaw = iommu_calculate_max_sagaw(iommu);
1072 		if (msagaw < 0) {
1073 			pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1074 			       iommu->seq_id);
1075 			drhd->ignored = 1;
1076 			agaw = -1;
1077 		}
1078 	}
1079 	iommu->agaw = agaw;
1080 	iommu->msagaw = msagaw;
1081 	iommu->segment = drhd->segment;
1082 
1083 	iommu->node = NUMA_NO_NODE;
1084 
1085 	ver = readl(iommu->reg + DMAR_VER_REG);
1086 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1087 		iommu->name,
1088 		(unsigned long long)drhd->reg_base_addr,
1089 		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1090 		(unsigned long long)iommu->cap,
1091 		(unsigned long long)iommu->ecap);
1092 
1093 	/* Reflect status in gcmd */
1094 	sts = readl(iommu->reg + DMAR_GSTS_REG);
1095 	if (sts & DMA_GSTS_IRES)
1096 		iommu->gcmd |= DMA_GCMD_IRE;
1097 	if (sts & DMA_GSTS_TES)
1098 		iommu->gcmd |= DMA_GCMD_TE;
1099 	if (sts & DMA_GSTS_QIES)
1100 		iommu->gcmd |= DMA_GCMD_QIE;
1101 
1102 	raw_spin_lock_init(&iommu->register_lock);
1103 
1104 	/*
1105 	 * This is only for hotplug; at boot time intel_iommu_enabled won't
1106 	 * be set yet. When intel_iommu_init() runs, it registers the units
1107 	 * present at boot time, then sets intel_iommu_enabled.
1108 	 */
1109 	if (intel_iommu_enabled && !drhd->ignored) {
1110 		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1111 					     intel_iommu_groups,
1112 					     "%s", iommu->name);
1113 		if (err)
1114 			goto err_unmap;
1115 
1116 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1117 
1118 		err = iommu_device_register(&iommu->iommu);
1119 		if (err)
1120 			goto err_unmap;
1121 	}
1122 
1123 	drhd->iommu = iommu;
1124 	iommu->drhd = drhd;
1125 
1126 	return 0;
1127 
1128 err_unmap:
1129 	unmap_iommu(iommu);
1130 error_free_seq_id:
1131 	dmar_free_seq_id(iommu);
1132 error:
1133 	kfree(iommu);
1134 	return err;
1135 }
1136 
1137 static void free_iommu(struct intel_iommu *iommu)
1138 {
1139 	if (intel_iommu_enabled && !iommu->drhd->ignored) {
1140 		iommu_device_unregister(&iommu->iommu);
1141 		iommu_device_sysfs_remove(&iommu->iommu);
1142 	}
1143 
1144 	if (iommu->irq) {
1145 		if (iommu->pr_irq) {
1146 			free_irq(iommu->pr_irq, iommu);
1147 			dmar_free_hwirq(iommu->pr_irq);
1148 			iommu->pr_irq = 0;
1149 		}
1150 		free_irq(iommu->irq, iommu);
1151 		dmar_free_hwirq(iommu->irq);
1152 		iommu->irq = 0;
1153 	}
1154 
1155 	if (iommu->qi) {
1156 		free_page((unsigned long)iommu->qi->desc);
1157 		kfree(iommu->qi->desc_status);
1158 		kfree(iommu->qi);
1159 	}
1160 
1161 	if (iommu->reg)
1162 		unmap_iommu(iommu);
1163 
1164 	dmar_free_seq_id(iommu);
1165 	kfree(iommu);
1166 }
1167 
1168 /*
1169  * Reclaim all the submitted descriptors which have completed their work.
1170  */
1171 static inline void reclaim_free_desc(struct q_inval *qi)
1172 {
1173 	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1174 	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1175 		qi->desc_status[qi->free_tail] = QI_FREE;
1176 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1177 		qi->free_cnt++;
1178 	}
1179 }
1180 
1181 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1182 {
1183 	u32 fault;
1184 	int head, tail;
1185 	struct q_inval *qi = iommu->qi;
1186 	int shift = qi_shift(iommu);
1187 
1188 	if (qi->desc_status[wait_index] == QI_ABORT)
1189 		return -EAGAIN;
1190 
1191 	fault = readl(iommu->reg + DMAR_FSTS_REG);
1192 
1193 	/*
1194 	 * If IQE happens, the head points to the descriptor associated
1195 	 * with the error. No new descriptors are fetched until the IQE
1196 	 * is cleared.
1197 	 */
1198 	if (fault & DMA_FSTS_IQE) {
1199 		head = readl(iommu->reg + DMAR_IQH_REG);
1200 		if ((head >> shift) == index) {
1201 			struct qi_desc *desc = qi->desc + head;
1202 
1203 			/*
1204 			 * desc->qw2 and desc->qw3 are either reserved or
1205 			 * used by software as private data. We won't print
1206 			 * out these two qwords for security reasons.
1207 			 */
1208 			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1209 			       (unsigned long long)desc->qw0,
1210 			       (unsigned long long)desc->qw1);
1211 			memcpy(desc, qi->desc + (wait_index << shift),
1212 			       1 << shift);
1213 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1214 			return -EINVAL;
1215 		}
1216 	}
1217 
1218 	/*
1219 	 * If ITE happens, all pending wait_desc commands are aborted.
1220 	 * No new descriptors are fetched until the ITE is cleared.
1221 	 */
1222 	if (fault & DMA_FSTS_ITE) {
1223 		head = readl(iommu->reg + DMAR_IQH_REG);
1224 		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1225 		head |= 1;
1226 		tail = readl(iommu->reg + DMAR_IQT_REG);
1227 		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1228 
1229 		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1230 
1231 		do {
1232 			if (qi->desc_status[head] == QI_IN_USE)
1233 				qi->desc_status[head] = QI_ABORT;
1234 			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1235 		} while (head != tail);
1236 
1237 		if (qi->desc_status[wait_index] == QI_ABORT)
1238 			return -EAGAIN;
1239 	}
1240 
1241 	if (fault & DMA_FSTS_ICE)
1242 		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1243 
1244 	return 0;
1245 }
1246 
1247 /*
1248  * Function to submit invalidation descriptors of all types to the queued
1249  * invalidation interface (QI). Multiple descriptors can be submitted at a
1250  * time; a wait descriptor is appended to each submission to ensure that the
1251  * hardware has completed the invalidation before returning. Wait descriptors
1252  * can be part of the submission, but they will not be polled for completion.
1253  */
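/*
 * Typical usage, as in qi_global_iec() and the qi_flush_*() helpers below
 * (illustrative sketch only):
 *
 *	struct qi_desc desc = { .qw0 = QI_IEC_TYPE };
 *
 *	qi_submit_sync(iommu, &desc, 1, 0);
 */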
1254 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1255 		   unsigned int count, unsigned long options)
1256 {
1257 	struct q_inval *qi = iommu->qi;
1258 	struct qi_desc wait_desc;
1259 	int wait_index, index;
1260 	unsigned long flags;
1261 	int offset, shift;
1262 	int rc, i;
1263 
1264 	if (!qi)
1265 		return 0;
1266 
1267 restart:
1268 	rc = 0;
1269 
1270 	raw_spin_lock_irqsave(&qi->q_lock, flags);
1271 	/*
1272 	 * Check if we have enough empty slots in the queue to submit;
1273 	 * the calculation is based on:
1274 	 * # of desc + 1 wait desc + 1 space between head and tail
1275 	 */
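	/*
	 * For example, a single descriptor (count == 1) needs free_cnt >= 3:
	 * the descriptor itself, its wait descriptor, and one slot left empty
	 * so that head and tail never meet.
	 */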
1276 	while (qi->free_cnt < count + 2) {
1277 		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1278 		cpu_relax();
1279 		raw_spin_lock_irqsave(&qi->q_lock, flags);
1280 	}
1281 
1282 	index = qi->free_head;
1283 	wait_index = (index + count) % QI_LENGTH;
1284 	shift = qi_shift(iommu);
1285 
1286 	for (i = 0; i < count; i++) {
1287 		offset = ((index + i) % QI_LENGTH) << shift;
1288 		memcpy(qi->desc + offset, &desc[i], 1 << shift);
1289 		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1290 	}
1291 	qi->desc_status[wait_index] = QI_IN_USE;
1292 
1293 	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1294 			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1295 	if (options & QI_OPT_WAIT_DRAIN)
1296 		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1297 	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1298 	wait_desc.qw2 = 0;
1299 	wait_desc.qw3 = 0;
1300 
1301 	offset = wait_index << shift;
1302 	memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1303 
1304 	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1305 	qi->free_cnt -= count + 1;
1306 
1307 	/*
1308 	 * Update the HW tail register to indicate the presence of
1309 	 * new descriptors.
1310 	 */
1311 	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1312 
1313 	while (qi->desc_status[wait_index] != QI_DONE) {
1314 		/*
1315 		 * We leave the interrupts disabled to prevent the interrupt
1316 		 * context from queueing another cmd while a cmd is already submitted
1317 		 * and waiting for completion on this CPU. This avoids
1318 		 * a deadlock where the interrupt context could wait indefinitely
1319 		 * for free slots in the queue.
1320 		 */
1321 		rc = qi_check_fault(iommu, index, wait_index);
1322 		if (rc)
1323 			break;
1324 
1325 		raw_spin_unlock(&qi->q_lock);
1326 		cpu_relax();
1327 		raw_spin_lock(&qi->q_lock);
1328 	}
1329 
1330 	for (i = 0; i < count; i++)
1331 		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1332 
1333 	reclaim_free_desc(qi);
1334 	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1335 
1336 	if (rc == -EAGAIN)
1337 		goto restart;
1338 
1339 	return rc;
1340 }
1341 
1342 /*
1343  * Flush the global interrupt entry cache.
1344  */
1345 void qi_global_iec(struct intel_iommu *iommu)
1346 {
1347 	struct qi_desc desc;
1348 
1349 	desc.qw0 = QI_IEC_TYPE;
1350 	desc.qw1 = 0;
1351 	desc.qw2 = 0;
1352 	desc.qw3 = 0;
1353 
1354 	/* should never fail */
1355 	qi_submit_sync(iommu, &desc, 1, 0);
1356 }
1357 
1358 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1359 		      u64 type)
1360 {
1361 	struct qi_desc desc;
1362 
1363 	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1364 			| QI_CC_GRAN(type) | QI_CC_TYPE;
1365 	desc.qw1 = 0;
1366 	desc.qw2 = 0;
1367 	desc.qw3 = 0;
1368 
1369 	qi_submit_sync(iommu, &desc, 1, 0);
1370 }
1371 
1372 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1373 		    unsigned int size_order, u64 type)
1374 {
1375 	u8 dw = 0, dr = 0;
1376 
1377 	struct qi_desc desc;
1378 	int ih = 0;
1379 
1380 	if (cap_write_drain(iommu->cap))
1381 		dw = 1;
1382 
1383 	if (cap_read_drain(iommu->cap))
1384 		dr = 1;
1385 
1386 	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1387 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1388 	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1389 		| QI_IOTLB_AM(size_order);
1390 	desc.qw2 = 0;
1391 	desc.qw3 = 0;
1392 
1393 	qi_submit_sync(iommu, &desc, 1, 0);
1394 }
1395 
1396 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1397 			u16 qdep, u64 addr, unsigned mask)
1398 {
1399 	struct qi_desc desc;
1400 
1401 	if (mask) {
1402 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1403 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1404 	} else
1405 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1406 
1407 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1408 		qdep = 0;
1409 
1410 	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1411 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1412 	desc.qw2 = 0;
1413 	desc.qw3 = 0;
1414 
1415 	qi_submit_sync(iommu, &desc, 1, 0);
1416 }
1417 
1418 /* PASID-based IOTLB invalidation */
1419 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1420 		     unsigned long npages, bool ih)
1421 {
1422 	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1423 
1424 	/*
1425 	 * npages == -1 means a PASID-selective invalidation; otherwise,
1426 	 * a positive value requests a Page-selective-within-PASID invalidation.
1427 	 * 0 is not a valid input.
1428 	 */
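	/*
	 * For example, npages == 9 is rounded up to 16 pages below, giving
	 * an address mask (AM) of 4, i.e. a 64KB invalidation range.
	 */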
1429 	if (WARN_ON(!npages)) {
1430 		pr_err("Invalid input npages = %ld\n", npages);
1431 		return;
1432 	}
1433 
1434 	if (npages == -1) {
1435 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1436 				QI_EIOTLB_DID(did) |
1437 				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1438 				QI_EIOTLB_TYPE;
1439 		desc.qw1 = 0;
1440 	} else {
1441 		int mask = ilog2(__roundup_pow_of_two(npages));
1442 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1443 
1444 		if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1445 			addr &= ~(align - 1);
1446 
1447 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1448 				QI_EIOTLB_DID(did) |
1449 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1450 				QI_EIOTLB_TYPE;
1451 		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1452 				QI_EIOTLB_IH(ih) |
1453 				QI_EIOTLB_AM(mask);
1454 	}
1455 
1456 	qi_submit_sync(iommu, &desc, 1, 0);
1457 }
1458 
1459 /* PASID-based device IOTLB Invalidate */
1460 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1461 			      u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1462 {
1463 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1464 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1465 
1466 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1467 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1468 		QI_DEV_IOTLB_PFSID(pfsid);
1469 
1470 	/*
1471 	 * If the S bit is 0, we only flush a single page. If the S bit is set,
1472 	 * the least significant zero bit indicates the invalidation address
1473 	 * range. VT-d spec 6.5.2.6.
1474 	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1475 	 * size order = 0 is PAGE_SIZE 4KB
1476 	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1477 	 * ECAP.
1478 	 */
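	/*
	 * For example, size_order == 2 (four pages) yields qw1 with address
	 * bit 12 set and bit 13 clear, so the least significant zero bit is
	 * bit 13 and a 16KB-aligned range is invalidated.
	 */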
1479 	if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0))
1480 		pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1481 				    addr, size_order);
1482 
1483 	/* Take page address */
1484 	desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1485 
1486 	if (size_order) {
1487 		/*
1488 		 * Existing 0s in the address below size_order could be taken as
1489 		 * the least significant zero bit; we must set them to 1s to avoid
1490 		 * indicating a smaller size than desired.
1491 		 */
1492 		desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1493 					VTD_PAGE_SHIFT);
1494 		/* Clear size_order bit to indicate size */
1495 		desc.qw1 &= ~mask;
1496 		/* Set the S bit to indicate flushing more than 1 page */
1497 		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1498 	}
1499 
1500 	qi_submit_sync(iommu, &desc, 1, 0);
1501 }
1502 
1503 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1504 			  u64 granu, u32 pasid)
1505 {
1506 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1507 
1508 	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1509 			QI_PC_GRAN(granu) | QI_PC_TYPE;
1510 	qi_submit_sync(iommu, &desc, 1, 0);
1511 }
1512 
1513 /*
1514  * Disable Queued Invalidation interface.
1515  */
1516 void dmar_disable_qi(struct intel_iommu *iommu)
1517 {
1518 	unsigned long flags;
1519 	u32 sts;
1520 	cycles_t start_time = get_cycles();
1521 
1522 	if (!ecap_qis(iommu->ecap))
1523 		return;
1524 
1525 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1526 
1527 	sts = readl(iommu->reg + DMAR_GSTS_REG);
1528 	if (!(sts & DMA_GSTS_QIES))
1529 		goto end;
1530 
1531 	/*
1532 	 * Give the HW a chance to complete the pending invalidation requests.
1533 	 */
1534 	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1535 		readl(iommu->reg + DMAR_IQH_REG)) &&
1536 		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1537 		cpu_relax();
1538 
1539 	iommu->gcmd &= ~DMA_GCMD_QIE;
1540 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1541 
1542 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1543 		      !(sts & DMA_GSTS_QIES), sts);
1544 end:
1545 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1546 }
1547 
1548 /*
1549  * Enable queued invalidation.
1550  */
1551 static void __dmar_enable_qi(struct intel_iommu *iommu)
1552 {
1553 	u32 sts;
1554 	unsigned long flags;
1555 	struct q_inval *qi = iommu->qi;
1556 	u64 val = virt_to_phys(qi->desc);
1557 
1558 	qi->free_head = qi->free_tail = 0;
1559 	qi->free_cnt = QI_LENGTH;
1560 
1561 	/*
1562 	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1563 	 * is present.
1564 	 */
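	/*
	 * Per the VT-d spec, DW (bit 11) set to 1 selects 256-bit descriptors
	 * and QS (bits 2:0) set to 1 selects a 2^1-page (8KB) queue, matching
	 * the order-1 descriptor page allocated in dmar_enable_qi() for
	 * scalable mode.
	 */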
1565 	if (ecap_smts(iommu->ecap))
1566 		val |= (1 << 11) | 1;
1567 
1568 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1569 
1570 	/* write zero to the tail reg */
1571 	writel(0, iommu->reg + DMAR_IQT_REG);
1572 
1573 	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1574 
1575 	iommu->gcmd |= DMA_GCMD_QIE;
1576 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1577 
1578 	/* Make sure hardware complete it */
1579 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1580 
1581 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1582 }
1583 
1584 /*
1585  * Enable Queued Invalidation interface. This is a must to support
1586  * interrupt-remapping. Also used by DMA-remapping, which replaces
1587  * register based IOTLB invalidation.
1588  */
1589 int dmar_enable_qi(struct intel_iommu *iommu)
1590 {
1591 	struct q_inval *qi;
1592 	struct page *desc_page;
1593 
1594 	if (!ecap_qis(iommu->ecap))
1595 		return -ENOENT;
1596 
1597 	/*
1598 	 * Queued invalidation is already set up and enabled.
1599 	 */
1600 	if (iommu->qi)
1601 		return 0;
1602 
1603 	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1604 	if (!iommu->qi)
1605 		return -ENOMEM;
1606 
1607 	qi = iommu->qi;
1608 
1609 	/*
1610 	 * Need two pages to accommodate 256 descriptors of 256 bits each
1611 	 * if the remapping hardware supports scalable mode translation.
1612 	 */
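	/*
	 * That is: 256 descriptors * 32 bytes = 8KB (an order-1 allocation)
	 * in scalable mode, versus 256 * 16 bytes = 4KB (order 0) for the
	 * legacy 128-bit descriptor format.
	 */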
1613 	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1614 				     !!ecap_smts(iommu->ecap));
1615 	if (!desc_page) {
1616 		kfree(qi);
1617 		iommu->qi = NULL;
1618 		return -ENOMEM;
1619 	}
1620 
1621 	qi->desc = page_address(desc_page);
1622 
1623 	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1624 	if (!qi->desc_status) {
1625 		free_page((unsigned long) qi->desc);
1626 		kfree(qi);
1627 		iommu->qi = NULL;
1628 		return -ENOMEM;
1629 	}
1630 
1631 	raw_spin_lock_init(&qi->q_lock);
1632 
1633 	__dmar_enable_qi(iommu);
1634 
1635 	return 0;
1636 }
1637 
1638 /* iommu interrupt handling. Most of it is MSI-like. */
1639 
1640 enum faulttype {
1641 	DMA_REMAP,
1642 	INTR_REMAP,
1643 	UNKNOWN,
1644 };
1645 
1646 static const char *dma_remap_fault_reasons[] =
1647 {
1648 	"Software",
1649 	"Present bit in root entry is clear",
1650 	"Present bit in context entry is clear",
1651 	"Invalid context entry",
1652 	"Access beyond MGAW",
1653 	"PTE Write access is not set",
1654 	"PTE Read access is not set",
1655 	"Next page table ptr is invalid",
1656 	"Root table address invalid",
1657 	"Context table ptr is invalid",
1658 	"non-zero reserved fields in RTP",
1659 	"non-zero reserved fields in CTP",
1660 	"non-zero reserved fields in PTE",
1661 	"PCE for translation request specifies blocking",
1662 };
1663 
1664 static const char * const dma_remap_sm_fault_reasons[] = {
1665 	"SM: Invalid Root Table Address",
1666 	"SM: TTM 0 for request with PASID",
1667 	"SM: TTM 0 for page group request",
1668 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1669 	"SM: Error attempting to access Root Entry",
1670 	"SM: Present bit in Root Entry is clear",
1671 	"SM: Non-zero reserved field set in Root Entry",
1672 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1673 	"SM: Error attempting to access Context Entry",
1674 	"SM: Present bit in Context Entry is clear",
1675 	"SM: Non-zero reserved field set in the Context Entry",
1676 	"SM: Invalid Context Entry",
1677 	"SM: DTE field in Context Entry is clear",
1678 	"SM: PASID Enable field in Context Entry is clear",
1679 	"SM: PASID is larger than the max in Context Entry",
1680 	"SM: PRE field in Context-Entry is clear",
1681 	"SM: RID_PASID field error in Context-Entry",
1682 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1683 	"SM: Error attempting to access the PASID Directory Entry",
1684 	"SM: Present bit in Directory Entry is clear",
1685 	"SM: Non-zero reserved field set in PASID Directory Entry",
1686 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1687 	"SM: Error attempting to access PASID Table Entry",
1688 	"SM: Present bit in PASID Table Entry is clear",
1689 	"SM: Non-zero reserved field set in PASID Table Entry",
1690 	"SM: Invalid Scalable-Mode PASID Table Entry",
1691 	"SM: ERE field is clear in PASID Table Entry",
1692 	"SM: SRE field is clear in PASID Table Entry",
1693 	"Unknown", "Unknown",/* 0x5E-0x5F */
1694 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1695 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1696 	"SM: Error attempting to access first-level paging entry",
1697 	"SM: Present bit in first-level paging entry is clear",
1698 	"SM: Non-zero reserved field set in first-level paging entry",
1699 	"SM: Error attempting to access FL-PML4 entry",
1700 	"SM: First-level entry address beyond MGAW in Nested translation",
1701 	"SM: Read permission error in FL-PML4 entry in Nested translation",
1702 	"SM: Read permission error in first-level paging entry in Nested translation",
1703 	"SM: Write permission error in first-level paging entry in Nested translation",
1704 	"SM: Error attempting to access second-level paging entry",
1705 	"SM: Read/Write permission error in second-level paging entry",
1706 	"SM: Non-zero reserved field set in second-level paging entry",
1707 	"SM: Invalid second-level page table pointer",
1708 	"SM: A/D bit update needed in second-level entry when set up in no snoop",
1709 	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1710 	"SM: Address in first-level translation is not canonical",
1711 	"SM: U/S set 0 for first-level translation with user privilege",
1712 	"SM: No execute permission for request with PASID and ER=1",
1713 	"SM: Address beyond the DMA hardware max",
1714 	"SM: Second-level entry address beyond the max",
1715 	"SM: No write permission for Write/AtomicOp request",
1716 	"SM: No read permission for Read/AtomicOp request",
1717 	"SM: Invalid address-interrupt address",
1718 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1719 	"SM: A/D bit update needed in first-level entry when set up in no snoop",
1720 };
1721 
1722 static const char *irq_remap_fault_reasons[] =
1723 {
1724 	"Detected reserved fields in the decoded interrupt-remapped request",
1725 	"Interrupt index exceeded the interrupt-remapping table size",
1726 	"Present field in the IRTE entry is clear",
1727 	"Error accessing interrupt-remapping table pointed by IRTA_REG",
1728 	"Detected reserved fields in the IRTE entry",
1729 	"Blocked a compatibility format interrupt request",
1730 	"Blocked an interrupt request due to source-id verification failure",
1731 };
1732 
1733 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1734 {
1735 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1736 					ARRAY_SIZE(irq_remap_fault_reasons))) {
1737 		*fault_type = INTR_REMAP;
1738 		return irq_remap_fault_reasons[fault_reason - 0x20];
1739 	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1740 			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1741 		*fault_type = DMA_REMAP;
1742 		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1743 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1744 		*fault_type = DMA_REMAP;
1745 		return dma_remap_fault_reasons[fault_reason];
1746 	} else {
1747 		*fault_type = UNKNOWN;
1748 		return "Unknown";
1749 	}
1750 }
1751 
1752 
1753 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1754 {
1755 	if (iommu->irq == irq)
1756 		return DMAR_FECTL_REG;
1757 	else if (iommu->pr_irq == irq)
1758 		return DMAR_PECTL_REG;
1759 	else
1760 		BUG();
1761 }
1762 
1763 void dmar_msi_unmask(struct irq_data *data)
1764 {
1765 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1766 	int reg = dmar_msi_reg(iommu, data->irq);
1767 	unsigned long flag;
1768 
1769 	/* unmask it */
1770 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1771 	writel(0, iommu->reg + reg);
1772 	/* Read a reg to force-flush the posted write */
1773 	readl(iommu->reg + reg);
1774 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1775 }
1776 
1777 void dmar_msi_mask(struct irq_data *data)
1778 {
1779 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1780 	int reg = dmar_msi_reg(iommu, data->irq);
1781 	unsigned long flag;
1782 
1783 	/* mask it */
1784 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1785 	writel(DMA_FECTL_IM, iommu->reg + reg);
1786 	/* Read a reg to force-flush the posted write */
1787 	readl(iommu->reg + reg);
1788 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1789 }
1790 
1791 void dmar_msi_write(int irq, struct msi_msg *msg)
1792 {
1793 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1794 	int reg = dmar_msi_reg(iommu, irq);
1795 	unsigned long flag;
1796 
1797 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1798 	writel(msg->data, iommu->reg + reg + 4);
1799 	writel(msg->address_lo, iommu->reg + reg + 8);
1800 	writel(msg->address_hi, iommu->reg + reg + 12);
1801 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1802 }
1803 
1804 void dmar_msi_read(int irq, struct msi_msg *msg)
1805 {
1806 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1807 	int reg = dmar_msi_reg(iommu, irq);
1808 	unsigned long flag;
1809 
1810 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1811 	msg->data = readl(iommu->reg + reg + 4);
1812 	msg->address_lo = readl(iommu->reg + reg + 8);
1813 	msg->address_hi = readl(iommu->reg + reg + 12);
1814 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1815 }
1816 
1817 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1818 		u8 fault_reason, u32 pasid, u16 source_id,
1819 		unsigned long long addr)
1820 {
1821 	const char *reason;
1822 	int fault_type;
1823 
1824 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1825 
1826 	if (fault_type == INTR_REMAP)
1827 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1828 			source_id >> 8, PCI_SLOT(source_id & 0xFF),
1829 			PCI_FUNC(source_id & 0xFF), addr >> 48,
1830 			fault_reason, reason);
1831 	else
1832 		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1833 		       type ? "DMA Read" : "DMA Write",
1834 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1835 		       PCI_FUNC(source_id & 0xFF), pasid, addr,
1836 		       fault_reason, reason);
1837 	return 0;
1838 }
1839 
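/* Each fault recording register is 128 bits (16 bytes) wide. */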
1840 #define PRIMARY_FAULT_REG_LEN (16)
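/*
 * Primary fault interrupt handler: walk the fault recording registers
 * starting at the index reported in the fault status register, log each
 * pending record (rate-limited), clear its fault bit, and finally clear the
 * pending/overflow bits in DMAR_FSTS_REG.
 */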
1841 irqreturn_t dmar_fault(int irq, void *dev_id)
1842 {
1843 	struct intel_iommu *iommu = dev_id;
1844 	int reg, fault_index;
1845 	u32 fault_status;
1846 	unsigned long flag;
1847 	static DEFINE_RATELIMIT_STATE(rs,
1848 				      DEFAULT_RATELIMIT_INTERVAL,
1849 				      DEFAULT_RATELIMIT_BURST);
1850 
1851 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1852 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1853 	if (fault_status && __ratelimit(&rs))
1854 		pr_err("DRHD: handling fault status reg %x\n", fault_status);
1855 
1856 	/* TBD: the advanced fault log is currently not handled */
1857 	if (!(fault_status & DMA_FSTS_PPF))
1858 		goto unlock_exit;
1859 
1860 	fault_index = dma_fsts_fault_record_index(fault_status);
1861 	reg = cap_fault_reg_offset(iommu->cap);
1862 	while (1) {
1863 		/* Disable printing, simply clear the fault when ratelimited */
1864 		bool ratelimited = !__ratelimit(&rs);
1865 		u8 fault_reason;
1866 		u16 source_id;
1867 		u64 guest_addr;
1868 		u32 pasid;
1869 		int type;
1870 		u32 data;
1871 		bool pasid_present;
1872 
1873 		/* Highest 32 bits of the 128-bit fault recording register */
1874 		data = readl(iommu->reg + reg +
1875 				fault_index * PRIMARY_FAULT_REG_LEN + 12);
1876 		if (!(data & DMA_FRCD_F))
1877 			break;
1878 
1879 		if (!ratelimited) {
1880 			fault_reason = dma_frcd_fault_reason(data);
1881 			type = dma_frcd_type(data);
1882 
1883 			pasid = dma_frcd_pasid_value(data);
1884 			data = readl(iommu->reg + reg +
1885 				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
1886 			source_id = dma_frcd_source_id(data);
1887 
1888 			pasid_present = dma_frcd_pasid_present(data);
1889 			guest_addr = dmar_readq(iommu->reg + reg +
1890 					fault_index * PRIMARY_FAULT_REG_LEN);
1891 			guest_addr = dma_frcd_page_addr(guest_addr);
1892 		}
1893 
1894 		/* clear the fault */
1895 		writel(DMA_FRCD_F, iommu->reg + reg +
1896 			fault_index * PRIMARY_FAULT_REG_LEN + 12);
1897 
1898 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1899 
1900 		if (!ratelimited)
1901 			/* Pass PASID value -1 when the fault record has no PASID */
1902 			dmar_fault_do_one(iommu, type, fault_reason,
1903 					  pasid_present ? pasid : -1,
1904 					  source_id, guest_addr);
1905 
1906 		fault_index++;
1907 		if (fault_index >= cap_num_fault_regs(iommu->cap))
1908 			fault_index = 0;
1909 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1910 	}
1911 
1912 	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1913 	       iommu->reg + DMAR_FSTS_REG);
1914 
1915 unlock_exit:
1916 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1917 	return IRQ_HANDLED;
1918 }
1919 
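/*
 * Allocate a fault event interrupt for this IOMMU (on its home NUMA node)
 * and wire it up to dmar_fault(). Safe to call more than once; an already
 * initialized interrupt is left alone.
 */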
1920 int dmar_set_interrupt(struct intel_iommu *iommu)
1921 {
1922 	int irq, ret;
1923 
1924 	/*
1925 	 * Check if the fault interrupt is already initialized.
1926 	 */
1927 	if (iommu->irq)
1928 		return 0;
1929 
1930 	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1931 	if (irq > 0) {
1932 		iommu->irq = irq;
1933 	} else {
1934 		pr_err("No free IRQ vectors\n");
1935 		return -EINVAL;
1936 	}
1937 
1938 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1939 	if (ret)
1940 		pr_err("Can't request irq\n");
1941 	return ret;
1942 }
1943 
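/*
 * Set up fault reporting on every DRHD unit at boot: register the fault
 * interrupt, drain any faults latched before the handler existed, and clear
 * the fault status register.
 */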
1944 int __init enable_drhd_fault_handling(void)
1945 {
1946 	struct dmar_drhd_unit *drhd;
1947 	struct intel_iommu *iommu;
1948 
1949 	/*
1950 	 * Enable fault control interrupt.
1951 	 */
1952 	for_each_iommu(iommu, drhd) {
1953 		u32 fault_status;
1954 		int ret = dmar_set_interrupt(iommu);
1955 
1956 		if (ret) {
1957 			pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1958 			       (unsigned long long)drhd->reg_base_addr, ret);
1959 			return -1;
1960 		}
1961 
1962 		/*
1963 		 * Clear any previous faults.
1964 		 */
1965 		dmar_fault(iommu->irq, iommu);
1966 		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1967 		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1968 	}
1969 
1970 	return 0;
1971 }
1972 
1973 /*
1974  * Re-enable Queued Invalidation interface.
1975  */
1976 int dmar_reenable_qi(struct intel_iommu *iommu)
1977 {
1978 	if (!ecap_qis(iommu->ecap))
1979 		return -ENOENT;
1980 
1981 	if (!iommu->qi)
1982 		return -ENOENT;
1983 
1984 	/*
1985 	 * First disable queued invalidation.
1986 	 */
1987 	dmar_disable_qi(iommu);
1988 	/*
1989 	 * Then enable queued invalidation again. Since there are no pending
1990 	 * invalidation requests now, it's safe to re-enable queued
1991 	 * invalidation.
1992 	 */
1993 	__dmar_enable_qi(iommu);
1994 
1995 	return 0;
1996 }
1997 
1998 /*
1999  * Check whether the DMAR table advertises interrupt remapping support.
2000  */
2001 int __init dmar_ir_support(void)
2002 {
2003 	struct acpi_table_dmar *dmar;
2004 	dmar = (struct acpi_table_dmar *)dmar_tbl;
2005 	if (!dmar)
2006 		return 0;
2007 	return dmar->flags & 0x1;
2008 }
2009 
2010 /* Check whether DMAR units are in use */
2011 static inline bool dmar_in_use(void)
2012 {
2013 	return irq_remapping_enabled || intel_iommu_enabled;
2014 }
2015 
2016 static int __init dmar_free_unused_resources(void)
2017 {
2018 	struct dmar_drhd_unit *dmaru, *dmaru_n;
2019 
2020 	if (dmar_in_use())
2021 		return 0;
2022 
2023 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2024 		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2025 
2026 	down_write(&dmar_global_lock);
2027 	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2028 		list_del(&dmaru->list);
2029 		dmar_free_drhd(dmaru);
2030 	}
2031 	up_write(&dmar_global_lock);
2032 
2033 	return 0;
2034 }
2035 
2036 late_initcall(dmar_free_unused_resources);
2037 IOMMU_INIT_POST(detect_intel_iommu);
2038 
2039 /*
2040  * DMAR Hotplug Support
2041  * For more details, please refer to Intel(R) Virtualization Technology
2042  * for Directed I/O Architecture Specification, Rev 2.2, Section 8.8
2043  * "Remapping Hardware Unit Hot Plug".
2044  */
2045 static guid_t dmar_hp_guid =
2046 	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2047 		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2048 
2049 /*
2050  * Currently there is only one revision, and the BIOS does not check the
2051  * revision ID, so use 0 for safety.
2052  */
2053 #define	DMAR_DSM_REV_ID			0
2054 #define	DMAR_DSM_FUNC_DRHD		1
2055 #define	DMAR_DSM_FUNC_ATSR		2
2056 #define	DMAR_DSM_FUNC_RHSA		3
2057 
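/* Check whether the ACPI device's _DSM supports the given function index. */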
2058 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2059 {
2060 	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2061 }
2062 
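/*
 * Evaluate the DMAR hotplug _DSM function, which returns a buffer of ACPI
 * DMAR structures, and walk that buffer with @handler bound to the resource
 * type matching @func (DRHD, ATSR or RHSA). Returns 0 if the _DSM does not
 * implement the requested function.
 */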
2063 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2064 				  dmar_res_handler_t handler, void *arg)
2065 {
2066 	int ret = -ENODEV;
2067 	union acpi_object *obj;
2068 	struct acpi_dmar_header *start;
2069 	struct dmar_res_callback callback;
2070 	static int res_type[] = {
2071 		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2072 		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2073 		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2074 	};
2075 
2076 	if (!dmar_detect_dsm(handle, func))
2077 		return 0;
2078 
2079 	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2080 				      func, NULL, ACPI_TYPE_BUFFER);
2081 	if (!obj)
2082 		return -ENODEV;
2083 
2084 	memset(&callback, 0, sizeof(callback));
2085 	callback.cb[res_type[func]] = handler;
2086 	callback.arg[res_type[func]] = arg;
2087 	start = (struct acpi_dmar_header *)obj->buffer.pointer;
2088 	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2089 
2090 	ACPI_FREE(obj);
2091 
2092 	return ret;
2093 }
2094 
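/* Bring up interrupt remapping, then DMA remapping, on a hot-added unit. */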
2095 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2096 {
2097 	int ret;
2098 	struct dmar_drhd_unit *dmaru;
2099 
2100 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2101 	if (!dmaru)
2102 		return -ENODEV;
2103 
2104 	ret = dmar_ir_hotplug(dmaru, true);
2105 	if (ret == 0)
2106 		ret = dmar_iommu_hotplug(dmaru, true);
2107 
2108 	return ret;
2109 }
2110 
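/*
 * Disable remapping on a unit that is about to be removed. Fails with
 * -EBUSY if any device in the unit's scope is still present.
 */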
2111 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2112 {
2113 	int i, ret;
2114 	struct device *dev;
2115 	struct dmar_drhd_unit *dmaru;
2116 
2117 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2118 	if (!dmaru)
2119 		return 0;
2120 
2121 	/*
2122 	 * All PCI devices managed by this unit should have been destroyed.
2123 	 */
2124 	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2125 		for_each_active_dev_scope(dmaru->devices,
2126 					  dmaru->devices_cnt, i, dev)
2127 			return -EBUSY;
2128 	}
2129 
2130 	ret = dmar_ir_hotplug(dmaru, false);
2131 	if (ret == 0)
2132 		ret = dmar_iommu_hotplug(dmaru, false);
2133 
2134 	return ret;
2135 }
2136 
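/* Unlink the DRHD from the global list and free it once RCU readers are done. */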
2137 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2138 {
2139 	struct dmar_drhd_unit *dmaru;
2140 
2141 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2142 	if (dmaru) {
2143 		list_del_rcu(&dmaru->list);
2144 		synchronize_rcu();
2145 		dmar_free_drhd(dmaru);
2146 	}
2147 
2148 	return 0;
2149 }
2150 
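/*
 * Hot-add path: validate and parse the DRHD, RHSA and ATSR structures
 * reported by _DSM, then enable interrupt remapping and DMA remapping on the
 * new unit. On failure, unwind whatever was already set up.
 */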
2151 static int dmar_hotplug_insert(acpi_handle handle)
2152 {
2153 	int ret;
2154 	int drhd_count = 0;
2155 
2156 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2157 				     &dmar_validate_one_drhd, (void *)1);
2158 	if (ret)
2159 		goto out;
2160 
2161 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2162 				     &dmar_parse_one_drhd, (void *)&drhd_count);
2163 	if (ret == 0 && drhd_count == 0) {
2164 		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2165 		goto out;
2166 	} else if (ret) {
2167 		goto release_drhd;
2168 	}
2169 
2170 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2171 				     &dmar_parse_one_rhsa, NULL);
2172 	if (ret)
2173 		goto release_drhd;
2174 
2175 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2176 				     &dmar_parse_one_atsr, NULL);
2177 	if (ret)
2178 		goto release_atsr;
2179 
2180 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2181 				     &dmar_hp_add_drhd, NULL);
2182 	if (!ret)
2183 		return 0;
2184 
2185 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2186 			       &dmar_hp_remove_drhd, NULL);
2187 release_atsr:
2188 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2189 			       &dmar_release_one_atsr, NULL);
2190 release_drhd:
2191 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2192 			       &dmar_hp_release_drhd, NULL);
2193 out:
2194 	return ret;
2195 }
2196 
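/*
 * Hot-remove path: bail out if the ATSR check fails or if any device in the
 * unit's scope is still present; otherwise disable remapping on the unit and
 * release its DRHD and ATSR structures. If disabling fails, the unit is
 * rolled back to the enabled state.
 */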
2197 static int dmar_hotplug_remove(acpi_handle handle)
2198 {
2199 	int ret;
2200 
2201 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2202 				     &dmar_check_one_atsr, NULL);
2203 	if (ret)
2204 		return ret;
2205 
2206 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2207 				     &dmar_hp_remove_drhd, NULL);
2208 	if (ret == 0) {
2209 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2210 					       &dmar_release_one_atsr, NULL));
2211 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2212 					       &dmar_hp_release_drhd, NULL));
2213 	} else {
2214 		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2215 				       &dmar_hp_add_drhd, NULL);
2216 	}
2217 
2218 	return ret;
2219 }
2220 
2221 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2222 				       void *context, void **retval)
2223 {
2224 	acpi_handle *phdl = retval;
2225 
2226 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2227 		*phdl = handle;
2228 		return AE_CTRL_TERMINATE;
2229 	}
2230 
2231 	return AE_OK;
2232 }
2233 
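/*
 * Common entry point for ACPI-driven DMAR hot-add/hot-remove. Locate the
 * ACPI object implementing the DMAR _DSM (either @handle itself or a
 * descendant), then perform the insertion or removal under dmar_global_lock.
 */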
2234 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2235 {
2236 	int ret;
2237 	acpi_handle tmp = NULL;
2238 	acpi_status status;
2239 
2240 	if (!dmar_in_use())
2241 		return 0;
2242 
2243 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2244 		tmp = handle;
2245 	} else {
2246 		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2247 					     ACPI_UINT32_MAX,
2248 					     dmar_get_dsm_handle,
2249 					     NULL, NULL, &tmp);
2250 		if (ACPI_FAILURE(status)) {
2251 			pr_warn("Failed to locate _DSM method.\n");
2252 			return -ENXIO;
2253 		}
2254 	}
2255 	if (tmp == NULL)
2256 		return 0;
2257 
2258 	down_write(&dmar_global_lock);
2259 	if (insert)
2260 		ret = dmar_hotplug_insert(tmp);
2261 	else
2262 		ret = dmar_hotplug_remove(tmp);
2263 	up_write(&dmar_global_lock);
2264 
2265 	return ret;
2266 }
2267 
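/* External entry points for hot-adding and hot-removing a DMAR unit by ACPI handle. */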
2268 int dmar_device_add(acpi_handle handle)
2269 {
2270 	return dmar_device_hotplug(handle, true);
2271 }
2272 
2273 int dmar_device_remove(acpi_handle handle)
2274 {
2275 	return dmar_device_hotplug(handle, false);
2276 }
2277 
2278 /*
2279  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2280  *
2281  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2282  * the ACPI DMAR table. This means that the platform boot firmware has made
2283  * sure no device can issue DMA outside of RMRR regions.
2284  */
2285 bool dmar_platform_optin(void)
2286 {
2287 	struct acpi_table_dmar *dmar;
2288 	acpi_status status;
2289 	bool ret;
2290 
2291 	status = acpi_get_table(ACPI_SIG_DMAR, 0,
2292 				(struct acpi_table_header **)&dmar);
2293 	if (ACPI_FAILURE(status))
2294 		return false;
2295 
2296 	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2297 	acpi_put_table((struct acpi_table_header *)dmar);
2298 
2299 	return ret;
2300 }
2301 EXPORT_SYMBOL_GPL(dmar_platform_optin);
2302