xref: /openbmc/linux/drivers/iommu/intel/dmar.c (revision 29c37341)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16 
17 #define pr_fmt(fmt)     "DMAR: " fmt
18 
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 
35 #include "../irq_remapping.h"
36 
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39 	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
40 	void			*arg[ACPI_DMAR_TYPE_RESERVED];
41 	bool			ignore_unhandled;
42 	bool			print_entry;
43 };
44 
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarentees that DMAR unit will be hot-added
48  *    before IO devices managed by that unit.
49  * 2) The hotplug framework guarantees that DMAR unit will be hot-removed
50  *    after IO devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
59 
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63 
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66 
67 extern const struct iommu_ops intel_iommu_ops;
68 
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71 	/*
72 	 * add INCLUDE_ALL at the tail, so scan the list will find it at
73 	 * the very end.
74 	 */
75 	if (drhd->include_all)
76 		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 	else
78 		list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80 
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83 	struct acpi_dmar_device_scope *scope;
84 
85 	*cnt = 0;
86 	while (start < end) {
87 		scope = start;
88 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 			(*cnt)++;
92 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93 			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94 			pr_warn("Unsupported device scope\n");
95 		}
96 		start += scope->length;
97 	}
98 	if (*cnt == 0)
99 		return NULL;
100 
101 	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103 
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106 	int i;
107 	struct device *tmp_dev;
108 
109 	if (*devices && *cnt) {
110 		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111 			put_device(tmp_dev);
112 		kfree(*devices);
113 	}
114 
115 	*devices = NULL;
116 	*cnt = 0;
117 }
118 
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121 
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125 	int level = 0;
126 	size_t size;
127 	struct pci_dev *tmp;
128 	struct dmar_pci_notify_info *info;
129 
130 	BUG_ON(dev->is_virtfn);
131 
132 	/*
133 	 * Ignore devices that have a domain number higher than what can
134 	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135 	 */
136 	if (pci_domain_nr(dev->bus) > U16_MAX)
137 		return NULL;
138 
139 	/* Only generate path[] for device addition event */
140 	if (event == BUS_NOTIFY_ADD_DEVICE)
141 		for (tmp = dev; tmp; tmp = tmp->bus->self)
142 			level++;
143 
144 	size = struct_size(info, path, level);
145 	if (size <= sizeof(dmar_pci_notify_info_buf)) {
146 		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147 	} else {
148 		info = kzalloc(size, GFP_KERNEL);
149 		if (!info) {
150 			pr_warn("Out of memory when allocating notify_info "
151 				"for %s.\n", pci_name(dev));
152 			if (dmar_dev_scope_status == 0)
153 				dmar_dev_scope_status = -ENOMEM;
154 			return NULL;
155 		}
156 	}
157 
158 	info->event = event;
159 	info->dev = dev;
160 	info->seg = pci_domain_nr(dev->bus);
161 	info->level = level;
162 	if (event == BUS_NOTIFY_ADD_DEVICE) {
163 		for (tmp = dev; tmp; tmp = tmp->bus->self) {
164 			level--;
165 			info->path[level].bus = tmp->bus->number;
166 			info->path[level].device = PCI_SLOT(tmp->devfn);
167 			info->path[level].function = PCI_FUNC(tmp->devfn);
168 			if (pci_is_root_bus(tmp->bus))
169 				info->bus = tmp->bus->number;
170 		}
171 	}
172 
173 	return info;
174 }
175 
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178 	if ((void *)info != dmar_pci_notify_info_buf)
179 		kfree(info);
180 }
181 
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183 				struct acpi_dmar_pci_path *path, int count)
184 {
185 	int i;
186 
187 	if (info->bus != bus)
188 		goto fallback;
189 	if (info->level != count)
190 		goto fallback;
191 
192 	for (i = 0; i < count; i++) {
193 		if (path[i].device != info->path[i].device ||
194 		    path[i].function != info->path[i].function)
195 			goto fallback;
196 	}
197 
198 	return true;
199 
200 fallback:
201 
202 	if (count != 1)
203 		return false;
204 
205 	i = info->level - 1;
206 	if (bus              == info->path[i].bus &&
207 	    path[0].device   == info->path[i].device &&
208 	    path[0].function == info->path[i].function) {
209 		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210 			bus, path[0].device, path[0].function);
211 		return true;
212 	}
213 
214 	return false;
215 }
216 
217 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219 			  void *start, void*end, u16 segment,
220 			  struct dmar_dev_scope *devices,
221 			  int devices_cnt)
222 {
223 	int i, level;
224 	struct device *tmp, *dev = &info->dev->dev;
225 	struct acpi_dmar_device_scope *scope;
226 	struct acpi_dmar_pci_path *path;
227 
228 	if (segment != info->seg)
229 		return 0;
230 
231 	for (; start < end; start += scope->length) {
232 		scope = start;
233 		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234 		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235 			continue;
236 
237 		path = (struct acpi_dmar_pci_path *)(scope + 1);
238 		level = (scope->length - sizeof(*scope)) / sizeof(*path);
239 		if (!dmar_match_pci_path(info, scope->bus, path, level))
240 			continue;
241 
242 		/*
243 		 * We expect devices with endpoint scope to have normal PCI
244 		 * headers, and devices with bridge scope to have bridge PCI
245 		 * headers.  However PCI NTB devices may be listed in the
246 		 * DMAR table with bridge scope, even though they have a
247 		 * normal PCI header.  NTB devices are identified by class
248 		 * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
249 		 * for this special case.
250 		 */
251 		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252 		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253 		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254 		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255 		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256 			pr_warn("Device scope type does not match for %s\n",
257 				pci_name(info->dev));
258 			return -EINVAL;
259 		}
260 
261 		for_each_dev_scope(devices, devices_cnt, i, tmp)
262 			if (tmp == NULL) {
263 				devices[i].bus = info->dev->bus->number;
264 				devices[i].devfn = info->dev->devfn;
265 				rcu_assign_pointer(devices[i].dev,
266 						   get_device(dev));
267 				return 1;
268 			}
269 		BUG_ON(i >= devices_cnt);
270 	}
271 
272 	return 0;
273 }
274 
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276 			  struct dmar_dev_scope *devices, int count)
277 {
278 	int index;
279 	struct device *tmp;
280 
281 	if (info->seg != segment)
282 		return 0;
283 
284 	for_each_active_dev_scope(devices, count, index, tmp)
285 		if (tmp == &info->dev->dev) {
286 			RCU_INIT_POINTER(devices[index].dev, NULL);
287 			synchronize_rcu();
288 			put_device(tmp);
289 			return 1;
290 		}
291 
292 	return 0;
293 }
294 
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297 	int ret = 0;
298 	struct dmar_drhd_unit *dmaru;
299 	struct acpi_dmar_hardware_unit *drhd;
300 
301 	for_each_drhd_unit(dmaru) {
302 		if (dmaru->include_all)
303 			continue;
304 
305 		drhd = container_of(dmaru->hdr,
306 				    struct acpi_dmar_hardware_unit, header);
307 		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308 				((void *)drhd) + drhd->header.length,
309 				dmaru->segment,
310 				dmaru->devices, dmaru->devices_cnt);
311 		if (ret)
312 			break;
313 	}
314 	if (ret >= 0)
315 		ret = dmar_iommu_notify_scope_dev(info);
316 	if (ret < 0 && dmar_dev_scope_status == 0)
317 		dmar_dev_scope_status = ret;
318 
319 	return ret;
320 }
321 
322 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
323 {
324 	struct dmar_drhd_unit *dmaru;
325 
326 	for_each_drhd_unit(dmaru)
327 		if (dmar_remove_dev_scope(info, dmaru->segment,
328 			dmaru->devices, dmaru->devices_cnt))
329 			break;
330 	dmar_iommu_notify_scope_dev(info);
331 }
332 
333 static int dmar_pci_bus_notifier(struct notifier_block *nb,
334 				 unsigned long action, void *data)
335 {
336 	struct pci_dev *pdev = to_pci_dev(data);
337 	struct dmar_pci_notify_info *info;
338 
339 	/* Only care about add/remove events for physical functions.
340 	 * For VFs we actually do the lookup based on the corresponding
341 	 * PF in device_to_iommu() anyway. */
342 	if (pdev->is_virtfn)
343 		return NOTIFY_DONE;
344 	if (action != BUS_NOTIFY_ADD_DEVICE &&
345 	    action != BUS_NOTIFY_REMOVED_DEVICE)
346 		return NOTIFY_DONE;
347 
348 	info = dmar_alloc_pci_notify_info(pdev, action);
349 	if (!info)
350 		return NOTIFY_DONE;
351 
352 	down_write(&dmar_global_lock);
353 	if (action == BUS_NOTIFY_ADD_DEVICE)
354 		dmar_pci_bus_add_dev(info);
355 	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
356 		dmar_pci_bus_del_dev(info);
357 	up_write(&dmar_global_lock);
358 
359 	dmar_free_pci_notify_info(info);
360 
361 	return NOTIFY_OK;
362 }
363 
364 static struct notifier_block dmar_pci_bus_nb = {
365 	.notifier_call = dmar_pci_bus_notifier,
366 	.priority = INT_MIN,
367 };
368 
369 static struct dmar_drhd_unit *
370 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
371 {
372 	struct dmar_drhd_unit *dmaru;
373 
374 	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
375 				dmar_rcu_check())
376 		if (dmaru->segment == drhd->segment &&
377 		    dmaru->reg_base_addr == drhd->address)
378 			return dmaru;
379 
380 	return NULL;
381 }
382 
383 /**
384  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
385  * structure which uniquely represent one DMA remapping hardware unit
386  * present in the platform
387  */
388 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
389 {
390 	struct acpi_dmar_hardware_unit *drhd;
391 	struct dmar_drhd_unit *dmaru;
392 	int ret;
393 
394 	drhd = (struct acpi_dmar_hardware_unit *)header;
395 	dmaru = dmar_find_dmaru(drhd);
396 	if (dmaru)
397 		goto out;
398 
399 	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
400 	if (!dmaru)
401 		return -ENOMEM;
402 
403 	/*
404 	 * If header is allocated from slab by ACPI _DSM method, we need to
405 	 * copy the content because the memory buffer will be freed on return.
406 	 */
407 	dmaru->hdr = (void *)(dmaru + 1);
408 	memcpy(dmaru->hdr, header, header->length);
409 	dmaru->reg_base_addr = drhd->address;
410 	dmaru->segment = drhd->segment;
411 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
412 	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
413 					      ((void *)drhd) + drhd->header.length,
414 					      &dmaru->devices_cnt);
415 	if (dmaru->devices_cnt && dmaru->devices == NULL) {
416 		kfree(dmaru);
417 		return -ENOMEM;
418 	}
419 
420 	ret = alloc_iommu(dmaru);
421 	if (ret) {
422 		dmar_free_dev_scope(&dmaru->devices,
423 				    &dmaru->devices_cnt);
424 		kfree(dmaru);
425 		return ret;
426 	}
427 	dmar_register_drhd_unit(dmaru);
428 
429 out:
430 	if (arg)
431 		(*(int *)arg)++;
432 
433 	return 0;
434 }
435 
436 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
437 {
438 	if (dmaru->devices && dmaru->devices_cnt)
439 		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
440 	if (dmaru->iommu)
441 		free_iommu(dmaru->iommu);
442 	kfree(dmaru);
443 }
444 
445 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
446 				      void *arg)
447 {
448 	struct acpi_dmar_andd *andd = (void *)header;
449 
450 	/* Check for NUL termination within the designated length */
451 	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
452 		pr_warn(FW_BUG
453 			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
454 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
455 			   dmi_get_system_info(DMI_BIOS_VENDOR),
456 			   dmi_get_system_info(DMI_BIOS_VERSION),
457 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
458 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
459 		return -EINVAL;
460 	}
461 	pr_info("ANDD device: %x name: %s\n", andd->device_number,
462 		andd->device_name);
463 
464 	return 0;
465 }
466 
467 #ifdef CONFIG_ACPI_NUMA
468 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
469 {
470 	struct acpi_dmar_rhsa *rhsa;
471 	struct dmar_drhd_unit *drhd;
472 
473 	rhsa = (struct acpi_dmar_rhsa *)header;
474 	for_each_drhd_unit(drhd) {
475 		if (drhd->reg_base_addr == rhsa->base_address) {
476 			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
477 
478 			if (!node_online(node))
479 				node = NUMA_NO_NODE;
480 			drhd->iommu->node = node;
481 			return 0;
482 		}
483 	}
484 	pr_warn(FW_BUG
485 		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
486 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
487 		rhsa->base_address,
488 		dmi_get_system_info(DMI_BIOS_VENDOR),
489 		dmi_get_system_info(DMI_BIOS_VERSION),
490 		dmi_get_system_info(DMI_PRODUCT_VERSION));
491 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
492 
493 	return 0;
494 }
495 #else
496 #define	dmar_parse_one_rhsa		dmar_res_noop
497 #endif
498 
499 static void
500 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
501 {
502 	struct acpi_dmar_hardware_unit *drhd;
503 	struct acpi_dmar_reserved_memory *rmrr;
504 	struct acpi_dmar_atsr *atsr;
505 	struct acpi_dmar_rhsa *rhsa;
506 
507 	switch (header->type) {
508 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
509 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
510 				    header);
511 		pr_info("DRHD base: %#016Lx flags: %#x\n",
512 			(unsigned long long)drhd->address, drhd->flags);
513 		break;
514 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
515 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
516 				    header);
517 		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
518 			(unsigned long long)rmrr->base_address,
519 			(unsigned long long)rmrr->end_address);
520 		break;
521 	case ACPI_DMAR_TYPE_ROOT_ATS:
522 		atsr = container_of(header, struct acpi_dmar_atsr, header);
523 		pr_info("ATSR flags: %#x\n", atsr->flags);
524 		break;
525 	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
526 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
527 		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
528 		       (unsigned long long)rhsa->base_address,
529 		       rhsa->proximity_domain);
530 		break;
531 	case ACPI_DMAR_TYPE_NAMESPACE:
532 		/* We don't print this here because we need to sanity-check
533 		   it first. So print it in dmar_parse_one_andd() instead. */
534 		break;
535 	}
536 }
537 
538 /**
539  * dmar_table_detect - checks to see if the platform supports DMAR devices
540  */
541 static int __init dmar_table_detect(void)
542 {
543 	acpi_status status = AE_OK;
544 
545 	/* if we could find DMAR table, then there are DMAR devices */
546 	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
547 
548 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
549 		pr_warn("Unable to map DMAR\n");
550 		status = AE_NOT_FOUND;
551 	}
552 
553 	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
554 }
555 
556 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
557 				       size_t len, struct dmar_res_callback *cb)
558 {
559 	struct acpi_dmar_header *iter, *next;
560 	struct acpi_dmar_header *end = ((void *)start) + len;
561 
562 	for (iter = start; iter < end; iter = next) {
563 		next = (void *)iter + iter->length;
564 		if (iter->length == 0) {
565 			/* Avoid looping forever on bad ACPI tables */
566 			pr_debug(FW_BUG "Invalid 0-length structure\n");
567 			break;
568 		} else if (next > end) {
569 			/* Avoid passing table end */
570 			pr_warn(FW_BUG "Record passes table end\n");
571 			return -EINVAL;
572 		}
573 
574 		if (cb->print_entry)
575 			dmar_table_print_dmar_entry(iter);
576 
577 		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
578 			/* continue for forward compatibility */
579 			pr_debug("Unknown DMAR structure type %d\n",
580 				 iter->type);
581 		} else if (cb->cb[iter->type]) {
582 			int ret;
583 
584 			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
585 			if (ret)
586 				return ret;
587 		} else if (!cb->ignore_unhandled) {
588 			pr_warn("No handler for DMAR structure type %d\n",
589 				iter->type);
590 			return -EINVAL;
591 		}
592 	}
593 
594 	return 0;
595 }
596 
597 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
598 				       struct dmar_res_callback *cb)
599 {
600 	return dmar_walk_remapping_entries((void *)(dmar + 1),
601 			dmar->header.length - sizeof(*dmar), cb);
602 }
603 
604 /**
605  * parse_dmar_table - parses the DMA reporting table
606  */
607 static int __init
608 parse_dmar_table(void)
609 {
610 	struct acpi_table_dmar *dmar;
611 	int drhd_count = 0;
612 	int ret;
613 	struct dmar_res_callback cb = {
614 		.print_entry = true,
615 		.ignore_unhandled = true,
616 		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
617 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
618 		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
619 		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
620 		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
621 		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
622 	};
623 
624 	/*
625 	 * Do it again, earlier dmar_tbl mapping could be mapped with
626 	 * fixed map.
627 	 */
628 	dmar_table_detect();
629 
630 	/*
631 	 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
632 	 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
633 	 */
634 	dmar_tbl = tboot_get_dmar_table(dmar_tbl);
635 
636 	dmar = (struct acpi_table_dmar *)dmar_tbl;
637 	if (!dmar)
638 		return -ENODEV;
639 
640 	if (dmar->width < PAGE_SHIFT - 1) {
641 		pr_warn("Invalid DMAR haw\n");
642 		return -EINVAL;
643 	}
644 
645 	pr_info("Host address width %d\n", dmar->width + 1);
646 	ret = dmar_walk_dmar_table(dmar, &cb);
647 	if (ret == 0 && drhd_count == 0)
648 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
649 
650 	return ret;
651 }
652 
653 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
654 				 int cnt, struct pci_dev *dev)
655 {
656 	int index;
657 	struct device *tmp;
658 
659 	while (dev) {
660 		for_each_active_dev_scope(devices, cnt, index, tmp)
661 			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
662 				return 1;
663 
664 		/* Check our parent */
665 		dev = dev->bus->self;
666 	}
667 
668 	return 0;
669 }
670 
671 struct dmar_drhd_unit *
672 dmar_find_matched_drhd_unit(struct pci_dev *dev)
673 {
674 	struct dmar_drhd_unit *dmaru;
675 	struct acpi_dmar_hardware_unit *drhd;
676 
677 	dev = pci_physfn(dev);
678 
679 	rcu_read_lock();
680 	for_each_drhd_unit(dmaru) {
681 		drhd = container_of(dmaru->hdr,
682 				    struct acpi_dmar_hardware_unit,
683 				    header);
684 
685 		if (dmaru->include_all &&
686 		    drhd->segment == pci_domain_nr(dev->bus))
687 			goto out;
688 
689 		if (dmar_pci_device_match(dmaru->devices,
690 					  dmaru->devices_cnt, dev))
691 			goto out;
692 	}
693 	dmaru = NULL;
694 out:
695 	rcu_read_unlock();
696 
697 	return dmaru;
698 }
699 
700 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
701 					      struct acpi_device *adev)
702 {
703 	struct dmar_drhd_unit *dmaru;
704 	struct acpi_dmar_hardware_unit *drhd;
705 	struct acpi_dmar_device_scope *scope;
706 	struct device *tmp;
707 	int i;
708 	struct acpi_dmar_pci_path *path;
709 
710 	for_each_drhd_unit(dmaru) {
711 		drhd = container_of(dmaru->hdr,
712 				    struct acpi_dmar_hardware_unit,
713 				    header);
714 
715 		for (scope = (void *)(drhd + 1);
716 		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
717 		     scope = ((void *)scope) + scope->length) {
718 			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
719 				continue;
720 			if (scope->enumeration_id != device_number)
721 				continue;
722 
723 			path = (void *)(scope + 1);
724 			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
725 				dev_name(&adev->dev), dmaru->reg_base_addr,
726 				scope->bus, path->device, path->function);
727 			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
728 				if (tmp == NULL) {
729 					dmaru->devices[i].bus = scope->bus;
730 					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
731 									    path->function);
732 					rcu_assign_pointer(dmaru->devices[i].dev,
733 							   get_device(&adev->dev));
734 					return;
735 				}
736 			BUG_ON(i >= dmaru->devices_cnt);
737 		}
738 	}
739 	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
740 		device_number, dev_name(&adev->dev));
741 }
742 
743 static int __init dmar_acpi_dev_scope_init(void)
744 {
745 	struct acpi_dmar_andd *andd;
746 
747 	if (dmar_tbl == NULL)
748 		return -ENODEV;
749 
750 	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
751 	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
752 	     andd = ((void *)andd) + andd->header.length) {
753 		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
754 			acpi_handle h;
755 			struct acpi_device *adev;
756 
757 			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
758 							  andd->device_name,
759 							  &h))) {
760 				pr_err("Failed to find handle for ACPI object %s\n",
761 				       andd->device_name);
762 				continue;
763 			}
764 			if (acpi_bus_get_device(h, &adev)) {
765 				pr_err("Failed to get device for ACPI object %s\n",
766 				       andd->device_name);
767 				continue;
768 			}
769 			dmar_acpi_insert_dev_scope(andd->device_number, adev);
770 		}
771 	}
772 	return 0;
773 }
774 
775 int __init dmar_dev_scope_init(void)
776 {
777 	struct pci_dev *dev = NULL;
778 	struct dmar_pci_notify_info *info;
779 
780 	if (dmar_dev_scope_status != 1)
781 		return dmar_dev_scope_status;
782 
783 	if (list_empty(&dmar_drhd_units)) {
784 		dmar_dev_scope_status = -ENODEV;
785 	} else {
786 		dmar_dev_scope_status = 0;
787 
788 		dmar_acpi_dev_scope_init();
789 
790 		for_each_pci_dev(dev) {
791 			if (dev->is_virtfn)
792 				continue;
793 
794 			info = dmar_alloc_pci_notify_info(dev,
795 					BUS_NOTIFY_ADD_DEVICE);
796 			if (!info) {
797 				return dmar_dev_scope_status;
798 			} else {
799 				dmar_pci_bus_add_dev(info);
800 				dmar_free_pci_notify_info(info);
801 			}
802 		}
803 	}
804 
805 	return dmar_dev_scope_status;
806 }
807 
808 void __init dmar_register_bus_notifier(void)
809 {
810 	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
811 }
812 
813 
814 int __init dmar_table_init(void)
815 {
816 	static int dmar_table_initialized;
817 	int ret;
818 
819 	if (dmar_table_initialized == 0) {
820 		ret = parse_dmar_table();
821 		if (ret < 0) {
822 			if (ret != -ENODEV)
823 				pr_info("Parse DMAR table failure.\n");
824 		} else  if (list_empty(&dmar_drhd_units)) {
825 			pr_info("No DMAR devices found\n");
826 			ret = -ENODEV;
827 		}
828 
829 		if (ret < 0)
830 			dmar_table_initialized = ret;
831 		else
832 			dmar_table_initialized = 1;
833 	}
834 
835 	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
836 }
837 
838 static void warn_invalid_dmar(u64 addr, const char *message)
839 {
840 	pr_warn_once(FW_BUG
841 		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
842 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
843 		addr, message,
844 		dmi_get_system_info(DMI_BIOS_VENDOR),
845 		dmi_get_system_info(DMI_BIOS_VERSION),
846 		dmi_get_system_info(DMI_PRODUCT_VERSION));
847 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
848 }
849 
850 static int __ref
851 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
852 {
853 	struct acpi_dmar_hardware_unit *drhd;
854 	void __iomem *addr;
855 	u64 cap, ecap;
856 
857 	drhd = (void *)entry;
858 	if (!drhd->address) {
859 		warn_invalid_dmar(0, "");
860 		return -EINVAL;
861 	}
862 
863 	if (arg)
864 		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
865 	else
866 		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
867 	if (!addr) {
868 		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
869 		return -EINVAL;
870 	}
871 
872 	cap = dmar_readq(addr + DMAR_CAP_REG);
873 	ecap = dmar_readq(addr + DMAR_ECAP_REG);
874 
875 	if (arg)
876 		iounmap(addr);
877 	else
878 		early_iounmap(addr, VTD_PAGE_SIZE);
879 
880 	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
881 		warn_invalid_dmar(drhd->address, " returns all ones");
882 		return -EINVAL;
883 	}
884 
885 	return 0;
886 }
887 
888 int __init detect_intel_iommu(void)
889 {
890 	int ret;
891 	struct dmar_res_callback validate_drhd_cb = {
892 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
893 		.ignore_unhandled = true,
894 	};
895 
896 	down_write(&dmar_global_lock);
897 	ret = dmar_table_detect();
898 	if (!ret)
899 		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
900 					   &validate_drhd_cb);
901 	if (!ret && !no_iommu && !iommu_detected &&
902 	    (!dmar_disabled || dmar_platform_optin())) {
903 		iommu_detected = 1;
904 		/* Make sure ACS will be enabled */
905 		pci_request_acs();
906 	}
907 
908 #ifdef CONFIG_X86
909 	if (!ret) {
910 		x86_init.iommu.iommu_init = intel_iommu_init;
911 		x86_platform.iommu_shutdown = intel_iommu_shutdown;
912 	}
913 
914 #endif
915 
916 	if (dmar_tbl) {
917 		acpi_put_table(dmar_tbl);
918 		dmar_tbl = NULL;
919 	}
920 	up_write(&dmar_global_lock);
921 
922 	return ret ? ret : 1;
923 }
924 
925 static void unmap_iommu(struct intel_iommu *iommu)
926 {
927 	iounmap(iommu->reg);
928 	release_mem_region(iommu->reg_phys, iommu->reg_size);
929 }
930 
931 /**
932  * map_iommu: map the iommu's registers
933  * @iommu: the iommu to map
934  * @phys_addr: the physical address of the base resgister
935  *
936  * Memory map the iommu's registers.  Start w/ a single page, and
937  * possibly expand if that turns out to be insufficent.
938  */
939 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
940 {
941 	int map_size, err=0;
942 
943 	iommu->reg_phys = phys_addr;
944 	iommu->reg_size = VTD_PAGE_SIZE;
945 
946 	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
947 		pr_err("Can't reserve memory\n");
948 		err = -EBUSY;
949 		goto out;
950 	}
951 
952 	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
953 	if (!iommu->reg) {
954 		pr_err("Can't map the region\n");
955 		err = -ENOMEM;
956 		goto release;
957 	}
958 
959 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
960 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
961 
962 	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
963 		err = -EINVAL;
964 		warn_invalid_dmar(phys_addr, " returns all ones");
965 		goto unmap;
966 	}
967 	iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
968 
969 	/* the registers might be more than one page */
970 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
971 			 cap_max_fault_reg_offset(iommu->cap));
972 	map_size = VTD_PAGE_ALIGN(map_size);
973 	if (map_size > iommu->reg_size) {
974 		iounmap(iommu->reg);
975 		release_mem_region(iommu->reg_phys, iommu->reg_size);
976 		iommu->reg_size = map_size;
977 		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
978 					iommu->name)) {
979 			pr_err("Can't reserve memory\n");
980 			err = -EBUSY;
981 			goto out;
982 		}
983 		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
984 		if (!iommu->reg) {
985 			pr_err("Can't map the region\n");
986 			err = -ENOMEM;
987 			goto release;
988 		}
989 	}
990 	err = 0;
991 	goto out;
992 
993 unmap:
994 	iounmap(iommu->reg);
995 release:
996 	release_mem_region(iommu->reg_phys, iommu->reg_size);
997 out:
998 	return err;
999 }
1000 
1001 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1002 {
1003 	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1004 					    DMAR_UNITS_SUPPORTED);
1005 	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1006 		iommu->seq_id = -1;
1007 	} else {
1008 		set_bit(iommu->seq_id, dmar_seq_ids);
1009 		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1010 	}
1011 
1012 	return iommu->seq_id;
1013 }
1014 
1015 static void dmar_free_seq_id(struct intel_iommu *iommu)
1016 {
1017 	if (iommu->seq_id >= 0) {
1018 		clear_bit(iommu->seq_id, dmar_seq_ids);
1019 		iommu->seq_id = -1;
1020 	}
1021 }
1022 
1023 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1024 {
1025 	struct intel_iommu *iommu;
1026 	u32 ver, sts;
1027 	int agaw = 0;
1028 	int msagaw = 0;
1029 	int err;
1030 
1031 	if (!drhd->reg_base_addr) {
1032 		warn_invalid_dmar(0, "");
1033 		return -EINVAL;
1034 	}
1035 
1036 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1037 	if (!iommu)
1038 		return -ENOMEM;
1039 
1040 	if (dmar_alloc_seq_id(iommu) < 0) {
1041 		pr_err("Failed to allocate seq_id\n");
1042 		err = -ENOSPC;
1043 		goto error;
1044 	}
1045 
1046 	err = map_iommu(iommu, drhd->reg_base_addr);
1047 	if (err) {
1048 		pr_err("Failed to map %s\n", iommu->name);
1049 		goto error_free_seq_id;
1050 	}
1051 
1052 	err = -EINVAL;
1053 	agaw = iommu_calculate_agaw(iommu);
1054 	if (agaw < 0) {
1055 		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1056 			iommu->seq_id);
1057 		goto err_unmap;
1058 	}
1059 	msagaw = iommu_calculate_max_sagaw(iommu);
1060 	if (msagaw < 0) {
1061 		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1062 			iommu->seq_id);
1063 		goto err_unmap;
1064 	}
1065 	iommu->agaw = agaw;
1066 	iommu->msagaw = msagaw;
1067 	iommu->segment = drhd->segment;
1068 
1069 	iommu->node = NUMA_NO_NODE;
1070 
1071 	ver = readl(iommu->reg + DMAR_VER_REG);
1072 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1073 		iommu->name,
1074 		(unsigned long long)drhd->reg_base_addr,
1075 		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1076 		(unsigned long long)iommu->cap,
1077 		(unsigned long long)iommu->ecap);
1078 
1079 	/* Reflect status in gcmd */
1080 	sts = readl(iommu->reg + DMAR_GSTS_REG);
1081 	if (sts & DMA_GSTS_IRES)
1082 		iommu->gcmd |= DMA_GCMD_IRE;
1083 	if (sts & DMA_GSTS_TES)
1084 		iommu->gcmd |= DMA_GCMD_TE;
1085 	if (sts & DMA_GSTS_QIES)
1086 		iommu->gcmd |= DMA_GCMD_QIE;
1087 
1088 	raw_spin_lock_init(&iommu->register_lock);
1089 
1090 	if (intel_iommu_enabled) {
1091 		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1092 					     intel_iommu_groups,
1093 					     "%s", iommu->name);
1094 		if (err)
1095 			goto err_unmap;
1096 
1097 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1098 
1099 		err = iommu_device_register(&iommu->iommu);
1100 		if (err)
1101 			goto err_unmap;
1102 	}
1103 
1104 	drhd->iommu = iommu;
1105 	iommu->drhd = drhd;
1106 
1107 	return 0;
1108 
1109 err_unmap:
1110 	unmap_iommu(iommu);
1111 error_free_seq_id:
1112 	dmar_free_seq_id(iommu);
1113 error:
1114 	kfree(iommu);
1115 	return err;
1116 }
1117 
1118 static void free_iommu(struct intel_iommu *iommu)
1119 {
1120 	if (intel_iommu_enabled) {
1121 		iommu_device_unregister(&iommu->iommu);
1122 		iommu_device_sysfs_remove(&iommu->iommu);
1123 	}
1124 
1125 	if (iommu->irq) {
1126 		if (iommu->pr_irq) {
1127 			free_irq(iommu->pr_irq, iommu);
1128 			dmar_free_hwirq(iommu->pr_irq);
1129 			iommu->pr_irq = 0;
1130 		}
1131 		free_irq(iommu->irq, iommu);
1132 		dmar_free_hwirq(iommu->irq);
1133 		iommu->irq = 0;
1134 	}
1135 
1136 	if (iommu->qi) {
1137 		free_page((unsigned long)iommu->qi->desc);
1138 		kfree(iommu->qi->desc_status);
1139 		kfree(iommu->qi);
1140 	}
1141 
1142 	if (iommu->reg)
1143 		unmap_iommu(iommu);
1144 
1145 	dmar_free_seq_id(iommu);
1146 	kfree(iommu);
1147 }
1148 
1149 /*
1150  * Reclaim all the submitted descriptors which have completed its work.
1151  */
1152 static inline void reclaim_free_desc(struct q_inval *qi)
1153 {
1154 	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1155 	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1156 		qi->desc_status[qi->free_tail] = QI_FREE;
1157 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1158 		qi->free_cnt++;
1159 	}
1160 }
1161 
1162 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1163 {
1164 	u32 fault;
1165 	int head, tail;
1166 	struct q_inval *qi = iommu->qi;
1167 	int shift = qi_shift(iommu);
1168 
1169 	if (qi->desc_status[wait_index] == QI_ABORT)
1170 		return -EAGAIN;
1171 
1172 	fault = readl(iommu->reg + DMAR_FSTS_REG);
1173 
1174 	/*
1175 	 * If IQE happens, the head points to the descriptor associated
1176 	 * with the error. No new descriptors are fetched until the IQE
1177 	 * is cleared.
1178 	 */
1179 	if (fault & DMA_FSTS_IQE) {
1180 		head = readl(iommu->reg + DMAR_IQH_REG);
1181 		if ((head >> shift) == index) {
1182 			struct qi_desc *desc = qi->desc + head;
1183 
1184 			/*
1185 			 * desc->qw2 and desc->qw3 are either reserved or
1186 			 * used by software as private data. We won't print
1187 			 * out these two qw's for security consideration.
1188 			 */
1189 			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1190 			       (unsigned long long)desc->qw0,
1191 			       (unsigned long long)desc->qw1);
1192 			memcpy(desc, qi->desc + (wait_index << shift),
1193 			       1 << shift);
1194 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1195 			return -EINVAL;
1196 		}
1197 	}
1198 
1199 	/*
1200 	 * If ITE happens, all pending wait_desc commands are aborted.
1201 	 * No new descriptors are fetched until the ITE is cleared.
1202 	 */
1203 	if (fault & DMA_FSTS_ITE) {
1204 		head = readl(iommu->reg + DMAR_IQH_REG);
1205 		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1206 		head |= 1;
1207 		tail = readl(iommu->reg + DMAR_IQT_REG);
1208 		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1209 
1210 		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1211 
1212 		do {
1213 			if (qi->desc_status[head] == QI_IN_USE)
1214 				qi->desc_status[head] = QI_ABORT;
1215 			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1216 		} while (head != tail);
1217 
1218 		if (qi->desc_status[wait_index] == QI_ABORT)
1219 			return -EAGAIN;
1220 	}
1221 
1222 	if (fault & DMA_FSTS_ICE)
1223 		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1224 
1225 	return 0;
1226 }
1227 
1228 /*
1229  * Function to submit invalidation descriptors of all types to the queued
1230  * invalidation interface(QI). Multiple descriptors can be submitted at a
1231  * time, a wait descriptor will be appended to each submission to ensure
1232  * hardware has completed the invalidation before return. Wait descriptors
1233  * can be part of the submission but it will not be polled for completion.
1234  */
1235 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1236 		   unsigned int count, unsigned long options)
1237 {
1238 	struct q_inval *qi = iommu->qi;
1239 	struct qi_desc wait_desc;
1240 	int wait_index, index;
1241 	unsigned long flags;
1242 	int offset, shift;
1243 	int rc, i;
1244 
1245 	if (!qi)
1246 		return 0;
1247 
1248 restart:
1249 	rc = 0;
1250 
1251 	raw_spin_lock_irqsave(&qi->q_lock, flags);
1252 	/*
1253 	 * Check if we have enough empty slots in the queue to submit,
1254 	 * the calculation is based on:
1255 	 * # of desc + 1 wait desc + 1 space between head and tail
1256 	 */
1257 	while (qi->free_cnt < count + 2) {
1258 		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1259 		cpu_relax();
1260 		raw_spin_lock_irqsave(&qi->q_lock, flags);
1261 	}
1262 
1263 	index = qi->free_head;
1264 	wait_index = (index + count) % QI_LENGTH;
1265 	shift = qi_shift(iommu);
1266 
1267 	for (i = 0; i < count; i++) {
1268 		offset = ((index + i) % QI_LENGTH) << shift;
1269 		memcpy(qi->desc + offset, &desc[i], 1 << shift);
1270 		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1271 	}
1272 	qi->desc_status[wait_index] = QI_IN_USE;
1273 
1274 	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1275 			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1276 	if (options & QI_OPT_WAIT_DRAIN)
1277 		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1278 	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1279 	wait_desc.qw2 = 0;
1280 	wait_desc.qw3 = 0;
1281 
1282 	offset = wait_index << shift;
1283 	memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1284 
1285 	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1286 	qi->free_cnt -= count + 1;
1287 
1288 	/*
1289 	 * update the HW tail register indicating the presence of
1290 	 * new descriptors.
1291 	 */
1292 	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1293 
1294 	while (qi->desc_status[wait_index] != QI_DONE) {
1295 		/*
1296 		 * We will leave the interrupts disabled, to prevent interrupt
1297 		 * context to queue another cmd while a cmd is already submitted
1298 		 * and waiting for completion on this cpu. This is to avoid
1299 		 * a deadlock where the interrupt context can wait indefinitely
1300 		 * for free slots in the queue.
1301 		 */
1302 		rc = qi_check_fault(iommu, index, wait_index);
1303 		if (rc)
1304 			break;
1305 
1306 		raw_spin_unlock(&qi->q_lock);
1307 		cpu_relax();
1308 		raw_spin_lock(&qi->q_lock);
1309 	}
1310 
1311 	for (i = 0; i < count; i++)
1312 		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1313 
1314 	reclaim_free_desc(qi);
1315 	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1316 
1317 	if (rc == -EAGAIN)
1318 		goto restart;
1319 
1320 	return rc;
1321 }
1322 
1323 /*
1324  * Flush the global interrupt entry cache.
1325  */
1326 void qi_global_iec(struct intel_iommu *iommu)
1327 {
1328 	struct qi_desc desc;
1329 
1330 	desc.qw0 = QI_IEC_TYPE;
1331 	desc.qw1 = 0;
1332 	desc.qw2 = 0;
1333 	desc.qw3 = 0;
1334 
1335 	/* should never fail */
1336 	qi_submit_sync(iommu, &desc, 1, 0);
1337 }
1338 
1339 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1340 		      u64 type)
1341 {
1342 	struct qi_desc desc;
1343 
1344 	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1345 			| QI_CC_GRAN(type) | QI_CC_TYPE;
1346 	desc.qw1 = 0;
1347 	desc.qw2 = 0;
1348 	desc.qw3 = 0;
1349 
1350 	qi_submit_sync(iommu, &desc, 1, 0);
1351 }
1352 
1353 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1354 		    unsigned int size_order, u64 type)
1355 {
1356 	u8 dw = 0, dr = 0;
1357 
1358 	struct qi_desc desc;
1359 	int ih = 0;
1360 
1361 	if (cap_write_drain(iommu->cap))
1362 		dw = 1;
1363 
1364 	if (cap_read_drain(iommu->cap))
1365 		dr = 1;
1366 
1367 	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1368 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1369 	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1370 		| QI_IOTLB_AM(size_order);
1371 	desc.qw2 = 0;
1372 	desc.qw3 = 0;
1373 
1374 	qi_submit_sync(iommu, &desc, 1, 0);
1375 }
1376 
1377 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1378 			u16 qdep, u64 addr, unsigned mask)
1379 {
1380 	struct qi_desc desc;
1381 
1382 	if (mask) {
1383 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1384 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1385 	} else
1386 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1387 
1388 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1389 		qdep = 0;
1390 
1391 	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1392 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1393 	desc.qw2 = 0;
1394 	desc.qw3 = 0;
1395 
1396 	qi_submit_sync(iommu, &desc, 1, 0);
1397 }
1398 
1399 /* PASID-based IOTLB invalidation */
1400 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1401 		     unsigned long npages, bool ih)
1402 {
1403 	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1404 
1405 	/*
1406 	 * npages == -1 means a PASID-selective invalidation, otherwise,
1407 	 * a positive value for Page-selective-within-PASID invalidation.
1408 	 * 0 is not a valid input.
1409 	 */
1410 	if (WARN_ON(!npages)) {
1411 		pr_err("Invalid input npages = %ld\n", npages);
1412 		return;
1413 	}
1414 
1415 	if (npages == -1) {
1416 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1417 				QI_EIOTLB_DID(did) |
1418 				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1419 				QI_EIOTLB_TYPE;
1420 		desc.qw1 = 0;
1421 	} else {
1422 		int mask = ilog2(__roundup_pow_of_two(npages));
1423 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1424 
1425 		if (WARN_ON_ONCE(!ALIGN(addr, align)))
1426 			addr &= ~(align - 1);
1427 
1428 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1429 				QI_EIOTLB_DID(did) |
1430 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1431 				QI_EIOTLB_TYPE;
1432 		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1433 				QI_EIOTLB_IH(ih) |
1434 				QI_EIOTLB_AM(mask);
1435 	}
1436 
1437 	qi_submit_sync(iommu, &desc, 1, 0);
1438 }
1439 
1440 /* PASID-based device IOTLB Invalidate */
1441 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1442 			      u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1443 {
1444 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1445 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1446 
1447 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1448 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1449 		QI_DEV_IOTLB_PFSID(pfsid);
1450 
1451 	/*
1452 	 * If S bit is 0, we only flush a single page. If S bit is set,
1453 	 * The least significant zero bit indicates the invalidation address
1454 	 * range. VT-d spec 6.5.2.6.
1455 	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1456 	 * size order = 0 is PAGE_SIZE 4KB
1457 	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1458 	 * ECAP.
1459 	 */
1460 	if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0))
1461 		pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1462 				    addr, size_order);
1463 
1464 	/* Take page address */
1465 	desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1466 
1467 	if (size_order) {
1468 		/*
1469 		 * Existing 0s in address below size_order may be the least
1470 		 * significant bit, we must set them to 1s to avoid having
1471 		 * smaller size than desired.
1472 		 */
1473 		desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1474 					VTD_PAGE_SHIFT);
1475 		/* Clear size_order bit to indicate size */
1476 		desc.qw1 &= ~mask;
1477 		/* Set the S bit to indicate flushing more than 1 page */
1478 		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1479 	}
1480 
1481 	qi_submit_sync(iommu, &desc, 1, 0);
1482 }
1483 
1484 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1485 			  u64 granu, int pasid)
1486 {
1487 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1488 
1489 	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1490 			QI_PC_GRAN(granu) | QI_PC_TYPE;
1491 	qi_submit_sync(iommu, &desc, 1, 0);
1492 }
1493 
1494 /*
1495  * Disable Queued Invalidation interface.
1496  */
1497 void dmar_disable_qi(struct intel_iommu *iommu)
1498 {
1499 	unsigned long flags;
1500 	u32 sts;
1501 	cycles_t start_time = get_cycles();
1502 
1503 	if (!ecap_qis(iommu->ecap))
1504 		return;
1505 
1506 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1507 
1508 	sts =  readl(iommu->reg + DMAR_GSTS_REG);
1509 	if (!(sts & DMA_GSTS_QIES))
1510 		goto end;
1511 
1512 	/*
1513 	 * Give a chance to HW to complete the pending invalidation requests.
1514 	 */
1515 	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1516 		readl(iommu->reg + DMAR_IQH_REG)) &&
1517 		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1518 		cpu_relax();
1519 
1520 	iommu->gcmd &= ~DMA_GCMD_QIE;
1521 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1522 
1523 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1524 		      !(sts & DMA_GSTS_QIES), sts);
1525 end:
1526 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1527 }
1528 
1529 /*
1530  * Enable queued invalidation.
1531  */
1532 static void __dmar_enable_qi(struct intel_iommu *iommu)
1533 {
1534 	u32 sts;
1535 	unsigned long flags;
1536 	struct q_inval *qi = iommu->qi;
1537 	u64 val = virt_to_phys(qi->desc);
1538 
1539 	qi->free_head = qi->free_tail = 0;
1540 	qi->free_cnt = QI_LENGTH;
1541 
1542 	/*
1543 	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1544 	 * is present.
1545 	 */
1546 	if (ecap_smts(iommu->ecap))
1547 		val |= (1 << 11) | 1;
1548 
1549 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1550 
1551 	/* write zero to the tail reg */
1552 	writel(0, iommu->reg + DMAR_IQT_REG);
1553 
1554 	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1555 
1556 	iommu->gcmd |= DMA_GCMD_QIE;
1557 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1558 
1559 	/* Make sure hardware complete it */
1560 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1561 
1562 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1563 }
1564 
1565 /*
1566  * Enable Queued Invalidation interface. This is a must to support
1567  * interrupt-remapping. Also used by DMA-remapping, which replaces
1568  * register based IOTLB invalidation.
1569  */
1570 int dmar_enable_qi(struct intel_iommu *iommu)
1571 {
1572 	struct q_inval *qi;
1573 	struct page *desc_page;
1574 
1575 	if (!ecap_qis(iommu->ecap))
1576 		return -ENOENT;
1577 
1578 	/*
1579 	 * queued invalidation is already setup and enabled.
1580 	 */
1581 	if (iommu->qi)
1582 		return 0;
1583 
1584 	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1585 	if (!iommu->qi)
1586 		return -ENOMEM;
1587 
1588 	qi = iommu->qi;
1589 
1590 	/*
1591 	 * Need two pages to accommodate 256 descriptors of 256 bits each
1592 	 * if the remapping hardware supports scalable mode translation.
1593 	 */
1594 	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1595 				     !!ecap_smts(iommu->ecap));
1596 	if (!desc_page) {
1597 		kfree(qi);
1598 		iommu->qi = NULL;
1599 		return -ENOMEM;
1600 	}
1601 
1602 	qi->desc = page_address(desc_page);
1603 
1604 	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1605 	if (!qi->desc_status) {
1606 		free_page((unsigned long) qi->desc);
1607 		kfree(qi);
1608 		iommu->qi = NULL;
1609 		return -ENOMEM;
1610 	}
1611 
1612 	raw_spin_lock_init(&qi->q_lock);
1613 
1614 	__dmar_enable_qi(iommu);
1615 
1616 	return 0;
1617 }
1618 
1619 /* iommu interrupt handling. Most stuff are MSI-like. */
1620 
1621 enum faulttype {
1622 	DMA_REMAP,
1623 	INTR_REMAP,
1624 	UNKNOWN,
1625 };
1626 
1627 static const char *dma_remap_fault_reasons[] =
1628 {
1629 	"Software",
1630 	"Present bit in root entry is clear",
1631 	"Present bit in context entry is clear",
1632 	"Invalid context entry",
1633 	"Access beyond MGAW",
1634 	"PTE Write access is not set",
1635 	"PTE Read access is not set",
1636 	"Next page table ptr is invalid",
1637 	"Root table address invalid",
1638 	"Context table ptr is invalid",
1639 	"non-zero reserved fields in RTP",
1640 	"non-zero reserved fields in CTP",
1641 	"non-zero reserved fields in PTE",
1642 	"PCE for translation request specifies blocking",
1643 };
1644 
1645 static const char * const dma_remap_sm_fault_reasons[] = {
1646 	"SM: Invalid Root Table Address",
1647 	"SM: TTM 0 for request with PASID",
1648 	"SM: TTM 0 for page group request",
1649 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1650 	"SM: Error attempting to access Root Entry",
1651 	"SM: Present bit in Root Entry is clear",
1652 	"SM: Non-zero reserved field set in Root Entry",
1653 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1654 	"SM: Error attempting to access Context Entry",
1655 	"SM: Present bit in Context Entry is clear",
1656 	"SM: Non-zero reserved field set in the Context Entry",
1657 	"SM: Invalid Context Entry",
1658 	"SM: DTE field in Context Entry is clear",
1659 	"SM: PASID Enable field in Context Entry is clear",
1660 	"SM: PASID is larger than the max in Context Entry",
1661 	"SM: PRE field in Context-Entry is clear",
1662 	"SM: RID_PASID field error in Context-Entry",
1663 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1664 	"SM: Error attempting to access the PASID Directory Entry",
1665 	"SM: Present bit in Directory Entry is clear",
1666 	"SM: Non-zero reserved field set in PASID Directory Entry",
1667 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1668 	"SM: Error attempting to access PASID Table Entry",
1669 	"SM: Present bit in PASID Table Entry is clear",
1670 	"SM: Non-zero reserved field set in PASID Table Entry",
1671 	"SM: Invalid Scalable-Mode PASID Table Entry",
1672 	"SM: ERE field is clear in PASID Table Entry",
1673 	"SM: SRE field is clear in PASID Table Entry",
1674 	"Unknown", "Unknown",/* 0x5E-0x5F */
1675 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1676 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1677 	"SM: Error attempting to access first-level paging entry",
1678 	"SM: Present bit in first-level paging entry is clear",
1679 	"SM: Non-zero reserved field set in first-level paging entry",
1680 	"SM: Error attempting to access FL-PML4 entry",
1681 	"SM: First-level entry address beyond MGAW in Nested translation",
1682 	"SM: Read permission error in FL-PML4 entry in Nested translation",
1683 	"SM: Read permission error in first-level paging entry in Nested translation",
1684 	"SM: Write permission error in first-level paging entry in Nested translation",
1685 	"SM: Error attempting to access second-level paging entry",
1686 	"SM: Read/Write permission error in second-level paging entry",
1687 	"SM: Non-zero reserved field set in second-level paging entry",
1688 	"SM: Invalid second-level page table pointer",
1689 	"SM: A/D bit update needed in second-level entry when set up in no snoop",
1690 	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1691 	"SM: Address in first-level translation is not canonical",
1692 	"SM: U/S set 0 for first-level translation with user privilege",
1693 	"SM: No execute permission for request with PASID and ER=1",
1694 	"SM: Address beyond the DMA hardware max",
1695 	"SM: Second-level entry address beyond the max",
1696 	"SM: No write permission for Write/AtomicOp request",
1697 	"SM: No read permission for Read/AtomicOp request",
1698 	"SM: Invalid address-interrupt address",
1699 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1700 	"SM: A/D bit update needed in first-level entry when set up in no snoop",
1701 };
1702 
1703 static const char *irq_remap_fault_reasons[] =
1704 {
1705 	"Detected reserved fields in the decoded interrupt-remapped request",
1706 	"Interrupt index exceeded the interrupt-remapping table size",
1707 	"Present field in the IRTE entry is clear",
1708 	"Error accessing interrupt-remapping table pointed by IRTA_REG",
1709 	"Detected reserved fields in the IRTE entry",
1710 	"Blocked a compatibility format interrupt request",
1711 	"Blocked an interrupt request due to source-id verification failure",
1712 };
1713 
1714 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1715 {
1716 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1717 					ARRAY_SIZE(irq_remap_fault_reasons))) {
1718 		*fault_type = INTR_REMAP;
1719 		return irq_remap_fault_reasons[fault_reason - 0x20];
1720 	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1721 			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1722 		*fault_type = DMA_REMAP;
1723 		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1724 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1725 		*fault_type = DMA_REMAP;
1726 		return dma_remap_fault_reasons[fault_reason];
1727 	} else {
1728 		*fault_type = UNKNOWN;
1729 		return "Unknown";
1730 	}
1731 }
1732 
1733 
1734 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1735 {
1736 	if (iommu->irq == irq)
1737 		return DMAR_FECTL_REG;
1738 	else if (iommu->pr_irq == irq)
1739 		return DMAR_PECTL_REG;
1740 	else
1741 		BUG();
1742 }
1743 
1744 void dmar_msi_unmask(struct irq_data *data)
1745 {
1746 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1747 	int reg = dmar_msi_reg(iommu, data->irq);
1748 	unsigned long flag;
1749 
1750 	/* unmask it */
1751 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1752 	writel(0, iommu->reg + reg);
1753 	/* Read a reg to force flush the post write */
1754 	readl(iommu->reg + reg);
1755 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1756 }
1757 
1758 void dmar_msi_mask(struct irq_data *data)
1759 {
1760 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1761 	int reg = dmar_msi_reg(iommu, data->irq);
1762 	unsigned long flag;
1763 
1764 	/* mask it */
1765 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1766 	writel(DMA_FECTL_IM, iommu->reg + reg);
1767 	/* Read a reg to force flush the post write */
1768 	readl(iommu->reg + reg);
1769 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1770 }
1771 
1772 void dmar_msi_write(int irq, struct msi_msg *msg)
1773 {
1774 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1775 	int reg = dmar_msi_reg(iommu, irq);
1776 	unsigned long flag;
1777 
1778 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1779 	writel(msg->data, iommu->reg + reg + 4);
1780 	writel(msg->address_lo, iommu->reg + reg + 8);
1781 	writel(msg->address_hi, iommu->reg + reg + 12);
1782 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1783 }
1784 
1785 void dmar_msi_read(int irq, struct msi_msg *msg)
1786 {
1787 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1788 	int reg = dmar_msi_reg(iommu, irq);
1789 	unsigned long flag;
1790 
1791 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1792 	msg->data = readl(iommu->reg + reg + 4);
1793 	msg->address_lo = readl(iommu->reg + reg + 8);
1794 	msg->address_hi = readl(iommu->reg + reg + 12);
1795 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1796 }
1797 
1798 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1799 		u8 fault_reason, int pasid, u16 source_id,
1800 		unsigned long long addr)
1801 {
1802 	const char *reason;
1803 	int fault_type;
1804 
1805 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1806 
1807 	if (fault_type == INTR_REMAP)
1808 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1809 			source_id >> 8, PCI_SLOT(source_id & 0xFF),
1810 			PCI_FUNC(source_id & 0xFF), addr >> 48,
1811 			fault_reason, reason);
1812 	else
1813 		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1814 		       type ? "DMA Read" : "DMA Write",
1815 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1816 		       PCI_FUNC(source_id & 0xFF), pasid, addr,
1817 		       fault_reason, reason);
1818 	return 0;
1819 }
1820 
1821 #define PRIMARY_FAULT_REG_LEN (16)
1822 irqreturn_t dmar_fault(int irq, void *dev_id)
1823 {
1824 	struct intel_iommu *iommu = dev_id;
1825 	int reg, fault_index;
1826 	u32 fault_status;
1827 	unsigned long flag;
1828 	static DEFINE_RATELIMIT_STATE(rs,
1829 				      DEFAULT_RATELIMIT_INTERVAL,
1830 				      DEFAULT_RATELIMIT_BURST);
1831 
1832 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1833 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1834 	if (fault_status && __ratelimit(&rs))
1835 		pr_err("DRHD: handling fault status reg %x\n", fault_status);
1836 
1837 	/* TBD: ignore advanced fault log currently */
1838 	if (!(fault_status & DMA_FSTS_PPF))
1839 		goto unlock_exit;
1840 
1841 	fault_index = dma_fsts_fault_record_index(fault_status);
1842 	reg = cap_fault_reg_offset(iommu->cap);
1843 	while (1) {
1844 		/* Disable printing, simply clear the fault when ratelimited */
1845 		bool ratelimited = !__ratelimit(&rs);
1846 		u8 fault_reason;
1847 		u16 source_id;
1848 		u64 guest_addr;
1849 		int type, pasid;
1850 		u32 data;
1851 		bool pasid_present;
1852 
1853 		/* highest 32 bits */
1854 		data = readl(iommu->reg + reg +
1855 				fault_index * PRIMARY_FAULT_REG_LEN + 12);
1856 		if (!(data & DMA_FRCD_F))
1857 			break;
1858 
1859 		if (!ratelimited) {
1860 			fault_reason = dma_frcd_fault_reason(data);
1861 			type = dma_frcd_type(data);
1862 
1863 			pasid = dma_frcd_pasid_value(data);
1864 			data = readl(iommu->reg + reg +
1865 				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
1866 			source_id = dma_frcd_source_id(data);
1867 
1868 			pasid_present = dma_frcd_pasid_present(data);
1869 			guest_addr = dmar_readq(iommu->reg + reg +
1870 					fault_index * PRIMARY_FAULT_REG_LEN);
1871 			guest_addr = dma_frcd_page_addr(guest_addr);
1872 		}
1873 
1874 		/* clear the fault */
1875 		writel(DMA_FRCD_F, iommu->reg + reg +
1876 			fault_index * PRIMARY_FAULT_REG_LEN + 12);
1877 
1878 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1879 
1880 		if (!ratelimited)
1881 			/* Using pasid -1 if pasid is not present */
1882 			dmar_fault_do_one(iommu, type, fault_reason,
1883 					  pasid_present ? pasid : -1,
1884 					  source_id, guest_addr);
1885 
1886 		fault_index++;
1887 		if (fault_index >= cap_num_fault_regs(iommu->cap))
1888 			fault_index = 0;
1889 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1890 	}
1891 
1892 	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1893 	       iommu->reg + DMAR_FSTS_REG);
1894 
1895 unlock_exit:
1896 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1897 	return IRQ_HANDLED;
1898 }
1899 
1900 int dmar_set_interrupt(struct intel_iommu *iommu)
1901 {
1902 	int irq, ret;
1903 
1904 	/*
1905 	 * Check if the fault interrupt is already initialized.
1906 	 */
1907 	if (iommu->irq)
1908 		return 0;
1909 
1910 	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1911 	if (irq > 0) {
1912 		iommu->irq = irq;
1913 	} else {
1914 		pr_err("No free IRQ vectors\n");
1915 		return -EINVAL;
1916 	}
1917 
1918 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1919 	if (ret)
1920 		pr_err("Can't request irq\n");
1921 	return ret;
1922 }
1923 
1924 int __init enable_drhd_fault_handling(void)
1925 {
1926 	struct dmar_drhd_unit *drhd;
1927 	struct intel_iommu *iommu;
1928 
1929 	/*
1930 	 * Enable fault control interrupt.
1931 	 */
1932 	for_each_iommu(iommu, drhd) {
1933 		u32 fault_status;
1934 		int ret = dmar_set_interrupt(iommu);
1935 
1936 		if (ret) {
1937 			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1938 			       (unsigned long long)drhd->reg_base_addr, ret);
1939 			return -1;
1940 		}
1941 
1942 		/*
1943 		 * Clear any previous faults.
1944 		 */
1945 		dmar_fault(iommu->irq, iommu);
1946 		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1947 		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1948 	}
1949 
1950 	return 0;
1951 }
1952 
1953 /*
1954  * Re-enable Queued Invalidation interface.
1955  */
1956 int dmar_reenable_qi(struct intel_iommu *iommu)
1957 {
1958 	if (!ecap_qis(iommu->ecap))
1959 		return -ENOENT;
1960 
1961 	if (!iommu->qi)
1962 		return -ENOENT;
1963 
1964 	/*
1965 	 * First disable queued invalidation.
1966 	 */
1967 	dmar_disable_qi(iommu);
1968 	/*
1969 	 * Then enable queued invalidation again. Since there is no pending
1970 	 * invalidation requests now, it's safe to re-enable queued
1971 	 * invalidation.
1972 	 */
1973 	__dmar_enable_qi(iommu);
1974 
1975 	return 0;
1976 }
1977 
1978 /*
1979  * Check interrupt remapping support in DMAR table description.
1980  */
1981 int __init dmar_ir_support(void)
1982 {
1983 	struct acpi_table_dmar *dmar;
1984 	dmar = (struct acpi_table_dmar *)dmar_tbl;
1985 	if (!dmar)
1986 		return 0;
1987 	return dmar->flags & 0x1;
1988 }
1989 
1990 /* Check whether DMAR units are in use */
1991 static inline bool dmar_in_use(void)
1992 {
1993 	return irq_remapping_enabled || intel_iommu_enabled;
1994 }
1995 
1996 static int __init dmar_free_unused_resources(void)
1997 {
1998 	struct dmar_drhd_unit *dmaru, *dmaru_n;
1999 
2000 	if (dmar_in_use())
2001 		return 0;
2002 
2003 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2004 		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2005 
2006 	down_write(&dmar_global_lock);
2007 	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2008 		list_del(&dmaru->list);
2009 		dmar_free_drhd(dmaru);
2010 	}
2011 	up_write(&dmar_global_lock);
2012 
2013 	return 0;
2014 }
2015 
2016 late_initcall(dmar_free_unused_resources);
2017 IOMMU_INIT_POST(detect_intel_iommu);
2018 
2019 /*
2020  * DMAR Hotplug Support
2021  * For more details, please refer to Intel(R) Virtualization Technology
2022  * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8
2023  * "Remapping Hardware Unit Hot Plug".
2024  */
2025 static guid_t dmar_hp_guid =
2026 	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2027 		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2028 
2029 /*
2030  * Currently there's only one revision and BIOS will not check the revision id,
2031  * so use 0 for safety.
2032  */
2033 #define	DMAR_DSM_REV_ID			0
2034 #define	DMAR_DSM_FUNC_DRHD		1
2035 #define	DMAR_DSM_FUNC_ATSR		2
2036 #define	DMAR_DSM_FUNC_RHSA		3
2037 
2038 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2039 {
2040 	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2041 }
2042 
2043 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2044 				  dmar_res_handler_t handler, void *arg)
2045 {
2046 	int ret = -ENODEV;
2047 	union acpi_object *obj;
2048 	struct acpi_dmar_header *start;
2049 	struct dmar_res_callback callback;
2050 	static int res_type[] = {
2051 		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2052 		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2053 		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2054 	};
2055 
2056 	if (!dmar_detect_dsm(handle, func))
2057 		return 0;
2058 
2059 	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2060 				      func, NULL, ACPI_TYPE_BUFFER);
2061 	if (!obj)
2062 		return -ENODEV;
2063 
2064 	memset(&callback, 0, sizeof(callback));
2065 	callback.cb[res_type[func]] = handler;
2066 	callback.arg[res_type[func]] = arg;
2067 	start = (struct acpi_dmar_header *)obj->buffer.pointer;
2068 	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2069 
2070 	ACPI_FREE(obj);
2071 
2072 	return ret;
2073 }
2074 
2075 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2076 {
2077 	int ret;
2078 	struct dmar_drhd_unit *dmaru;
2079 
2080 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2081 	if (!dmaru)
2082 		return -ENODEV;
2083 
2084 	ret = dmar_ir_hotplug(dmaru, true);
2085 	if (ret == 0)
2086 		ret = dmar_iommu_hotplug(dmaru, true);
2087 
2088 	return ret;
2089 }
2090 
2091 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2092 {
2093 	int i, ret;
2094 	struct device *dev;
2095 	struct dmar_drhd_unit *dmaru;
2096 
2097 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2098 	if (!dmaru)
2099 		return 0;
2100 
2101 	/*
2102 	 * All PCI devices managed by this unit should have been destroyed.
2103 	 */
2104 	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2105 		for_each_active_dev_scope(dmaru->devices,
2106 					  dmaru->devices_cnt, i, dev)
2107 			return -EBUSY;
2108 	}
2109 
2110 	ret = dmar_ir_hotplug(dmaru, false);
2111 	if (ret == 0)
2112 		ret = dmar_iommu_hotplug(dmaru, false);
2113 
2114 	return ret;
2115 }
2116 
2117 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2118 {
2119 	struct dmar_drhd_unit *dmaru;
2120 
2121 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2122 	if (dmaru) {
2123 		list_del_rcu(&dmaru->list);
2124 		synchronize_rcu();
2125 		dmar_free_drhd(dmaru);
2126 	}
2127 
2128 	return 0;
2129 }
2130 
2131 static int dmar_hotplug_insert(acpi_handle handle)
2132 {
2133 	int ret;
2134 	int drhd_count = 0;
2135 
2136 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2137 				     &dmar_validate_one_drhd, (void *)1);
2138 	if (ret)
2139 		goto out;
2140 
2141 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2142 				     &dmar_parse_one_drhd, (void *)&drhd_count);
2143 	if (ret == 0 && drhd_count == 0) {
2144 		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2145 		goto out;
2146 	} else if (ret) {
2147 		goto release_drhd;
2148 	}
2149 
2150 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2151 				     &dmar_parse_one_rhsa, NULL);
2152 	if (ret)
2153 		goto release_drhd;
2154 
2155 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2156 				     &dmar_parse_one_atsr, NULL);
2157 	if (ret)
2158 		goto release_atsr;
2159 
2160 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2161 				     &dmar_hp_add_drhd, NULL);
2162 	if (!ret)
2163 		return 0;
2164 
2165 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2166 			       &dmar_hp_remove_drhd, NULL);
2167 release_atsr:
2168 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2169 			       &dmar_release_one_atsr, NULL);
2170 release_drhd:
2171 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2172 			       &dmar_hp_release_drhd, NULL);
2173 out:
2174 	return ret;
2175 }
2176 
2177 static int dmar_hotplug_remove(acpi_handle handle)
2178 {
2179 	int ret;
2180 
2181 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2182 				     &dmar_check_one_atsr, NULL);
2183 	if (ret)
2184 		return ret;
2185 
2186 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2187 				     &dmar_hp_remove_drhd, NULL);
2188 	if (ret == 0) {
2189 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2190 					       &dmar_release_one_atsr, NULL));
2191 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2192 					       &dmar_hp_release_drhd, NULL));
2193 	} else {
2194 		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2195 				       &dmar_hp_add_drhd, NULL);
2196 	}
2197 
2198 	return ret;
2199 }
2200 
2201 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2202 				       void *context, void **retval)
2203 {
2204 	acpi_handle *phdl = retval;
2205 
2206 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2207 		*phdl = handle;
2208 		return AE_CTRL_TERMINATE;
2209 	}
2210 
2211 	return AE_OK;
2212 }
2213 
2214 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2215 {
2216 	int ret;
2217 	acpi_handle tmp = NULL;
2218 	acpi_status status;
2219 
2220 	if (!dmar_in_use())
2221 		return 0;
2222 
2223 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2224 		tmp = handle;
2225 	} else {
2226 		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2227 					     ACPI_UINT32_MAX,
2228 					     dmar_get_dsm_handle,
2229 					     NULL, NULL, &tmp);
2230 		if (ACPI_FAILURE(status)) {
2231 			pr_warn("Failed to locate _DSM method.\n");
2232 			return -ENXIO;
2233 		}
2234 	}
2235 	if (tmp == NULL)
2236 		return 0;
2237 
2238 	down_write(&dmar_global_lock);
2239 	if (insert)
2240 		ret = dmar_hotplug_insert(tmp);
2241 	else
2242 		ret = dmar_hotplug_remove(tmp);
2243 	up_write(&dmar_global_lock);
2244 
2245 	return ret;
2246 }
2247 
2248 int dmar_device_add(acpi_handle handle)
2249 {
2250 	return dmar_device_hotplug(handle, true);
2251 }
2252 
2253 int dmar_device_remove(acpi_handle handle)
2254 {
2255 	return dmar_device_hotplug(handle, false);
2256 }
2257 
2258 /*
2259  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2260  *
2261  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2262  * the ACPI DMAR table. This means that the platform boot firmware has made
2263  * sure no device can issue DMA outside of RMRR regions.
2264  */
2265 bool dmar_platform_optin(void)
2266 {
2267 	struct acpi_table_dmar *dmar;
2268 	acpi_status status;
2269 	bool ret;
2270 
2271 	status = acpi_get_table(ACPI_SIG_DMAR, 0,
2272 				(struct acpi_table_header **)&dmar);
2273 	if (ACPI_FAILURE(status))
2274 		return false;
2275 
2276 	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2277 	acpi_put_table((struct acpi_table_header *)dmar);
2278 
2279 	return ret;
2280 }
2281 EXPORT_SYMBOL_GPL(dmar_platform_optin);
2282