xref: /openbmc/linux/arch/x86/pci/acpi.c (revision 11dc486ed5d4626e6b92a23b67ed76cb6c48bfc9)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define pr_fmt(fmt) "PCI: " fmt
4 
5 #include <linux/pci.h>
6 #include <linux/acpi.h>
7 #include <linux/init.h>
8 #include <linux/irq.h>
9 #include <linux/dmi.h>
10 #include <linux/slab.h>
11 #include <linux/pci-acpi.h>
12 #include <asm/numa.h>
13 #include <asm/pci_x86.h>
14 
15 struct pci_root_info {
16 	struct acpi_pci_root_info common;
17 	struct pci_sysdata sd;
18 #ifdef	CONFIG_PCI_MMCONFIG
19 	bool mcfg_added;
20 	u8 start_bus;
21 	u8 end_bus;
22 #endif
23 };
24 
25 bool pci_use_e820 = true;
26 static bool pci_use_crs = true;
27 static bool pci_ignore_seg;
28 
29 static int __init set_use_crs(const struct dmi_system_id *id)
30 {
31 	pci_use_crs = true;
32 	return 0;
33 }
34 
35 static int __init set_nouse_crs(const struct dmi_system_id *id)
36 {
37 	pci_use_crs = false;
38 	return 0;
39 }
40 
41 static int __init set_ignore_seg(const struct dmi_system_id *id)
42 {
43 	pr_info("%s detected: ignoring ACPI _SEG\n", id->ident);
44 	pci_ignore_seg = true;
45 	return 0;
46 }
47 
48 static int __init set_no_e820(const struct dmi_system_id *id)
49 {
50 	pr_info("%s detected: not clipping E820 regions from _CRS\n",
51 	        id->ident);
52 	pci_use_e820 = false;
53 	return 0;
54 }
55 
56 static const struct dmi_system_id pci_crs_quirks[] __initconst = {
57 	/* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */
58 	{
59 		.callback = set_use_crs,
60 		.ident = "IBM System x3800",
61 		.matches = {
62 			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
63 			DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
64 		},
65 	},
66 	/* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */
67 	/* 2006 AMD HT/VIA system with two host bridges */
68         {
69 		.callback = set_use_crs,
70 		.ident = "ASRock ALiveSATA2-GLAN",
71 		.matches = {
72 			DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"),
73                 },
74         },
75 	/* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */
76 	/* 2006 AMD HT/VIA system with two host bridges */
77 	{
78 		.callback = set_use_crs,
79 		.ident = "ASUS M2V-MX SE",
80 		.matches = {
81 			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
82 			DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"),
83 			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
84 		},
85 	},
86 	/* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
87 	{
88 		.callback = set_use_crs,
89 		.ident = "MSI MS-7253",
90 		.matches = {
91 			DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
92 			DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
93 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
94 		},
95 	},
96 	/* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */
97 	/* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */
98 	{
99 		.callback = set_use_crs,
100 		.ident = "Foxconn K8M890-8237A",
101 		.matches = {
102 			DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"),
103 			DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"),
104 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
105 		},
106 	},
107 
108 	/* Now for the blacklist.. */
109 
110 	/* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */
111 	{
112 		.callback = set_nouse_crs,
113 		.ident = "Dell Studio 1557",
114 		.matches = {
115 			DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."),
116 			DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"),
117 			DMI_MATCH(DMI_BIOS_VERSION, "A09"),
118 		},
119 	},
120 	/* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */
121 	{
122 		.callback = set_nouse_crs,
123 		.ident = "Thinkpad SL510",
124 		.matches = {
125 			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
126 			DMI_MATCH(DMI_BOARD_NAME, "2847DFG"),
127 			DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"),
128 		},
129 	},
130 	/* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */
131 	{
132 		.callback = set_nouse_crs,
133 		.ident = "Supermicro X8DTH",
134 		.matches = {
135 			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
136 			DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F"),
137 			DMI_MATCH(DMI_BIOS_VERSION, "2.0a"),
138 		},
139 	},
140 
141 	/* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */
142 	{
143 		.callback = set_ignore_seg,
144 		.ident = "HP xw9300",
145 		.matches = {
146 			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
147 			DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"),
148 		},
149 	},
150 
151 	/*
152 	 * Many Lenovo models with "IIL" in their DMI_PRODUCT_VERSION have
153 	 * an E820 reserved region that covers the entire 32-bit host
154 	 * bridge memory window from _CRS.  Using the E820 region to clip
155 	 * _CRS means no space is available for hot-added or uninitialized
156 	 * PCI devices.  This typically breaks I2C controllers for touchpads
157 	 * and hot-added Thunderbolt devices.  See the commit log for
158 	 * models known to require this quirk and related bug reports.
159 	 */
160 	{
161 		.callback = set_no_e820,
162 		.ident = "Lenovo *IIL* product version",
163 		.matches = {
164 			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
165 			DMI_MATCH(DMI_PRODUCT_VERSION, "IIL"),
166 		},
167 	},
168 
169 	/*
170 	 * The Acer Spin 5 (SP513-54N) has the same E820 reservation covering
171 	 * the entire _CRS 32-bit window issue as the Lenovo *IIL* models.
172 	 * See https://bugs.launchpad.net/bugs/1884232
173 	 */
174 	{
175 		.callback = set_no_e820,
176 		.ident = "Acer Spin 5 (SP513-54N)",
177 		.matches = {
178 			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
179 			DMI_MATCH(DMI_PRODUCT_NAME, "Spin SP513-54N"),
180 		},
181 	},
182 
183 	/*
184 	 * Clevo X170KM-G barebones have the same E820 reservation covering
185 	 * the entire _CRS 32-bit window issue as the Lenovo *IIL* models.
186 	 * See https://bugzilla.kernel.org/show_bug.cgi?id=214259
187 	 */
188 	{
189 		.callback = set_no_e820,
190 		.ident = "Clevo X170KM-G Barebone",
191 		.matches = {
192 			DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"),
193 		},
194 	},
195 	{}
196 };
197 
198 void __init pci_acpi_crs_quirks(void)
199 {
200 	int year = dmi_get_bios_year();
201 
202 	if (year >= 0 && year < 2008 && iomem_resource.end <= 0xffffffff)
203 		pci_use_crs = false;
204 
205 	/*
206 	 * Some firmware includes unusable space (host bridge registers,
207 	 * hidden PCI device BARs, etc) in PCI host bridge _CRS.  This is a
208 	 * firmware defect, and 4dc2287c1805 ("x86: avoid E820 regions when
209 	 * allocating address space") has clipped out the unusable space in
210 	 * the past.
211 	 *
212 	 * But other firmware supplies E820 reserved regions that cover
213 	 * entire _CRS windows, so clipping throws away the entire window,
214 	 * leaving none for hot-added or uninitialized devices.  These E820
215 	 * entries are probably *not* a firmware defect, so disable the
216 	 * clipping by default for post-2022 machines.
217 	 *
218 	 * We already have quirks to disable clipping for pre-2023
219 	 * machines, and we'll likely need quirks to *enable* clipping for
220 	 * post-2022 machines that incorrectly include unusable space in
221 	 * _CRS.
222 	 */
223 	if (year >= 2023)
224 		pci_use_e820 = false;
225 
226 	dmi_check_system(pci_crs_quirks);
227 
228 	/*
229 	 * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that
230 	 * takes precedence over anything we figured out above.
231 	 */
232 	if (pci_probe & PCI_ROOT_NO_CRS)
233 		pci_use_crs = false;
234 	else if (pci_probe & PCI_USE__CRS)
235 		pci_use_crs = true;
236 
237 	pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n",
238 	        pci_use_crs ? "Using" : "Ignoring",
239 	        pci_use_crs ? "nocrs" : "use_crs");
240 
241 	/* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */
242 	if (pci_probe & PCI_NO_E820)
243 		pci_use_e820 = false;
244 	else if (pci_probe & PCI_USE_E820)
245 		pci_use_e820 = true;
246 
247 	pr_info("%s E820 reservations for host bridge windows\n",
248 	        pci_use_e820 ? "Using" : "Ignoring");
249 	if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
250 		pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n");
251 }
252 
253 /*
254  * Check if pdev is part of a PCIe switch that is directly below the
255  * specified bridge.
256  */
257 static bool pcie_switch_directly_under(struct pci_dev *bridge,
258 				       struct pci_dev *pdev)
259 {
260 	struct pci_dev *parent = pci_upstream_bridge(pdev);
261 
262 	/* If the device doesn't have a parent, it's not under anything */
263 	if (!parent)
264 		return false;
265 
266 	/*
267 	 * If the device has a PCIe type, check if it is below the
268 	 * corresponding PCIe switch components (if applicable). Then check
269 	 * if its upstream port is directly beneath the specified bridge.
270 	 */
271 	switch (pci_pcie_type(pdev)) {
272 	case PCI_EXP_TYPE_UPSTREAM:
273 		return parent == bridge;
274 
275 	case PCI_EXP_TYPE_DOWNSTREAM:
276 		if (pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM)
277 			return false;
278 		parent = pci_upstream_bridge(parent);
279 		return parent == bridge;
280 
281 	case PCI_EXP_TYPE_ENDPOINT:
282 		if (pci_pcie_type(parent) != PCI_EXP_TYPE_DOWNSTREAM)
283 			return false;
284 		parent = pci_upstream_bridge(parent);
285 		if (!parent || pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM)
286 			return false;
287 		parent = pci_upstream_bridge(parent);
288 		return parent == bridge;
289 	}
290 
291 	return false;
292 }
293 
294 static bool pcie_has_usb4_host_interface(struct pci_dev *pdev)
295 {
296 	struct fwnode_handle *fwnode;
297 
298 	/*
299 	 * For USB4, the tunneled PCIe Root or Downstream Ports are marked
300 	 * with the "usb4-host-interface" ACPI property, so we look for
301 	 * that first. This should cover most cases.
302 	 */
303 	fwnode = fwnode_find_reference(dev_fwnode(&pdev->dev),
304 				       "usb4-host-interface", 0);
305 	if (!IS_ERR(fwnode)) {
306 		fwnode_handle_put(fwnode);
307 		return true;
308 	}
309 
310 	/*
311 	 * Any integrated Thunderbolt 3/4 PCIe Root Ports from Intel
312 	 * before Alder Lake do not have the "usb4-host-interface"
313 	 * property so we use their PCI IDs instead. All these are
314 	 * tunneled. This list is not expected to grow.
315 	 */
316 	if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
317 		switch (pdev->device) {
318 		/* Ice Lake Thunderbolt 3 PCIe Root Ports */
319 		case 0x8a1d:
320 		case 0x8a1f:
321 		case 0x8a21:
322 		case 0x8a23:
323 		/* Tiger Lake-LP Thunderbolt 4 PCIe Root Ports */
324 		case 0x9a23:
325 		case 0x9a25:
326 		case 0x9a27:
327 		case 0x9a29:
328 		/* Tiger Lake-H Thunderbolt 4 PCIe Root Ports */
329 		case 0x9a2b:
330 		case 0x9a2d:
331 		case 0x9a2f:
332 		case 0x9a31:
333 			return true;
334 		}
335 	}
336 
337 	return false;
338 }
339 
340 bool arch_pci_dev_is_removable(struct pci_dev *pdev)
341 {
342 	struct pci_dev *parent, *root;
343 
344 	/* pdev without a parent or Root Port is never tunneled */
345 	parent = pci_upstream_bridge(pdev);
346 	if (!parent)
347 		return false;
348 	root = pcie_find_root_port(pdev);
349 	if (!root)
350 		return false;
351 
352 	/* Internal PCIe devices are not tunneled */
353 	if (!root->external_facing)
354 		return false;
355 
356 	/* Anything directly behind a "usb4-host-interface" is tunneled */
357 	if (pcie_has_usb4_host_interface(parent))
358 		return true;
359 
360 	/*
361 	 * Check if this is a discrete Thunderbolt/USB4 controller that is
362 	 * directly behind the non-USB4 PCIe Root Port marked as
363 	 * "ExternalFacingPort". Those are not behind a PCIe tunnel.
364 	 */
365 	if (pcie_switch_directly_under(root, pdev))
366 		return false;
367 
368 	/* PCIe devices after the discrete chip are tunneled */
369 	return true;
370 }
371 
372 #ifdef	CONFIG_PCI_MMCONFIG
373 static int check_segment(u16 seg, struct device *dev, char *estr)
374 {
375 	if (seg) {
376 		dev_err(dev, "%s can't access configuration space under this host bridge\n",
377 			estr);
378 		return -EIO;
379 	}
380 
381 	/*
382 	 * Failure in adding MMCFG information is not fatal,
383 	 * just can't access extended configuration space of
384 	 * devices under this host bridge.
385 	 */
386 	dev_warn(dev, "%s can't access extended configuration space under this bridge\n",
387 		 estr);
388 
389 	return 0;
390 }
391 
392 static int setup_mcfg_map(struct acpi_pci_root_info *ci)
393 {
394 	int result, seg;
395 	struct pci_root_info *info;
396 	struct acpi_pci_root *root = ci->root;
397 	struct device *dev = &ci->bridge->dev;
398 
399 	info = container_of(ci, struct pci_root_info, common);
400 	info->start_bus = (u8)root->secondary.start;
401 	info->end_bus = (u8)root->secondary.end;
402 	info->mcfg_added = false;
403 	seg = info->sd.domain;
404 
405 	/* return success if MMCFG is not in use */
406 	if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg)
407 		return 0;
408 
409 	if (!(pci_probe & PCI_PROBE_MMCONF))
410 		return check_segment(seg, dev, "MMCONFIG is disabled,");
411 
412 	result = pci_mmconfig_insert(dev, seg, info->start_bus, info->end_bus,
413 				     root->mcfg_addr);
414 	if (result == 0) {
415 		/* enable MMCFG if it hasn't been enabled yet */
416 		if (raw_pci_ext_ops == NULL)
417 			raw_pci_ext_ops = &pci_mmcfg;
418 		info->mcfg_added = true;
419 	} else if (result != -EEXIST)
420 		return check_segment(seg, dev,
421 			 "fail to add MMCONFIG information,");
422 
423 	return 0;
424 }
425 
426 static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
427 {
428 	struct pci_root_info *info;
429 
430 	info = container_of(ci, struct pci_root_info, common);
431 	if (info->mcfg_added) {
432 		pci_mmconfig_delete(info->sd.domain,
433 				    info->start_bus, info->end_bus);
434 		info->mcfg_added = false;
435 	}
436 }
437 #else
/* Without CONFIG_PCI_MMCONFIG there is no MMCFG region to set up. */
static int setup_mcfg_map(struct acpi_pci_root_info *ci)
{
	return 0;
}

/* Without CONFIG_PCI_MMCONFIG there is no MMCFG region to tear down. */
static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
{
}
446 #endif
447 
448 static int pci_acpi_root_get_node(struct acpi_pci_root *root)
449 {
450 	int busnum = root->secondary.start;
451 	struct acpi_device *device = root->device;
452 	int node = acpi_get_node(device->handle);
453 
454 	if (node == NUMA_NO_NODE) {
455 		node = x86_pci_root_bus_node(busnum);
456 		if (node != 0 && node != NUMA_NO_NODE)
457 			dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n",
458 				node);
459 	}
460 	if (node != NUMA_NO_NODE && !node_online(node))
461 		node = NUMA_NO_NODE;
462 
463 	return node;
464 }
465 
/* .init_info hook of acpi_pci_root_ops: set up the MMCFG mapping for @ci. */
static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci)
{
	int rc = setup_mcfg_map(ci);

	return rc;
}
470 
471 static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci)
472 {
473 	teardown_mcfg_map(ci);
474 	kfree(container_of(ci, struct pci_root_info, common));
475 }
476 
477 /*
478  * An IO port or MMIO resource assigned to a PCI host bridge may be
479  * consumed by the host bridge itself or available to its child
480  * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
481  * to tell whether the resource is consumed by the host bridge itself,
482  * but firmware hasn't used that bit consistently, so we can't rely on it.
483  *
484  * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
485  * to be available to child bus/devices except one special case:
486  *     IO port [0xCF8-0xCFF] is consumed by the host bridge itself
487  *     to access PCI configuration space.
488  *
489  * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
490  */
491 static bool resource_is_pcicfg_ioport(struct resource *res)
492 {
493 	return (res->flags & IORESOURCE_IO) &&
494 		res->start == 0xCF8 && res->end == 0xCFF;
495 }
496 
497 static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
498 {
499 	struct acpi_device *device = ci->bridge;
500 	int busnum = ci->root->secondary.start;
501 	struct resource_entry *entry, *tmp;
502 	int status;
503 
504 	status = acpi_pci_probe_root_resources(ci);
505 
506 	if (pci_use_crs) {
507 		resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
508 			if (resource_is_pcicfg_ioport(entry->res))
509 				resource_list_destroy_entry(entry);
510 		return status;
511 	}
512 
513 	resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
514 		dev_printk(KERN_DEBUG, &device->dev,
515 			   "host bridge window %pR (ignored)\n", entry->res);
516 		resource_list_destroy_entry(entry);
517 	}
518 	x86_pci_root_bus_resources(busnum, &ci->resources);
519 
520 	return 0;
521 }
522 
523 static struct acpi_pci_root_ops acpi_pci_root_ops = {
524 	.pci_ops = &pci_root_ops,
525 	.init_info = pci_acpi_root_init_info,
526 	.release_info = pci_acpi_root_release_info,
527 	.prepare_resources = pci_acpi_root_prepare_resources,
528 };
529 
530 struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
531 {
532 	int domain = root->segment;
533 	int busnum = root->secondary.start;
534 	int node = pci_acpi_root_get_node(root);
535 	struct pci_bus *bus;
536 
537 	if (pci_ignore_seg)
538 		root->segment = domain = 0;
539 
540 	if (domain && !pci_domains_supported) {
541 		pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n",
542 		        domain, busnum);
543 		return NULL;
544 	}
545 
546 	bus = pci_find_bus(domain, busnum);
547 	if (bus) {
548 		/*
549 		 * If the desired bus has been scanned already, replace
550 		 * its bus->sysdata.
551 		 */
552 		struct pci_sysdata sd = {
553 			.domain = domain,
554 			.node = node,
555 			.companion = root->device
556 		};
557 
558 		memcpy(bus->sysdata, &sd, sizeof(sd));
559 	} else {
560 		struct pci_root_info *info;
561 
562 		info = kzalloc(sizeof(*info), GFP_KERNEL);
563 		if (!info)
564 			dev_err(&root->device->dev,
565 				"pci_bus %04x:%02x: ignored (out of memory)\n",
566 				domain, busnum);
567 		else {
568 			info->sd.domain = domain;
569 			info->sd.node = node;
570 			info->sd.companion = root->device;
571 			bus = acpi_pci_root_create(root, &acpi_pci_root_ops,
572 						   &info->common, &info->sd);
573 		}
574 	}
575 
576 	/* After the PCI-E bus has been walked and all devices discovered,
577 	 * configure any settings of the fabric that might be necessary.
578 	 */
579 	if (bus) {
580 		struct pci_bus *child;
581 		list_for_each_entry(child, &bus->children, node)
582 			pcie_bus_configure_settings(child);
583 	}
584 
585 	return bus;
586 }
587 
588 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
589 {
590 	/*
591 	 * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
592 	 * here, pci_create_root_bus() has been called by someone else and
593 	 * sysdata is likely to be different from what we expect.  Let it go in
594 	 * that case.
595 	 */
596 	if (!bridge->dev.parent) {
597 		struct pci_sysdata *sd = bridge->bus->sysdata;
598 		ACPI_COMPANION_SET(&bridge->dev, sd->companion);
599 	}
600 	return 0;
601 }
602 
603 int __init pci_acpi_init(void)
604 {
605 	struct pci_dev *dev = NULL;
606 
607 	if (acpi_noirq)
608 		return -ENODEV;
609 
610 	pr_info("Using ACPI for IRQ routing\n");
611 	acpi_irq_penalty_init();
612 	pcibios_enable_irq = acpi_pci_irq_enable;
613 	pcibios_disable_irq = acpi_pci_irq_disable;
614 	x86_init.pci.init_irq = x86_init_noop;
615 
616 	if (pci_routeirq) {
617 		/*
618 		 * PCI IRQ routing is set up by pci_enable_device(), but we
619 		 * also do it here in case there are still broken drivers that
620 		 * don't use pci_enable_device().
621 		 */
622 		pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
623 		for_each_pci_dev(dev)
624 			acpi_pci_irq_enable(dev);
625 	}
626 
627 	return 0;
628 }
629