xref: /openbmc/linux/drivers/pci/pcie/err.c (revision fed8b7e366e7c8f81e957ef91aa8f0a38e038c66)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This file implements the error recovery as a core part of PCIe error
4  * reporting. When a PCIe error is delivered, an error message will be
5  * collected and printed to console, then, an error recovery procedure
6  * will be executed by following the PCI error recovery rules.
7  *
8  * Copyright (C) 2006 Intel Corp.
9  *	Tom Long Nguyen (tom.l.nguyen@intel.com)
10  *	Zhang Yanmin (yanmin.zhang@intel.com)
11  */
12 
13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/kernel.h>
16 #include <linux/errno.h>
17 #include <linux/aer.h>
18 #include "portdrv.h"
19 #include "../pci.h"
20 
21 static pci_ers_result_t merge_result(enum pci_ers_result orig,
22 				  enum pci_ers_result new)
23 {
24 	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
25 		return PCI_ERS_RESULT_NO_AER_DRIVER;
26 
27 	if (new == PCI_ERS_RESULT_NONE)
28 		return orig;
29 
30 	switch (orig) {
31 	case PCI_ERS_RESULT_CAN_RECOVER:
32 	case PCI_ERS_RESULT_RECOVERED:
33 		orig = new;
34 		break;
35 	case PCI_ERS_RESULT_DISCONNECT:
36 		if (new == PCI_ERS_RESULT_NEED_RESET)
37 			orig = PCI_ERS_RESULT_NEED_RESET;
38 		break;
39 	default:
40 		break;
41 	}
42 
43 	return orig;
44 }
45 
46 static int report_error_detected(struct pci_dev *dev,
47 				 enum pci_channel_state state,
48 				 enum pci_ers_result *result)
49 {
50 	pci_ers_result_t vote;
51 	const struct pci_error_handlers *err_handler;
52 
53 	device_lock(&dev->dev);
54 	if (!pci_dev_set_io_state(dev, state) ||
55 		!dev->driver ||
56 		!dev->driver->err_handler ||
57 		!dev->driver->err_handler->error_detected) {
58 		/*
59 		 * If any device in the subtree does not have an error_detected
60 		 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
61 		 * error callbacks of "any" device in the subtree, and will
62 		 * exit in the disconnected error state.
63 		 */
64 		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
65 			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
66 		else
67 			vote = PCI_ERS_RESULT_NONE;
68 	} else {
69 		err_handler = dev->driver->err_handler;
70 		vote = err_handler->error_detected(dev, state);
71 	}
72 	pci_uevent_ers(dev, vote);
73 	*result = merge_result(*result, vote);
74 	device_unlock(&dev->dev);
75 	return 0;
76 }
77 
78 static int report_frozen_detected(struct pci_dev *dev, void *data)
79 {
80 	return report_error_detected(dev, pci_channel_io_frozen, data);
81 }
82 
83 static int report_normal_detected(struct pci_dev *dev, void *data)
84 {
85 	return report_error_detected(dev, pci_channel_io_normal, data);
86 }
87 
88 static int report_mmio_enabled(struct pci_dev *dev, void *data)
89 {
90 	pci_ers_result_t vote, *result = data;
91 	const struct pci_error_handlers *err_handler;
92 
93 	device_lock(&dev->dev);
94 	if (!dev->driver ||
95 		!dev->driver->err_handler ||
96 		!dev->driver->err_handler->mmio_enabled)
97 		goto out;
98 
99 	err_handler = dev->driver->err_handler;
100 	vote = err_handler->mmio_enabled(dev);
101 	*result = merge_result(*result, vote);
102 out:
103 	device_unlock(&dev->dev);
104 	return 0;
105 }
106 
107 static int report_slot_reset(struct pci_dev *dev, void *data)
108 {
109 	pci_ers_result_t vote, *result = data;
110 	const struct pci_error_handlers *err_handler;
111 
112 	device_lock(&dev->dev);
113 	if (!dev->driver ||
114 		!dev->driver->err_handler ||
115 		!dev->driver->err_handler->slot_reset)
116 		goto out;
117 
118 	err_handler = dev->driver->err_handler;
119 	vote = err_handler->slot_reset(dev);
120 	*result = merge_result(*result, vote);
121 out:
122 	device_unlock(&dev->dev);
123 	return 0;
124 }
125 
126 static int report_resume(struct pci_dev *dev, void *data)
127 {
128 	const struct pci_error_handlers *err_handler;
129 
130 	device_lock(&dev->dev);
131 	if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
132 		!dev->driver ||
133 		!dev->driver->err_handler ||
134 		!dev->driver->err_handler->resume)
135 		goto out;
136 
137 	err_handler = dev->driver->err_handler;
138 	err_handler->resume(dev);
139 out:
140 	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
141 	device_unlock(&dev->dev);
142 	return 0;
143 }
144 
145 /**
146  * default_reset_link - default reset function
147  * @dev: pointer to pci_dev data structure
148  *
149  * Invoked when performing link reset on a Downstream Port or a
150  * Root Port with no aer driver.
151  */
152 static pci_ers_result_t default_reset_link(struct pci_dev *dev)
153 {
154 	int rc;
155 
156 	rc = pci_bus_error_reset(dev);
157 	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
158 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
159 }
160 
161 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
162 {
163 	pci_ers_result_t status;
164 	struct pcie_port_service_driver *driver = NULL;
165 
166 	driver = pcie_port_find_service(dev, service);
167 	if (driver && driver->reset_link) {
168 		status = driver->reset_link(dev);
169 	} else if (dev->has_secondary_link) {
170 		status = default_reset_link(dev);
171 	} else {
172 		pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
173 			pci_name(dev));
174 		return PCI_ERS_RESULT_DISCONNECT;
175 	}
176 
177 	if (status != PCI_ERS_RESULT_RECOVERED) {
178 		pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
179 			pci_name(dev));
180 		return PCI_ERS_RESULT_DISCONNECT;
181 	}
182 
183 	return status;
184 }
185 
186 void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
187 		      u32 service)
188 {
189 	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
190 	struct pci_bus *bus;
191 
192 	/*
193 	 * Error recovery runs on all subordinates of the first downstream port.
194 	 * If the downstream port detected the error, it is cleared at the end.
195 	 */
196 	if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
197 	      pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
198 		dev = dev->bus->self;
199 	bus = dev->subordinate;
200 
201 	pci_dbg(dev, "broadcast error_detected message\n");
202 	if (state == pci_channel_io_frozen)
203 		pci_walk_bus(bus, report_frozen_detected, &status);
204 	else
205 		pci_walk_bus(bus, report_normal_detected, &status);
206 
207 	if (state == pci_channel_io_frozen &&
208 	    reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED)
209 		goto failed;
210 
211 	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
212 		status = PCI_ERS_RESULT_RECOVERED;
213 		pci_dbg(dev, "broadcast mmio_enabled message\n");
214 		pci_walk_bus(bus, report_mmio_enabled, &status);
215 	}
216 
217 	if (status == PCI_ERS_RESULT_NEED_RESET) {
218 		/*
219 		 * TODO: Should call platform-specific
220 		 * functions to reset slot before calling
221 		 * drivers' slot_reset callbacks?
222 		 */
223 		status = PCI_ERS_RESULT_RECOVERED;
224 		pci_dbg(dev, "broadcast slot_reset message\n");
225 		pci_walk_bus(bus, report_slot_reset, &status);
226 	}
227 
228 	if (status != PCI_ERS_RESULT_RECOVERED)
229 		goto failed;
230 
231 	pci_dbg(dev, "broadcast resume message\n");
232 	pci_walk_bus(bus, report_resume, &status);
233 
234 	pci_aer_clear_device_status(dev);
235 	pci_cleanup_aer_uncorrect_error_status(dev);
236 	pci_info(dev, "AER: Device recovery successful\n");
237 	return;
238 
239 failed:
240 	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
241 
242 	/* TODO: Should kernel panic here? */
243 	pci_info(dev, "AER: Device recovery failed\n");
244 }
245