xref: /openbmc/linux/arch/s390/pci/pci_event.c (revision e50e86dbcabda570fc8a1435fe2fca97e9ab7312)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Copyright IBM Corp. 2012
4  *
5  *  Author(s):
6  *    Jan Glauber <jang@linux.vnet.ibm.com>
7  */
8 
9 #define KMSG_COMPONENT "zpci"
10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11 
12 #include <linux/kernel.h>
13 #include <linux/pci.h>
14 #include <asm/pci_debug.h>
15 #include <asm/pci_dma.h>
16 #include <asm/sclp.h>
17 
18 #include "pci_bus.h"
19 
20 /* Content Code Description for PCI Function Error */
21 struct zpci_ccdf_err {
22 	u32 reserved1;
23 	u32 fh;				/* function handle */
24 	u32 fid;			/* function id */
25 	u32 ett		:  4;		/* expected table type */
26 	u32 mvn		: 12;		/* MSI vector number */
27 	u32 dmaas	:  8;		/* DMA address space */
28 	u32		:  6;
29 	u32 q		:  1;		/* event qualifier */
30 	u32 rw		:  1;		/* read/write */
31 	u64 faddr;			/* failing address */
32 	u32 reserved3;
33 	u16 reserved4;
34 	u16 pec;			/* PCI event code */
35 } __packed;
36 
37 /* Content Code Description for PCI Function Availability */
38 struct zpci_ccdf_avail {
39 	u32 reserved1;
40 	u32 fh;				/* function handle */
41 	u32 fid;			/* function id */
42 	u32 reserved2;
43 	u32 reserved3;
44 	u32 reserved4;
45 	u32 reserved5;
46 	u16 reserved6;
47 	u16 pec;			/* PCI event code */
48 } __packed;
49 
ers_result_indicates_abort(pci_ers_result_t ers_res)50 static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
51 {
52 	switch (ers_res) {
53 	case PCI_ERS_RESULT_CAN_RECOVER:
54 	case PCI_ERS_RESULT_RECOVERED:
55 	case PCI_ERS_RESULT_NEED_RESET:
56 		return false;
57 	default:
58 		return true;
59 	}
60 }
61 
is_passed_through(struct zpci_dev * zdev)62 static bool is_passed_through(struct zpci_dev *zdev)
63 {
64 	return zdev->s390_domain;
65 }
66 
is_driver_supported(struct pci_driver * driver)67 static bool is_driver_supported(struct pci_driver *driver)
68 {
69 	if (!driver || !driver->err_handler)
70 		return false;
71 	if (!driver->err_handler->error_detected)
72 		return false;
73 	if (!driver->err_handler->slot_reset)
74 		return false;
75 	if (!driver->err_handler->resume)
76 		return false;
77 	return true;
78 }
79 
zpci_event_notify_error_detected(struct pci_dev * pdev,struct pci_driver * driver)80 static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
81 							 struct pci_driver *driver)
82 {
83 	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
84 
85 	ers_res = driver->err_handler->error_detected(pdev,  pdev->error_state);
86 	if (ers_result_indicates_abort(ers_res))
87 		pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
88 	else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
89 		pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
90 
91 	return ers_res;
92 }
93 
zpci_event_do_error_state_clear(struct pci_dev * pdev,struct pci_driver * driver)94 static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
95 							struct pci_driver *driver)
96 {
97 	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
98 	struct zpci_dev *zdev = to_zpci(pdev);
99 	int rc;
100 
101 	pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
102 	rc = zpci_reset_load_store_blocked(zdev);
103 	if (rc) {
104 		pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
105 		/* Let's try a full reset instead */
106 		return PCI_ERS_RESULT_NEED_RESET;
107 	}
108 
109 	if (driver->err_handler->mmio_enabled) {
110 		ers_res = driver->err_handler->mmio_enabled(pdev);
111 		if (ers_result_indicates_abort(ers_res)) {
112 			pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
113 				pci_name(pdev));
114 			return ers_res;
115 		} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
116 			pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
117 			return ers_res;
118 		}
119 	}
120 
121 	pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
122 	rc = zpci_clear_error_state(zdev);
123 	if (!rc) {
124 		pdev->error_state = pci_channel_io_normal;
125 	} else {
126 		pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
127 		/* Let's try a full reset instead */
128 		return PCI_ERS_RESULT_NEED_RESET;
129 	}
130 
131 	return ers_res;
132 }
133 
zpci_event_do_reset(struct pci_dev * pdev,struct pci_driver * driver)134 static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
135 					    struct pci_driver *driver)
136 {
137 	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
138 
139 	pr_info("%s: Initiating reset\n", pci_name(pdev));
140 	if (zpci_hot_reset_device(to_zpci(pdev))) {
141 		pr_err("%s: The reset request failed\n", pci_name(pdev));
142 		return ers_res;
143 	}
144 	pdev->error_state = pci_channel_io_normal;
145 	ers_res = driver->err_handler->slot_reset(pdev);
146 	if (ers_result_indicates_abort(ers_res)) {
147 		pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
148 		return ers_res;
149 	}
150 
151 	return ers_res;
152 }
153 
154 /* zpci_event_attempt_error_recovery - Try to recover the given PCI function
155  * @pdev: PCI function to recover currently in the error state
156  *
157  * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
158  * With the simplification that recovery always happens per function
159  * and the platform determines which functions are affected for
160  * multi-function devices.
161  */
zpci_event_attempt_error_recovery(struct pci_dev * pdev)162 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
163 {
164 	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
165 	struct pci_driver *driver;
166 
167 	/*
168 	 * Ensure that the PCI function is not removed concurrently, no driver
169 	 * is unbound or probed and that userspace can't access its
170 	 * configuration space while we perform recovery.
171 	 */
172 	pci_dev_lock(pdev);
173 	if (pdev->error_state == pci_channel_io_perm_failure) {
174 		ers_res = PCI_ERS_RESULT_DISCONNECT;
175 		goto out_unlock;
176 	}
177 	pdev->error_state = pci_channel_io_frozen;
178 
179 	if (is_passed_through(to_zpci(pdev))) {
180 		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
181 			pci_name(pdev));
182 		goto out_unlock;
183 	}
184 
185 	driver = to_pci_driver(pdev->dev.driver);
186 	if (!is_driver_supported(driver)) {
187 		if (!driver)
188 			pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
189 				pci_name(pdev));
190 		else
191 			pr_info("%s: The %s driver bound to the device does not support error recovery\n",
192 				pci_name(pdev),
193 				driver->name);
194 		goto out_unlock;
195 	}
196 
197 	ers_res = zpci_event_notify_error_detected(pdev, driver);
198 	if (ers_result_indicates_abort(ers_res))
199 		goto out_unlock;
200 
201 	if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
202 		ers_res = zpci_event_do_error_state_clear(pdev, driver);
203 		if (ers_result_indicates_abort(ers_res))
204 			goto out_unlock;
205 	}
206 
207 	if (ers_res == PCI_ERS_RESULT_NEED_RESET)
208 		ers_res = zpci_event_do_reset(pdev, driver);
209 
210 	if (ers_res != PCI_ERS_RESULT_RECOVERED) {
211 		pr_err("%s: Automatic recovery failed; operator intervention is required\n",
212 		       pci_name(pdev));
213 		goto out_unlock;
214 	}
215 
216 	pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
217 	if (driver->err_handler->resume)
218 		driver->err_handler->resume(pdev);
219 out_unlock:
220 	pci_dev_unlock(pdev);
221 
222 	return ers_res;
223 }
224 
225 /* zpci_event_io_failure - Report PCI channel failure state to driver
226  * @pdev: PCI function for which to report
227  * @es: PCI channel failure state to report
228  */
zpci_event_io_failure(struct pci_dev * pdev,pci_channel_state_t es)229 static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
230 {
231 	struct pci_driver *driver;
232 
233 	pci_dev_lock(pdev);
234 	pdev->error_state = es;
235 	/**
236 	 * While vfio-pci's error_detected callback notifies user-space QEMU
237 	 * reacts to this by freezing the guest. In an s390 environment PCI
238 	 * errors are rarely fatal so this is overkill. Instead in the future
239 	 * we will inject the error event and let the guest recover the device
240 	 * itself.
241 	 */
242 	if (is_passed_through(to_zpci(pdev)))
243 		goto out;
244 	driver = to_pci_driver(pdev->dev.driver);
245 	if (driver && driver->err_handler && driver->err_handler->error_detected)
246 		driver->err_handler->error_detected(pdev, pdev->error_state);
247 out:
248 	pci_dev_unlock(pdev);
249 }
250 
__zpci_event_error(struct zpci_ccdf_err * ccdf)251 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
252 {
253 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
254 	struct pci_dev *pdev = NULL;
255 	pci_ers_result_t ers_res;
256 
257 	zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
258 		 ccdf->fid, ccdf->fh, ccdf->pec);
259 	zpci_err("error CCDF:\n");
260 	zpci_err_hex(ccdf, sizeof(*ccdf));
261 
262 	if (zdev) {
263 		zpci_update_fh(zdev, ccdf->fh);
264 		if (zdev->zbus->bus)
265 			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
266 	}
267 
268 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
269 	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
270 
271 	if (!pdev)
272 		goto no_pdev;
273 
274 	switch (ccdf->pec) {
275 	case 0x002a: /* Error event concerns FMB */
276 	case 0x002b:
277 	case 0x002c:
278 		break;
279 	case 0x0040: /* Service Action or Error Recovery Failed */
280 	case 0x003b:
281 		zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
282 		break;
283 	default: /* PCI function left in the error state attempt to recover */
284 		ers_res = zpci_event_attempt_error_recovery(pdev);
285 		if (ers_res != PCI_ERS_RESULT_RECOVERED)
286 			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
287 		break;
288 	}
289 	pci_dev_put(pdev);
290 no_pdev:
291 	zpci_zdev_put(zdev);
292 }
293 
/* zpci_event_error - Entry point for PCI function error events
 * @data: event payload, passed on as the error CCDF
 *
 * The event is ignored while zPCI is not enabled.
 */
void zpci_event_error(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_error(data);
}
299 
/* zpci_event_hard_deconfigured - Tear down a function that was taken away
 * @zdev: zPCI function that was deconfigured
 * @fh: function handle reported with the event
 *
 * Updates the function handle, removes the PCI device, releases the
 * remaining zPCI resources and leaves @zdev in the standby state.
 */
static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
	zpci_update_fh(zdev, fh);

	/*
	 * Passing true gives the driver a hint that the function is
	 * already unusable.
	 */
	zpci_bus_remove_device(zdev, true);

	/*
	 * The device itself is already gone, but the zPCI resources still
	 * have to be freed as part of the disable.
	 */
	if (zdev->dma_table)
		zpci_dma_exit_device(zdev);
	if (zdev_enabled(zdev))
		zpci_disable_device(zdev);

	zdev->state = ZPCI_FN_STATE_STANDBY;
}
316 
__zpci_event_availability(struct zpci_ccdf_avail * ccdf)317 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
318 {
319 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
320 	bool existing_zdev = !!zdev;
321 	enum zpci_state state;
322 
323 	zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
324 		 ccdf->fid, ccdf->fh, ccdf->pec);
325 	switch (ccdf->pec) {
326 	case 0x0301: /* Reserved|Standby -> Configured */
327 		if (!zdev) {
328 			zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
329 			if (IS_ERR(zdev))
330 				break;
331 		} else {
332 			/* the configuration request may be stale */
333 			if (zdev->state != ZPCI_FN_STATE_STANDBY)
334 				break;
335 			zdev->state = ZPCI_FN_STATE_CONFIGURED;
336 		}
337 		zpci_scan_configured_device(zdev, ccdf->fh);
338 		break;
339 	case 0x0302: /* Reserved -> Standby */
340 		if (!zdev)
341 			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
342 		else
343 			zpci_update_fh(zdev, ccdf->fh);
344 		break;
345 	case 0x0303: /* Deconfiguration requested */
346 		if (zdev) {
347 			/* The event may have been queued before we confirgured
348 			 * the device.
349 			 */
350 			if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
351 				break;
352 			zpci_update_fh(zdev, ccdf->fh);
353 			zpci_deconfigure_device(zdev);
354 		}
355 		break;
356 	case 0x0304: /* Configured -> Standby|Reserved */
357 		if (zdev) {
358 			/* The event may have been queued before we confirgured
359 			 * the device.:
360 			 */
361 			if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
362 				zpci_event_hard_deconfigured(zdev, ccdf->fh);
363 			/* The 0x0304 event may immediately reserve the device */
364 			if (!clp_get_state(zdev->fid, &state) &&
365 			    state == ZPCI_FN_STATE_RESERVED) {
366 				zpci_device_reserved(zdev);
367 			}
368 		}
369 		break;
370 	case 0x0306: /* 0x308 or 0x302 for multiple devices */
371 		zpci_remove_reserved_devices();
372 		clp_scan_pci_devices();
373 		break;
374 	case 0x0308: /* Standby -> Reserved */
375 		if (!zdev)
376 			break;
377 		zpci_device_reserved(zdev);
378 		break;
379 	default:
380 		break;
381 	}
382 	if (existing_zdev)
383 		zpci_zdev_put(zdev);
384 }
385 
/* zpci_event_availability - Entry point for PCI availability events
 * @data: event payload, passed on as the availability CCDF
 *
 * The event is ignored while zPCI is not enabled.
 */
void zpci_event_availability(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_availability(data);
}
391