xref: /openbmc/linux/drivers/pci/pcie/aer_inject.c (revision be709d48)
// SPDX-License-Identifier: GPL-2.0
/*
 * PCIe AER software error injection support.
 *
 * Debugging PCIe AER code is quite difficult because it is hard to
 * trigger various real hardware errors. Software-based error
 * injection can fake almost all kinds of errors with the help of a
 * user space helper tool aer-inject, which can be obtained from:
 *   http://www.kernel.org/pub/linux/utils/pci/aer-inject/
 *
 * Copyright 2009 Intel Corporation.
 *     Huang Ying <ying.huang@intel.com>
 */
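
/*
 * Usage sketch: user space injects an error by filling in a struct
 * aer_error_inj and writing it to the /dev/aer_inject misc device
 * registered below.  The aer-inject tool linked above does exactly
 * that; a minimal hand-rolled equivalent might look like the
 * following (error handling omitted; the bus/device/function and
 * status values are arbitrary examples):
 *
 *	struct aer_error_inj einj = {
 *		.bus = 0x01, .dev = 0x00, .fn = 0x00,
 *		.cor_status = PCI_ERR_COR_RCVR,	// inject a Receiver Error
 *		.domain = 0,
 *	};
 *	int fd = open("/dev/aer_inject", O_WRONLY);
 *
 *	write(fd, &einj, sizeof(einj));
 *	close(fd);
 *
 * See aer_inject_write() for the record sizes that are accepted.
 */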

#include <linux/module.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/miscdevice.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/stddef.h>
#include <linux/device.h>

#include "portdrv.h"

/* Override the existing corrected and uncorrected error masks */
static bool aer_mask_override;
module_param(aer_mask_override, bool, 0);

struct aer_error_inj {
	u8 bus;
	u8 dev;
	u8 fn;
	u32 uncor_status;
	u32 cor_status;
	u32 header_log0;
	u32 header_log1;
	u32 header_log2;
	u32 header_log3;
	u32 domain;
};
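
/*
 * The injection record user space writes to /dev/aer_inject; see
 * aer_inject_write() for the accepted sizes.
 */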

struct aer_error {
	struct list_head list;
	u32 domain;
	unsigned int bus;
	unsigned int devfn;
	int pos_cap_err;

	u32 uncor_status;
	u32 cor_status;
	u32 header_log0;
	u32 header_log1;
	u32 header_log2;
	u32 header_log3;
	u32 root_status;
	u32 source_id;
};

struct pci_bus_ops {
	struct list_head list;
	struct pci_bus *bus;
	struct pci_ops *ops;
};

static LIST_HEAD(einjected);

static LIST_HEAD(pci_bus_ops_list);

/* Protect einjected and pci_bus_ops_list */
static DEFINE_SPINLOCK(inject_lock);

static void aer_error_init(struct aer_error *err, u32 domain,
			   unsigned int bus, unsigned int devfn,
			   int pos_cap_err)
{
	INIT_LIST_HEAD(&err->list);
	err->domain = domain;
	err->bus = bus;
	err->devfn = devfn;
	err->pos_cap_err = pos_cap_err;
}

/* inject_lock must be held before calling */
static struct aer_error *__find_aer_error(u32 domain, unsigned int bus,
					  unsigned int devfn)
{
	struct aer_error *err;

	list_for_each_entry(err, &einjected, list) {
		if (domain == err->domain &&
		    bus == err->bus &&
		    devfn == err->devfn)
			return err;
	}
	return NULL;
}

/* inject_lock must be held before calling */
static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev)
{
	int domain = pci_domain_nr(dev->bus);

	if (domain < 0)
		return NULL;
	return __find_aer_error(domain, dev->bus->number, dev->devfn);
}

/* inject_lock must be held before calling */
static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus)
{
	struct pci_bus_ops *bus_ops;

	list_for_each_entry(bus_ops, &pci_bus_ops_list, list) {
		if (bus_ops->bus == bus)
			return bus_ops->ops;
	}
	return NULL;
}

static struct pci_bus_ops *pci_bus_ops_pop(void)
{
	unsigned long flags;
	struct pci_bus_ops *bus_ops;

	spin_lock_irqsave(&inject_lock, flags);
	bus_ops = list_first_entry_or_null(&pci_bus_ops_list,
					   struct pci_bus_ops, list);
	if (bus_ops)
		list_del(&bus_ops->list);
	spin_unlock_irqrestore(&inject_lock, flags);
	return bus_ops;
}

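/*
 * Map a config-space offset inside the device's AER capability to the
 * corresponding field of the simulated error state.  Returns NULL when
 * the offset is not emulated, in which case the caller falls back to
 * real hardware.  *prw1cs is set for the status registers, which are
 * RW1CS (write 1 to clear, sticky) per the PCIe spec.
 */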
static u32 *find_pci_config_dword(struct aer_error *err, int where,
				  int *prw1cs)
{
	int rw1cs = 0;
	u32 *target = NULL;

	if (err->pos_cap_err == -1)
		return NULL;

	switch (where - err->pos_cap_err) {
	case PCI_ERR_UNCOR_STATUS:
		target = &err->uncor_status;
		rw1cs = 1;
		break;
	case PCI_ERR_COR_STATUS:
		target = &err->cor_status;
		rw1cs = 1;
		break;
	case PCI_ERR_HEADER_LOG:
		target = &err->header_log0;
		break;
	case PCI_ERR_HEADER_LOG + 4:
		target = &err->header_log1;
		break;
	case PCI_ERR_HEADER_LOG + 8:
		target = &err->header_log2;
		break;
	case PCI_ERR_HEADER_LOG + 12:
		target = &err->header_log3;
		break;
	case PCI_ERR_ROOT_STATUS:
		target = &err->root_status;
		rw1cs = 1;
		break;
	case PCI_ERR_ROOT_ERR_SRC:
		target = &err->source_id;
		break;
	}
	if (prw1cs)
		*prw1cs = rw1cs;
	return target;
}

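/*
 * aer_inj_read()/aer_inj_write() reach the real hardware: they look up
 * the bus's original pci_ops (saved when aer_inj_pci_ops was installed)
 * and temporarily restore them around a single config access, so that
 * registers which are not being simulated still hit the device.
 */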
static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where,
			int size, u32 *val)
{
	struct pci_ops *ops, *my_ops;
	int rv;

	ops = __find_pci_bus_ops(bus);
	if (!ops)
		return -1;

	my_ops = bus->ops;
	bus->ops = ops;
	rv = ops->read(bus, devfn, where, size, val);
	bus->ops = my_ops;

	return rv;
}

static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where,
			 int size, u32 val)
{
	struct pci_ops *ops, *my_ops;
	int rv;

	ops = __find_pci_bus_ops(bus);
	if (!ops)
		return -1;

	my_ops = bus->ops;
	bus->ops = ops;
	rv = ops->write(bus, devfn, where, size, val);
	bus->ops = my_ops;

	return rv;
}

static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
			       int where, int size, u32 *val)
{
	u32 *sim;
	struct aer_error *err;
	unsigned long flags;
	int domain;
	int rv;

	spin_lock_irqsave(&inject_lock, flags);
	if (size != sizeof(u32))
		goto out;
	domain = pci_domain_nr(bus);
	if (domain < 0)
		goto out;
	err = __find_aer_error(domain, bus->number, devfn);
	if (!err)
		goto out;

	sim = find_pci_config_dword(err, where, NULL);
	if (sim) {
		*val = *sim;
		spin_unlock_irqrestore(&inject_lock, flags);
		return 0;
	}
out:
	rv = aer_inj_read(bus, devfn, where, size, val);
	spin_unlock_irqrestore(&inject_lock, flags);
	return rv;
}

static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
				int where, int size, u32 val)
{
	u32 *sim;
	struct aer_error *err;
	unsigned long flags;
	int rw1cs;
	int domain;
	int rv;

	spin_lock_irqsave(&inject_lock, flags);
	if (size != sizeof(u32))
		goto out;
	domain = pci_domain_nr(bus);
	if (domain < 0)
		goto out;
	err = __find_aer_error(domain, bus->number, devfn);
	if (!err)
		goto out;

	sim = find_pci_config_dword(err, where, &rw1cs);
	if (sim) {
		if (rw1cs)
			*sim &= ~val;	/* RW1CS: writing 1 clears the bit */
		else
			*sim = val;
		spin_unlock_irqrestore(&inject_lock, flags);
		return 0;
	}
out:
	rv = aer_inj_write(bus, devfn, where, size, val);
	spin_unlock_irqrestore(&inject_lock, flags);
	return rv;
}

static struct pci_ops aer_inj_pci_ops = {
	.read = aer_inj_read_config,
	.write = aer_inj_write_config,
};
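
/*
 * aer_inj_pci_ops is swapped in for a bus's real pci_ops by
 * pci_bus_set_aer_ops().  After that, every config access on the bus
 * goes through aer_inj_read_config()/aer_inj_write_config(): dword
 * accesses that fall inside an injected device's AER capability are
 * served from the matching struct aer_error, and everything else is
 * forwarded to the saved original ops.  For example, once an error has
 * been injected into a device, a dword read of its PCI_ERR_COR_STATUS
 * register returns the simulated err->cor_status rather than the
 * hardware value.
 */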

static void pci_bus_ops_init(struct pci_bus_ops *bus_ops,
			     struct pci_bus *bus,
			     struct pci_ops *ops)
{
	INIT_LIST_HEAD(&bus_ops->list);
	bus_ops->bus = bus;
	bus_ops->ops = ops;
}

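/*
 * Install aer_inj_pci_ops on @bus (if not already installed), remembering
 * the original ops on pci_bus_ops_list so they can be used for real
 * hardware accesses and restored at module unload.  If the injected ops
 * are already in place, the pre-allocated tracking entry is simply freed.
 */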
static int pci_bus_set_aer_ops(struct pci_bus *bus)
{
	struct pci_ops *ops;
	struct pci_bus_ops *bus_ops;
	unsigned long flags;

	bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL);
	if (!bus_ops)
		return -ENOMEM;
	ops = pci_bus_set_ops(bus, &aer_inj_pci_ops);
	spin_lock_irqsave(&inject_lock, flags);
	if (ops == &aer_inj_pci_ops)
		goto out;
	pci_bus_ops_init(bus_ops, bus, ops);
	list_add(&bus_ops->list, &pci_bus_ops_list);
	bus_ops = NULL;
out:
	spin_unlock_irqrestore(&inject_lock, flags);
	kfree(bus_ops);
	return 0;
}

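/*
 * Core injection path: record the requested error bits against the target
 * device and its Root Port, make sure both buses use the interposed config
 * ops, and then kick the Root Port's AER service interrupt so the normal
 * error handling code picks the "error" up.
 */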
static int aer_inject(struct aer_error_inj *einj)
{
	struct aer_error *err, *rperr;
	struct aer_error *err_alloc = NULL, *rperr_alloc = NULL;
	struct pci_dev *dev, *rpdev;
	struct pcie_device *edev;
	struct device *device;
	unsigned long flags;
	unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
	int pos_cap_err, rp_pos_cap_err;
	u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
	int ret = 0;

	dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn);
	if (!dev)
		return -ENODEV;
	rpdev = pcie_find_root_port(dev);
	if (!rpdev) {
		pci_err(dev, "aer_inject: Root port not found\n");
		ret = -ENODEV;
		goto out_put;
	}

	pos_cap_err = dev->aer_cap;
	if (!pos_cap_err) {
		pci_err(dev, "aer_inject: Device doesn't support AER\n");
		ret = -EPROTONOSUPPORT;
		goto out_put;
	}
	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever);
	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask);
	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
			      &uncor_mask);

	rp_pos_cap_err = rpdev->aer_cap;
	if (!rp_pos_cap_err) {
		pci_err(rpdev, "aer_inject: Root port doesn't support AER\n");
		ret = -EPROTONOSUPPORT;
		goto out_put;
	}

	err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL);
	if (!err_alloc) {
		ret = -ENOMEM;
		goto out_put;
	}
	rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL);
	if (!rperr_alloc) {
		ret = -ENOMEM;
		goto out_put;
	}

	if (aer_mask_override) {
		cor_mask_orig = cor_mask;
		/* Clear the mask bits for the errors being injected */
		cor_mask &= ~(einj->cor_status);
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
				       cor_mask);

		uncor_mask_orig = uncor_mask;
		uncor_mask &= ~(einj->uncor_status);
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
				       uncor_mask);
	}

	spin_lock_irqsave(&inject_lock, flags);

	err = __find_aer_error_by_dev(dev);
	if (!err) {
		err = err_alloc;
		err_alloc = NULL;
		aer_error_init(err, einj->domain, einj->bus, devfn,
			       pos_cap_err);
		list_add(&err->list, &einjected);
	}
	err->uncor_status |= einj->uncor_status;
	err->cor_status |= einj->cor_status;
	err->header_log0 = einj->header_log0;
	err->header_log1 = einj->header_log1;
	err->header_log2 = einj->header_log2;
	err->header_log3 = einj->header_log3;

	if (!aer_mask_override && einj->cor_status &&
	    !(einj->cor_status & ~cor_mask)) {
		ret = -EINVAL;
		pci_warn(dev, "aer_inject: The correctable error(s) are masked by device\n");
		spin_unlock_irqrestore(&inject_lock, flags);
		goto out_put;
	}
	if (!aer_mask_override && einj->uncor_status &&
	    !(einj->uncor_status & ~uncor_mask)) {
		ret = -EINVAL;
		pci_warn(dev, "aer_inject: The uncorrectable error(s) are masked by device\n");
		spin_unlock_irqrestore(&inject_lock, flags);
		goto out_put;
	}

	rperr = __find_aer_error_by_dev(rpdev);
	if (!rperr) {
		rperr = rperr_alloc;
		rperr_alloc = NULL;
		aer_error_init(rperr, pci_domain_nr(rpdev->bus),
			       rpdev->bus->number, rpdev->devfn,
			       rp_pos_cap_err);
		list_add(&rperr->list, &einjected);
	}
	if (einj->cor_status) {
		if (rperr->root_status & PCI_ERR_ROOT_COR_RCV)
			rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
		else
			rperr->root_status |= PCI_ERR_ROOT_COR_RCV;
		rperr->source_id &= 0xffff0000;
		rperr->source_id |= (einj->bus << 8) | devfn;
	}
	if (einj->uncor_status) {
		if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)
			rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
		if (sever & einj->uncor_status) {
			rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV;
			if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV))
				rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL;
		} else
			rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
		rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV;
		rperr->source_id &= 0x0000ffff;
		rperr->source_id |= ((einj->bus << 8) | devfn) << 16;
	}
	spin_unlock_irqrestore(&inject_lock, flags);

	if (aer_mask_override) {
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
				       cor_mask_orig);
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
				       uncor_mask_orig);
	}

	ret = pci_bus_set_aer_ops(dev->bus);
	if (ret)
		goto out_put;
	ret = pci_bus_set_aer_ops(rpdev->bus);
	if (ret)
		goto out_put;

	device = pcie_port_find_device(rpdev, PCIE_PORT_SERVICE_AER);
	if (device) {
		edev = to_pcie_device(device);
		if (!get_service_data(edev)) {
			dev_warn(&edev->device,
				 "aer_inject: AER service is not initialized\n");
			ret = -EPROTONOSUPPORT;
			goto out_put;
		}
		dev_info(&edev->device,
			 "aer_inject: Injecting errors %08x/%08x into device %s\n",
			 einj->cor_status, einj->uncor_status, pci_name(dev));
		local_irq_disable();
		generic_handle_irq(edev->irq);
		local_irq_enable();
	} else {
		pci_err(rpdev, "aer_inject: AER device not found\n");
		ret = -ENODEV;
	}
out_put:
	kfree(err_alloc);
	kfree(rperr_alloc);
	pci_dev_put(dev);
	return ret;
}

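/*
 * Write handler for /dev/aer_inject: each write() supplies one struct
 * aer_error_inj record.  A short record is accepted as long as it covers
 * every field up to @domain (the trailing @domain may be omitted,
 * presumably so aer-inject binaries that predate PCI domain support keep
 * working); any bytes not supplied are treated as zero.
 */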
static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf,
				size_t usize, loff_t *off)
{
	struct aer_error_inj einj;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (usize < offsetof(struct aer_error_inj, domain) ||
	    usize > sizeof(einj))
		return -EINVAL;

	memset(&einj, 0, sizeof(einj));
	if (copy_from_user(&einj, ubuf, usize))
		return -EFAULT;

	ret = aer_inject(&einj);
	return ret ? ret : usize;
}

static const struct file_operations aer_inject_fops = {
	.write = aer_inject_write,
	.owner = THIS_MODULE,
	.llseek = noop_llseek,
};

static struct miscdevice aer_inject_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "aer_inject",
	.fops = &aer_inject_fops,
};

static int __init aer_inject_init(void)
{
	return misc_register(&aer_inject_device);
}

static void __exit aer_inject_exit(void)
{
	struct aer_error *err, *err_next;
	unsigned long flags;
	struct pci_bus_ops *bus_ops;

	misc_deregister(&aer_inject_device);

	while ((bus_ops = pci_bus_ops_pop())) {
		pci_bus_set_ops(bus_ops->bus, bus_ops->ops);
		kfree(bus_ops);
	}

	spin_lock_irqsave(&inject_lock, flags);
	list_for_each_entry_safe(err, err_next, &einjected, list) {
		list_del(&err->list);
		kfree(err);
	}
	spin_unlock_irqrestore(&inject_lock, flags);
}

module_init(aer_inject_init);
module_exit(aer_inject_exit);

MODULE_DESCRIPTION("PCIe AER software error injector");
MODULE_LICENSE("GPL");