1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * PCIe AER software error injection support. 4 * 5 * Debugging PCIe AER code is quite difficult because it is hard to 6 * trigger various real hardware errors. Software based error 7 * injection can fake almost all kinds of errors with the help of a 8 * user space helper tool aer-inject, which can be gotten from: 9 * http://www.kernel.org/pub/linux/utils/pci/aer-inject/ 10 * 11 * Copyright 2009 Intel Corporation. 12 * Huang Ying <ying.huang@intel.com> 13 */ 14 15 #define dev_fmt(fmt) "aer_inject: " fmt 16 17 #include <linux/module.h> 18 #include <linux/init.h> 19 #include <linux/interrupt.h> 20 #include <linux/miscdevice.h> 21 #include <linux/pci.h> 22 #include <linux/slab.h> 23 #include <linux/fs.h> 24 #include <linux/uaccess.h> 25 #include <linux/stddef.h> 26 #include <linux/device.h> 27 28 #include "portdrv.h" 29 30 /* Override the existing corrected and uncorrected error masks */ 31 static bool aer_mask_override; 32 module_param(aer_mask_override, bool, 0); 33 34 struct aer_error_inj { 35 u8 bus; 36 u8 dev; 37 u8 fn; 38 u32 uncor_status; 39 u32 cor_status; 40 u32 header_log0; 41 u32 header_log1; 42 u32 header_log2; 43 u32 header_log3; 44 u32 domain; 45 }; 46 47 struct aer_error { 48 struct list_head list; 49 u32 domain; 50 unsigned int bus; 51 unsigned int devfn; 52 int pos_cap_err; 53 54 u32 uncor_status; 55 u32 cor_status; 56 u32 header_log0; 57 u32 header_log1; 58 u32 header_log2; 59 u32 header_log3; 60 u32 root_status; 61 u32 source_id; 62 }; 63 64 struct pci_bus_ops { 65 struct list_head list; 66 struct pci_bus *bus; 67 struct pci_ops *ops; 68 }; 69 70 static LIST_HEAD(einjected); 71 72 static LIST_HEAD(pci_bus_ops_list); 73 74 /* Protect einjected and pci_bus_ops_list */ 75 static DEFINE_SPINLOCK(inject_lock); 76 77 static void aer_error_init(struct aer_error *err, u32 domain, 78 unsigned int bus, unsigned int devfn, 79 int pos_cap_err) 80 { 81 INIT_LIST_HEAD(&err->list); 82 err->domain = domain; 83 err->bus = bus; 84 err->devfn = devfn; 85 err->pos_cap_err = pos_cap_err; 86 } 87 88 /* inject_lock must be held before calling */ 89 static struct aer_error *__find_aer_error(u32 domain, unsigned int bus, 90 unsigned int devfn) 91 { 92 struct aer_error *err; 93 94 list_for_each_entry(err, &einjected, list) { 95 if (domain == err->domain && 96 bus == err->bus && 97 devfn == err->devfn) 98 return err; 99 } 100 return NULL; 101 } 102 103 /* inject_lock must be held before calling */ 104 static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) 105 { 106 int domain = pci_domain_nr(dev->bus); 107 if (domain < 0) 108 return NULL; 109 return __find_aer_error(domain, dev->bus->number, dev->devfn); 110 } 111 112 /* inject_lock must be held before calling */ 113 static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) 114 { 115 struct pci_bus_ops *bus_ops; 116 117 list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { 118 if (bus_ops->bus == bus) 119 return bus_ops->ops; 120 } 121 return NULL; 122 } 123 124 static struct pci_bus_ops *pci_bus_ops_pop(void) 125 { 126 unsigned long flags; 127 struct pci_bus_ops *bus_ops; 128 129 spin_lock_irqsave(&inject_lock, flags); 130 bus_ops = list_first_entry_or_null(&pci_bus_ops_list, 131 struct pci_bus_ops, list); 132 if (bus_ops) 133 list_del(&bus_ops->list); 134 spin_unlock_irqrestore(&inject_lock, flags); 135 return bus_ops; 136 } 137 138 static u32 *find_pci_config_dword(struct aer_error *err, int where, 139 int *prw1cs) 140 { 141 int rw1cs = 0; 142 u32 *target = NULL; 143 144 if (err->pos_cap_err == -1) 145 return NULL; 146 147 switch (where - err->pos_cap_err) { 148 case PCI_ERR_UNCOR_STATUS: 149 target = &err->uncor_status; 150 rw1cs = 1; 151 break; 152 case PCI_ERR_COR_STATUS: 153 target = &err->cor_status; 154 rw1cs = 1; 155 break; 156 case PCI_ERR_HEADER_LOG: 157 target = &err->header_log0; 158 break; 159 case PCI_ERR_HEADER_LOG+4: 160 target = &err->header_log1; 161 break; 162 case PCI_ERR_HEADER_LOG+8: 163 target = &err->header_log2; 164 break; 165 case PCI_ERR_HEADER_LOG+12: 166 target = &err->header_log3; 167 break; 168 case PCI_ERR_ROOT_STATUS: 169 target = &err->root_status; 170 rw1cs = 1; 171 break; 172 case PCI_ERR_ROOT_ERR_SRC: 173 target = &err->source_id; 174 break; 175 } 176 if (prw1cs) 177 *prw1cs = rw1cs; 178 return target; 179 } 180 181 static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where, 182 int size, u32 *val) 183 { 184 struct pci_ops *ops, *my_ops; 185 int rv; 186 187 ops = __find_pci_bus_ops(bus); 188 if (!ops) 189 return -1; 190 191 my_ops = bus->ops; 192 bus->ops = ops; 193 rv = ops->read(bus, devfn, where, size, val); 194 bus->ops = my_ops; 195 196 return rv; 197 } 198 199 static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where, 200 int size, u32 val) 201 { 202 struct pci_ops *ops, *my_ops; 203 int rv; 204 205 ops = __find_pci_bus_ops(bus); 206 if (!ops) 207 return -1; 208 209 my_ops = bus->ops; 210 bus->ops = ops; 211 rv = ops->write(bus, devfn, where, size, val); 212 bus->ops = my_ops; 213 214 return rv; 215 } 216 217 static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn, 218 int where, int size, u32 *val) 219 { 220 u32 *sim; 221 struct aer_error *err; 222 unsigned long flags; 223 int domain; 224 int rv; 225 226 spin_lock_irqsave(&inject_lock, flags); 227 if (size != sizeof(u32)) 228 goto out; 229 domain = pci_domain_nr(bus); 230 if (domain < 0) 231 goto out; 232 err = __find_aer_error(domain, bus->number, devfn); 233 if (!err) 234 goto out; 235 236 sim = find_pci_config_dword(err, where, NULL); 237 if (sim) { 238 *val = *sim; 239 spin_unlock_irqrestore(&inject_lock, flags); 240 return 0; 241 } 242 out: 243 rv = aer_inj_read(bus, devfn, where, size, val); 244 spin_unlock_irqrestore(&inject_lock, flags); 245 return rv; 246 } 247 248 static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn, 249 int where, int size, u32 val) 250 { 251 u32 *sim; 252 struct aer_error *err; 253 unsigned long flags; 254 int rw1cs; 255 int domain; 256 int rv; 257 258 spin_lock_irqsave(&inject_lock, flags); 259 if (size != sizeof(u32)) 260 goto out; 261 domain = pci_domain_nr(bus); 262 if (domain < 0) 263 goto out; 264 err = __find_aer_error(domain, bus->number, devfn); 265 if (!err) 266 goto out; 267 268 sim = find_pci_config_dword(err, where, &rw1cs); 269 if (sim) { 270 if (rw1cs) 271 *sim ^= val; 272 else 273 *sim = val; 274 spin_unlock_irqrestore(&inject_lock, flags); 275 return 0; 276 } 277 out: 278 rv = aer_inj_write(bus, devfn, where, size, val); 279 spin_unlock_irqrestore(&inject_lock, flags); 280 return rv; 281 } 282 283 static struct pci_ops aer_inj_pci_ops = { 284 .read = aer_inj_read_config, 285 .write = aer_inj_write_config, 286 }; 287 288 static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, 289 struct pci_bus *bus, 290 struct pci_ops *ops) 291 { 292 INIT_LIST_HEAD(&bus_ops->list); 293 bus_ops->bus = bus; 294 bus_ops->ops = ops; 295 } 296 297 static int pci_bus_set_aer_ops(struct pci_bus *bus) 298 { 299 struct pci_ops *ops; 300 struct pci_bus_ops *bus_ops; 301 unsigned long flags; 302 303 bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL); 304 if (!bus_ops) 305 return -ENOMEM; 306 ops = pci_bus_set_ops(bus, &aer_inj_pci_ops); 307 spin_lock_irqsave(&inject_lock, flags); 308 if (ops == &aer_inj_pci_ops) 309 goto out; 310 pci_bus_ops_init(bus_ops, bus, ops); 311 list_add(&bus_ops->list, &pci_bus_ops_list); 312 bus_ops = NULL; 313 out: 314 spin_unlock_irqrestore(&inject_lock, flags); 315 kfree(bus_ops); 316 return 0; 317 } 318 319 static int aer_inject(struct aer_error_inj *einj) 320 { 321 struct aer_error *err, *rperr; 322 struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; 323 struct pci_dev *dev, *rpdev; 324 struct pcie_device *edev; 325 struct device *device; 326 unsigned long flags; 327 unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); 328 int pos_cap_err, rp_pos_cap_err; 329 u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; 330 int ret = 0; 331 332 dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn); 333 if (!dev) 334 return -ENODEV; 335 rpdev = pcie_find_root_port(dev); 336 if (!rpdev) { 337 pci_err(dev, "Root port not found\n"); 338 ret = -ENODEV; 339 goto out_put; 340 } 341 342 pos_cap_err = dev->aer_cap; 343 if (!pos_cap_err) { 344 pci_err(dev, "Device doesn't support AER\n"); 345 ret = -EPROTONOSUPPORT; 346 goto out_put; 347 } 348 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); 349 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask); 350 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 351 &uncor_mask); 352 353 rp_pos_cap_err = rpdev->aer_cap; 354 if (!rp_pos_cap_err) { 355 pci_err(rpdev, "Root port doesn't support AER\n"); 356 ret = -EPROTONOSUPPORT; 357 goto out_put; 358 } 359 360 err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 361 if (!err_alloc) { 362 ret = -ENOMEM; 363 goto out_put; 364 } 365 rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 366 if (!rperr_alloc) { 367 ret = -ENOMEM; 368 goto out_put; 369 } 370 371 if (aer_mask_override) { 372 cor_mask_orig = cor_mask; 373 cor_mask &= !(einj->cor_status); 374 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, 375 cor_mask); 376 377 uncor_mask_orig = uncor_mask; 378 uncor_mask &= !(einj->uncor_status); 379 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 380 uncor_mask); 381 } 382 383 spin_lock_irqsave(&inject_lock, flags); 384 385 err = __find_aer_error_by_dev(dev); 386 if (!err) { 387 err = err_alloc; 388 err_alloc = NULL; 389 aer_error_init(err, einj->domain, einj->bus, devfn, 390 pos_cap_err); 391 list_add(&err->list, &einjected); 392 } 393 err->uncor_status |= einj->uncor_status; 394 err->cor_status |= einj->cor_status; 395 err->header_log0 = einj->header_log0; 396 err->header_log1 = einj->header_log1; 397 err->header_log2 = einj->header_log2; 398 err->header_log3 = einj->header_log3; 399 400 if (!aer_mask_override && einj->cor_status && 401 !(einj->cor_status & ~cor_mask)) { 402 ret = -EINVAL; 403 pci_warn(dev, "The correctable error(s) is masked by device\n"); 404 spin_unlock_irqrestore(&inject_lock, flags); 405 goto out_put; 406 } 407 if (!aer_mask_override && einj->uncor_status && 408 !(einj->uncor_status & ~uncor_mask)) { 409 ret = -EINVAL; 410 pci_warn(dev, "The uncorrectable error(s) is masked by device\n"); 411 spin_unlock_irqrestore(&inject_lock, flags); 412 goto out_put; 413 } 414 415 rperr = __find_aer_error_by_dev(rpdev); 416 if (!rperr) { 417 rperr = rperr_alloc; 418 rperr_alloc = NULL; 419 aer_error_init(rperr, pci_domain_nr(rpdev->bus), 420 rpdev->bus->number, rpdev->devfn, 421 rp_pos_cap_err); 422 list_add(&rperr->list, &einjected); 423 } 424 if (einj->cor_status) { 425 if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) 426 rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; 427 else 428 rperr->root_status |= PCI_ERR_ROOT_COR_RCV; 429 rperr->source_id &= 0xffff0000; 430 rperr->source_id |= (einj->bus << 8) | devfn; 431 } 432 if (einj->uncor_status) { 433 if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) 434 rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; 435 if (sever & einj->uncor_status) { 436 rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; 437 if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) 438 rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; 439 } else 440 rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; 441 rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; 442 rperr->source_id &= 0x0000ffff; 443 rperr->source_id |= ((einj->bus << 8) | devfn) << 16; 444 } 445 spin_unlock_irqrestore(&inject_lock, flags); 446 447 if (aer_mask_override) { 448 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, 449 cor_mask_orig); 450 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 451 uncor_mask_orig); 452 } 453 454 ret = pci_bus_set_aer_ops(dev->bus); 455 if (ret) 456 goto out_put; 457 ret = pci_bus_set_aer_ops(rpdev->bus); 458 if (ret) 459 goto out_put; 460 461 device = pcie_port_find_device(rpdev, PCIE_PORT_SERVICE_AER); 462 if (device) { 463 edev = to_pcie_device(device); 464 if (!get_service_data(edev)) { 465 pci_warn(edev->port, "AER service is not initialized\n"); 466 ret = -EPROTONOSUPPORT; 467 goto out_put; 468 } 469 pci_info(edev->port, "Injecting errors %08x/%08x into device %s\n", 470 einj->cor_status, einj->uncor_status, pci_name(dev)); 471 ret = irq_inject_interrupt(edev->irq); 472 } else { 473 pci_err(rpdev, "AER device not found\n"); 474 ret = -ENODEV; 475 } 476 out_put: 477 kfree(err_alloc); 478 kfree(rperr_alloc); 479 pci_dev_put(dev); 480 return ret; 481 } 482 483 static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, 484 size_t usize, loff_t *off) 485 { 486 struct aer_error_inj einj; 487 int ret; 488 489 if (!capable(CAP_SYS_ADMIN)) 490 return -EPERM; 491 if (usize < offsetof(struct aer_error_inj, domain) || 492 usize > sizeof(einj)) 493 return -EINVAL; 494 495 memset(&einj, 0, sizeof(einj)); 496 if (copy_from_user(&einj, ubuf, usize)) 497 return -EFAULT; 498 499 ret = aer_inject(&einj); 500 return ret ? ret : usize; 501 } 502 503 static const struct file_operations aer_inject_fops = { 504 .write = aer_inject_write, 505 .owner = THIS_MODULE, 506 .llseek = noop_llseek, 507 }; 508 509 static struct miscdevice aer_inject_device = { 510 .minor = MISC_DYNAMIC_MINOR, 511 .name = "aer_inject", 512 .fops = &aer_inject_fops, 513 }; 514 515 static int __init aer_inject_init(void) 516 { 517 return misc_register(&aer_inject_device); 518 } 519 520 static void __exit aer_inject_exit(void) 521 { 522 struct aer_error *err, *err_next; 523 unsigned long flags; 524 struct pci_bus_ops *bus_ops; 525 526 misc_deregister(&aer_inject_device); 527 528 while ((bus_ops = pci_bus_ops_pop())) { 529 pci_bus_set_ops(bus_ops->bus, bus_ops->ops); 530 kfree(bus_ops); 531 } 532 533 spin_lock_irqsave(&inject_lock, flags); 534 list_for_each_entry_safe(err, err_next, &einjected, list) { 535 list_del(&err->list); 536 kfree(err); 537 } 538 spin_unlock_irqrestore(&inject_lock, flags); 539 } 540 541 module_init(aer_inject_init); 542 module_exit(aer_inject_exit); 543 544 MODULE_DESCRIPTION("PCIe AER software error injector"); 545 MODULE_LICENSE("GPL"); 546