1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * PCIe AER software error injection support. 4 * 5 * Debuging PCIe AER code is quite difficult because it is hard to 6 * trigger various real hardware errors. Software based error 7 * injection can fake almost all kinds of errors with the help of a 8 * user space helper tool aer-inject, which can be gotten from: 9 * http://www.kernel.org/pub/linux/utils/pci/aer-inject/ 10 * 11 * Copyright 2009 Intel Corporation. 12 * Huang Ying <ying.huang@intel.com> 13 */ 14 15 #include <linux/module.h> 16 #include <linux/init.h> 17 #include <linux/miscdevice.h> 18 #include <linux/pci.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/uaccess.h> 22 #include <linux/stddef.h> 23 #include <linux/device.h> 24 25 #include "portdrv.h" 26 27 /* Override the existing corrected and uncorrected error masks */ 28 static bool aer_mask_override; 29 module_param(aer_mask_override, bool, 0); 30 31 struct aer_error_inj { 32 u8 bus; 33 u8 dev; 34 u8 fn; 35 u32 uncor_status; 36 u32 cor_status; 37 u32 header_log0; 38 u32 header_log1; 39 u32 header_log2; 40 u32 header_log3; 41 u32 domain; 42 }; 43 44 struct aer_error { 45 struct list_head list; 46 u32 domain; 47 unsigned int bus; 48 unsigned int devfn; 49 int pos_cap_err; 50 51 u32 uncor_status; 52 u32 cor_status; 53 u32 header_log0; 54 u32 header_log1; 55 u32 header_log2; 56 u32 header_log3; 57 u32 root_status; 58 u32 source_id; 59 }; 60 61 struct pci_bus_ops { 62 struct list_head list; 63 struct pci_bus *bus; 64 struct pci_ops *ops; 65 }; 66 67 static LIST_HEAD(einjected); 68 69 static LIST_HEAD(pci_bus_ops_list); 70 71 /* Protect einjected and pci_bus_ops_list */ 72 static DEFINE_SPINLOCK(inject_lock); 73 74 static void aer_error_init(struct aer_error *err, u32 domain, 75 unsigned int bus, unsigned int devfn, 76 int pos_cap_err) 77 { 78 INIT_LIST_HEAD(&err->list); 79 err->domain = domain; 80 err->bus = bus; 81 err->devfn = devfn; 82 err->pos_cap_err = pos_cap_err; 83 } 84 85 /* inject_lock must be held before calling */ 86 static struct aer_error *__find_aer_error(u32 domain, unsigned int bus, 87 unsigned int devfn) 88 { 89 struct aer_error *err; 90 91 list_for_each_entry(err, &einjected, list) { 92 if (domain == err->domain && 93 bus == err->bus && 94 devfn == err->devfn) 95 return err; 96 } 97 return NULL; 98 } 99 100 /* inject_lock must be held before calling */ 101 static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) 102 { 103 int domain = pci_domain_nr(dev->bus); 104 if (domain < 0) 105 return NULL; 106 return __find_aer_error(domain, dev->bus->number, dev->devfn); 107 } 108 109 /* inject_lock must be held before calling */ 110 static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) 111 { 112 struct pci_bus_ops *bus_ops; 113 114 list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { 115 if (bus_ops->bus == bus) 116 return bus_ops->ops; 117 } 118 return NULL; 119 } 120 121 static struct pci_bus_ops *pci_bus_ops_pop(void) 122 { 123 unsigned long flags; 124 struct pci_bus_ops *bus_ops; 125 126 spin_lock_irqsave(&inject_lock, flags); 127 bus_ops = list_first_entry_or_null(&pci_bus_ops_list, 128 struct pci_bus_ops, list); 129 if (bus_ops) 130 list_del(&bus_ops->list); 131 spin_unlock_irqrestore(&inject_lock, flags); 132 return bus_ops; 133 } 134 135 static u32 *find_pci_config_dword(struct aer_error *err, int where, 136 int *prw1cs) 137 { 138 int rw1cs = 0; 139 u32 *target = NULL; 140 141 if (err->pos_cap_err == -1) 142 return NULL; 143 144 switch (where - err->pos_cap_err) { 145 case PCI_ERR_UNCOR_STATUS: 146 target = &err->uncor_status; 147 rw1cs = 1; 148 break; 149 case PCI_ERR_COR_STATUS: 150 target = &err->cor_status; 151 rw1cs = 1; 152 break; 153 case PCI_ERR_HEADER_LOG: 154 target = &err->header_log0; 155 break; 156 case PCI_ERR_HEADER_LOG+4: 157 target = &err->header_log1; 158 break; 159 case PCI_ERR_HEADER_LOG+8: 160 target = &err->header_log2; 161 break; 162 case PCI_ERR_HEADER_LOG+12: 163 target = &err->header_log3; 164 break; 165 case PCI_ERR_ROOT_STATUS: 166 target = &err->root_status; 167 rw1cs = 1; 168 break; 169 case PCI_ERR_ROOT_ERR_SRC: 170 target = &err->source_id; 171 break; 172 } 173 if (prw1cs) 174 *prw1cs = rw1cs; 175 return target; 176 } 177 178 static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn, 179 int where, int size, u32 *val) 180 { 181 u32 *sim; 182 struct aer_error *err; 183 unsigned long flags; 184 struct pci_ops *ops; 185 struct pci_ops *my_ops; 186 int domain; 187 int rv; 188 189 spin_lock_irqsave(&inject_lock, flags); 190 if (size != sizeof(u32)) 191 goto out; 192 domain = pci_domain_nr(bus); 193 if (domain < 0) 194 goto out; 195 err = __find_aer_error(domain, bus->number, devfn); 196 if (!err) 197 goto out; 198 199 sim = find_pci_config_dword(err, where, NULL); 200 if (sim) { 201 *val = *sim; 202 spin_unlock_irqrestore(&inject_lock, flags); 203 return 0; 204 } 205 out: 206 ops = __find_pci_bus_ops(bus); 207 /* 208 * pci_lock must already be held, so we can directly 209 * manipulate bus->ops. Many config access functions, 210 * including pci_generic_config_read() require the original 211 * bus->ops be installed to function, so temporarily put them 212 * back. 213 */ 214 my_ops = bus->ops; 215 bus->ops = ops; 216 rv = ops->read(bus, devfn, where, size, val); 217 bus->ops = my_ops; 218 spin_unlock_irqrestore(&inject_lock, flags); 219 return rv; 220 } 221 222 static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn, 223 int where, int size, u32 val) 224 { 225 u32 *sim; 226 struct aer_error *err; 227 unsigned long flags; 228 int rw1cs; 229 struct pci_ops *ops; 230 struct pci_ops *my_ops; 231 int domain; 232 int rv; 233 234 spin_lock_irqsave(&inject_lock, flags); 235 if (size != sizeof(u32)) 236 goto out; 237 domain = pci_domain_nr(bus); 238 if (domain < 0) 239 goto out; 240 err = __find_aer_error(domain, bus->number, devfn); 241 if (!err) 242 goto out; 243 244 sim = find_pci_config_dword(err, where, &rw1cs); 245 if (sim) { 246 if (rw1cs) 247 *sim ^= val; 248 else 249 *sim = val; 250 spin_unlock_irqrestore(&inject_lock, flags); 251 return 0; 252 } 253 out: 254 ops = __find_pci_bus_ops(bus); 255 /* 256 * pci_lock must already be held, so we can directly 257 * manipulate bus->ops. Many config access functions, 258 * including pci_generic_config_write() require the original 259 * bus->ops be installed to function, so temporarily put them 260 * back. 261 */ 262 my_ops = bus->ops; 263 bus->ops = ops; 264 rv = ops->write(bus, devfn, where, size, val); 265 bus->ops = my_ops; 266 spin_unlock_irqrestore(&inject_lock, flags); 267 return rv; 268 } 269 270 static struct pci_ops aer_inj_pci_ops = { 271 .read = aer_inj_read_config, 272 .write = aer_inj_write_config, 273 }; 274 275 static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, 276 struct pci_bus *bus, 277 struct pci_ops *ops) 278 { 279 INIT_LIST_HEAD(&bus_ops->list); 280 bus_ops->bus = bus; 281 bus_ops->ops = ops; 282 } 283 284 static int pci_bus_set_aer_ops(struct pci_bus *bus) 285 { 286 struct pci_ops *ops; 287 struct pci_bus_ops *bus_ops; 288 unsigned long flags; 289 290 bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL); 291 if (!bus_ops) 292 return -ENOMEM; 293 ops = pci_bus_set_ops(bus, &aer_inj_pci_ops); 294 spin_lock_irqsave(&inject_lock, flags); 295 if (ops == &aer_inj_pci_ops) 296 goto out; 297 pci_bus_ops_init(bus_ops, bus, ops); 298 list_add(&bus_ops->list, &pci_bus_ops_list); 299 bus_ops = NULL; 300 out: 301 spin_unlock_irqrestore(&inject_lock, flags); 302 kfree(bus_ops); 303 return 0; 304 } 305 306 static int find_aer_device_iter(struct device *device, void *data) 307 { 308 struct pcie_device **result = data; 309 struct pcie_device *pcie_dev; 310 311 if (device->bus == &pcie_port_bus_type) { 312 pcie_dev = to_pcie_device(device); 313 if (pcie_dev->service & PCIE_PORT_SERVICE_AER) { 314 *result = pcie_dev; 315 return 1; 316 } 317 } 318 return 0; 319 } 320 321 static int find_aer_device(struct pci_dev *dev, struct pcie_device **result) 322 { 323 return device_for_each_child(&dev->dev, result, find_aer_device_iter); 324 } 325 326 static int aer_inject(struct aer_error_inj *einj) 327 { 328 struct aer_error *err, *rperr; 329 struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; 330 struct pci_dev *dev, *rpdev; 331 struct pcie_device *edev; 332 unsigned long flags; 333 unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); 334 int pos_cap_err, rp_pos_cap_err; 335 u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; 336 int ret = 0; 337 338 dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn); 339 if (!dev) 340 return -ENODEV; 341 rpdev = pcie_find_root_port(dev); 342 if (!rpdev) { 343 pci_err(dev, "aer_inject: Root port not found\n"); 344 ret = -ENODEV; 345 goto out_put; 346 } 347 348 pos_cap_err = dev->aer_cap; 349 if (!pos_cap_err) { 350 pci_err(dev, "aer_inject: Device doesn't support AER\n"); 351 ret = -EPROTONOSUPPORT; 352 goto out_put; 353 } 354 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); 355 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask); 356 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 357 &uncor_mask); 358 359 rp_pos_cap_err = rpdev->aer_cap; 360 if (!rp_pos_cap_err) { 361 pci_err(rpdev, "aer_inject: Root port doesn't support AER\n"); 362 ret = -EPROTONOSUPPORT; 363 goto out_put; 364 } 365 366 err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 367 if (!err_alloc) { 368 ret = -ENOMEM; 369 goto out_put; 370 } 371 rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 372 if (!rperr_alloc) { 373 ret = -ENOMEM; 374 goto out_put; 375 } 376 377 if (aer_mask_override) { 378 cor_mask_orig = cor_mask; 379 cor_mask &= !(einj->cor_status); 380 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, 381 cor_mask); 382 383 uncor_mask_orig = uncor_mask; 384 uncor_mask &= !(einj->uncor_status); 385 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 386 uncor_mask); 387 } 388 389 spin_lock_irqsave(&inject_lock, flags); 390 391 err = __find_aer_error_by_dev(dev); 392 if (!err) { 393 err = err_alloc; 394 err_alloc = NULL; 395 aer_error_init(err, einj->domain, einj->bus, devfn, 396 pos_cap_err); 397 list_add(&err->list, &einjected); 398 } 399 err->uncor_status |= einj->uncor_status; 400 err->cor_status |= einj->cor_status; 401 err->header_log0 = einj->header_log0; 402 err->header_log1 = einj->header_log1; 403 err->header_log2 = einj->header_log2; 404 err->header_log3 = einj->header_log3; 405 406 if (!aer_mask_override && einj->cor_status && 407 !(einj->cor_status & ~cor_mask)) { 408 ret = -EINVAL; 409 pci_warn(dev, "aer_inject: The correctable error(s) is masked by device\n"); 410 spin_unlock_irqrestore(&inject_lock, flags); 411 goto out_put; 412 } 413 if (!aer_mask_override && einj->uncor_status && 414 !(einj->uncor_status & ~uncor_mask)) { 415 ret = -EINVAL; 416 pci_warn(dev, "aer_inject: The uncorrectable error(s) is masked by device\n"); 417 spin_unlock_irqrestore(&inject_lock, flags); 418 goto out_put; 419 } 420 421 rperr = __find_aer_error_by_dev(rpdev); 422 if (!rperr) { 423 rperr = rperr_alloc; 424 rperr_alloc = NULL; 425 aer_error_init(rperr, pci_domain_nr(rpdev->bus), 426 rpdev->bus->number, rpdev->devfn, 427 rp_pos_cap_err); 428 list_add(&rperr->list, &einjected); 429 } 430 if (einj->cor_status) { 431 if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) 432 rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; 433 else 434 rperr->root_status |= PCI_ERR_ROOT_COR_RCV; 435 rperr->source_id &= 0xffff0000; 436 rperr->source_id |= (einj->bus << 8) | devfn; 437 } 438 if (einj->uncor_status) { 439 if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) 440 rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; 441 if (sever & einj->uncor_status) { 442 rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; 443 if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) 444 rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; 445 } else 446 rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; 447 rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; 448 rperr->source_id &= 0x0000ffff; 449 rperr->source_id |= ((einj->bus << 8) | devfn) << 16; 450 } 451 spin_unlock_irqrestore(&inject_lock, flags); 452 453 if (aer_mask_override) { 454 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, 455 cor_mask_orig); 456 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 457 uncor_mask_orig); 458 } 459 460 ret = pci_bus_set_aer_ops(dev->bus); 461 if (ret) 462 goto out_put; 463 ret = pci_bus_set_aer_ops(rpdev->bus); 464 if (ret) 465 goto out_put; 466 467 if (find_aer_device(rpdev, &edev)) { 468 if (!get_service_data(edev)) { 469 dev_warn(&edev->device, 470 "aer_inject: AER service is not initialized\n"); 471 ret = -EPROTONOSUPPORT; 472 goto out_put; 473 } 474 dev_info(&edev->device, 475 "aer_inject: Injecting errors %08x/%08x into device %s\n", 476 einj->cor_status, einj->uncor_status, pci_name(dev)); 477 aer_irq(-1, edev); 478 } else { 479 pci_err(rpdev, "aer_inject: AER device not found\n"); 480 ret = -ENODEV; 481 } 482 out_put: 483 kfree(err_alloc); 484 kfree(rperr_alloc); 485 pci_dev_put(dev); 486 return ret; 487 } 488 489 static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, 490 size_t usize, loff_t *off) 491 { 492 struct aer_error_inj einj; 493 int ret; 494 495 if (!capable(CAP_SYS_ADMIN)) 496 return -EPERM; 497 if (usize < offsetof(struct aer_error_inj, domain) || 498 usize > sizeof(einj)) 499 return -EINVAL; 500 501 memset(&einj, 0, sizeof(einj)); 502 if (copy_from_user(&einj, ubuf, usize)) 503 return -EFAULT; 504 505 ret = aer_inject(&einj); 506 return ret ? ret : usize; 507 } 508 509 static const struct file_operations aer_inject_fops = { 510 .write = aer_inject_write, 511 .owner = THIS_MODULE, 512 .llseek = noop_llseek, 513 }; 514 515 static struct miscdevice aer_inject_device = { 516 .minor = MISC_DYNAMIC_MINOR, 517 .name = "aer_inject", 518 .fops = &aer_inject_fops, 519 }; 520 521 static int __init aer_inject_init(void) 522 { 523 return misc_register(&aer_inject_device); 524 } 525 526 static void __exit aer_inject_exit(void) 527 { 528 struct aer_error *err, *err_next; 529 unsigned long flags; 530 struct pci_bus_ops *bus_ops; 531 532 misc_deregister(&aer_inject_device); 533 534 while ((bus_ops = pci_bus_ops_pop())) { 535 pci_bus_set_ops(bus_ops->bus, bus_ops->ops); 536 kfree(bus_ops); 537 } 538 539 spin_lock_irqsave(&inject_lock, flags); 540 list_for_each_entry_safe(err, err_next, &einjected, list) { 541 list_del(&err->list); 542 kfree(err); 543 } 544 spin_unlock_irqrestore(&inject_lock, flags); 545 } 546 547 module_init(aer_inject_init); 548 module_exit(aer_inject_exit); 549 550 MODULE_DESCRIPTION("PCIe AER software error injector"); 551 MODULE_LICENSE("GPL"); 552