// SPDX-License-Identifier: GPL-2.0
/*
 * Implement the AER root port service driver. The driver registers an IRQ
 * handler. When a root port triggers an AER interrupt, the IRQ handler
 * collects root port status and schedules work.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 *
 * (C) Copyright 2009 Hewlett-Packard Development Company, L.P.
 *	Andrew Patterson <andrew.patterson@hp.com>
 */

#define pr_fmt(fmt) "AER: " fmt
#define dev_fmt pr_fmt

#include <linux/bitops.h>
#include <linux/cper.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/slab.h>
#include <acpi/apei.h>
#include <ras/ras_event.h>

#include "../pci.h"
#include "portdrv.h"

#define AER_ERROR_SOURCES_MAX		128

#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
#define AER_MAX_TYPEOF_UNCOR_ERRS	27	/* as per PCI_ERR_UNCOR_STATUS */

struct aer_err_source {
	unsigned int status;
	unsigned int id;
};

struct aer_rpc {
	struct pci_dev *rpd;		/* Root Port device */
	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};
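
/*
 * Overall flow: aer_irq() (the hard IRQ handler, below) reads and
 * acknowledges the Root Error Status register, pushes one struct
 * aer_err_source into aer_fifo and wakes the IRQ thread; aer_isr() then
 * drains the fifo and decodes and handles each queued error source.
 */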

/* AER stats for the device */
struct aer_stats {

	/*
	 * Fields for all AER capable devices. They indicate the errors
	 * "as seen by this device". Note that this may mean that if an
	 * end point is causing problems, the AER counters may increment
	 * at its link partner (e.g. root port) because the errors will be
	 * "seen" by the link partner and not the problematic end point
	 * itself (which may report all counters as 0 as it never saw any
	 * problems).
	 */
	/* Counters for different types of correctable errors */
	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
	/* Counters for different types of fatal uncorrectable errors */
	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Counters for different types of nonfatal uncorrectable errors */
	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Total number of ERR_COR sent by this device */
	u64 dev_total_cor_errs;
	/* Total number of ERR_FATAL sent by this device */
	u64 dev_total_fatal_errs;
	/* Total number of ERR_NONFATAL sent by this device */
	u64 dev_total_nonfatal_errs;

	/*
	 * Fields for Root Ports & Root Complex Event Collectors only; these
	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
	 * messages received by the root port / event collector, INCLUDING the
	 * ones that are generated internally (by the root port itself)
	 */
	u64 rootport_total_cor_errs;
	u64 rootport_total_fatal_errs;
	u64 rootport_total_nonfatal_errs;
};

#define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
					PCI_ERR_UNC_ECRC|		\
					PCI_ERR_UNC_UNSUP|		\
					PCI_ERR_UNC_COMP_ABORT|		\
					PCI_ERR_UNC_UNX_COMP|		\
					PCI_ERR_UNC_MALF_TLP)

#define SYSTEM_ERROR_INTR_ON_MESG_MASK	(PCI_EXP_RTCTL_SECEE|	\
					PCI_EXP_RTCTL_SENFEE|	\
					PCI_EXP_RTCTL_SEFEE)
#define ROOT_PORT_INTR_ON_MESG_MASK	(PCI_ERR_ROOT_CMD_COR_EN|	\
					PCI_ERR_ROOT_CMD_NONFATAL_EN|	\
					PCI_ERR_ROOT_CMD_FATAL_EN)
#define ERR_COR_ID(d)			(d & 0xffff)
#define ERR_UNCOR_ID(d)			(d >> 16)
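
/*
 * The Root Error Source Identification register packs two requester IDs:
 * bits [15:0] identify the source of the last ERR_COR message and bits
 * [31:16] the source of the last ERR_FATAL/ERR_NONFATAL message, which is
 * what ERR_COR_ID() and ERR_UNCOR_ID() extract from e_src->id.
 */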

static int pcie_aer_disable;
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);

void pci_no_aer(void)
{
	pcie_aer_disable = 1;
}

bool pci_aer_available(void)
{
	return !pcie_aer_disable && pci_msi_enabled();
}

#ifdef CONFIG_PCIE_ECRC

#define ECRC_POLICY_DEFAULT 0		/* ECRC set by BIOS */
#define ECRC_POLICY_OFF     1		/* ECRC off for performance */
#define ECRC_POLICY_ON      2		/* ECRC on for data integrity */

static int ecrc_policy = ECRC_POLICY_DEFAULT;

static const char * const ecrc_policy_str[] = {
	[ECRC_POLICY_DEFAULT] = "bios",
	[ECRC_POLICY_OFF] = "off",
	[ECRC_POLICY_ON] = "on"
};

/**
 * enable_ecrc_checking - enable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
static int enable_ecrc_checking(struct pci_dev *dev)
{
	int pos;
	u32 reg32;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -ENODEV;

	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
	if (reg32 & PCI_ERR_CAP_ECRC_GENC)
		reg32 |= PCI_ERR_CAP_ECRC_GENE;
	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * disable_ecrc_checking - disable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
static int disable_ecrc_checking(struct pci_dev *dev)
{
	int pos;
	u32 reg32;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -ENODEV;

	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
 * @dev: the PCI device
 */
void pcie_set_ecrc_checking(struct pci_dev *dev)
{
	switch (ecrc_policy) {
	case ECRC_POLICY_DEFAULT:
		return;
	case ECRC_POLICY_OFF:
		disable_ecrc_checking(dev);
		break;
	case ECRC_POLICY_ON:
		enable_ecrc_checking(dev);
		break;
	default:
		return;
	}
}
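
/*
 * The policy above is normally chosen on the kernel command line; the
 * "ecrc=" sub-option of the "pci=" parameter (e.g. pci=ecrc=on) is the
 * usual way pcie_ecrc_get_policy() below ends up being called.
 */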

/**
 * pcie_ecrc_get_policy - parse kernel command-line ecrc option
 * @str: ECRC policy from kernel command line to use
 */
void pcie_ecrc_get_policy(char *str)
{
	int i;

	i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str);
	if (i < 0)
		return;

	ecrc_policy = i;
}
#endif	/* CONFIG_PCIE_ECRC */

#ifdef CONFIG_ACPI_APEI
static inline int hest_match_pci(struct acpi_hest_aer_common *p,
				 struct pci_dev *pci)
{
	return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) &&
	       ACPI_HEST_BUS(p->bus) == pci->bus->number &&
	       p->device == PCI_SLOT(pci->devfn) &&
	       p->function == PCI_FUNC(pci->devfn);
}

static inline bool hest_match_type(struct acpi_hest_header *hest_hdr,
				   struct pci_dev *dev)
{
	u16 hest_type = hest_hdr->type;
	u8 pcie_type = pci_pcie_type(dev);

	if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT &&
	     pcie_type == PCI_EXP_TYPE_ROOT_PORT) ||
	    (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT &&
	     pcie_type == PCI_EXP_TYPE_ENDPOINT) ||
	    (hest_type == ACPI_HEST_TYPE_AER_BRIDGE &&
	     (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE))
		return true;
	return false;
}

struct aer_hest_parse_info {
	struct pci_dev *pci_dev;
	int firmware_first;
};

static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr)
{
	if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
	    hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
	    hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE)
		return 1;
	return 0;
}

static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data)
{
	struct aer_hest_parse_info *info = data;
	struct acpi_hest_aer_common *p;
	int ff;

	if (!hest_source_is_pcie_aer(hest_hdr))
		return 0;

	p = (struct acpi_hest_aer_common *)(hest_hdr + 1);
	ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);

	/*
	 * If no specific device is supplied, determine whether
	 * FIRMWARE_FIRST is set for *any* PCIe device.
	 */
	if (!info->pci_dev) {
		info->firmware_first |= ff;
		return 0;
	}

	/* Otherwise, check the specific device */
	if (p->flags & ACPI_HEST_GLOBAL) {
		if (hest_match_type(hest_hdr, info->pci_dev))
			info->firmware_first = ff;
	} else
		if (hest_match_pci(p, info->pci_dev))
			info->firmware_first = ff;

	return 0;
}

static void aer_set_firmware_first(struct pci_dev *pci_dev)
{
	int rc;
	struct aer_hest_parse_info info = {
		.pci_dev	= pci_dev,
		.firmware_first	= 0,
	};

	rc = apei_hest_parse(aer_hest_parse, &info);

	if (rc)
		pci_dev->__aer_firmware_first = 0;
	else
		pci_dev->__aer_firmware_first = info.firmware_first;
	pci_dev->__aer_firmware_first_valid = 1;
}

int pcie_aer_get_firmware_first(struct pci_dev *dev)
{
	if (!pci_is_pcie(dev))
		return 0;

	if (pcie_ports_native)
		return 0;

	if (!dev->__aer_firmware_first_valid)
		aer_set_firmware_first(dev);
	return dev->__aer_firmware_first;
}

static bool aer_firmware_first;

/**
 * aer_acpi_firmware_first - Check if APEI should control AER.
 */
bool aer_acpi_firmware_first(void)
{
	static bool parsed = false;
	struct aer_hest_parse_info info = {
		.pci_dev	= NULL,	/* Check all PCIe devices */
		.firmware_first	= 0,
	};

	if (pcie_ports_native)
		return false;

	if (!parsed) {
		apei_hest_parse(aer_hest_parse, &info);
		aer_firmware_first = info.firmware_first;
		parsed = true;
	}
	return aer_firmware_first;
}
#endif
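
/*
 * When the HEST marks a device (or all PCIe devices) as FIRMWARE_FIRST
 * and native handling has not been forced with "pcie_ports=native", the
 * platform firmware owns the AER registers.  The enable/clear helpers
 * below therefore bail out (typically with -EIO) instead of touching
 * AER state in that case.
 */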

#define PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)

int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	if (!dev->aer_cap)
		return -EIO;

	return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);

int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
					  PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);

void pci_aer_clear_device_status(struct pci_dev *dev)
{
	u16 sta;

	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
}

int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
	int pos;
	u32 status, sev;

	pos = dev->aer_cap;
	if (!pos)
		return -EIO;

	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	/* Clear status bits for ERR_NONFATAL errors only */
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
	status &= ~sev;
	if (status)
		pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}
EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status);

void pci_aer_clear_fatal_status(struct pci_dev *dev)
{
	int pos;
	u32 status, sev;

	pos = dev->aer_cap;
	if (!pos)
		return;

	if (pcie_aer_get_firmware_first(dev))
		return;

	/* Clear status bits for ERR_FATAL errors only */
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
	status &= sev;
	if (status)
		pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
}
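
/*
 * PCI_ERR_UNCOR_SEVER selects which uncorrectable errors are signaled as
 * ERR_FATAL: a set bit means fatal, a clear bit means nonfatal.  That is
 * why pci_aer_clear_nonfatal_status() masks the status with ~sev while
 * pci_aer_clear_fatal_status() masks it with sev.
 */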

/**
 * pci_aer_raw_clear_status - Clear AER error registers.
 * @dev: the PCI device
 *
 * Clearing AER error status registers unconditionally, regardless of
 * whether they're owned by firmware or the OS.
 *
 * Returns 0 on success, or negative on failure.
 */
int pci_aer_raw_clear_status(struct pci_dev *dev)
{
	int pos;
	u32 status;
	int port_type;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -EIO;

	port_type = pci_pcie_type(dev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
		pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status);
	}

	pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
	pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);

	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}

int pci_aer_clear_status(struct pci_dev *dev)
{
	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	return pci_aer_raw_clear_status(dev);
}

void pci_save_aer_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *save_state;
	u32 *cap;
	int pos;

	pos = dev->aer_cap;
	if (!pos)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, cap++);
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, cap++);
	pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, cap++);
	pci_read_config_dword(dev, pos + PCI_ERR_CAP, cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, cap++);
}

void pci_restore_aer_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *save_state;
	u32 *cap;
	int pos;

	pos = dev->aer_cap;
	if (!pos)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, *cap++);
	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, *cap++);
	pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, *cap++);
	pci_write_config_dword(dev, pos + PCI_ERR_CAP, *cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, *cap++);
}

void pci_aer_init(struct pci_dev *dev)
{
	int n;

	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
	if (!dev->aer_cap)
		return;

	dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);

	/*
	 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
	 * PCI_ERR_COR_MASK, and PCI_ERR_CAP.  Root and Root Complex Event
	 * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0, sec
	 * 7.8.4).
	 */
	n = pcie_cap_has_rtctl(dev) ? 5 : 4;
	pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);

	pci_aer_clear_status(dev);
}

void pci_aer_exit(struct pci_dev *dev)
{
	kfree(dev->aer_stats);
	dev->aer_stats = NULL;
}

#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_TRANSMITTER		3

#define AER_AGENT_REQUESTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

#define AER_GET_AGENT(t, e)						\
	((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	(e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	(e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)

#define AER_PHYSICAL_LAYER_ERROR	0
#define AER_DATA_LINK_LAYER_ERROR	1
#define AER_TRANSACTION_LAYER_ERROR	2

#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
	PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_BAD_TLP|						\
	PCI_ERR_COR_BAD_DLLP|						\
	PCI_ERR_COR_REP_ROLL|						\
	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

#define AER_GET_LAYER_ERROR(t, e)					\
	((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)
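
/*
 * Example of how the classification macros above combine: for an
 * uncorrectable error with only PCI_ERR_UNC_COMP_ABORT set,
 * AER_GET_AGENT() yields AER_AGENT_COMPLETER and AER_GET_LAYER_ERROR()
 * yields AER_TRANSACTION_LAYER_ERROR, so the error is logged as a
 * Transaction Layer error attributed to a "Completer ID".
 */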

/*
 * AER error strings
 */
static const char *aer_error_severity_string[] = {
	"Uncorrected (Non-Fatal)",
	"Uncorrected (Fatal)",
	"Corrected"
};

static const char *aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};

static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
	"RxErr",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"BadTLP",			/* Bit Position 6	*/
	"BadDLLP",			/* Bit Position 7	*/
	"Rollover",			/* Bit Position 8	*/
	NULL,
	NULL,
	NULL,
	"Timeout",			/* Bit Position 12	*/
	"NonFatalErr",			/* Bit Position 13	*/
	"CorrIntErr",			/* Bit Position 14	*/
	"HeaderOF",			/* Bit Position 15	*/
};

static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
	"Undefined",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	"DLP",				/* Bit Position 4	*/
	"SDES",				/* Bit Position 5	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"TLP",				/* Bit Position 12	*/
	"FCP",				/* Bit Position 13	*/
	"CmpltTO",			/* Bit Position 14	*/
	"CmpltAbrt",			/* Bit Position 15	*/
	"UnxCmplt",			/* Bit Position 16	*/
	"RxOF",				/* Bit Position 17	*/
	"MalfTLP",			/* Bit Position 18	*/
	"ECRC",				/* Bit Position 19	*/
	"UnsupReq",			/* Bit Position 20	*/
	"ACSViol",			/* Bit Position 21	*/
	"UncorrIntErr",			/* Bit Position 22	*/
	"BlockedTLP",			/* Bit Position 23	*/
	"AtomicOpBlocked",		/* Bit Position 24	*/
	"TLPBlockedErr",		/* Bit Position 25	*/
	"PoisonTLPBlocked",		/* Bit Position 26	*/
};

static const char *aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};

#define aer_stats_dev_attr(name, stats_array, strings_array,		\
			   total_string, total_field)			\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	unsigned int i;							\
	char *str = buf;						\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	u64 *stats = pdev->aer_stats->stats_array;			\
									\
	for (i = 0; i < ARRAY_SIZE(strings_array); i++) {		\
		if (strings_array[i])					\
			str += sprintf(str, "%s %llu\n",		\
				       strings_array[i], stats[i]);	\
		else if (stats[i])					\
			str += sprintf(str, #stats_array "_bit[%d] %llu\n",\
				       i, stats[i]);			\
	}								\
	str += sprintf(str, "TOTAL_%s %llu\n", total_string,		\
		       pdev->aer_stats->total_field);			\
	return str-buf;							\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
		   aer_correctable_error_string, "ERR_COR",
		   dev_total_cor_errs);
aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
		   aer_uncorrectable_error_string, "ERR_FATAL",
		   dev_total_fatal_errs);
aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
		   aer_uncorrectable_error_string, "ERR_NONFATAL",
		   dev_total_nonfatal_errs);
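
/*
 * The three attributes above become the per-device sysfs files
 * aer_dev_correctable, aer_dev_fatal and aer_dev_nonfatal.  Reading one
 * of them returns a "<name> <count>" line per known bit plus a TOTAL_*
 * line, for example (illustrative counts only):
 *
 *	RxErr 0
 *	BadTLP 2
 *	...
 *	TOTAL_ERR_COR 2
 */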

#define aer_stats_rootport_attr(name, field)				\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	return sprintf(buf, "%llu\n", pdev->aer_stats->field);		\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_rootport_attr(aer_rootport_total_err_cor,
			rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
			rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
			rootport_total_nonfatal_errs);

static struct attribute *aer_stats_attrs[] __ro_after_init = {
	&dev_attr_aer_dev_correctable.attr,
	&dev_attr_aer_dev_fatal.attr,
	&dev_attr_aer_dev_nonfatal.attr,
	&dev_attr_aer_rootport_total_err_cor.attr,
	&dev_attr_aer_rootport_total_err_fatal.attr,
	&dev_attr_aer_rootport_total_err_nonfatal.attr,
	NULL
};

static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
					   struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (!pdev->aer_stats)
		return 0;

	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
	    pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
		return 0;

	return a->mode;
}

const struct attribute_group aer_stats_attr_group = {
	.attrs  = aer_stats_attrs,
	.is_visible = aer_stats_attrs_are_visible,
};

static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
				   struct aer_err_info *info)
{
	unsigned long status = info->status & ~info->mask;
	int i, max = -1;
	u64 *counter = NULL;
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	switch (info->severity) {
	case AER_CORRECTABLE:
		aer_stats->dev_total_cor_errs++;
		counter = &aer_stats->dev_cor_errs[0];
		max = AER_MAX_TYPEOF_COR_ERRS;
		break;
	case AER_NONFATAL:
		aer_stats->dev_total_nonfatal_errs++;
		counter = &aer_stats->dev_nonfatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	case AER_FATAL:
		aer_stats->dev_total_fatal_errs++;
		counter = &aer_stats->dev_fatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	}

	for_each_set_bit(i, &status, max)
		counter[i]++;
}

static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
					struct aer_err_source *e_src)
{
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
		aer_stats->rootport_total_cor_errs++;

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			aer_stats->rootport_total_fatal_errs++;
		else
			aer_stats->rootport_total_nonfatal_errs++;
	}
}

static void __print_tlp_header(struct pci_dev *dev,
			       struct aer_header_log_regs *t)
{
	pci_err(dev, "  TLP Header: %08x %08x %08x %08x\n",
		t->dw0, t->dw1, t->dw2, t->dw3);
}

static void __aer_print_error(struct pci_dev *dev,
			      struct aer_err_info *info)
{
	unsigned long status = info->status & ~info->mask;
	const char *errmsg = NULL;
	int i;

	for_each_set_bit(i, &status, 32) {
		if (info->severity == AER_CORRECTABLE)
			errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
				aer_correctable_error_string[i] : NULL;
		else
			errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
				aer_uncorrectable_error_string[i] : NULL;

		if (errmsg)
			pci_err(dev, "   [%2d] %-22s%s\n", i, errmsg,
				info->first_error == i ? " (First)" : "");
		else
			pci_err(dev, "   [%2d] Unknown Error Bit%s\n",
				i, info->first_error == i ? " (First)" : "");
	}
	pci_dev_aer_stats_incr(dev, info);
}

void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
	int layer, agent;
	int id = ((dev->bus->number << 8) | dev->devfn);

	if (!info->status) {
		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity]);
		goto out;
	}

	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
	agent = AER_GET_AGENT(info->severity, info->status);

	pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
		aer_error_severity_string[info->severity],
		aer_error_layer[layer], aer_agent_string[agent]);

	pci_err(dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
		dev->vendor, dev->device,
		info->status, info->mask);

	__aer_print_error(dev, info);

	if (info->tlp_header_valid)
		__print_tlp_header(dev, &info->tlp);

out:
	if (info->id && info->error_dev_num > 1 && info->id == id)
		pci_err(dev, "  Error of this Agent is reported first\n");

	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			info->severity, info->tlp_header_valid, &info->tlp);
}

static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
	u8 bus = info->id >> 8;
	u8 devfn = info->id & 0xff;

	pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n",
		 info->multi_error_valid ? "Multiple " : "",
		 aer_error_severity_string[info->severity],
		 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn),
		 PCI_FUNC(devfn));
}

#ifdef CONFIG_ACPI_APEI_PCIEAER
int cper_severity_to_aer(int cper_severity)
{
	switch (cper_severity) {
	case CPER_SEV_RECOVERABLE:
		return AER_NONFATAL;
	case CPER_SEV_FATAL:
		return AER_FATAL;
	default:
		return AER_CORRECTABLE;
	}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);

void cper_print_aer(struct pci_dev *dev, int aer_severity,
		    struct aer_capability_regs *aer)
{
	int layer, agent, tlp_header_valid = 0;
	u32 status, mask;
	struct aer_err_info info;

	if (aer_severity == AER_CORRECTABLE) {
		status = aer->cor_status;
		mask = aer->cor_mask;
	} else {
		status = aer->uncor_status;
		mask = aer->uncor_mask;
		tlp_header_valid = status & AER_LOG_TLP_MASKS;
	}

	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);

	memset(&info, 0, sizeof(info));
	info.severity = aer_severity;
	info.status = status;
	info.mask = mask;
	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);

	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
	__aer_print_error(dev, &info);
	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
		aer_error_layer[layer], aer_agent_string[agent]);

	if (aer_severity != AER_CORRECTABLE)
		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
			aer->uncor_severity);

	if (tlp_header_valid)
		__print_tlp_header(dev, &aer->header_log);

	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
			aer_severity, tlp_header_valid, &aer->header_log);
}
#endif
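
/*
 * cper_print_aer() is the logging entry point for the firmware-first
 * path, where the AER register snapshot arrives as part of a CPER record
 * (see aer_recover_work_func() below) instead of being read from the
 * device by aer_get_device_error_info().
 */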

/**
 * add_error_device - list device to be handled
 * @e_info: pointer to error info
 * @dev: pointer to pci_dev to be added
 */
static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
	if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
		e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
		e_info->error_dev_num++;
		return 0;
	}
	return -ENOSPC;
}

/**
 * is_error_source - check whether the device is source of reported error
 * @dev: pointer to pci_dev to be checked
 * @e_info: pointer to reported error info
 */
static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
{
	int pos;
	u32 status, mask;
	u16 reg16;

	/*
	 * When bus id is equal to 0, it might be a bad id
	 * reported by root port.
	 */
	if ((PCI_BUS_NUM(e_info->id) != 0) &&
	    !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
		/* Device ID match? */
		if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
			return true;

		/* Continue id comparing if there is no multiple error */
		if (!e_info->multi_error_valid)
			return false;
	}

	/*
	 * When either
	 * 1) bus id is equal to 0, because some ports might lose the bus
	 *    id of the error source;
	 * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set; or
	 * 3) there are multiple errors and the prior ID comparison fails;
	 * we check the AER status registers to find a possible reporter.
	 */
	if (atomic_read(&dev->enable_cnt) == 0)
		return false;

	/* Check if AER is enabled */
	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
	if (!(reg16 & PCI_EXP_AER_FLAGS))
		return false;

	pos = dev->aer_cap;
	if (!pos)
		return false;

	/* Check if error is recorded */
	if (e_info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
		pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
	} else {
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
	}
	if (status & ~mask)
		return true;

	return false;
}

static int find_device_iter(struct pci_dev *dev, void *data)
{
	struct aer_err_info *e_info = (struct aer_err_info *)data;

	if (is_error_source(dev, e_info)) {
		/* List this device */
		if (add_error_device(e_info, dev)) {
			/* We cannot handle more... Stop iteration */
			/* TODO: Should print error message here? */
			return 1;
		}

		/* If there is only a single error, stop iteration */
		if (!e_info->multi_error_valid)
			return 1;
	}
	return 0;
}

/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: includes detailed error information such as the ID
 *
 * Return true if found.
 *
 * Invoked by DPC when an error is detected at the Root Port.
 * The caller must set the id, severity, and multi_error_valid fields of
 * the struct aer_err_info pointed to by @e_info. This function fills in
 * e_info->error_dev_num and e_info->dev[] based on that information.
 */
static bool find_source_device(struct pci_dev *parent,
			       struct aer_err_info *e_info)
{
	struct pci_dev *dev = parent;
	int result;

	/* Must reset in this function */
	e_info->error_dev_num = 0;

	/* Is Root Port an agent that sends error message? */
	result = find_device_iter(dev, e_info);
	if (result)
		return true;

	pci_walk_bus(parent->subordinate, find_device_iter, e_info);

	if (!e_info->error_dev_num) {
		pci_info(parent, "can't find device of ID%04x\n", e_info->id);
		return false;
	}
	return true;
}

/**
 * handle_error_source - handle logging error into an event log
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error is detected by the Root Port.
 */
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
	int pos;

	if (info->severity == AER_CORRECTABLE) {
		/*
		 * Correctable error does not need software intervention.
		 * No need to go through error recovery process.
		 */
		pos = dev->aer_cap;
		if (pos)
			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
					       info->status);
		pci_aer_clear_device_status(dev);
	} else if (info->severity == AER_NONFATAL)
		pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
	else if (info->severity == AER_FATAL)
		pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
	pci_dev_put(dev);
}

#ifdef CONFIG_ACPI_APEI_PCIEAER

#define AER_RECOVER_RING_ORDER		4
#define AER_RECOVER_RING_SIZE		(1 << AER_RECOVER_RING_ORDER)

struct aer_recover_entry {
	u8	bus;
	u8	devfn;
	u16	domain;
	int	severity;
	struct aer_capability_regs *regs;
};

static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
		    AER_RECOVER_RING_SIZE);

static void aer_recover_work_func(struct work_struct *work)
{
	struct aer_recover_entry entry;
	struct pci_dev *pdev;

	while (kfifo_get(&aer_recover_ring, &entry)) {
		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
						   entry.devfn);
		if (!pdev) {
			pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
			       entry.domain, entry.bus,
			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
			continue;
		}
		cper_print_aer(pdev, entry.severity, entry.regs);
		if (entry.severity == AER_NONFATAL)
			pcie_do_recovery(pdev, pci_channel_io_normal,
					 aer_root_reset);
		else if (entry.severity == AER_FATAL)
			pcie_do_recovery(pdev, pci_channel_io_frozen,
					 aer_root_reset);
		pci_dev_put(pdev);
	}
}

/*
 * Mutual exclusion for the writers of aer_recover_ring; the reader side
 * needs no lock because there is only one reader and no locking is
 * required between the single reader and the writers.
 */
static DEFINE_SPINLOCK(aer_recover_ring_lock);
static DECLARE_WORK(aer_recover_work, aer_recover_work_func);

void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
		       int severity, struct aer_capability_regs *aer_regs)
{
	struct aer_recover_entry entry = {
		.bus		= bus,
		.devfn		= devfn,
		.domain		= domain,
		.severity	= severity,
		.regs		= aer_regs,
	};

	if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
				&aer_recover_ring_lock))
		schedule_work(&aer_recover_work);
	else
		pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
}
EXPORT_SYMBOL_GPL(aer_recover_queue);
#endif
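
/*
 * aer_recover_queue() is the producer side of the ring above: the
 * firmware-first notification paths (e.g. the APEI/GHES handlers) call it
 * from possibly atomic context, and the actual logging and recovery run
 * later from aer_recover_work_func() in workqueue context.
 */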

/**
 * aer_get_device_error_info - read error status from dev and store it to info
 * @dev: pointer to the device expected to have an error record
 * @info: pointer to structure to store the error record
 *
 * Return 1 on success, 0 on error.
 *
 * Note that @info is reused among all error devices. Clear fields properly.
 */
int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
	int pos, temp;

	/* Must reset in this function */
	info->status = 0;
	info->tlp_header_valid = 0;

	pos = dev->aer_cap;

	/* The device might not support AER */
	if (!pos)
		return 0;

	if (info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
				      &info->status);
		pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
				      &info->mask);
		if (!(info->status & ~info->mask))
			return 0;
	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
		   pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
		   info->severity == AER_NONFATAL) {

		/* Link is still healthy for IO reads */
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
				      &info->status);
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
				      &info->mask);
		if (!(info->status & ~info->mask))
			return 0;

		/* Get First Error Pointer */
		pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
		info->first_error = PCI_ERR_CAP_FEP(temp);

		if (info->status & AER_LOG_TLP_MASKS) {
			info->tlp_header_valid = 1;
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
		}
	}

	return 1;
}
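
/*
 * Note that for an ERR_FATAL reported by a device that is neither a Root
 * Port nor a Downstream Port, the register reads above are skipped
 * entirely: the link to that device may already be unusable, so
 * info->status stays 0 and aer_print_error() logs the error with
 * type "Inaccessible".
 */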

static inline void aer_process_err_devices(struct aer_err_info *e_info)
{
	int i;

	/* Report all errors before handling them, so no record is lost to a reset etc. */
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			aer_print_error(e_info->dev[i], e_info);
	}
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			handle_error_source(e_info->dev[i], e_info);
	}
}

/**
 * aer_isr_one_error - consume an error detected by root port
 * @rpc: pointer to the root port which holds an error
 * @e_src: pointer to an error source
 */
static void aer_isr_one_error(struct aer_rpc *rpc,
			      struct aer_err_source *e_src)
{
	struct pci_dev *pdev = rpc->rpd;
	struct aer_err_info e_info;

	pci_rootport_aer_stats_incr(pdev, e_src);

	/*
	 * It is possible that both a correctable and an uncorrectable error
	 * have been logged. Report the correctable error first.
	 */
	if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
		e_info.id = ERR_COR_ID(e_src->id);
		e_info.severity = AER_CORRECTABLE;

		if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
			e_info.multi_error_valid = 1;
		else
			e_info.multi_error_valid = 0;
		aer_print_port_info(pdev, &e_info);

		if (find_source_device(pdev, &e_info))
			aer_process_err_devices(&e_info);
	}

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		e_info.id = ERR_UNCOR_ID(e_src->id);

		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			e_info.severity = AER_FATAL;
		else
			e_info.severity = AER_NONFATAL;

		if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
			e_info.multi_error_valid = 1;
		else
			e_info.multi_error_valid = 0;

		aer_print_port_info(pdev, &e_info);

		if (find_source_device(pdev, &e_info))
			aer_process_err_devices(&e_info);
	}
}

/**
 * aer_isr - consume errors detected by root port
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked, as the IRQ thread, when the Root Port records a newly
 * detected error.
 */
static irqreturn_t aer_isr(int irq, void *context)
{
	struct pcie_device *dev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(dev);
	struct aer_err_source uninitialized_var(e_src);

	if (kfifo_is_empty(&rpc->aer_fifo))
		return IRQ_NONE;

	while (kfifo_get(&rpc->aer_fifo, &e_src))
		aer_isr_one_error(rpc, &e_src);
	return IRQ_HANDLED;
}

/**
 * aer_irq - Root Port's ISR
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked when Root Port detects AER messages.
 */
static irqreturn_t aer_irq(int irq, void *context)
{
	struct pcie_device *pdev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(pdev);
	struct pci_dev *rp = rpc->rpd;
	struct aer_err_source e_src = {};
	int pos = rp->aer_cap;

	pci_read_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, &e_src.status);
	if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV)))
		return IRQ_NONE;

	pci_read_config_dword(rp, pos + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
	pci_write_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, e_src.status);

	if (!kfifo_put(&rpc->aer_fifo, e_src))
		return IRQ_HANDLED;

	return IRQ_WAKE_THREAD;
}
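
/*
 * aer_irq() runs in hard interrupt context: it only acknowledges the Root
 * Error Status register and queues the snapshot into aer_fifo, returning
 * IRQ_WAKE_THREAD so that the decoding in aer_isr() happens in the IRQ
 * thread.  If the fifo is full, the event is dropped (IRQ_HANDLED).
 */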

static int set_device_error_reporting(struct pci_dev *dev, void *data)
{
	bool enable = *((bool *)data);
	int type = pci_pcie_type(dev);

	if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
	    (type == PCI_EXP_TYPE_UPSTREAM) ||
	    (type == PCI_EXP_TYPE_DOWNSTREAM)) {
		if (enable)
			pci_enable_pcie_error_reporting(dev);
		else
			pci_disable_pcie_error_reporting(dev);
	}

	if (enable)
		pcie_set_ecrc_checking(dev);

	return 0;
}

/**
 * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
 * @dev: pointer to root port's pci_dev data structure
 * @enable: true = enable error reporting, false = disable error reporting.
 */
static void set_downstream_devices_error_reporting(struct pci_dev *dev,
						   bool enable)
{
	set_device_error_reporting(dev, &enable);

	if (!dev->subordinate)
		return;
	pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
}

/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus loads AER service driver.
 */
static void aer_enable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer_pos;
	u16 reg16;
	u32 reg32;

	/* Clear PCIe Capability's Device Status */
	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);

	/* Disable system error generation in response to error messages */
	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
				   SYSTEM_ERROR_INTR_ON_MESG_MASK);

	aer_pos = pdev->aer_cap;
	/* Clear error status */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);

	/*
	 * Enable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, true);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32);
}
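
/*
 * The ordering in aer_enable_rootport() appears deliberate: stale error
 * status is cleared and reporting is enabled on the hierarchy before
 * ROOT_PORT_INTR_ON_MESG_MASK is set, so the Root Port should not raise
 * an interrupt for errors logged before the driver took over.
 */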

/**
 * aer_disable_rootport - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus unloads AER service driver.
 */
static void aer_disable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	u32 reg32;
	int pos;

	/*
	 * Disable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, false);

	pos = pdev->aer_cap;
	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	/* Clear Root's error status reg */
	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
}

/**
 * aer_remove - clean up resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus unloads or AER probe fails.
 */
static void aer_remove(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	aer_disable_rootport(rpc);
}

/**
 * aer_probe - initialize resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus loads AER service driver.
 */
static int aer_probe(struct pcie_device *dev)
{
	int status;
	struct aer_rpc *rpc;
	struct device *device = &dev->device;
	struct pci_dev *port = dev->port;

	rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
	if (!rpc)
		return -ENOMEM;

	rpc->rpd = port;
	INIT_KFIFO(rpc->aer_fifo);
	set_service_data(dev, rpc);

	status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,
					   IRQF_SHARED, "aerdrv", dev);
	if (status) {
		pci_err(port, "request AER IRQ %d failed\n", dev->irq);
		return status;
	}

	aer_enable_rootport(rpc);
	pci_info(port, "enabled with IRQ %d\n", dev->irq);
	return 0;
}

/**
 * aer_root_reset - reset link on Root Port
 * @dev: pointer to Root Port's pci_dev data structure
 *
 * Invoked by Port Bus driver when performing link reset at Root Port.
 */
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
	u32 reg32;
	int pos;
	int rc;

	pos = dev->aer_cap;

	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	rc = pci_bus_error_reset(dev);
	pci_info(dev, "Root Port link has been reset\n");

	/* Clear Root Error Status */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static struct pcie_port_service_driver aerdriver = {
	.name		= "aer",
	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
	.service	= PCIE_PORT_SERVICE_AER,

	.probe		= aer_probe,
	.remove		= aer_remove,
};

/**
 * pcie_aer_init - register AER root service driver
 *
 * Invoked when AER root service driver is loaded.
 */
int __init pcie_aer_init(void)
{
	if (!pci_aer_available() || aer_acpi_firmware_first())
		return -ENXIO;
	return pcie_port_service_register(&aerdriver);
}