1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Implement the AER root port service driver. The driver registers an IRQ 4 * handler. When a root port triggers an AER interrupt, the IRQ handler 5 * collects root port status and schedules work. 6 * 7 * Copyright (C) 2006 Intel Corp. 8 * Tom Long Nguyen (tom.l.nguyen@intel.com) 9 * Zhang Yanmin (yanmin.zhang@intel.com) 10 * 11 * (C) Copyright 2009 Hewlett-Packard Development Company, L.P. 12 * Andrew Patterson <andrew.patterson@hp.com> 13 */ 14 15 #define pr_fmt(fmt) "AER: " fmt 16 #define dev_fmt pr_fmt 17 18 #include <linux/bitops.h> 19 #include <linux/cper.h> 20 #include <linux/pci.h> 21 #include <linux/pci-acpi.h> 22 #include <linux/sched.h> 23 #include <linux/kernel.h> 24 #include <linux/errno.h> 25 #include <linux/pm.h> 26 #include <linux/init.h> 27 #include <linux/interrupt.h> 28 #include <linux/delay.h> 29 #include <linux/kfifo.h> 30 #include <linux/slab.h> 31 #include <acpi/apei.h> 32 #include <ras/ras_event.h> 33 34 #include "../pci.h" 35 #include "portdrv.h" 36 37 #define AER_ERROR_SOURCES_MAX 128 38 39 #define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */ 40 #define AER_MAX_TYPEOF_UNCOR_ERRS 27 /* as per PCI_ERR_UNCOR_STATUS*/ 41 42 struct aer_err_source { 43 unsigned int status; 44 unsigned int id; 45 }; 46 47 struct aer_rpc { 48 struct pci_dev *rpd; /* Root Port device */ 49 DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX); 50 }; 51 52 /* AER stats for the device */ 53 struct aer_stats { 54 55 /* 56 * Fields for all AER capable devices. They indicate the errors 57 * "as seen by this device". Note that this may mean that if an 58 * end point is causing problems, the AER counters may increment 59 * at its link partner (e.g. root port) because the errors will be 60 * "seen" by the link partner and not the the problematic end point 61 * itself (which may report all counters as 0 as it never saw any 62 * problems). 63 */ 64 /* Counters for different type of correctable errors */ 65 u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS]; 66 /* Counters for different type of fatal uncorrectable errors */ 67 u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; 68 /* Counters for different type of nonfatal uncorrectable errors */ 69 u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; 70 /* Total number of ERR_COR sent by this device */ 71 u64 dev_total_cor_errs; 72 /* Total number of ERR_FATAL sent by this device */ 73 u64 dev_total_fatal_errs; 74 /* Total number of ERR_NONFATAL sent by this device */ 75 u64 dev_total_nonfatal_errs; 76 77 /* 78 * Fields for Root ports & root complex event collectors only, these 79 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL 80 * messages received by the root port / event collector, INCLUDING the 81 * ones that are generated internally (by the rootport itself) 82 */ 83 u64 rootport_total_cor_errs; 84 u64 rootport_total_fatal_errs; 85 u64 rootport_total_nonfatal_errs; 86 }; 87 88 #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ 89 PCI_ERR_UNC_ECRC| \ 90 PCI_ERR_UNC_UNSUP| \ 91 PCI_ERR_UNC_COMP_ABORT| \ 92 PCI_ERR_UNC_UNX_COMP| \ 93 PCI_ERR_UNC_MALF_TLP) 94 95 #define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ 96 PCI_EXP_RTCTL_SENFEE| \ 97 PCI_EXP_RTCTL_SEFEE) 98 #define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \ 99 PCI_ERR_ROOT_CMD_NONFATAL_EN| \ 100 PCI_ERR_ROOT_CMD_FATAL_EN) 101 #define ERR_COR_ID(d) (d & 0xffff) 102 #define ERR_UNCOR_ID(d) (d >> 16) 103 104 static int pcie_aer_disable; 105 static pci_ers_result_t aer_root_reset(struct pci_dev *dev); 106 107 void pci_no_aer(void) 108 { 109 pcie_aer_disable = 1; 110 } 111 112 bool pci_aer_available(void) 113 { 114 return !pcie_aer_disable && pci_msi_enabled(); 115 } 116 117 #ifdef CONFIG_PCIE_ECRC 118 119 #define ECRC_POLICY_DEFAULT 0 /* ECRC set by BIOS */ 120 #define ECRC_POLICY_OFF 1 /* ECRC off for performance */ 121 #define ECRC_POLICY_ON 2 /* ECRC on for data integrity */ 122 123 static int ecrc_policy = ECRC_POLICY_DEFAULT; 124 125 static const char * const ecrc_policy_str[] = { 126 [ECRC_POLICY_DEFAULT] = "bios", 127 [ECRC_POLICY_OFF] = "off", 128 [ECRC_POLICY_ON] = "on" 129 }; 130 131 /** 132 * enable_ecrc_checking - enable PCIe ECRC checking for a device 133 * @dev: the PCI device 134 * 135 * Returns 0 on success, or negative on failure. 136 */ 137 static int enable_ecrc_checking(struct pci_dev *dev) 138 { 139 int aer = dev->aer_cap; 140 u32 reg32; 141 142 if (!aer) 143 return -ENODEV; 144 145 pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32); 146 if (reg32 & PCI_ERR_CAP_ECRC_GENC) 147 reg32 |= PCI_ERR_CAP_ECRC_GENE; 148 if (reg32 & PCI_ERR_CAP_ECRC_CHKC) 149 reg32 |= PCI_ERR_CAP_ECRC_CHKE; 150 pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); 151 152 return 0; 153 } 154 155 /** 156 * disable_ecrc_checking - disables PCIe ECRC checking for a device 157 * @dev: the PCI device 158 * 159 * Returns 0 on success, or negative on failure. 160 */ 161 static int disable_ecrc_checking(struct pci_dev *dev) 162 { 163 int aer = dev->aer_cap; 164 u32 reg32; 165 166 if (!aer) 167 return -ENODEV; 168 169 pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32); 170 reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); 171 pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); 172 173 return 0; 174 } 175 176 /** 177 * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy 178 * @dev: the PCI device 179 */ 180 void pcie_set_ecrc_checking(struct pci_dev *dev) 181 { 182 switch (ecrc_policy) { 183 case ECRC_POLICY_DEFAULT: 184 return; 185 case ECRC_POLICY_OFF: 186 disable_ecrc_checking(dev); 187 break; 188 case ECRC_POLICY_ON: 189 enable_ecrc_checking(dev); 190 break; 191 default: 192 return; 193 } 194 } 195 196 /** 197 * pcie_ecrc_get_policy - parse kernel command-line ecrc option 198 * @str: ECRC policy from kernel command line to use 199 */ 200 void pcie_ecrc_get_policy(char *str) 201 { 202 int i; 203 204 i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str); 205 if (i < 0) 206 return; 207 208 ecrc_policy = i; 209 } 210 #endif /* CONFIG_PCIE_ECRC */ 211 212 #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ 213 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) 214 215 int pcie_aer_is_native(struct pci_dev *dev) 216 { 217 struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); 218 219 if (!dev->aer_cap) 220 return 0; 221 222 return pcie_ports_native || host->native_aer; 223 } 224 225 int pci_enable_pcie_error_reporting(struct pci_dev *dev) 226 { 227 int rc; 228 229 if (!pcie_aer_is_native(dev)) 230 return -EIO; 231 232 rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); 233 return pcibios_err_to_errno(rc); 234 } 235 EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); 236 237 int pci_disable_pcie_error_reporting(struct pci_dev *dev) 238 { 239 int rc; 240 241 if (!pcie_aer_is_native(dev)) 242 return -EIO; 243 244 rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); 245 return pcibios_err_to_errno(rc); 246 } 247 EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); 248 249 int pci_aer_clear_nonfatal_status(struct pci_dev *dev) 250 { 251 int aer = dev->aer_cap; 252 u32 status, sev; 253 254 if (!pcie_aer_is_native(dev)) 255 return -EIO; 256 257 /* Clear status bits for ERR_NONFATAL errors only */ 258 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); 259 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); 260 status &= ~sev; 261 if (status) 262 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); 263 264 return 0; 265 } 266 EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status); 267 268 void pci_aer_clear_fatal_status(struct pci_dev *dev) 269 { 270 int aer = dev->aer_cap; 271 u32 status, sev; 272 273 if (!pcie_aer_is_native(dev)) 274 return; 275 276 /* Clear status bits for ERR_FATAL errors only */ 277 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); 278 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); 279 status &= sev; 280 if (status) 281 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); 282 } 283 284 /** 285 * pci_aer_raw_clear_status - Clear AER error registers. 286 * @dev: the PCI device 287 * 288 * Clearing AER error status registers unconditionally, regardless of 289 * whether they're owned by firmware or the OS. 290 * 291 * Returns 0 on success, or negative on failure. 292 */ 293 int pci_aer_raw_clear_status(struct pci_dev *dev) 294 { 295 int aer = dev->aer_cap; 296 u32 status; 297 int port_type; 298 299 if (!aer) 300 return -EIO; 301 302 port_type = pci_pcie_type(dev); 303 if (port_type == PCI_EXP_TYPE_ROOT_PORT || 304 port_type == PCI_EXP_TYPE_RC_EC) { 305 pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status); 306 pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status); 307 } 308 309 pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); 310 pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status); 311 312 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); 313 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); 314 315 return 0; 316 } 317 318 int pci_aer_clear_status(struct pci_dev *dev) 319 { 320 if (!pcie_aer_is_native(dev)) 321 return -EIO; 322 323 return pci_aer_raw_clear_status(dev); 324 } 325 326 void pci_save_aer_state(struct pci_dev *dev) 327 { 328 int aer = dev->aer_cap; 329 struct pci_cap_saved_state *save_state; 330 u32 *cap; 331 332 if (!aer) 333 return; 334 335 save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); 336 if (!save_state) 337 return; 338 339 cap = &save_state->cap.data[0]; 340 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++); 341 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++); 342 pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++); 343 pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++); 344 if (pcie_cap_has_rtctl(dev)) 345 pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++); 346 } 347 348 void pci_restore_aer_state(struct pci_dev *dev) 349 { 350 int aer = dev->aer_cap; 351 struct pci_cap_saved_state *save_state; 352 u32 *cap; 353 354 if (!aer) 355 return; 356 357 save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); 358 if (!save_state) 359 return; 360 361 cap = &save_state->cap.data[0]; 362 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++); 363 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++); 364 pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++); 365 pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++); 366 if (pcie_cap_has_rtctl(dev)) 367 pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++); 368 } 369 370 void pci_aer_init(struct pci_dev *dev) 371 { 372 int n; 373 374 dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); 375 if (!dev->aer_cap) 376 return; 377 378 dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL); 379 380 /* 381 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER, 382 * PCI_ERR_COR_MASK, and PCI_ERR_CAP. Root and Root Complex Event 383 * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0, sec 384 * 7.8.4). 385 */ 386 n = pcie_cap_has_rtctl(dev) ? 5 : 4; 387 pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n); 388 389 pci_aer_clear_status(dev); 390 } 391 392 void pci_aer_exit(struct pci_dev *dev) 393 { 394 kfree(dev->aer_stats); 395 dev->aer_stats = NULL; 396 } 397 398 #define AER_AGENT_RECEIVER 0 399 #define AER_AGENT_REQUESTER 1 400 #define AER_AGENT_COMPLETER 2 401 #define AER_AGENT_TRANSMITTER 3 402 403 #define AER_AGENT_REQUESTER_MASK(t) ((t == AER_CORRECTABLE) ? \ 404 0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP)) 405 #define AER_AGENT_COMPLETER_MASK(t) ((t == AER_CORRECTABLE) ? \ 406 0 : PCI_ERR_UNC_COMP_ABORT) 407 #define AER_AGENT_TRANSMITTER_MASK(t) ((t == AER_CORRECTABLE) ? \ 408 (PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0) 409 410 #define AER_GET_AGENT(t, e) \ 411 ((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \ 412 (e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \ 413 (e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \ 414 AER_AGENT_RECEIVER) 415 416 #define AER_PHYSICAL_LAYER_ERROR 0 417 #define AER_DATA_LINK_LAYER_ERROR 1 418 #define AER_TRANSACTION_LAYER_ERROR 2 419 420 #define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ 421 PCI_ERR_COR_RCVR : 0) 422 #define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ 423 (PCI_ERR_COR_BAD_TLP| \ 424 PCI_ERR_COR_BAD_DLLP| \ 425 PCI_ERR_COR_REP_ROLL| \ 426 PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP) 427 428 #define AER_GET_LAYER_ERROR(t, e) \ 429 ((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \ 430 (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \ 431 AER_TRANSACTION_LAYER_ERROR) 432 433 /* 434 * AER error strings 435 */ 436 static const char *aer_error_severity_string[] = { 437 "Uncorrected (Non-Fatal)", 438 "Uncorrected (Fatal)", 439 "Corrected" 440 }; 441 442 static const char *aer_error_layer[] = { 443 "Physical Layer", 444 "Data Link Layer", 445 "Transaction Layer" 446 }; 447 448 static const char *aer_correctable_error_string[] = { 449 "RxErr", /* Bit Position 0 */ 450 NULL, 451 NULL, 452 NULL, 453 NULL, 454 NULL, 455 "BadTLP", /* Bit Position 6 */ 456 "BadDLLP", /* Bit Position 7 */ 457 "Rollover", /* Bit Position 8 */ 458 NULL, 459 NULL, 460 NULL, 461 "Timeout", /* Bit Position 12 */ 462 "NonFatalErr", /* Bit Position 13 */ 463 "CorrIntErr", /* Bit Position 14 */ 464 "HeaderOF", /* Bit Position 15 */ 465 NULL, /* Bit Position 16 */ 466 NULL, /* Bit Position 17 */ 467 NULL, /* Bit Position 18 */ 468 NULL, /* Bit Position 19 */ 469 NULL, /* Bit Position 20 */ 470 NULL, /* Bit Position 21 */ 471 NULL, /* Bit Position 22 */ 472 NULL, /* Bit Position 23 */ 473 NULL, /* Bit Position 24 */ 474 NULL, /* Bit Position 25 */ 475 NULL, /* Bit Position 26 */ 476 NULL, /* Bit Position 27 */ 477 NULL, /* Bit Position 28 */ 478 NULL, /* Bit Position 29 */ 479 NULL, /* Bit Position 30 */ 480 NULL, /* Bit Position 31 */ 481 }; 482 483 static const char *aer_uncorrectable_error_string[] = { 484 "Undefined", /* Bit Position 0 */ 485 NULL, 486 NULL, 487 NULL, 488 "DLP", /* Bit Position 4 */ 489 "SDES", /* Bit Position 5 */ 490 NULL, 491 NULL, 492 NULL, 493 NULL, 494 NULL, 495 NULL, 496 "TLP", /* Bit Position 12 */ 497 "FCP", /* Bit Position 13 */ 498 "CmpltTO", /* Bit Position 14 */ 499 "CmpltAbrt", /* Bit Position 15 */ 500 "UnxCmplt", /* Bit Position 16 */ 501 "RxOF", /* Bit Position 17 */ 502 "MalfTLP", /* Bit Position 18 */ 503 "ECRC", /* Bit Position 19 */ 504 "UnsupReq", /* Bit Position 20 */ 505 "ACSViol", /* Bit Position 21 */ 506 "UncorrIntErr", /* Bit Position 22 */ 507 "BlockedTLP", /* Bit Position 23 */ 508 "AtomicOpBlocked", /* Bit Position 24 */ 509 "TLPBlockedErr", /* Bit Position 25 */ 510 "PoisonTLPBlocked", /* Bit Position 26 */ 511 NULL, /* Bit Position 27 */ 512 NULL, /* Bit Position 28 */ 513 NULL, /* Bit Position 29 */ 514 NULL, /* Bit Position 30 */ 515 NULL, /* Bit Position 31 */ 516 }; 517 518 static const char *aer_agent_string[] = { 519 "Receiver ID", 520 "Requester ID", 521 "Completer ID", 522 "Transmitter ID" 523 }; 524 525 #define aer_stats_dev_attr(name, stats_array, strings_array, \ 526 total_string, total_field) \ 527 static ssize_t \ 528 name##_show(struct device *dev, struct device_attribute *attr, \ 529 char *buf) \ 530 { \ 531 unsigned int i; \ 532 char *str = buf; \ 533 struct pci_dev *pdev = to_pci_dev(dev); \ 534 u64 *stats = pdev->aer_stats->stats_array; \ 535 \ 536 for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \ 537 if (strings_array[i]) \ 538 str += sprintf(str, "%s %llu\n", \ 539 strings_array[i], stats[i]); \ 540 else if (stats[i]) \ 541 str += sprintf(str, #stats_array "_bit[%d] %llu\n",\ 542 i, stats[i]); \ 543 } \ 544 str += sprintf(str, "TOTAL_%s %llu\n", total_string, \ 545 pdev->aer_stats->total_field); \ 546 return str-buf; \ 547 } \ 548 static DEVICE_ATTR_RO(name) 549 550 aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs, 551 aer_correctable_error_string, "ERR_COR", 552 dev_total_cor_errs); 553 aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs, 554 aer_uncorrectable_error_string, "ERR_FATAL", 555 dev_total_fatal_errs); 556 aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, 557 aer_uncorrectable_error_string, "ERR_NONFATAL", 558 dev_total_nonfatal_errs); 559 560 #define aer_stats_rootport_attr(name, field) \ 561 static ssize_t \ 562 name##_show(struct device *dev, struct device_attribute *attr, \ 563 char *buf) \ 564 { \ 565 struct pci_dev *pdev = to_pci_dev(dev); \ 566 return sprintf(buf, "%llu\n", pdev->aer_stats->field); \ 567 } \ 568 static DEVICE_ATTR_RO(name) 569 570 aer_stats_rootport_attr(aer_rootport_total_err_cor, 571 rootport_total_cor_errs); 572 aer_stats_rootport_attr(aer_rootport_total_err_fatal, 573 rootport_total_fatal_errs); 574 aer_stats_rootport_attr(aer_rootport_total_err_nonfatal, 575 rootport_total_nonfatal_errs); 576 577 static struct attribute *aer_stats_attrs[] __ro_after_init = { 578 &dev_attr_aer_dev_correctable.attr, 579 &dev_attr_aer_dev_fatal.attr, 580 &dev_attr_aer_dev_nonfatal.attr, 581 &dev_attr_aer_rootport_total_err_cor.attr, 582 &dev_attr_aer_rootport_total_err_fatal.attr, 583 &dev_attr_aer_rootport_total_err_nonfatal.attr, 584 NULL 585 }; 586 587 static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, 588 struct attribute *a, int n) 589 { 590 struct device *dev = kobj_to_dev(kobj); 591 struct pci_dev *pdev = to_pci_dev(dev); 592 593 if (!pdev->aer_stats) 594 return 0; 595 596 if ((a == &dev_attr_aer_rootport_total_err_cor.attr || 597 a == &dev_attr_aer_rootport_total_err_fatal.attr || 598 a == &dev_attr_aer_rootport_total_err_nonfatal.attr) && 599 ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) && 600 (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC))) 601 return 0; 602 603 return a->mode; 604 } 605 606 const struct attribute_group aer_stats_attr_group = { 607 .attrs = aer_stats_attrs, 608 .is_visible = aer_stats_attrs_are_visible, 609 }; 610 611 static void pci_dev_aer_stats_incr(struct pci_dev *pdev, 612 struct aer_err_info *info) 613 { 614 unsigned long status = info->status & ~info->mask; 615 int i, max = -1; 616 u64 *counter = NULL; 617 struct aer_stats *aer_stats = pdev->aer_stats; 618 619 if (!aer_stats) 620 return; 621 622 switch (info->severity) { 623 case AER_CORRECTABLE: 624 aer_stats->dev_total_cor_errs++; 625 counter = &aer_stats->dev_cor_errs[0]; 626 max = AER_MAX_TYPEOF_COR_ERRS; 627 break; 628 case AER_NONFATAL: 629 aer_stats->dev_total_nonfatal_errs++; 630 counter = &aer_stats->dev_nonfatal_errs[0]; 631 max = AER_MAX_TYPEOF_UNCOR_ERRS; 632 break; 633 case AER_FATAL: 634 aer_stats->dev_total_fatal_errs++; 635 counter = &aer_stats->dev_fatal_errs[0]; 636 max = AER_MAX_TYPEOF_UNCOR_ERRS; 637 break; 638 } 639 640 for_each_set_bit(i, &status, max) 641 counter[i]++; 642 } 643 644 static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, 645 struct aer_err_source *e_src) 646 { 647 struct aer_stats *aer_stats = pdev->aer_stats; 648 649 if (!aer_stats) 650 return; 651 652 if (e_src->status & PCI_ERR_ROOT_COR_RCV) 653 aer_stats->rootport_total_cor_errs++; 654 655 if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { 656 if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) 657 aer_stats->rootport_total_fatal_errs++; 658 else 659 aer_stats->rootport_total_nonfatal_errs++; 660 } 661 } 662 663 static void __print_tlp_header(struct pci_dev *dev, 664 struct aer_header_log_regs *t) 665 { 666 pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", 667 t->dw0, t->dw1, t->dw2, t->dw3); 668 } 669 670 static void __aer_print_error(struct pci_dev *dev, 671 struct aer_err_info *info) 672 { 673 const char **strings; 674 unsigned long status = info->status & ~info->mask; 675 const char *level, *errmsg; 676 int i; 677 678 if (info->severity == AER_CORRECTABLE) { 679 strings = aer_correctable_error_string; 680 level = KERN_WARNING; 681 } else { 682 strings = aer_uncorrectable_error_string; 683 level = KERN_ERR; 684 } 685 686 for_each_set_bit(i, &status, 32) { 687 errmsg = strings[i]; 688 if (!errmsg) 689 errmsg = "Unknown Error Bit"; 690 691 pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg, 692 info->first_error == i ? " (First)" : ""); 693 } 694 pci_dev_aer_stats_incr(dev, info); 695 } 696 697 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) 698 { 699 int layer, agent; 700 int id = ((dev->bus->number << 8) | dev->devfn); 701 const char *level; 702 703 if (!info->status) { 704 pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", 705 aer_error_severity_string[info->severity]); 706 goto out; 707 } 708 709 layer = AER_GET_LAYER_ERROR(info->severity, info->status); 710 agent = AER_GET_AGENT(info->severity, info->status); 711 712 level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; 713 714 pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", 715 aer_error_severity_string[info->severity], 716 aer_error_layer[layer], aer_agent_string[agent]); 717 718 pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", 719 dev->vendor, dev->device, info->status, info->mask); 720 721 __aer_print_error(dev, info); 722 723 if (info->tlp_header_valid) 724 __print_tlp_header(dev, &info->tlp); 725 726 out: 727 if (info->id && info->error_dev_num > 1 && info->id == id) 728 pci_err(dev, " Error of this Agent is reported first\n"); 729 730 trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), 731 info->severity, info->tlp_header_valid, &info->tlp); 732 } 733 734 static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) 735 { 736 u8 bus = info->id >> 8; 737 u8 devfn = info->id & 0xff; 738 739 pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n", 740 info->multi_error_valid ? "Multiple " : "", 741 aer_error_severity_string[info->severity], 742 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), 743 PCI_FUNC(devfn)); 744 } 745 746 #ifdef CONFIG_ACPI_APEI_PCIEAER 747 int cper_severity_to_aer(int cper_severity) 748 { 749 switch (cper_severity) { 750 case CPER_SEV_RECOVERABLE: 751 return AER_NONFATAL; 752 case CPER_SEV_FATAL: 753 return AER_FATAL; 754 default: 755 return AER_CORRECTABLE; 756 } 757 } 758 EXPORT_SYMBOL_GPL(cper_severity_to_aer); 759 760 void cper_print_aer(struct pci_dev *dev, int aer_severity, 761 struct aer_capability_regs *aer) 762 { 763 int layer, agent, tlp_header_valid = 0; 764 u32 status, mask; 765 struct aer_err_info info; 766 767 if (aer_severity == AER_CORRECTABLE) { 768 status = aer->cor_status; 769 mask = aer->cor_mask; 770 } else { 771 status = aer->uncor_status; 772 mask = aer->uncor_mask; 773 tlp_header_valid = status & AER_LOG_TLP_MASKS; 774 } 775 776 layer = AER_GET_LAYER_ERROR(aer_severity, status); 777 agent = AER_GET_AGENT(aer_severity, status); 778 779 memset(&info, 0, sizeof(info)); 780 info.severity = aer_severity; 781 info.status = status; 782 info.mask = mask; 783 info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); 784 785 pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); 786 __aer_print_error(dev, &info); 787 pci_err(dev, "aer_layer=%s, aer_agent=%s\n", 788 aer_error_layer[layer], aer_agent_string[agent]); 789 790 if (aer_severity != AER_CORRECTABLE) 791 pci_err(dev, "aer_uncor_severity: 0x%08x\n", 792 aer->uncor_severity); 793 794 if (tlp_header_valid) 795 __print_tlp_header(dev, &aer->header_log); 796 797 trace_aer_event(dev_name(&dev->dev), (status & ~mask), 798 aer_severity, tlp_header_valid, &aer->header_log); 799 } 800 #endif 801 802 /** 803 * add_error_device - list device to be handled 804 * @e_info: pointer to error info 805 * @dev: pointer to pci_dev to be added 806 */ 807 static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) 808 { 809 if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { 810 e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); 811 e_info->error_dev_num++; 812 return 0; 813 } 814 return -ENOSPC; 815 } 816 817 /** 818 * is_error_source - check whether the device is source of reported error 819 * @dev: pointer to pci_dev to be checked 820 * @e_info: pointer to reported error info 821 */ 822 static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) 823 { 824 int aer = dev->aer_cap; 825 u32 status, mask; 826 u16 reg16; 827 828 /* 829 * When bus id is equal to 0, it might be a bad id 830 * reported by root port. 831 */ 832 if ((PCI_BUS_NUM(e_info->id) != 0) && 833 !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { 834 /* Device ID match? */ 835 if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) 836 return true; 837 838 /* Continue id comparing if there is no multiple error */ 839 if (!e_info->multi_error_valid) 840 return false; 841 } 842 843 /* 844 * When either 845 * 1) bus id is equal to 0. Some ports might lose the bus 846 * id of error source id; 847 * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set 848 * 3) There are multiple errors and prior ID comparing fails; 849 * We check AER status registers to find possible reporter. 850 */ 851 if (atomic_read(&dev->enable_cnt) == 0) 852 return false; 853 854 /* Check if AER is enabled */ 855 pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16); 856 if (!(reg16 & PCI_EXP_AER_FLAGS)) 857 return false; 858 859 if (!aer) 860 return false; 861 862 /* Check if error is recorded */ 863 if (e_info->severity == AER_CORRECTABLE) { 864 pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); 865 pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); 866 } else { 867 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); 868 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); 869 } 870 if (status & ~mask) 871 return true; 872 873 return false; 874 } 875 876 static int find_device_iter(struct pci_dev *dev, void *data) 877 { 878 struct aer_err_info *e_info = (struct aer_err_info *)data; 879 880 if (is_error_source(dev, e_info)) { 881 /* List this device */ 882 if (add_error_device(e_info, dev)) { 883 /* We cannot handle more... Stop iteration */ 884 /* TODO: Should print error message here? */ 885 return 1; 886 } 887 888 /* If there is only a single error, stop iteration */ 889 if (!e_info->multi_error_valid) 890 return 1; 891 } 892 return 0; 893 } 894 895 /** 896 * find_source_device - search through device hierarchy for source device 897 * @parent: pointer to Root Port pci_dev data structure 898 * @e_info: including detailed error information such like id 899 * 900 * Return true if found. 901 * 902 * Invoked by DPC when error is detected at the Root Port. 903 * Caller of this function must set id, severity, and multi_error_valid of 904 * struct aer_err_info pointed by @e_info properly. This function must fill 905 * e_info->error_dev_num and e_info->dev[], based on the given information. 906 */ 907 static bool find_source_device(struct pci_dev *parent, 908 struct aer_err_info *e_info) 909 { 910 struct pci_dev *dev = parent; 911 int result; 912 913 /* Must reset in this function */ 914 e_info->error_dev_num = 0; 915 916 /* Is Root Port an agent that sends error message? */ 917 result = find_device_iter(dev, e_info); 918 if (result) 919 return true; 920 921 if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC) 922 pcie_walk_rcec(parent, find_device_iter, e_info); 923 else 924 pci_walk_bus(parent->subordinate, find_device_iter, e_info); 925 926 if (!e_info->error_dev_num) { 927 pci_info(parent, "can't find device of ID%04x\n", e_info->id); 928 return false; 929 } 930 return true; 931 } 932 933 /** 934 * handle_error_source - handle logging error into an event log 935 * @dev: pointer to pci_dev data structure of error source device 936 * @info: comprehensive error information 937 * 938 * Invoked when an error being detected by Root Port. 939 */ 940 static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) 941 { 942 int aer = dev->aer_cap; 943 944 if (info->severity == AER_CORRECTABLE) { 945 /* 946 * Correctable error does not need software intervention. 947 * No need to go through error recovery process. 948 */ 949 if (aer) 950 pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, 951 info->status); 952 if (pcie_aer_is_native(dev)) 953 pcie_clear_device_status(dev); 954 } else if (info->severity == AER_NONFATAL) 955 pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); 956 else if (info->severity == AER_FATAL) 957 pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset); 958 pci_dev_put(dev); 959 } 960 961 #ifdef CONFIG_ACPI_APEI_PCIEAER 962 963 #define AER_RECOVER_RING_ORDER 4 964 #define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER) 965 966 struct aer_recover_entry { 967 u8 bus; 968 u8 devfn; 969 u16 domain; 970 int severity; 971 struct aer_capability_regs *regs; 972 }; 973 974 static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry, 975 AER_RECOVER_RING_SIZE); 976 977 static void aer_recover_work_func(struct work_struct *work) 978 { 979 struct aer_recover_entry entry; 980 struct pci_dev *pdev; 981 982 while (kfifo_get(&aer_recover_ring, &entry)) { 983 pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, 984 entry.devfn); 985 if (!pdev) { 986 pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", 987 entry.domain, entry.bus, 988 PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); 989 continue; 990 } 991 cper_print_aer(pdev, entry.severity, entry.regs); 992 if (entry.severity == AER_NONFATAL) 993 pcie_do_recovery(pdev, pci_channel_io_normal, 994 aer_root_reset); 995 else if (entry.severity == AER_FATAL) 996 pcie_do_recovery(pdev, pci_channel_io_frozen, 997 aer_root_reset); 998 pci_dev_put(pdev); 999 } 1000 } 1001 1002 /* 1003 * Mutual exclusion for writers of aer_recover_ring, reader side don't 1004 * need lock, because there is only one reader and lock is not needed 1005 * between reader and writer. 1006 */ 1007 static DEFINE_SPINLOCK(aer_recover_ring_lock); 1008 static DECLARE_WORK(aer_recover_work, aer_recover_work_func); 1009 1010 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, 1011 int severity, struct aer_capability_regs *aer_regs) 1012 { 1013 struct aer_recover_entry entry = { 1014 .bus = bus, 1015 .devfn = devfn, 1016 .domain = domain, 1017 .severity = severity, 1018 .regs = aer_regs, 1019 }; 1020 1021 if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1, 1022 &aer_recover_ring_lock)) 1023 schedule_work(&aer_recover_work); 1024 else 1025 pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", 1026 domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1027 } 1028 EXPORT_SYMBOL_GPL(aer_recover_queue); 1029 #endif 1030 1031 /** 1032 * aer_get_device_error_info - read error status from dev and store it to info 1033 * @dev: pointer to the device expected to have a error record 1034 * @info: pointer to structure to store the error record 1035 * 1036 * Return 1 on success, 0 on error. 1037 * 1038 * Note that @info is reused among all error devices. Clear fields properly. 1039 */ 1040 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) 1041 { 1042 int type = pci_pcie_type(dev); 1043 int aer = dev->aer_cap; 1044 int temp; 1045 1046 /* Must reset in this function */ 1047 info->status = 0; 1048 info->tlp_header_valid = 0; 1049 1050 /* The device might not support AER */ 1051 if (!aer) 1052 return 0; 1053 1054 if (info->severity == AER_CORRECTABLE) { 1055 pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, 1056 &info->status); 1057 pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, 1058 &info->mask); 1059 if (!(info->status & ~info->mask)) 1060 return 0; 1061 } else if (type == PCI_EXP_TYPE_ROOT_PORT || 1062 type == PCI_EXP_TYPE_RC_EC || 1063 type == PCI_EXP_TYPE_DOWNSTREAM || 1064 info->severity == AER_NONFATAL) { 1065 1066 /* Link is still healthy for IO reads */ 1067 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, 1068 &info->status); 1069 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, 1070 &info->mask); 1071 if (!(info->status & ~info->mask)) 1072 return 0; 1073 1074 /* Get First Error Pointer */ 1075 pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp); 1076 info->first_error = PCI_ERR_CAP_FEP(temp); 1077 1078 if (info->status & AER_LOG_TLP_MASKS) { 1079 info->tlp_header_valid = 1; 1080 pci_read_config_dword(dev, 1081 aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0); 1082 pci_read_config_dword(dev, 1083 aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); 1084 pci_read_config_dword(dev, 1085 aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); 1086 pci_read_config_dword(dev, 1087 aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); 1088 } 1089 } 1090 1091 return 1; 1092 } 1093 1094 static inline void aer_process_err_devices(struct aer_err_info *e_info) 1095 { 1096 int i; 1097 1098 /* Report all before handle them, not to lost records by reset etc. */ 1099 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { 1100 if (aer_get_device_error_info(e_info->dev[i], e_info)) 1101 aer_print_error(e_info->dev[i], e_info); 1102 } 1103 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { 1104 if (aer_get_device_error_info(e_info->dev[i], e_info)) 1105 handle_error_source(e_info->dev[i], e_info); 1106 } 1107 } 1108 1109 /** 1110 * aer_isr_one_error - consume an error detected by root port 1111 * @rpc: pointer to the root port which holds an error 1112 * @e_src: pointer to an error source 1113 */ 1114 static void aer_isr_one_error(struct aer_rpc *rpc, 1115 struct aer_err_source *e_src) 1116 { 1117 struct pci_dev *pdev = rpc->rpd; 1118 struct aer_err_info e_info; 1119 1120 pci_rootport_aer_stats_incr(pdev, e_src); 1121 1122 /* 1123 * There is a possibility that both correctable error and 1124 * uncorrectable error being logged. Report correctable error first. 1125 */ 1126 if (e_src->status & PCI_ERR_ROOT_COR_RCV) { 1127 e_info.id = ERR_COR_ID(e_src->id); 1128 e_info.severity = AER_CORRECTABLE; 1129 1130 if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) 1131 e_info.multi_error_valid = 1; 1132 else 1133 e_info.multi_error_valid = 0; 1134 aer_print_port_info(pdev, &e_info); 1135 1136 if (find_source_device(pdev, &e_info)) 1137 aer_process_err_devices(&e_info); 1138 } 1139 1140 if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { 1141 e_info.id = ERR_UNCOR_ID(e_src->id); 1142 1143 if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) 1144 e_info.severity = AER_FATAL; 1145 else 1146 e_info.severity = AER_NONFATAL; 1147 1148 if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) 1149 e_info.multi_error_valid = 1; 1150 else 1151 e_info.multi_error_valid = 0; 1152 1153 aer_print_port_info(pdev, &e_info); 1154 1155 if (find_source_device(pdev, &e_info)) 1156 aer_process_err_devices(&e_info); 1157 } 1158 } 1159 1160 /** 1161 * aer_isr - consume errors detected by root port 1162 * @irq: IRQ assigned to Root Port 1163 * @context: pointer to Root Port data structure 1164 * 1165 * Invoked, as DPC, when root port records new detected error 1166 */ 1167 static irqreturn_t aer_isr(int irq, void *context) 1168 { 1169 struct pcie_device *dev = (struct pcie_device *)context; 1170 struct aer_rpc *rpc = get_service_data(dev); 1171 struct aer_err_source e_src; 1172 1173 if (kfifo_is_empty(&rpc->aer_fifo)) 1174 return IRQ_NONE; 1175 1176 while (kfifo_get(&rpc->aer_fifo, &e_src)) 1177 aer_isr_one_error(rpc, &e_src); 1178 return IRQ_HANDLED; 1179 } 1180 1181 /** 1182 * aer_irq - Root Port's ISR 1183 * @irq: IRQ assigned to Root Port 1184 * @context: pointer to Root Port data structure 1185 * 1186 * Invoked when Root Port detects AER messages. 1187 */ 1188 static irqreturn_t aer_irq(int irq, void *context) 1189 { 1190 struct pcie_device *pdev = (struct pcie_device *)context; 1191 struct aer_rpc *rpc = get_service_data(pdev); 1192 struct pci_dev *rp = rpc->rpd; 1193 int aer = rp->aer_cap; 1194 struct aer_err_source e_src = {}; 1195 1196 pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status); 1197 if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) 1198 return IRQ_NONE; 1199 1200 pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id); 1201 pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status); 1202 1203 if (!kfifo_put(&rpc->aer_fifo, e_src)) 1204 return IRQ_HANDLED; 1205 1206 return IRQ_WAKE_THREAD; 1207 } 1208 1209 static int set_device_error_reporting(struct pci_dev *dev, void *data) 1210 { 1211 bool enable = *((bool *)data); 1212 int type = pci_pcie_type(dev); 1213 1214 if ((type == PCI_EXP_TYPE_ROOT_PORT) || 1215 (type == PCI_EXP_TYPE_RC_EC) || 1216 (type == PCI_EXP_TYPE_UPSTREAM) || 1217 (type == PCI_EXP_TYPE_DOWNSTREAM)) { 1218 if (enable) 1219 pci_enable_pcie_error_reporting(dev); 1220 else 1221 pci_disable_pcie_error_reporting(dev); 1222 } 1223 1224 if (enable) 1225 pcie_set_ecrc_checking(dev); 1226 1227 return 0; 1228 } 1229 1230 /** 1231 * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports. 1232 * @dev: pointer to root port's pci_dev data structure 1233 * @enable: true = enable error reporting, false = disable error reporting. 1234 */ 1235 static void set_downstream_devices_error_reporting(struct pci_dev *dev, 1236 bool enable) 1237 { 1238 set_device_error_reporting(dev, &enable); 1239 1240 if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) 1241 pcie_walk_rcec(dev, set_device_error_reporting, &enable); 1242 else if (dev->subordinate) 1243 pci_walk_bus(dev->subordinate, set_device_error_reporting, 1244 &enable); 1245 1246 } 1247 1248 /** 1249 * aer_enable_rootport - enable Root Port's interrupts when receiving messages 1250 * @rpc: pointer to a Root Port data structure 1251 * 1252 * Invoked when PCIe bus loads AER service driver. 1253 */ 1254 static void aer_enable_rootport(struct aer_rpc *rpc) 1255 { 1256 struct pci_dev *pdev = rpc->rpd; 1257 int aer = pdev->aer_cap; 1258 u16 reg16; 1259 u32 reg32; 1260 1261 /* Clear PCIe Capability's Device Status */ 1262 pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, ®16); 1263 pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16); 1264 1265 /* Disable system error generation in response to error messages */ 1266 pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, 1267 SYSTEM_ERROR_INTR_ON_MESG_MASK); 1268 1269 /* Clear error status */ 1270 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32); 1271 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); 1272 pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, ®32); 1273 pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32); 1274 pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, ®32); 1275 pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32); 1276 1277 /* 1278 * Enable error reporting for the root port device and downstream port 1279 * devices. 1280 */ 1281 set_downstream_devices_error_reporting(pdev, true); 1282 1283 /* Enable Root Port's interrupt in response to error messages */ 1284 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32); 1285 reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; 1286 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); 1287 } 1288 1289 /** 1290 * aer_disable_rootport - disable Root Port's interrupts when receiving messages 1291 * @rpc: pointer to a Root Port data structure 1292 * 1293 * Invoked when PCIe bus unloads AER service driver. 1294 */ 1295 static void aer_disable_rootport(struct aer_rpc *rpc) 1296 { 1297 struct pci_dev *pdev = rpc->rpd; 1298 int aer = pdev->aer_cap; 1299 u32 reg32; 1300 1301 /* 1302 * Disable error reporting for the root port device and downstream port 1303 * devices. 1304 */ 1305 set_downstream_devices_error_reporting(pdev, false); 1306 1307 /* Disable Root's interrupt in response to error messages */ 1308 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32); 1309 reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; 1310 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); 1311 1312 /* Clear Root's error status reg */ 1313 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32); 1314 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); 1315 } 1316 1317 /** 1318 * aer_remove - clean up resources 1319 * @dev: pointer to the pcie_dev data structure 1320 * 1321 * Invoked when PCI Express bus unloads or AER probe fails. 1322 */ 1323 static void aer_remove(struct pcie_device *dev) 1324 { 1325 struct aer_rpc *rpc = get_service_data(dev); 1326 1327 aer_disable_rootport(rpc); 1328 } 1329 1330 /** 1331 * aer_probe - initialize resources 1332 * @dev: pointer to the pcie_dev data structure 1333 * 1334 * Invoked when PCI Express bus loads AER service driver. 1335 */ 1336 static int aer_probe(struct pcie_device *dev) 1337 { 1338 int status; 1339 struct aer_rpc *rpc; 1340 struct device *device = &dev->device; 1341 struct pci_dev *port = dev->port; 1342 1343 /* Limit to Root Ports or Root Complex Event Collectors */ 1344 if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) && 1345 (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT)) 1346 return -ENODEV; 1347 1348 rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL); 1349 if (!rpc) 1350 return -ENOMEM; 1351 1352 rpc->rpd = port; 1353 INIT_KFIFO(rpc->aer_fifo); 1354 set_service_data(dev, rpc); 1355 1356 status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr, 1357 IRQF_SHARED, "aerdrv", dev); 1358 if (status) { 1359 pci_err(port, "request AER IRQ %d failed\n", dev->irq); 1360 return status; 1361 } 1362 1363 aer_enable_rootport(rpc); 1364 pci_info(port, "enabled with IRQ %d\n", dev->irq); 1365 return 0; 1366 } 1367 1368 /** 1369 * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP 1370 * @dev: pointer to Root Port, RCEC, or RCiEP 1371 * 1372 * Invoked by Port Bus driver when performing reset. 1373 */ 1374 static pci_ers_result_t aer_root_reset(struct pci_dev *dev) 1375 { 1376 int type = pci_pcie_type(dev); 1377 struct pci_dev *root; 1378 int aer; 1379 struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); 1380 u32 reg32; 1381 int rc; 1382 1383 /* 1384 * Only Root Ports and RCECs have AER Root Command and Root Status 1385 * registers. If "dev" is an RCiEP, the relevant registers are in 1386 * the RCEC. 1387 */ 1388 if (type == PCI_EXP_TYPE_RC_END) 1389 root = dev->rcec; 1390 else 1391 root = pcie_find_root_port(dev); 1392 1393 /* 1394 * If the platform retained control of AER, an RCiEP may not have 1395 * an RCEC visible to us, so dev->rcec ("root") may be NULL. In 1396 * that case, firmware is responsible for these registers. 1397 */ 1398 aer = root ? root->aer_cap : 0; 1399 1400 if ((host->native_aer || pcie_ports_native) && aer) { 1401 /* Disable Root's interrupt in response to error messages */ 1402 pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); 1403 reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; 1404 pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); 1405 } 1406 1407 if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) { 1408 if (pcie_has_flr(dev)) { 1409 rc = pcie_flr(dev); 1410 pci_info(dev, "has been reset (%d)\n", rc); 1411 } else { 1412 pci_info(dev, "not reset (no FLR support)\n"); 1413 rc = -ENOTTY; 1414 } 1415 } else { 1416 rc = pci_bus_error_reset(dev); 1417 pci_info(dev, "%s Port link has been reset (%d)\n", 1418 pci_is_root_bus(dev->bus) ? "Root" : "Downstream", rc); 1419 } 1420 1421 if ((host->native_aer || pcie_ports_native) && aer) { 1422 /* Clear Root Error Status */ 1423 pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, ®32); 1424 pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32); 1425 1426 /* Enable Root Port's interrupt in response to error messages */ 1427 pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); 1428 reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; 1429 pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); 1430 } 1431 1432 return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 1433 } 1434 1435 static struct pcie_port_service_driver aerdriver = { 1436 .name = "aer", 1437 .port_type = PCIE_ANY_PORT, 1438 .service = PCIE_PORT_SERVICE_AER, 1439 1440 .probe = aer_probe, 1441 .remove = aer_remove, 1442 }; 1443 1444 /** 1445 * pcie_aer_init - register AER root service driver 1446 * 1447 * Invoked when AER root service driver is loaded. 1448 */ 1449 int __init pcie_aer_init(void) 1450 { 1451 if (!pci_aer_available()) 1452 return -ENXIO; 1453 return pcie_port_service_register(&aerdriver); 1454 } 1455