// SPDX-License-Identifier: GPL-2.0
/*
 * Implement the AER root port service driver. The driver registers an IRQ
 * handler. When a root port triggers an AER interrupt, the IRQ handler
 * collects root port status and schedules work.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 *
 * (C) Copyright 2009 Hewlett-Packard Development Company, L.P.
 *	Andrew Patterson <andrew.patterson@hp.com>
 */

#define pr_fmt(fmt) "AER: " fmt
#define dev_fmt pr_fmt

#include <linux/bitops.h>
#include <linux/cper.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/slab.h>
#include <acpi/apei.h>
#include <ras/ras_event.h>

#include "../pci.h"
#include "portdrv.h"

#define AER_ERROR_SOURCES_MAX		128

#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
#define AER_MAX_TYPEOF_UNCOR_ERRS	27	/* as per PCI_ERR_UNCOR_STATUS */

struct aer_err_source {
	unsigned int status;
	unsigned int id;
};

struct aer_rpc {
	struct pci_dev *rpd;		/* Root Port device */
	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};

/* AER stats for the device */
struct aer_stats {

	/*
	 * Fields for all AER capable devices. They indicate the errors
	 * "as seen by this device". Note that this may mean that if an
	 * end point is causing problems, the AER counters may increment
	 * at its link partner (e.g. root port) because the errors will be
	 * "seen" by the link partner and not the problematic end point
	 * itself (which may report all counters as 0 as it never saw any
	 * problems).
	 */
	/* Counters for different type of correctable errors */
	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
	/* Counters for different type of fatal uncorrectable errors */
	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Counters for different type of nonfatal uncorrectable errors */
	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Total number of ERR_COR sent by this device */
	u64 dev_total_cor_errs;
	/* Total number of ERR_FATAL sent by this device */
	u64 dev_total_fatal_errs;
	/* Total number of ERR_NONFATAL sent by this device */
	u64 dev_total_nonfatal_errs;

	/*
	 * Fields for Root ports & root complex event collectors only, these
	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
	 * messages received by the root port / event collector, INCLUDING the
	 * ones that are generated internally (by the rootport itself)
	 */
	u64 rootport_total_cor_errs;
	u64 rootport_total_fatal_errs;
	u64 rootport_total_nonfatal_errs;
};

#define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
					PCI_ERR_UNC_ECRC|		\
					PCI_ERR_UNC_UNSUP|		\
					PCI_ERR_UNC_COMP_ABORT|		\
					PCI_ERR_UNC_UNX_COMP|		\
					PCI_ERR_UNC_MALF_TLP)

#define SYSTEM_ERROR_INTR_ON_MESG_MASK	(PCI_EXP_RTCTL_SECEE|	\
					PCI_EXP_RTCTL_SENFEE|	\
					PCI_EXP_RTCTL_SEFEE)
#define ROOT_PORT_INTR_ON_MESG_MASK	(PCI_ERR_ROOT_CMD_COR_EN|	\
					PCI_ERR_ROOT_CMD_NONFATAL_EN|	\
					PCI_ERR_ROOT_CMD_FATAL_EN)
#define ERR_COR_ID(d)			(d & 0xffff)
#define ERR_UNCOR_ID(d)			(d >> 16)

static int pcie_aer_disable;
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);

void pci_no_aer(void)
{
	pcie_aer_disable = 1;
}

bool pci_aer_available(void)
{
	return !pcie_aer_disable && pci_msi_enabled();
}

#ifdef CONFIG_PCIE_ECRC

#define ECRC_POLICY_DEFAULT 0		/* ECRC set by BIOS */
#define ECRC_POLICY_OFF 1		/* ECRC off for performance */
#define ECRC_POLICY_ON 2		/* ECRC on for data integrity */

static int ecrc_policy = ECRC_POLICY_DEFAULT;

static const char * const ecrc_policy_str[] = {
	[ECRC_POLICY_DEFAULT] = "bios",
	[ECRC_POLICY_OFF] = "off",
	[ECRC_POLICY_ON] = "on"
};

/**
 * enable_ecrc_checking - enable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
static int enable_ecrc_checking(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 reg32;

	if (!aer)
		return -ENODEV;

	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
	if (reg32 & PCI_ERR_CAP_ECRC_GENC)
		reg32 |= PCI_ERR_CAP_ECRC_GENE;
	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * disable_ecrc_checking - disables PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
static int disable_ecrc_checking(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 reg32;

	if (!aer)
		return -ENODEV;

	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
 * @dev: the PCI device
 */
void pcie_set_ecrc_checking(struct pci_dev *dev)
{
	switch (ecrc_policy) {
	case ECRC_POLICY_DEFAULT:
		return;
	case ECRC_POLICY_OFF:
		disable_ecrc_checking(dev);
		break;
	case ECRC_POLICY_ON:
		enable_ecrc_checking(dev);
		break;
	default:
		return;
	}
}

/**
 * pcie_ecrc_get_policy - parse kernel command-line ecrc option
 * @str: ECRC policy from kernel command line to use
 */
void pcie_ecrc_get_policy(char *str)
{
	int i;

	i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str);
	if (i < 0)
		return;

	ecrc_policy = i;
}
#endif	/* CONFIG_PCIE_ECRC */

#define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)

int pcie_aer_is_native(struct pci_dev *dev)
{
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);

	if (!dev->aer_cap)
		return 0;

	return pcie_ports_native || host->native_aer;
}

int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
	if (!pcie_aer_is_native(dev))
		return -EIO;

	return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);

int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
	if (!pcie_aer_is_native(dev))
		return -EIO;

	return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
					  PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);

void pci_aer_clear_device_status(struct pci_dev *dev)
{
	u16 sta;

	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
}

int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status, sev;

	if (!pcie_aer_is_native(dev))
		return -EIO;

	/* Clear status bits for ERR_NONFATAL errors only */
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
	status &= ~sev;
	if (status)
		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}
EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status);

void pci_aer_clear_fatal_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status, sev;

	if (!pcie_aer_is_native(dev))
		return;

	/* Clear status bits for ERR_FATAL errors only */
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
	status &= sev;
	if (status)
		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
}

/**
 * pci_aer_raw_clear_status - Clear AER error registers.
 * @dev: the PCI device
 *
 * Clearing AER error status registers unconditionally, regardless of
 * whether they're owned by firmware or the OS.
 *
 * Returns 0 on success, or negative on failure.
 */
int pci_aer_raw_clear_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status;
	int port_type;

	if (!aer)
		return -EIO;

	port_type = pci_pcie_type(dev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
	}

	pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
	pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status);

	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}

int pci_aer_clear_status(struct pci_dev *dev)
{
	if (!pcie_aer_is_native(dev))
		return -EIO;

	return pci_aer_raw_clear_status(dev);
}

void pci_save_aer_state(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	if (!aer)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++);
}

void pci_restore_aer_state(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	if (!aer)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++);
}

void pci_aer_init(struct pci_dev *dev)
{
	int n;

	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
	if (!dev->aer_cap)
		return;

	dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);

	/*
	 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
	 * PCI_ERR_COR_MASK, and PCI_ERR_CAP.  Root and Root Complex Event
	 * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0, sec
	 * 7.8.4).
	 */
	n = pcie_cap_has_rtctl(dev) ? 5 : 4;
	pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);

	pci_aer_clear_status(dev);
}

void pci_aer_exit(struct pci_dev *dev)
{
	kfree(dev->aer_stats);
	dev->aer_stats = NULL;
}

#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_TRANSMITTER		3

#define AER_AGENT_REQUESTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
					0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	((t == AER_CORRECTABLE) ?	\
					0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
					(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

#define AER_GET_AGENT(t, e)						\
	((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	(e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	(e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)

#define AER_PHYSICAL_LAYER_ERROR	0
#define AER_DATA_LINK_LAYER_ERROR	1
#define AER_TRANSACTION_LAYER_ERROR	2

#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
					PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
					(PCI_ERR_COR_BAD_TLP|		\
					PCI_ERR_COR_BAD_DLLP|		\
					PCI_ERR_COR_REP_ROLL|		\
					PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

#define AER_GET_LAYER_ERROR(t, e)					\
	((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)

/*
 * AER error strings
 */
static const char *aer_error_severity_string[] = {
	"Uncorrected (Non-Fatal)",
	"Uncorrected (Fatal)",
	"Corrected"
};

static const char *aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};

static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
	"RxErr",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"BadTLP",			/* Bit Position 6	*/
	"BadDLLP",			/* Bit Position 7	*/
	"Rollover",			/* Bit Position 8	*/
	NULL,
	NULL,
	NULL,
	"Timeout",			/* Bit Position 12	*/
	"NonFatalErr",			/* Bit Position 13	*/
	"CorrIntErr",			/* Bit Position 14	*/
	"HeaderOF",			/* Bit Position 15	*/
};

static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
	"Undefined",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	"DLP",				/* Bit Position 4	*/
	"SDES",				/* Bit Position 5	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"TLP",				/* Bit Position 12	*/
	"FCP",				/* Bit Position 13	*/
	"CmpltTO",			/* Bit Position 14	*/
	"CmpltAbrt",			/* Bit Position 15	*/
	"UnxCmplt",			/* Bit Position 16	*/
	"RxOF",				/* Bit Position 17	*/
	"MalfTLP",			/* Bit Position 18	*/
	"ECRC",				/* Bit Position 19	*/
	"UnsupReq",			/* Bit Position 20	*/
	"ACSViol",			/* Bit Position 21	*/
	"UncorrIntErr",			/* Bit Position 22	*/
	"BlockedTLP",			/* Bit Position 23	*/
	"AtomicOpBlocked",		/* Bit Position 24	*/
	"TLPBlockedErr",		/* Bit Position 25	*/
	"PoisonTLPBlocked",		/* Bit Position 26	*/
};

static const char *aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};

#define aer_stats_dev_attr(name, stats_array, strings_array,		\
			   total_string, total_field)			\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	unsigned int i;							\
	char *str = buf;						\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	u64 *stats = pdev->aer_stats->stats_array;			\
									\
	for (i = 0; i < ARRAY_SIZE(strings_array); i++) {		\
		if (strings_array[i])					\
			str += sprintf(str, "%s %llu\n",		\
				       strings_array[i], stats[i]);	\
		else if (stats[i])					\
			str += sprintf(str, #stats_array "_bit[%d] %llu\n",\
				       i, stats[i]);			\
	}								\
	str += sprintf(str, "TOTAL_%s %llu\n", total_string,		\
		       pdev->aer_stats->total_field);			\
	return str-buf;							\
}									\
static DEVICE_ATTR_RO(name)
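
/*
 * The attributes generated below are exposed through sysfs.  As a rough,
 * illustrative sketch only (the counts are hypothetical), reading
 * aer_dev_correctable for a device would produce output of the form:
 *
 *	RxErr 0
 *	BadTLP 2
 *	BadDLLP 0
 *	Rollover 0
 *	Timeout 0
 *	NonFatalErr 0
 *	CorrIntErr 0
 *	HeaderOF 0
 *	TOTAL_ERR_COR 2
 *
 * Bits without a name in the string arrays above are only printed when
 * their counter is non-zero, using the "<stats_array>_bit[<n>]" form.
 */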

aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
		   aer_correctable_error_string, "ERR_COR",
		   dev_total_cor_errs);
aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
		   aer_uncorrectable_error_string, "ERR_FATAL",
		   dev_total_fatal_errs);
aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
		   aer_uncorrectable_error_string, "ERR_NONFATAL",
		   dev_total_nonfatal_errs);

#define aer_stats_rootport_attr(name, field)				\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	return sprintf(buf, "%llu\n", pdev->aer_stats->field);		\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_rootport_attr(aer_rootport_total_err_cor,
			rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
			rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
			rootport_total_nonfatal_errs);

static struct attribute *aer_stats_attrs[] __ro_after_init = {
	&dev_attr_aer_dev_correctable.attr,
	&dev_attr_aer_dev_fatal.attr,
	&dev_attr_aer_dev_nonfatal.attr,
	&dev_attr_aer_rootport_total_err_cor.attr,
	&dev_attr_aer_rootport_total_err_fatal.attr,
	&dev_attr_aer_rootport_total_err_nonfatal.attr,
	NULL
};

static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
					   struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (!pdev->aer_stats)
		return 0;

	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
	    pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
		return 0;

	return a->mode;
}

const struct attribute_group aer_stats_attr_group = {
	.attrs = aer_stats_attrs,
	.is_visible = aer_stats_attrs_are_visible,
};

static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
				   struct aer_err_info *info)
{
	unsigned long status = info->status & ~info->mask;
	int i, max = -1;
	u64 *counter = NULL;
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	switch (info->severity) {
	case AER_CORRECTABLE:
		aer_stats->dev_total_cor_errs++;
		counter = &aer_stats->dev_cor_errs[0];
		max = AER_MAX_TYPEOF_COR_ERRS;
		break;
	case AER_NONFATAL:
		aer_stats->dev_total_nonfatal_errs++;
		counter = &aer_stats->dev_nonfatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	case AER_FATAL:
		aer_stats->dev_total_fatal_errs++;
		counter = &aer_stats->dev_fatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	}

	for_each_set_bit(i, &status, max)
		counter[i]++;
}

static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
					struct aer_err_source *e_src)
{
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
		aer_stats->rootport_total_cor_errs++;

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			aer_stats->rootport_total_fatal_errs++;
		else
			aer_stats->rootport_total_nonfatal_errs++;
	}
}

static void __print_tlp_header(struct pci_dev *dev,
			       struct aer_header_log_regs *t)
{
	pci_err(dev, "  TLP Header: %08x %08x %08x %08x\n",
		t->dw0, t->dw1, t->dw2, t->dw3);
}
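
/*
 * Putting the printing helpers together: an illustrative sketch only,
 * assembled from the format strings in this file, of what dmesg might show
 * for a non-fatal Completion Timeout reported by an Endpoint.  The device,
 * driver names, and register values below are hypothetical:
 *
 *	pcieport 0000:00:1c.0: AER: Uncorrected (Non-Fatal) error received: 0000:01:00.0
 *	nvme 0000:01:00.0: AER: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Requester ID)
 *	nvme 0000:01:00.0: AER:   device [1234:5678] error status/mask=00004000/00000000
 *	nvme 0000:01:00.0: AER:    [14] CmpltTO                (First)
 */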

static void __aer_print_error(struct pci_dev *dev,
			      struct aer_err_info *info)
{
	unsigned long status = info->status & ~info->mask;
	const char *errmsg = NULL;
	int i;

	for_each_set_bit(i, &status, 32) {
		if (info->severity == AER_CORRECTABLE)
			errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
				aer_correctable_error_string[i] : NULL;
		else
			errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
				aer_uncorrectable_error_string[i] : NULL;

		if (errmsg)
			pci_err(dev, "   [%2d] %-22s%s\n", i, errmsg,
				info->first_error == i ? " (First)" : "");
		else
			pci_err(dev, "   [%2d] Unknown Error Bit%s\n",
				i, info->first_error == i ? " (First)" : "");
	}
	pci_dev_aer_stats_incr(dev, info);
}

void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
	int layer, agent;
	int id = ((dev->bus->number << 8) | dev->devfn);

	if (!info->status) {
		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity]);
		goto out;
	}

	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
	agent = AER_GET_AGENT(info->severity, info->status);

	pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
		aer_error_severity_string[info->severity],
		aer_error_layer[layer], aer_agent_string[agent]);

	pci_err(dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
		dev->vendor, dev->device,
		info->status, info->mask);

	__aer_print_error(dev, info);

	if (info->tlp_header_valid)
		__print_tlp_header(dev, &info->tlp);

out:
	if (info->id && info->error_dev_num > 1 && info->id == id)
		pci_err(dev, "  Error of this Agent is reported first\n");

	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			info->severity, info->tlp_header_valid, &info->tlp);
}

static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
	u8 bus = info->id >> 8;
	u8 devfn = info->id & 0xff;

	pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n",
"Multiple " : "", 717 aer_error_severity_string[info->severity], 718 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), 719 PCI_FUNC(devfn)); 720 } 721 722 #ifdef CONFIG_ACPI_APEI_PCIEAER 723 int cper_severity_to_aer(int cper_severity) 724 { 725 switch (cper_severity) { 726 case CPER_SEV_RECOVERABLE: 727 return AER_NONFATAL; 728 case CPER_SEV_FATAL: 729 return AER_FATAL; 730 default: 731 return AER_CORRECTABLE; 732 } 733 } 734 EXPORT_SYMBOL_GPL(cper_severity_to_aer); 735 736 void cper_print_aer(struct pci_dev *dev, int aer_severity, 737 struct aer_capability_regs *aer) 738 { 739 int layer, agent, tlp_header_valid = 0; 740 u32 status, mask; 741 struct aer_err_info info; 742 743 if (aer_severity == AER_CORRECTABLE) { 744 status = aer->cor_status; 745 mask = aer->cor_mask; 746 } else { 747 status = aer->uncor_status; 748 mask = aer->uncor_mask; 749 tlp_header_valid = status & AER_LOG_TLP_MASKS; 750 } 751 752 layer = AER_GET_LAYER_ERROR(aer_severity, status); 753 agent = AER_GET_AGENT(aer_severity, status); 754 755 memset(&info, 0, sizeof(info)); 756 info.severity = aer_severity; 757 info.status = status; 758 info.mask = mask; 759 info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); 760 761 pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); 762 __aer_print_error(dev, &info); 763 pci_err(dev, "aer_layer=%s, aer_agent=%s\n", 764 aer_error_layer[layer], aer_agent_string[agent]); 765 766 if (aer_severity != AER_CORRECTABLE) 767 pci_err(dev, "aer_uncor_severity: 0x%08x\n", 768 aer->uncor_severity); 769 770 if (tlp_header_valid) 771 __print_tlp_header(dev, &aer->header_log); 772 773 trace_aer_event(dev_name(&dev->dev), (status & ~mask), 774 aer_severity, tlp_header_valid, &aer->header_log); 775 } 776 #endif 777 778 /** 779 * add_error_device - list device to be handled 780 * @e_info: pointer to error info 781 * @dev: pointer to pci_dev to be added 782 */ 783 static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) 784 { 785 if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { 786 e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); 787 e_info->error_dev_num++; 788 return 0; 789 } 790 return -ENOSPC; 791 } 792 793 /** 794 * is_error_source - check whether the device is source of reported error 795 * @dev: pointer to pci_dev to be checked 796 * @e_info: pointer to reported error info 797 */ 798 static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) 799 { 800 int aer = dev->aer_cap; 801 u32 status, mask; 802 u16 reg16; 803 804 /* 805 * When bus id is equal to 0, it might be a bad id 806 * reported by root port. 807 */ 808 if ((PCI_BUS_NUM(e_info->id) != 0) && 809 !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { 810 /* Device ID match? */ 811 if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) 812 return true; 813 814 /* Continue id comparing if there is no multiple error */ 815 if (!e_info->multi_error_valid) 816 return false; 817 } 818 819 /* 820 * When either 821 * 1) bus id is equal to 0. Some ports might lose the bus 822 * id of error source id; 823 * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set 824 * 3) There are multiple errors and prior ID comparing fails; 825 * We check AER status registers to find possible reporter. 
	 */
	if (atomic_read(&dev->enable_cnt) == 0)
		return false;

	/* Check if AER is enabled */
	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
	if (!(reg16 & PCI_EXP_AER_FLAGS))
		return false;

	if (!aer)
		return false;

	/* Check if error is recorded */
	if (e_info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
	} else {
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
	}
	if (status & ~mask)
		return true;

	return false;
}

static int find_device_iter(struct pci_dev *dev, void *data)
{
	struct aer_err_info *e_info = (struct aer_err_info *)data;

	if (is_error_source(dev, e_info)) {
		/* List this device */
		if (add_error_device(e_info, dev)) {
			/* We cannot handle more... Stop iteration */
			/* TODO: Should print error message here? */
			return 1;
		}

		/* If there is only a single error, stop iteration */
		if (!e_info->multi_error_valid)
			return 1;
	}
	return 0;
}

/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: including detailed error information such as id
 *
 * Return true if found.
 *
 * Invoked by DPC when error is detected at the Root Port.
 * Caller of this function must set id, severity, and multi_error_valid of
 * struct aer_err_info pointed by @e_info properly.  This function must fill
 * e_info->error_dev_num and e_info->dev[], based on the given information.
 */
static bool find_source_device(struct pci_dev *parent,
			       struct aer_err_info *e_info)
{
	struct pci_dev *dev = parent;
	int result;

	/* Must reset in this function */
	e_info->error_dev_num = 0;

	/* Is Root Port an agent that sends error message? */
	result = find_device_iter(dev, e_info);
	if (result)
		return true;

	pci_walk_bus(parent->subordinate, find_device_iter, e_info);

	if (!e_info->error_dev_num) {
		pci_info(parent, "can't find device of ID%04x\n", e_info->id);
		return false;
	}
	return true;
}

/**
 * handle_error_source - handle logging error into an event log
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error is detected by the Root Port.
 */
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
	int aer = dev->aer_cap;

	if (info->severity == AER_CORRECTABLE) {
		/*
		 * Correctable error does not need software intervention.
		 * No need to go through error recovery process.
		 */
		if (aer)
			pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
					info->status);
		pci_aer_clear_device_status(dev);
	} else if (info->severity == AER_NONFATAL)
		pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
	else if (info->severity == AER_FATAL)
		pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
	pci_dev_put(dev);
}

#ifdef CONFIG_ACPI_APEI_PCIEAER

#define AER_RECOVER_RING_ORDER		4
#define AER_RECOVER_RING_SIZE		(1 << AER_RECOVER_RING_ORDER)

struct aer_recover_entry {
	u8	bus;
	u8	devfn;
	u16	domain;
	int	severity;
	struct aer_capability_regs *regs;
};

static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
		    AER_RECOVER_RING_SIZE);

static void aer_recover_work_func(struct work_struct *work)
{
	struct aer_recover_entry entry;
	struct pci_dev *pdev;

	while (kfifo_get(&aer_recover_ring, &entry)) {
		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
						   entry.devfn);
		if (!pdev) {
			pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
			       entry.domain, entry.bus,
			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
			continue;
		}
		cper_print_aer(pdev, entry.severity, entry.regs);
		if (entry.severity == AER_NONFATAL)
			pcie_do_recovery(pdev, pci_channel_io_normal,
					 aer_root_reset);
		else if (entry.severity == AER_FATAL)
			pcie_do_recovery(pdev, pci_channel_io_frozen,
					 aer_root_reset);
		pci_dev_put(pdev);
	}
}

/*
 * Mutual exclusion for writers of aer_recover_ring; the reader side does not
 * need a lock, because there is only one reader and no lock is needed
 * between reader and writer.
 */
static DEFINE_SPINLOCK(aer_recover_ring_lock);
static DECLARE_WORK(aer_recover_work, aer_recover_work_func);

void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
		       int severity, struct aer_capability_regs *aer_regs)
{
	struct aer_recover_entry entry = {
		.bus		= bus,
		.devfn		= devfn,
		.domain		= domain,
		.severity	= severity,
		.regs		= aer_regs,
	};

	if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
				&aer_recover_ring_lock))
		schedule_work(&aer_recover_work);
	else
		pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
}
EXPORT_SYMBOL_GPL(aer_recover_queue);
#endif

/**
 * aer_get_device_error_info - read error status from dev and store it to info
 * @dev: pointer to the device expected to have an error record
 * @info: pointer to structure to store the error record
 *
 * Return 1 on success, 0 on error.
 *
 * Note that @info is reused among all error devices. Clear fields properly.
 */
int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
	int aer = dev->aer_cap;
	int temp;

	/* Must reset in this function */
	info->status = 0;
	info->tlp_header_valid = 0;

	/* The device might not support AER */
	if (!aer)
		return 0;

	if (info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS,
			&info->status);
		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK,
			&info->mask);
		if (!(info->status & ~info->mask))
			return 0;
	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
		   pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
		   info->severity == AER_NONFATAL) {

		/* Link is still healthy for IO reads */
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS,
			&info->status);
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
			&info->mask);
		if (!(info->status & ~info->mask))
			return 0;

		/* Get First Error Pointer */
		pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp);
		info->first_error = PCI_ERR_CAP_FEP(temp);

		if (info->status & AER_LOG_TLP_MASKS) {
			info->tlp_header_valid = 1;
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
		}
	}

	return 1;
}

static inline void aer_process_err_devices(struct aer_err_info *e_info)
{
	int i;

	/* Report all errors before handling them, so records are not lost to resets etc. */
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			aer_print_error(e_info->dev[i], e_info);
	}
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			handle_error_source(e_info->dev[i], e_info);
	}
}

/**
 * aer_isr_one_error - consume an error detected by root port
 * @rpc: pointer to the root port which holds an error
 * @e_src: pointer to an error source
 */
static void aer_isr_one_error(struct aer_rpc *rpc,
			      struct aer_err_source *e_src)
{
	struct pci_dev *pdev = rpc->rpd;
	struct aer_err_info e_info;

	pci_rootport_aer_stats_incr(pdev, e_src);

	/*
	 * There is a possibility that both correctable error and
	 * uncorrectable error are logged. Report correctable error first.
	 */
	if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
		e_info.id = ERR_COR_ID(e_src->id);
		e_info.severity = AER_CORRECTABLE;

		if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
			e_info.multi_error_valid = 1;
		else
			e_info.multi_error_valid = 0;
		aer_print_port_info(pdev, &e_info);

		if (find_source_device(pdev, &e_info))
			aer_process_err_devices(&e_info);
	}

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		e_info.id = ERR_UNCOR_ID(e_src->id);

		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			e_info.severity = AER_FATAL;
		else
			e_info.severity = AER_NONFATAL;

		if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
			e_info.multi_error_valid = 1;
		else
			e_info.multi_error_valid = 0;

		aer_print_port_info(pdev, &e_info);

		if (find_source_device(pdev, &e_info))
			aer_process_err_devices(&e_info);
	}
}

/**
 * aer_isr - consume errors detected by root port
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked, as DPC, when root port records new detected error
 */
static irqreturn_t aer_isr(int irq, void *context)
{
	struct pcie_device *dev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(dev);
	struct aer_err_source uninitialized_var(e_src);

	if (kfifo_is_empty(&rpc->aer_fifo))
		return IRQ_NONE;

	while (kfifo_get(&rpc->aer_fifo, &e_src))
		aer_isr_one_error(rpc, &e_src);
	return IRQ_HANDLED;
}

/**
 * aer_irq - Root Port's ISR
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked when Root Port detects AER messages.
 */
static irqreturn_t aer_irq(int irq, void *context)
{
	struct pcie_device *pdev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(pdev);
	struct pci_dev *rp = rpc->rpd;
	int aer = rp->aer_cap;
	struct aer_err_source e_src = {};

	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status);
	if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV)))
		return IRQ_NONE;

	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
	pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status);

	if (!kfifo_put(&rpc->aer_fifo, e_src))
		return IRQ_HANDLED;

	return IRQ_WAKE_THREAD;
}

static int set_device_error_reporting(struct pci_dev *dev, void *data)
{
	bool enable = *((bool *)data);
	int type = pci_pcie_type(dev);

	if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
	    (type == PCI_EXP_TYPE_UPSTREAM) ||
	    (type == PCI_EXP_TYPE_DOWNSTREAM)) {
		if (enable)
			pci_enable_pcie_error_reporting(dev);
		else
			pci_disable_pcie_error_reporting(dev);
	}

	if (enable)
		pcie_set_ecrc_checking(dev);

	return 0;
}

/**
 * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
 * @dev: pointer to root port's pci_dev data structure
 * @enable: true = enable error reporting, false = disable error reporting.
 */
static void set_downstream_devices_error_reporting(struct pci_dev *dev,
						   bool enable)
{
	set_device_error_reporting(dev, &enable);

	if (!dev->subordinate)
		return;
	pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
}

/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus loads AER service driver.
 */
static void aer_enable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer = pdev->aer_cap;
	u16 reg16;
	u32 reg32;

	/* Clear PCIe Capability's Device Status */
	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);

	/* Disable system error generation in response to error messages */
	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
				   SYSTEM_ERROR_INTR_ON_MESG_MASK);

	/* Clear error status */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
	pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32);
	pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);

	/*
	 * Enable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, true);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
}

/**
 * aer_disable_rootport - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus unloads AER service driver.
 */
static void aer_disable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer = pdev->aer_cap;
	u32 reg32;

	/*
	 * Disable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, false);

	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);

	/* Clear Root's error status reg */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
}

/**
 * aer_remove - clean up resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus unloads or AER probe fails.
 */
static void aer_remove(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	aer_disable_rootport(rpc);
}

/**
 * aer_probe - initialize resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus loads AER service driver.
 */
static int aer_probe(struct pcie_device *dev)
{
	int status;
	struct aer_rpc *rpc;
	struct device *device = &dev->device;
	struct pci_dev *port = dev->port;

	rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
	if (!rpc)
		return -ENOMEM;

	rpc->rpd = port;
	INIT_KFIFO(rpc->aer_fifo);
	set_service_data(dev, rpc);

	status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,
					   IRQF_SHARED, "aerdrv", dev);
	if (status) {
		pci_err(port, "request AER IRQ %d failed\n", dev->irq);
		return status;
	}

	aer_enable_rootport(rpc);
	pci_info(port, "enabled with IRQ %d\n", dev->irq);
	return 0;
}

/**
 * aer_root_reset - reset link on Root Port
 * @dev: pointer to Root Port's pci_dev data structure
 *
 * Invoked by Port Bus driver when performing link reset at Root Port.
 */
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 reg32;
	int rc;

	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);

	rc = pci_bus_error_reset(dev);
	pci_info(dev, "Root Port link has been reset\n");

	/* Clear Root Error Status */
	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);

	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static struct pcie_port_service_driver aerdriver = {
	.name		= "aer",
	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
	.service	= PCIE_PORT_SERVICE_AER,

	.probe		= aer_probe,
	.remove		= aer_remove,
};

/**
 * pcie_aer_init - register AER root service driver
 *
 * Invoked when AER root service driver is loaded.
 */
int __init pcie_aer_init(void)
{
	if (!pci_aer_available())
		return -ENXIO;
	return pcie_port_service_register(&aerdriver);
}