1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/debugfs.h> 26 #include <linux/sched.h> 27 #include <linux/init.h> 28 #include <linux/list.h> 29 #include <linux/pci.h> 30 #include <linux/proc_fs.h> 31 #include <linux/rbtree.h> 32 #include <linux/reboot.h> 33 #include <linux/seq_file.h> 34 #include <linux/spinlock.h> 35 #include <linux/export.h> 36 #include <linux/of.h> 37 38 #include <linux/atomic.h> 39 #include <asm/debug.h> 40 #include <asm/eeh.h> 41 #include <asm/eeh_event.h> 42 #include <asm/io.h> 43 #include <asm/machdep.h> 44 #include <asm/ppc-pci.h> 45 #include <asm/rtas.h> 46 47 48 /** Overview: 49 * EEH, or "Extended Error Handling" is a PCI bridge technology for 50 * dealing with PCI bus errors that can't be dealt with within the 51 * usual PCI framework, except by check-stopping the CPU. Systems 52 * that are designed for high-availability/reliability cannot afford 53 * to crash due to a "mere" PCI error, thus the need for EEH. 
54 * An EEH-capable bridge operates by converting a detected error 55 * into a "slot freeze", taking the PCI adapter off-line, making 56 * the slot behave, from the OS'es point of view, as if the slot 57 * were "empty": all reads return 0xff's and all writes are silently 58 * ignored. EEH slot isolation events can be triggered by parity 59 * errors on the address or data busses (e.g. during posted writes), 60 * which in turn might be caused by low voltage on the bus, dust, 61 * vibration, humidity, radioactivity or plain-old failed hardware. 62 * 63 * Note, however, that one of the leading causes of EEH slot 64 * freeze events are buggy device drivers, buggy device microcode, 65 * or buggy device hardware. This is because any attempt by the 66 * device to bus-master data to a memory address that is not 67 * assigned to the device will trigger a slot freeze. (The idea 68 * is to prevent devices-gone-wild from corrupting system memory). 69 * Buggy hardware/drivers will have a miserable time co-existing 70 * with EEH. 71 * 72 * Ideally, a PCI device driver, when suspecting that an isolation 73 * event has occurred (e.g. by reading 0xff's), will then ask EEH 74 * whether this is the case, and then take appropriate steps to 75 * reset the PCI slot, the PCI device, and then resume operations. 76 * However, until that day, the checking is done here, with the 77 * eeh_check_failure() routine embedded in the MMIO macros. If 78 * the slot is found to be isolated, an "EEH Event" is synthesized 79 * and sent out for processing. 80 */ 81 82 /* If a device driver keeps reading an MMIO register in an interrupt 83 * handler after a slot isolation event, it might be broken. 84 * This sets the threshold for how many read attempts we allow 85 * before printing an error message. 
 */
#define EEH_MAX_FAILS 2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI enumeration based on device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * The struct is used to maintain the EEH global statistic
 * information.
Besides, the EEH global statistics will be 121 * exported to user space through procfs 122 */ 123 struct eeh_stats { 124 u64 no_device; /* PCI device not found */ 125 u64 no_dn; /* OF node not found */ 126 u64 no_cfg_addr; /* Config address not found */ 127 u64 ignored_check; /* EEH check skipped */ 128 u64 total_mmio_ffs; /* Total EEH checks */ 129 u64 false_positives; /* Unnecessary EEH checks */ 130 u64 slot_resets; /* PE reset */ 131 }; 132 133 static struct eeh_stats eeh_stats; 134 135 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) 136 137 static int __init eeh_setup(char *str) 138 { 139 if (!strcmp(str, "off")) 140 eeh_subsystem_flags |= EEH_FORCE_DISABLED; 141 142 return 1; 143 } 144 __setup("eeh=", eeh_setup); 145 146 /** 147 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff 148 * @edev: device to report data for 149 * @buf: point to buffer in which to log 150 * @len: amount of room in buffer 151 * 152 * This routine captures assorted PCI configuration space data, 153 * and puts them into a buffer for RTAS error logging. 
154 */ 155 static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) 156 { 157 struct device_node *dn = eeh_dev_to_of_node(edev); 158 u32 cfg; 159 int cap, i; 160 int n = 0; 161 162 n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); 163 pr_warn("EEH: of node=%s\n", dn->full_name); 164 165 eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); 166 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 167 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 168 169 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); 170 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 171 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 172 173 /* Gather bridge-specific registers */ 174 if (edev->mode & EEH_DEV_BRIDGE) { 175 eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); 176 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 177 pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 178 179 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); 180 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 181 pr_warn("EEH: Bridge control: %04x\n", cfg); 182 } 183 184 /* Dump out the PCI-X command and status regs */ 185 cap = edev->pcix_cap; 186 if (cap) { 187 eeh_ops->read_config(dn, cap, 4, &cfg); 188 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 189 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 190 191 eeh_ops->read_config(dn, cap+4, 4, &cfg); 192 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 193 pr_warn("EEH: PCI-X status: %08x\n", cfg); 194 } 195 196 /* If PCI-E capable, dump PCI-E cap 10 */ 197 cap = edev->pcie_cap; 198 if (cap) { 199 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 200 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 201 202 for (i=0; i<=8; i++) { 203 eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 204 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 205 pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg); 206 } 207 } 208 209 /* If AER capable, dump it */ 210 cap = edev->aer_cap; 211 if (cap) { 212 n += scnprintf(buf+n, len-n, "pci-e 
AER:\n"); 213 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 214 215 for (i=0; i<14; i++) { 216 eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 217 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 218 pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); 219 } 220 } 221 222 return n; 223 } 224 225 /** 226 * eeh_slot_error_detail - Generate combined log including driver log and error log 227 * @pe: EEH PE 228 * @severity: temporary or permanent error log 229 * 230 * This routine should be called to generate the combined log, which 231 * is comprised of driver log and error log. The driver log is figured 232 * out from the config space of the corresponding PCI device, while 233 * the error log is fetched through platform dependent function call. 234 */ 235 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 236 { 237 size_t loglen = 0; 238 struct eeh_dev *edev, *tmp; 239 240 /* 241 * When the PHB is fenced or dead, it's pointless to collect 242 * the data from PCI config space because it should return 243 * 0xFF's. For ER, we still retrieve the data from the PCI 244 * config space. 245 * 246 * For pHyp, we have to enable IO for log retrieval. Otherwise, 247 * 0xFF's is always returned from PCI config space. 248 */ 249 if (!(pe->type & EEH_PE_PHB)) { 250 if (eeh_probe_mode_devtree()) 251 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 252 eeh_ops->configure_bridge(pe); 253 eeh_pe_restore_bars(pe); 254 255 pci_regs_buf[0] = 0; 256 eeh_pe_for_each_dev(pe, edev, tmp) { 257 loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, 258 EEH_PCI_REGS_LOG_LEN - loglen); 259 } 260 } 261 262 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 263 } 264 265 /** 266 * eeh_token_to_phys - Convert EEH address token to phys address 267 * @token: I/O token, should be address in the form 0xA.... 268 * 269 * This routine should be called to convert virtual I/O address 270 * to physical one. 
271 */ 272 static inline unsigned long eeh_token_to_phys(unsigned long token) 273 { 274 pte_t *ptep; 275 unsigned long pa; 276 int hugepage_shift; 277 278 /* 279 * We won't find hugepages here, iomem 280 */ 281 ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); 282 if (!ptep) 283 return token; 284 WARN_ON(hugepage_shift); 285 pa = pte_pfn(*ptep) << PAGE_SHIFT; 286 287 return pa | (token & (PAGE_SIZE-1)); 288 } 289 290 /* 291 * On PowerNV platform, we might already have fenced PHB there. 292 * For that case, it's meaningless to recover frozen PE. Intead, 293 * We have to handle fenced PHB firstly. 294 */ 295 static int eeh_phb_check_failure(struct eeh_pe *pe) 296 { 297 struct eeh_pe *phb_pe; 298 unsigned long flags; 299 int ret; 300 301 if (!eeh_probe_mode_dev()) 302 return -EPERM; 303 304 /* Find the PHB PE */ 305 phb_pe = eeh_phb_pe_get(pe->phb); 306 if (!phb_pe) { 307 pr_warning("%s Can't find PE for PHB#%d\n", 308 __func__, pe->phb->global_number); 309 return -EEXIST; 310 } 311 312 /* If the PHB has been in problematic state */ 313 eeh_serialize_lock(&flags); 314 if (phb_pe->state & EEH_PE_ISOLATED) { 315 ret = 0; 316 goto out; 317 } 318 319 /* Check PHB state */ 320 ret = eeh_ops->get_state(phb_pe, NULL); 321 if ((ret < 0) || 322 (ret == EEH_STATE_NOT_SUPPORT) || 323 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 324 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 325 ret = 0; 326 goto out; 327 } 328 329 /* Isolate the PHB and send event */ 330 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 331 eeh_serialize_unlock(flags); 332 333 pr_err("EEH: PHB#%x failure detected, location: %s\n", 334 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 335 dump_stack(); 336 eeh_send_failure_event(phb_pe); 337 338 return 1; 339 out: 340 eeh_serialize_unlock(flags); 341 return ret; 342 } 343 344 /** 345 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 346 * @edev: eeh device 347 * 348 * Check for an EEH failure for the 
given device node. Call this 349 * routine if the result of a read was all 0xff's and you want to 350 * find out if this is due to an EEH slot freeze. This routine 351 * will query firmware for the EEH status. 352 * 353 * Returns 0 if there has not been an EEH error; otherwise returns 354 * a non-zero value and queues up a slot isolation event notification. 355 * 356 * It is safe to call this routine in an interrupt context. 357 */ 358 int eeh_dev_check_failure(struct eeh_dev *edev) 359 { 360 int ret; 361 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 362 unsigned long flags; 363 struct device_node *dn; 364 struct pci_dev *dev; 365 struct eeh_pe *pe, *parent_pe, *phb_pe; 366 int rc = 0; 367 const char *location; 368 369 eeh_stats.total_mmio_ffs++; 370 371 if (!eeh_enabled()) 372 return 0; 373 374 if (!edev) { 375 eeh_stats.no_dn++; 376 return 0; 377 } 378 dn = eeh_dev_to_of_node(edev); 379 dev = eeh_dev_to_pci_dev(edev); 380 pe = edev->pe; 381 382 /* Access to IO BARs might get this far and still not want checking. */ 383 if (!pe) { 384 eeh_stats.ignored_check++; 385 pr_debug("EEH: Ignored check for %s %s\n", 386 eeh_pci_name(dev), dn->full_name); 387 return 0; 388 } 389 390 if (!pe->addr && !pe->config_addr) { 391 eeh_stats.no_cfg_addr++; 392 return 0; 393 } 394 395 /* 396 * On PowerNV platform, we might already have fenced PHB 397 * there and we need take care of that firstly. 398 */ 399 ret = eeh_phb_check_failure(pe); 400 if (ret > 0) 401 return ret; 402 403 /* If we already have a pending isolation event for this 404 * slot, we know it's bad already, we don't need to check. 405 * Do this checking under a lock; as multiple PCI devices 406 * in one slot might report errors simultaneously, and we 407 * only want one error recovery routine running. 
408 */ 409 eeh_serialize_lock(&flags); 410 rc = 1; 411 if (pe->state & EEH_PE_ISOLATED) { 412 pe->check_count++; 413 if (pe->check_count % EEH_MAX_FAILS == 0) { 414 location = of_get_property(dn, "ibm,loc-code", NULL); 415 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 416 "location=%s driver=%s pci addr=%s\n", 417 pe->check_count, location, 418 eeh_driver_name(dev), eeh_pci_name(dev)); 419 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 420 eeh_driver_name(dev)); 421 dump_stack(); 422 } 423 goto dn_unlock; 424 } 425 426 /* 427 * Now test for an EEH failure. This is VERY expensive. 428 * Note that the eeh_config_addr may be a parent device 429 * in the case of a device behind a bridge, or it may be 430 * function zero of a multi-function device. 431 * In any case they must share a common PHB. 432 */ 433 ret = eeh_ops->get_state(pe, NULL); 434 435 /* Note that config-io to empty slots may fail; 436 * they are empty when they don't have children. 437 * We will punt with the following conditions: Failure to get 438 * PE's state, EEH not support and Permanently unavailable 439 * state, PE is in good state. 440 */ 441 if ((ret < 0) || 442 (ret == EEH_STATE_NOT_SUPPORT) || 443 ((ret & active_flags) == active_flags)) { 444 eeh_stats.false_positives++; 445 pe->false_positives++; 446 rc = 0; 447 goto dn_unlock; 448 } 449 450 /* 451 * It should be corner case that the parent PE has been 452 * put into frozen state as well. We should take care 453 * that at first. 454 */ 455 parent_pe = pe->parent; 456 while (parent_pe) { 457 /* Hit the ceiling ? */ 458 if (parent_pe->type & EEH_PE_PHB) 459 break; 460 461 /* Frozen parent PE ? 
*/ 462 ret = eeh_ops->get_state(parent_pe, NULL); 463 if (ret > 0 && 464 (ret & active_flags) != active_flags) 465 pe = parent_pe; 466 467 /* Next parent level */ 468 parent_pe = parent_pe->parent; 469 } 470 471 eeh_stats.slot_resets++; 472 473 /* Avoid repeated reports of this failure, including problems 474 * with other functions on this device, and functions under 475 * bridges. 476 */ 477 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 478 eeh_serialize_unlock(flags); 479 480 /* Most EEH events are due to device driver bugs. Having 481 * a stack trace will help the device-driver authors figure 482 * out what happened. So print that out. 483 */ 484 phb_pe = eeh_phb_pe_get(pe->phb); 485 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 486 pe->phb->global_number, pe->addr); 487 pr_err("EEH: PE location: %s, PHB location: %s\n", 488 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 489 dump_stack(); 490 491 eeh_send_failure_event(pe); 492 493 return 1; 494 495 dn_unlock: 496 eeh_serialize_unlock(flags); 497 return rc; 498 } 499 500 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 501 502 /** 503 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 504 * @token: I/O token, should be address in the form 0xA.... 505 * @val: value, should be all 1's (XXX why do we need this arg??) 506 * 507 * Check for an EEH failure at the given token address. Call this 508 * routine if the result of a read was all 0xff's and you want to 509 * find out if this is due to an EEH slot freeze event. This routine 510 * will query firmware for the EEH status. 511 * 512 * Note this routine is safe to call in an interrupt context. 513 */ 514 unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) 515 { 516 unsigned long addr; 517 struct eeh_dev *edev; 518 519 /* Finding the phys addr + pci device; this is pretty quick. 
*/ 520 addr = eeh_token_to_phys((unsigned long __force) token); 521 edev = eeh_addr_cache_get_dev(addr); 522 if (!edev) { 523 eeh_stats.no_device++; 524 return val; 525 } 526 527 eeh_dev_check_failure(edev); 528 return val; 529 } 530 531 EXPORT_SYMBOL(eeh_check_failure); 532 533 534 /** 535 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 536 * @pe: EEH PE 537 * 538 * This routine should be called to reenable frozen MMIO or DMA 539 * so that it would work correctly again. It's useful while doing 540 * recovery or log collection on the indicated device. 541 */ 542 int eeh_pci_enable(struct eeh_pe *pe, int function) 543 { 544 int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 545 546 /* 547 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 548 * Also, it's pointless to enable them on unfrozen PE. So 549 * we have the check here. 550 */ 551 if (function == EEH_OPT_THAW_MMIO || 552 function == EEH_OPT_THAW_DMA) { 553 rc = eeh_ops->get_state(pe, NULL); 554 if (rc < 0) 555 return rc; 556 557 /* Needn't to enable or already enabled */ 558 if ((rc == EEH_STATE_NOT_SUPPORT) || 559 ((rc & flags) == flags)) 560 return 0; 561 } 562 563 rc = eeh_ops->set_option(pe, function); 564 if (rc) 565 pr_warn("%s: Unexpected state change %d on " 566 "PHB#%d-PE#%x, err=%d\n", 567 __func__, function, pe->phb->global_number, 568 pe->addr, rc); 569 570 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 571 if (rc <= 0) 572 return rc; 573 574 if ((function == EEH_OPT_THAW_MMIO) && 575 (rc & EEH_STATE_MMIO_ENABLED)) 576 return 0; 577 578 if ((function == EEH_OPT_THAW_DMA) && 579 (rc & EEH_STATE_DMA_ENABLED)) 580 return 0; 581 582 return rc; 583 } 584 585 /** 586 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 587 * @dev: pci device struct 588 * @state: reset state to enter 589 * 590 * Return value: 591 * 0 if success 592 */ 593 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 594 { 595 struct eeh_dev *edev = 
pci_dev_to_eeh_dev(dev); 596 struct eeh_pe *pe = edev->pe; 597 598 if (!pe) { 599 pr_err("%s: No PE found on PCI device %s\n", 600 __func__, pci_name(dev)); 601 return -EINVAL; 602 } 603 604 switch (state) { 605 case pcie_deassert_reset: 606 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 607 break; 608 case pcie_hot_reset: 609 eeh_ops->reset(pe, EEH_RESET_HOT); 610 break; 611 case pcie_warm_reset: 612 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 613 break; 614 default: 615 return -EINVAL; 616 }; 617 618 return 0; 619 } 620 621 /** 622 * eeh_set_pe_freset - Check the required reset for the indicated device 623 * @data: EEH device 624 * @flag: return value 625 * 626 * Each device might have its preferred reset type: fundamental or 627 * hot reset. The routine is used to collected the information for 628 * the indicated device and its children so that the bunch of the 629 * devices could be reset properly. 630 */ 631 static void *eeh_set_dev_freset(void *data, void *flag) 632 { 633 struct pci_dev *dev; 634 unsigned int *freset = (unsigned int *)flag; 635 struct eeh_dev *edev = (struct eeh_dev *)data; 636 637 dev = eeh_dev_to_pci_dev(edev); 638 if (dev) 639 *freset |= dev->needs_freset; 640 641 return NULL; 642 } 643 644 /** 645 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 646 * @pe: EEH PE 647 * 648 * Assert the PCI #RST line for 1/4 second. 649 */ 650 static void eeh_reset_pe_once(struct eeh_pe *pe) 651 { 652 unsigned int freset = 0; 653 654 /* Determine type of EEH reset required for 655 * Partitionable Endpoint, a hot-reset (1) 656 * or a fundamental reset (3). 657 * A fundamental reset required by any device under 658 * Partitionable Endpoint trumps hot-reset. 
659 */ 660 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 661 662 if (freset) 663 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 664 else 665 eeh_ops->reset(pe, EEH_RESET_HOT); 666 667 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 668 } 669 670 /** 671 * eeh_reset_pe - Reset the indicated PE 672 * @pe: EEH PE 673 * 674 * This routine should be called to reset indicated device, including 675 * PE. A PE might include multiple PCI devices and sometimes PCI bridges 676 * might be involved as well. 677 */ 678 int eeh_reset_pe(struct eeh_pe *pe) 679 { 680 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 681 int i, rc; 682 683 /* Take three shots at resetting the bus */ 684 for (i=0; i<3; i++) { 685 eeh_reset_pe_once(pe); 686 687 /* 688 * EEH_PE_ISOLATED is expected to be removed after 689 * BAR restore. 690 */ 691 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 692 if ((rc & flags) == flags) 693 return 0; 694 695 if (rc < 0) { 696 pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", 697 __func__, pe->phb->global_number, pe->addr); 698 return -1; 699 } 700 pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", 701 i+1, pe->phb->global_number, pe->addr, rc); 702 } 703 704 return -1; 705 } 706 707 /** 708 * eeh_save_bars - Save device bars 709 * @edev: PCI device associated EEH device 710 * 711 * Save the values of the device bars. Unlike the restore 712 * routine, this routine is *not* recursive. This is because 713 * PCI devices are added individually; but, for the restore, 714 * an entire slot is reset at a time. 715 */ 716 void eeh_save_bars(struct eeh_dev *edev) 717 { 718 int i; 719 struct device_node *dn; 720 721 if (!edev) 722 return; 723 dn = eeh_dev_to_of_node(edev); 724 725 for (i = 0; i < 16; i++) 726 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); 727 728 /* 729 * For PCI bridges including root port, we need enable bus 730 * master explicitly. Otherwise, it can't fetch IODA table 731 * entries correctly. 
So we cache the bit in advance so that 732 * we can restore it after reset, either PHB range or PE range. 733 */ 734 if (edev->mode & EEH_DEV_BRIDGE) 735 edev->config_space[1] |= PCI_COMMAND_MASTER; 736 } 737 738 /** 739 * eeh_ops_register - Register platform dependent EEH operations 740 * @ops: platform dependent EEH operations 741 * 742 * Register the platform dependent EEH operation callback 743 * functions. The platform should call this function before 744 * any other EEH operations. 745 */ 746 int __init eeh_ops_register(struct eeh_ops *ops) 747 { 748 if (!ops->name) { 749 pr_warning("%s: Invalid EEH ops name for %p\n", 750 __func__, ops); 751 return -EINVAL; 752 } 753 754 if (eeh_ops && eeh_ops != ops) { 755 pr_warning("%s: EEH ops of platform %s already existing (%s)\n", 756 __func__, eeh_ops->name, ops->name); 757 return -EEXIST; 758 } 759 760 eeh_ops = ops; 761 762 return 0; 763 } 764 765 /** 766 * eeh_ops_unregister - Unreigster platform dependent EEH operations 767 * @name: name of EEH platform operations 768 * 769 * Unregister the platform dependent EEH operation callback 770 * functions. 771 */ 772 int __exit eeh_ops_unregister(const char *name) 773 { 774 if (!name || !strlen(name)) { 775 pr_warning("%s: Invalid EEH ops name\n", 776 __func__); 777 return -EINVAL; 778 } 779 780 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 781 eeh_ops = NULL; 782 return 0; 783 } 784 785 return -EEXIST; 786 } 787 788 static int eeh_reboot_notifier(struct notifier_block *nb, 789 unsigned long action, void *unused) 790 { 791 eeh_set_enable(false); 792 return NOTIFY_DONE; 793 } 794 795 static struct notifier_block eeh_reboot_nb = { 796 .notifier_call = eeh_reboot_notifier, 797 }; 798 799 /** 800 * eeh_init - EEH initialization 801 * 802 * Initialize EEH by trying to enable it for all of the adapters in the system. 803 * As a side effect we can determine here if eeh is supported at all. 
804 * Note that we leave EEH on so failed config cycles won't cause a machine 805 * check. If a user turns off EEH for a particular adapter they are really 806 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 807 * grant access to a slot if EEH isn't enabled, and so we always enable 808 * EEH for all slots/all devices. 809 * 810 * The eeh-force-off option disables EEH checking globally, for all slots. 811 * Even if force-off is set, the EEH hardware is still enabled, so that 812 * newer systems can boot. 813 */ 814 int eeh_init(void) 815 { 816 struct pci_controller *hose, *tmp; 817 struct device_node *phb; 818 static int cnt = 0; 819 int ret = 0; 820 821 /* 822 * We have to delay the initialization on PowerNV after 823 * the PCI hierarchy tree has been built because the PEs 824 * are figured out based on PCI devices instead of device 825 * tree nodes 826 */ 827 if (machine_is(powernv) && cnt++ <= 0) 828 return ret; 829 830 /* Register reboot notifier */ 831 ret = register_reboot_notifier(&eeh_reboot_nb); 832 if (ret) { 833 pr_warn("%s: Failed to register notifier (%d)\n", 834 __func__, ret); 835 return ret; 836 } 837 838 /* call platform initialization function */ 839 if (!eeh_ops) { 840 pr_warning("%s: Platform EEH operation not found\n", 841 __func__); 842 return -EEXIST; 843 } else if ((ret = eeh_ops->init())) { 844 pr_warning("%s: Failed to call platform init function (%d)\n", 845 __func__, ret); 846 return ret; 847 } 848 849 /* Initialize EEH event */ 850 ret = eeh_event_init(); 851 if (ret) 852 return ret; 853 854 /* Enable EEH for all adapters */ 855 if (eeh_probe_mode_devtree()) { 856 list_for_each_entry_safe(hose, tmp, 857 &hose_list, list_node) { 858 phb = hose->dn; 859 traverse_pci_devices(phb, eeh_ops->of_probe, NULL); 860 } 861 } else if (eeh_probe_mode_dev()) { 862 list_for_each_entry_safe(hose, tmp, 863 &hose_list, list_node) 864 pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); 865 } else { 866 pr_warn("%s: Invalid probe mode 
%x", 867 __func__, eeh_subsystem_flags); 868 return -EINVAL; 869 } 870 871 /* 872 * Call platform post-initialization. Actually, It's good chance 873 * to inform platform that EEH is ready to supply service if the 874 * I/O cache stuff has been built up. 875 */ 876 if (eeh_ops->post_init) { 877 ret = eeh_ops->post_init(); 878 if (ret) 879 return ret; 880 } 881 882 if (eeh_enabled()) 883 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 884 else 885 pr_warning("EEH: No capable adapters found\n"); 886 887 return ret; 888 } 889 890 core_initcall_sync(eeh_init); 891 892 /** 893 * eeh_add_device_early - Enable EEH for the indicated device_node 894 * @dn: device node for which to set up EEH 895 * 896 * This routine must be used to perform EEH initialization for PCI 897 * devices that were added after system boot (e.g. hotplug, dlpar). 898 * This routine must be called before any i/o is performed to the 899 * adapter (inluding any config-space i/o). 900 * Whether this actually enables EEH or not for this device depends 901 * on the CEC architecture, type of the device, on earlier boot 902 * command-line arguments & etc. 903 */ 904 void eeh_add_device_early(struct device_node *dn) 905 { 906 struct pci_controller *phb; 907 908 /* 909 * If we're doing EEH probe based on PCI device, we 910 * would delay the probe until late stage because 911 * the PCI device isn't available this moment. 912 */ 913 if (!eeh_probe_mode_devtree()) 914 return; 915 916 if (!of_node_to_eeh_dev(dn)) 917 return; 918 phb = of_node_to_eeh_dev(dn)->phb; 919 920 /* USB Bus children of PCI devices will not have BUID's */ 921 if (NULL == phb || 0 == phb->buid) 922 return; 923 924 eeh_ops->of_probe(dn, NULL); 925 } 926 927 /** 928 * eeh_add_device_tree_early - Enable EEH for the indicated device 929 * @dn: device node 930 * 931 * This routine must be used to perform EEH initialization for the 932 * indicated PCI device that was added after system boot (e.g. 933 * hotplug, dlpar). 
934 */ 935 void eeh_add_device_tree_early(struct device_node *dn) 936 { 937 struct device_node *sib; 938 939 for_each_child_of_node(dn, sib) 940 eeh_add_device_tree_early(sib); 941 eeh_add_device_early(dn); 942 } 943 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 944 945 /** 946 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 947 * @dev: pci device for which to set up EEH 948 * 949 * This routine must be used to complete EEH initialization for PCI 950 * devices that were added after system boot (e.g. hotplug, dlpar). 951 */ 952 void eeh_add_device_late(struct pci_dev *dev) 953 { 954 struct device_node *dn; 955 struct eeh_dev *edev; 956 957 if (!dev || !eeh_enabled()) 958 return; 959 960 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 961 962 dn = pci_device_to_OF_node(dev); 963 edev = of_node_to_eeh_dev(dn); 964 if (edev->pdev == dev) { 965 pr_debug("EEH: Already referenced !\n"); 966 return; 967 } 968 969 /* 970 * The EEH cache might not be removed correctly because of 971 * unbalanced kref to the device during unplug time, which 972 * relies on pcibios_release_device(). So we have to remove 973 * that here explicitly. 974 */ 975 if (edev->pdev) { 976 eeh_rmv_from_parent_pe(edev); 977 eeh_addr_cache_rmv_dev(edev->pdev); 978 eeh_sysfs_remove_device(edev->pdev); 979 edev->mode &= ~EEH_DEV_SYSFS; 980 981 /* 982 * We definitely should have the PCI device removed 983 * though it wasn't correctly. So we needn't call 984 * into error handler afterwards. 985 */ 986 edev->mode |= EEH_DEV_NO_HANDLER; 987 988 edev->pdev = NULL; 989 dev->dev.archdata.edev = NULL; 990 } 991 992 edev->pdev = dev; 993 dev->dev.archdata.edev = edev; 994 995 /* 996 * We have to do the EEH probe here because the PCI device 997 * hasn't been created yet in the early stage. 
998 */ 999 if (eeh_probe_mode_dev()) 1000 eeh_ops->dev_probe(dev, NULL); 1001 1002 eeh_addr_cache_insert_dev(dev); 1003 } 1004 1005 /** 1006 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1007 * @bus: PCI bus 1008 * 1009 * This routine must be used to perform EEH initialization for PCI 1010 * devices which are attached to the indicated PCI bus. The PCI bus 1011 * is added after system boot through hotplug or dlpar. 1012 */ 1013 void eeh_add_device_tree_late(struct pci_bus *bus) 1014 { 1015 struct pci_dev *dev; 1016 1017 list_for_each_entry(dev, &bus->devices, bus_list) { 1018 eeh_add_device_late(dev); 1019 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1020 struct pci_bus *subbus = dev->subordinate; 1021 if (subbus) 1022 eeh_add_device_tree_late(subbus); 1023 } 1024 } 1025 } 1026 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1027 1028 /** 1029 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1030 * @bus: PCI bus 1031 * 1032 * This routine must be used to add EEH sysfs files for PCI 1033 * devices which are attached to the indicated PCI bus. The PCI bus 1034 * is added after system boot through hotplug or dlpar. 1035 */ 1036 void eeh_add_sysfs_files(struct pci_bus *bus) 1037 { 1038 struct pci_dev *dev; 1039 1040 list_for_each_entry(dev, &bus->devices, bus_list) { 1041 eeh_sysfs_add_device(dev); 1042 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1043 struct pci_bus *subbus = dev->subordinate; 1044 if (subbus) 1045 eeh_add_sysfs_files(subbus); 1046 } 1047 } 1048 } 1049 EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); 1050 1051 /** 1052 * eeh_remove_device - Undo EEH setup for the indicated pci device 1053 * @dev: pci device to be removed 1054 * 1055 * This routine should be called when a device is removed from 1056 * a running system (e.g. by hotplug or dlpar). It unregisters 1057 * the PCI device from the EEH subsystem. 
I/O errors affecting 1058 * this device will no longer be detected after this call; thus, 1059 * i/o errors affecting this slot may leave this device unusable. 1060 */ 1061 void eeh_remove_device(struct pci_dev *dev) 1062 { 1063 struct eeh_dev *edev; 1064 1065 if (!dev || !eeh_enabled()) 1066 return; 1067 edev = pci_dev_to_eeh_dev(dev); 1068 1069 /* Unregister the device with the EEH/PCI address search system */ 1070 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 1071 1072 if (!edev || !edev->pdev || !edev->pe) { 1073 pr_debug("EEH: Not referenced !\n"); 1074 return; 1075 } 1076 1077 /* 1078 * During the hotplug for EEH error recovery, we need the EEH 1079 * device attached to the parent PE in order for BAR restore 1080 * a bit later. So we keep it for BAR restore and remove it 1081 * from the parent PE during the BAR resotre. 1082 */ 1083 edev->pdev = NULL; 1084 dev->dev.archdata.edev = NULL; 1085 if (!(edev->pe->state & EEH_PE_KEEP)) 1086 eeh_rmv_from_parent_pe(edev); 1087 else 1088 edev->mode |= EEH_DEV_DISCONNECTED; 1089 1090 /* 1091 * We're removing from the PCI subsystem, that means 1092 * the PCI device driver can't support EEH or not 1093 * well. So we rely on hotplug completely to do recovery 1094 * for the specific PCI device. 
1095 */ 1096 edev->mode |= EEH_DEV_NO_HANDLER; 1097 1098 eeh_addr_cache_rmv_dev(dev); 1099 eeh_sysfs_remove_device(dev); 1100 edev->mode &= ~EEH_DEV_SYSFS; 1101 } 1102 1103 static int proc_eeh_show(struct seq_file *m, void *v) 1104 { 1105 if (!eeh_enabled()) { 1106 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1107 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1108 } else { 1109 seq_printf(m, "EEH Subsystem is enabled\n"); 1110 seq_printf(m, 1111 "no device=%llu\n" 1112 "no device node=%llu\n" 1113 "no config address=%llu\n" 1114 "check not wanted=%llu\n" 1115 "eeh_total_mmio_ffs=%llu\n" 1116 "eeh_false_positives=%llu\n" 1117 "eeh_slot_resets=%llu\n", 1118 eeh_stats.no_device, 1119 eeh_stats.no_dn, 1120 eeh_stats.no_cfg_addr, 1121 eeh_stats.ignored_check, 1122 eeh_stats.total_mmio_ffs, 1123 eeh_stats.false_positives, 1124 eeh_stats.slot_resets); 1125 } 1126 1127 return 0; 1128 } 1129 1130 static int proc_eeh_open(struct inode *inode, struct file *file) 1131 { 1132 return single_open(file, proc_eeh_show, NULL); 1133 } 1134 1135 static const struct file_operations proc_eeh_operations = { 1136 .open = proc_eeh_open, 1137 .read = seq_read, 1138 .llseek = seq_lseek, 1139 .release = single_release, 1140 }; 1141 1142 #ifdef CONFIG_DEBUG_FS 1143 static int eeh_enable_dbgfs_set(void *data, u64 val) 1144 { 1145 if (val) 1146 eeh_subsystem_flags &= ~EEH_FORCE_DISABLED; 1147 else 1148 eeh_subsystem_flags |= EEH_FORCE_DISABLED; 1149 1150 /* Notify the backend */ 1151 if (eeh_ops->post_init) 1152 eeh_ops->post_init(); 1153 1154 return 0; 1155 } 1156 1157 static int eeh_enable_dbgfs_get(void *data, u64 *val) 1158 { 1159 if (eeh_enabled()) 1160 *val = 0x1ul; 1161 else 1162 *val = 0x0ul; 1163 return 0; 1164 } 1165 1166 DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, 1167 eeh_enable_dbgfs_set, "0x%llx\n"); 1168 #endif 1169 1170 static int __init eeh_init_proc(void) 1171 { 1172 if (machine_is(pseries) || machine_is(powernv)) { 1173 
proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); 1174 #ifdef CONFIG_DEBUG_FS 1175 debugfs_create_file("eeh_enable", 0600, 1176 powerpc_debugfs_root, NULL, 1177 &eeh_enable_dbgfs_ops); 1178 #endif 1179 } 1180 1181 return 0; 1182 } 1183 __initcall(eeh_init_proc); 1184