1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/debugfs.h> 26 #include <linux/sched.h> 27 #include <linux/init.h> 28 #include <linux/list.h> 29 #include <linux/pci.h> 30 #include <linux/iommu.h> 31 #include <linux/proc_fs.h> 32 #include <linux/rbtree.h> 33 #include <linux/reboot.h> 34 #include <linux/seq_file.h> 35 #include <linux/spinlock.h> 36 #include <linux/export.h> 37 #include <linux/of.h> 38 39 #include <linux/atomic.h> 40 #include <asm/debug.h> 41 #include <asm/eeh.h> 42 #include <asm/eeh_event.h> 43 #include <asm/io.h> 44 #include <asm/iommu.h> 45 #include <asm/machdep.h> 46 #include <asm/ppc-pci.h> 47 #include <asm/rtas.h> 48 49 50 /** Overview: 51 * EEH, or "Extended Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. 
 * Systems
 * that are designed for high-availability/reliability cannot afford
 * to crash due to a "mere" PCI error, thus the need for EEH.
 * An EEH-capable bridge operates by converting a detected error
 * into a "slot freeze", taking the PCI adapter off-line, making
 * the slot behave, from the OS's point of view, as if the slot
 * were "empty": all reads return 0xff's and all writes are silently
 * ignored.  EEH slot isolation events can be triggered by parity
 * errors on the address or data buses (e.g. during posted writes),
 * which in turn might be caused by low voltage on the bus, dust,
 * vibration, humidity, radioactivity or plain-old failed hardware.
 *
 * Note, however, that one of the leading causes of EEH slot
 * freeze events is buggy device drivers, buggy device microcode,
 * or buggy device hardware.  This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze.   (The idea
 * is to prevent devices-gone-wild from corrupting system memory).
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day,  the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros.  If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
88 */ 89 #define EEH_MAX_FAILS 2100000 90 91 /* Time to wait for a PCI slot to report status, in milliseconds */ 92 #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) 93 94 /* 95 * EEH probe mode support, which is part of the flags, 96 * is to support multiple platforms for EEH. Some platforms 97 * like pSeries do PCI emunation based on device tree. 98 * However, other platforms like powernv probe PCI devices 99 * from hardware. The flag is used to distinguish that. 100 * In addition, struct eeh_ops::probe would be invoked for 101 * particular OF node or PCI device so that the corresponding 102 * PE would be created there. 103 */ 104 int eeh_subsystem_flags; 105 EXPORT_SYMBOL(eeh_subsystem_flags); 106 107 /* Platform dependent EEH operations */ 108 struct eeh_ops *eeh_ops = NULL; 109 110 /* Lock to avoid races due to multiple reports of an error */ 111 DEFINE_RAW_SPINLOCK(confirm_error_lock); 112 113 /* Lock to protect passed flags */ 114 static DEFINE_MUTEX(eeh_dev_mutex); 115 116 /* Buffer for reporting pci register dumps. Its here in BSS, and 117 * not dynamically alloced, so that it ends up in RMO where RTAS 118 * can access it. 119 */ 120 #define EEH_PCI_REGS_LOG_LEN 4096 121 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; 122 123 /* 124 * The struct is used to maintain the EEH global statistic 125 * information. 
Besides, the EEH global statistics will be 126 * exported to user space through procfs 127 */ 128 struct eeh_stats { 129 u64 no_device; /* PCI device not found */ 130 u64 no_dn; /* OF node not found */ 131 u64 no_cfg_addr; /* Config address not found */ 132 u64 ignored_check; /* EEH check skipped */ 133 u64 total_mmio_ffs; /* Total EEH checks */ 134 u64 false_positives; /* Unnecessary EEH checks */ 135 u64 slot_resets; /* PE reset */ 136 }; 137 138 static struct eeh_stats eeh_stats; 139 140 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) 141 142 static int __init eeh_setup(char *str) 143 { 144 if (!strcmp(str, "off")) 145 eeh_add_flag(EEH_FORCE_DISABLED); 146 147 return 1; 148 } 149 __setup("eeh=", eeh_setup); 150 151 /** 152 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff 153 * @edev: device to report data for 154 * @buf: point to buffer in which to log 155 * @len: amount of room in buffer 156 * 157 * This routine captures assorted PCI configuration space data, 158 * and puts them into a buffer for RTAS error logging. 
159 */ 160 static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len) 161 { 162 struct device_node *dn = eeh_dev_to_of_node(edev); 163 u32 cfg; 164 int cap, i; 165 int n = 0, l = 0; 166 char buffer[128]; 167 168 n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); 169 pr_warn("EEH: of node=%s\n", dn->full_name); 170 171 eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); 172 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 173 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 174 175 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); 176 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 177 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 178 179 /* Gather bridge-specific registers */ 180 if (edev->mode & EEH_DEV_BRIDGE) { 181 eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); 182 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 183 pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 184 185 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); 186 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 187 pr_warn("EEH: Bridge control: %04x\n", cfg); 188 } 189 190 /* Dump out the PCI-X command and status regs */ 191 cap = edev->pcix_cap; 192 if (cap) { 193 eeh_ops->read_config(dn, cap, 4, &cfg); 194 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 195 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 196 197 eeh_ops->read_config(dn, cap+4, 4, &cfg); 198 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 199 pr_warn("EEH: PCI-X status: %08x\n", cfg); 200 } 201 202 /* If PCI-E capable, dump PCI-E cap 10 */ 203 cap = edev->pcie_cap; 204 if (cap) { 205 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 206 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 207 208 for (i=0; i<=8; i++) { 209 eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 210 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 211 212 if ((i % 4) == 0) { 213 if (i != 0) 214 pr_warn("%s\n", buffer); 215 216 l = scnprintf(buffer, sizeof(buffer), 217 "EEH: PCI-E %02x: %08x ", 218 
4*i, cfg); 219 } else { 220 l += scnprintf(buffer+l, sizeof(buffer)-l, 221 "%08x ", cfg); 222 } 223 224 } 225 226 pr_warn("%s\n", buffer); 227 } 228 229 /* If AER capable, dump it */ 230 cap = edev->aer_cap; 231 if (cap) { 232 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 233 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 234 235 for (i=0; i<=13; i++) { 236 eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 237 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 238 239 if ((i % 4) == 0) { 240 if (i != 0) 241 pr_warn("%s\n", buffer); 242 243 l = scnprintf(buffer, sizeof(buffer), 244 "EEH: PCI-E AER %02x: %08x ", 245 4*i, cfg); 246 } else { 247 l += scnprintf(buffer+l, sizeof(buffer)-l, 248 "%08x ", cfg); 249 } 250 } 251 252 pr_warn("%s\n", buffer); 253 } 254 255 return n; 256 } 257 258 /** 259 * eeh_slot_error_detail - Generate combined log including driver log and error log 260 * @pe: EEH PE 261 * @severity: temporary or permanent error log 262 * 263 * This routine should be called to generate the combined log, which 264 * is comprised of driver log and error log. The driver log is figured 265 * out from the config space of the corresponding PCI device, while 266 * the error log is fetched through platform dependent function call. 267 */ 268 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 269 { 270 size_t loglen = 0; 271 struct eeh_dev *edev, *tmp; 272 273 /* 274 * When the PHB is fenced or dead, it's pointless to collect 275 * the data from PCI config space because it should return 276 * 0xFF's. For ER, we still retrieve the data from the PCI 277 * config space. 278 * 279 * For pHyp, we have to enable IO for log retrieval. Otherwise, 280 * 0xFF's is always returned from PCI config space. 
281 */ 282 if (!(pe->type & EEH_PE_PHB)) { 283 if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) 284 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 285 eeh_ops->configure_bridge(pe); 286 eeh_pe_restore_bars(pe); 287 288 pci_regs_buf[0] = 0; 289 eeh_pe_for_each_dev(pe, edev, tmp) { 290 loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, 291 EEH_PCI_REGS_LOG_LEN - loglen); 292 } 293 } 294 295 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 296 } 297 298 /** 299 * eeh_token_to_phys - Convert EEH address token to phys address 300 * @token: I/O token, should be address in the form 0xA.... 301 * 302 * This routine should be called to convert virtual I/O address 303 * to physical one. 304 */ 305 static inline unsigned long eeh_token_to_phys(unsigned long token) 306 { 307 pte_t *ptep; 308 unsigned long pa; 309 int hugepage_shift; 310 311 /* 312 * We won't find hugepages here, iomem 313 */ 314 ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); 315 if (!ptep) 316 return token; 317 WARN_ON(hugepage_shift); 318 pa = pte_pfn(*ptep) << PAGE_SHIFT; 319 320 return pa | (token & (PAGE_SIZE-1)); 321 } 322 323 /* 324 * On PowerNV platform, we might already have fenced PHB there. 325 * For that case, it's meaningless to recover frozen PE. Intead, 326 * We have to handle fenced PHB firstly. 
327 */ 328 static int eeh_phb_check_failure(struct eeh_pe *pe) 329 { 330 struct eeh_pe *phb_pe; 331 unsigned long flags; 332 int ret; 333 334 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 335 return -EPERM; 336 337 /* Find the PHB PE */ 338 phb_pe = eeh_phb_pe_get(pe->phb); 339 if (!phb_pe) { 340 pr_warn("%s Can't find PE for PHB#%d\n", 341 __func__, pe->phb->global_number); 342 return -EEXIST; 343 } 344 345 /* If the PHB has been in problematic state */ 346 eeh_serialize_lock(&flags); 347 if (phb_pe->state & EEH_PE_ISOLATED) { 348 ret = 0; 349 goto out; 350 } 351 352 /* Check PHB state */ 353 ret = eeh_ops->get_state(phb_pe, NULL); 354 if ((ret < 0) || 355 (ret == EEH_STATE_NOT_SUPPORT) || 356 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 357 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 358 ret = 0; 359 goto out; 360 } 361 362 /* Isolate the PHB and send event */ 363 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 364 eeh_serialize_unlock(flags); 365 366 pr_err("EEH: PHB#%x failure detected, location: %s\n", 367 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 368 dump_stack(); 369 eeh_send_failure_event(phb_pe); 370 371 return 1; 372 out: 373 eeh_serialize_unlock(flags); 374 return ret; 375 } 376 377 /** 378 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 379 * @edev: eeh device 380 * 381 * Check for an EEH failure for the given device node. Call this 382 * routine if the result of a read was all 0xff's and you want to 383 * find out if this is due to an EEH slot freeze. This routine 384 * will query firmware for the EEH status. 385 * 386 * Returns 0 if there has not been an EEH error; otherwise returns 387 * a non-zero value and queues up a slot isolation event notification. 388 * 389 * It is safe to call this routine in an interrupt context. 
390 */ 391 int eeh_dev_check_failure(struct eeh_dev *edev) 392 { 393 int ret; 394 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 395 unsigned long flags; 396 struct device_node *dn; 397 struct pci_dev *dev; 398 struct eeh_pe *pe, *parent_pe, *phb_pe; 399 int rc = 0; 400 const char *location; 401 402 eeh_stats.total_mmio_ffs++; 403 404 if (!eeh_enabled()) 405 return 0; 406 407 if (!edev) { 408 eeh_stats.no_dn++; 409 return 0; 410 } 411 dn = eeh_dev_to_of_node(edev); 412 dev = eeh_dev_to_pci_dev(edev); 413 pe = edev->pe; 414 415 /* Access to IO BARs might get this far and still not want checking. */ 416 if (!pe) { 417 eeh_stats.ignored_check++; 418 pr_debug("EEH: Ignored check for %s %s\n", 419 eeh_pci_name(dev), dn->full_name); 420 return 0; 421 } 422 423 if (!pe->addr && !pe->config_addr) { 424 eeh_stats.no_cfg_addr++; 425 return 0; 426 } 427 428 /* 429 * On PowerNV platform, we might already have fenced PHB 430 * there and we need take care of that firstly. 431 */ 432 ret = eeh_phb_check_failure(pe); 433 if (ret > 0) 434 return ret; 435 436 /* 437 * If the PE isn't owned by us, we shouldn't check the 438 * state. Instead, let the owner handle it if the PE has 439 * been frozen. 440 */ 441 if (eeh_pe_passed(pe)) 442 return 0; 443 444 /* If we already have a pending isolation event for this 445 * slot, we know it's bad already, we don't need to check. 446 * Do this checking under a lock; as multiple PCI devices 447 * in one slot might report errors simultaneously, and we 448 * only want one error recovery routine running. 
449 */ 450 eeh_serialize_lock(&flags); 451 rc = 1; 452 if (pe->state & EEH_PE_ISOLATED) { 453 pe->check_count++; 454 if (pe->check_count % EEH_MAX_FAILS == 0) { 455 location = of_get_property(dn, "ibm,loc-code", NULL); 456 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 457 "location=%s driver=%s pci addr=%s\n", 458 pe->check_count, location, 459 eeh_driver_name(dev), eeh_pci_name(dev)); 460 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 461 eeh_driver_name(dev)); 462 dump_stack(); 463 } 464 goto dn_unlock; 465 } 466 467 /* 468 * Now test for an EEH failure. This is VERY expensive. 469 * Note that the eeh_config_addr may be a parent device 470 * in the case of a device behind a bridge, or it may be 471 * function zero of a multi-function device. 472 * In any case they must share a common PHB. 473 */ 474 ret = eeh_ops->get_state(pe, NULL); 475 476 /* Note that config-io to empty slots may fail; 477 * they are empty when they don't have children. 478 * We will punt with the following conditions: Failure to get 479 * PE's state, EEH not support and Permanently unavailable 480 * state, PE is in good state. 481 */ 482 if ((ret < 0) || 483 (ret == EEH_STATE_NOT_SUPPORT) || 484 ((ret & active_flags) == active_flags)) { 485 eeh_stats.false_positives++; 486 pe->false_positives++; 487 rc = 0; 488 goto dn_unlock; 489 } 490 491 /* 492 * It should be corner case that the parent PE has been 493 * put into frozen state as well. We should take care 494 * that at first. 495 */ 496 parent_pe = pe->parent; 497 while (parent_pe) { 498 /* Hit the ceiling ? */ 499 if (parent_pe->type & EEH_PE_PHB) 500 break; 501 502 /* Frozen parent PE ? 
*/ 503 ret = eeh_ops->get_state(parent_pe, NULL); 504 if (ret > 0 && 505 (ret & active_flags) != active_flags) 506 pe = parent_pe; 507 508 /* Next parent level */ 509 parent_pe = parent_pe->parent; 510 } 511 512 eeh_stats.slot_resets++; 513 514 /* Avoid repeated reports of this failure, including problems 515 * with other functions on this device, and functions under 516 * bridges. 517 */ 518 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 519 eeh_serialize_unlock(flags); 520 521 /* Most EEH events are due to device driver bugs. Having 522 * a stack trace will help the device-driver authors figure 523 * out what happened. So print that out. 524 */ 525 phb_pe = eeh_phb_pe_get(pe->phb); 526 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 527 pe->phb->global_number, pe->addr); 528 pr_err("EEH: PE location: %s, PHB location: %s\n", 529 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 530 dump_stack(); 531 532 eeh_send_failure_event(pe); 533 534 return 1; 535 536 dn_unlock: 537 eeh_serialize_unlock(flags); 538 return rc; 539 } 540 541 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 542 543 /** 544 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 545 * @token: I/O token, should be address in the form 0xA.... 546 * @val: value, should be all 1's (XXX why do we need this arg??) 547 * 548 * Check for an EEH failure at the given token address. Call this 549 * routine if the result of a read was all 0xff's and you want to 550 * find out if this is due to an EEH slot freeze event. This routine 551 * will query firmware for the EEH status. 552 * 553 * Note this routine is safe to call in an interrupt context. 554 */ 555 unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) 556 { 557 unsigned long addr; 558 struct eeh_dev *edev; 559 560 /* Finding the phys addr + pci device; this is pretty quick. 
*/ 561 addr = eeh_token_to_phys((unsigned long __force) token); 562 edev = eeh_addr_cache_get_dev(addr); 563 if (!edev) { 564 eeh_stats.no_device++; 565 return val; 566 } 567 568 eeh_dev_check_failure(edev); 569 return val; 570 } 571 572 EXPORT_SYMBOL(eeh_check_failure); 573 574 575 /** 576 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 577 * @pe: EEH PE 578 * 579 * This routine should be called to reenable frozen MMIO or DMA 580 * so that it would work correctly again. It's useful while doing 581 * recovery or log collection on the indicated device. 582 */ 583 int eeh_pci_enable(struct eeh_pe *pe, int function) 584 { 585 int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 586 587 /* 588 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 589 * Also, it's pointless to enable them on unfrozen PE. So 590 * we have the check here. 591 */ 592 if (function == EEH_OPT_THAW_MMIO || 593 function == EEH_OPT_THAW_DMA) { 594 rc = eeh_ops->get_state(pe, NULL); 595 if (rc < 0) 596 return rc; 597 598 /* Needn't to enable or already enabled */ 599 if ((rc == EEH_STATE_NOT_SUPPORT) || 600 ((rc & flags) == flags)) 601 return 0; 602 } 603 604 rc = eeh_ops->set_option(pe, function); 605 if (rc) 606 pr_warn("%s: Unexpected state change %d on " 607 "PHB#%d-PE#%x, err=%d\n", 608 __func__, function, pe->phb->global_number, 609 pe->addr, rc); 610 611 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 612 if (rc <= 0) 613 return rc; 614 615 if ((function == EEH_OPT_THAW_MMIO) && 616 (rc & EEH_STATE_MMIO_ENABLED)) 617 return 0; 618 619 if ((function == EEH_OPT_THAW_DMA) && 620 (rc & EEH_STATE_DMA_ENABLED)) 621 return 0; 622 623 return rc; 624 } 625 626 /** 627 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 628 * @dev: pci device struct 629 * @state: reset state to enter 630 * 631 * Return value: 632 * 0 if success 633 */ 634 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 635 { 636 struct eeh_dev *edev = 
pci_dev_to_eeh_dev(dev); 637 struct eeh_pe *pe = edev->pe; 638 639 if (!pe) { 640 pr_err("%s: No PE found on PCI device %s\n", 641 __func__, pci_name(dev)); 642 return -EINVAL; 643 } 644 645 switch (state) { 646 case pcie_deassert_reset: 647 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 648 break; 649 case pcie_hot_reset: 650 eeh_ops->reset(pe, EEH_RESET_HOT); 651 break; 652 case pcie_warm_reset: 653 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 654 break; 655 default: 656 return -EINVAL; 657 }; 658 659 return 0; 660 } 661 662 /** 663 * eeh_set_pe_freset - Check the required reset for the indicated device 664 * @data: EEH device 665 * @flag: return value 666 * 667 * Each device might have its preferred reset type: fundamental or 668 * hot reset. The routine is used to collected the information for 669 * the indicated device and its children so that the bunch of the 670 * devices could be reset properly. 671 */ 672 static void *eeh_set_dev_freset(void *data, void *flag) 673 { 674 struct pci_dev *dev; 675 unsigned int *freset = (unsigned int *)flag; 676 struct eeh_dev *edev = (struct eeh_dev *)data; 677 678 dev = eeh_dev_to_pci_dev(edev); 679 if (dev) 680 *freset |= dev->needs_freset; 681 682 return NULL; 683 } 684 685 /** 686 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 687 * @pe: EEH PE 688 * 689 * Assert the PCI #RST line for 1/4 second. 690 */ 691 static void eeh_reset_pe_once(struct eeh_pe *pe) 692 { 693 unsigned int freset = 0; 694 695 /* Determine type of EEH reset required for 696 * Partitionable Endpoint, a hot-reset (1) 697 * or a fundamental reset (3). 698 * A fundamental reset required by any device under 699 * Partitionable Endpoint trumps hot-reset. 
700 */ 701 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 702 703 if (freset) 704 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 705 else 706 eeh_ops->reset(pe, EEH_RESET_HOT); 707 708 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 709 } 710 711 /** 712 * eeh_reset_pe - Reset the indicated PE 713 * @pe: EEH PE 714 * 715 * This routine should be called to reset indicated device, including 716 * PE. A PE might include multiple PCI devices and sometimes PCI bridges 717 * might be involved as well. 718 */ 719 int eeh_reset_pe(struct eeh_pe *pe) 720 { 721 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 722 int i, rc; 723 724 /* Take three shots at resetting the bus */ 725 for (i=0; i<3; i++) { 726 eeh_reset_pe_once(pe); 727 728 /* 729 * EEH_PE_ISOLATED is expected to be removed after 730 * BAR restore. 731 */ 732 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 733 if ((rc & flags) == flags) 734 return 0; 735 736 if (rc < 0) { 737 pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", 738 __func__, pe->phb->global_number, pe->addr); 739 return -1; 740 } 741 pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", 742 i+1, pe->phb->global_number, pe->addr, rc); 743 } 744 745 return -1; 746 } 747 748 /** 749 * eeh_save_bars - Save device bars 750 * @edev: PCI device associated EEH device 751 * 752 * Save the values of the device bars. Unlike the restore 753 * routine, this routine is *not* recursive. This is because 754 * PCI devices are added individually; but, for the restore, 755 * an entire slot is reset at a time. 756 */ 757 void eeh_save_bars(struct eeh_dev *edev) 758 { 759 int i; 760 struct device_node *dn; 761 762 if (!edev) 763 return; 764 dn = eeh_dev_to_of_node(edev); 765 766 for (i = 0; i < 16; i++) 767 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); 768 769 /* 770 * For PCI bridges including root port, we need enable bus 771 * master explicitly. Otherwise, it can't fetch IODA table 772 * entries correctly. 
So we cache the bit in advance so that 773 * we can restore it after reset, either PHB range or PE range. 774 */ 775 if (edev->mode & EEH_DEV_BRIDGE) 776 edev->config_space[1] |= PCI_COMMAND_MASTER; 777 } 778 779 /** 780 * eeh_ops_register - Register platform dependent EEH operations 781 * @ops: platform dependent EEH operations 782 * 783 * Register the platform dependent EEH operation callback 784 * functions. The platform should call this function before 785 * any other EEH operations. 786 */ 787 int __init eeh_ops_register(struct eeh_ops *ops) 788 { 789 if (!ops->name) { 790 pr_warn("%s: Invalid EEH ops name for %p\n", 791 __func__, ops); 792 return -EINVAL; 793 } 794 795 if (eeh_ops && eeh_ops != ops) { 796 pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 797 __func__, eeh_ops->name, ops->name); 798 return -EEXIST; 799 } 800 801 eeh_ops = ops; 802 803 return 0; 804 } 805 806 /** 807 * eeh_ops_unregister - Unreigster platform dependent EEH operations 808 * @name: name of EEH platform operations 809 * 810 * Unregister the platform dependent EEH operation callback 811 * functions. 812 */ 813 int __exit eeh_ops_unregister(const char *name) 814 { 815 if (!name || !strlen(name)) { 816 pr_warn("%s: Invalid EEH ops name\n", 817 __func__); 818 return -EINVAL; 819 } 820 821 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 822 eeh_ops = NULL; 823 return 0; 824 } 825 826 return -EEXIST; 827 } 828 829 static int eeh_reboot_notifier(struct notifier_block *nb, 830 unsigned long action, void *unused) 831 { 832 eeh_clear_flag(EEH_ENABLED); 833 return NOTIFY_DONE; 834 } 835 836 static struct notifier_block eeh_reboot_nb = { 837 .notifier_call = eeh_reboot_notifier, 838 }; 839 840 /** 841 * eeh_init - EEH initialization 842 * 843 * Initialize EEH by trying to enable it for all of the adapters in the system. 844 * As a side effect we can determine here if eeh is supported at all. 845 * Note that we leave EEH on so failed config cycles won't cause a machine 846 * check. 
If a user turns off EEH for a particular adapter they are really 847 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 848 * grant access to a slot if EEH isn't enabled, and so we always enable 849 * EEH for all slots/all devices. 850 * 851 * The eeh-force-off option disables EEH checking globally, for all slots. 852 * Even if force-off is set, the EEH hardware is still enabled, so that 853 * newer systems can boot. 854 */ 855 int eeh_init(void) 856 { 857 struct pci_controller *hose, *tmp; 858 struct device_node *phb; 859 static int cnt = 0; 860 int ret = 0; 861 862 /* 863 * We have to delay the initialization on PowerNV after 864 * the PCI hierarchy tree has been built because the PEs 865 * are figured out based on PCI devices instead of device 866 * tree nodes 867 */ 868 if (machine_is(powernv) && cnt++ <= 0) 869 return ret; 870 871 /* Register reboot notifier */ 872 ret = register_reboot_notifier(&eeh_reboot_nb); 873 if (ret) { 874 pr_warn("%s: Failed to register notifier (%d)\n", 875 __func__, ret); 876 return ret; 877 } 878 879 /* call platform initialization function */ 880 if (!eeh_ops) { 881 pr_warn("%s: Platform EEH operation not found\n", 882 __func__); 883 return -EEXIST; 884 } else if ((ret = eeh_ops->init())) { 885 pr_warn("%s: Failed to call platform init function (%d)\n", 886 __func__, ret); 887 return ret; 888 } 889 890 /* Initialize EEH event */ 891 ret = eeh_event_init(); 892 if (ret) 893 return ret; 894 895 /* Enable EEH for all adapters */ 896 if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) { 897 list_for_each_entry_safe(hose, tmp, 898 &hose_list, list_node) { 899 phb = hose->dn; 900 traverse_pci_devices(phb, eeh_ops->of_probe, NULL); 901 } 902 } else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) { 903 list_for_each_entry_safe(hose, tmp, 904 &hose_list, list_node) 905 pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); 906 } else { 907 pr_warn("%s: Invalid probe mode %x", 908 __func__, eeh_subsystem_flags); 909 return -EINVAL; 910 } 911 912 
/* 913 * Call platform post-initialization. Actually, It's good chance 914 * to inform platform that EEH is ready to supply service if the 915 * I/O cache stuff has been built up. 916 */ 917 if (eeh_ops->post_init) { 918 ret = eeh_ops->post_init(); 919 if (ret) 920 return ret; 921 } 922 923 if (eeh_enabled()) 924 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 925 else 926 pr_warn("EEH: No capable adapters found\n"); 927 928 return ret; 929 } 930 931 core_initcall_sync(eeh_init); 932 933 /** 934 * eeh_add_device_early - Enable EEH for the indicated device_node 935 * @dn: device node for which to set up EEH 936 * 937 * This routine must be used to perform EEH initialization for PCI 938 * devices that were added after system boot (e.g. hotplug, dlpar). 939 * This routine must be called before any i/o is performed to the 940 * adapter (inluding any config-space i/o). 941 * Whether this actually enables EEH or not for this device depends 942 * on the CEC architecture, type of the device, on earlier boot 943 * command-line arguments & etc. 944 */ 945 void eeh_add_device_early(struct device_node *dn) 946 { 947 struct pci_controller *phb; 948 949 /* 950 * If we're doing EEH probe based on PCI device, we 951 * would delay the probe until late stage because 952 * the PCI device isn't available this moment. 953 */ 954 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 955 return; 956 957 if (!of_node_to_eeh_dev(dn)) 958 return; 959 phb = of_node_to_eeh_dev(dn)->phb; 960 961 /* USB Bus children of PCI devices will not have BUID's */ 962 if (NULL == phb || 0 == phb->buid) 963 return; 964 965 eeh_ops->of_probe(dn, NULL); 966 } 967 968 /** 969 * eeh_add_device_tree_early - Enable EEH for the indicated device 970 * @dn: device node 971 * 972 * This routine must be used to perform EEH initialization for the 973 * indicated PCI device that was added after system boot (e.g. 974 * hotplug, dlpar). 
975 */ 976 void eeh_add_device_tree_early(struct device_node *dn) 977 { 978 struct device_node *sib; 979 980 for_each_child_of_node(dn, sib) 981 eeh_add_device_tree_early(sib); 982 eeh_add_device_early(dn); 983 } 984 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 985 986 /** 987 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 988 * @dev: pci device for which to set up EEH 989 * 990 * This routine must be used to complete EEH initialization for PCI 991 * devices that were added after system boot (e.g. hotplug, dlpar). 992 */ 993 void eeh_add_device_late(struct pci_dev *dev) 994 { 995 struct device_node *dn; 996 struct eeh_dev *edev; 997 998 if (!dev || !eeh_enabled()) 999 return; 1000 1001 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1002 1003 dn = pci_device_to_OF_node(dev); 1004 edev = of_node_to_eeh_dev(dn); 1005 if (edev->pdev == dev) { 1006 pr_debug("EEH: Already referenced !\n"); 1007 return; 1008 } 1009 1010 /* 1011 * The EEH cache might not be removed correctly because of 1012 * unbalanced kref to the device during unplug time, which 1013 * relies on pcibios_release_device(). So we have to remove 1014 * that here explicitly. 1015 */ 1016 if (edev->pdev) { 1017 eeh_rmv_from_parent_pe(edev); 1018 eeh_addr_cache_rmv_dev(edev->pdev); 1019 eeh_sysfs_remove_device(edev->pdev); 1020 edev->mode &= ~EEH_DEV_SYSFS; 1021 1022 /* 1023 * We definitely should have the PCI device removed 1024 * though it wasn't correctly. So we needn't call 1025 * into error handler afterwards. 1026 */ 1027 edev->mode |= EEH_DEV_NO_HANDLER; 1028 1029 edev->pdev = NULL; 1030 dev->dev.archdata.edev = NULL; 1031 } 1032 1033 edev->pdev = dev; 1034 dev->dev.archdata.edev = edev; 1035 1036 /* 1037 * We have to do the EEH probe here because the PCI device 1038 * hasn't been created yet in the early stage. 
1039 */ 1040 if (eeh_has_flag(EEH_PROBE_MODE_DEV)) 1041 eeh_ops->dev_probe(dev, NULL); 1042 1043 eeh_addr_cache_insert_dev(dev); 1044 } 1045 1046 /** 1047 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1048 * @bus: PCI bus 1049 * 1050 * This routine must be used to perform EEH initialization for PCI 1051 * devices which are attached to the indicated PCI bus. The PCI bus 1052 * is added after system boot through hotplug or dlpar. 1053 */ 1054 void eeh_add_device_tree_late(struct pci_bus *bus) 1055 { 1056 struct pci_dev *dev; 1057 1058 list_for_each_entry(dev, &bus->devices, bus_list) { 1059 eeh_add_device_late(dev); 1060 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1061 struct pci_bus *subbus = dev->subordinate; 1062 if (subbus) 1063 eeh_add_device_tree_late(subbus); 1064 } 1065 } 1066 } 1067 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1068 1069 /** 1070 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1071 * @bus: PCI bus 1072 * 1073 * This routine must be used to add EEH sysfs files for PCI 1074 * devices which are attached to the indicated PCI bus. The PCI bus 1075 * is added after system boot through hotplug or dlpar. 1076 */ 1077 void eeh_add_sysfs_files(struct pci_bus *bus) 1078 { 1079 struct pci_dev *dev; 1080 1081 list_for_each_entry(dev, &bus->devices, bus_list) { 1082 eeh_sysfs_add_device(dev); 1083 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1084 struct pci_bus *subbus = dev->subordinate; 1085 if (subbus) 1086 eeh_add_sysfs_files(subbus); 1087 } 1088 } 1089 } 1090 EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); 1091 1092 /** 1093 * eeh_remove_device - Undo EEH setup for the indicated pci device 1094 * @dev: pci device to be removed 1095 * 1096 * This routine should be called when a device is removed from 1097 * a running system (e.g. by hotplug or dlpar). It unregisters 1098 * the PCI device from the EEH subsystem. 
I/O errors affecting 1099 * this device will no longer be detected after this call; thus, 1100 * i/o errors affecting this slot may leave this device unusable. 1101 */ 1102 void eeh_remove_device(struct pci_dev *dev) 1103 { 1104 struct eeh_dev *edev; 1105 1106 if (!dev || !eeh_enabled()) 1107 return; 1108 edev = pci_dev_to_eeh_dev(dev); 1109 1110 /* Unregister the device with the EEH/PCI address search system */ 1111 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 1112 1113 if (!edev || !edev->pdev || !edev->pe) { 1114 pr_debug("EEH: Not referenced !\n"); 1115 return; 1116 } 1117 1118 /* 1119 * During the hotplug for EEH error recovery, we need the EEH 1120 * device attached to the parent PE in order for BAR restore 1121 * a bit later. So we keep it for BAR restore and remove it 1122 * from the parent PE during the BAR resotre. 1123 */ 1124 edev->pdev = NULL; 1125 dev->dev.archdata.edev = NULL; 1126 if (!(edev->pe->state & EEH_PE_KEEP)) 1127 eeh_rmv_from_parent_pe(edev); 1128 else 1129 edev->mode |= EEH_DEV_DISCONNECTED; 1130 1131 /* 1132 * We're removing from the PCI subsystem, that means 1133 * the PCI device driver can't support EEH or not 1134 * well. So we rely on hotplug completely to do recovery 1135 * for the specific PCI device. 1136 */ 1137 edev->mode |= EEH_DEV_NO_HANDLER; 1138 1139 eeh_addr_cache_rmv_dev(dev); 1140 eeh_sysfs_remove_device(dev); 1141 edev->mode &= ~EEH_DEV_SYSFS; 1142 } 1143 1144 /** 1145 * eeh_dev_open - Increase count of pass through devices for PE 1146 * @pdev: PCI device 1147 * 1148 * Increase count of passed through devices for the indicated 1149 * PE. In the result, the EEH errors detected on the PE won't be 1150 * reported. The PE owner will be responsible for detection 1151 * and recovery. 1152 */ 1153 int eeh_dev_open(struct pci_dev *pdev) 1154 { 1155 struct eeh_dev *edev; 1156 1157 mutex_lock(&eeh_dev_mutex); 1158 1159 /* No PCI device ? */ 1160 if (!pdev) 1161 goto out; 1162 1163 /* No EEH device or PE ? 
*/ 1164 edev = pci_dev_to_eeh_dev(pdev); 1165 if (!edev || !edev->pe) 1166 goto out; 1167 1168 /* Increase PE's pass through count */ 1169 atomic_inc(&edev->pe->pass_dev_cnt); 1170 mutex_unlock(&eeh_dev_mutex); 1171 1172 return 0; 1173 out: 1174 mutex_unlock(&eeh_dev_mutex); 1175 return -ENODEV; 1176 } 1177 EXPORT_SYMBOL_GPL(eeh_dev_open); 1178 1179 /** 1180 * eeh_dev_release - Decrease count of pass through devices for PE 1181 * @pdev: PCI device 1182 * 1183 * Decrease count of pass through devices for the indicated PE. If 1184 * there is no passed through device in PE, the EEH errors detected 1185 * on the PE will be reported and handled as usual. 1186 */ 1187 void eeh_dev_release(struct pci_dev *pdev) 1188 { 1189 struct eeh_dev *edev; 1190 1191 mutex_lock(&eeh_dev_mutex); 1192 1193 /* No PCI device ? */ 1194 if (!pdev) 1195 goto out; 1196 1197 /* No EEH device ? */ 1198 edev = pci_dev_to_eeh_dev(pdev); 1199 if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) 1200 goto out; 1201 1202 /* Decrease PE's pass through count */ 1203 atomic_dec(&edev->pe->pass_dev_cnt); 1204 WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0); 1205 out: 1206 mutex_unlock(&eeh_dev_mutex); 1207 } 1208 EXPORT_SYMBOL(eeh_dev_release); 1209 1210 #ifdef CONFIG_IOMMU_API 1211 1212 static int dev_has_iommu_table(struct device *dev, void *data) 1213 { 1214 struct pci_dev *pdev = to_pci_dev(dev); 1215 struct pci_dev **ppdev = data; 1216 struct iommu_table *tbl; 1217 1218 if (!dev) 1219 return 0; 1220 1221 tbl = get_iommu_table_base(dev); 1222 if (tbl && tbl->it_group) { 1223 *ppdev = pdev; 1224 return 1; 1225 } 1226 1227 return 0; 1228 } 1229 1230 /** 1231 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE 1232 * @group: IOMMU group 1233 * 1234 * The routine is called to convert IOMMU group to EEH PE. 1235 */ 1236 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) 1237 { 1238 struct pci_dev *pdev = NULL; 1239 struct eeh_dev *edev; 1240 int ret; 1241 1242 /* No IOMMU group ? 
*/ 1243 if (!group) 1244 return NULL; 1245 1246 ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); 1247 if (!ret || !pdev) 1248 return NULL; 1249 1250 /* No EEH device or PE ? */ 1251 edev = pci_dev_to_eeh_dev(pdev); 1252 if (!edev || !edev->pe) 1253 return NULL; 1254 1255 return edev->pe; 1256 } 1257 EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); 1258 1259 #endif /* CONFIG_IOMMU_API */ 1260 1261 /** 1262 * eeh_pe_set_option - Set options for the indicated PE 1263 * @pe: EEH PE 1264 * @option: requested option 1265 * 1266 * The routine is called to enable or disable EEH functionality 1267 * on the indicated PE, to enable IO or DMA for the frozen PE. 1268 */ 1269 int eeh_pe_set_option(struct eeh_pe *pe, int option) 1270 { 1271 int ret = 0; 1272 1273 /* Invalid PE ? */ 1274 if (!pe) 1275 return -ENODEV; 1276 1277 /* 1278 * EEH functionality could possibly be disabled, just 1279 * return error for the case. And the EEH functinality 1280 * isn't expected to be disabled on one specific PE. 1281 */ 1282 switch (option) { 1283 case EEH_OPT_ENABLE: 1284 if (eeh_enabled()) 1285 break; 1286 ret = -EIO; 1287 break; 1288 case EEH_OPT_DISABLE: 1289 break; 1290 case EEH_OPT_THAW_MMIO: 1291 case EEH_OPT_THAW_DMA: 1292 if (!eeh_ops || !eeh_ops->set_option) { 1293 ret = -ENOENT; 1294 break; 1295 } 1296 1297 ret = eeh_ops->set_option(pe, option); 1298 break; 1299 default: 1300 pr_debug("%s: Option %d out of range (%d, %d)\n", 1301 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); 1302 ret = -EINVAL; 1303 } 1304 1305 return ret; 1306 } 1307 EXPORT_SYMBOL_GPL(eeh_pe_set_option); 1308 1309 /** 1310 * eeh_pe_get_state - Retrieve PE's state 1311 * @pe: EEH PE 1312 * 1313 * Retrieve the PE's state, which includes 3 aspects: enabled 1314 * DMA, enabled IO and asserted reset. 1315 */ 1316 int eeh_pe_get_state(struct eeh_pe *pe) 1317 { 1318 int result, ret = 0; 1319 bool rst_active, dma_en, mmio_en; 1320 1321 /* Existing PE ? 
*/ 1322 if (!pe) 1323 return -ENODEV; 1324 1325 if (!eeh_ops || !eeh_ops->get_state) 1326 return -ENOENT; 1327 1328 result = eeh_ops->get_state(pe, NULL); 1329 rst_active = !!(result & EEH_STATE_RESET_ACTIVE); 1330 dma_en = !!(result & EEH_STATE_DMA_ENABLED); 1331 mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); 1332 1333 if (rst_active) 1334 ret = EEH_PE_STATE_RESET; 1335 else if (dma_en && mmio_en) 1336 ret = EEH_PE_STATE_NORMAL; 1337 else if (!dma_en && !mmio_en) 1338 ret = EEH_PE_STATE_STOPPED_IO_DMA; 1339 else if (!dma_en && mmio_en) 1340 ret = EEH_PE_STATE_STOPPED_DMA; 1341 else 1342 ret = EEH_PE_STATE_UNAVAIL; 1343 1344 return ret; 1345 } 1346 EXPORT_SYMBOL_GPL(eeh_pe_get_state); 1347 1348 /** 1349 * eeh_pe_reset - Issue PE reset according to specified type 1350 * @pe: EEH PE 1351 * @option: reset type 1352 * 1353 * The routine is called to reset the specified PE with the 1354 * indicated type, either fundamental reset or hot reset. 1355 * PE reset is the most important part for error recovery. 1356 */ 1357 int eeh_pe_reset(struct eeh_pe *pe, int option) 1358 { 1359 int ret = 0; 1360 1361 /* Invalid PE ? */ 1362 if (!pe) 1363 return -ENODEV; 1364 1365 if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) 1366 return -ENOENT; 1367 1368 switch (option) { 1369 case EEH_RESET_DEACTIVATE: 1370 ret = eeh_ops->reset(pe, option); 1371 if (ret) 1372 break; 1373 1374 /* 1375 * The PE is still in frozen state and we need to clear 1376 * that. It's good to clear frozen state after deassert 1377 * to avoid messy IO access during reset, which might 1378 * cause recursive frozen PE. 
1379 */ 1380 ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO); 1381 if (!ret) 1382 ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA); 1383 if (!ret) 1384 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 1385 break; 1386 case EEH_RESET_HOT: 1387 case EEH_RESET_FUNDAMENTAL: 1388 ret = eeh_ops->reset(pe, option); 1389 break; 1390 default: 1391 pr_debug("%s: Unsupported option %d\n", 1392 __func__, option); 1393 ret = -EINVAL; 1394 } 1395 1396 return ret; 1397 } 1398 EXPORT_SYMBOL_GPL(eeh_pe_reset); 1399 1400 /** 1401 * eeh_pe_configure - Configure PCI bridges after PE reset 1402 * @pe: EEH PE 1403 * 1404 * The routine is called to restore the PCI config space for 1405 * those PCI devices, especially PCI bridges affected by PE 1406 * reset issued previously. 1407 */ 1408 int eeh_pe_configure(struct eeh_pe *pe) 1409 { 1410 int ret = 0; 1411 1412 /* Invalid PE ? */ 1413 if (!pe) 1414 return -ENODEV; 1415 1416 /* Restore config space for the affected devices */ 1417 eeh_pe_restore_bars(pe); 1418 1419 return ret; 1420 } 1421 EXPORT_SYMBOL_GPL(eeh_pe_configure); 1422 1423 static int proc_eeh_show(struct seq_file *m, void *v) 1424 { 1425 if (!eeh_enabled()) { 1426 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1427 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1428 } else { 1429 seq_printf(m, "EEH Subsystem is enabled\n"); 1430 seq_printf(m, 1431 "no device=%llu\n" 1432 "no device node=%llu\n" 1433 "no config address=%llu\n" 1434 "check not wanted=%llu\n" 1435 "eeh_total_mmio_ffs=%llu\n" 1436 "eeh_false_positives=%llu\n" 1437 "eeh_slot_resets=%llu\n", 1438 eeh_stats.no_device, 1439 eeh_stats.no_dn, 1440 eeh_stats.no_cfg_addr, 1441 eeh_stats.ignored_check, 1442 eeh_stats.total_mmio_ffs, 1443 eeh_stats.false_positives, 1444 eeh_stats.slot_resets); 1445 } 1446 1447 return 0; 1448 } 1449 1450 static int proc_eeh_open(struct inode *inode, struct file *file) 1451 { 1452 return single_open(file, proc_eeh_show, NULL); 1453 } 1454 1455 static const struct 
file_operations proc_eeh_operations = { 1456 .open = proc_eeh_open, 1457 .read = seq_read, 1458 .llseek = seq_lseek, 1459 .release = single_release, 1460 }; 1461 1462 #ifdef CONFIG_DEBUG_FS 1463 static int eeh_enable_dbgfs_set(void *data, u64 val) 1464 { 1465 if (val) 1466 eeh_clear_flag(EEH_FORCE_DISABLED); 1467 else 1468 eeh_add_flag(EEH_FORCE_DISABLED); 1469 1470 /* Notify the backend */ 1471 if (eeh_ops->post_init) 1472 eeh_ops->post_init(); 1473 1474 return 0; 1475 } 1476 1477 static int eeh_enable_dbgfs_get(void *data, u64 *val) 1478 { 1479 if (eeh_enabled()) 1480 *val = 0x1ul; 1481 else 1482 *val = 0x0ul; 1483 return 0; 1484 } 1485 1486 DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, 1487 eeh_enable_dbgfs_set, "0x%llx\n"); 1488 #endif 1489 1490 static int __init eeh_init_proc(void) 1491 { 1492 if (machine_is(pseries) || machine_is(powernv)) { 1493 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); 1494 #ifdef CONFIG_DEBUG_FS 1495 debugfs_create_file("eeh_enable", 0600, 1496 powerpc_debugfs_root, NULL, 1497 &eeh_enable_dbgfs_ops); 1498 #endif 1499 } 1500 1501 return 0; 1502 } 1503 __initcall(eeh_init_proc); 1504