/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>


/** Overview:
 * EEH, or "Extended Error Handling" is a PCI bridge technology for
 * dealing with PCI bus errors that can't be dealt with within the
 * usual PCI framework, except by check-stopping the CPU. Systems
 * that are designed for high-availability/reliability cannot afford
 * to crash due to a "mere" PCI error, thus the need for EEH.
 * An EEH-capable bridge operates by converting a detected error
 * into a "slot freeze", taking the PCI adapter off-line, making
 * the slot behave, from the OS's point of view, as if the slot
 * were "empty": all reads return 0xff's and all writes are silently
 * ignored. EEH slot isolation events can be triggered by parity
 * errors on the address or data busses (e.g. during posted writes),
 * which in turn might be caused by low voltage on the bus, dust,
 * vibration, humidity, radioactivity or plain-old failed hardware.
 *
 * Note, however, that one of the leading causes of EEH slot
 * freeze events is buggy device drivers, buggy device microcode,
 * or buggy device hardware. This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze. (The idea
 * is to prevent devices-gone-wild from corrupting system memory.)
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day, the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros. If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */
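/*
 * Illustrative driver-side check (hypothetical driver code, not part
 * of this file): a driver that suspects a slot freeze after an
 * all-ones read can ask explicitly, assuming a hypothetical "regs"
 * iomem pointer and "STATUS" register offset:
 *
 *	u32 val = readl(regs + STATUS);
 *
 *	if (val == 0xffffffff && eeh_check_failure(regs + STATUS))
 *		return -EIO;
 *
 * A non-zero return means the slot is frozen and a recovery event has
 * already been queued for processing.
 */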
/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI enumeration based on the device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * a particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * EEH allowed maximal frozen times. If one particular PE's
 * frozen count in the last hour exceeds this limit, the PE will
 * be forced to be offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * This struct maintains the global EEH statistics, which are
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

#define IS_BRIDGE(class_code) (((class_code) >> 16) == PCI_BASE_CLASS_BRIDGE)

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);
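/*
 * Example boot usage of the parameter handled above: "eeh=off" on the
 * kernel command line forces EEH off globally, while "eeh=early_log"
 * turns on early dumping of the error log.
 */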
/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts them into a buffer
 * for RTAS error logging.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
		       edev->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
		edev->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i=0; i<=13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}

static void *eeh_dump_pe_log(void *data, void *flag)
{
	struct eeh_pe *pe = data;
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	/* If the PE's config space is blocked, 0xFF's will be
	 * returned. It's pointless to collect the log in this
	 * case.
	 */
	if (pe->state & EEH_PE_CFG_BLOCKED)
		return NULL;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}
/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
		eeh_ops->configure_bridge(pe);
		eeh_pe_restore_bars(pe);

		pci_regs_buf[0] = 0;
		eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here (this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}
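/*
 * A worked example (illustrative numbers only): with 64K pages
 * (PAGE_SHIFT == 16), a token whose PTE maps pfn 0x8000 and whose
 * in-page offset is 0x123 converts to (0x8000 << 16) | 0x123, i.e.
 * physical address 0x80000123.
 */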
/*
 * On the PowerNV platform, we might already have a fenced PHB
 * there. In that case, it's meaningless to recover the frozen PE.
 * Instead, we have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%d\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}
/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct pci_dn *pdn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			 eeh_pci_name(dev));
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced
	 * PHB there, and we need to take care of that first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			pdn = eeh_dev_to_pdn(edev);
			if (pdn->node)
				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure. This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt if any of the following holds: we failed to
	 * get the PE's state, EEH isn't supported, the PE is
	 * permanently unavailable, or the PE is in a good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It's a corner case that the parent PE has been put into
	 * frozen state as well; we should take care of that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs. Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened. So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
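/*
 * Illustrative caller (hypothetical driver code): a driver that has
 * already looked up its eeh_dev can skip the address-cache lookup of
 * eeh_check_failure() and check a suspect all-ones read directly,
 * e.g. with a hypothetical "regs"/"STATUS" pair:
 *
 *	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 *
 *	if (readl(regs + STATUS) == 0xffffffff &&
 *	    eeh_dev_check_failure(edev))
 *		return -EIO;
 *
 * A return of 1 means the PE has been isolated and a recovery event
 * queued; 0 means the all-ones data was not caused by a freeze.
 */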
/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event. This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: requested option (EEH_OPT_*)
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE. So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%d-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request is finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc <= 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}

static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}

static void *eeh_restore_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return NULL;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);

	return NULL;
}
/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe, false);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		break;
	case pcie_hot_reset:
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		return -EINVAL;
	}

	return 0;
}
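/*
 * Illustrative usage (hypothetical driver code): the generic
 * pci_set_pcie_reset_state() API lands in the routine above on
 * powerpc, so a driver could cycle its slot through a hot reset:
 *
 *	pci_set_pcie_reset_state(pdev, pcie_hot_reset);
 *	msleep(100);
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 *
 * The 100ms settle time is illustrative; it is device specific.
 */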
/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @data: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the bunch of
 * devices could be reset properly.
 */
static void *eeh_set_dev_freset(void *data, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;
	struct eeh_dev *edev = (struct eeh_dev *)data;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

/**
 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
 * @pe: EEH PE
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void eeh_reset_pe_once(struct eeh_pe *pe)
{
	unsigned int freset = 0;

	/* Determine type of EEH reset required for
	 * Partitionable Endpoint, a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
	else
		eeh_ops->reset(pe, EEH_RESET_HOT);

	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
}

/**
 * eeh_reset_pe - Reset the indicated PE
 * @pe: EEH PE
 *
 * This routine should be called to reset the indicated device,
 * including the PE. A PE might include multiple PCI devices and
 * sometimes PCI bridges might be involved as well.
 */
int eeh_reset_pe(struct eeh_pe *pe)
{
	int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	int i, state, ret;

	/* Mark as reset and block config space */
	eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);

	/* Take three shots at resetting the bus */
	for (i = 0; i < 3; i++) {
		eeh_reset_pe_once(pe);

		/*
		 * EEH_PE_ISOLATED is expected to be removed after
		 * BAR restore.
		 */
		state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if ((state & flags) == flags) {
			ret = 0;
			goto out;
		}

		if (state < 0) {
			pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x\n",
				__func__, pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			goto out;
		}

		/* We might run out of credits */
		ret = -EIO;
		pr_warn("%s: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n",
			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
	}

out:
	eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	return ret;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges, including the root port, we need to enable
	 * bus mastering explicitly. Otherwise, they can't fetch IODA
	 * table entries correctly. So we cache the bit in advance so
	 * that we can restore it after reset, either PHB range or PE
	 * range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}
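/*
 * A minimal registration sketch (hypothetical platform "foo"; the
 * real implementations live in the pseries and powernv platform
 * code, and only callbacks actually used by this file are shown):
 *
 *	static struct eeh_ops foo_eeh_ops = {
 *		.name		= "foo",
 *		.init		= foo_eeh_init,
 *		.probe		= foo_eeh_probe,
 *		.set_option	= foo_eeh_set_option,
 *		.get_state	= foo_eeh_get_state,
 *		.wait_state	= foo_eeh_wait_state,
 *		.reset		= foo_eeh_reset,
 *		.get_log	= foo_eeh_get_log,
 *		.configure_bridge = foo_eeh_configure_bridge,
 *		.read_config	= foo_eeh_read_config,
 *	};
 *
 * The platform would then call eeh_ops_register(&foo_eeh_ops) early,
 * before any other EEH operation runs.
 */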
static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check. If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV until after
	 * the PCI hierarchy tree has been built, because the PEs are
	 * figured out based on PCI devices instead of device tree
	 * nodes.
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}

	/*
	 * Call platform post-initialization. It's a good chance to
	 * inform the platform that EEH is ready to supply service
	 * once the I/O cache has been built up.
	 */
	if (eeh_ops->post_init) {
		ret = eeh_ops->post_init();
		if (ret)
			return ret;
	}

	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_warn("EEH: No capable adapters found\n");

	return ret;
}

core_initcall_sync(eeh_init);

/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, type of the device, on earlier boot
 * command-line arguments & etc.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev || !eeh_enabled())
		return;

	/* USB Bus children of PCI devices will not have BUID's */
	phb = edev->phb;
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}
/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @pdn: PCI device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * an unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device(). So we have to remove
	 * it here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device should have been removed, even
		 * though it wasn't done correctly. So we needn't
		 * call into the error handlers afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
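/*
 * Illustrative hotplug ordering (a sketch; the exact call sites are
 * in the platform hotplug and PCI core code):
 *
 *	eeh_add_device_tree_early(pdn);		before any config cycles
 *	...scan/rescan the bus, creating pci_dev's...
 *	eeh_add_device_tree_late(bus);		bind eeh_dev to pci_dev
 *	eeh_add_sysfs_files(bus);		after device_add()
 */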
/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing the device from the PCI subsystem, which
	 * means the PCI device driver doesn't support EEH, or not
	 * well. So we rely completely on hotplug to do the recovery
	 * for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}


static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE     */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs a PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			goto reset;
		}
	}

	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}
/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase count of passed through devices for the indicated
 * PE. In the result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease count of pass through devices for the indicated PE. If
 * there is no passed through device in PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);
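/*
 * Illustrative pass-through sequence (a sketch; the real caller is
 * the VFIO EEH support code):
 *
 *	eeh_dev_open(pdev);	EEH errors on the PE now belong to the
 *				user; the kernel stops reporting them
 *	...device is driven from user space / the guest...
 *	eeh_dev_release(pdev);	the kernel resumes EEH handling
 */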
#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;
	struct iommu_table *tbl;

	if (!dev)
		return 0;

	tbl = get_iommu_table_base(dev);
	if (tbl && tbl->it_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for the frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, just
	 * return an error for that case. The EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which includes 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}
/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which is banned because it would
		 * otherwise cause a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			 __func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges, affected by the PE
 * reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open      = proc_eeh_open,
	.read      = seq_read,
	.llseek    = seq_lseek,
	.release   = single_release,
};

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
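/*
 * With debugfs mounted at the usual location, the files created from
 * these attributes (see eeh_init_proc() below) would typically appear
 * as:
 *
 *	/sys/kernel/debug/powerpc/eeh_enable		0x1 = enabled
 *	/sys/kernel/debug/powerpc/eeh_max_freezes	frozen-count limit
 *
 * For example, "echo 10 > /sys/kernel/debug/powerpc/eeh_max_freezes"
 * raises the per-hour freeze limit checked against eeh_max_freezes.
 */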
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);
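/*
 * Example (illustrative output shape only): reading the procfs file
 * created above prints the eeh_stats counters from proc_eeh_show():
 *
 *	# cat /proc/powerpc/eeh
 *	EEH Subsystem is enabled
 *	no device=<count>
 *	no device node=<count>
 *	no config address=<count>
 *	check not wanted=<count>
 *	eeh_total_mmio_ffs=<count>
 *	eeh_false_positives=<count>
 *	eeh_slot_resets=<count>
 */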