1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/debugfs.h> 26 #include <linux/sched.h> 27 #include <linux/init.h> 28 #include <linux/list.h> 29 #include <linux/pci.h> 30 #include <linux/iommu.h> 31 #include <linux/proc_fs.h> 32 #include <linux/rbtree.h> 33 #include <linux/reboot.h> 34 #include <linux/seq_file.h> 35 #include <linux/spinlock.h> 36 #include <linux/export.h> 37 #include <linux/of.h> 38 39 #include <linux/atomic.h> 40 #include <asm/debug.h> 41 #include <asm/eeh.h> 42 #include <asm/eeh_event.h> 43 #include <asm/io.h> 44 #include <asm/iommu.h> 45 #include <asm/machdep.h> 46 #include <asm/ppc-pci.h> 47 #include <asm/rtas.h> 48 49 50 /** Overview: 51 * EEH, or "Extended Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. 
Systems 54 * that are designed for high-availability/reliability cannot afford 55 * to crash due to a "mere" PCI error, thus the need for EEH. 56 * An EEH-capable bridge operates by converting a detected error 57 * into a "slot freeze", taking the PCI adapter off-line, making 58 * the slot behave, from the OS'es point of view, as if the slot 59 * were "empty": all reads return 0xff's and all writes are silently 60 * ignored. EEH slot isolation events can be triggered by parity 61 * errors on the address or data busses (e.g. during posted writes), 62 * which in turn might be caused by low voltage on the bus, dust, 63 * vibration, humidity, radioactivity or plain-old failed hardware. 64 * 65 * Note, however, that one of the leading causes of EEH slot 66 * freeze events are buggy device drivers, buggy device microcode, 67 * or buggy device hardware. This is because any attempt by the 68 * device to bus-master data to a memory address that is not 69 * assigned to the device will trigger a slot freeze. (The idea 70 * is to prevent devices-gone-wild from corrupting system memory). 71 * Buggy hardware/drivers will have a miserable time co-existing 72 * with EEH. 73 * 74 * Ideally, a PCI device driver, when suspecting that an isolation 75 * event has occurred (e.g. by reading 0xff's), will then ask EEH 76 * whether this is the case, and then take appropriate steps to 77 * reset the PCI slot, the PCI device, and then resume operations. 78 * However, until that day, the checking is done here, with the 79 * eeh_check_failure() routine embedded in the MMIO macros. If 80 * the slot is found to be isolated, an "EEH Event" is synthesized 81 * and sent out for processing. 82 */ 83 84 /* If a device driver keeps reading an MMIO register in an interrupt 85 * handler after a slot isolation event, it might be broken. 86 * This sets the threshold for how many read attempts we allow 87 * before printing an error message. 
88 */ 89 #define EEH_MAX_FAILS 2100000 90 91 /* Time to wait for a PCI slot to report status, in milliseconds */ 92 #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) 93 94 /* 95 * EEH probe mode support, which is part of the flags, 96 * is to support multiple platforms for EEH. Some platforms 97 * like pSeries do PCI emunation based on device tree. 98 * However, other platforms like powernv probe PCI devices 99 * from hardware. The flag is used to distinguish that. 100 * In addition, struct eeh_ops::probe would be invoked for 101 * particular OF node or PCI device so that the corresponding 102 * PE would be created there. 103 */ 104 int eeh_subsystem_flags; 105 EXPORT_SYMBOL(eeh_subsystem_flags); 106 107 /* 108 * EEH allowed maximal frozen times. If one particular PE's 109 * frozen count in last hour exceeds this limit, the PE will 110 * be forced to be offline permanently. 111 */ 112 int eeh_max_freezes = 5; 113 114 /* Platform dependent EEH operations */ 115 struct eeh_ops *eeh_ops = NULL; 116 117 /* Lock to avoid races due to multiple reports of an error */ 118 DEFINE_RAW_SPINLOCK(confirm_error_lock); 119 120 /* Lock to protect passed flags */ 121 static DEFINE_MUTEX(eeh_dev_mutex); 122 123 /* Buffer for reporting pci register dumps. Its here in BSS, and 124 * not dynamically alloced, so that it ends up in RMO where RTAS 125 * can access it. 126 */ 127 #define EEH_PCI_REGS_LOG_LEN 8192 128 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; 129 130 /* 131 * The struct is used to maintain the EEH global statistic 132 * information. 
Besides, the EEH global statistics will be 133 * exported to user space through procfs 134 */ 135 struct eeh_stats { 136 u64 no_device; /* PCI device not found */ 137 u64 no_dn; /* OF node not found */ 138 u64 no_cfg_addr; /* Config address not found */ 139 u64 ignored_check; /* EEH check skipped */ 140 u64 total_mmio_ffs; /* Total EEH checks */ 141 u64 false_positives; /* Unnecessary EEH checks */ 142 u64 slot_resets; /* PE reset */ 143 }; 144 145 static struct eeh_stats eeh_stats; 146 147 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) 148 149 static int __init eeh_setup(char *str) 150 { 151 if (!strcmp(str, "off")) 152 eeh_add_flag(EEH_FORCE_DISABLED); 153 else if (!strcmp(str, "early_log")) 154 eeh_add_flag(EEH_EARLY_DUMP_LOG); 155 156 return 1; 157 } 158 __setup("eeh=", eeh_setup); 159 160 /* 161 * This routine captures assorted PCI configuration space data 162 * for the indicated PCI device, and puts them into a buffer 163 * for RTAS error logging. 164 */ 165 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) 166 { 167 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 168 u32 cfg; 169 int cap, i; 170 int n = 0, l = 0; 171 char buffer[128]; 172 173 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", 174 edev->phb->global_number, pdn->busno, 175 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 176 pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", 177 edev->phb->global_number, pdn->busno, 178 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 179 180 eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 181 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 182 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 183 184 eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); 185 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 186 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 187 188 /* Gather bridge-specific registers */ 189 if (edev->mode & EEH_DEV_BRIDGE) { 190 eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); 191 n += 
scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 192 pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 193 194 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); 195 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 196 pr_warn("EEH: Bridge control: %04x\n", cfg); 197 } 198 199 /* Dump out the PCI-X command and status regs */ 200 cap = edev->pcix_cap; 201 if (cap) { 202 eeh_ops->read_config(pdn, cap, 4, &cfg); 203 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 204 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 205 206 eeh_ops->read_config(pdn, cap+4, 4, &cfg); 207 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 208 pr_warn("EEH: PCI-X status: %08x\n", cfg); 209 } 210 211 /* If PCI-E capable, dump PCI-E cap 10 */ 212 cap = edev->pcie_cap; 213 if (cap) { 214 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 215 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 216 217 for (i=0; i<=8; i++) { 218 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 219 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 220 221 if ((i % 4) == 0) { 222 if (i != 0) 223 pr_warn("%s\n", buffer); 224 225 l = scnprintf(buffer, sizeof(buffer), 226 "EEH: PCI-E %02x: %08x ", 227 4*i, cfg); 228 } else { 229 l += scnprintf(buffer+l, sizeof(buffer)-l, 230 "%08x ", cfg); 231 } 232 233 } 234 235 pr_warn("%s\n", buffer); 236 } 237 238 /* If AER capable, dump it */ 239 cap = edev->aer_cap; 240 if (cap) { 241 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 242 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 243 244 for (i=0; i<=13; i++) { 245 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 246 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 247 248 if ((i % 4) == 0) { 249 if (i != 0) 250 pr_warn("%s\n", buffer); 251 252 l = scnprintf(buffer, sizeof(buffer), 253 "EEH: PCI-E AER %02x: %08x ", 254 4*i, cfg); 255 } else { 256 l += scnprintf(buffer+l, sizeof(buffer)-l, 257 "%08x ", cfg); 258 } 259 } 260 261 pr_warn("%s\n", buffer); 262 } 263 264 return n; 265 } 266 267 static void 
*eeh_dump_pe_log(void *data, void *flag) 268 { 269 struct eeh_pe *pe = data; 270 struct eeh_dev *edev, *tmp; 271 size_t *plen = flag; 272 273 /* If the PE's config space is blocked, 0xFF's will be 274 * returned. It's pointless to collect the log in this 275 * case. 276 */ 277 if (pe->state & EEH_PE_CFG_BLOCKED) 278 return NULL; 279 280 eeh_pe_for_each_dev(pe, edev, tmp) 281 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, 282 EEH_PCI_REGS_LOG_LEN - *plen); 283 284 return NULL; 285 } 286 287 /** 288 * eeh_slot_error_detail - Generate combined log including driver log and error log 289 * @pe: EEH PE 290 * @severity: temporary or permanent error log 291 * 292 * This routine should be called to generate the combined log, which 293 * is comprised of driver log and error log. The driver log is figured 294 * out from the config space of the corresponding PCI device, while 295 * the error log is fetched through platform dependent function call. 296 */ 297 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 298 { 299 size_t loglen = 0; 300 301 /* 302 * When the PHB is fenced or dead, it's pointless to collect 303 * the data from PCI config space because it should return 304 * 0xFF's. For ER, we still retrieve the data from the PCI 305 * config space. 306 * 307 * For pHyp, we have to enable IO for log retrieval. Otherwise, 308 * 0xFF's is always returned from PCI config space. 309 */ 310 if (!(pe->type & EEH_PE_PHB)) { 311 if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) 312 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 313 eeh_ops->configure_bridge(pe); 314 eeh_pe_restore_bars(pe); 315 316 pci_regs_buf[0] = 0; 317 eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); 318 } 319 320 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 321 } 322 323 /** 324 * eeh_token_to_phys - Convert EEH address token to phys address 325 * @token: I/O token, should be address in the form 0xA.... 326 * 327 * This routine should be called to convert virtual I/O address 328 * to physical one. 
329 */ 330 static inline unsigned long eeh_token_to_phys(unsigned long token) 331 { 332 pte_t *ptep; 333 unsigned long pa; 334 int hugepage_shift; 335 336 /* 337 * We won't find hugepages here(this is iomem). Hence we are not 338 * worried about _PAGE_SPLITTING/collapse. Also we will not hit 339 * page table free, because of init_mm. 340 */ 341 ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); 342 if (!ptep) 343 return token; 344 WARN_ON(hugepage_shift); 345 pa = pte_pfn(*ptep) << PAGE_SHIFT; 346 347 return pa | (token & (PAGE_SIZE-1)); 348 } 349 350 /* 351 * On PowerNV platform, we might already have fenced PHB there. 352 * For that case, it's meaningless to recover frozen PE. Intead, 353 * We have to handle fenced PHB firstly. 354 */ 355 static int eeh_phb_check_failure(struct eeh_pe *pe) 356 { 357 struct eeh_pe *phb_pe; 358 unsigned long flags; 359 int ret; 360 361 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 362 return -EPERM; 363 364 /* Find the PHB PE */ 365 phb_pe = eeh_phb_pe_get(pe->phb); 366 if (!phb_pe) { 367 pr_warn("%s Can't find PE for PHB#%d\n", 368 __func__, pe->phb->global_number); 369 return -EEXIST; 370 } 371 372 /* If the PHB has been in problematic state */ 373 eeh_serialize_lock(&flags); 374 if (phb_pe->state & EEH_PE_ISOLATED) { 375 ret = 0; 376 goto out; 377 } 378 379 /* Check PHB state */ 380 ret = eeh_ops->get_state(phb_pe, NULL); 381 if ((ret < 0) || 382 (ret == EEH_STATE_NOT_SUPPORT) || 383 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 384 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 385 ret = 0; 386 goto out; 387 } 388 389 /* Isolate the PHB and send event */ 390 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 391 eeh_serialize_unlock(flags); 392 393 pr_err("EEH: PHB#%x failure detected, location: %s\n", 394 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 395 dump_stack(); 396 eeh_send_failure_event(phb_pe); 397 398 return 1; 399 out: 400 eeh_serialize_unlock(flags); 401 return ret; 402 } 403 404 /** 
405 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 406 * @edev: eeh device 407 * 408 * Check for an EEH failure for the given device node. Call this 409 * routine if the result of a read was all 0xff's and you want to 410 * find out if this is due to an EEH slot freeze. This routine 411 * will query firmware for the EEH status. 412 * 413 * Returns 0 if there has not been an EEH error; otherwise returns 414 * a non-zero value and queues up a slot isolation event notification. 415 * 416 * It is safe to call this routine in an interrupt context. 417 */ 418 int eeh_dev_check_failure(struct eeh_dev *edev) 419 { 420 int ret; 421 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 422 unsigned long flags; 423 struct pci_dn *pdn; 424 struct pci_dev *dev; 425 struct eeh_pe *pe, *parent_pe, *phb_pe; 426 int rc = 0; 427 const char *location = NULL; 428 429 eeh_stats.total_mmio_ffs++; 430 431 if (!eeh_enabled()) 432 return 0; 433 434 if (!edev) { 435 eeh_stats.no_dn++; 436 return 0; 437 } 438 dev = eeh_dev_to_pci_dev(edev); 439 pe = eeh_dev_to_pe(edev); 440 441 /* Access to IO BARs might get this far and still not want checking. */ 442 if (!pe) { 443 eeh_stats.ignored_check++; 444 pr_debug("EEH: Ignored check for %s\n", 445 eeh_pci_name(dev)); 446 return 0; 447 } 448 449 if (!pe->addr && !pe->config_addr) { 450 eeh_stats.no_cfg_addr++; 451 return 0; 452 } 453 454 /* 455 * On PowerNV platform, we might already have fenced PHB 456 * there and we need take care of that firstly. 457 */ 458 ret = eeh_phb_check_failure(pe); 459 if (ret > 0) 460 return ret; 461 462 /* 463 * If the PE isn't owned by us, we shouldn't check the 464 * state. Instead, let the owner handle it if the PE has 465 * been frozen. 466 */ 467 if (eeh_pe_passed(pe)) 468 return 0; 469 470 /* If we already have a pending isolation event for this 471 * slot, we know it's bad already, we don't need to check. 
472 * Do this checking under a lock; as multiple PCI devices 473 * in one slot might report errors simultaneously, and we 474 * only want one error recovery routine running. 475 */ 476 eeh_serialize_lock(&flags); 477 rc = 1; 478 if (pe->state & EEH_PE_ISOLATED) { 479 pe->check_count++; 480 if (pe->check_count % EEH_MAX_FAILS == 0) { 481 pdn = eeh_dev_to_pdn(edev); 482 if (pdn->node) 483 location = of_get_property(pdn->node, "ibm,loc-code", NULL); 484 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 485 "location=%s driver=%s pci addr=%s\n", 486 pe->check_count, 487 location ? location : "unknown", 488 eeh_driver_name(dev), eeh_pci_name(dev)); 489 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 490 eeh_driver_name(dev)); 491 dump_stack(); 492 } 493 goto dn_unlock; 494 } 495 496 /* 497 * Now test for an EEH failure. This is VERY expensive. 498 * Note that the eeh_config_addr may be a parent device 499 * in the case of a device behind a bridge, or it may be 500 * function zero of a multi-function device. 501 * In any case they must share a common PHB. 502 */ 503 ret = eeh_ops->get_state(pe, NULL); 504 505 /* Note that config-io to empty slots may fail; 506 * they are empty when they don't have children. 507 * We will punt with the following conditions: Failure to get 508 * PE's state, EEH not support and Permanently unavailable 509 * state, PE is in good state. 510 */ 511 if ((ret < 0) || 512 (ret == EEH_STATE_NOT_SUPPORT) || 513 ((ret & active_flags) == active_flags)) { 514 eeh_stats.false_positives++; 515 pe->false_positives++; 516 rc = 0; 517 goto dn_unlock; 518 } 519 520 /* 521 * It should be corner case that the parent PE has been 522 * put into frozen state as well. We should take care 523 * that at first. 524 */ 525 parent_pe = pe->parent; 526 while (parent_pe) { 527 /* Hit the ceiling ? */ 528 if (parent_pe->type & EEH_PE_PHB) 529 break; 530 531 /* Frozen parent PE ? 
*/ 532 ret = eeh_ops->get_state(parent_pe, NULL); 533 if (ret > 0 && 534 (ret & active_flags) != active_flags) 535 pe = parent_pe; 536 537 /* Next parent level */ 538 parent_pe = parent_pe->parent; 539 } 540 541 eeh_stats.slot_resets++; 542 543 /* Avoid repeated reports of this failure, including problems 544 * with other functions on this device, and functions under 545 * bridges. 546 */ 547 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 548 eeh_serialize_unlock(flags); 549 550 /* Most EEH events are due to device driver bugs. Having 551 * a stack trace will help the device-driver authors figure 552 * out what happened. So print that out. 553 */ 554 phb_pe = eeh_phb_pe_get(pe->phb); 555 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 556 pe->phb->global_number, pe->addr); 557 pr_err("EEH: PE location: %s, PHB location: %s\n", 558 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 559 dump_stack(); 560 561 eeh_send_failure_event(pe); 562 563 return 1; 564 565 dn_unlock: 566 eeh_serialize_unlock(flags); 567 return rc; 568 } 569 570 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 571 572 /** 573 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 574 * @token: I/O address 575 * 576 * Check for an EEH failure at the given I/O address. Call this 577 * routine if the result of a read was all 0xff's and you want to 578 * find out if this is due to an EEH slot freeze event. This routine 579 * will query firmware for the EEH status. 580 * 581 * Note this routine is safe to call in an interrupt context. 582 */ 583 int eeh_check_failure(const volatile void __iomem *token) 584 { 585 unsigned long addr; 586 struct eeh_dev *edev; 587 588 /* Finding the phys addr + pci device; this is pretty quick. 
*/ 589 addr = eeh_token_to_phys((unsigned long __force) token); 590 edev = eeh_addr_cache_get_dev(addr); 591 if (!edev) { 592 eeh_stats.no_device++; 593 return 0; 594 } 595 596 return eeh_dev_check_failure(edev); 597 } 598 EXPORT_SYMBOL(eeh_check_failure); 599 600 601 /** 602 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 603 * @pe: EEH PE 604 * 605 * This routine should be called to reenable frozen MMIO or DMA 606 * so that it would work correctly again. It's useful while doing 607 * recovery or log collection on the indicated device. 608 */ 609 int eeh_pci_enable(struct eeh_pe *pe, int function) 610 { 611 int active_flag, rc; 612 613 /* 614 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 615 * Also, it's pointless to enable them on unfrozen PE. So 616 * we have to check before enabling IO or DMA. 617 */ 618 switch (function) { 619 case EEH_OPT_THAW_MMIO: 620 active_flag = EEH_STATE_MMIO_ACTIVE; 621 break; 622 case EEH_OPT_THAW_DMA: 623 active_flag = EEH_STATE_DMA_ACTIVE; 624 break; 625 case EEH_OPT_DISABLE: 626 case EEH_OPT_ENABLE: 627 case EEH_OPT_FREEZE_PE: 628 active_flag = 0; 629 break; 630 default: 631 pr_warn("%s: Invalid function %d\n", 632 __func__, function); 633 return -EINVAL; 634 } 635 636 /* 637 * Check if IO or DMA has been enabled before 638 * enabling them. 
639 */ 640 if (active_flag) { 641 rc = eeh_ops->get_state(pe, NULL); 642 if (rc < 0) 643 return rc; 644 645 /* Needn't enable it at all */ 646 if (rc == EEH_STATE_NOT_SUPPORT) 647 return 0; 648 649 /* It's already enabled */ 650 if (rc & active_flag) 651 return 0; 652 } 653 654 655 /* Issue the request */ 656 rc = eeh_ops->set_option(pe, function); 657 if (rc) 658 pr_warn("%s: Unexpected state change %d on " 659 "PHB#%d-PE#%x, err=%d\n", 660 __func__, function, pe->phb->global_number, 661 pe->addr, rc); 662 663 /* Check if the request is finished successfully */ 664 if (active_flag) { 665 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 666 if (rc <= 0) 667 return rc; 668 669 if (rc & active_flag) 670 return 0; 671 672 return -EIO; 673 } 674 675 return rc; 676 } 677 678 static void *eeh_disable_and_save_dev_state(void *data, void *userdata) 679 { 680 struct eeh_dev *edev = data; 681 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 682 struct pci_dev *dev = userdata; 683 684 /* 685 * The caller should have disabled and saved the 686 * state for the specified device 687 */ 688 if (!pdev || pdev == dev) 689 return NULL; 690 691 /* Ensure we have D0 power state */ 692 pci_set_power_state(pdev, PCI_D0); 693 694 /* Save device state */ 695 pci_save_state(pdev); 696 697 /* 698 * Disable device to avoid any DMA traffic and 699 * interrupt from the device 700 */ 701 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); 702 703 return NULL; 704 } 705 706 static void *eeh_restore_dev_state(void *data, void *userdata) 707 { 708 struct eeh_dev *edev = data; 709 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 710 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 711 struct pci_dev *dev = userdata; 712 713 if (!pdev) 714 return NULL; 715 716 /* Apply customization from firmware */ 717 if (pdn && eeh_ops->restore_config) 718 eeh_ops->restore_config(pdn); 719 720 /* The caller should restore state for the specified device */ 721 if (pdev != dev) 722 
pci_save_state(pdev); 723 724 return NULL; 725 } 726 727 /** 728 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 729 * @dev: pci device struct 730 * @state: reset state to enter 731 * 732 * Return value: 733 * 0 if success 734 */ 735 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 736 { 737 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 738 struct eeh_pe *pe = eeh_dev_to_pe(edev); 739 740 if (!pe) { 741 pr_err("%s: No PE found on PCI device %s\n", 742 __func__, pci_name(dev)); 743 return -EINVAL; 744 } 745 746 switch (state) { 747 case pcie_deassert_reset: 748 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 749 eeh_unfreeze_pe(pe, false); 750 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 751 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); 752 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 753 break; 754 case pcie_hot_reset: 755 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 756 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 757 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 758 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 759 eeh_ops->reset(pe, EEH_RESET_HOT); 760 break; 761 case pcie_warm_reset: 762 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 763 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 764 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 765 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 766 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 767 break; 768 default: 769 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); 770 return -EINVAL; 771 }; 772 773 return 0; 774 } 775 776 /** 777 * eeh_set_pe_freset - Check the required reset for the indicated device 778 * @data: EEH device 779 * @flag: return value 780 * 781 * Each device might have its preferred reset type: fundamental or 782 * hot reset. The routine is used to collected the information for 783 * the indicated device and its children so that the bunch of the 784 * devices could be reset properly. 
785 */ 786 static void *eeh_set_dev_freset(void *data, void *flag) 787 { 788 struct pci_dev *dev; 789 unsigned int *freset = (unsigned int *)flag; 790 struct eeh_dev *edev = (struct eeh_dev *)data; 791 792 dev = eeh_dev_to_pci_dev(edev); 793 if (dev) 794 *freset |= dev->needs_freset; 795 796 return NULL; 797 } 798 799 /** 800 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 801 * @pe: EEH PE 802 * 803 * Assert the PCI #RST line for 1/4 second. 804 */ 805 static void eeh_reset_pe_once(struct eeh_pe *pe) 806 { 807 unsigned int freset = 0; 808 809 /* Determine type of EEH reset required for 810 * Partitionable Endpoint, a hot-reset (1) 811 * or a fundamental reset (3). 812 * A fundamental reset required by any device under 813 * Partitionable Endpoint trumps hot-reset. 814 */ 815 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 816 817 if (freset) 818 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 819 else 820 eeh_ops->reset(pe, EEH_RESET_HOT); 821 822 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 823 } 824 825 /** 826 * eeh_reset_pe - Reset the indicated PE 827 * @pe: EEH PE 828 * 829 * This routine should be called to reset indicated device, including 830 * PE. A PE might include multiple PCI devices and sometimes PCI bridges 831 * might be involved as well. 832 */ 833 int eeh_reset_pe(struct eeh_pe *pe) 834 { 835 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 836 int i, state, ret; 837 838 /* Mark as reset and block config space */ 839 eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 840 841 /* Take three shots at resetting the bus */ 842 for (i = 0; i < 3; i++) { 843 eeh_reset_pe_once(pe); 844 845 /* 846 * EEH_PE_ISOLATED is expected to be removed after 847 * BAR restore. 
848 */ 849 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 850 if ((state & flags) == flags) { 851 ret = 0; 852 goto out; 853 } 854 855 if (state < 0) { 856 pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", 857 __func__, pe->phb->global_number, pe->addr); 858 ret = -ENOTRECOVERABLE; 859 goto out; 860 } 861 862 /* We might run out of credits */ 863 ret = -EIO; 864 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", 865 __func__, state, pe->phb->global_number, pe->addr, (i + 1)); 866 } 867 868 out: 869 eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 870 return ret; 871 } 872 873 /** 874 * eeh_save_bars - Save device bars 875 * @edev: PCI device associated EEH device 876 * 877 * Save the values of the device bars. Unlike the restore 878 * routine, this routine is *not* recursive. This is because 879 * PCI devices are added individually; but, for the restore, 880 * an entire slot is reset at a time. 881 */ 882 void eeh_save_bars(struct eeh_dev *edev) 883 { 884 struct pci_dn *pdn; 885 int i; 886 887 pdn = eeh_dev_to_pdn(edev); 888 if (!pdn) 889 return; 890 891 for (i = 0; i < 16; i++) 892 eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); 893 894 /* 895 * For PCI bridges including root port, we need enable bus 896 * master explicitly. Otherwise, it can't fetch IODA table 897 * entries correctly. So we cache the bit in advance so that 898 * we can restore it after reset, either PHB range or PE range. 899 */ 900 if (edev->mode & EEH_DEV_BRIDGE) 901 edev->config_space[1] |= PCI_COMMAND_MASTER; 902 } 903 904 /** 905 * eeh_ops_register - Register platform dependent EEH operations 906 * @ops: platform dependent EEH operations 907 * 908 * Register the platform dependent EEH operation callback 909 * functions. The platform should call this function before 910 * any other EEH operations. 
911 */ 912 int __init eeh_ops_register(struct eeh_ops *ops) 913 { 914 if (!ops->name) { 915 pr_warn("%s: Invalid EEH ops name for %p\n", 916 __func__, ops); 917 return -EINVAL; 918 } 919 920 if (eeh_ops && eeh_ops != ops) { 921 pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 922 __func__, eeh_ops->name, ops->name); 923 return -EEXIST; 924 } 925 926 eeh_ops = ops; 927 928 return 0; 929 } 930 931 /** 932 * eeh_ops_unregister - Unreigster platform dependent EEH operations 933 * @name: name of EEH platform operations 934 * 935 * Unregister the platform dependent EEH operation callback 936 * functions. 937 */ 938 int __exit eeh_ops_unregister(const char *name) 939 { 940 if (!name || !strlen(name)) { 941 pr_warn("%s: Invalid EEH ops name\n", 942 __func__); 943 return -EINVAL; 944 } 945 946 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 947 eeh_ops = NULL; 948 return 0; 949 } 950 951 return -EEXIST; 952 } 953 954 static int eeh_reboot_notifier(struct notifier_block *nb, 955 unsigned long action, void *unused) 956 { 957 eeh_clear_flag(EEH_ENABLED); 958 return NOTIFY_DONE; 959 } 960 961 static struct notifier_block eeh_reboot_nb = { 962 .notifier_call = eeh_reboot_notifier, 963 }; 964 965 /** 966 * eeh_init - EEH initialization 967 * 968 * Initialize EEH by trying to enable it for all of the adapters in the system. 969 * As a side effect we can determine here if eeh is supported at all. 970 * Note that we leave EEH on so failed config cycles won't cause a machine 971 * check. If a user turns off EEH for a particular adapter they are really 972 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 973 * grant access to a slot if EEH isn't enabled, and so we always enable 974 * EEH for all slots/all devices. 975 * 976 * The eeh-force-off option disables EEH checking globally, for all slots. 977 * Even if force-off is set, the EEH hardware is still enabled, so that 978 * newer systems can boot. 
979 */ 980 int eeh_init(void) 981 { 982 struct pci_controller *hose, *tmp; 983 struct pci_dn *pdn; 984 static int cnt = 0; 985 int ret = 0; 986 987 /* 988 * We have to delay the initialization on PowerNV after 989 * the PCI hierarchy tree has been built because the PEs 990 * are figured out based on PCI devices instead of device 991 * tree nodes 992 */ 993 if (machine_is(powernv) && cnt++ <= 0) 994 return ret; 995 996 /* Register reboot notifier */ 997 ret = register_reboot_notifier(&eeh_reboot_nb); 998 if (ret) { 999 pr_warn("%s: Failed to register notifier (%d)\n", 1000 __func__, ret); 1001 return ret; 1002 } 1003 1004 /* call platform initialization function */ 1005 if (!eeh_ops) { 1006 pr_warn("%s: Platform EEH operation not found\n", 1007 __func__); 1008 return -EEXIST; 1009 } else if ((ret = eeh_ops->init())) 1010 return ret; 1011 1012 /* Initialize EEH event */ 1013 ret = eeh_event_init(); 1014 if (ret) 1015 return ret; 1016 1017 /* Enable EEH for all adapters */ 1018 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1019 pdn = hose->pci_data; 1020 traverse_pci_dn(pdn, eeh_ops->probe, NULL); 1021 } 1022 1023 /* 1024 * Call platform post-initialization. Actually, It's good chance 1025 * to inform platform that EEH is ready to supply service if the 1026 * I/O cache stuff has been built up. 1027 */ 1028 if (eeh_ops->post_init) { 1029 ret = eeh_ops->post_init(); 1030 if (ret) 1031 return ret; 1032 } 1033 1034 if (eeh_enabled()) 1035 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 1036 else 1037 pr_warn("EEH: No capable adapters found\n"); 1038 1039 return ret; 1040 } 1041 1042 core_initcall_sync(eeh_init); 1043 1044 /** 1045 * eeh_add_device_early - Enable EEH for the indicated device node 1046 * @pdn: PCI device node for which to set up EEH 1047 * 1048 * This routine must be used to perform EEH initialization for PCI 1049 * devices that were added after system boot (e.g. hotplug, dlpar). 
1050 * This routine must be called before any i/o is performed to the 1051 * adapter (inluding any config-space i/o). 1052 * Whether this actually enables EEH or not for this device depends 1053 * on the CEC architecture, type of the device, on earlier boot 1054 * command-line arguments & etc. 1055 */ 1056 void eeh_add_device_early(struct pci_dn *pdn) 1057 { 1058 struct pci_controller *phb; 1059 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1060 1061 if (!edev || !eeh_enabled()) 1062 return; 1063 1064 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1065 return; 1066 1067 /* USB Bus children of PCI devices will not have BUID's */ 1068 phb = edev->phb; 1069 if (NULL == phb || 1070 (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) 1071 return; 1072 1073 eeh_ops->probe(pdn, NULL); 1074 } 1075 1076 /** 1077 * eeh_add_device_tree_early - Enable EEH for the indicated device 1078 * @pdn: PCI device node 1079 * 1080 * This routine must be used to perform EEH initialization for the 1081 * indicated PCI device that was added after system boot (e.g. 1082 * hotplug, dlpar). 1083 */ 1084 void eeh_add_device_tree_early(struct pci_dn *pdn) 1085 { 1086 struct pci_dn *n; 1087 1088 if (!pdn) 1089 return; 1090 1091 list_for_each_entry(n, &pdn->child_list, list) 1092 eeh_add_device_tree_early(n); 1093 eeh_add_device_early(pdn); 1094 } 1095 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1096 1097 /** 1098 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1099 * @dev: pci device for which to set up EEH 1100 * 1101 * This routine must be used to complete EEH initialization for PCI 1102 * devices that were added after system boot (e.g. hotplug, dlpar). 
1103 */ 1104 void eeh_add_device_late(struct pci_dev *dev) 1105 { 1106 struct pci_dn *pdn; 1107 struct eeh_dev *edev; 1108 1109 if (!dev || !eeh_enabled()) 1110 return; 1111 1112 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1113 1114 pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 1115 edev = pdn_to_eeh_dev(pdn); 1116 if (edev->pdev == dev) { 1117 pr_debug("EEH: Already referenced !\n"); 1118 return; 1119 } 1120 1121 if (eeh_has_flag(EEH_PROBE_MODE_DEV)) 1122 eeh_ops->probe(pdn, NULL); 1123 1124 /* 1125 * The EEH cache might not be removed correctly because of 1126 * unbalanced kref to the device during unplug time, which 1127 * relies on pcibios_release_device(). So we have to remove 1128 * that here explicitly. 1129 */ 1130 if (edev->pdev) { 1131 eeh_rmv_from_parent_pe(edev); 1132 eeh_addr_cache_rmv_dev(edev->pdev); 1133 eeh_sysfs_remove_device(edev->pdev); 1134 edev->mode &= ~EEH_DEV_SYSFS; 1135 1136 /* 1137 * We definitely should have the PCI device removed 1138 * though it wasn't correctly. So we needn't call 1139 * into error handler afterwards. 1140 */ 1141 edev->mode |= EEH_DEV_NO_HANDLER; 1142 1143 edev->pdev = NULL; 1144 dev->dev.archdata.edev = NULL; 1145 } 1146 1147 edev->pdev = dev; 1148 dev->dev.archdata.edev = edev; 1149 1150 eeh_addr_cache_insert_dev(dev); 1151 } 1152 1153 /** 1154 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1155 * @bus: PCI bus 1156 * 1157 * This routine must be used to perform EEH initialization for PCI 1158 * devices which are attached to the indicated PCI bus. The PCI bus 1159 * is added after system boot through hotplug or dlpar. 
1160 */ 1161 void eeh_add_device_tree_late(struct pci_bus *bus) 1162 { 1163 struct pci_dev *dev; 1164 1165 list_for_each_entry(dev, &bus->devices, bus_list) { 1166 eeh_add_device_late(dev); 1167 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1168 struct pci_bus *subbus = dev->subordinate; 1169 if (subbus) 1170 eeh_add_device_tree_late(subbus); 1171 } 1172 } 1173 } 1174 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1175 1176 /** 1177 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1178 * @bus: PCI bus 1179 * 1180 * This routine must be used to add EEH sysfs files for PCI 1181 * devices which are attached to the indicated PCI bus. The PCI bus 1182 * is added after system boot through hotplug or dlpar. 1183 */ 1184 void eeh_add_sysfs_files(struct pci_bus *bus) 1185 { 1186 struct pci_dev *dev; 1187 1188 list_for_each_entry(dev, &bus->devices, bus_list) { 1189 eeh_sysfs_add_device(dev); 1190 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1191 struct pci_bus *subbus = dev->subordinate; 1192 if (subbus) 1193 eeh_add_sysfs_files(subbus); 1194 } 1195 } 1196 } 1197 EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); 1198 1199 /** 1200 * eeh_remove_device - Undo EEH setup for the indicated pci device 1201 * @dev: pci device to be removed 1202 * 1203 * This routine should be called when a device is removed from 1204 * a running system (e.g. by hotplug or dlpar). It unregisters 1205 * the PCI device from the EEH subsystem. I/O errors affecting 1206 * this device will no longer be detected after this call; thus, 1207 * i/o errors affecting this slot may leave this device unusable. 
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing from the PCI subsystem, that means
	 * the PCI device driver can't support EEH or not
	 * well. So we rely on hotplug completely to do recovery
	 * for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

/**
 * eeh_unfreeze_pe - Reenable MMIO and DMA on a frozen PE
 * @pe: EEH PE to thaw
 * @sw_state: when true, also clear the software EEH_PE_ISOLATED state
 *
 * Thaws MMIO first, then DMA, via eeh_pci_enable(). Returns 0 on
 * success or the negative error from the failing enable operation.
 */
int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}


/* Devices that need a full PE reset (not just a thaw) on ownership change */
static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

/*
 * eeh_pe_change_owner - Prepare a PE for a change of ownership
 * @pe: EEH PE
 *
 * If the PE is frozen, either thaw it or — when it contains one of
 * the devices listed in eeh_reset_ids — perform a full PE reset and
 * recovery. Returns 0 when the PE is healthy or state retrieval is
 * unsupported, otherwise the result of the thaw/reset.
 */
static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		/* Match against the quirk table (PCI_ANY_ID wildcards) */
		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			goto reset;
		}
	}

	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase count of passed through devices for the indicated
 * PE. In the result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease count of pass through devices for the indicated PE. If
 * there is no passed through device in PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	/* Hand the PE back: thaw it, or reset it if a quirked device is in it */
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);

#ifdef CONFIG_IOMMU_API

/*
 * Callback for iommu_group_for_each_dev(): records the first device
 * that has an IOMMU table in *data and stops the iteration.
 */
static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;
	struct iommu_table *tbl;

	if (!dev)
		return 0;

	tbl = get_iommu_table_base(dev);
	if (tbl && tbl->it_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert IOMMU group to EEH PE.
 * Returns NULL when the group is empty or no PE is associated
 * with any of its devices.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, to enable IO or DMA for the frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, just
	 * return error for the case. And the EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which includes 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	/* Map the raw platform state bits onto the coarse PE states */
	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

/*
 * Restore config space and reenable every PCI device in the PE,
 * then thaw the PE itself. Used after a reset is deactivated.
 */
static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part for error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it's always
		 * cause recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			__func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges affected by PE
 * reset issued previously. Currently a no-op beyond validating
 * the PE pointer.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/* /proc/powerpc/eeh: dump global EEH status and statistics counters */
static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
				"no device=%llu\n"
				"no device node=%llu\n"
				"no config address=%llu\n"
				"check not wanted=%llu\n"
				"eeh_total_mmio_ffs=%llu\n"
				"eeh_false_positives=%llu\n"
				"eeh_slot_resets=%llu\n",
				eeh_stats.no_device,
				eeh_stats.no_dn,
				eeh_stats.no_cfg_addr,
				eeh_stats.ignored_check,
				eeh_stats.total_mmio_ffs,
				eeh_stats.false_positives,
				eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open      = proc_eeh_open,
	.read      = seq_read,
	.llseek    = seq_lseek,
	.release   = single_release,
};

#ifdef CONFIG_DEBUG_FS
/* debugfs "eeh_enable": non-zero reenables EEH, zero force-disables it */
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

/* debugfs "eeh_max_freezes": freeze-count limit before giving up on a PE */
static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
#endif

/* Create the /proc and debugfs entries on EEH-capable platforms */
static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);