/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>


/** Overview:
 *  EEH, or "Extended Error Handling", is a PCI bridge technology for
 *  dealing with PCI bus errors that can't be dealt with within the
 *  usual PCI framework, except by check-stopping the CPU.  Systems
 *  that are designed for high-availability/reliability cannot afford
 *  to crash due to a "mere" PCI error, thus the need for EEH.
 *  An EEH-capable bridge operates by converting a detected error
 *  into a "slot freeze", taking the PCI adapter off-line, making
 *  the slot behave, from the OS's point of view, as if the slot
 *  were "empty": all reads return 0xff's and all writes are silently
 *  ignored.  EEH slot isolation events can be triggered by parity
 *  errors on the address or data busses (e.g. during posted writes),
 *  which in turn might be caused by low voltage on the bus, dust,
 *  vibration, humidity, radioactivity or plain-old failed hardware.
 *
 *  Note, however, that one of the leading causes of EEH slot
 *  freeze events is buggy device drivers, buggy device microcode,
 *  or buggy device hardware.  This is because any attempt by the
 *  device to bus-master data to a memory address that is not
 *  assigned to the device will trigger a slot freeze.  (The idea
 *  is to prevent devices-gone-wild from corrupting system memory.)
 *  Buggy hardware/drivers will have a miserable time co-existing
 *  with EEH.
 *
 *  Ideally, a PCI device driver, when suspecting that an isolation
 *  event has occurred (e.g. by reading 0xff's), will then ask EEH
 *  whether this is the case, and then take appropriate steps to
 *  reset the PCI slot, the PCI device, and then resume operations.
 *  However, until that day, the checking is done here, with the
 *  eeh_check_failure() routine embedded in the MMIO macros.  If
 *  the slot is found to be isolated, an "EEH Event" is synthesized
 *  and sent out for processing.
 */
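/*
 * Illustrative sketch only: the arch MMIO read accessors embed the
 * check roughly like this.  The accessor below is a simplified,
 * hypothetical rendering for illustration, not the exact macro set
 * used by this architecture:
 *
 *	static inline u32 example_eeh_readl(const volatile void __iomem *addr)
 *	{
 *		u32 val = in_le32(addr);
 *
 *		if (val == 0xffffffff)		(all 1's may mean "isolated")
 *			eeh_check_failure(addr);
 *		return val;
 *	}
 */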
/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags, is there to
 * support multiple platforms for EEH.  Some platforms, like pSeries,
 * do PCI enumeration based on the device tree.  However, other
 * platforms, like powernv, probe PCI devices from hardware.  The flag
 * is used to distinguish between them.  In addition,
 * struct eeh_ops::probe is invoked for a particular OF node or PCI
 * device so that the corresponding PE is created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * Maximum number of allowed frozen events per PE.  If one particular
 * PE's frozen count in the last hour exceeds this limit, the PE will
 * be forced offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps.  It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * This struct maintains the EEH global statistics, which are
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

#define IS_BRIDGE(class_code) (((class_code) >> 16) == PCI_BASE_CLASS_BRIDGE)

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);
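/*
 * Usage note (grounded in the handler above): these take effect via
 * the kernel command line, e.g. "eeh=off" sets EEH_FORCE_DISABLED and
 * "eeh=early_log" sets EEH_EARLY_DUMP_LOG.
 */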
/**
 * eeh_dump_dev_log - Dump contents of a PCI device's config space to a buffer
 * @edev: device to report data for
 * @buf: buffer in which to log
 * @len: amount of room in buffer
 *
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts it into a buffer
 * for RTAS error logging.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
		       edev->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
		edev->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i = 0; i <= 8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i = 0; i <= 13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}
static void *eeh_dump_pe_log(void *data, void *flag)
{
	struct eeh_pe *pe = data;
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	/* If the PE's config space is blocked, 0xFF's will be
	 * returned.  It's pointless to collect the log in this
	 * case.
	 */
	if (pe->state & EEH_PE_CFG_BLOCKED)
		return NULL;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log.  The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through a platform dependent function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's.  For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval.  Otherwise,
	 * 0xFF's are always returned from PCI config space.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
		eeh_ops->configure_bridge(pe);
		eeh_pe_restore_bars(pe);

		pci_regs_buf[0] = 0;
		eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be an address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here: iomem mappings never use them.
	 */
	ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}
/*
 * On the PowerNV platform, we might already have a fenced PHB.
 * In that case, it's meaningless to recover the frozen PE; we
 * have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s: Can't find PE for PHB#%d\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has already been put into a problematic state,
	 * there is nothing more to do.
	 */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}

/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct pci_dn *pdn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			 eeh_pci_name(dev));
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced
	 * PHB, which we need to take care of first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state.  Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			pdn = eeh_dev_to_pdn(edev);
			if (pdn->node)
				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We punt on any of the following conditions: failure to
	 * get the PE's state, EEH not supported, PE permanently
	 * unavailable, or PE in a good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It should be a corner case that the parent PE has been
	 * put into frozen state as well.  We should take care of
	 * that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}
EXPORT_SYMBOL_GPL(eeh_dev_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event.  This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);
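/*
 * Driver-side sketch (hypothetical device, register offset and private
 * struct): on reading all 1's, a driver can ask EEH whether the slot
 * is really isolated before deciding the value is garbage.
 *
 *	u32 status = readl(priv->regs + EXAMPLE_STATUS);
 *
 *	if (status == 0xffffffff && eeh_check_failure(priv->regs))
 *		return -EIO;	(recovery event queued; back off)
 */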

/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: EEH option being requested
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again.  It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Besides, it's pointless to enable them on an unfrozen PE.
	 * So we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%d-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc <= 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}

static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}

static void *eeh_restore_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return NULL;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);

	return NULL;
}

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 on success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe, false);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		break;
	case pcie_hot_reset:
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		return -EINVAL;
	}

	return 0;
}
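/*
 * Hedged usage sketch: drivers normally reach the hook above through
 * the generic PCI helper rather than calling it directly, e.g.:
 *
 *	pci_set_pcie_reset_state(pdev, pcie_warm_reset);
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 */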
/**
 * eeh_set_dev_freset - Check the required reset type for the indicated device
 * @data: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset.  The routine is used to collect that information for
 * the indicated device and its children so that the bunch of
 * devices could be reset properly.
 */
static void *eeh_set_dev_freset(void *data, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;
	struct eeh_dev *edev = (struct eeh_dev *)data;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

/**
 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
 * @pe: EEH PE
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void eeh_reset_pe_once(struct eeh_pe *pe)
{
	unsigned int freset = 0;

	/* Determine the type of EEH reset required for the
	 * Partitionable Endpoint: a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under the
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
	else
		eeh_ops->reset(pe, EEH_RESET_HOT);

	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
}

/**
 * eeh_reset_pe - Reset the indicated PE
 * @pe: EEH PE
 *
 * This routine should be called to reset the indicated device,
 * including the PE.  A PE might include multiple PCI devices and
 * sometimes PCI bridges might be involved as well.
 */
int eeh_reset_pe(struct eeh_pe *pe)
{
	int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	int i, state, ret;

	/* Mark as reset and block config space */
	eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);

	/* Take three shots at resetting the bus */
	for (i = 0; i < 3; i++) {
		eeh_reset_pe_once(pe);

		/*
		 * EEH_PE_ISOLATED is expected to be removed after
		 * BAR restore.
		 */
		state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if ((state & flags) == flags) {
			ret = 0;
			goto out;
		}

		if (state < 0) {
			pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x\n",
				__func__, pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			goto out;
		}

		/* We might run out of credits */
		ret = -EIO;
		pr_warn("%s: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n",
			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
	}

out:
	eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	return ret;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars.  Unlike the restore
 * routine, this routine is *not* recursive.  This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges, including the root port, we need to enable
	 * bus mastering explicitly.  Otherwise, they can't fetch IODA
	 * table entries correctly.  So we cache the bit in advance so
	 * that we can restore it after reset, either PHB range or PE
	 * range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions.  The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already registered (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}
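/*
 * Registration sketch, assuming a hypothetical platform "exampleplat";
 * real platforms (pseries, powernv) populate many more callbacks:
 *
 *	static struct eeh_ops exampleplat_eeh_ops = {
 *		.name		= "exampleplat",
 *		.init		= exampleplat_eeh_init,
 *		.get_state	= exampleplat_eeh_get_state,
 *		.reset		= exampleplat_eeh_reset,
 *	};
 *
 *	Early platform setup code would then call:
 *
 *	eeh_ops_register(&exampleplat_eeh_ops);
 */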
static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check.  If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh=off boot option disables EEH checking globally, for all slots.
 * Even if it is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV until after
	 * the PCI hierarchy tree has been built, because the PEs
	 * are figured out based on PCI devices instead of device
	 * tree nodes.
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}

	/*
	 * Call platform post-initialization.  Actually, it's a good
	 * chance to inform the platform that EEH is ready to supply
	 * service once the I/O cache stuff has been built up.
	 */
	if (eeh_ops->post_init) {
		ret = eeh_ops->post_init();
		if (ret)
			return ret;
	}

	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_warn("EEH: No capable adapters found\n");

	return ret;
}

core_initcall_sync(eeh_init);

/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, the type of the device, earlier boot
 * command-line arguments, etc.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev || !eeh_enabled())
		return;

	/* USB Bus children of PCI devices will not have BUIDs */
	phb = edev->phb;
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device tree
 * @pdn: PCI device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device and its children, which were added after
 * system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * an unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device().  So we have to remove
	 * that here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device should have been removed, even
		 * though that wasn't done correctly.  So we needn't
		 * call into the error handler afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus.  The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus.  The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
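/*
 * Ordering sketch for a hotplug/DLPAR path (simplified; the surrounding
 * helpers vary by platform): EEH setup brackets generic enumeration,
 * and sysfs files are only added once the devices themselves are.
 *
 *	eeh_add_device_tree_early(pdn);		(before any config I/O)
 *	... scan bus, assign resources ...
 *	eeh_add_device_tree_late(bus);
 *	pci_bus_add_devices(bus);
 *	eeh_add_sysfs_files(bus);
 */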
/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar).  It unregisters
 * the PCI device from the EEH subsystem.  I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE for the BAR restore done
	 * a bit later.  So we keep it around and remove it from the
	 * parent PE during the BAR restore.
	 */
	edev->pdev = NULL;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing the device from the PCI subsystem, which
	 * means the PCI device driver doesn't support EEH, or at
	 * least not well.  So we rely on hotplug completely to do
	 * the recovery for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}

static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			goto reset;
		}
	}

	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase the count of passed through devices for the indicated
 * PE.  As a result, the EEH errors detected on the PE won't be
 * reported.  The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet.  The passed through PCI devices
	 * in a frozen PE won't work properly.  Clear the frozen
	 * state in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease the count of pass through devices for the indicated PE.  If
 * there is no passed through device in the PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);
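/*
 * Pairing sketch: a pass-through backend (VFIO, for instance) is
 * expected to bracket guest ownership with these calls, so that the
 * host leaves error handling to the PE owner in between.
 *
 *	eeh_dev_open(pdev);	(guest now owns error handling)
 *	... device assigned to user space ...
 *	eeh_dev_release(pdev);	(host EEH recovery resumes)
 */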

#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;
	struct iommu_table *tbl;

	if (!dev)
		return 0;

	tbl = get_iommu_table_base(dev);
	if (tbl && tbl->it_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for a frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled; just
	 * return an error for that case.  EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);
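/*
 * Illustrative recovery fragment for a PE owner (e.g. user space via a
 * pass-through layer): thaw MMIO first so registers can be inspected,
 * then thaw DMA.
 *
 *	eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
 *	... collect device state over MMIO ...
 *	eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
 */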

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which covers 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it always
		 * causes a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			 __func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);
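/*
 * Typical caller sequence, mirroring what eeh_reset_pe_once() does
 * internally: assert the chosen reset type, then deactivate it (which
 * also unblocks config space and reenables the devices).
 *
 *	eeh_pe_reset(pe, EEH_RESET_HOT);	(or EEH_RESET_FUNDAMENTAL)
 *	eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
 */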

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges, affected by the PE
 * reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open    = proc_eeh_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);
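/*
 * User space knobs created above (paths assume the usual procfs and
 * debugfs mount points):
 *
 *	/proc/powerpc/eeh			statistics from proc_eeh_show()
 *	/sys/kernel/debug/powerpc/eeh_enable	write 0/1 to force EEH off/on
 *	/sys/kernel/debug/powerpc/eeh_max_freezes
 */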