1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/debugfs.h> 26 #include <linux/sched.h> 27 #include <linux/init.h> 28 #include <linux/list.h> 29 #include <linux/pci.h> 30 #include <linux/iommu.h> 31 #include <linux/proc_fs.h> 32 #include <linux/rbtree.h> 33 #include <linux/reboot.h> 34 #include <linux/seq_file.h> 35 #include <linux/spinlock.h> 36 #include <linux/export.h> 37 #include <linux/of.h> 38 39 #include <linux/atomic.h> 40 #include <asm/debug.h> 41 #include <asm/eeh.h> 42 #include <asm/eeh_event.h> 43 #include <asm/io.h> 44 #include <asm/iommu.h> 45 #include <asm/machdep.h> 46 #include <asm/ppc-pci.h> 47 #include <asm/rtas.h> 48 49 50 /** Overview: 51 * EEH, or "Extended Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. 
Systems 54 * that are designed for high-availability/reliability cannot afford 55 * to crash due to a "mere" PCI error, thus the need for EEH. 56 * An EEH-capable bridge operates by converting a detected error 57 * into a "slot freeze", taking the PCI adapter off-line, making 58 * the slot behave, from the OS'es point of view, as if the slot 59 * were "empty": all reads return 0xff's and all writes are silently 60 * ignored. EEH slot isolation events can be triggered by parity 61 * errors on the address or data busses (e.g. during posted writes), 62 * which in turn might be caused by low voltage on the bus, dust, 63 * vibration, humidity, radioactivity or plain-old failed hardware. 64 * 65 * Note, however, that one of the leading causes of EEH slot 66 * freeze events are buggy device drivers, buggy device microcode, 67 * or buggy device hardware. This is because any attempt by the 68 * device to bus-master data to a memory address that is not 69 * assigned to the device will trigger a slot freeze. (The idea 70 * is to prevent devices-gone-wild from corrupting system memory). 71 * Buggy hardware/drivers will have a miserable time co-existing 72 * with EEH. 73 * 74 * Ideally, a PCI device driver, when suspecting that an isolation 75 * event has occurred (e.g. by reading 0xff's), will then ask EEH 76 * whether this is the case, and then take appropriate steps to 77 * reset the PCI slot, the PCI device, and then resume operations. 78 * However, until that day, the checking is done here, with the 79 * eeh_check_failure() routine embedded in the MMIO macros. If 80 * the slot is found to be isolated, an "EEH Event" is synthesized 81 * and sent out for processing. 82 */ 83 84 /* If a device driver keeps reading an MMIO register in an interrupt 85 * handler after a slot isolation event, it might be broken. 86 * This sets the threshold for how many read attempts we allow 87 * before printing an error message. 
/*
 * If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message: eeh_dev_check_failure() reports
 * once every EEH_MAX_FAILS checks made against an already isolated PE.
 */
#define EEH_MAX_FAILS 2100000

/* Time to wait for a PCI slot to report status, in milliseconds (5 minutes) */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI enumeration based on device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * EEH allowed maximal frozen times. If one particular PE's
 * frozen count in last hour exceeds this limit, the PE will
 * be forced to be offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations; NULL until a platform registers them */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/*
 * Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
Besides, the EEH global statistics will be 133 * exported to user space through procfs 134 */ 135 struct eeh_stats { 136 u64 no_device; /* PCI device not found */ 137 u64 no_dn; /* OF node not found */ 138 u64 no_cfg_addr; /* Config address not found */ 139 u64 ignored_check; /* EEH check skipped */ 140 u64 total_mmio_ffs; /* Total EEH checks */ 141 u64 false_positives; /* Unnecessary EEH checks */ 142 u64 slot_resets; /* PE reset */ 143 }; 144 145 static struct eeh_stats eeh_stats; 146 147 static int __init eeh_setup(char *str) 148 { 149 if (!strcmp(str, "off")) 150 eeh_add_flag(EEH_FORCE_DISABLED); 151 else if (!strcmp(str, "early_log")) 152 eeh_add_flag(EEH_EARLY_DUMP_LOG); 153 154 return 1; 155 } 156 __setup("eeh=", eeh_setup); 157 158 /* 159 * This routine captures assorted PCI configuration space data 160 * for the indicated PCI device, and puts them into a buffer 161 * for RTAS error logging. 162 */ 163 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) 164 { 165 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 166 u32 cfg; 167 int cap, i; 168 int n = 0, l = 0; 169 char buffer[128]; 170 171 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", 172 edev->phb->global_number, pdn->busno, 173 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 174 pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", 175 edev->phb->global_number, pdn->busno, 176 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 177 178 eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 179 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 180 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 181 182 eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); 183 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 184 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 185 186 /* Gather bridge-specific registers */ 187 if (edev->mode & EEH_DEV_BRIDGE) { 188 eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); 189 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 190 pr_warn("EEH: Bridge secondary 
status: %04x\n", cfg); 191 192 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); 193 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 194 pr_warn("EEH: Bridge control: %04x\n", cfg); 195 } 196 197 /* Dump out the PCI-X command and status regs */ 198 cap = edev->pcix_cap; 199 if (cap) { 200 eeh_ops->read_config(pdn, cap, 4, &cfg); 201 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 202 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 203 204 eeh_ops->read_config(pdn, cap+4, 4, &cfg); 205 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 206 pr_warn("EEH: PCI-X status: %08x\n", cfg); 207 } 208 209 /* If PCI-E capable, dump PCI-E cap 10 */ 210 cap = edev->pcie_cap; 211 if (cap) { 212 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 213 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 214 215 for (i=0; i<=8; i++) { 216 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 217 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 218 219 if ((i % 4) == 0) { 220 if (i != 0) 221 pr_warn("%s\n", buffer); 222 223 l = scnprintf(buffer, sizeof(buffer), 224 "EEH: PCI-E %02x: %08x ", 225 4*i, cfg); 226 } else { 227 l += scnprintf(buffer+l, sizeof(buffer)-l, 228 "%08x ", cfg); 229 } 230 231 } 232 233 pr_warn("%s\n", buffer); 234 } 235 236 /* If AER capable, dump it */ 237 cap = edev->aer_cap; 238 if (cap) { 239 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 240 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 241 242 for (i=0; i<=13; i++) { 243 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 244 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 245 246 if ((i % 4) == 0) { 247 if (i != 0) 248 pr_warn("%s\n", buffer); 249 250 l = scnprintf(buffer, sizeof(buffer), 251 "EEH: PCI-E AER %02x: %08x ", 252 4*i, cfg); 253 } else { 254 l += scnprintf(buffer+l, sizeof(buffer)-l, 255 "%08x ", cfg); 256 } 257 } 258 259 pr_warn("%s\n", buffer); 260 } 261 262 return n; 263 } 264 265 static void *eeh_dump_pe_log(void *data, void *flag) 266 { 267 struct eeh_pe *pe = data; 268 
struct eeh_dev *edev, *tmp; 269 size_t *plen = flag; 270 271 /* If the PE's config space is blocked, 0xFF's will be 272 * returned. It's pointless to collect the log in this 273 * case. 274 */ 275 if (pe->state & EEH_PE_CFG_BLOCKED) 276 return NULL; 277 278 eeh_pe_for_each_dev(pe, edev, tmp) 279 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, 280 EEH_PCI_REGS_LOG_LEN - *plen); 281 282 return NULL; 283 } 284 285 /** 286 * eeh_slot_error_detail - Generate combined log including driver log and error log 287 * @pe: EEH PE 288 * @severity: temporary or permanent error log 289 * 290 * This routine should be called to generate the combined log, which 291 * is comprised of driver log and error log. The driver log is figured 292 * out from the config space of the corresponding PCI device, while 293 * the error log is fetched through platform dependent function call. 294 */ 295 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 296 { 297 size_t loglen = 0; 298 299 /* 300 * When the PHB is fenced or dead, it's pointless to collect 301 * the data from PCI config space because it should return 302 * 0xFF's. For ER, we still retrieve the data from the PCI 303 * config space. 304 * 305 * For pHyp, we have to enable IO for log retrieval. Otherwise, 306 * 0xFF's is always returned from PCI config space. 307 */ 308 if (!(pe->type & EEH_PE_PHB)) { 309 if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) 310 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 311 312 /* 313 * The config space of some PCI devices can't be accessed 314 * when their PEs are in frozen state. Otherwise, fenced 315 * PHB might be seen. Those PEs are identified with flag 316 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED 317 * is set automatically when the PE is put to EEH_PE_ISOLATED. 318 * 319 * Restoring BARs possibly triggers PCI config access in 320 * (OPAL) firmware and then causes fenced PHB. 
If the 321 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's 322 * pointless to restore BARs and dump config space. 323 */ 324 eeh_ops->configure_bridge(pe); 325 if (!(pe->state & EEH_PE_CFG_BLOCKED)) { 326 eeh_pe_restore_bars(pe); 327 328 pci_regs_buf[0] = 0; 329 eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); 330 } 331 } 332 333 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 334 } 335 336 /** 337 * eeh_token_to_phys - Convert EEH address token to phys address 338 * @token: I/O token, should be address in the form 0xA.... 339 * 340 * This routine should be called to convert virtual I/O address 341 * to physical one. 342 */ 343 static inline unsigned long eeh_token_to_phys(unsigned long token) 344 { 345 pte_t *ptep; 346 unsigned long pa; 347 int hugepage_shift; 348 349 /* 350 * We won't find hugepages here(this is iomem). Hence we are not 351 * worried about _PAGE_SPLITTING/collapse. Also we will not hit 352 * page table free, because of init_mm. 353 */ 354 ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, 355 NULL, &hugepage_shift); 356 if (!ptep) 357 return token; 358 WARN_ON(hugepage_shift); 359 pa = pte_pfn(*ptep) << PAGE_SHIFT; 360 361 return pa | (token & (PAGE_SIZE-1)); 362 } 363 364 /* 365 * On PowerNV platform, we might already have fenced PHB there. 366 * For that case, it's meaningless to recover frozen PE. Intead, 367 * We have to handle fenced PHB firstly. 
368 */ 369 static int eeh_phb_check_failure(struct eeh_pe *pe) 370 { 371 struct eeh_pe *phb_pe; 372 unsigned long flags; 373 int ret; 374 375 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 376 return -EPERM; 377 378 /* Find the PHB PE */ 379 phb_pe = eeh_phb_pe_get(pe->phb); 380 if (!phb_pe) { 381 pr_warn("%s Can't find PE for PHB#%d\n", 382 __func__, pe->phb->global_number); 383 return -EEXIST; 384 } 385 386 /* If the PHB has been in problematic state */ 387 eeh_serialize_lock(&flags); 388 if (phb_pe->state & EEH_PE_ISOLATED) { 389 ret = 0; 390 goto out; 391 } 392 393 /* Check PHB state */ 394 ret = eeh_ops->get_state(phb_pe, NULL); 395 if ((ret < 0) || 396 (ret == EEH_STATE_NOT_SUPPORT) || 397 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 398 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 399 ret = 0; 400 goto out; 401 } 402 403 /* Isolate the PHB and send event */ 404 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 405 eeh_serialize_unlock(flags); 406 407 pr_err("EEH: PHB#%x failure detected, location: %s\n", 408 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 409 dump_stack(); 410 eeh_send_failure_event(phb_pe); 411 412 return 1; 413 out: 414 eeh_serialize_unlock(flags); 415 return ret; 416 } 417 418 /** 419 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 420 * @edev: eeh device 421 * 422 * Check for an EEH failure for the given device node. Call this 423 * routine if the result of a read was all 0xff's and you want to 424 * find out if this is due to an EEH slot freeze. This routine 425 * will query firmware for the EEH status. 426 * 427 * Returns 0 if there has not been an EEH error; otherwise returns 428 * a non-zero value and queues up a slot isolation event notification. 429 * 430 * It is safe to call this routine in an interrupt context. 
431 */ 432 int eeh_dev_check_failure(struct eeh_dev *edev) 433 { 434 int ret; 435 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 436 unsigned long flags; 437 struct pci_dn *pdn; 438 struct pci_dev *dev; 439 struct eeh_pe *pe, *parent_pe, *phb_pe; 440 int rc = 0; 441 const char *location = NULL; 442 443 eeh_stats.total_mmio_ffs++; 444 445 if (!eeh_enabled()) 446 return 0; 447 448 if (!edev) { 449 eeh_stats.no_dn++; 450 return 0; 451 } 452 dev = eeh_dev_to_pci_dev(edev); 453 pe = eeh_dev_to_pe(edev); 454 455 /* Access to IO BARs might get this far and still not want checking. */ 456 if (!pe) { 457 eeh_stats.ignored_check++; 458 pr_debug("EEH: Ignored check for %s\n", 459 eeh_pci_name(dev)); 460 return 0; 461 } 462 463 if (!pe->addr && !pe->config_addr) { 464 eeh_stats.no_cfg_addr++; 465 return 0; 466 } 467 468 /* 469 * On PowerNV platform, we might already have fenced PHB 470 * there and we need take care of that firstly. 471 */ 472 ret = eeh_phb_check_failure(pe); 473 if (ret > 0) 474 return ret; 475 476 /* 477 * If the PE isn't owned by us, we shouldn't check the 478 * state. Instead, let the owner handle it if the PE has 479 * been frozen. 480 */ 481 if (eeh_pe_passed(pe)) 482 return 0; 483 484 /* If we already have a pending isolation event for this 485 * slot, we know it's bad already, we don't need to check. 486 * Do this checking under a lock; as multiple PCI devices 487 * in one slot might report errors simultaneously, and we 488 * only want one error recovery routine running. 489 */ 490 eeh_serialize_lock(&flags); 491 rc = 1; 492 if (pe->state & EEH_PE_ISOLATED) { 493 pe->check_count++; 494 if (pe->check_count % EEH_MAX_FAILS == 0) { 495 pdn = eeh_dev_to_pdn(edev); 496 if (pdn->node) 497 location = of_get_property(pdn->node, "ibm,loc-code", NULL); 498 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 499 "location=%s driver=%s pci addr=%s\n", 500 pe->check_count, 501 location ? 
location : "unknown", 502 eeh_driver_name(dev), eeh_pci_name(dev)); 503 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 504 eeh_driver_name(dev)); 505 dump_stack(); 506 } 507 goto dn_unlock; 508 } 509 510 /* 511 * Now test for an EEH failure. This is VERY expensive. 512 * Note that the eeh_config_addr may be a parent device 513 * in the case of a device behind a bridge, or it may be 514 * function zero of a multi-function device. 515 * In any case they must share a common PHB. 516 */ 517 ret = eeh_ops->get_state(pe, NULL); 518 519 /* Note that config-io to empty slots may fail; 520 * they are empty when they don't have children. 521 * We will punt with the following conditions: Failure to get 522 * PE's state, EEH not support and Permanently unavailable 523 * state, PE is in good state. 524 */ 525 if ((ret < 0) || 526 (ret == EEH_STATE_NOT_SUPPORT) || 527 ((ret & active_flags) == active_flags)) { 528 eeh_stats.false_positives++; 529 pe->false_positives++; 530 rc = 0; 531 goto dn_unlock; 532 } 533 534 /* 535 * It should be corner case that the parent PE has been 536 * put into frozen state as well. We should take care 537 * that at first. 538 */ 539 parent_pe = pe->parent; 540 while (parent_pe) { 541 /* Hit the ceiling ? */ 542 if (parent_pe->type & EEH_PE_PHB) 543 break; 544 545 /* Frozen parent PE ? */ 546 ret = eeh_ops->get_state(parent_pe, NULL); 547 if (ret > 0 && 548 (ret & active_flags) != active_flags) 549 pe = parent_pe; 550 551 /* Next parent level */ 552 parent_pe = parent_pe->parent; 553 } 554 555 eeh_stats.slot_resets++; 556 557 /* Avoid repeated reports of this failure, including problems 558 * with other functions on this device, and functions under 559 * bridges. 560 */ 561 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 562 eeh_serialize_unlock(flags); 563 564 /* Most EEH events are due to device driver bugs. Having 565 * a stack trace will help the device-driver authors figure 566 * out what happened. So print that out. 
567 */ 568 phb_pe = eeh_phb_pe_get(pe->phb); 569 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 570 pe->phb->global_number, pe->addr); 571 pr_err("EEH: PE location: %s, PHB location: %s\n", 572 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 573 dump_stack(); 574 575 eeh_send_failure_event(pe); 576 577 return 1; 578 579 dn_unlock: 580 eeh_serialize_unlock(flags); 581 return rc; 582 } 583 584 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 585 586 /** 587 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 588 * @token: I/O address 589 * 590 * Check for an EEH failure at the given I/O address. Call this 591 * routine if the result of a read was all 0xff's and you want to 592 * find out if this is due to an EEH slot freeze event. This routine 593 * will query firmware for the EEH status. 594 * 595 * Note this routine is safe to call in an interrupt context. 596 */ 597 int eeh_check_failure(const volatile void __iomem *token) 598 { 599 unsigned long addr; 600 struct eeh_dev *edev; 601 602 /* Finding the phys addr + pci device; this is pretty quick. */ 603 addr = eeh_token_to_phys((unsigned long __force) token); 604 edev = eeh_addr_cache_get_dev(addr); 605 if (!edev) { 606 eeh_stats.no_device++; 607 return 0; 608 } 609 610 return eeh_dev_check_failure(edev); 611 } 612 EXPORT_SYMBOL(eeh_check_failure); 613 614 615 /** 616 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 617 * @pe: EEH PE 618 * 619 * This routine should be called to reenable frozen MMIO or DMA 620 * so that it would work correctly again. It's useful while doing 621 * recovery or log collection on the indicated device. 622 */ 623 int eeh_pci_enable(struct eeh_pe *pe, int function) 624 { 625 int active_flag, rc; 626 627 /* 628 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 629 * Also, it's pointless to enable them on unfrozen PE. So 630 * we have to check before enabling IO or DMA. 
631 */ 632 switch (function) { 633 case EEH_OPT_THAW_MMIO: 634 active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED; 635 break; 636 case EEH_OPT_THAW_DMA: 637 active_flag = EEH_STATE_DMA_ACTIVE; 638 break; 639 case EEH_OPT_DISABLE: 640 case EEH_OPT_ENABLE: 641 case EEH_OPT_FREEZE_PE: 642 active_flag = 0; 643 break; 644 default: 645 pr_warn("%s: Invalid function %d\n", 646 __func__, function); 647 return -EINVAL; 648 } 649 650 /* 651 * Check if IO or DMA has been enabled before 652 * enabling them. 653 */ 654 if (active_flag) { 655 rc = eeh_ops->get_state(pe, NULL); 656 if (rc < 0) 657 return rc; 658 659 /* Needn't enable it at all */ 660 if (rc == EEH_STATE_NOT_SUPPORT) 661 return 0; 662 663 /* It's already enabled */ 664 if (rc & active_flag) 665 return 0; 666 } 667 668 669 /* Issue the request */ 670 rc = eeh_ops->set_option(pe, function); 671 if (rc) 672 pr_warn("%s: Unexpected state change %d on " 673 "PHB#%d-PE#%x, err=%d\n", 674 __func__, function, pe->phb->global_number, 675 pe->addr, rc); 676 677 /* Check if the request is finished successfully */ 678 if (active_flag) { 679 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 680 if (rc <= 0) 681 return rc; 682 683 if (rc & active_flag) 684 return 0; 685 686 return -EIO; 687 } 688 689 return rc; 690 } 691 692 static void *eeh_disable_and_save_dev_state(void *data, void *userdata) 693 { 694 struct eeh_dev *edev = data; 695 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 696 struct pci_dev *dev = userdata; 697 698 /* 699 * The caller should have disabled and saved the 700 * state for the specified device 701 */ 702 if (!pdev || pdev == dev) 703 return NULL; 704 705 /* Ensure we have D0 power state */ 706 pci_set_power_state(pdev, PCI_D0); 707 708 /* Save device state */ 709 pci_save_state(pdev); 710 711 /* 712 * Disable device to avoid any DMA traffic and 713 * interrupt from the device 714 */ 715 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); 716 717 return NULL; 718 } 719 720 
static void *eeh_restore_dev_state(void *data, void *userdata) 721 { 722 struct eeh_dev *edev = data; 723 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 724 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 725 struct pci_dev *dev = userdata; 726 727 if (!pdev) 728 return NULL; 729 730 /* Apply customization from firmware */ 731 if (pdn && eeh_ops->restore_config) 732 eeh_ops->restore_config(pdn); 733 734 /* The caller should restore state for the specified device */ 735 if (pdev != dev) 736 pci_restore_state(pdev); 737 738 return NULL; 739 } 740 741 /** 742 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 743 * @dev: pci device struct 744 * @state: reset state to enter 745 * 746 * Return value: 747 * 0 if success 748 */ 749 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 750 { 751 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 752 struct eeh_pe *pe = eeh_dev_to_pe(edev); 753 754 if (!pe) { 755 pr_err("%s: No PE found on PCI device %s\n", 756 __func__, pci_name(dev)); 757 return -EINVAL; 758 } 759 760 switch (state) { 761 case pcie_deassert_reset: 762 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 763 eeh_unfreeze_pe(pe, false); 764 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 765 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); 766 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 767 break; 768 case pcie_hot_reset: 769 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 770 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 771 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 772 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 773 eeh_ops->reset(pe, EEH_RESET_HOT); 774 break; 775 case pcie_warm_reset: 776 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 777 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 778 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 779 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 780 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 781 break; 782 default: 783 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | 
EEH_PE_CFG_BLOCKED); 784 return -EINVAL; 785 }; 786 787 return 0; 788 } 789 790 /** 791 * eeh_set_pe_freset - Check the required reset for the indicated device 792 * @data: EEH device 793 * @flag: return value 794 * 795 * Each device might have its preferred reset type: fundamental or 796 * hot reset. The routine is used to collected the information for 797 * the indicated device and its children so that the bunch of the 798 * devices could be reset properly. 799 */ 800 static void *eeh_set_dev_freset(void *data, void *flag) 801 { 802 struct pci_dev *dev; 803 unsigned int *freset = (unsigned int *)flag; 804 struct eeh_dev *edev = (struct eeh_dev *)data; 805 806 dev = eeh_dev_to_pci_dev(edev); 807 if (dev) 808 *freset |= dev->needs_freset; 809 810 return NULL; 811 } 812 813 /** 814 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 815 * @pe: EEH PE 816 * 817 * Assert the PCI #RST line for 1/4 second. 818 */ 819 static void eeh_reset_pe_once(struct eeh_pe *pe) 820 { 821 unsigned int freset = 0; 822 823 /* Determine type of EEH reset required for 824 * Partitionable Endpoint, a hot-reset (1) 825 * or a fundamental reset (3). 826 * A fundamental reset required by any device under 827 * Partitionable Endpoint trumps hot-reset. 828 */ 829 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 830 831 if (freset) 832 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 833 else 834 eeh_ops->reset(pe, EEH_RESET_HOT); 835 836 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 837 } 838 839 /** 840 * eeh_reset_pe - Reset the indicated PE 841 * @pe: EEH PE 842 * 843 * This routine should be called to reset indicated device, including 844 * PE. A PE might include multiple PCI devices and sometimes PCI bridges 845 * might be involved as well. 
846 */ 847 int eeh_reset_pe(struct eeh_pe *pe) 848 { 849 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 850 int i, state, ret; 851 852 /* Mark as reset and block config space */ 853 eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 854 855 /* Take three shots at resetting the bus */ 856 for (i = 0; i < 3; i++) { 857 eeh_reset_pe_once(pe); 858 859 /* 860 * EEH_PE_ISOLATED is expected to be removed after 861 * BAR restore. 862 */ 863 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 864 if ((state & flags) == flags) { 865 ret = 0; 866 goto out; 867 } 868 869 if (state < 0) { 870 pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", 871 __func__, pe->phb->global_number, pe->addr); 872 ret = -ENOTRECOVERABLE; 873 goto out; 874 } 875 876 /* We might run out of credits */ 877 ret = -EIO; 878 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", 879 __func__, state, pe->phb->global_number, pe->addr, (i + 1)); 880 } 881 882 out: 883 eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 884 return ret; 885 } 886 887 /** 888 * eeh_save_bars - Save device bars 889 * @edev: PCI device associated EEH device 890 * 891 * Save the values of the device bars. Unlike the restore 892 * routine, this routine is *not* recursive. This is because 893 * PCI devices are added individually; but, for the restore, 894 * an entire slot is reset at a time. 895 */ 896 void eeh_save_bars(struct eeh_dev *edev) 897 { 898 struct pci_dn *pdn; 899 int i; 900 901 pdn = eeh_dev_to_pdn(edev); 902 if (!pdn) 903 return; 904 905 for (i = 0; i < 16; i++) 906 eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); 907 908 /* 909 * For PCI bridges including root port, we need enable bus 910 * master explicitly. Otherwise, it can't fetch IODA table 911 * entries correctly. So we cache the bit in advance so that 912 * we can restore it after reset, either PHB range or PE range. 
913 */ 914 if (edev->mode & EEH_DEV_BRIDGE) 915 edev->config_space[1] |= PCI_COMMAND_MASTER; 916 } 917 918 /** 919 * eeh_ops_register - Register platform dependent EEH operations 920 * @ops: platform dependent EEH operations 921 * 922 * Register the platform dependent EEH operation callback 923 * functions. The platform should call this function before 924 * any other EEH operations. 925 */ 926 int __init eeh_ops_register(struct eeh_ops *ops) 927 { 928 if (!ops->name) { 929 pr_warn("%s: Invalid EEH ops name for %p\n", 930 __func__, ops); 931 return -EINVAL; 932 } 933 934 if (eeh_ops && eeh_ops != ops) { 935 pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 936 __func__, eeh_ops->name, ops->name); 937 return -EEXIST; 938 } 939 940 eeh_ops = ops; 941 942 return 0; 943 } 944 945 /** 946 * eeh_ops_unregister - Unreigster platform dependent EEH operations 947 * @name: name of EEH platform operations 948 * 949 * Unregister the platform dependent EEH operation callback 950 * functions. 951 */ 952 int __exit eeh_ops_unregister(const char *name) 953 { 954 if (!name || !strlen(name)) { 955 pr_warn("%s: Invalid EEH ops name\n", 956 __func__); 957 return -EINVAL; 958 } 959 960 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 961 eeh_ops = NULL; 962 return 0; 963 } 964 965 return -EEXIST; 966 } 967 968 static int eeh_reboot_notifier(struct notifier_block *nb, 969 unsigned long action, void *unused) 970 { 971 eeh_clear_flag(EEH_ENABLED); 972 return NOTIFY_DONE; 973 } 974 975 static struct notifier_block eeh_reboot_nb = { 976 .notifier_call = eeh_reboot_notifier, 977 }; 978 979 /** 980 * eeh_init - EEH initialization 981 * 982 * Initialize EEH by trying to enable it for all of the adapters in the system. 983 * As a side effect we can determine here if eeh is supported at all. 984 * Note that we leave EEH on so failed config cycles won't cause a machine 985 * check. If a user turns off EEH for a particular adapter they are really 986 * telling Linux to ignore errors. 
Some hardware (e.g. POWER5) won't 987 * grant access to a slot if EEH isn't enabled, and so we always enable 988 * EEH for all slots/all devices. 989 * 990 * The eeh-force-off option disables EEH checking globally, for all slots. 991 * Even if force-off is set, the EEH hardware is still enabled, so that 992 * newer systems can boot. 993 */ 994 int eeh_init(void) 995 { 996 struct pci_controller *hose, *tmp; 997 struct pci_dn *pdn; 998 static int cnt = 0; 999 int ret = 0; 1000 1001 /* 1002 * We have to delay the initialization on PowerNV after 1003 * the PCI hierarchy tree has been built because the PEs 1004 * are figured out based on PCI devices instead of device 1005 * tree nodes 1006 */ 1007 if (machine_is(powernv) && cnt++ <= 0) 1008 return ret; 1009 1010 /* Register reboot notifier */ 1011 ret = register_reboot_notifier(&eeh_reboot_nb); 1012 if (ret) { 1013 pr_warn("%s: Failed to register notifier (%d)\n", 1014 __func__, ret); 1015 return ret; 1016 } 1017 1018 /* call platform initialization function */ 1019 if (!eeh_ops) { 1020 pr_warn("%s: Platform EEH operation not found\n", 1021 __func__); 1022 return -EEXIST; 1023 } else if ((ret = eeh_ops->init())) 1024 return ret; 1025 1026 /* Initialize EEH event */ 1027 ret = eeh_event_init(); 1028 if (ret) 1029 return ret; 1030 1031 /* Enable EEH for all adapters */ 1032 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1033 pdn = hose->pci_data; 1034 traverse_pci_dn(pdn, eeh_ops->probe, NULL); 1035 } 1036 1037 /* 1038 * Call platform post-initialization. Actually, It's good chance 1039 * to inform platform that EEH is ready to supply service if the 1040 * I/O cache stuff has been built up. 
1041 */ 1042 if (eeh_ops->post_init) { 1043 ret = eeh_ops->post_init(); 1044 if (ret) 1045 return ret; 1046 } 1047 1048 if (eeh_enabled()) 1049 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 1050 else 1051 pr_warn("EEH: No capable adapters found\n"); 1052 1053 return ret; 1054 } 1055 1056 core_initcall_sync(eeh_init); 1057 1058 /** 1059 * eeh_add_device_early - Enable EEH for the indicated device node 1060 * @pdn: PCI device node for which to set up EEH 1061 * 1062 * This routine must be used to perform EEH initialization for PCI 1063 * devices that were added after system boot (e.g. hotplug, dlpar). 1064 * This routine must be called before any i/o is performed to the 1065 * adapter (inluding any config-space i/o). 1066 * Whether this actually enables EEH or not for this device depends 1067 * on the CEC architecture, type of the device, on earlier boot 1068 * command-line arguments & etc. 1069 */ 1070 void eeh_add_device_early(struct pci_dn *pdn) 1071 { 1072 struct pci_controller *phb; 1073 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1074 1075 if (!edev || !eeh_enabled()) 1076 return; 1077 1078 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1079 return; 1080 1081 /* USB Bus children of PCI devices will not have BUID's */ 1082 phb = edev->phb; 1083 if (NULL == phb || 1084 (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) 1085 return; 1086 1087 eeh_ops->probe(pdn, NULL); 1088 } 1089 1090 /** 1091 * eeh_add_device_tree_early - Enable EEH for the indicated device 1092 * @pdn: PCI device node 1093 * 1094 * This routine must be used to perform EEH initialization for the 1095 * indicated PCI device that was added after system boot (e.g. 1096 * hotplug, dlpar). 
1097 */ 1098 void eeh_add_device_tree_early(struct pci_dn *pdn) 1099 { 1100 struct pci_dn *n; 1101 1102 if (!pdn) 1103 return; 1104 1105 list_for_each_entry(n, &pdn->child_list, list) 1106 eeh_add_device_tree_early(n); 1107 eeh_add_device_early(pdn); 1108 } 1109 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1110 1111 /** 1112 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1113 * @dev: pci device for which to set up EEH 1114 * 1115 * This routine must be used to complete EEH initialization for PCI 1116 * devices that were added after system boot (e.g. hotplug, dlpar). 1117 */ 1118 void eeh_add_device_late(struct pci_dev *dev) 1119 { 1120 struct pci_dn *pdn; 1121 struct eeh_dev *edev; 1122 1123 if (!dev || !eeh_enabled()) 1124 return; 1125 1126 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1127 1128 pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 1129 edev = pdn_to_eeh_dev(pdn); 1130 if (edev->pdev == dev) { 1131 pr_debug("EEH: Already referenced !\n"); 1132 return; 1133 } 1134 1135 /* 1136 * The EEH cache might not be removed correctly because of 1137 * unbalanced kref to the device during unplug time, which 1138 * relies on pcibios_release_device(). So we have to remove 1139 * that here explicitly. 1140 */ 1141 if (edev->pdev) { 1142 eeh_rmv_from_parent_pe(edev); 1143 eeh_addr_cache_rmv_dev(edev->pdev); 1144 eeh_sysfs_remove_device(edev->pdev); 1145 edev->mode &= ~EEH_DEV_SYSFS; 1146 1147 /* 1148 * We definitely should have the PCI device removed 1149 * though it wasn't correctly. So we needn't call 1150 * into error handler afterwards. 
1151 */ 1152 edev->mode |= EEH_DEV_NO_HANDLER; 1153 1154 edev->pdev = NULL; 1155 dev->dev.archdata.edev = NULL; 1156 } 1157 1158 if (eeh_has_flag(EEH_PROBE_MODE_DEV)) 1159 eeh_ops->probe(pdn, NULL); 1160 1161 edev->pdev = dev; 1162 dev->dev.archdata.edev = edev; 1163 1164 eeh_addr_cache_insert_dev(dev); 1165 } 1166 1167 /** 1168 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1169 * @bus: PCI bus 1170 * 1171 * This routine must be used to perform EEH initialization for PCI 1172 * devices which are attached to the indicated PCI bus. The PCI bus 1173 * is added after system boot through hotplug or dlpar. 1174 */ 1175 void eeh_add_device_tree_late(struct pci_bus *bus) 1176 { 1177 struct pci_dev *dev; 1178 1179 list_for_each_entry(dev, &bus->devices, bus_list) { 1180 eeh_add_device_late(dev); 1181 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1182 struct pci_bus *subbus = dev->subordinate; 1183 if (subbus) 1184 eeh_add_device_tree_late(subbus); 1185 } 1186 } 1187 } 1188 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1189 1190 /** 1191 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1192 * @bus: PCI bus 1193 * 1194 * This routine must be used to add EEH sysfs files for PCI 1195 * devices which are attached to the indicated PCI bus. The PCI bus 1196 * is added after system boot through hotplug or dlpar. 
1197 */ 1198 void eeh_add_sysfs_files(struct pci_bus *bus) 1199 { 1200 struct pci_dev *dev; 1201 1202 list_for_each_entry(dev, &bus->devices, bus_list) { 1203 eeh_sysfs_add_device(dev); 1204 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1205 struct pci_bus *subbus = dev->subordinate; 1206 if (subbus) 1207 eeh_add_sysfs_files(subbus); 1208 } 1209 } 1210 } 1211 EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); 1212 1213 /** 1214 * eeh_remove_device - Undo EEH setup for the indicated pci device 1215 * @dev: pci device to be removed 1216 * 1217 * This routine should be called when a device is removed from 1218 * a running system (e.g. by hotplug or dlpar). It unregisters 1219 * the PCI device from the EEH subsystem. I/O errors affecting 1220 * this device will no longer be detected after this call; thus, 1221 * i/o errors affecting this slot may leave this device unusable. 1222 */ 1223 void eeh_remove_device(struct pci_dev *dev) 1224 { 1225 struct eeh_dev *edev; 1226 1227 if (!dev || !eeh_enabled()) 1228 return; 1229 edev = pci_dev_to_eeh_dev(dev); 1230 1231 /* Unregister the device with the EEH/PCI address search system */ 1232 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 1233 1234 if (!edev || !edev->pdev || !edev->pe) { 1235 pr_debug("EEH: Not referenced !\n"); 1236 return; 1237 } 1238 1239 /* 1240 * During the hotplug for EEH error recovery, we need the EEH 1241 * device attached to the parent PE in order for BAR restore 1242 * a bit later. So we keep it for BAR restore and remove it 1243 * from the parent PE during the BAR resotre. 1244 */ 1245 edev->pdev = NULL; 1246 dev->dev.archdata.edev = NULL; 1247 if (!(edev->pe->state & EEH_PE_KEEP)) 1248 eeh_rmv_from_parent_pe(edev); 1249 else 1250 edev->mode |= EEH_DEV_DISCONNECTED; 1251 1252 /* 1253 * We're removing from the PCI subsystem, that means 1254 * the PCI device driver can't support EEH or not 1255 * well. So we rely on hotplug completely to do recovery 1256 * for the specific PCI device. 
1257 */ 1258 edev->mode |= EEH_DEV_NO_HANDLER; 1259 1260 eeh_addr_cache_rmv_dev(dev); 1261 eeh_sysfs_remove_device(dev); 1262 edev->mode &= ~EEH_DEV_SYSFS; 1263 } 1264 1265 int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) 1266 { 1267 int ret; 1268 1269 ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 1270 if (ret) { 1271 pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", 1272 __func__, ret, pe->phb->global_number, pe->addr); 1273 return ret; 1274 } 1275 1276 ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 1277 if (ret) { 1278 pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", 1279 __func__, ret, pe->phb->global_number, pe->addr); 1280 return ret; 1281 } 1282 1283 /* Clear software isolated state */ 1284 if (sw_state && (pe->state & EEH_PE_ISOLATED)) 1285 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 1286 1287 return ret; 1288 } 1289 1290 1291 static struct pci_device_id eeh_reset_ids[] = { 1292 { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ 1293 { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ 1294 { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ 1295 { 0 } 1296 }; 1297 1298 static int eeh_pe_change_owner(struct eeh_pe *pe) 1299 { 1300 struct eeh_dev *edev, *tmp; 1301 struct pci_dev *pdev; 1302 struct pci_device_id *id; 1303 int flags, ret; 1304 1305 /* Check PE state */ 1306 flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 1307 ret = eeh_ops->get_state(pe, NULL); 1308 if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) 1309 return 0; 1310 1311 /* Unfrozen PE, nothing to do */ 1312 if ((ret & flags) == flags) 1313 return 0; 1314 1315 /* Frozen PE, check if it needs PE level reset */ 1316 eeh_pe_for_each_dev(pe, edev, tmp) { 1317 pdev = eeh_dev_to_pci_dev(edev); 1318 if (!pdev) 1319 continue; 1320 1321 for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { 1322 if (id->vendor != PCI_ANY_ID && 1323 id->vendor != pdev->vendor) 1324 continue; 1325 if (id->device != PCI_ANY_ID && 1326 id->device != pdev->device) 1327 continue; 1328 if (id->subvendor != 
PCI_ANY_ID && 1329 id->subvendor != pdev->subsystem_vendor) 1330 continue; 1331 if (id->subdevice != PCI_ANY_ID && 1332 id->subdevice != pdev->subsystem_device) 1333 continue; 1334 1335 goto reset; 1336 } 1337 } 1338 1339 return eeh_unfreeze_pe(pe, true); 1340 1341 reset: 1342 return eeh_pe_reset_and_recover(pe); 1343 } 1344 1345 /** 1346 * eeh_dev_open - Increase count of pass through devices for PE 1347 * @pdev: PCI device 1348 * 1349 * Increase count of passed through devices for the indicated 1350 * PE. In the result, the EEH errors detected on the PE won't be 1351 * reported. The PE owner will be responsible for detection 1352 * and recovery. 1353 */ 1354 int eeh_dev_open(struct pci_dev *pdev) 1355 { 1356 struct eeh_dev *edev; 1357 int ret = -ENODEV; 1358 1359 mutex_lock(&eeh_dev_mutex); 1360 1361 /* No PCI device ? */ 1362 if (!pdev) 1363 goto out; 1364 1365 /* No EEH device or PE ? */ 1366 edev = pci_dev_to_eeh_dev(pdev); 1367 if (!edev || !edev->pe) 1368 goto out; 1369 1370 /* 1371 * The PE might have been put into frozen state, but we 1372 * didn't detect that yet. The passed through PCI devices 1373 * in frozen PE won't work properly. Clear the frozen state 1374 * in advance. 1375 */ 1376 ret = eeh_pe_change_owner(edev->pe); 1377 if (ret) 1378 goto out; 1379 1380 /* Increase PE's pass through count */ 1381 atomic_inc(&edev->pe->pass_dev_cnt); 1382 mutex_unlock(&eeh_dev_mutex); 1383 1384 return 0; 1385 out: 1386 mutex_unlock(&eeh_dev_mutex); 1387 return ret; 1388 } 1389 EXPORT_SYMBOL_GPL(eeh_dev_open); 1390 1391 /** 1392 * eeh_dev_release - Decrease count of pass through devices for PE 1393 * @pdev: PCI device 1394 * 1395 * Decrease count of pass through devices for the indicated PE. If 1396 * there is no passed through device in PE, the EEH errors detected 1397 * on the PE will be reported and handled as usual. 
1398 */ 1399 void eeh_dev_release(struct pci_dev *pdev) 1400 { 1401 struct eeh_dev *edev; 1402 1403 mutex_lock(&eeh_dev_mutex); 1404 1405 /* No PCI device ? */ 1406 if (!pdev) 1407 goto out; 1408 1409 /* No EEH device ? */ 1410 edev = pci_dev_to_eeh_dev(pdev); 1411 if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) 1412 goto out; 1413 1414 /* Decrease PE's pass through count */ 1415 WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); 1416 eeh_pe_change_owner(edev->pe); 1417 out: 1418 mutex_unlock(&eeh_dev_mutex); 1419 } 1420 EXPORT_SYMBOL(eeh_dev_release); 1421 1422 #ifdef CONFIG_IOMMU_API 1423 1424 static int dev_has_iommu_table(struct device *dev, void *data) 1425 { 1426 struct pci_dev *pdev = to_pci_dev(dev); 1427 struct pci_dev **ppdev = data; 1428 1429 if (!dev) 1430 return 0; 1431 1432 if (dev->iommu_group) { 1433 *ppdev = pdev; 1434 return 1; 1435 } 1436 1437 return 0; 1438 } 1439 1440 /** 1441 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE 1442 * @group: IOMMU group 1443 * 1444 * The routine is called to convert IOMMU group to EEH PE. 1445 */ 1446 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) 1447 { 1448 struct pci_dev *pdev = NULL; 1449 struct eeh_dev *edev; 1450 int ret; 1451 1452 /* No IOMMU group ? */ 1453 if (!group) 1454 return NULL; 1455 1456 ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); 1457 if (!ret || !pdev) 1458 return NULL; 1459 1460 /* No EEH device or PE ? */ 1461 edev = pci_dev_to_eeh_dev(pdev); 1462 if (!edev || !edev->pe) 1463 return NULL; 1464 1465 return edev->pe; 1466 } 1467 EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); 1468 1469 #endif /* CONFIG_IOMMU_API */ 1470 1471 /** 1472 * eeh_pe_set_option - Set options for the indicated PE 1473 * @pe: EEH PE 1474 * @option: requested option 1475 * 1476 * The routine is called to enable or disable EEH functionality 1477 * on the indicated PE, to enable IO or DMA for the frozen PE. 
1478 */ 1479 int eeh_pe_set_option(struct eeh_pe *pe, int option) 1480 { 1481 int ret = 0; 1482 1483 /* Invalid PE ? */ 1484 if (!pe) 1485 return -ENODEV; 1486 1487 /* 1488 * EEH functionality could possibly be disabled, just 1489 * return error for the case. And the EEH functinality 1490 * isn't expected to be disabled on one specific PE. 1491 */ 1492 switch (option) { 1493 case EEH_OPT_ENABLE: 1494 if (eeh_enabled()) { 1495 ret = eeh_pe_change_owner(pe); 1496 break; 1497 } 1498 ret = -EIO; 1499 break; 1500 case EEH_OPT_DISABLE: 1501 break; 1502 case EEH_OPT_THAW_MMIO: 1503 case EEH_OPT_THAW_DMA: 1504 if (!eeh_ops || !eeh_ops->set_option) { 1505 ret = -ENOENT; 1506 break; 1507 } 1508 1509 ret = eeh_pci_enable(pe, option); 1510 break; 1511 default: 1512 pr_debug("%s: Option %d out of range (%d, %d)\n", 1513 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); 1514 ret = -EINVAL; 1515 } 1516 1517 return ret; 1518 } 1519 EXPORT_SYMBOL_GPL(eeh_pe_set_option); 1520 1521 /** 1522 * eeh_pe_get_state - Retrieve PE's state 1523 * @pe: EEH PE 1524 * 1525 * Retrieve the PE's state, which includes 3 aspects: enabled 1526 * DMA, enabled IO and asserted reset. 1527 */ 1528 int eeh_pe_get_state(struct eeh_pe *pe) 1529 { 1530 int result, ret = 0; 1531 bool rst_active, dma_en, mmio_en; 1532 1533 /* Existing PE ? 
*/ 1534 if (!pe) 1535 return -ENODEV; 1536 1537 if (!eeh_ops || !eeh_ops->get_state) 1538 return -ENOENT; 1539 1540 result = eeh_ops->get_state(pe, NULL); 1541 rst_active = !!(result & EEH_STATE_RESET_ACTIVE); 1542 dma_en = !!(result & EEH_STATE_DMA_ENABLED); 1543 mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); 1544 1545 if (rst_active) 1546 ret = EEH_PE_STATE_RESET; 1547 else if (dma_en && mmio_en) 1548 ret = EEH_PE_STATE_NORMAL; 1549 else if (!dma_en && !mmio_en) 1550 ret = EEH_PE_STATE_STOPPED_IO_DMA; 1551 else if (!dma_en && mmio_en) 1552 ret = EEH_PE_STATE_STOPPED_DMA; 1553 else 1554 ret = EEH_PE_STATE_UNAVAIL; 1555 1556 return ret; 1557 } 1558 EXPORT_SYMBOL_GPL(eeh_pe_get_state); 1559 1560 static int eeh_pe_reenable_devices(struct eeh_pe *pe) 1561 { 1562 struct eeh_dev *edev, *tmp; 1563 struct pci_dev *pdev; 1564 int ret = 0; 1565 1566 /* Restore config space */ 1567 eeh_pe_restore_bars(pe); 1568 1569 /* 1570 * Reenable PCI devices as the devices passed 1571 * through are always enabled before the reset. 1572 */ 1573 eeh_pe_for_each_dev(pe, edev, tmp) { 1574 pdev = eeh_dev_to_pci_dev(edev); 1575 if (!pdev) 1576 continue; 1577 1578 ret = pci_reenable_device(pdev); 1579 if (ret) { 1580 pr_warn("%s: Failure %d reenabling %s\n", 1581 __func__, ret, pci_name(pdev)); 1582 return ret; 1583 } 1584 } 1585 1586 /* The PE is still in frozen state */ 1587 return eeh_unfreeze_pe(pe, true); 1588 } 1589 1590 /** 1591 * eeh_pe_reset - Issue PE reset according to specified type 1592 * @pe: EEH PE 1593 * @option: reset type 1594 * 1595 * The routine is called to reset the specified PE with the 1596 * indicated type, either fundamental reset or hot reset. 1597 * PE reset is the most important part for error recovery. 1598 */ 1599 int eeh_pe_reset(struct eeh_pe *pe, int option) 1600 { 1601 int ret = 0; 1602 1603 /* Invalid PE ? 
*/ 1604 if (!pe) 1605 return -ENODEV; 1606 1607 if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) 1608 return -ENOENT; 1609 1610 switch (option) { 1611 case EEH_RESET_DEACTIVATE: 1612 ret = eeh_ops->reset(pe, option); 1613 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 1614 if (ret) 1615 break; 1616 1617 ret = eeh_pe_reenable_devices(pe); 1618 break; 1619 case EEH_RESET_HOT: 1620 case EEH_RESET_FUNDAMENTAL: 1621 /* 1622 * Proactively freeze the PE to drop all MMIO access 1623 * during reset, which should be banned as it's always 1624 * cause recursive EEH error. 1625 */ 1626 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 1627 1628 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 1629 ret = eeh_ops->reset(pe, option); 1630 break; 1631 default: 1632 pr_debug("%s: Unsupported option %d\n", 1633 __func__, option); 1634 ret = -EINVAL; 1635 } 1636 1637 return ret; 1638 } 1639 EXPORT_SYMBOL_GPL(eeh_pe_reset); 1640 1641 /** 1642 * eeh_pe_configure - Configure PCI bridges after PE reset 1643 * @pe: EEH PE 1644 * 1645 * The routine is called to restore the PCI config space for 1646 * those PCI devices, especially PCI bridges affected by PE 1647 * reset issued previously. 1648 */ 1649 int eeh_pe_configure(struct eeh_pe *pe) 1650 { 1651 int ret = 0; 1652 1653 /* Invalid PE ? */ 1654 if (!pe) 1655 return -ENODEV; 1656 1657 return ret; 1658 } 1659 EXPORT_SYMBOL_GPL(eeh_pe_configure); 1660 1661 /** 1662 * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE 1663 * @pe: the indicated PE 1664 * @type: error type 1665 * @function: error function 1666 * @addr: address 1667 * @mask: address mask 1668 * 1669 * The routine is called to inject the specified PCI error, which 1670 * is determined by @type and @function, to the indicated PE for 1671 * testing purpose. 1672 */ 1673 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, 1674 unsigned long addr, unsigned long mask) 1675 { 1676 /* Invalid PE ? 
*/ 1677 if (!pe) 1678 return -ENODEV; 1679 1680 /* Unsupported operation ? */ 1681 if (!eeh_ops || !eeh_ops->err_inject) 1682 return -ENOENT; 1683 1684 /* Check on PCI error type */ 1685 if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) 1686 return -EINVAL; 1687 1688 /* Check on PCI error function */ 1689 if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) 1690 return -EINVAL; 1691 1692 return eeh_ops->err_inject(pe, type, func, addr, mask); 1693 } 1694 EXPORT_SYMBOL_GPL(eeh_pe_inject_err); 1695 1696 static int proc_eeh_show(struct seq_file *m, void *v) 1697 { 1698 if (!eeh_enabled()) { 1699 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1700 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1701 } else { 1702 seq_printf(m, "EEH Subsystem is enabled\n"); 1703 seq_printf(m, 1704 "no device=%llu\n" 1705 "no device node=%llu\n" 1706 "no config address=%llu\n" 1707 "check not wanted=%llu\n" 1708 "eeh_total_mmio_ffs=%llu\n" 1709 "eeh_false_positives=%llu\n" 1710 "eeh_slot_resets=%llu\n", 1711 eeh_stats.no_device, 1712 eeh_stats.no_dn, 1713 eeh_stats.no_cfg_addr, 1714 eeh_stats.ignored_check, 1715 eeh_stats.total_mmio_ffs, 1716 eeh_stats.false_positives, 1717 eeh_stats.slot_resets); 1718 } 1719 1720 return 0; 1721 } 1722 1723 static int proc_eeh_open(struct inode *inode, struct file *file) 1724 { 1725 return single_open(file, proc_eeh_show, NULL); 1726 } 1727 1728 static const struct file_operations proc_eeh_operations = { 1729 .open = proc_eeh_open, 1730 .read = seq_read, 1731 .llseek = seq_lseek, 1732 .release = single_release, 1733 }; 1734 1735 #ifdef CONFIG_DEBUG_FS 1736 static int eeh_enable_dbgfs_set(void *data, u64 val) 1737 { 1738 if (val) 1739 eeh_clear_flag(EEH_FORCE_DISABLED); 1740 else 1741 eeh_add_flag(EEH_FORCE_DISABLED); 1742 1743 /* Notify the backend */ 1744 if (eeh_ops->post_init) 1745 eeh_ops->post_init(); 1746 1747 return 0; 1748 } 1749 1750 static int eeh_enable_dbgfs_get(void *data, u64 *val) 1751 { 1752 if 
(eeh_enabled()) 1753 *val = 0x1ul; 1754 else 1755 *val = 0x0ul; 1756 return 0; 1757 } 1758 1759 static int eeh_freeze_dbgfs_set(void *data, u64 val) 1760 { 1761 eeh_max_freezes = val; 1762 return 0; 1763 } 1764 1765 static int eeh_freeze_dbgfs_get(void *data, u64 *val) 1766 { 1767 *val = eeh_max_freezes; 1768 return 0; 1769 } 1770 1771 DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, 1772 eeh_enable_dbgfs_set, "0x%llx\n"); 1773 DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, 1774 eeh_freeze_dbgfs_set, "0x%llx\n"); 1775 #endif 1776 1777 static int __init eeh_init_proc(void) 1778 { 1779 if (machine_is(pseries) || machine_is(powernv)) { 1780 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); 1781 #ifdef CONFIG_DEBUG_FS 1782 debugfs_create_file("eeh_enable", 0600, 1783 powerpc_debugfs_root, NULL, 1784 &eeh_enable_dbgfs_ops); 1785 debugfs_create_file("eeh_max_freezes", 0600, 1786 powerpc_debugfs_root, NULL, 1787 &eeh_freeze_dbgfs_ops); 1788 #endif 1789 } 1790 1791 return 0; 1792 } 1793 __initcall(eeh_init_proc); 1794