1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/sched.h> 26 #include <linux/init.h> 27 #include <linux/list.h> 28 #include <linux/pci.h> 29 #include <linux/iommu.h> 30 #include <linux/proc_fs.h> 31 #include <linux/rbtree.h> 32 #include <linux/reboot.h> 33 #include <linux/seq_file.h> 34 #include <linux/spinlock.h> 35 #include <linux/export.h> 36 #include <linux/of.h> 37 38 #include <linux/atomic.h> 39 #include <asm/debugfs.h> 40 #include <asm/eeh.h> 41 #include <asm/eeh_event.h> 42 #include <asm/io.h> 43 #include <asm/iommu.h> 44 #include <asm/machdep.h> 45 #include <asm/ppc-pci.h> 46 #include <asm/rtas.h> 47 #include <asm/pte-walk.h> 48 49 50 /** Overview: 51 * EEH, or "Enhanced Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. Systems 54 * that are designed for high-availability/reliability cannot afford 55 * to crash due to a "mere" PCI error, thus the need for EEH. 56 * An EEH-capable bridge operates by converting a detected error 57 * into a "slot freeze", taking the PCI adapter off-line, making 58 * the slot behave, from the OS'es point of view, as if the slot 59 * were "empty": all reads return 0xff's and all writes are silently 60 * ignored. EEH slot isolation events can be triggered by parity 61 * errors on the address or data busses (e.g. during posted writes), 62 * which in turn might be caused by low voltage on the bus, dust, 63 * vibration, humidity, radioactivity or plain-old failed hardware. 64 * 65 * Note, however, that one of the leading causes of EEH slot 66 * freeze events are buggy device drivers, buggy device microcode, 67 * or buggy device hardware. This is because any attempt by the 68 * device to bus-master data to a memory address that is not 69 * assigned to the device will trigger a slot freeze. (The idea 70 * is to prevent devices-gone-wild from corrupting system memory). 71 * Buggy hardware/drivers will have a miserable time co-existing 72 * with EEH. 73 * 74 * Ideally, a PCI device driver, when suspecting that an isolation 75 * event has occurred (e.g. by reading 0xff's), will then ask EEH 76 * whether this is the case, and then take appropriate steps to 77 * reset the PCI slot, the PCI device, and then resume operations. 78 * However, until that day, the checking is done here, with the 79 * eeh_check_failure() routine embedded in the MMIO macros. If 80 * the slot is found to be isolated, an "EEH Event" is synthesized 81 * and sent out for processing. 82 */ 83 84 /* If a device driver keeps reading an MMIO register in an interrupt 85 * handler after a slot isolation event, it might be broken. 86 * This sets the threshold for how many read attempts we allow 87 * before printing an error message. 88 */ 89 #define EEH_MAX_FAILS 2100000 90 91 /* Time to wait for a PCI slot to report status, in milliseconds */ 92 #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) 93 94 /* 95 * EEH probe mode support, which is part of the flags, 96 * is to support multiple platforms for EEH. Some platforms 97 * like pSeries do PCI emunation based on device tree. 98 * However, other platforms like powernv probe PCI devices 99 * from hardware. The flag is used to distinguish that. 100 * In addition, struct eeh_ops::probe would be invoked for 101 * particular OF node or PCI device so that the corresponding 102 * PE would be created there. 103 */ 104 int eeh_subsystem_flags; 105 EXPORT_SYMBOL(eeh_subsystem_flags); 106 107 /* 108 * EEH allowed maximal frozen times. If one particular PE's 109 * frozen count in last hour exceeds this limit, the PE will 110 * be forced to be offline permanently. 111 */ 112 int eeh_max_freezes = 5; 113 114 /* Platform dependent EEH operations */ 115 struct eeh_ops *eeh_ops = NULL; 116 117 /* Lock to avoid races due to multiple reports of an error */ 118 DEFINE_RAW_SPINLOCK(confirm_error_lock); 119 EXPORT_SYMBOL_GPL(confirm_error_lock); 120 121 /* Lock to protect passed flags */ 122 static DEFINE_MUTEX(eeh_dev_mutex); 123 124 /* Buffer for reporting pci register dumps. Its here in BSS, and 125 * not dynamically alloced, so that it ends up in RMO where RTAS 126 * can access it. 127 */ 128 #define EEH_PCI_REGS_LOG_LEN 8192 129 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; 130 131 /* 132 * The struct is used to maintain the EEH global statistic 133 * information. Besides, the EEH global statistics will be 134 * exported to user space through procfs 135 */ 136 struct eeh_stats { 137 u64 no_device; /* PCI device not found */ 138 u64 no_dn; /* OF node not found */ 139 u64 no_cfg_addr; /* Config address not found */ 140 u64 ignored_check; /* EEH check skipped */ 141 u64 total_mmio_ffs; /* Total EEH checks */ 142 u64 false_positives; /* Unnecessary EEH checks */ 143 u64 slot_resets; /* PE reset */ 144 }; 145 146 static struct eeh_stats eeh_stats; 147 148 static int __init eeh_setup(char *str) 149 { 150 if (!strcmp(str, "off")) 151 eeh_add_flag(EEH_FORCE_DISABLED); 152 else if (!strcmp(str, "early_log")) 153 eeh_add_flag(EEH_EARLY_DUMP_LOG); 154 155 return 1; 156 } 157 __setup("eeh=", eeh_setup); 158 159 /* 160 * This routine captures assorted PCI configuration space data 161 * for the indicated PCI device, and puts them into a buffer 162 * for RTAS error logging. 163 */ 164 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) 165 { 166 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 167 u32 cfg; 168 int cap, i; 169 int n = 0, l = 0; 170 char buffer[128]; 171 172 if (!pdn) { 173 pr_warn("EEH: Note: No error log for absent device.\n"); 174 return 0; 175 } 176 177 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", 178 pdn->phb->global_number, pdn->busno, 179 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 180 pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", 181 pdn->phb->global_number, pdn->busno, 182 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 183 184 eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 185 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 186 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 187 188 eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); 189 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 190 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 191 192 /* Gather bridge-specific registers */ 193 if (edev->mode & EEH_DEV_BRIDGE) { 194 eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); 195 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 196 pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 197 198 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); 199 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 200 pr_warn("EEH: Bridge control: %04x\n", cfg); 201 } 202 203 /* Dump out the PCI-X command and status regs */ 204 cap = edev->pcix_cap; 205 if (cap) { 206 eeh_ops->read_config(pdn, cap, 4, &cfg); 207 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 208 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 209 210 eeh_ops->read_config(pdn, cap+4, 4, &cfg); 211 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 212 pr_warn("EEH: PCI-X status: %08x\n", cfg); 213 } 214 215 /* If PCI-E capable, dump PCI-E cap 10 */ 216 cap = edev->pcie_cap; 217 if (cap) { 218 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 219 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 220 221 for (i=0; i<=8; i++) { 222 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 223 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 224 225 if ((i % 4) == 0) { 226 if (i != 0) 227 pr_warn("%s\n", buffer); 228 229 l = scnprintf(buffer, sizeof(buffer), 230 "EEH: PCI-E %02x: %08x ", 231 4*i, cfg); 232 } else { 233 l += scnprintf(buffer+l, sizeof(buffer)-l, 234 "%08x ", cfg); 235 } 236 237 } 238 239 pr_warn("%s\n", buffer); 240 } 241 242 /* If AER capable, dump it */ 243 cap = edev->aer_cap; 244 if (cap) { 245 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 246 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 247 248 for (i=0; i<=13; i++) { 249 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 250 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 251 252 if ((i % 4) == 0) { 253 if (i != 0) 254 pr_warn("%s\n", buffer); 255 256 l = scnprintf(buffer, sizeof(buffer), 257 "EEH: PCI-E AER %02x: %08x ", 258 4*i, cfg); 259 } else { 260 l += scnprintf(buffer+l, sizeof(buffer)-l, 261 "%08x ", cfg); 262 } 263 } 264 265 pr_warn("%s\n", buffer); 266 } 267 268 return n; 269 } 270 271 static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag) 272 { 273 struct eeh_dev *edev, *tmp; 274 size_t *plen = flag; 275 276 eeh_pe_for_each_dev(pe, edev, tmp) 277 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, 278 EEH_PCI_REGS_LOG_LEN - *plen); 279 280 return NULL; 281 } 282 283 /** 284 * eeh_slot_error_detail - Generate combined log including driver log and error log 285 * @pe: EEH PE 286 * @severity: temporary or permanent error log 287 * 288 * This routine should be called to generate the combined log, which 289 * is comprised of driver log and error log. The driver log is figured 290 * out from the config space of the corresponding PCI device, while 291 * the error log is fetched through platform dependent function call. 292 */ 293 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 294 { 295 size_t loglen = 0; 296 297 /* 298 * When the PHB is fenced or dead, it's pointless to collect 299 * the data from PCI config space because it should return 300 * 0xFF's. For ER, we still retrieve the data from the PCI 301 * config space. 302 * 303 * For pHyp, we have to enable IO for log retrieval. Otherwise, 304 * 0xFF's is always returned from PCI config space. 305 * 306 * When the @severity is EEH_LOG_PERM, the PE is going to be 307 * removed. Prior to that, the drivers for devices included in 308 * the PE will be closed. The drivers rely on working IO path 309 * to bring the devices to quiet state. Otherwise, PCI traffic 310 * from those devices after they are removed is like to cause 311 * another unexpected EEH error. 312 */ 313 if (!(pe->type & EEH_PE_PHB)) { 314 if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) || 315 severity == EEH_LOG_PERM) 316 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 317 318 /* 319 * The config space of some PCI devices can't be accessed 320 * when their PEs are in frozen state. Otherwise, fenced 321 * PHB might be seen. Those PEs are identified with flag 322 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED 323 * is set automatically when the PE is put to EEH_PE_ISOLATED. 324 * 325 * Restoring BARs possibly triggers PCI config access in 326 * (OPAL) firmware and then causes fenced PHB. If the 327 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's 328 * pointless to restore BARs and dump config space. 329 */ 330 eeh_ops->configure_bridge(pe); 331 if (!(pe->state & EEH_PE_CFG_BLOCKED)) { 332 eeh_pe_restore_bars(pe); 333 334 pci_regs_buf[0] = 0; 335 eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); 336 } 337 } 338 339 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 340 } 341 342 /** 343 * eeh_token_to_phys - Convert EEH address token to phys address 344 * @token: I/O token, should be address in the form 0xA.... 345 * 346 * This routine should be called to convert virtual I/O address 347 * to physical one. 348 */ 349 static inline unsigned long eeh_token_to_phys(unsigned long token) 350 { 351 pte_t *ptep; 352 unsigned long pa; 353 int hugepage_shift; 354 355 /* 356 * We won't find hugepages here(this is iomem). Hence we are not 357 * worried about _PAGE_SPLITTING/collapse. Also we will not hit 358 * page table free, because of init_mm. 359 */ 360 ptep = find_init_mm_pte(token, &hugepage_shift); 361 if (!ptep) 362 return token; 363 WARN_ON(hugepage_shift); 364 pa = pte_pfn(*ptep) << PAGE_SHIFT; 365 366 return pa | (token & (PAGE_SIZE-1)); 367 } 368 369 /* 370 * On PowerNV platform, we might already have fenced PHB there. 371 * For that case, it's meaningless to recover frozen PE. Intead, 372 * We have to handle fenced PHB firstly. 373 */ 374 static int eeh_phb_check_failure(struct eeh_pe *pe) 375 { 376 struct eeh_pe *phb_pe; 377 unsigned long flags; 378 int ret; 379 380 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 381 return -EPERM; 382 383 /* Find the PHB PE */ 384 phb_pe = eeh_phb_pe_get(pe->phb); 385 if (!phb_pe) { 386 pr_warn("%s Can't find PE for PHB#%x\n", 387 __func__, pe->phb->global_number); 388 return -EEXIST; 389 } 390 391 /* If the PHB has been in problematic state */ 392 eeh_serialize_lock(&flags); 393 if (phb_pe->state & EEH_PE_ISOLATED) { 394 ret = 0; 395 goto out; 396 } 397 398 /* Check PHB state */ 399 ret = eeh_ops->get_state(phb_pe, NULL); 400 if ((ret < 0) || 401 (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { 402 ret = 0; 403 goto out; 404 } 405 406 /* Isolate the PHB and send event */ 407 eeh_pe_mark_isolated(phb_pe); 408 eeh_serialize_unlock(flags); 409 410 pr_err("EEH: PHB#%x failure detected, location: %s\n", 411 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 412 dump_stack(); 413 eeh_send_failure_event(phb_pe); 414 415 return 1; 416 out: 417 eeh_serialize_unlock(flags); 418 return ret; 419 } 420 421 /** 422 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 423 * @edev: eeh device 424 * 425 * Check for an EEH failure for the given device node. Call this 426 * routine if the result of a read was all 0xff's and you want to 427 * find out if this is due to an EEH slot freeze. This routine 428 * will query firmware for the EEH status. 429 * 430 * Returns 0 if there has not been an EEH error; otherwise returns 431 * a non-zero value and queues up a slot isolation event notification. 432 * 433 * It is safe to call this routine in an interrupt context. 434 */ 435 int eeh_dev_check_failure(struct eeh_dev *edev) 436 { 437 int ret; 438 unsigned long flags; 439 struct device_node *dn; 440 struct pci_dev *dev; 441 struct eeh_pe *pe, *parent_pe, *phb_pe; 442 int rc = 0; 443 const char *location = NULL; 444 445 eeh_stats.total_mmio_ffs++; 446 447 if (!eeh_enabled()) 448 return 0; 449 450 if (!edev) { 451 eeh_stats.no_dn++; 452 return 0; 453 } 454 dev = eeh_dev_to_pci_dev(edev); 455 pe = eeh_dev_to_pe(edev); 456 457 /* Access to IO BARs might get this far and still not want checking. */ 458 if (!pe) { 459 eeh_stats.ignored_check++; 460 pr_debug("EEH: Ignored check for %s\n", 461 eeh_pci_name(dev)); 462 return 0; 463 } 464 465 if (!pe->addr && !pe->config_addr) { 466 eeh_stats.no_cfg_addr++; 467 return 0; 468 } 469 470 /* 471 * On PowerNV platform, we might already have fenced PHB 472 * there and we need take care of that firstly. 473 */ 474 ret = eeh_phb_check_failure(pe); 475 if (ret > 0) 476 return ret; 477 478 /* 479 * If the PE isn't owned by us, we shouldn't check the 480 * state. Instead, let the owner handle it if the PE has 481 * been frozen. 482 */ 483 if (eeh_pe_passed(pe)) 484 return 0; 485 486 /* If we already have a pending isolation event for this 487 * slot, we know it's bad already, we don't need to check. 488 * Do this checking under a lock; as multiple PCI devices 489 * in one slot might report errors simultaneously, and we 490 * only want one error recovery routine running. 491 */ 492 eeh_serialize_lock(&flags); 493 rc = 1; 494 if (pe->state & EEH_PE_ISOLATED) { 495 pe->check_count++; 496 if (pe->check_count % EEH_MAX_FAILS == 0) { 497 dn = pci_device_to_OF_node(dev); 498 if (dn) 499 location = of_get_property(dn, "ibm,loc-code", 500 NULL); 501 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 502 "location=%s driver=%s pci addr=%s\n", 503 pe->check_count, 504 location ? location : "unknown", 505 eeh_driver_name(dev), eeh_pci_name(dev)); 506 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 507 eeh_driver_name(dev)); 508 dump_stack(); 509 } 510 goto dn_unlock; 511 } 512 513 /* 514 * Now test for an EEH failure. This is VERY expensive. 515 * Note that the eeh_config_addr may be a parent device 516 * in the case of a device behind a bridge, or it may be 517 * function zero of a multi-function device. 518 * In any case they must share a common PHB. 519 */ 520 ret = eeh_ops->get_state(pe, NULL); 521 522 /* Note that config-io to empty slots may fail; 523 * they are empty when they don't have children. 524 * We will punt with the following conditions: Failure to get 525 * PE's state, EEH not support and Permanently unavailable 526 * state, PE is in good state. 527 */ 528 if ((ret < 0) || 529 (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { 530 eeh_stats.false_positives++; 531 pe->false_positives++; 532 rc = 0; 533 goto dn_unlock; 534 } 535 536 /* 537 * It should be corner case that the parent PE has been 538 * put into frozen state as well. We should take care 539 * that at first. 540 */ 541 parent_pe = pe->parent; 542 while (parent_pe) { 543 /* Hit the ceiling ? */ 544 if (parent_pe->type & EEH_PE_PHB) 545 break; 546 547 /* Frozen parent PE ? */ 548 ret = eeh_ops->get_state(parent_pe, NULL); 549 if (ret > 0 && !eeh_state_active(ret)) { 550 pe = parent_pe; 551 pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n", 552 pe->phb->global_number, pe->addr, 553 pe->phb->global_number, parent_pe->addr); 554 } 555 556 /* Next parent level */ 557 parent_pe = parent_pe->parent; 558 } 559 560 eeh_stats.slot_resets++; 561 562 /* Avoid repeated reports of this failure, including problems 563 * with other functions on this device, and functions under 564 * bridges. 565 */ 566 eeh_pe_mark_isolated(pe); 567 eeh_serialize_unlock(flags); 568 569 /* Most EEH events are due to device driver bugs. Having 570 * a stack trace will help the device-driver authors figure 571 * out what happened. So print that out. 572 */ 573 phb_pe = eeh_phb_pe_get(pe->phb); 574 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 575 pe->phb->global_number, pe->addr); 576 pr_err("EEH: PE location: %s, PHB location: %s\n", 577 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 578 dump_stack(); 579 580 eeh_send_failure_event(pe); 581 582 return 1; 583 584 dn_unlock: 585 eeh_serialize_unlock(flags); 586 return rc; 587 } 588 589 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 590 591 /** 592 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 593 * @token: I/O address 594 * 595 * Check for an EEH failure at the given I/O address. Call this 596 * routine if the result of a read was all 0xff's and you want to 597 * find out if this is due to an EEH slot freeze event. This routine 598 * will query firmware for the EEH status. 599 * 600 * Note this routine is safe to call in an interrupt context. 601 */ 602 int eeh_check_failure(const volatile void __iomem *token) 603 { 604 unsigned long addr; 605 struct eeh_dev *edev; 606 607 /* Finding the phys addr + pci device; this is pretty quick. */ 608 addr = eeh_token_to_phys((unsigned long __force) token); 609 edev = eeh_addr_cache_get_dev(addr); 610 if (!edev) { 611 eeh_stats.no_device++; 612 return 0; 613 } 614 615 return eeh_dev_check_failure(edev); 616 } 617 EXPORT_SYMBOL(eeh_check_failure); 618 619 620 /** 621 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 622 * @pe: EEH PE 623 * 624 * This routine should be called to reenable frozen MMIO or DMA 625 * so that it would work correctly again. It's useful while doing 626 * recovery or log collection on the indicated device. 627 */ 628 int eeh_pci_enable(struct eeh_pe *pe, int function) 629 { 630 int active_flag, rc; 631 632 /* 633 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 634 * Also, it's pointless to enable them on unfrozen PE. So 635 * we have to check before enabling IO or DMA. 636 */ 637 switch (function) { 638 case EEH_OPT_THAW_MMIO: 639 active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED; 640 break; 641 case EEH_OPT_THAW_DMA: 642 active_flag = EEH_STATE_DMA_ACTIVE; 643 break; 644 case EEH_OPT_DISABLE: 645 case EEH_OPT_ENABLE: 646 case EEH_OPT_FREEZE_PE: 647 active_flag = 0; 648 break; 649 default: 650 pr_warn("%s: Invalid function %d\n", 651 __func__, function); 652 return -EINVAL; 653 } 654 655 /* 656 * Check if IO or DMA has been enabled before 657 * enabling them. 658 */ 659 if (active_flag) { 660 rc = eeh_ops->get_state(pe, NULL); 661 if (rc < 0) 662 return rc; 663 664 /* Needn't enable it at all */ 665 if (rc == EEH_STATE_NOT_SUPPORT) 666 return 0; 667 668 /* It's already enabled */ 669 if (rc & active_flag) 670 return 0; 671 } 672 673 674 /* Issue the request */ 675 rc = eeh_ops->set_option(pe, function); 676 if (rc) 677 pr_warn("%s: Unexpected state change %d on " 678 "PHB#%x-PE#%x, err=%d\n", 679 __func__, function, pe->phb->global_number, 680 pe->addr, rc); 681 682 /* Check if the request is finished successfully */ 683 if (active_flag) { 684 rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 685 if (rc < 0) 686 return rc; 687 688 if (rc & active_flag) 689 return 0; 690 691 return -EIO; 692 } 693 694 return rc; 695 } 696 697 static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev, 698 void *userdata) 699 { 700 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 701 struct pci_dev *dev = userdata; 702 703 /* 704 * The caller should have disabled and saved the 705 * state for the specified device 706 */ 707 if (!pdev || pdev == dev) 708 return NULL; 709 710 /* Ensure we have D0 power state */ 711 pci_set_power_state(pdev, PCI_D0); 712 713 /* Save device state */ 714 pci_save_state(pdev); 715 716 /* 717 * Disable device to avoid any DMA traffic and 718 * interrupt from the device 719 */ 720 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); 721 722 return NULL; 723 } 724 725 static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) 726 { 727 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 728 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 729 struct pci_dev *dev = userdata; 730 731 if (!pdev) 732 return NULL; 733 734 /* Apply customization from firmware */ 735 if (pdn && eeh_ops->restore_config) 736 eeh_ops->restore_config(pdn); 737 738 /* The caller should restore state for the specified device */ 739 if (pdev != dev) 740 pci_restore_state(pdev); 741 742 return NULL; 743 } 744 745 int eeh_restore_vf_config(struct pci_dn *pdn) 746 { 747 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 748 u32 devctl, cmd, cap2, aer_capctl; 749 int old_mps; 750 751 if (edev->pcie_cap) { 752 /* Restore MPS */ 753 old_mps = (ffs(pdn->mps) - 8) << 5; 754 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 755 2, &devctl); 756 devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; 757 devctl |= old_mps; 758 eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 759 2, devctl); 760 761 /* Disable Completion Timeout if possible */ 762 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, 763 4, &cap2); 764 if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { 765 eeh_ops->read_config(pdn, 766 edev->pcie_cap + PCI_EXP_DEVCTL2, 767 4, &cap2); 768 cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; 769 eeh_ops->write_config(pdn, 770 edev->pcie_cap + PCI_EXP_DEVCTL2, 771 4, cap2); 772 } 773 } 774 775 /* Enable SERR and parity checking */ 776 eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); 777 cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); 778 eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); 779 780 /* Enable report various errors */ 781 if (edev->pcie_cap) { 782 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 783 2, &devctl); 784 devctl &= ~PCI_EXP_DEVCTL_CERE; 785 devctl |= (PCI_EXP_DEVCTL_NFERE | 786 PCI_EXP_DEVCTL_FERE | 787 PCI_EXP_DEVCTL_URRE); 788 eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 789 2, devctl); 790 } 791 792 /* Enable ECRC generation and check */ 793 if (edev->pcie_cap && edev->aer_cap) { 794 eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, 795 4, &aer_capctl); 796 aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); 797 eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, 798 4, aer_capctl); 799 } 800 801 return 0; 802 } 803 804 /** 805 * pcibios_set_pcie_reset_state - Set PCI-E reset state 806 * @dev: pci device struct 807 * @state: reset state to enter 808 * 809 * Return value: 810 * 0 if success 811 */ 812 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 813 { 814 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 815 struct eeh_pe *pe = eeh_dev_to_pe(edev); 816 817 if (!pe) { 818 pr_err("%s: No PE found on PCI device %s\n", 819 __func__, pci_name(dev)); 820 return -EINVAL; 821 } 822 823 switch (state) { 824 case pcie_deassert_reset: 825 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 826 eeh_unfreeze_pe(pe, false); 827 if (!(pe->type & EEH_PE_VF)) 828 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 829 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); 830 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 831 break; 832 case pcie_hot_reset: 833 eeh_pe_mark_isolated(pe); 834 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 835 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 836 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 837 if (!(pe->type & EEH_PE_VF)) 838 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 839 eeh_ops->reset(pe, EEH_RESET_HOT); 840 break; 841 case pcie_warm_reset: 842 eeh_pe_mark_isolated(pe); 843 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 844 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 845 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 846 if (!(pe->type & EEH_PE_VF)) 847 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 848 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 849 break; 850 default: 851 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); 852 return -EINVAL; 853 }; 854 855 return 0; 856 } 857 858 /** 859 * eeh_set_pe_freset - Check the required reset for the indicated device 860 * @data: EEH device 861 * @flag: return value 862 * 863 * Each device might have its preferred reset type: fundamental or 864 * hot reset. The routine is used to collected the information for 865 * the indicated device and its children so that the bunch of the 866 * devices could be reset properly. 867 */ 868 static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) 869 { 870 struct pci_dev *dev; 871 unsigned int *freset = (unsigned int *)flag; 872 873 dev = eeh_dev_to_pci_dev(edev); 874 if (dev) 875 *freset |= dev->needs_freset; 876 877 return NULL; 878 } 879 880 /** 881 * eeh_pe_reset_full - Complete a full reset process on the indicated PE 882 * @pe: EEH PE 883 * 884 * This function executes a full reset procedure on a PE, including setting 885 * the appropriate flags, performing a fundamental or hot reset, and then 886 * deactivating the reset status. It is designed to be used within the EEH 887 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and 888 * only performs a single operation at a time. 889 * 890 * This function will attempt to reset a PE three times before failing. 891 */ 892 int eeh_pe_reset_full(struct eeh_pe *pe) 893 { 894 int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 895 int type = EEH_RESET_HOT; 896 unsigned int freset = 0; 897 int i, state, ret; 898 899 /* 900 * Determine the type of reset to perform - hot or fundamental. 901 * Hot reset is the default operation, unless any device under the 902 * PE requires a fundamental reset. 903 */ 904 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 905 906 if (freset) 907 type = EEH_RESET_FUNDAMENTAL; 908 909 /* Mark the PE as in reset state and block config space accesses */ 910 eeh_pe_state_mark(pe, reset_state); 911 912 /* Make three attempts at resetting the bus */ 913 for (i = 0; i < 3; i++) { 914 ret = eeh_pe_reset(pe, type); 915 if (ret) 916 break; 917 918 ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); 919 if (ret) 920 break; 921 922 /* Wait until the PE is in a functioning state */ 923 state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 924 if (state < 0) { 925 pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", 926 __func__, pe->phb->global_number, pe->addr); 927 ret = -ENOTRECOVERABLE; 928 break; 929 } 930 if (eeh_state_active(state)) 931 break; 932 933 /* Set error in case this is our last attempt */ 934 ret = -EIO; 935 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", 936 __func__, state, pe->phb->global_number, pe->addr, (i + 1)); 937 } 938 939 eeh_pe_state_clear(pe, reset_state); 940 return ret; 941 } 942 943 /** 944 * eeh_save_bars - Save device bars 945 * @edev: PCI device associated EEH device 946 * 947 * Save the values of the device bars. Unlike the restore 948 * routine, this routine is *not* recursive. This is because 949 * PCI devices are added individually; but, for the restore, 950 * an entire slot is reset at a time. 951 */ 952 void eeh_save_bars(struct eeh_dev *edev) 953 { 954 struct pci_dn *pdn; 955 int i; 956 957 pdn = eeh_dev_to_pdn(edev); 958 if (!pdn) 959 return; 960 961 for (i = 0; i < 16; i++) 962 eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); 963 964 /* 965 * For PCI bridges including root port, we need enable bus 966 * master explicitly. Otherwise, it can't fetch IODA table 967 * entries correctly. So we cache the bit in advance so that 968 * we can restore it after reset, either PHB range or PE range. 969 */ 970 if (edev->mode & EEH_DEV_BRIDGE) 971 edev->config_space[1] |= PCI_COMMAND_MASTER; 972 } 973 974 /** 975 * eeh_ops_register - Register platform dependent EEH operations 976 * @ops: platform dependent EEH operations 977 * 978 * Register the platform dependent EEH operation callback 979 * functions. The platform should call this function before 980 * any other EEH operations. 981 */ 982 int __init eeh_ops_register(struct eeh_ops *ops) 983 { 984 if (!ops->name) { 985 pr_warn("%s: Invalid EEH ops name for %p\n", 986 __func__, ops); 987 return -EINVAL; 988 } 989 990 if (eeh_ops && eeh_ops != ops) { 991 pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 992 __func__, eeh_ops->name, ops->name); 993 return -EEXIST; 994 } 995 996 eeh_ops = ops; 997 998 return 0; 999 } 1000 1001 /** 1002 * eeh_ops_unregister - Unreigster platform dependent EEH operations 1003 * @name: name of EEH platform operations 1004 * 1005 * Unregister the platform dependent EEH operation callback 1006 * functions. 1007 */ 1008 int __exit eeh_ops_unregister(const char *name) 1009 { 1010 if (!name || !strlen(name)) { 1011 pr_warn("%s: Invalid EEH ops name\n", 1012 __func__); 1013 return -EINVAL; 1014 } 1015 1016 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 1017 eeh_ops = NULL; 1018 return 0; 1019 } 1020 1021 return -EEXIST; 1022 } 1023 1024 static int eeh_reboot_notifier(struct notifier_block *nb, 1025 unsigned long action, void *unused) 1026 { 1027 eeh_clear_flag(EEH_ENABLED); 1028 return NOTIFY_DONE; 1029 } 1030 1031 static struct notifier_block eeh_reboot_nb = { 1032 .notifier_call = eeh_reboot_notifier, 1033 }; 1034 1035 void eeh_probe_devices(void) 1036 { 1037 struct pci_controller *hose, *tmp; 1038 struct pci_dn *pdn; 1039 1040 /* Enable EEH for all adapters */ 1041 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1042 pdn = hose->pci_data; 1043 traverse_pci_dn(pdn, eeh_ops->probe, NULL); 1044 } 1045 if (eeh_enabled()) 1046 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 1047 else 1048 pr_info("EEH: No capable adapters found\n"); 1049 1050 } 1051 1052 /** 1053 * eeh_init - EEH initialization 1054 * 1055 * Initialize EEH by trying to enable it for all of the adapters in the system. 1056 * As a side effect we can determine here if eeh is supported at all. 1057 * Note that we leave EEH on so failed config cycles won't cause a machine 1058 * check. If a user turns off EEH for a particular adapter they are really 1059 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 1060 * grant access to a slot if EEH isn't enabled, and so we always enable 1061 * EEH for all slots/all devices. 1062 * 1063 * The eeh-force-off option disables EEH checking globally, for all slots. 1064 * Even if force-off is set, the EEH hardware is still enabled, so that 1065 * newer systems can boot. 1066 */ 1067 static int eeh_init(void) 1068 { 1069 struct pci_controller *hose, *tmp; 1070 int ret = 0; 1071 1072 /* Register reboot notifier */ 1073 ret = register_reboot_notifier(&eeh_reboot_nb); 1074 if (ret) { 1075 pr_warn("%s: Failed to register notifier (%d)\n", 1076 __func__, ret); 1077 return ret; 1078 } 1079 1080 /* call platform initialization function */ 1081 if (!eeh_ops) { 1082 pr_warn("%s: Platform EEH operation not found\n", 1083 __func__); 1084 return -EEXIST; 1085 } else if ((ret = eeh_ops->init())) 1086 return ret; 1087 1088 /* Initialize PHB PEs */ 1089 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) 1090 eeh_dev_phb_init_dynamic(hose); 1091 1092 /* Initialize EEH event */ 1093 return eeh_event_init(); 1094 } 1095 1096 core_initcall_sync(eeh_init); 1097 1098 /** 1099 * eeh_add_device_early - Enable EEH for the indicated device node 1100 * @pdn: PCI device node for which to set up EEH 1101 * 1102 * This routine must be used to perform EEH initialization for PCI 1103 * devices that were added after system boot (e.g. hotplug, dlpar). 1104 * This routine must be called before any i/o is performed to the 1105 * adapter (inluding any config-space i/o). 1106 * Whether this actually enables EEH or not for this device depends 1107 * on the CEC architecture, type of the device, on earlier boot 1108 * command-line arguments & etc. 1109 */ 1110 void eeh_add_device_early(struct pci_dn *pdn) 1111 { 1112 struct pci_controller *phb = pdn ? pdn->phb : NULL; 1113 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1114 1115 if (!edev) 1116 return; 1117 1118 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1119 return; 1120 1121 /* USB Bus children of PCI devices will not have BUID's */ 1122 if (NULL == phb || 1123 (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) 1124 return; 1125 1126 eeh_ops->probe(pdn, NULL); 1127 } 1128 1129 /** 1130 * eeh_add_device_tree_early - Enable EEH for the indicated device 1131 * @pdn: PCI device node 1132 * 1133 * This routine must be used to perform EEH initialization for the 1134 * indicated PCI device that was added after system boot (e.g. 1135 * hotplug, dlpar). 1136 */ 1137 void eeh_add_device_tree_early(struct pci_dn *pdn) 1138 { 1139 struct pci_dn *n; 1140 1141 if (!pdn) 1142 return; 1143 1144 list_for_each_entry(n, &pdn->child_list, list) 1145 eeh_add_device_tree_early(n); 1146 eeh_add_device_early(pdn); 1147 } 1148 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1149 1150 /** 1151 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1152 * @dev: pci device for which to set up EEH 1153 * 1154 * This routine must be used to complete EEH initialization for PCI 1155 * devices that were added after system boot (e.g. hotplug, dlpar). 1156 */ 1157 void eeh_add_device_late(struct pci_dev *dev) 1158 { 1159 struct pci_dn *pdn; 1160 struct eeh_dev *edev; 1161 1162 if (!dev || !eeh_enabled()) 1163 return; 1164 1165 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1166 1167 pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 1168 edev = pdn_to_eeh_dev(pdn); 1169 if (edev->pdev == dev) { 1170 pr_debug("EEH: Already referenced !\n"); 1171 return; 1172 } 1173 1174 /* 1175 * The EEH cache might not be removed correctly because of 1176 * unbalanced kref to the device during unplug time, which 1177 * relies on pcibios_release_device(). So we have to remove 1178 * that here explicitly. 1179 */ 1180 if (edev->pdev) { 1181 eeh_rmv_from_parent_pe(edev); 1182 eeh_addr_cache_rmv_dev(edev->pdev); 1183 eeh_sysfs_remove_device(edev->pdev); 1184 edev->mode &= ~EEH_DEV_SYSFS; 1185 1186 /* 1187 * We definitely should have the PCI device removed 1188 * though it wasn't correctly. So we needn't call 1189 * into error handler afterwards. 1190 */ 1191 edev->mode |= EEH_DEV_NO_HANDLER; 1192 1193 edev->pdev = NULL; 1194 dev->dev.archdata.edev = NULL; 1195 } 1196 1197 if (eeh_has_flag(EEH_PROBE_MODE_DEV)) 1198 eeh_ops->probe(pdn, NULL); 1199 1200 edev->pdev = dev; 1201 dev->dev.archdata.edev = edev; 1202 1203 eeh_addr_cache_insert_dev(dev); 1204 } 1205 1206 /** 1207 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1208 * @bus: PCI bus 1209 * 1210 * This routine must be used to perform EEH initialization for PCI 1211 * devices which are attached to the indicated PCI bus. The PCI bus 1212 * is added after system boot through hotplug or dlpar. 1213 */ 1214 void eeh_add_device_tree_late(struct pci_bus *bus) 1215 { 1216 struct pci_dev *dev; 1217 1218 list_for_each_entry(dev, &bus->devices, bus_list) { 1219 eeh_add_device_late(dev); 1220 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1221 struct pci_bus *subbus = dev->subordinate; 1222 if (subbus) 1223 eeh_add_device_tree_late(subbus); 1224 } 1225 } 1226 } 1227 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1228 1229 /** 1230 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1231 * @bus: PCI bus 1232 * 1233 * This routine must be used to add EEH sysfs files for PCI 1234 * devices which are attached to the indicated PCI bus. The PCI bus 1235 * is added after system boot through hotplug or dlpar. 1236 */ 1237 void eeh_add_sysfs_files(struct pci_bus *bus) 1238 { 1239 struct pci_dev *dev; 1240 1241 list_for_each_entry(dev, &bus->devices, bus_list) { 1242 eeh_sysfs_add_device(dev); 1243 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1244 struct pci_bus *subbus = dev->subordinate; 1245 if (subbus) 1246 eeh_add_sysfs_files(subbus); 1247 } 1248 } 1249 } 1250 EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); 1251 1252 /** 1253 * eeh_remove_device - Undo EEH setup for the indicated pci device 1254 * @dev: pci device to be removed 1255 * 1256 * This routine should be called when a device is removed from 1257 * a running system (e.g. by hotplug or dlpar). It unregisters 1258 * the PCI device from the EEH subsystem. I/O errors affecting 1259 * this device will no longer be detected after this call; thus, 1260 * i/o errors affecting this slot may leave this device unusable. 1261 */ 1262 void eeh_remove_device(struct pci_dev *dev) 1263 { 1264 struct eeh_dev *edev; 1265 1266 if (!dev || !eeh_enabled()) 1267 return; 1268 edev = pci_dev_to_eeh_dev(dev); 1269 1270 /* Unregister the device with the EEH/PCI address search system */ 1271 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 1272 1273 if (!edev || !edev->pdev || !edev->pe) { 1274 pr_debug("EEH: Not referenced !\n"); 1275 return; 1276 } 1277 1278 /* 1279 * During the hotplug for EEH error recovery, we need the EEH 1280 * device attached to the parent PE in order for BAR restore 1281 * a bit later. So we keep it for BAR restore and remove it 1282 * from the parent PE during the BAR resotre. 1283 */ 1284 edev->pdev = NULL; 1285 1286 /* 1287 * The flag "in_error" is used to trace EEH devices for VFs 1288 * in error state or not. It's set in eeh_report_error(). If 1289 * it's not set, eeh_report_{reset,resume}() won't be called 1290 * for the VF EEH device. 1291 */ 1292 edev->in_error = false; 1293 dev->dev.archdata.edev = NULL; 1294 if (!(edev->pe->state & EEH_PE_KEEP)) 1295 eeh_rmv_from_parent_pe(edev); 1296 else 1297 edev->mode |= EEH_DEV_DISCONNECTED; 1298 1299 /* 1300 * We're removing from the PCI subsystem, that means 1301 * the PCI device driver can't support EEH or not 1302 * well. So we rely on hotplug completely to do recovery 1303 * for the specific PCI device. 1304 */ 1305 edev->mode |= EEH_DEV_NO_HANDLER; 1306 1307 eeh_addr_cache_rmv_dev(dev); 1308 eeh_sysfs_remove_device(dev); 1309 edev->mode &= ~EEH_DEV_SYSFS; 1310 } 1311 1312 int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) 1313 { 1314 int ret; 1315 1316 ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 1317 if (ret) { 1318 pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", 1319 __func__, ret, pe->phb->global_number, pe->addr); 1320 return ret; 1321 } 1322 1323 ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 1324 if (ret) { 1325 pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", 1326 __func__, ret, pe->phb->global_number, pe->addr); 1327 return ret; 1328 } 1329 1330 /* Clear software isolated state */ 1331 if (sw_state && (pe->state & EEH_PE_ISOLATED)) 1332 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 1333 1334 return ret; 1335 } 1336 1337 1338 static struct pci_device_id eeh_reset_ids[] = { 1339 { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ 1340 { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ 1341 { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ 1342 { 0 } 1343 }; 1344 1345 static int eeh_pe_change_owner(struct eeh_pe *pe) 1346 { 1347 struct eeh_dev *edev, *tmp; 1348 struct pci_dev *pdev; 1349 struct pci_device_id *id; 1350 int ret; 1351 1352 /* Check PE state */ 1353 ret = eeh_ops->get_state(pe, NULL); 1354 if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) 1355 return 0; 1356 1357 /* Unfrozen PE, nothing to do */ 1358 if (eeh_state_active(ret)) 1359 return 0; 1360 1361 /* Frozen PE, check if it needs PE level reset */ 1362 eeh_pe_for_each_dev(pe, edev, tmp) { 1363 pdev = eeh_dev_to_pci_dev(edev); 1364 if (!pdev) 1365 continue; 1366 1367 for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { 1368 if (id->vendor != PCI_ANY_ID && 1369 id->vendor != pdev->vendor) 1370 continue; 1371 if (id->device != PCI_ANY_ID && 1372 id->device != pdev->device) 1373 continue; 1374 if (id->subvendor != PCI_ANY_ID && 1375 id->subvendor != pdev->subsystem_vendor) 1376 continue; 1377 if (id->subdevice != PCI_ANY_ID && 1378 id->subdevice != pdev->subsystem_device) 1379 continue; 1380 1381 return eeh_pe_reset_and_recover(pe); 1382 } 1383 } 1384 1385 return eeh_unfreeze_pe(pe, true); 1386 } 1387 1388 /** 1389 * eeh_dev_open - Increase count of pass through devices for PE 1390 * @pdev: PCI device 1391 * 1392 * Increase count of passed through devices for the indicated 1393 * PE. In the result, the EEH errors detected on the PE won't be 1394 * reported. The PE owner will be responsible for detection 1395 * and recovery. 1396 */ 1397 int eeh_dev_open(struct pci_dev *pdev) 1398 { 1399 struct eeh_dev *edev; 1400 int ret = -ENODEV; 1401 1402 mutex_lock(&eeh_dev_mutex); 1403 1404 /* No PCI device ? */ 1405 if (!pdev) 1406 goto out; 1407 1408 /* No EEH device or PE ? */ 1409 edev = pci_dev_to_eeh_dev(pdev); 1410 if (!edev || !edev->pe) 1411 goto out; 1412 1413 /* 1414 * The PE might have been put into frozen state, but we 1415 * didn't detect that yet. The passed through PCI devices 1416 * in frozen PE won't work properly. Clear the frozen state 1417 * in advance. 1418 */ 1419 ret = eeh_pe_change_owner(edev->pe); 1420 if (ret) 1421 goto out; 1422 1423 /* Increase PE's pass through count */ 1424 atomic_inc(&edev->pe->pass_dev_cnt); 1425 mutex_unlock(&eeh_dev_mutex); 1426 1427 return 0; 1428 out: 1429 mutex_unlock(&eeh_dev_mutex); 1430 return ret; 1431 } 1432 EXPORT_SYMBOL_GPL(eeh_dev_open); 1433 1434 /** 1435 * eeh_dev_release - Decrease count of pass through devices for PE 1436 * @pdev: PCI device 1437 * 1438 * Decrease count of pass through devices for the indicated PE. If 1439 * there is no passed through device in PE, the EEH errors detected 1440 * on the PE will be reported and handled as usual. 1441 */ 1442 void eeh_dev_release(struct pci_dev *pdev) 1443 { 1444 struct eeh_dev *edev; 1445 1446 mutex_lock(&eeh_dev_mutex); 1447 1448 /* No PCI device ? */ 1449 if (!pdev) 1450 goto out; 1451 1452 /* No EEH device ? */ 1453 edev = pci_dev_to_eeh_dev(pdev); 1454 if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) 1455 goto out; 1456 1457 /* Decrease PE's pass through count */ 1458 WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); 1459 eeh_pe_change_owner(edev->pe); 1460 out: 1461 mutex_unlock(&eeh_dev_mutex); 1462 } 1463 EXPORT_SYMBOL(eeh_dev_release); 1464 1465 #ifdef CONFIG_IOMMU_API 1466 1467 static int dev_has_iommu_table(struct device *dev, void *data) 1468 { 1469 struct pci_dev *pdev = to_pci_dev(dev); 1470 struct pci_dev **ppdev = data; 1471 1472 if (!dev) 1473 return 0; 1474 1475 if (device_iommu_mapped(dev)) { 1476 *ppdev = pdev; 1477 return 1; 1478 } 1479 1480 return 0; 1481 } 1482 1483 /** 1484 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE 1485 * @group: IOMMU group 1486 * 1487 * The routine is called to convert IOMMU group to EEH PE. 1488 */ 1489 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) 1490 { 1491 struct pci_dev *pdev = NULL; 1492 struct eeh_dev *edev; 1493 int ret; 1494 1495 /* No IOMMU group ? */ 1496 if (!group) 1497 return NULL; 1498 1499 ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); 1500 if (!ret || !pdev) 1501 return NULL; 1502 1503 /* No EEH device or PE ? */ 1504 edev = pci_dev_to_eeh_dev(pdev); 1505 if (!edev || !edev->pe) 1506 return NULL; 1507 1508 return edev->pe; 1509 } 1510 EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); 1511 1512 #endif /* CONFIG_IOMMU_API */ 1513 1514 /** 1515 * eeh_pe_set_option - Set options for the indicated PE 1516 * @pe: EEH PE 1517 * @option: requested option 1518 * 1519 * The routine is called to enable or disable EEH functionality 1520 * on the indicated PE, to enable IO or DMA for the frozen PE. 1521 */ 1522 int eeh_pe_set_option(struct eeh_pe *pe, int option) 1523 { 1524 int ret = 0; 1525 1526 /* Invalid PE ? */ 1527 if (!pe) 1528 return -ENODEV; 1529 1530 /* 1531 * EEH functionality could possibly be disabled, just 1532 * return error for the case. And the EEH functinality 1533 * isn't expected to be disabled on one specific PE. 1534 */ 1535 switch (option) { 1536 case EEH_OPT_ENABLE: 1537 if (eeh_enabled()) { 1538 ret = eeh_pe_change_owner(pe); 1539 break; 1540 } 1541 ret = -EIO; 1542 break; 1543 case EEH_OPT_DISABLE: 1544 break; 1545 case EEH_OPT_THAW_MMIO: 1546 case EEH_OPT_THAW_DMA: 1547 case EEH_OPT_FREEZE_PE: 1548 if (!eeh_ops || !eeh_ops->set_option) { 1549 ret = -ENOENT; 1550 break; 1551 } 1552 1553 ret = eeh_pci_enable(pe, option); 1554 break; 1555 default: 1556 pr_debug("%s: Option %d out of range (%d, %d)\n", 1557 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); 1558 ret = -EINVAL; 1559 } 1560 1561 return ret; 1562 } 1563 EXPORT_SYMBOL_GPL(eeh_pe_set_option); 1564 1565 /** 1566 * eeh_pe_get_state - Retrieve PE's state 1567 * @pe: EEH PE 1568 * 1569 * Retrieve the PE's state, which includes 3 aspects: enabled 1570 * DMA, enabled IO and asserted reset. 1571 */ 1572 int eeh_pe_get_state(struct eeh_pe *pe) 1573 { 1574 int result, ret = 0; 1575 bool rst_active, dma_en, mmio_en; 1576 1577 /* Existing PE ? */ 1578 if (!pe) 1579 return -ENODEV; 1580 1581 if (!eeh_ops || !eeh_ops->get_state) 1582 return -ENOENT; 1583 1584 /* 1585 * If the parent PE is owned by the host kernel and is undergoing 1586 * error recovery, we should return the PE state as temporarily 1587 * unavailable so that the error recovery on the guest is suspended 1588 * until the recovery completes on the host. 1589 */ 1590 if (pe->parent && 1591 !(pe->state & EEH_PE_REMOVED) && 1592 (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) 1593 return EEH_PE_STATE_UNAVAIL; 1594 1595 result = eeh_ops->get_state(pe, NULL); 1596 rst_active = !!(result & EEH_STATE_RESET_ACTIVE); 1597 dma_en = !!(result & EEH_STATE_DMA_ENABLED); 1598 mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); 1599 1600 if (rst_active) 1601 ret = EEH_PE_STATE_RESET; 1602 else if (dma_en && mmio_en) 1603 ret = EEH_PE_STATE_NORMAL; 1604 else if (!dma_en && !mmio_en) 1605 ret = EEH_PE_STATE_STOPPED_IO_DMA; 1606 else if (!dma_en && mmio_en) 1607 ret = EEH_PE_STATE_STOPPED_DMA; 1608 else 1609 ret = EEH_PE_STATE_UNAVAIL; 1610 1611 return ret; 1612 } 1613 EXPORT_SYMBOL_GPL(eeh_pe_get_state); 1614 1615 static int eeh_pe_reenable_devices(struct eeh_pe *pe) 1616 { 1617 struct eeh_dev *edev, *tmp; 1618 struct pci_dev *pdev; 1619 int ret = 0; 1620 1621 /* Restore config space */ 1622 eeh_pe_restore_bars(pe); 1623 1624 /* 1625 * Reenable PCI devices as the devices passed 1626 * through are always enabled before the reset. 1627 */ 1628 eeh_pe_for_each_dev(pe, edev, tmp) { 1629 pdev = eeh_dev_to_pci_dev(edev); 1630 if (!pdev) 1631 continue; 1632 1633 ret = pci_reenable_device(pdev); 1634 if (ret) { 1635 pr_warn("%s: Failure %d reenabling %s\n", 1636 __func__, ret, pci_name(pdev)); 1637 return ret; 1638 } 1639 } 1640 1641 /* The PE is still in frozen state */ 1642 return eeh_unfreeze_pe(pe, true); 1643 } 1644 1645 1646 /** 1647 * eeh_pe_reset - Issue PE reset according to specified type 1648 * @pe: EEH PE 1649 * @option: reset type 1650 * 1651 * The routine is called to reset the specified PE with the 1652 * indicated type, either fundamental reset or hot reset. 1653 * PE reset is the most important part for error recovery. 1654 */ 1655 int eeh_pe_reset(struct eeh_pe *pe, int option) 1656 { 1657 int ret = 0; 1658 1659 /* Invalid PE ? */ 1660 if (!pe) 1661 return -ENODEV; 1662 1663 if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) 1664 return -ENOENT; 1665 1666 switch (option) { 1667 case EEH_RESET_DEACTIVATE: 1668 ret = eeh_ops->reset(pe, option); 1669 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 1670 if (ret) 1671 break; 1672 1673 ret = eeh_pe_reenable_devices(pe); 1674 break; 1675 case EEH_RESET_HOT: 1676 case EEH_RESET_FUNDAMENTAL: 1677 /* 1678 * Proactively freeze the PE to drop all MMIO access 1679 * during reset, which should be banned as it's always 1680 * cause recursive EEH error. 1681 */ 1682 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 1683 1684 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 1685 ret = eeh_ops->reset(pe, option); 1686 break; 1687 default: 1688 pr_debug("%s: Unsupported option %d\n", 1689 __func__, option); 1690 ret = -EINVAL; 1691 } 1692 1693 return ret; 1694 } 1695 EXPORT_SYMBOL_GPL(eeh_pe_reset); 1696 1697 /** 1698 * eeh_pe_configure - Configure PCI bridges after PE reset 1699 * @pe: EEH PE 1700 * 1701 * The routine is called to restore the PCI config space for 1702 * those PCI devices, especially PCI bridges affected by PE 1703 * reset issued previously. 1704 */ 1705 int eeh_pe_configure(struct eeh_pe *pe) 1706 { 1707 int ret = 0; 1708 1709 /* Invalid PE ? */ 1710 if (!pe) 1711 return -ENODEV; 1712 1713 return ret; 1714 } 1715 EXPORT_SYMBOL_GPL(eeh_pe_configure); 1716 1717 /** 1718 * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE 1719 * @pe: the indicated PE 1720 * @type: error type 1721 * @function: error function 1722 * @addr: address 1723 * @mask: address mask 1724 * 1725 * The routine is called to inject the specified PCI error, which 1726 * is determined by @type and @function, to the indicated PE for 1727 * testing purpose. 1728 */ 1729 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, 1730 unsigned long addr, unsigned long mask) 1731 { 1732 /* Invalid PE ? */ 1733 if (!pe) 1734 return -ENODEV; 1735 1736 /* Unsupported operation ? */ 1737 if (!eeh_ops || !eeh_ops->err_inject) 1738 return -ENOENT; 1739 1740 /* Check on PCI error type */ 1741 if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) 1742 return -EINVAL; 1743 1744 /* Check on PCI error function */ 1745 if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) 1746 return -EINVAL; 1747 1748 return eeh_ops->err_inject(pe, type, func, addr, mask); 1749 } 1750 EXPORT_SYMBOL_GPL(eeh_pe_inject_err); 1751 1752 static int proc_eeh_show(struct seq_file *m, void *v) 1753 { 1754 if (!eeh_enabled()) { 1755 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1756 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1757 } else { 1758 seq_printf(m, "EEH Subsystem is enabled\n"); 1759 seq_printf(m, 1760 "no device=%llu\n" 1761 "no device node=%llu\n" 1762 "no config address=%llu\n" 1763 "check not wanted=%llu\n" 1764 "eeh_total_mmio_ffs=%llu\n" 1765 "eeh_false_positives=%llu\n" 1766 "eeh_slot_resets=%llu\n", 1767 eeh_stats.no_device, 1768 eeh_stats.no_dn, 1769 eeh_stats.no_cfg_addr, 1770 eeh_stats.ignored_check, 1771 eeh_stats.total_mmio_ffs, 1772 eeh_stats.false_positives, 1773 eeh_stats.slot_resets); 1774 } 1775 1776 return 0; 1777 } 1778 1779 #ifdef CONFIG_DEBUG_FS 1780 static int eeh_enable_dbgfs_set(void *data, u64 val) 1781 { 1782 if (val) 1783 eeh_clear_flag(EEH_FORCE_DISABLED); 1784 else 1785 eeh_add_flag(EEH_FORCE_DISABLED); 1786 1787 return 0; 1788 } 1789 1790 static int eeh_enable_dbgfs_get(void *data, u64 *val) 1791 { 1792 if (eeh_enabled()) 1793 *val = 0x1ul; 1794 else 1795 *val = 0x0ul; 1796 return 0; 1797 } 1798 1799 static int eeh_freeze_dbgfs_set(void *data, u64 val) 1800 { 1801 eeh_max_freezes = val; 1802 return 0; 1803 } 1804 1805 static int eeh_freeze_dbgfs_get(void *data, u64 *val) 1806 { 1807 *val = eeh_max_freezes; 1808 return 0; 1809 } 1810 1811 DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, 1812 eeh_enable_dbgfs_set, "0x%llx\n"); 1813 DEFINE_DEBUGFS_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, 1814 eeh_freeze_dbgfs_set, "0x%llx\n"); 1815 #endif 1816 1817 static int __init eeh_init_proc(void) 1818 { 1819 if (machine_is(pseries) || machine_is(powernv)) { 1820 proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); 1821 #ifdef CONFIG_DEBUG_FS 1822 debugfs_create_file_unsafe("eeh_enable", 0600, 1823 powerpc_debugfs_root, NULL, 1824 &eeh_enable_dbgfs_ops); 1825 debugfs_create_file_unsafe("eeh_max_freezes", 0600, 1826 powerpc_debugfs_root, NULL, 1827 &eeh_freeze_dbgfs_ops); 1828 #endif 1829 } 1830 1831 return 0; 1832 } 1833 __initcall(eeh_init_proc); 1834