// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
#include <asm/pte-walk.h>


/** Overview:
 *  EEH, or "Enhanced Error Handling", is a PCI bridge technology for
 *  dealing with PCI bus errors that can't be dealt with within the
 *  usual PCI framework, except by check-stopping the CPU. Systems
 *  that are designed for high-availability/reliability cannot afford
 *  to crash due to a "mere" PCI error, thus the need for EEH.
 *  An EEH-capable bridge operates by converting a detected error
 *  into a "slot freeze", taking the PCI adapter off-line, making
 *  the slot behave, from the OS's point of view, as if the slot
 *  were "empty": all reads return 0xff's and all writes are silently
 *  ignored. EEH slot isolation events can be triggered by parity
 *  errors on the address or data busses (e.g. during posted writes),
 *  which in turn might be caused by low voltage on the bus, dust,
 *  vibration, humidity, radioactivity or plain-old failed hardware.
 *
 *  Note, however, that one of the leading causes of EEH slot
 *  freeze events is buggy device drivers, buggy device microcode,
 *  or buggy device hardware. This is because any attempt by the
 *  device to bus-master data to a memory address that is not
 *  assigned to the device will trigger a slot freeze. (The idea
 *  is to prevent devices-gone-wild from corrupting system memory.)
 *  Buggy hardware/drivers will have a miserable time co-existing
 *  with EEH.
 *
 *  Ideally, a PCI device driver, when suspecting that an isolation
 *  event has occurred (e.g. by reading 0xff's), will then ask EEH
 *  whether this is the case, and then take appropriate steps to
 *  reset the PCI slot, the PCI device, and then resume operations.
 *  However, until that day, the checking is done here, with the
 *  eeh_check_failure() routine embedded in the MMIO macros. If
 *  the slot is found to be isolated, an "EEH Event" is synthesized
 *  and sent out for processing.
 */
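/*
 * Illustrative sketch (not part of this file's code): a driver that
 * suspects an isolation event after reading all 0xff's could ask EEH
 * explicitly rather than relying on the checks embedded in the MMIO
 * macros. "ioaddr" and "REG_STATUS" are hypothetical names:
 *
 *	u32 val = readl(ioaddr + REG_STATUS);
 *
 *	if (val == 0xffffffff && eeh_check_failure(ioaddr + REG_STATUS))
 *		return -EIO;	... an EEH event has now been queued ...
 */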

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI emulation based on the device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * a particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * The maximum number of frozen events allowed for a PE. If one
 * particular PE's frozen count in the last hour exceeds this limit,
 * the PE will be forced offline permanently.
 */
u32 eeh_max_freezes = 5;

/*
 * Controls whether a recovery event should be scheduled when an
 * isolated device is discovered. This is only really useful for
 * debugging problems with the EEH core.
 */
bool eeh_debugfs_no_recover;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
EXPORT_SYMBOL_GPL(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * This struct maintains global EEH statistics, which are also
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);
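/*
 * Example (kernel command line): booting with "eeh=off" disables EEH
 * globally via EEH_FORCE_DISABLED, while "eeh=early_log" requests an
 * early dump of the error log via EEH_EARLY_DUMP_LOG. Any other value
 * is silently ignored.
 */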

/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts them into a buffer
 * for RTAS error logging.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	if (!pdn) {
		pr_warn("EEH: Note: No error log for absent device.\n");
		return 0;
	}

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
		       pdn->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
		pdn->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i = 0; i <= 8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i = 0; i <= 13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}

static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
{
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * comprises the driver log and the error log. The driver log is
 * figured out from the config space of the corresponding PCI device,
 * while the error log is fetched through a platform dependent
 * function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 *
	 * When the @severity is EEH_LOG_PERM, the PE is going to be
	 * removed. Prior to that, the drivers for devices included in
	 * the PE will be closed. The drivers rely on a working IO path
	 * to bring the devices to a quiet state. Otherwise, PCI traffic
	 * from those devices after they are removed is likely to cause
	 * another unexpected EEH error.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
		    severity == EEH_LOG_PERM)
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state. Otherwise, a fenced
		 * PHB might be seen. Those PEs are identified with the flag
		 * EEH_PE_CFG_RESTRICTED, indicating that EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put into EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes a fenced PHB. If PCI
		 * config access is blocked with the flag EEH_PE_CFG_BLOCKED,
		 * it's pointless to restore BARs and dump config space.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here (this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = find_init_mm_pte(token, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE - 1));
}

/*
 * On the PowerNV platform, we might already have a fenced PHB.
 * In that case, it's meaningless to recover the frozen PE.
 * Instead, we have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s: Can't find PE for PHB#%x\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has already been in a problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event */
	eeh_pe_mark_isolated(phb_pe);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}

/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	unsigned long flags;
	struct device_node *dn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			 eeh_pci_name(dev));
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced
	 * PHB there, and we need to take care of that first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			dn = pci_device_to_OF_node(dev);
			if (dn)
				location = of_get_property(dn, "ibm,loc-code",
						NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure. This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt under any of the following conditions: failure
	 * to get the PE's state, EEH not supported, the PE permanently
	 * unavailable, or the PE in a good (active) state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It's a corner case that the parent PE has been put into
	 * a frozen state as well; we should take care of that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 && !eeh_state_active(ret)) {
			pe = parent_pe;
			pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
			       pe->phb->global_number, pe->addr,
			       pe->phb->global_number, parent_pe->addr);
		}

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_mark_isolated(pe);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs. Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened. So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}
EXPORT_SYMBOL_GPL(eeh_dev_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event. This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: the requested option (EEH_OPT_*)
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE. So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}

	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%x-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request finished successfully */
	if (active_flag) {
		rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc < 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}
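/*
 * Illustrative sketch: thawing a frozen PE is a pair of enables, which
 * is exactly what eeh_unfreeze_pe() further down does:
 *
 *	eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 *	eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
 */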

static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
					    void *userdata)
{
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}

static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return NULL;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);

	return NULL;
}

int eeh_restore_vf_config(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 devctl, cmd, cap2, aer_capctl;
	int old_mps;

	if (edev->pcie_cap) {
		/* Restore MPS */
		old_mps = (ffs(pdn->mps) - 8) << 5;
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     2, &devctl);
		devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
		devctl |= old_mps;
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      2, devctl);

		/* Disable Completion Timeout if possible */
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
				     4, &cap2);
		if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
			eeh_ops->read_config(pdn,
					     edev->pcie_cap + PCI_EXP_DEVCTL2,
					     4, &cap2);
			cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
			eeh_ops->write_config(pdn,
					      edev->pcie_cap + PCI_EXP_DEVCTL2,
					      4, cap2);
		}
	}

	/* Enable SERR and parity checking */
	eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
	cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
	eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);

	/* Enable reporting of various errors */
	if (edev->pcie_cap) {
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     2, &devctl);
		devctl &= ~PCI_EXP_DEVCTL_CERE;
		devctl |= (PCI_EXP_DEVCTL_NFERE |
			   PCI_EXP_DEVCTL_FERE |
			   PCI_EXP_DEVCTL_URRE);
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      2, devctl);
	}

	/* Enable ECRC generation and checking */
	if (edev->pcie_cap && edev->aer_cap) {
		eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
				     4, &aer_capctl);
		aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
		eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
				      4, aer_capctl);
	}

	return 0;
}

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
		break;
	case pcie_hot_reset:
		eeh_pe_mark_isolated(pe);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		eeh_pe_mark_isolated(pe);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
		return -EINVAL;
	}

	return 0;
}
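/*
 * Illustrative sketch: drivers reach the function above through the
 * generic PCI API. A hypothetical adapter reset might look like:
 *
 *	pci_set_pcie_reset_state(pdev, pcie_warm_reset);
 *	msleep(100);
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 */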

/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @edev: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect this information for
 * the indicated device and its children so that the whole bunch of
 * devices can be reset properly.
 */
static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

static void eeh_pe_refreeze_passed(struct eeh_pe *root)
{
	struct eeh_pe *pe;
	int state;

	eeh_for_each_pe(root, pe) {
		if (eeh_pe_passed(pe)) {
			state = eeh_ops->get_state(pe, NULL);
			if (state &
			   (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
				pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
					pe->phb->global_number, pe->addr);
				eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
			}
		}
	}
}

/**
 * eeh_pe_reset_full - Complete a full reset process on the indicated PE
 * @pe: EEH PE
 * @include_passed: include passed-through PEs in the reset
 *
 * This function executes a full reset procedure on a PE, including setting
 * the appropriate flags, performing a fundamental or hot reset, and then
 * deactivating the reset status. It is designed to be used within the EEH
 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
 * only performs a single operation at a time.
 *
 * This function will attempt to reset a PE three times before failing.
 */
int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
{
	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	int type = EEH_RESET_HOT;
	unsigned int freset = 0;
	int i, state = 0, ret;

	/*
	 * Determine the type of reset to perform - hot or fundamental.
	 * Hot reset is the default operation, unless any device under the
	 * PE requires a fundamental reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		type = EEH_RESET_FUNDAMENTAL;

	/* Mark the PE as in reset state and block config space accesses */
	eeh_pe_state_mark(pe, reset_state);

	/* Make three attempts at resetting the bus */
	for (i = 0; i < 3; i++) {
		ret = eeh_pe_reset(pe, type, include_passed);
		if (!ret)
			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
					   include_passed);
		if (ret) {
			pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n",
				ret, pe->phb->global_number, pe->addr, i + 1);
			ret = -EIO;
			continue;
		}
		if (i)
			pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
				pe->phb->global_number, pe->addr, i + 1);

		/* Wait until the PE is in a functioning state */
		state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (state < 0) {
			pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x\n",
				pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			break;
		}
		if (eeh_state_active(state))
			break;
		else
			pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
				pe->phb->global_number, pe->addr, state, i + 1);
	}

	/*
	 * Resetting the PE may have unfrozen child PEs. If those PEs have
	 * been (potentially) passed through to a guest, re-freeze them:
	 */
	if (!include_passed)
		eeh_pe_refreeze_passed(pe);

	eeh_pe_state_clear(pe, reset_state, true);
	return ret;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges, including the root port, we need to enable bus
	 * mastering explicitly. Otherwise, the bridge can't fetch IODA
	 * table entries correctly. So we cache the bit in advance so that
	 * we can restore it after reset, on either the PHB or the PE.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already registered (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}
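/*
 * Illustrative sketch (hypothetical platform "foo"): a platform is
 * expected to register its callbacks from early setup code, before
 * eeh_init() runs. Only the fields shown here are assumed; see
 * struct eeh_ops for the full callback set.
 *
 *	static struct eeh_ops foo_eeh_ops = {
 *		.name		= "foo",
 *		.init		= foo_eeh_init,
 *		.get_state	= foo_eeh_get_state,
 *		.reset		= foo_eeh_reset,
 *	};
 *
 *	eeh_ops_register(&foo_eeh_ops);
 */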

static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

void eeh_probe_devices(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}
	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_info("EEH: No capable adapters found\n");
}

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if EEH is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check. If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
static int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	int ret = 0;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call the platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize PHB PEs */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
		eeh_dev_phb_init_dynamic(hose);

	/* Initialize EEH event */
	return eeh_event_init();
}
core_initcall_sync(eeh_init);

/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, the type of the device, and on earlier
 * boot command-line arguments.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb = pdn ? pdn->phb : NULL;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev)
		return;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
		return;

	/* USB Bus children of PCI devices will not have BUIDs */
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @pdn: PCI device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced!\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * an unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device(). So we have to remove
	 * it here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device has definitely been removed, even
		 * though it didn't happen through the normal path.
		 * So we needn't call into the error handlers afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->probe(pdn, NULL);

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;

			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;

			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
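/*
 * Hotplug/DLPAR flow sketch: for a bus added after boot, the expected
 * call order is eeh_add_device_tree_early() (before any config cycles),
 * then the PCI core's scan of the bus, then eeh_add_device_tree_late()
 * and finally eeh_add_sysfs_files() once the pci_dev structs exist.
 */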

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced!\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE so that its BARs can be
	 * restored a bit later. So we keep it around for the BAR
	 * restore and remove it from the parent PE during that restore.
	 */
	edev->pdev = NULL;

	/*
	 * The flag "in_error" is used to trace whether the EEH devices
	 * for VFs are in an error state. It's set in eeh_report_error().
	 * If it's not set, eeh_report_{reset,resume}() won't be called
	 * for the VF EEH device.
	 */
	edev->in_error = false;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing the device from the PCI subsystem, which means
	 * its driver either doesn't support EEH or doesn't support it
	 * well. So we rely entirely on hotplug to do the recovery for
	 * this specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

int eeh_unfreeze_pe(struct eeh_pe *pe)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	return ret;
}


static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int ret;

	/* Check PE state */
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if (eeh_state_active(ret))
		return 0;

	/* Frozen PE, check if it needs a PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			return eeh_pe_reset_and_recover(pe);
		}
	}

	ret = eeh_unfreeze_pe(pe);
	if (!ret)
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
	return ret;
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase the count of passed through devices for the indicated
 * PE. As a result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease the count of pass through devices for the indicated PE. If
 * there is no passed through device in the PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);
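/*
 * Illustrative sketch: a pass-through path (e.g. VFIO) is expected to
 * bracket guest ownership of a device with these calls (hypothetical
 * fragment, error handling elided):
 *
 *	if (eeh_dev_open(pdev))
 *		return -ENODEV;
 *	... the guest owns the device; host EEH reporting is suppressed ...
 *	eeh_dev_release(pdev);
 */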

#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;

	if (!dev)
		return 0;

	if (device_iommu_mapped(dev)) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for a frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, so just
	 * return an error in that case. Note that EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
	case EEH_OPT_FREEZE_PE:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which covers 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	/*
	 * If the parent PE is owned by the host kernel and is undergoing
	 * error recovery, we should return the PE state as temporarily
	 * unavailable so that the error recovery on the guest is suspended
	 * until the recovery completes on the host.
	 */
	if (pe->parent &&
	    !(pe->state & EEH_PE_REMOVED) &&
	    (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
		return EEH_PE_STATE_UNAVAIL;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	if (include_passed || !eeh_pe_passed(pe)) {
		ret = eeh_unfreeze_pe(pe);
	} else
		pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
			pe->phb->global_number, pe->addr);
	if (!ret)
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
	return ret;
}


/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 * @include_passed: include passed-through PEs in the reset
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe, include_passed);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be blocked as it would
		 * otherwise cause a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			 __func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);
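/*
 * Illustrative sketch: a PE owner driving a reset by hand performs the
 * same assert/deassert pair that eeh_pe_reset_full() uses internally:
 *
 *	eeh_pe_reset(pe, EEH_RESET_HOT, true);
 *	eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true);
 */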

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges, affected by the
 * PE reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/**
 * eeh_pe_inject_err - Inject the specified PCI error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: error function
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified PCI error, which
 * is determined by @type and @func, to the indicated PE for
 * testing purposes.
 */
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
		      unsigned long addr, unsigned long mask)
{
	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/* Unsupported operation ? */
	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENOENT;

	/* Check on PCI error type */
	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
		return -EINVAL;

	/* Check on PCI error function */
	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
		return -EINVAL;

	return eeh_ops->err_inject(pe, type, func, addr, mask);
}
EXPORT_SYMBOL_GPL(eeh_pe_inject_err);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			 eeh_enable_dbgfs_set, "0x%llx\n");
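/*
 * Usage sketch (assuming debugfs is mounted at /sys/kernel/debug):
 *
 *	echo hwcheck > /sys/kernel/debug/powerpc/eeh_force_recover
 *	echo 0x8:0x1 > /sys/kernel/debug/powerpc/eeh_force_recover
 *
 * The first form scans for failed PHBs and recovers them; the second
 * schedules recovery of PE 0x1 on PHB domain 0x8 (example values).
 */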
static ssize_t eeh_force_recover_write(struct file *filp,
				       const char __user *user_buf,
				       size_t count, loff_t *ppos)
{
	struct pci_controller *hose;
	uint32_t phbid, pe_no;
	struct eeh_pe *pe;
	char buf[20];
	int ret;

	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
	if (!ret)
		return -EFAULT;

	/*
	 * When PE is NULL the event is a "special" event. Rather than
	 * recovering a specific PE it forces the EEH core to scan for failed
	 * PHBs and recovers each. This needs to be done before any device
	 * recoveries can occur.
	 */
	if (!strncmp(buf, "hwcheck", 7)) {
		__eeh_send_failure_event(NULL);
		return count;
	}

	ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
	if (ret != 2)
		return -EINVAL;

	hose = pci_find_controller_for_domain(phbid);
	if (!hose)
		return -ENODEV;

	/* Retrieve PE */
	pe = eeh_pe_get(hose, pe_no, 0);
	if (!pe)
		return -ENODEV;

	/*
	 * We don't do any state checking here since the detection
	 * process is async to the recovery process. The recovery
	 * thread *should* not break even if we schedule a recovery
	 * from an odd state (e.g. PE removed, or recovery of a
	 * non-isolated PE)
	 */
	__eeh_send_failure_event(pe);

	return ret < 0 ? ret : count;
}

static const struct file_operations eeh_force_recover_fops = {
	.open	= simple_open,
	.llseek	= no_llseek,
	.write	= eeh_force_recover_write,
};
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file_unsafe("eeh_enable", 0600,
					   powerpc_debugfs_root, NULL,
					   &eeh_enable_dbgfs_ops);
		debugfs_create_u32("eeh_max_freezes", 0600,
				   powerpc_debugfs_root, &eeh_max_freezes);
		debugfs_create_bool("eeh_disable_recovery", 0600,
				    powerpc_debugfs_root,
				    &eeh_debugfs_no_recover);
		debugfs_create_file_unsafe("eeh_force_recover", 0600,
					   powerpc_debugfs_root, NULL,
					   &eeh_force_recover_fops);
		eeh_cache_debugfs_init();
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);