/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>


/** Overview:
 *  EEH, or "Enhanced Error Handling" is a PCI bridge technology for
 *  dealing with PCI bus errors that can't be dealt with within the
 *  usual PCI framework, except by check-stopping the CPU. Systems
 *  that are designed for high-availability/reliability cannot afford
 *  to crash due to a "mere" PCI error, thus the need for EEH.
 *  An EEH-capable bridge operates by converting a detected error
 *  into a "slot freeze", taking the PCI adapter off-line, making
 *  the slot behave, from the OS'es point of view, as if the slot
 *  were "empty": all reads return 0xff's and all writes are silently
 *  ignored. EEH slot isolation events can be triggered by parity
 *  errors on the address or data busses (e.g. during posted writes),
 *  which in turn might be caused by low voltage on the bus, dust,
 *  vibration, humidity, radioactivity or plain-old failed hardware.
 *
 *  Note, however, that one of the leading causes of EEH slot
 *  freeze events is buggy device drivers, buggy device microcode,
 *  or buggy device hardware. This is because any attempt by the
 *  device to bus-master data to a memory address that is not
 *  assigned to the device will trigger a slot freeze. (The idea
 *  is to prevent devices-gone-wild from corrupting system memory).
 *  Buggy hardware/drivers will have a miserable time co-existing
 *  with EEH.
 *
 *  Ideally, a PCI device driver, when suspecting that an isolation
 *  event has occurred (e.g. by reading 0xff's), will then ask EEH
 *  whether this is the case, and then take appropriate steps to
 *  reset the PCI slot, the PCI device, and then resume operations.
 *  However, until that day, the checking is done here, with the
 *  eeh_check_failure() routine embedded in the MMIO macros. If
 *  the slot is found to be isolated, an "EEH Event" is synthesized
 *  and sent out for processing.
 */
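
/*
 * For illustration only - a hypothetical driver-side sketch of the
 * pattern described above (priv and REG_STATUS are made-up names):
 * after reading all 0xff's, the driver asks EEH whether the slot is
 * actually frozen before blaming the hardware.
 *
 *	u32 val = readl(priv->regs + REG_STATUS);
 *
 *	if (val == 0xffffffff &&
 *	    eeh_check_failure(priv->regs + REG_STATUS))
 *		return -EIO;	an EEH event has been queued
 */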

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI enumeration based on the device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * a particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * EEH allowed maximum number of freezes. If one particular PE's
 * frozen count in the last hour exceeds this limit, the PE will
 * be forced offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
EXPORT_SYMBOL_GPL(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * The struct is used to maintain the EEH global statistic
 * information. Besides, the EEH global statistics will be
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);
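
/*
 * For reference, the handler above accepts two boot-time switches on
 * the kernel command line:
 *
 *	eeh=off		force-disable EEH (sets EEH_FORCE_DISABLED)
 *	eeh=early_log	dump the EEH log early (sets EEH_EARLY_DUMP_LOG)
 */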

/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts them into a buffer
 * for RTAS error logging.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
		       edev->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
		edev->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i=0; i<=13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}

static void *eeh_dump_pe_log(void *data, void *flag)
{
	struct eeh_pe *pe = data;
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}
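
/*
 * A note on the logging idiom above: scnprintf() returns the number of
 * characters actually written into the buffer (capped by the size
 * argument, and 0 when the size is 0), unlike snprintf() which returns
 * the length that would have been written. That is what makes the
 * accumulation pattern safe against overflow; a minimal sketch:
 *
 *	char log[64];
 *	size_t n = 0;
 *
 *	n += scnprintf(log + n, sizeof(log) - n, "hdr ");
 *	n += scnprintf(log + n, sizeof(log) - n, "val=%08x", val);
 */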

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's are always returned from PCI config space.
	 *
	 * When the @severity is EEH_LOG_PERM, the PE is going to be
	 * removed. Prior to that, the drivers for devices included in
	 * the PE will be closed. The drivers rely on a working IO path
	 * to bring the devices to a quiet state. Otherwise, PCI traffic
	 * from those devices after they are removed is likely to cause
	 * another unexpected EEH error.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
		    severity == EEH_LOG_PERM)
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state. Otherwise, a fenced
		 * PHB might be seen. Those PEs are identified with the flag
		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put into EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes a fenced PHB. If the
		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
		 * pointless to restore BARs and dump config space.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here (this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
					   NULL, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}
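
/*
 * Worked example of the mapping above, with hypothetical values and 4K
 * pages (PAGE_SHIFT == 12): if @token is an ioremap'd address whose
 * page-offset bits are 0x8a4 and whose kernel PTE maps pfn 0x12345,
 * then pa = 0x12345 << 12 = 0x12345000, and the function returns
 * 0x12345000 | 0x8a4 = 0x123458a4.
 */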

/*
 * On the PowerNV platform, we might already have a fenced PHB.
 * In that case, it's meaningless to recover the frozen PE. Instead,
 * we have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%x\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has already been put into a problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send an event */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}

/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct pci_dn *pdn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			 eeh_pci_name(dev));
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced PHB
	 * there, and we need to take care of that first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock, as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			pdn = eeh_dev_to_pdn(edev);
			if (pdn->node)
				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure. This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt on any of the following conditions: failure to
	 * get the PE's state, EEH not supported, or the PE in a good
	 * or permanently unavailable state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It is a corner case that the parent PE has been put into
	 * frozen state as well. We should take care of that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs. Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened. So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dev_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event. This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: EEH option (e.g. EEH_OPT_THAW_MMIO or EEH_OPT_THAW_DMA)
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE. So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%x-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc < 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}

static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}

static void *eeh_restore_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return NULL;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);

	return NULL;
}
success 749 */ 750 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 751 { 752 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 753 struct eeh_pe *pe = eeh_dev_to_pe(edev); 754 755 if (!pe) { 756 pr_err("%s: No PE found on PCI device %s\n", 757 __func__, pci_name(dev)); 758 return -EINVAL; 759 } 760 761 switch (state) { 762 case pcie_deassert_reset: 763 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 764 eeh_unfreeze_pe(pe, false); 765 if (!(pe->type & EEH_PE_VF)) 766 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 767 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); 768 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 769 break; 770 case pcie_hot_reset: 771 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 772 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 773 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 774 if (!(pe->type & EEH_PE_VF)) 775 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 776 eeh_ops->reset(pe, EEH_RESET_HOT); 777 break; 778 case pcie_warm_reset: 779 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 780 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 781 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 782 if (!(pe->type & EEH_PE_VF)) 783 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 784 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 785 break; 786 default: 787 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); 788 return -EINVAL; 789 }; 790 791 return 0; 792 } 793 794 /** 795 * eeh_set_pe_freset - Check the required reset for the indicated device 796 * @data: EEH device 797 * @flag: return value 798 * 799 * Each device might have its preferred reset type: fundamental or 800 * hot reset. The routine is used to collected the information for 801 * the indicated device and its children so that the bunch of the 802 * devices could be reset properly. 803 */ 804 static void *eeh_set_dev_freset(void *data, void *flag) 805 { 806 struct pci_dev *dev; 807 unsigned int *freset = (unsigned int *)flag; 808 struct eeh_dev *edev = (struct eeh_dev *)data; 809 810 dev = eeh_dev_to_pci_dev(edev); 811 if (dev) 812 *freset |= dev->needs_freset; 813 814 return NULL; 815 } 816 817 /** 818 * eeh_pe_reset_full - Complete a full reset process on the indicated PE 819 * @pe: EEH PE 820 * 821 * This function executes a full reset procedure on a PE, including setting 822 * the appropriate flags, performing a fundamental or hot reset, and then 823 * deactivating the reset status. It is designed to be used within the EEH 824 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and 825 * only performs a single operation at a time. 826 * 827 * This function will attempt to reset a PE three times before failing. 828 */ 829 int eeh_pe_reset_full(struct eeh_pe *pe) 830 { 831 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 832 int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 833 int type = EEH_RESET_HOT; 834 unsigned int freset = 0; 835 int i, state, ret; 836 837 /* 838 * Determine the type of reset to perform - hot or fundamental. 839 * Hot reset is the default operation, unless any device under the 840 * PE requires a fundamental reset. 

/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @data: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the whole group of
 * devices can be reset properly.
 */
static void *eeh_set_dev_freset(void *data, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;
	struct eeh_dev *edev = (struct eeh_dev *)data;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

/**
 * eeh_pe_reset_full - Complete a full reset process on the indicated PE
 * @pe: EEH PE
 *
 * This function executes a full reset procedure on a PE, including setting
 * the appropriate flags, performing a fundamental or hot reset, and then
 * deactivating the reset status. It is designed to be used within the EEH
 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
 * only performs a single operation at a time.
 *
 * This function will attempt to reset a PE three times before failing.
 */
int eeh_pe_reset_full(struct eeh_pe *pe)
{
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	int type = EEH_RESET_HOT;
	unsigned int freset = 0;
	int i, state, ret;

	/*
	 * Determine the type of reset to perform - hot or fundamental.
	 * Hot reset is the default operation, unless any device under the
	 * PE requires a fundamental reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		type = EEH_RESET_FUNDAMENTAL;

	/* Mark the PE as in reset state and block config space accesses */
	eeh_pe_state_mark(pe, reset_state);

	/* Make three attempts at resetting the bus */
	for (i = 0; i < 3; i++) {
		ret = eeh_pe_reset(pe, type);
		if (ret)
			break;

		ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
		if (ret)
			break;

		/* Wait until the PE is in a functioning state */
		state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if ((state & active_flags) == active_flags)
			break;

		if (state < 0) {
			pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x\n",
				__func__, pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			break;
		}

		/* Set error in case this is our last attempt */
		ret = -EIO;
		pr_warn("%s: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n",
			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
	}

	eeh_pe_state_clear(pe, reset_state);
	return ret;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges, including the root port, we need to enable bus
	 * mastering explicitly. Otherwise, they can't fetch IODA table
	 * entries correctly. So we cache the bit in advance so that
	 * we can restore it after reset, either PHB range or PE range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already registered (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}
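
/*
 * Illustrative registration, roughly what platform setup code (e.g.
 * pseries or powernv) does at boot; all the "foo" names here are
 * hypothetical, and only callbacks used elsewhere in this file are shown:
 *
 *	static struct eeh_ops foo_eeh_ops = {
 *		.name		= "foo",
 *		.init		= foo_eeh_init,
 *		.get_state	= foo_eeh_get_state,
 *		.set_option	= foo_eeh_set_option,
 *		.reset		= foo_eeh_reset,
 *	};
 *
 *	eeh_ops_register(&foo_eeh_ops);
 */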

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}

static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check. If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV until after
	 * the PCI hierarchy tree has been built, because the PEs
	 * are figured out based on PCI devices instead of device
	 * tree nodes.
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call the platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}

	/*
	 * Call platform post-initialization. Actually, it's a good
	 * chance to inform the platform that EEH is ready to supply
	 * service once the I/O cache stuff has been built up.
	 */
	if (eeh_ops->post_init) {
		ret = eeh_ops->post_init();
		if (ret)
			return ret;
	}

	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_info("EEH: No capable adapters found\n");

	return ret;
}

core_initcall_sync(eeh_init);

/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, type of the device, on earlier boot
 * command-line arguments, etc.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev)
		return;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
		return;

	/* USB Bus children of PCI devices will not have BUIDs */
	phb = edev->phb;
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @pdn: PCI device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device(). So we have to remove
	 * that here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device should have been removed already,
		 * though it wasn't done correctly. So we needn't call
		 * into the error handler afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->probe(pdn, NULL);

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}
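
/*
 * Taken together, the add/remove hooks in this file give the hotplug and
 * DLPAR paths roughly this shape (a sketch; ordering per the kernel-doc
 * above and below):
 *
 *	eeh_add_device_tree_early(pdn);	before any config-space i/o
 *	... PCI core enumerates the bus and creates pci_dev's ...
 *	eeh_add_device_tree_late(bus);	bind pci_dev's to EEH devices
 *	eeh_add_sysfs_files(bus);	expose EEH state in sysfs
 *	...
 *	eeh_remove_device(dev);		on removal, via pcibios hooks
 */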

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order to restore the
	 * BARs a bit later. So we keep it for the BAR restore and
	 * remove it from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;

	/*
	 * The flag "in_error" is used to trace whether EEH devices for
	 * VFs are in error state or not. It's set in eeh_report_error().
	 * If it's not set, eeh_report_{reset,resume}() won't be called
	 * for the VF EEH device.
	 */
	edev->in_error = false;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing from the PCI subsystem, which means
	 * the PCI device driver either doesn't support EEH or
	 * doesn't support it well. So we rely on hotplug entirely
	 * to do recovery for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}


static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE     */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs a PE-level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			return eeh_pe_reset_and_recover(pe);
		}
	}

	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase the count of passed through devices for the indicated
 * PE. As a result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease the count of pass through devices for the indicated PE. If
 * there is no passed through device in the PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);
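
/*
 * Sketch of the pass-through lifecycle around the two calls above, as a
 * hypothetical host-side caller (e.g. a VFIO-like driver) might use them:
 *
 *	if (eeh_dev_open(pdev))		EEH stops reporting; the caller
 *		goto fail;		owns detection and recovery
 *
 *	... guest drives the device ...
 *
 *	eeh_dev_release(pdev);		EEH resumes normal handling
 */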

#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;

	if (!dev)
		return 0;

	if (dev->iommu_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for a frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, so just
	 * return an error for that case. Note that EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
	case EEH_OPT_FREEZE_PE:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which covers 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	/*
	 * If the parent PE is owned by the host kernel and is undergoing
	 * error recovery, we should return the PE state as temporarily
	 * unavailable so that the error recovery on the guest is suspended
	 * until the recovery completes on the host.
	 */
	if (pe->parent &&
	    !(pe->state & EEH_PE_REMOVED) &&
	    (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
		return EEH_PE_STATE_UNAVAIL;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}
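
/*
 * With the exported primitives above and below, a PE owner (e.g. a guest
 * pass-through stack) can drive recovery of a frozen PE by hand, roughly
 * (a sketch, mirroring what eeh_pe_reset_full() does internally):
 *
 *	eeh_pe_reset(pe, EEH_RESET_HOT);	or EEH_RESET_FUNDAMENTAL
 *	eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
 *	eeh_pe_configure(pe);
 *	eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
 *	eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
 */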

/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it always
		 * causes a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			 __func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges, affected by the
 * PE reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/**
 * eeh_pe_inject_err - Inject the specified PCI error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: error function
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified PCI error, which
 * is determined by @type and @func, to the indicated PE for
 * testing purposes.
 */
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
		      unsigned long addr, unsigned long mask)
{
	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/* Unsupported operation ? */
	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENOENT;

	/* Check on PCI error type */
	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
		return -EINVAL;

	/* Check on PCI error function */
	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
		return -EINVAL;

	return eeh_ops->err_inject(pe, type, func, addr, mask);
}
EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
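
/*
 * Hypothetical in-kernel test usage of the injection hook above; @func
 * must lie in the platform's [EEH_ERR_FUNC_MIN, EEH_ERR_FUNC_MAX] range
 * as checked above, and @addr/@mask select the address range the
 * injected error should match:
 *
 *	ret = eeh_pe_inject_err(pe, EEH_ERR_TYPE_32, func, addr, mask);
 *	if (ret)
 *		pr_err("EEH: error injection failed (%d)\n", ret);
 */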

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open      = proc_eeh_open,
	.read      = seq_read,
	.llseek    = seq_lseek,
	.release   = single_release,
};

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);
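
/*
 * Runtime knobs created above, for reference (paths assume procfs and
 * debugfs are mounted at the usual places):
 *
 *	cat /proc/powerpc/eeh				EEH statistics
 *	cat /sys/kernel/debug/powerpc/eeh_enable	0x1 = enabled
 *	echo 0x0 > /sys/kernel/debug/powerpc/eeh_enable	force-disable
 *	echo 10 > /sys/kernel/debug/powerpc/eeh_max_freezes
 */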