1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/debugfs.h> 26 #include <linux/sched.h> 27 #include <linux/init.h> 28 #include <linux/list.h> 29 #include <linux/pci.h> 30 #include <linux/iommu.h> 31 #include <linux/proc_fs.h> 32 #include <linux/rbtree.h> 33 #include <linux/reboot.h> 34 #include <linux/seq_file.h> 35 #include <linux/spinlock.h> 36 #include <linux/export.h> 37 #include <linux/of.h> 38 39 #include <linux/atomic.h> 40 #include <asm/debug.h> 41 #include <asm/eeh.h> 42 #include <asm/eeh_event.h> 43 #include <asm/io.h> 44 #include <asm/iommu.h> 45 #include <asm/machdep.h> 46 #include <asm/ppc-pci.h> 47 #include <asm/rtas.h> 48 49 50 /** Overview: 51 * EEH, or "Extended Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. 
Systems 54 * that are designed for high-availability/reliability cannot afford 55 * to crash due to a "mere" PCI error, thus the need for EEH. 56 * An EEH-capable bridge operates by converting a detected error 57 * into a "slot freeze", taking the PCI adapter off-line, making 58 * the slot behave, from the OS'es point of view, as if the slot 59 * were "empty": all reads return 0xff's and all writes are silently 60 * ignored. EEH slot isolation events can be triggered by parity 61 * errors on the address or data busses (e.g. during posted writes), 62 * which in turn might be caused by low voltage on the bus, dust, 63 * vibration, humidity, radioactivity or plain-old failed hardware. 64 * 65 * Note, however, that one of the leading causes of EEH slot 66 * freeze events are buggy device drivers, buggy device microcode, 67 * or buggy device hardware. This is because any attempt by the 68 * device to bus-master data to a memory address that is not 69 * assigned to the device will trigger a slot freeze. (The idea 70 * is to prevent devices-gone-wild from corrupting system memory). 71 * Buggy hardware/drivers will have a miserable time co-existing 72 * with EEH. 73 * 74 * Ideally, a PCI device driver, when suspecting that an isolation 75 * event has occurred (e.g. by reading 0xff's), will then ask EEH 76 * whether this is the case, and then take appropriate steps to 77 * reset the PCI slot, the PCI device, and then resume operations. 78 * However, until that day, the checking is done here, with the 79 * eeh_check_failure() routine embedded in the MMIO macros. If 80 * the slot is found to be isolated, an "EEH Event" is synthesized 81 * and sent out for processing. 82 */ 83 84 /* If a device driver keeps reading an MMIO register in an interrupt 85 * handler after a slot isolation event, it might be broken. 86 * This sets the threshold for how many read attempts we allow 87 * before printing an error message. 
88 */ 89 #define EEH_MAX_FAILS 2100000 90 91 /* Time to wait for a PCI slot to report status, in milliseconds */ 92 #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) 93 94 /* 95 * EEH probe mode support, which is part of the flags, 96 * is to support multiple platforms for EEH. Some platforms 97 * like pSeries do PCI emunation based on device tree. 98 * However, other platforms like powernv probe PCI devices 99 * from hardware. The flag is used to distinguish that. 100 * In addition, struct eeh_ops::probe would be invoked for 101 * particular OF node or PCI device so that the corresponding 102 * PE would be created there. 103 */ 104 int eeh_subsystem_flags; 105 EXPORT_SYMBOL(eeh_subsystem_flags); 106 107 /* Platform dependent EEH operations */ 108 struct eeh_ops *eeh_ops = NULL; 109 110 /* Lock to avoid races due to multiple reports of an error */ 111 DEFINE_RAW_SPINLOCK(confirm_error_lock); 112 113 /* Lock to protect passed flags */ 114 static DEFINE_MUTEX(eeh_dev_mutex); 115 116 /* Buffer for reporting pci register dumps. Its here in BSS, and 117 * not dynamically alloced, so that it ends up in RMO where RTAS 118 * can access it. 119 */ 120 #define EEH_PCI_REGS_LOG_LEN 8192 121 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; 122 123 /* 124 * The struct is used to maintain the EEH global statistic 125 * information. 
Besides, the EEH global statistics will be 126 * exported to user space through procfs 127 */ 128 struct eeh_stats { 129 u64 no_device; /* PCI device not found */ 130 u64 no_dn; /* OF node not found */ 131 u64 no_cfg_addr; /* Config address not found */ 132 u64 ignored_check; /* EEH check skipped */ 133 u64 total_mmio_ffs; /* Total EEH checks */ 134 u64 false_positives; /* Unnecessary EEH checks */ 135 u64 slot_resets; /* PE reset */ 136 }; 137 138 static struct eeh_stats eeh_stats; 139 140 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) 141 142 static int __init eeh_setup(char *str) 143 { 144 if (!strcmp(str, "off")) 145 eeh_add_flag(EEH_FORCE_DISABLED); 146 else if (!strcmp(str, "early_log")) 147 eeh_add_flag(EEH_EARLY_DUMP_LOG); 148 149 return 1; 150 } 151 __setup("eeh=", eeh_setup); 152 153 /* 154 * This routine captures assorted PCI configuration space data 155 * for the indicated PCI device, and puts them into a buffer 156 * for RTAS error logging. 157 */ 158 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) 159 { 160 struct device_node *dn = eeh_dev_to_of_node(edev); 161 u32 cfg; 162 int cap, i; 163 int n = 0, l = 0; 164 char buffer[128]; 165 166 n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); 167 pr_warn("EEH: of node=%s\n", dn->full_name); 168 169 eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); 170 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 171 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 172 173 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); 174 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 175 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 176 177 /* Gather bridge-specific registers */ 178 if (edev->mode & EEH_DEV_BRIDGE) { 179 eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); 180 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 181 pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 182 183 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); 184 n += 
scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 185 pr_warn("EEH: Bridge control: %04x\n", cfg); 186 } 187 188 /* Dump out the PCI-X command and status regs */ 189 cap = edev->pcix_cap; 190 if (cap) { 191 eeh_ops->read_config(dn, cap, 4, &cfg); 192 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 193 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 194 195 eeh_ops->read_config(dn, cap+4, 4, &cfg); 196 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 197 pr_warn("EEH: PCI-X status: %08x\n", cfg); 198 } 199 200 /* If PCI-E capable, dump PCI-E cap 10 */ 201 cap = edev->pcie_cap; 202 if (cap) { 203 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 204 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 205 206 for (i=0; i<=8; i++) { 207 eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 208 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 209 210 if ((i % 4) == 0) { 211 if (i != 0) 212 pr_warn("%s\n", buffer); 213 214 l = scnprintf(buffer, sizeof(buffer), 215 "EEH: PCI-E %02x: %08x ", 216 4*i, cfg); 217 } else { 218 l += scnprintf(buffer+l, sizeof(buffer)-l, 219 "%08x ", cfg); 220 } 221 222 } 223 224 pr_warn("%s\n", buffer); 225 } 226 227 /* If AER capable, dump it */ 228 cap = edev->aer_cap; 229 if (cap) { 230 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 231 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 232 233 for (i=0; i<=13; i++) { 234 eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 235 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 236 237 if ((i % 4) == 0) { 238 if (i != 0) 239 pr_warn("%s\n", buffer); 240 241 l = scnprintf(buffer, sizeof(buffer), 242 "EEH: PCI-E AER %02x: %08x ", 243 4*i, cfg); 244 } else { 245 l += scnprintf(buffer+l, sizeof(buffer)-l, 246 "%08x ", cfg); 247 } 248 } 249 250 pr_warn("%s\n", buffer); 251 } 252 253 return n; 254 } 255 256 static void *eeh_dump_pe_log(void *data, void *flag) 257 { 258 struct eeh_pe *pe = data; 259 struct eeh_dev *edev, *tmp; 260 size_t *plen = flag; 261 262 /* If the PE's config space is blocked, 
0xFF's will be 263 * returned. It's pointless to collect the log in this 264 * case. 265 */ 266 if (pe->state & EEH_PE_CFG_BLOCKED) 267 return NULL; 268 269 eeh_pe_for_each_dev(pe, edev, tmp) 270 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, 271 EEH_PCI_REGS_LOG_LEN - *plen); 272 273 return NULL; 274 } 275 276 /** 277 * eeh_slot_error_detail - Generate combined log including driver log and error log 278 * @pe: EEH PE 279 * @severity: temporary or permanent error log 280 * 281 * This routine should be called to generate the combined log, which 282 * is comprised of driver log and error log. The driver log is figured 283 * out from the config space of the corresponding PCI device, while 284 * the error log is fetched through platform dependent function call. 285 */ 286 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 287 { 288 size_t loglen = 0; 289 290 /* 291 * When the PHB is fenced or dead, it's pointless to collect 292 * the data from PCI config space because it should return 293 * 0xFF's. For ER, we still retrieve the data from the PCI 294 * config space. 295 * 296 * For pHyp, we have to enable IO for log retrieval. Otherwise, 297 * 0xFF's is always returned from PCI config space. 298 */ 299 if (!(pe->type & EEH_PE_PHB)) { 300 if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) 301 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 302 eeh_ops->configure_bridge(pe); 303 eeh_pe_restore_bars(pe); 304 305 pci_regs_buf[0] = 0; 306 eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); 307 } 308 309 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 310 } 311 312 /** 313 * eeh_token_to_phys - Convert EEH address token to phys address 314 * @token: I/O token, should be address in the form 0xA.... 315 * 316 * This routine should be called to convert virtual I/O address 317 * to physical one. 
318 */ 319 static inline unsigned long eeh_token_to_phys(unsigned long token) 320 { 321 pte_t *ptep; 322 unsigned long pa; 323 int hugepage_shift; 324 325 /* 326 * We won't find hugepages here, iomem 327 */ 328 ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); 329 if (!ptep) 330 return token; 331 WARN_ON(hugepage_shift); 332 pa = pte_pfn(*ptep) << PAGE_SHIFT; 333 334 return pa | (token & (PAGE_SIZE-1)); 335 } 336 337 /* 338 * On PowerNV platform, we might already have fenced PHB there. 339 * For that case, it's meaningless to recover frozen PE. Intead, 340 * We have to handle fenced PHB firstly. 341 */ 342 static int eeh_phb_check_failure(struct eeh_pe *pe) 343 { 344 struct eeh_pe *phb_pe; 345 unsigned long flags; 346 int ret; 347 348 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 349 return -EPERM; 350 351 /* Find the PHB PE */ 352 phb_pe = eeh_phb_pe_get(pe->phb); 353 if (!phb_pe) { 354 pr_warn("%s Can't find PE for PHB#%d\n", 355 __func__, pe->phb->global_number); 356 return -EEXIST; 357 } 358 359 /* If the PHB has been in problematic state */ 360 eeh_serialize_lock(&flags); 361 if (phb_pe->state & EEH_PE_ISOLATED) { 362 ret = 0; 363 goto out; 364 } 365 366 /* Check PHB state */ 367 ret = eeh_ops->get_state(phb_pe, NULL); 368 if ((ret < 0) || 369 (ret == EEH_STATE_NOT_SUPPORT) || 370 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 371 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 372 ret = 0; 373 goto out; 374 } 375 376 /* Isolate the PHB and send event */ 377 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 378 eeh_serialize_unlock(flags); 379 380 pr_err("EEH: PHB#%x failure detected, location: %s\n", 381 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 382 dump_stack(); 383 eeh_send_failure_event(phb_pe); 384 385 return 1; 386 out: 387 eeh_serialize_unlock(flags); 388 return ret; 389 } 390 391 /** 392 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 393 * @edev: eeh device 394 * 395 * Check for an EEH failure 
for the given device node. Call this 396 * routine if the result of a read was all 0xff's and you want to 397 * find out if this is due to an EEH slot freeze. This routine 398 * will query firmware for the EEH status. 399 * 400 * Returns 0 if there has not been an EEH error; otherwise returns 401 * a non-zero value and queues up a slot isolation event notification. 402 * 403 * It is safe to call this routine in an interrupt context. 404 */ 405 int eeh_dev_check_failure(struct eeh_dev *edev) 406 { 407 int ret; 408 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 409 unsigned long flags; 410 struct device_node *dn; 411 struct pci_dev *dev; 412 struct eeh_pe *pe, *parent_pe, *phb_pe; 413 int rc = 0; 414 const char *location; 415 416 eeh_stats.total_mmio_ffs++; 417 418 if (!eeh_enabled()) 419 return 0; 420 421 if (!edev) { 422 eeh_stats.no_dn++; 423 return 0; 424 } 425 dn = eeh_dev_to_of_node(edev); 426 dev = eeh_dev_to_pci_dev(edev); 427 pe = eeh_dev_to_pe(edev); 428 429 /* Access to IO BARs might get this far and still not want checking. */ 430 if (!pe) { 431 eeh_stats.ignored_check++; 432 pr_debug("EEH: Ignored check for %s %s\n", 433 eeh_pci_name(dev), dn->full_name); 434 return 0; 435 } 436 437 if (!pe->addr && !pe->config_addr) { 438 eeh_stats.no_cfg_addr++; 439 return 0; 440 } 441 442 /* 443 * On PowerNV platform, we might already have fenced PHB 444 * there and we need take care of that firstly. 445 */ 446 ret = eeh_phb_check_failure(pe); 447 if (ret > 0) 448 return ret; 449 450 /* 451 * If the PE isn't owned by us, we shouldn't check the 452 * state. Instead, let the owner handle it if the PE has 453 * been frozen. 454 */ 455 if (eeh_pe_passed(pe)) 456 return 0; 457 458 /* If we already have a pending isolation event for this 459 * slot, we know it's bad already, we don't need to check. 
460 * Do this checking under a lock; as multiple PCI devices 461 * in one slot might report errors simultaneously, and we 462 * only want one error recovery routine running. 463 */ 464 eeh_serialize_lock(&flags); 465 rc = 1; 466 if (pe->state & EEH_PE_ISOLATED) { 467 pe->check_count++; 468 if (pe->check_count % EEH_MAX_FAILS == 0) { 469 location = of_get_property(dn, "ibm,loc-code", NULL); 470 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 471 "location=%s driver=%s pci addr=%s\n", 472 pe->check_count, location, 473 eeh_driver_name(dev), eeh_pci_name(dev)); 474 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 475 eeh_driver_name(dev)); 476 dump_stack(); 477 } 478 goto dn_unlock; 479 } 480 481 /* 482 * Now test for an EEH failure. This is VERY expensive. 483 * Note that the eeh_config_addr may be a parent device 484 * in the case of a device behind a bridge, or it may be 485 * function zero of a multi-function device. 486 * In any case they must share a common PHB. 487 */ 488 ret = eeh_ops->get_state(pe, NULL); 489 490 /* Note that config-io to empty slots may fail; 491 * they are empty when they don't have children. 492 * We will punt with the following conditions: Failure to get 493 * PE's state, EEH not support and Permanently unavailable 494 * state, PE is in good state. 495 */ 496 if ((ret < 0) || 497 (ret == EEH_STATE_NOT_SUPPORT) || 498 ((ret & active_flags) == active_flags)) { 499 eeh_stats.false_positives++; 500 pe->false_positives++; 501 rc = 0; 502 goto dn_unlock; 503 } 504 505 /* 506 * It should be corner case that the parent PE has been 507 * put into frozen state as well. We should take care 508 * that at first. 509 */ 510 parent_pe = pe->parent; 511 while (parent_pe) { 512 /* Hit the ceiling ? */ 513 if (parent_pe->type & EEH_PE_PHB) 514 break; 515 516 /* Frozen parent PE ? 
*/ 517 ret = eeh_ops->get_state(parent_pe, NULL); 518 if (ret > 0 && 519 (ret & active_flags) != active_flags) 520 pe = parent_pe; 521 522 /* Next parent level */ 523 parent_pe = parent_pe->parent; 524 } 525 526 eeh_stats.slot_resets++; 527 528 /* Avoid repeated reports of this failure, including problems 529 * with other functions on this device, and functions under 530 * bridges. 531 */ 532 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 533 eeh_serialize_unlock(flags); 534 535 /* Most EEH events are due to device driver bugs. Having 536 * a stack trace will help the device-driver authors figure 537 * out what happened. So print that out. 538 */ 539 phb_pe = eeh_phb_pe_get(pe->phb); 540 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 541 pe->phb->global_number, pe->addr); 542 pr_err("EEH: PE location: %s, PHB location: %s\n", 543 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 544 dump_stack(); 545 546 eeh_send_failure_event(pe); 547 548 return 1; 549 550 dn_unlock: 551 eeh_serialize_unlock(flags); 552 return rc; 553 } 554 555 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 556 557 /** 558 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 559 * @token: I/O address 560 * 561 * Check for an EEH failure at the given I/O address. Call this 562 * routine if the result of a read was all 0xff's and you want to 563 * find out if this is due to an EEH slot freeze event. This routine 564 * will query firmware for the EEH status. 565 * 566 * Note this routine is safe to call in an interrupt context. 567 */ 568 int eeh_check_failure(const volatile void __iomem *token) 569 { 570 unsigned long addr; 571 struct eeh_dev *edev; 572 573 /* Finding the phys addr + pci device; this is pretty quick. 
*/ 574 addr = eeh_token_to_phys((unsigned long __force) token); 575 edev = eeh_addr_cache_get_dev(addr); 576 if (!edev) { 577 eeh_stats.no_device++; 578 return 0; 579 } 580 581 return eeh_dev_check_failure(edev); 582 } 583 EXPORT_SYMBOL(eeh_check_failure); 584 585 586 /** 587 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 588 * @pe: EEH PE 589 * 590 * This routine should be called to reenable frozen MMIO or DMA 591 * so that it would work correctly again. It's useful while doing 592 * recovery or log collection on the indicated device. 593 */ 594 int eeh_pci_enable(struct eeh_pe *pe, int function) 595 { 596 int active_flag, rc; 597 598 /* 599 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 600 * Also, it's pointless to enable them on unfrozen PE. So 601 * we have to check before enabling IO or DMA. 602 */ 603 switch (function) { 604 case EEH_OPT_THAW_MMIO: 605 active_flag = EEH_STATE_MMIO_ACTIVE; 606 break; 607 case EEH_OPT_THAW_DMA: 608 active_flag = EEH_STATE_DMA_ACTIVE; 609 break; 610 case EEH_OPT_DISABLE: 611 case EEH_OPT_ENABLE: 612 case EEH_OPT_FREEZE_PE: 613 active_flag = 0; 614 break; 615 default: 616 pr_warn("%s: Invalid function %d\n", 617 __func__, function); 618 return -EINVAL; 619 } 620 621 /* 622 * Check if IO or DMA has been enabled before 623 * enabling them. 
624 */ 625 if (active_flag) { 626 rc = eeh_ops->get_state(pe, NULL); 627 if (rc < 0) 628 return rc; 629 630 /* Needn't enable it at all */ 631 if (rc == EEH_STATE_NOT_SUPPORT) 632 return 0; 633 634 /* It's already enabled */ 635 if (rc & active_flag) 636 return 0; 637 } 638 639 640 /* Issue the request */ 641 rc = eeh_ops->set_option(pe, function); 642 if (rc) 643 pr_warn("%s: Unexpected state change %d on " 644 "PHB#%d-PE#%x, err=%d\n", 645 __func__, function, pe->phb->global_number, 646 pe->addr, rc); 647 648 /* Check if the request is finished successfully */ 649 if (active_flag) { 650 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 651 if (rc <= 0) 652 return rc; 653 654 if (rc & active_flag) 655 return 0; 656 657 return -EIO; 658 } 659 660 return rc; 661 } 662 663 /** 664 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 665 * @dev: pci device struct 666 * @state: reset state to enter 667 * 668 * Return value: 669 * 0 if success 670 */ 671 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 672 { 673 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 674 struct eeh_pe *pe = eeh_dev_to_pe(edev); 675 676 if (!pe) { 677 pr_err("%s: No PE found on PCI device %s\n", 678 __func__, pci_name(dev)); 679 return -EINVAL; 680 } 681 682 switch (state) { 683 case pcie_deassert_reset: 684 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 685 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 686 break; 687 case pcie_hot_reset: 688 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 689 eeh_ops->reset(pe, EEH_RESET_HOT); 690 break; 691 case pcie_warm_reset: 692 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 693 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 694 break; 695 default: 696 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 697 return -EINVAL; 698 }; 699 700 return 0; 701 } 702 703 /** 704 * eeh_set_pe_freset - Check the required reset for the indicated device 705 * @data: EEH device 706 * @flag: return value 707 * 708 * Each device might have its preferred reset 
type: fundamental or 709 * hot reset. The routine is used to collected the information for 710 * the indicated device and its children so that the bunch of the 711 * devices could be reset properly. 712 */ 713 static void *eeh_set_dev_freset(void *data, void *flag) 714 { 715 struct pci_dev *dev; 716 unsigned int *freset = (unsigned int *)flag; 717 struct eeh_dev *edev = (struct eeh_dev *)data; 718 719 dev = eeh_dev_to_pci_dev(edev); 720 if (dev) 721 *freset |= dev->needs_freset; 722 723 return NULL; 724 } 725 726 /** 727 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 728 * @pe: EEH PE 729 * 730 * Assert the PCI #RST line for 1/4 second. 731 */ 732 static void eeh_reset_pe_once(struct eeh_pe *pe) 733 { 734 unsigned int freset = 0; 735 736 /* Determine type of EEH reset required for 737 * Partitionable Endpoint, a hot-reset (1) 738 * or a fundamental reset (3). 739 * A fundamental reset required by any device under 740 * Partitionable Endpoint trumps hot-reset. 741 */ 742 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 743 744 if (freset) 745 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 746 else 747 eeh_ops->reset(pe, EEH_RESET_HOT); 748 749 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 750 } 751 752 /** 753 * eeh_reset_pe - Reset the indicated PE 754 * @pe: EEH PE 755 * 756 * This routine should be called to reset indicated device, including 757 * PE. A PE might include multiple PCI devices and sometimes PCI bridges 758 * might be involved as well. 759 */ 760 int eeh_reset_pe(struct eeh_pe *pe) 761 { 762 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 763 int i, state, ret; 764 765 /* Mark as reset and block config space */ 766 eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 767 768 /* Take three shots at resetting the bus */ 769 for (i = 0; i < 3; i++) { 770 eeh_reset_pe_once(pe); 771 772 /* 773 * EEH_PE_ISOLATED is expected to be removed after 774 * BAR restore. 
775 */ 776 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 777 if ((state & flags) == flags) { 778 ret = 0; 779 goto out; 780 } 781 782 if (state < 0) { 783 pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", 784 __func__, pe->phb->global_number, pe->addr); 785 ret = -ENOTRECOVERABLE; 786 goto out; 787 } 788 789 /* We might run out of credits */ 790 ret = -EIO; 791 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", 792 __func__, state, pe->phb->global_number, pe->addr, (i + 1)); 793 } 794 795 out: 796 eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 797 return ret; 798 } 799 800 /** 801 * eeh_save_bars - Save device bars 802 * @edev: PCI device associated EEH device 803 * 804 * Save the values of the device bars. Unlike the restore 805 * routine, this routine is *not* recursive. This is because 806 * PCI devices are added individually; but, for the restore, 807 * an entire slot is reset at a time. 808 */ 809 void eeh_save_bars(struct eeh_dev *edev) 810 { 811 int i; 812 struct device_node *dn; 813 814 if (!edev) 815 return; 816 dn = eeh_dev_to_of_node(edev); 817 818 for (i = 0; i < 16; i++) 819 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); 820 821 /* 822 * For PCI bridges including root port, we need enable bus 823 * master explicitly. Otherwise, it can't fetch IODA table 824 * entries correctly. So we cache the bit in advance so that 825 * we can restore it after reset, either PHB range or PE range. 826 */ 827 if (edev->mode & EEH_DEV_BRIDGE) 828 edev->config_space[1] |= PCI_COMMAND_MASTER; 829 } 830 831 /** 832 * eeh_ops_register - Register platform dependent EEH operations 833 * @ops: platform dependent EEH operations 834 * 835 * Register the platform dependent EEH operation callback 836 * functions. The platform should call this function before 837 * any other EEH operations. 
838 */ 839 int __init eeh_ops_register(struct eeh_ops *ops) 840 { 841 if (!ops->name) { 842 pr_warn("%s: Invalid EEH ops name for %p\n", 843 __func__, ops); 844 return -EINVAL; 845 } 846 847 if (eeh_ops && eeh_ops != ops) { 848 pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 849 __func__, eeh_ops->name, ops->name); 850 return -EEXIST; 851 } 852 853 eeh_ops = ops; 854 855 return 0; 856 } 857 858 /** 859 * eeh_ops_unregister - Unreigster platform dependent EEH operations 860 * @name: name of EEH platform operations 861 * 862 * Unregister the platform dependent EEH operation callback 863 * functions. 864 */ 865 int __exit eeh_ops_unregister(const char *name) 866 { 867 if (!name || !strlen(name)) { 868 pr_warn("%s: Invalid EEH ops name\n", 869 __func__); 870 return -EINVAL; 871 } 872 873 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 874 eeh_ops = NULL; 875 return 0; 876 } 877 878 return -EEXIST; 879 } 880 881 static int eeh_reboot_notifier(struct notifier_block *nb, 882 unsigned long action, void *unused) 883 { 884 eeh_clear_flag(EEH_ENABLED); 885 return NOTIFY_DONE; 886 } 887 888 static struct notifier_block eeh_reboot_nb = { 889 .notifier_call = eeh_reboot_notifier, 890 }; 891 892 /** 893 * eeh_init - EEH initialization 894 * 895 * Initialize EEH by trying to enable it for all of the adapters in the system. 896 * As a side effect we can determine here if eeh is supported at all. 897 * Note that we leave EEH on so failed config cycles won't cause a machine 898 * check. If a user turns off EEH for a particular adapter they are really 899 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 900 * grant access to a slot if EEH isn't enabled, and so we always enable 901 * EEH for all slots/all devices. 902 * 903 * The eeh-force-off option disables EEH checking globally, for all slots. 904 * Even if force-off is set, the EEH hardware is still enabled, so that 905 * newer systems can boot. 
906 */ 907 int eeh_init(void) 908 { 909 struct pci_controller *hose, *tmp; 910 struct device_node *phb; 911 static int cnt = 0; 912 int ret = 0; 913 914 /* 915 * We have to delay the initialization on PowerNV after 916 * the PCI hierarchy tree has been built because the PEs 917 * are figured out based on PCI devices instead of device 918 * tree nodes 919 */ 920 if (machine_is(powernv) && cnt++ <= 0) 921 return ret; 922 923 /* Register reboot notifier */ 924 ret = register_reboot_notifier(&eeh_reboot_nb); 925 if (ret) { 926 pr_warn("%s: Failed to register notifier (%d)\n", 927 __func__, ret); 928 return ret; 929 } 930 931 /* call platform initialization function */ 932 if (!eeh_ops) { 933 pr_warn("%s: Platform EEH operation not found\n", 934 __func__); 935 return -EEXIST; 936 } else if ((ret = eeh_ops->init())) 937 return ret; 938 939 /* Initialize EEH event */ 940 ret = eeh_event_init(); 941 if (ret) 942 return ret; 943 944 /* Enable EEH for all adapters */ 945 if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) { 946 list_for_each_entry_safe(hose, tmp, 947 &hose_list, list_node) { 948 phb = hose->dn; 949 traverse_pci_devices(phb, eeh_ops->of_probe, NULL); 950 } 951 } else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) { 952 list_for_each_entry_safe(hose, tmp, 953 &hose_list, list_node) 954 pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); 955 } else { 956 pr_warn("%s: Invalid probe mode %x", 957 __func__, eeh_subsystem_flags); 958 return -EINVAL; 959 } 960 961 /* 962 * Call platform post-initialization. Actually, It's good chance 963 * to inform platform that EEH is ready to supply service if the 964 * I/O cache stuff has been built up. 
965 */ 966 if (eeh_ops->post_init) { 967 ret = eeh_ops->post_init(); 968 if (ret) 969 return ret; 970 } 971 972 if (eeh_enabled()) 973 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 974 else 975 pr_warn("EEH: No capable adapters found\n"); 976 977 return ret; 978 } 979 980 core_initcall_sync(eeh_init); 981 982 /** 983 * eeh_add_device_early - Enable EEH for the indicated device_node 984 * @dn: device node for which to set up EEH 985 * 986 * This routine must be used to perform EEH initialization for PCI 987 * devices that were added after system boot (e.g. hotplug, dlpar). 988 * This routine must be called before any i/o is performed to the 989 * adapter (inluding any config-space i/o). 990 * Whether this actually enables EEH or not for this device depends 991 * on the CEC architecture, type of the device, on earlier boot 992 * command-line arguments & etc. 993 */ 994 void eeh_add_device_early(struct device_node *dn) 995 { 996 struct pci_controller *phb; 997 998 /* 999 * If we're doing EEH probe based on PCI device, we 1000 * would delay the probe until late stage because 1001 * the PCI device isn't available this moment. 1002 */ 1003 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1004 return; 1005 1006 if (!of_node_to_eeh_dev(dn)) 1007 return; 1008 phb = of_node_to_eeh_dev(dn)->phb; 1009 1010 /* USB Bus children of PCI devices will not have BUID's */ 1011 if (NULL == phb || 0 == phb->buid) 1012 return; 1013 1014 eeh_ops->of_probe(dn, NULL); 1015 } 1016 1017 /** 1018 * eeh_add_device_tree_early - Enable EEH for the indicated device 1019 * @dn: device node 1020 * 1021 * This routine must be used to perform EEH initialization for the 1022 * indicated PCI device that was added after system boot (e.g. 1023 * hotplug, dlpar). 
1024 */ 1025 void eeh_add_device_tree_early(struct device_node *dn) 1026 { 1027 struct device_node *sib; 1028 1029 for_each_child_of_node(dn, sib) 1030 eeh_add_device_tree_early(sib); 1031 eeh_add_device_early(dn); 1032 } 1033 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1034 1035 /** 1036 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1037 * @dev: pci device for which to set up EEH 1038 * 1039 * This routine must be used to complete EEH initialization for PCI 1040 * devices that were added after system boot (e.g. hotplug, dlpar). 1041 */ 1042 void eeh_add_device_late(struct pci_dev *dev) 1043 { 1044 struct device_node *dn; 1045 struct eeh_dev *edev; 1046 1047 if (!dev || !eeh_enabled()) 1048 return; 1049 1050 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1051 1052 dn = pci_device_to_OF_node(dev); 1053 edev = of_node_to_eeh_dev(dn); 1054 if (edev->pdev == dev) { 1055 pr_debug("EEH: Already referenced !\n"); 1056 return; 1057 } 1058 1059 /* 1060 * The EEH cache might not be removed correctly because of 1061 * unbalanced kref to the device during unplug time, which 1062 * relies on pcibios_release_device(). So we have to remove 1063 * that here explicitly. 1064 */ 1065 if (edev->pdev) { 1066 eeh_rmv_from_parent_pe(edev); 1067 eeh_addr_cache_rmv_dev(edev->pdev); 1068 eeh_sysfs_remove_device(edev->pdev); 1069 edev->mode &= ~EEH_DEV_SYSFS; 1070 1071 /* 1072 * We definitely should have the PCI device removed 1073 * though it wasn't correctly. So we needn't call 1074 * into error handler afterwards. 1075 */ 1076 edev->mode |= EEH_DEV_NO_HANDLER; 1077 1078 edev->pdev = NULL; 1079 dev->dev.archdata.edev = NULL; 1080 } 1081 1082 edev->pdev = dev; 1083 dev->dev.archdata.edev = edev; 1084 1085 /* 1086 * We have to do the EEH probe here because the PCI device 1087 * hasn't been created yet in the early stage. 
 */
	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->dev_probe(dev, NULL);

	/* Register the device's MMIO ranges for address -> edev lookup */
	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		/* Recurse into the secondary bus behind any bridge */
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		/* Recurse into the secondary bus behind any bridge */
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem.
I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	/* Never bound to an EEH device or PE: nothing to undo */
	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing from the PCI subsystem, that means
	 * the PCI device driver can't support EEH or not
	 * well. So we rely on hotplug completely to do recovery
	 * for the specific PCI device.
1185 */ 1186 edev->mode |= EEH_DEV_NO_HANDLER; 1187 1188 eeh_addr_cache_rmv_dev(dev); 1189 eeh_sysfs_remove_device(dev); 1190 edev->mode &= ~EEH_DEV_SYSFS; 1191 } 1192 1193 int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) 1194 { 1195 int ret; 1196 1197 ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 1198 if (ret) { 1199 pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", 1200 __func__, ret, pe->phb->global_number, pe->addr); 1201 return ret; 1202 } 1203 1204 ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 1205 if (ret) { 1206 pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", 1207 __func__, ret, pe->phb->global_number, pe->addr); 1208 return ret; 1209 } 1210 1211 /* Clear software isolated state */ 1212 if (sw_state && (pe->state & EEH_PE_ISOLATED)) 1213 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 1214 1215 return ret; 1216 } 1217 1218 1219 static struct pci_device_id eeh_reset_ids[] = { 1220 { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ 1221 { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ 1222 { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ 1223 { 0 } 1224 }; 1225 1226 static int eeh_pe_change_owner(struct eeh_pe *pe) 1227 { 1228 struct eeh_dev *edev, *tmp; 1229 struct pci_dev *pdev; 1230 struct pci_device_id *id; 1231 int flags, ret; 1232 1233 /* Check PE state */ 1234 flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 1235 ret = eeh_ops->get_state(pe, NULL); 1236 if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) 1237 return 0; 1238 1239 /* Unfrozen PE, nothing to do */ 1240 if ((ret & flags) == flags) 1241 return 0; 1242 1243 /* Frozen PE, check if it needs PE level reset */ 1244 eeh_pe_for_each_dev(pe, edev, tmp) { 1245 pdev = eeh_dev_to_pci_dev(edev); 1246 if (!pdev) 1247 continue; 1248 1249 for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { 1250 if (id->vendor != PCI_ANY_ID && 1251 id->vendor != pdev->vendor) 1252 continue; 1253 if (id->device != PCI_ANY_ID && 1254 id->device != pdev->device) 1255 continue; 1256 if (id->subvendor != 
PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			/* Matched a device that requires a full PE reset */
			goto reset;
		}
	}

	/* No quirky device in the PE: thawing it is sufficient */
	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase count of passed through devices for the indicated
 * PE. In the result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	/* Serialize against eeh_dev_release() */
	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease count of pass through devices for the indicated PE. If
 * there is no passed through device in PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	/* Serialize against eeh_dev_open() */
	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	/* Hand the PE back: unfreeze or reset it as needed */
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);

#ifdef CONFIG_IOMMU_API

/*
 * Callback for iommu_group_for_each_dev(): record the first device
 * in the group that has an IOMMU table and return non-zero so the
 * iteration stops there.
 */
static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;
	struct iommu_table *tbl;

	if (!dev)
		return 0;

	tbl = get_iommu_table_base(dev);
	if (tbl && tbl->it_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert IOMMU group to EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ?
 */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, to enable IO or DMA for the frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, just
	 * return error for the case. And the EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		/* Per-PE disable isn't supported; accepted as a no-op */
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which includes 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ?
 */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	/* Map the raw backend state bits onto the coarse PE states */
	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

/*
 * Restore config space (BARs) and re-enable every PCI device in the
 * PE, then thaw the PE itself. Called from eeh_pe_reset() once a
 * reset has been deactivated.
 */
static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part for error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ?
 */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		/* Config access is allowed again once reset is released */
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it would
		 * otherwise cause a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		/* Block config access while the reset is asserted */
		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			__func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges affected by PE
 * reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ?
 */
	if (!pe)
		return -ENODEV;

	/* Currently a no-op beyond the sanity check; always returns 0 */
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/* Dump EEH status and statistics (seq_file backend of /proc/powerpc/eeh) */
static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			"no device=%llu\n"
			"no device node=%llu\n"
			"no config address=%llu\n"
			"check not wanted=%llu\n"
			"eeh_total_mmio_ffs=%llu\n"
			"eeh_false_positives=%llu\n"
			"eeh_slot_resets=%llu\n",
			eeh_stats.no_device,
			eeh_stats.no_dn,
			eeh_stats.no_cfg_addr,
			eeh_stats.ignored_check,
			eeh_stats.total_mmio_ffs,
			eeh_stats.false_positives,
			eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open      = proc_eeh_open,
	.read      = seq_read,
	.llseek    = seq_lseek,
	.release   = single_release,
};

#ifdef CONFIG_DEBUG_FS
/*
 * debugfs "eeh_enable" knob: writing non-zero clears the
 * force-disabled flag, zero sets it; the backend's post_init()
 * is re-run to propagate the change.
 */
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

/* Read back 1 when EEH is currently enabled, 0 otherwise */
static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
#endif

/* Create /proc/powerpc/eeh and the debugfs knob on EEH-capable platforms */
static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);