/*
 * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
 * Copyright IBM Corp. 2004 2005
 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
#include <asm/pci-bridge.h>
#include <asm/prom.h>
#include <asm/rtas.h>

struct eeh_rmv_data {
	struct list_head removed_vf_list;
	int removed_dev_count;
};

static int eeh_result_priority(enum pci_ers_result result)
{
	switch (result) {
	case PCI_ERS_RESULT_NONE:
		return 1;
	case PCI_ERS_RESULT_NO_AER_DRIVER:
		return 2;
	case PCI_ERS_RESULT_RECOVERED:
		return 3;
	case PCI_ERS_RESULT_CAN_RECOVER:
		return 4;
	case PCI_ERS_RESULT_DISCONNECT:
		return 5;
	case PCI_ERS_RESULT_NEED_RESET:
		return 6;
	default:
		WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
		return 0;
	}
}

static const char *pci_ers_result_name(enum pci_ers_result result)
{
	switch (result) {
	case PCI_ERS_RESULT_NONE:
		return "none";
	case PCI_ERS_RESULT_CAN_RECOVER:
		return "can recover";
	case PCI_ERS_RESULT_NEED_RESET:
		return "need reset";
	case PCI_ERS_RESULT_DISCONNECT:
		return "disconnect";
	case PCI_ERS_RESULT_RECOVERED:
		return "recovered";
	case PCI_ERS_RESULT_NO_AER_DRIVER:
		return "no AER driver";
	default:
		WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
		return "unknown";
	}
}

static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
						enum pci_ers_result new)
{
	if (eeh_result_priority(new) > eeh_result_priority(old))
		return new;
	return old;
}

static bool eeh_dev_removed(struct eeh_dev *edev)
{
	return !edev || (edev->mode & EEH_DEV_REMOVED);
}

static bool eeh_edev_actionable(struct eeh_dev *edev)
{
	if (!edev->pdev)
		return false;
	if (edev->pdev->error_state == pci_channel_io_perm_failure)
		return false;
	if (eeh_dev_removed(edev))
		return false;
	if (eeh_pe_passed(edev->pe))
		return false;

	return true;
}

/**
 * eeh_pcid_get - Get the PCI device driver
 * @pdev: PCI device
 *
 * The function is used to retrieve the PCI device driver for
 * the indicated PCI device. A reference on the driver module is
 * taken as well, to prevent it from being unloaded while we are
 * using it; otherwise a kernel crash could result.
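 *
 * The caller must drop the reference with eeh_pcid_put() once it is
 * done with the driver.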
 */
static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
{
	if (!pdev || !pdev->driver)
		return NULL;

	if (!try_module_get(pdev->driver->driver.owner))
		return NULL;

	return pdev->driver;
}

/**
 * eeh_pcid_put - Drop the reference to the PCI device driver
 * @pdev: PCI device
 *
 * The function drops the reference to the PCI device driver of the
 * indicated PCI device that was taken by eeh_pcid_get().
 */
static inline void eeh_pcid_put(struct pci_dev *pdev)
{
	if (!pdev || !pdev->driver)
		return;

	module_put(pdev->driver->driver.owner);
}

/**
 * eeh_disable_irq - Disable interrupt for the recovering device
 * @edev: EEH device
 *
 * This routine must be called when a temporary or permanent error is
 * reported to a PCI device, in order to disable that device's interrupt.
 * If the device has MSI or MSI-X enabled, there is no real work to do
 * because EEH freezes DMA transfers for devices hitting EEH errors,
 * which effectively disables MSI and MSI-X as well.
 */
static void eeh_disable_irq(struct eeh_dev *edev)
{
	/* Don't disable MSI and MSI-X interrupts. They are
	 * effectively disabled by the DMA Stopped state
	 * when an EEH error occurs.
	 */
	if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
		return;

	if (!irq_has_action(edev->pdev->irq))
		return;

	edev->mode |= EEH_DEV_IRQ_DISABLED;
	disable_irq_nosync(edev->pdev->irq);
}

/**
 * eeh_enable_irq - Enable interrupt for the recovering device
 * @edev: EEH device
 *
 * This routine must be called to re-enable the interrupt when the
 * failed device is being resumed.
 */
static void eeh_enable_irq(struct eeh_dev *edev)
{
	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
		/*
		 * FIXME !!!!!
		 *
		 * This is just ass backwards. This maze has
		 * unbalanced irq_enable/disable calls. So instead of
		 * finding the root cause it works around the warning
		 * in the irq_enable code by conditionally calling
		 * into it.
		 *
		 * That's just wrong. The warning in the core code is
		 * there to tell people to fix their asymmetries in
		 * their own code, not by abusing the core information
		 * to avoid it.
		 *
		 * I so wish that the asymmetry would be the other way
		 * round and a few more irq_disable calls render that
		 * shit unusable forever.
		 *
		 * tglx
		 */
		if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
			enable_irq(edev->pdev->irq);
	}
}

static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
	struct pci_dev *pdev;

	if (!edev)
		return;

	/*
	 * The config space of some adapters can't be accessed while
	 * they are in this state; touching it would cause a fenced PHB.
	 * We don't save the contents of their config space here and will
	 * instead restore from the initial config space saved when the
	 * EEH device was created.
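	 *
	 * (See eeh_dev_restore_state(), which restores such devices via
	 * eeh_pe_restore_bars() instead of pci_restore_state().)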
	 */
	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
		return;

	pdev = eeh_dev_to_pci_dev(edev);
	if (!pdev)
		return;

	pci_save_state(pdev);
}

static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
{
	struct eeh_pe *pe;
	struct eeh_dev *edev, *tmp;

	eeh_for_each_pe(root, pe)
		eeh_pe_for_each_dev(pe, edev, tmp)
			if (eeh_edev_actionable(edev))
				edev->pdev->error_state = s;
}

static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
{
	struct eeh_pe *pe;
	struct eeh_dev *edev, *tmp;

	eeh_for_each_pe(root, pe) {
		eeh_pe_for_each_dev(pe, edev, tmp) {
			if (!eeh_edev_actionable(edev))
				continue;

			if (!eeh_pcid_get(edev->pdev))
				continue;

			if (enable)
				eeh_enable_irq(edev);
			else
				eeh_disable_irq(edev);

			eeh_pcid_put(edev->pdev);
		}
	}
}

typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
					     struct pci_dev *,
					     struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
			       enum pci_ers_result *result)
{
	struct pci_dev *pdev;
	struct pci_driver *driver;
	enum pci_ers_result new_result;

	pci_lock_rescan_remove();
	pdev = edev->pdev;
	if (pdev)
		get_device(&pdev->dev);
	pci_unlock_rescan_remove();
	if (!pdev) {
		eeh_edev_info(edev, "no device");
		return;
	}
	device_lock(&pdev->dev);
	if (eeh_edev_actionable(edev)) {
		driver = eeh_pcid_get(pdev);

		if (!driver)
			eeh_edev_info(edev, "no driver");
		else if (!driver->err_handler)
			eeh_edev_info(edev, "driver not EEH aware");
		else if (edev->mode & EEH_DEV_NO_HANDLER)
			eeh_edev_info(edev, "driver bound too late");
		else {
			new_result = fn(edev, pdev, driver);
			eeh_edev_info(edev, "%s driver reports: '%s'",
				      driver->name,
				      pci_ers_result_name(new_result));
			if (result)
				*result = pci_ers_merge_result(*result,
							       new_result);
		}
		if (driver)
			eeh_pcid_put(pdev);
	} else {
		eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
			      !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
	}
	device_unlock(&pdev->dev);
	if (edev->pdev != pdev)
		eeh_edev_warn(edev, "Device changed during processing!\n");
	put_device(&pdev->dev);
}

static void eeh_pe_report(const char *name, struct eeh_pe *root,
			  eeh_report_fn fn, enum pci_ers_result *result)
{
	struct eeh_pe *pe;
	struct eeh_dev *edev, *tmp;

	pr_info("EEH: Beginning: '%s'\n", name);
	eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
		eeh_pe_report_edev(edev, fn, result);
	if (result)
		pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
			name, pci_ers_result_name(*result));
	else
		pr_info("EEH: Finished:'%s'\n", name);
}

/**
 * eeh_report_error - Report pci error to each device driver
 * @edev: eeh device
 * @pdev: PCI device
 * @driver: device's PCI driver
 *
 * Report an EEH error to each device driver.
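 *
 * This is invoked through eeh_pe_report() after the channel state has
 * been set to pci_channel_io_frozen, and calls the driver's
 * error_detected() handler with that state.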
 */
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
					    struct pci_dev *pdev,
					    struct pci_driver *driver)
{
	enum pci_ers_result rc;

	if (!driver->err_handler->error_detected)
		return PCI_ERS_RESULT_NONE;

	eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
		      driver->name);
	rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);

	edev->in_error = true;
	pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
	return rc;
}

/**
 * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
 * @edev: eeh device
 * @pdev: PCI device
 * @driver: device's PCI driver
 *
 * Tells each device driver that IO ports, MMIO and config space I/O
 * are now enabled.
 */
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
						   struct pci_dev *pdev,
						   struct pci_driver *driver)
{
	if (!driver->err_handler->mmio_enabled)
		return PCI_ERS_RESULT_NONE;
	eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
	return driver->err_handler->mmio_enabled(pdev);
}

/**
 * eeh_report_reset - Tell device that slot has been reset
 * @edev: eeh device
 * @pdev: PCI device
 * @driver: device's PCI driver
 *
 * This routine is called while EEH resets the particular PCI device so
 * that the associated device driver can take whatever actions it needs,
 * usually saving the state it requires to bring the device back up once
 * it has been recovered.
 */
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
					    struct pci_dev *pdev,
					    struct pci_driver *driver)
{
	if (!driver->err_handler->slot_reset || !edev->in_error)
		return PCI_ERS_RESULT_NONE;
	eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
	return driver->err_handler->slot_reset(pdev);
}

static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
	struct pci_dev *pdev;

	if (!edev)
		return;

	/*
	 * The config space contents weren't saved for adapters whose
	 * config space is blocked (see eeh_dev_save_state()). For those
	 * we restore the initial config space that was saved when the
	 * EEH device was created.
	 */
	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
		if (list_is_last(&edev->entry, &edev->pe->edevs))
			eeh_pe_restore_bars(edev->pe);

		return;
	}

	pdev = eeh_dev_to_pci_dev(edev);
	if (!pdev)
		return;

	pci_restore_state(pdev);
}

/**
 * eeh_report_resume - Tell device to resume normal operations
 * @edev: eeh device
 * @pdev: PCI device
 * @driver: device's PCI driver
 *
 * This routine notifies the device driver that it may resume operations,
 * so that it can do whatever re-initialization is needed to make the
 * recovered device work again.
 */
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
					     struct pci_dev *pdev,
					     struct pci_driver *driver)
{
	if (!driver->err_handler->resume || !edev->in_error)
		return PCI_ERS_RESULT_NONE;

	eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
	driver->err_handler->resume(pdev);

	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
#endif
	return PCI_ERS_RESULT_NONE;
}

/**
 * eeh_report_failure - Tell device driver that device is dead.
 * @edev: eeh device
 * @pdev: PCI device
 * @driver: device's PCI driver
 *
 * This informs the device driver that the device is permanently
 * dead, and that no further recovery attempts will be made on it.
 */
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
					      struct pci_dev *pdev,
					      struct pci_driver *driver)
{
	enum pci_ers_result rc;

	if (!driver->err_handler->error_detected)
		return PCI_ERS_RESULT_NONE;

	eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
		      driver->name);
	rc = driver->err_handler->error_detected(pdev,
						 pci_channel_io_perm_failure);

	pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
	return rc;
}

static void *eeh_add_virt_device(struct eeh_dev *edev)
{
	struct pci_driver *driver;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);

	if (!(edev->physfn)) {
		eeh_edev_warn(edev, "Not for VF\n");
		return NULL;
	}

	driver = eeh_pcid_get(dev);
	if (driver) {
		if (driver->err_handler) {
			eeh_pcid_put(dev);
			return NULL;
		}
		eeh_pcid_put(dev);
	}

#ifdef CONFIG_PCI_IOV
	pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
	return NULL;
}

static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
	struct pci_driver *driver;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;

	/*
	 * Ideally we would remove the PCI bridges as well. However,
	 * doing so adds a lot of complexity, particularly because some
	 * of the devices under a bridge might themselves support EEH.
	 * For simplicity we only deal with ordinary PCI devices here.
	 */
	if (!eeh_edev_actionable(edev) ||
	    (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
		return;

	if (rmv_data) {
		driver = eeh_pcid_get(dev);
		if (driver) {
			if (driver->err_handler &&
			    driver->err_handler->error_detected &&
			    driver->err_handler->slot_reset) {
				eeh_pcid_put(dev);
				return;
			}
			eeh_pcid_put(dev);
		}
	}

	/* Remove it from PCI subsystem */
	pr_info("EEH: Removing %s without EEH sensitive driver\n",
		pci_name(dev));
	edev->mode |= EEH_DEV_DISCONNECTED;
	if (rmv_data)
		rmv_data->removed_dev_count++;

	if (edev->physfn) {
#ifdef CONFIG_PCI_IOV
		struct pci_dn *pdn = eeh_dev_to_pdn(edev);

		pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
		edev->pdev = NULL;

		/*
		 * The VF PE number has to be set to an invalid value;
		 * this is required to plug the VF back in successfully.
		 */
		pdn->pe_number = IODA_INVALID_PE;
#endif
		if (rmv_data)
			list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
	} else {
		pci_lock_rescan_remove();
		pci_stop_and_remove_bus_device(dev);
		pci_unlock_rescan_remove();
	}
}

static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
{
	struct eeh_dev *edev, *tmp;

	eeh_pe_for_each_dev(pe, edev, tmp) {
		if (!(edev->mode & EEH_DEV_DISCONNECTED))
			continue;

		edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
		eeh_rmv_from_parent_pe(edev);
	}

	return NULL;
}

/*
 * Explicitly clear the PE's frozen state on PowerNV, where the PE stays
 * frozen until the BAR restore is completed. It's harmless to clear it
 * on pSeries. To be consistent with PE reset (which is tried up to 3
 * times), we try to clear the frozen state up to 3 times as well.
 */
static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
{
	struct eeh_pe *pe;
	int i;

	eeh_for_each_pe(root, pe) {
		if (include_passed || !eeh_pe_passed(pe)) {
			for (i = 0; i < 3; i++)
				if (!eeh_unfreeze_pe(pe))
					break;
			if (i >= 3)
				return -EIO;
		}
	}
	eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
	return 0;
}

int eeh_pe_reset_and_recover(struct eeh_pe *pe)
{
	int ret;

	/* Bail if the PE is being recovered */
	if (pe->state & EEH_PE_RECOVERING)
		return 0;

	/* Put the PE into recovery mode */
	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);

	/* Save states */
	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);

	/* Issue reset */
	ret = eeh_pe_reset_full(pe, true);
	if (ret) {
		eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
		return ret;
	}

	/* Unfreeze the PE */
	ret = eeh_clear_pe_frozen_state(pe, true);
	if (ret) {
		eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
		return ret;
	}

	/* Restore device state */
	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);

	/* Clear recovery mode */
	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);

	return 0;
}

/**
 * eeh_reset_device - Perform actual reset of a pci slot
 * @pe: EEH PE
 * @bus: PCI bus corresponding to the isolated slot
 * @rmv_data: optional; list to record removed devices
 * @driver_eeh_aware: does the device's driver provide EEH support?
 *
 * This routine must be called to do reset on the indicated PE.
 * During the reset, udev might be invoked because those affected
 * PCI devices will be removed and then re-added.
 */
static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
			    struct eeh_rmv_data *rmv_data,
			    bool driver_eeh_aware)
{
	time64_t tstamp;
	int cnt, rc;
	struct eeh_dev *edev;
	struct eeh_pe *tmp_pe;
	bool any_passed = false;

	eeh_for_each_pe(pe, tmp_pe)
		any_passed |= eeh_pe_passed(tmp_pe);

	/* pcibios will clear the counter; save the value */
	cnt = pe->freeze_count;
	tstamp = pe->tstamp;

	/*
	 * We don't remove the corresponding PE instances because
	 * we need the information afterwards. The attached EEH
	 * devices are expected to be attached soon when calling
	 * into pci_hp_add_devices().
	 */
	eeh_pe_state_mark(pe, EEH_PE_KEEP);
	if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
	} else {
		pci_lock_rescan_remove();
		pci_hp_remove_devices(bus);
		pci_unlock_rescan_remove();
	}

	/*
	 * Reset the pci controller. (Asserts RST#; resets config space).
	 * Reconfigure bridges and devices. Don't try to bring the system
	 * up if the reset failed for some reason.
	 *
	 * During the reset, it's very dangerous to have uncontrolled PCI
	 * config accesses. So we prefer to block them. However, controlled
	 * PCI config accesses initiated from EEH itself are allowed.
	 */
	rc = eeh_pe_reset_full(pe, false);
	if (rc)
		return rc;

	pci_lock_rescan_remove();

	/* Restore PE */
	eeh_ops->configure_bridge(pe);
	eeh_pe_restore_bars(pe);

	/* Clear frozen state */
	rc = eeh_clear_pe_frozen_state(pe, false);
	if (rc) {
		pci_unlock_rescan_remove();
		return rc;
	}

	/* Give the system 5 seconds to finish running the user-space
	 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
	 * this is a hack, but if we don't do this, and try to bring
	 * the device up before the scripts have taken it down,
	 * potentially weird things happen.
	 */
	if (!driver_eeh_aware || rmv_data->removed_dev_count) {
		pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
			(driver_eeh_aware ? "partial" : "complete"));
		ssleep(5);

		/*
		 * The EEH device is still connected with its parent
		 * PE. We should disconnect it so the binding can be
		 * rebuilt when adding PCI devices.
		 */
		edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
		if (pe->type & EEH_PE_VF) {
			eeh_add_virt_device(edev);
		} else {
			if (!driver_eeh_aware)
				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
			pci_hp_add_devices(bus);
		}
	}
	eeh_pe_state_clear(pe, EEH_PE_KEEP, true);

	pe->tstamp = tstamp;
	pe->freeze_count = cnt;

	pci_unlock_rescan_remove();
	return 0;
}

/* The longest amount of time to wait for a pci device
 * to come back on line, in seconds.
 */
#define MAX_WAIT_FOR_RECOVERY 300


/* Walks the PE tree after processing an event to remove any stale PEs.
 *
 * NB: Leaf PEs need to be removed before their parents. Although this
 * could be done iteratively, we do it recursively since that is easier
 * to read and we need to guarantee that the leaf nodes are handled
 * first.
 */
static void eeh_pe_cleanup(struct eeh_pe *pe)
{
	struct eeh_pe *child_pe, *tmp;

	list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
		eeh_pe_cleanup(child_pe);

	if (pe->state & EEH_PE_KEEP)
		return;

	if (!(pe->state & EEH_PE_INVALID))
		return;

	if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
		list_del(&pe->child);
		kfree(pe);
	}
}

/**
 * eeh_slot_presence_check - Check if a device is still present in a slot
 * @pdev: pci_dev to check
 *
 * This function may return a false positive if we can't determine the slot's
 * presence state. This might happen for PCIe slots if the PE containing
 * the upstream bridge is also frozen, or the bridge is part of the same PE
 * as the device.
 *
 * This shouldn't happen often, but you might see it if you hotplug a PCIe
 * switch.
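 *
 * As a side effect, the slot's attention indicator is turned on while we
 * have hold of the slot ops; it is cleared again by
 * eeh_clear_slot_attention() once recovery finishes.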
 */
static bool eeh_slot_presence_check(struct pci_dev *pdev)
{
	const struct hotplug_slot_ops *ops;
	struct pci_slot *slot;
	u8 state;
	int rc;

	if (!pdev)
		return false;

	if (pdev->error_state == pci_channel_io_perm_failure)
		return false;

	slot = pdev->slot;
	if (!slot || !slot->hotplug)
		return true;

	ops = slot->hotplug->ops;
	if (!ops || !ops->get_adapter_status)
		return true;

	/* set the attention indicator while we've got the slot ops */
	if (ops->set_attention_status)
		ops->set_attention_status(slot->hotplug, 1);

	rc = ops->get_adapter_status(slot->hotplug, &state);
	if (rc)
		return true;

	return !!state;
}

static void eeh_clear_slot_attention(struct pci_dev *pdev)
{
	const struct hotplug_slot_ops *ops;
	struct pci_slot *slot;

	if (!pdev)
		return;

	if (pdev->error_state == pci_channel_io_perm_failure)
		return;

	slot = pdev->slot;
	if (!slot || !slot->hotplug)
		return;

	ops = slot->hotplug->ops;
	if (!ops || !ops->set_attention_status)
		return;

	ops->set_attention_status(slot->hotplug, 0);
}

/**
 * eeh_handle_normal_event - Handle EEH events on a specific PE
 * @pe: EEH PE - which should not be used after we return, as it may
 *      have been invalidated.
 *
 * Attempts to recover the given PE. If recovery fails or the PE has failed
 * too many times, remove the PE.
 *
 * When the PHB detects address or data parity errors on a particular PCI
 * slot, the associated PE is frozen. In addition, DMA to wild addresses
 * (which usually happens due to bugs in device drivers or in PCI adapter
 * firmware) can cause EEH errors; #SERR, #PERR and other miscellaneous
 * PCI errors can trigger EEH errors as well.
 *
 * The recovery process consists of unplugging the device driver (which
 * generates hotplug events to userspace), then issuing a PCI #RST to
 * the device, then reconfiguring the PCI config space for all bridges
 * and devices under this slot, and finally restarting the device
 * drivers (which causes a second set of hotplug events to go out to
 * userspace).
 */
void eeh_handle_normal_event(struct eeh_pe *pe)
{
	struct pci_bus *bus;
	struct eeh_dev *edev, *tmp;
	struct eeh_pe *tmp_pe;
	int rc = 0;
	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
	struct eeh_rmv_data rmv_data =
		{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
	int devices = 0;

	bus = eeh_pe_bus_get(pe);
	if (!bus) {
		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
		       __func__, pe->phb->global_number, pe->addr);
		return;
	}

	/*
	 * When devices are hot-removed we might get an EEH due to
	 * a driver attempting to touch the MMIO space of a removed
	 * device. In this case we don't have a device to recover,
	 * so suppress the event if we can't find any present devices.
	 *
	 * The hotplug driver should take care of tearing down the
	 * device itself.
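	 *
	 * eeh_slot_presence_check() is what decides below whether each
	 * device is still physically present.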
	 */
	eeh_for_each_pe(pe, tmp_pe)
		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
			if (eeh_slot_presence_check(edev->pdev))
				devices++;

	if (!devices) {
		pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
			 pe->phb->global_number, pe->addr);
		goto out; /* nothing to recover */
	}

	/* Log the event */
	if (pe->type & EEH_PE_PHB) {
		pr_err("EEH: PHB#%x failure detected, location: %s\n",
		       pe->phb->global_number, eeh_pe_loc_get(pe));
	} else {
		struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);

		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
		       pe->phb->global_number, pe->addr);
		pr_err("EEH: PE location: %s, PHB location: %s\n",
		       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	}

#ifdef CONFIG_STACKTRACE
	/*
	 * Print the saved stack trace now that we've verified there's
	 * something to recover.
	 */
	if (pe->trace_entries) {
		void **ptrs = (void **) pe->stack_trace;
		int i;

		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
		       pe->phb->global_number, pe->addr);

		/* FIXME: Use the same format as dump_stack() */
		pr_err("EEH: Call Trace:\n");
		for (i = 0; i < pe->trace_entries; i++)
			pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);

		pe->trace_entries = 0;
	}
#endif /* CONFIG_STACKTRACE */

	eeh_pe_update_time_stamp(pe);
	pe->freeze_count++;
	if (pe->freeze_count > eeh_max_freezes) {
		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
		       pe->phb->global_number, pe->addr,
		       pe->freeze_count);
		result = PCI_ERS_RESULT_DISCONNECT;
	}

	eeh_for_each_pe(pe, tmp_pe)
		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
			edev->mode &= ~EEH_DEV_NO_HANDLER;

	/* Walk the various device drivers attached to this slot through
	 * a reset sequence, giving each an opportunity to do what it needs
	 * to accomplish the reset. Each child gets a report of the
	 * status ... if any child can't handle the reset, then the entire
	 * slot is dlpar removed and added.
	 *
	 * When the PHB is fenced, we have to issue a reset to recover from
	 * the error. Override the result if necessary so that we only do
	 * a partial hotplug in this case.
	 */
	if (result != PCI_ERS_RESULT_DISCONNECT) {
		pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
			pe->freeze_count, eeh_max_freezes);
		pr_info("EEH: Notify device drivers to shutdown\n");
		eeh_set_channel_state(pe, pci_channel_io_frozen);
		eeh_set_irq_state(pe, false);
		eeh_pe_report("error_detected(IO frozen)", pe,
			      eeh_report_error, &result);
		if ((pe->type & EEH_PE_PHB) &&
		    result != PCI_ERS_RESULT_NONE &&
		    result != PCI_ERS_RESULT_NEED_RESET)
			result = PCI_ERS_RESULT_NEED_RESET;
	}

	/* Get the current PCI slot state. This can take a long time,
	 * sometimes over 300 seconds for certain systems.
	 */
	if (result != PCI_ERS_RESULT_DISCONNECT) {
		rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
		if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
			pr_warn("EEH: Permanent failure\n");
			result = PCI_ERS_RESULT_DISCONNECT;
		}
	}

	/* Since rtas may enable MMIO when posting the error log,
	 * don't post the error log until after all dev drivers
	 * have been informed.
	 */
	if (result != PCI_ERS_RESULT_DISCONNECT) {
		pr_info("EEH: Collect temporary log\n");
		eeh_slot_error_detail(pe, EEH_LOG_TEMP);
	}

	/* If all device drivers were EEH-unaware, then shut
	 * down all of the device drivers, and hope they
	 * go down willingly, without panicking the system.
	 */
	if (result == PCI_ERS_RESULT_NONE) {
		pr_info("EEH: Reset with hotplug activity\n");
		rc = eeh_reset_device(pe, bus, NULL, false);
		if (rc) {
			pr_warn("%s: Unable to reset, err=%d\n",
				__func__, rc);
			result = PCI_ERS_RESULT_DISCONNECT;
		}
	}

	/* If all devices reported they can proceed, then re-enable MMIO */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		pr_info("EEH: Enable I/O for affected devices\n");
		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		if (rc < 0) {
			result = PCI_ERS_RESULT_DISCONNECT;
		} else if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			pr_info("EEH: Notify device drivers to resume I/O\n");
			eeh_pe_report("mmio_enabled", pe,
				      eeh_report_mmio_enabled, &result);
		}
	}

	/* If all devices reported they can proceed, then re-enable DMA */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		pr_info("EEH: Enable DMA for affected devices\n");
		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);

		if (rc < 0) {
			result = PCI_ERS_RESULT_DISCONNECT;
		} else if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			/*
			 * We didn't do a PE reset in this case, so the PE
			 * is still in its frozen state. Clear it before
			 * resuming the PE.
			 */
			eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
			result = PCI_ERS_RESULT_RECOVERED;
		}
	}

	/* If any device called out for a reset, then reset the slot */
	if (result == PCI_ERS_RESULT_NEED_RESET) {
		pr_info("EEH: Reset without hotplug activity\n");
		rc = eeh_reset_device(pe, bus, &rmv_data, true);
		if (rc) {
			pr_warn("%s: Cannot reset, err=%d\n",
				__func__, rc);
			result = PCI_ERS_RESULT_DISCONNECT;
		} else {
			result = PCI_ERS_RESULT_NONE;
			eeh_set_channel_state(pe, pci_channel_io_normal);
			eeh_set_irq_state(pe, true);
			eeh_pe_report("slot_reset", pe, eeh_report_reset,
				      &result);
		}
	}

	if ((result == PCI_ERS_RESULT_RECOVERED) ||
	    (result == PCI_ERS_RESULT_NONE)) {
		/*
		 * Hot-removed VFs should be added back once the PF has
		 * been recovered properly.
		 */
		list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
					 rmv_entry) {
			eeh_add_virt_device(edev);
			list_del(&edev->rmv_entry);
		}

		/* Tell all device drivers that they can resume operations */
		pr_info("EEH: Notify device driver to resume\n");
		eeh_set_channel_state(pe, pci_channel_io_normal);
		eeh_set_irq_state(pe, true);
		eeh_pe_report("resume", pe, eeh_report_resume, NULL);
		eeh_for_each_pe(pe, tmp_pe) {
			eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
				edev->mode &= ~EEH_DEV_NO_HANDLER;
				edev->in_error = false;
			}
		}

		pr_info("EEH: Recovery successful.\n");
	} else {
		/*
		 * About 90% of all real-life EEH failures in the field
		 * are due to poorly seated PCI cards. Only 10% or so are
		 * due to actual, failed cards.
		 */
		pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
		       "Please try reseating or replacing it\n",
		       pe->phb->global_number, pe->addr);

		eeh_slot_error_detail(pe, EEH_LOG_PERM);

		/* Notify all devices that they're about to go down. */
		eeh_set_channel_state(pe, pci_channel_io_perm_failure);
		eeh_set_irq_state(pe, false);
		eeh_pe_report("error_detected(permanent failure)", pe,
			      eeh_report_failure, NULL);

		/* Mark the PE to be removed permanently */
		eeh_pe_state_mark(pe, EEH_PE_REMOVED);

		/*
		 * Shut down the device drivers for good. We mark
		 * all removed devices correctly so that their PCI
		 * config space is not accessed any more.
		 */
		if (pe->type & EEH_PE_VF) {
			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
		} else {
			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);

			pci_lock_rescan_remove();
			pci_hp_remove_devices(bus);
			pci_unlock_rescan_remove();
			/* The passed PE should no longer be used */
			return;
		}
	}

out:
	/*
	 * Clean up any PEs without devices. While marked as EEH_PE_RECOVERING
	 * we don't want to modify the PE tree structure, so we do it here.
	 */
	eeh_pe_cleanup(pe);

	/* clear the slot attention LED for all recovered devices */
	eeh_for_each_pe(pe, tmp_pe)
		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
			eeh_clear_slot_attention(edev->pdev);

	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}

/**
 * eeh_handle_special_event - Handle EEH events without a specific failing PE
 *
 * Called when an EEH event is detected but can't be narrowed down to a
 * specific PE. Iterates through possible failures and handles them as
 * necessary.
 */
void eeh_handle_special_event(void)
{
	struct eeh_pe *pe, *phb_pe, *tmp_pe;
	struct eeh_dev *edev, *tmp_edev;
	struct pci_bus *bus;
	struct pci_controller *hose;
	unsigned long flags;
	int rc;


	do {
		rc = eeh_ops->next_error(&pe);

		switch (rc) {
		case EEH_NEXT_ERR_DEAD_IOC:
			/* Mark all PHBs in dead state */
			eeh_serialize_lock(&flags);

			/* Purge all events */
			eeh_remove_event(NULL, true);

			list_for_each_entry(hose, &hose_list, list_node) {
				phb_pe = eeh_phb_pe_get(hose);
				if (!phb_pe)
					continue;

				eeh_pe_mark_isolated(phb_pe);
			}

			eeh_serialize_unlock(flags);

			break;
		case EEH_NEXT_ERR_FROZEN_PE:
		case EEH_NEXT_ERR_FENCED_PHB:
		case EEH_NEXT_ERR_DEAD_PHB:
			/* Mark the PE in fenced state */
			eeh_serialize_lock(&flags);

			/* Purge all events of the PHB */
			eeh_remove_event(pe, true);

			if (rc != EEH_NEXT_ERR_DEAD_PHB)
				eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
			eeh_pe_mark_isolated(pe);

			eeh_serialize_unlock(flags);

			break;
		case EEH_NEXT_ERR_NONE:
			return;
		default:
			pr_warn("%s: Invalid value %d from next_error()\n",
				__func__, rc);
			return;
		}

		/*
		 * For fenced PHB and frozen PE, it's handled as a normal
		 * event.
		 * We have to remove the affected PHBs for dead PHB
		 * and IOC.
		 */
		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
		    rc == EEH_NEXT_ERR_FENCED_PHB) {
			eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
			eeh_handle_normal_event(pe);
		} else {
			pci_lock_rescan_remove();
			list_for_each_entry(hose, &hose_list, list_node) {
				phb_pe = eeh_phb_pe_get(hose);
				if (!phb_pe ||
				    !(phb_pe->state & EEH_PE_ISOLATED) ||
				    (phb_pe->state & EEH_PE_RECOVERING))
					continue;

				eeh_for_each_pe(pe, tmp_pe)
					eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
						edev->mode &= ~EEH_DEV_NO_HANDLER;

				/* Notify all devices to be down */
				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
				eeh_set_channel_state(pe, pci_channel_io_perm_failure);
				eeh_pe_report(
					"error_detected(permanent failure)", pe,
					eeh_report_failure, NULL);
				bus = eeh_pe_bus_get(phb_pe);
				if (!bus) {
					pr_err("%s: Cannot find PCI bus for "
					       "PHB#%x-PE#%x\n",
					       __func__,
					       pe->phb->global_number,
					       pe->addr);
					break;
				}
				pci_hp_remove_devices(bus);
			}
			pci_unlock_rescan_remove();
		}

		/*
		 * If we have detected a dead IOC, we needn't proceed
		 * any further since all PHBs would have been removed.
		 */
		if (rc == EEH_NEXT_ERR_DEAD_IOC)
			break;
	} while (rc != EEH_NEXT_ERR_NONE);
}