/*
 * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
 * Copyright IBM Corp. 2004 2005
 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
#include <asm/pci-bridge.h>
#include <asm/prom.h>
#include <asm/rtas.h>

struct eeh_rmv_data {
	struct list_head edev_list;
	int removed;
};

/**
 * eeh_pcid_name - Retrieve name of PCI device driver
 * @pdev: PCI device
 *
 * This routine is used to retrieve the name of the PCI device driver,
 * if there is one.
 */
static inline const char *eeh_pcid_name(struct pci_dev *pdev)
{
	if (pdev && pdev->dev.driver)
		return pdev->dev.driver->name;
	return "";
}

/**
 * eeh_pcid_get - Get the PCI device driver
 * @pdev: PCI device
 *
 * The function is used to retrieve the PCI device driver for
 * the indicated PCI device. Besides, we will increase the reference
 * of the PCI device driver to prevent it from being unloaded on
 * the fly. Otherwise, a kernel crash would be seen.
 */
static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
{
	if (!pdev || !pdev->driver)
		return NULL;

	if (!try_module_get(pdev->driver->driver.owner))
		return NULL;

	return pdev->driver;
}

/**
 * eeh_pcid_put - Dereference on the PCI device driver
 * @pdev: PCI device
 *
 * The function is called to release the reference taken by
 * eeh_pcid_get() on the driver of the indicated PCI device.
 */
static inline void eeh_pcid_put(struct pci_dev *pdev)
{
	if (!pdev || !pdev->driver)
		return;

	module_put(pdev->driver->driver.owner);
}

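/*
 * The helpers above are used in a get/use/put pattern by the
 * eeh_report_*() callbacks below, roughly:
 *
 *	driver = eeh_pcid_get(dev);
 *	if (!driver)
 *		return NULL;
 *	...invoke driver->err_handler callbacks...
 *	eeh_pcid_put(dev);
 *
 * Holding the module reference keeps the driver (and its error
 * handlers) from being unloaded while a callback is in flight.
 */
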
/**
 * eeh_disable_irq - Disable interrupt for the recovering device
 * @dev: PCI device
 *
 * This routine must be called when reporting temporary or permanent
 * error to the particular PCI device to disable interrupt of that
 * device. If the device has enabled MSI or MSI-X interrupt, we needn't
 * do real work because EEH should freeze DMA transfers for those PCI
 * devices encountering EEH errors, which includes MSI or MSI-X.
 */
static void eeh_disable_irq(struct pci_dev *dev)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);

	/* Don't disable MSI and MSI-X interrupts. They are
	 * effectively disabled by the DMA Stopped state
	 * when an EEH error occurs.
	 */
	if (dev->msi_enabled || dev->msix_enabled)
		return;

	if (!irq_has_action(dev->irq))
		return;

	edev->mode |= EEH_DEV_IRQ_DISABLED;
	disable_irq_nosync(dev->irq);
}

/**
 * eeh_enable_irq - Enable interrupt for the recovering device
 * @dev: PCI device
 *
 * This routine must be called to enable the interrupt when the
 * failed device is resumed.
 */
static void eeh_enable_irq(struct pci_dev *dev)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);

	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
		/*
		 * FIXME !!!!!
		 *
		 * This is just ass backwards. This maze has
		 * unbalanced irq_enable/disable calls. So instead of
		 * finding the root cause it works around the warning
		 * in the irq_enable code by conditionally calling
		 * into it.
		 *
		 * That's just wrong. The warning in the core code is
		 * there to tell people to fix their asymmetries in
		 * their own code, not by abusing the core information
		 * to avoid it.
		 *
		 * I so wish that the asymmetry would be the other way
		 * round and a few more irq_disable calls render that
		 * shit unusable forever.
		 *
		 * tglx
		 */
		if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
			enable_irq(dev->irq);
	}
}

static bool eeh_dev_removed(struct eeh_dev *edev)
{
	/* EEH device removed ? */
	if (!edev || (edev->mode & EEH_DEV_REMOVED))
		return true;

	return false;
}

static void *eeh_dev_save_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev;

	if (!edev)
		return NULL;

	/*
	 * We cannot access the config space on some adapters.
	 * Otherwise, it will cause a fenced PHB. We don't save
	 * the content in their config space and will restore
	 * from the initial config space saved when the EEH
	 * device is created.
	 */
	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
		return NULL;

	pdev = eeh_dev_to_pci_dev(edev);
	if (!pdev)
		return NULL;

	pci_save_state(pdev);
	return NULL;
}

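/*
 * The eeh_report_*() callbacks below are run against every eeh_dev in a
 * PE via eeh_pe_dev_traverse(). Each callback folds the driver's answer
 * into the cumulative pci_ers_result passed back through "userdata":
 * for error_detected() and mmio_enabled() a PCI_ERS_RESULT_NEED_RESET
 * answer from any driver trumps weaker results and PCI_ERS_RESULT_NONE
 * is replaced by whatever the driver returned, while slot_reset()
 * merges slightly differently (see eeh_report_reset()).
 */
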
/**
 * eeh_report_error - Report pci error to each device driver
 * @data: eeh device
 * @userdata: return value
 *
 * Report an EEH error to each device driver, collect up and
 * merge the device driver responses. Cumulative response
 * passed back in "userdata".
 */
static void *eeh_report_error(void *data, void *userdata)
{
	struct eeh_dev *edev = (struct eeh_dev *)data;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	enum pci_ers_result rc, *res = userdata;
	struct pci_driver *driver;

	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
		return NULL;
	dev->error_state = pci_channel_io_frozen;

	driver = eeh_pcid_get(dev);
	if (!driver) return NULL;

	eeh_disable_irq(dev);

	if (!driver->err_handler ||
	    !driver->err_handler->error_detected) {
		eeh_pcid_put(dev);
		return NULL;
	}

	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);

	/* A driver that needs a reset trumps all others */
	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
	if (*res == PCI_ERS_RESULT_NONE) *res = rc;

	edev->in_error = true;
	eeh_pcid_put(dev);
	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
	return NULL;
}

/**
 * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
 * @data: eeh device
 * @userdata: return value
 *
 * Tells each device driver that IO ports, MMIO and config space I/O
 * are now enabled. Collects up and merges the device driver responses.
 * Cumulative response passed back in "userdata".
 */
static void *eeh_report_mmio_enabled(void *data, void *userdata)
{
	struct eeh_dev *edev = (struct eeh_dev *)data;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	enum pci_ers_result rc, *res = userdata;
	struct pci_driver *driver;

	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
		return NULL;

	driver = eeh_pcid_get(dev);
	if (!driver) return NULL;

	if (!driver->err_handler ||
	    !driver->err_handler->mmio_enabled ||
	    (edev->mode & EEH_DEV_NO_HANDLER)) {
		eeh_pcid_put(dev);
		return NULL;
	}

	rc = driver->err_handler->mmio_enabled(dev);

	/* A driver that needs a reset trumps all others */
	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
	if (*res == PCI_ERS_RESULT_NONE) *res = rc;

	eeh_pcid_put(dev);
	return NULL;
}

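/*
 * Devices flagged EEH_DEV_NO_HANDLER (typically ones that were removed
 * and re-added while recovery was already in progress) are not given
 * the remaining mmio_enabled()/slot_reset()/resume() callbacks rather
 * than handing their drivers a partial recovery sequence; the flag is
 * cleared again in eeh_report_resume().
 */
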
/**
 * eeh_report_reset - Tell device that slot has been reset
 * @data: eeh device
 * @userdata: return value
 *
 * This routine must be called while EEH tries to reset the particular
 * PCI device so that the associated PCI device driver can take
 * some actions, usually saving the data the driver needs so that the
 * driver can work again once the device is recovered.
 */
static void *eeh_report_reset(void *data, void *userdata)
{
	struct eeh_dev *edev = (struct eeh_dev *)data;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	enum pci_ers_result rc, *res = userdata;
	struct pci_driver *driver;

	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
		return NULL;
	dev->error_state = pci_channel_io_normal;

	driver = eeh_pcid_get(dev);
	if (!driver) return NULL;

	eeh_enable_irq(dev);

	if (!driver->err_handler ||
	    !driver->err_handler->slot_reset ||
	    (edev->mode & EEH_DEV_NO_HANDLER) ||
	    (!edev->in_error)) {
		eeh_pcid_put(dev);
		return NULL;
	}

	rc = driver->err_handler->slot_reset(dev);
	if ((*res == PCI_ERS_RESULT_NONE) ||
	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
	if (*res == PCI_ERS_RESULT_DISCONNECT &&
	    rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;

	eeh_pcid_put(dev);
	return NULL;
}

static void *eeh_dev_restore_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev;

	if (!edev)
		return NULL;

	/*
	 * The content of the config space wasn't saved in the first
	 * place because config space is blocked on some adapters.
	 * For those we restore the initial config space that was
	 * saved when the EEH device was created.
	 */
	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
		if (list_is_last(&edev->list, &edev->pe->edevs))
			eeh_pe_restore_bars(edev->pe);

		return NULL;
	}

	pdev = eeh_dev_to_pci_dev(edev);
	if (!pdev)
		return NULL;

	pci_restore_state(pdev);
	return NULL;
}

/**
 * eeh_report_resume - Tell device to resume normal operations
 * @data: eeh device
 * @userdata: return value
 *
 * This routine must be called to notify the device driver that it
 * can resume, so that the driver can do the initialization needed
 * to make the recovered device work again.
 */
static void *eeh_report_resume(void *data, void *userdata)
{
	struct eeh_dev *edev = (struct eeh_dev *)data;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	bool was_in_error;
	struct pci_driver *driver;

	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
		return NULL;
	dev->error_state = pci_channel_io_normal;

	driver = eeh_pcid_get(dev);
	if (!driver) return NULL;

	was_in_error = edev->in_error;
	edev->in_error = false;
	eeh_enable_irq(dev);

	if (!driver->err_handler ||
	    !driver->err_handler->resume ||
	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
		edev->mode &= ~EEH_DEV_NO_HANDLER;
		eeh_pcid_put(dev);
		return NULL;
	}

	driver->err_handler->resume(dev);

	eeh_pcid_put(dev);
	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
#endif
	return NULL;
}

/**
 * eeh_report_failure - Tell device driver that device is dead.
 * @data: eeh device
 * @userdata: return value
 *
 * This informs the device driver that the device is permanently
 * dead, and that no further recovery attempts will be made on it.
 */
static void *eeh_report_failure(void *data, void *userdata)
{
	struct eeh_dev *edev = (struct eeh_dev *)data;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	struct pci_driver *driver;

	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
		return NULL;
	dev->error_state = pci_channel_io_perm_failure;

	driver = eeh_pcid_get(dev);
	if (!driver) return NULL;

	eeh_disable_irq(dev);

	if (!driver->err_handler ||
	    !driver->err_handler->error_detected) {
		eeh_pcid_put(dev);
		return NULL;
	}

	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);

	eeh_pcid_put(dev);
	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
	return NULL;
}

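/*
 * Devices whose drivers cannot take part in recovery (no
 * error_detected()/slot_reset() handlers) are torn down by
 * eeh_rmv_device() below and hot-plugged back in once the PE has been
 * reset. VFs are handled through pci_iov_add_virtfn()/
 * pci_iov_remove_virtfn() instead of a full bus removal and rescan.
 */
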
static void *eeh_add_virt_device(void *data, void *userdata)
{
	struct pci_driver *driver;
	struct eeh_dev *edev = (struct eeh_dev *)data;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);

	if (!(edev->physfn)) {
		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
			__func__, pdn->phb->global_number, pdn->busno,
			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
		return NULL;
	}

	driver = eeh_pcid_get(dev);
	if (driver) {
		eeh_pcid_put(dev);
		if (driver->err_handler)
			return NULL;
	}

#ifdef CONFIG_PCI_IOV
	pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
#endif
	return NULL;
}

static void *eeh_rmv_device(void *data, void *userdata)
{
	struct pci_driver *driver;
	struct eeh_dev *edev = (struct eeh_dev *)data;
	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
	int *removed = rmv_data ? &rmv_data->removed : NULL;

	/*
	 * Actually, we should remove the PCI bridges as well.
	 * However, that would add a lot of complexity, particularly
	 * since some of the devices under a bridge might support
	 * EEH. So for simplicity we only care about plain PCI
	 * devices here.
	 */
	if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
		return NULL;

	/*
	 * We rely on the count-based pcibios_release_device() to
	 * detach permanently offlined PEs. Unfortunately, that's
	 * not reliable enough. We might still have permanently
	 * offlined PEs attached, but we needn't take care of
	 * them and their child devices.
	 */
	if (eeh_dev_removed(edev))
		return NULL;

	driver = eeh_pcid_get(dev);
	if (driver) {
		eeh_pcid_put(dev);
		if (removed &&
		    eeh_pe_passed(edev->pe))
			return NULL;
		if (removed &&
		    driver->err_handler &&
		    driver->err_handler->error_detected &&
		    driver->err_handler->slot_reset)
			return NULL;
	}

	/* Remove it from PCI subsystem */
	pr_debug("EEH: Removing %s without EEH sensitive driver\n",
		 pci_name(dev));
	edev->bus = dev->bus;
	edev->mode |= EEH_DEV_DISCONNECTED;
	if (removed)
		(*removed)++;

	if (edev->physfn) {
#ifdef CONFIG_PCI_IOV
		struct pci_dn *pdn = eeh_dev_to_pdn(edev);

		pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
		edev->pdev = NULL;

		/*
		 * We have to set the VF PE number to an invalid one,
		 * which is required to plug the VF successfully.
		 */
		pdn->pe_number = IODA_INVALID_PE;
#endif
		if (rmv_data)
			list_add(&edev->rmv_list, &rmv_data->edev_list);
	} else {
		pci_lock_rescan_remove();
		pci_stop_and_remove_bus_device(dev);
		pci_unlock_rescan_remove();
	}

	return NULL;
}

static void *eeh_pe_detach_dev(void *data, void *userdata)
{
	struct eeh_pe *pe = (struct eeh_pe *)data;
	struct eeh_dev *edev, *tmp;

	eeh_pe_for_each_dev(pe, edev, tmp) {
		if (!(edev->mode & EEH_DEV_DISCONNECTED))
			continue;

		edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
		eeh_rmv_from_parent_pe(edev);
	}

	return NULL;
}

/*
 * Explicitly clear the PE's frozen state on PowerNV, where the PE
 * is kept frozen until the BAR restore is completed. It's harmless
 * to clear it on pSeries. To be consistent with the PE reset (which
 * is tried 3 times), we try to clear the frozen state up to 3 times
 * as well.
 */
static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
{
	struct eeh_pe *pe = (struct eeh_pe *)data;
	bool clear_sw_state = *(bool *)flag;
	int i, rc = 1;

	for (i = 0; rc && i < 3; i++)
		rc = eeh_unfreeze_pe(pe, clear_sw_state);

	/* Stop immediately on any errors */
	if (rc) {
		pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
			__func__, rc, pe->phb->global_number, pe->addr);
		return (void *)pe;
	}

	return NULL;
}

static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
				     bool clear_sw_state)
{
	void *rc;

	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
	if (!rc)
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return rc ? -EIO : 0;
}

int eeh_pe_reset_and_recover(struct eeh_pe *pe)
{
	int ret;

	/* Bail if the PE is being recovered */
	if (pe->state & EEH_PE_RECOVERING)
		return 0;

	/* Put the PE into recovery mode */
	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);

	/* Save states */
	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);

	/* Issue reset */
	ret = eeh_pe_reset_full(pe);
	if (ret) {
		eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
		return ret;
	}

	/* Unfreeze the PE */
	ret = eeh_clear_pe_frozen_state(pe, true);
	if (ret) {
		eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
		return ret;
	}

	/* Restore device state */
	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);

	/* Clear recovery mode */
	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);

	return 0;
}

/**
 * eeh_reset_device - Perform actual reset of a pci slot
 * @pe: EEH PE
 * @bus: PCI bus corresponding to the isolated slot
 * @rmv_data: bookkeeping for devices that are removed (and later
 *            re-added) during the reset
 *
 * This routine must be called to do a reset on the indicated PE.
 * During the reset, udev might be invoked because the affected
 * PCI devices will be removed and then re-added.
 */
static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
			    struct eeh_rmv_data *rmv_data)
{
	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
	time64_t tstamp;
	int cnt, rc;
	struct eeh_dev *edev;

	/* pcibios will clear the counter; save the value */
	cnt = pe->freeze_count;
	tstamp = pe->tstamp;

	/*
	 * We don't remove the corresponding PE instances because
	 * we need the information afterwards. The attached EEH
	 * devices are expected to be attached soon when calling
	 * into pci_hp_add_devices().
	 */
	eeh_pe_state_mark(pe, EEH_PE_KEEP);
	if (bus) {
		if (pe->type & EEH_PE_VF) {
			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
		} else {
			pci_lock_rescan_remove();
			pci_hp_remove_devices(bus);
			pci_unlock_rescan_remove();
		}
	} else if (frozen_bus) {
		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
	}

	/*
	 * Reset the pci controller. (Asserts RST#; resets config space).
	 * Reconfigure bridges and devices. Don't try to bring the system
	 * up if the reset failed for some reason.
	 *
	 * During the reset, it's very dangerous to have uncontrolled PCI
	 * config accesses. So we prefer to block them. However, controlled
	 * PCI config accesses initiated from EEH itself are allowed.
	 */
	rc = eeh_pe_reset_full(pe);
	if (rc)
		return rc;

	pci_lock_rescan_remove();

	/* Restore PE */
	eeh_ops->configure_bridge(pe);
	eeh_pe_restore_bars(pe);

	/* Clear frozen state */
	rc = eeh_clear_pe_frozen_state(pe, false);
	if (rc) {
		pci_unlock_rescan_remove();
		return rc;
	}

	/* Give the system 5 seconds to finish running the user-space
	 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
	 * this is a hack, but if we don't do this, and try to bring
	 * the device up before the scripts have taken it down,
	 * potentially weird things happen.
	 */
	if (bus) {
		pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
		ssleep(5);

		/*
		 * The EEH device is still connected with its parent
		 * PE. We should disconnect it so the binding can be
		 * rebuilt when adding PCI devices.
		 */
		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
		if (pe->type & EEH_PE_VF) {
			eeh_add_virt_device(edev, NULL);
		} else {
			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
			pci_hp_add_devices(bus);
		}
	} else if (frozen_bus && rmv_data->removed) {
		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
		ssleep(5);

		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
		if (pe->type & EEH_PE_VF)
			eeh_add_virt_device(edev, NULL);
		else
			pci_hp_add_devices(frozen_bus);
	}
	eeh_pe_state_clear(pe, EEH_PE_KEEP);

	pe->tstamp = tstamp;
	pe->freeze_count = cnt;

	pci_unlock_rescan_remove();
	return 0;
}

/* The longest amount of time to wait for a pci device
 * to come back on line, in seconds.
 */
#define MAX_WAIT_FOR_RECOVERY 300

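/*
 * Overall flow of eeh_handle_normal_event(): report error_detected()
 * to every driver in the PE and collect the error log; if no driver
 * handled the error, remove the devices, reset the PE and re-add them;
 * if the drivers can recover, re-enable MMIO and DMA and report
 * mmio_enabled(); if any driver asks for a reset, reset the PE and
 * report slot_reset(); then re-add any hot-removed VFs and report
 * resume(). Unrecoverable results take the hard_fail path, which logs
 * the failure permanently and removes the devices for good.
 */
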
/**
 * eeh_handle_normal_event - Handle EEH events on a specific PE
 * @pe: EEH PE
 *
 * Attempts to recover the given PE. If recovery fails or the PE has failed
 * too many times, remove the PE.
 *
 * Returns true if @pe should no longer be used, else false.
 */
static bool eeh_handle_normal_event(struct eeh_pe *pe)
{
	struct pci_bus *frozen_bus;
	struct eeh_dev *edev, *tmp;
	int rc = 0;
	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};

	frozen_bus = eeh_pe_bus_get(pe);
	if (!frozen_bus) {
		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
		       __func__, pe->phb->global_number, pe->addr);
		return false;
	}

	eeh_pe_update_time_stamp(pe);
	pe->freeze_count++;
	if (pe->freeze_count > eeh_max_freezes) {
		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
		       "last hour and has been permanently disabled.\n",
		       pe->phb->global_number, pe->addr,
		       pe->freeze_count);
		goto hard_fail;
	}
	pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
		pe->freeze_count);

	/* Walk the various device drivers attached to this slot through
	 * a reset sequence, giving each an opportunity to do what it needs
	 * to accomplish the reset. Each child gets a report of the
	 * status ... if any child can't handle the reset, then the entire
	 * slot is dlpar removed and added.
	 *
	 * When the PHB is fenced, we have to issue a reset to recover from
	 * the error. Override the result if necessary to have partial
	 * hotplug for this case.
	 */
	pr_info("EEH: Notify device drivers to shutdown\n");
	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
	if ((pe->type & EEH_PE_PHB) &&
	    result != PCI_ERS_RESULT_NONE &&
	    result != PCI_ERS_RESULT_NEED_RESET)
		result = PCI_ERS_RESULT_NEED_RESET;

	/* Get the current PCI slot state. This can take a long time,
	 * sometimes over 300 seconds for certain systems.
	 */
	rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
		pr_warn("EEH: Permanent failure\n");
		goto hard_fail;
	}

	/* Since rtas may enable MMIO when posting the error log,
	 * don't post the error log until after all dev drivers
	 * have been informed.
	 */
	pr_info("EEH: Collect temporary log\n");
	eeh_slot_error_detail(pe, EEH_LOG_TEMP);

	/* If all device drivers were EEH-unaware, then shut
	 * down all of the device drivers, and hope they
	 * go down willingly, without panicking the system.
	 */
	if (result == PCI_ERS_RESULT_NONE) {
		pr_info("EEH: Reset with hotplug activity\n");
		rc = eeh_reset_device(pe, frozen_bus, NULL);
		if (rc) {
			pr_warn("%s: Unable to reset, err=%d\n",
				__func__, rc);
			goto hard_fail;
		}
	}

	/* If all devices reported they can proceed, then re-enable MMIO */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		pr_info("EEH: Enable I/O for affected devices\n");
		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		if (rc < 0)
			goto hard_fail;
		if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			pr_info("EEH: Notify device drivers to resume I/O\n");
			eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
		}
	}

	/* If all devices reported they can proceed, then re-enable DMA */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		pr_info("EEH: Enabled DMA for affected devices\n");
		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);

		if (rc < 0)
			goto hard_fail;
		if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			/*
			 * We didn't do a PE reset for this case. The PE
			 * is still in frozen state. Clear it before
			 * resuming the PE.
			 */
			eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
			result = PCI_ERS_RESULT_RECOVERED;
		}
	}

	/* If any device has a hard failure, then shut off everything. */
	if (result == PCI_ERS_RESULT_DISCONNECT) {
		pr_warn("EEH: Device driver gave up\n");
		goto hard_fail;
	}

	/* If any device called out for a reset, then reset the slot */
	if (result == PCI_ERS_RESULT_NEED_RESET) {
		pr_info("EEH: Reset without hotplug activity\n");
		rc = eeh_reset_device(pe, NULL, &rmv_data);
		if (rc) {
			pr_warn("%s: Cannot reset, err=%d\n",
				__func__, rc);
			goto hard_fail;
		}

		pr_info("EEH: Notify device drivers "
			"the completion of reset\n");
		result = PCI_ERS_RESULT_NONE;
		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
	}

	/* All devices should claim they have recovered by now. */
	if ((result != PCI_ERS_RESULT_RECOVERED) &&
	    (result != PCI_ERS_RESULT_NONE)) {
		pr_warn("EEH: Not recovered\n");
		goto hard_fail;
	}

	/*
	 * For the VFs that were hot removed, add them back after the
	 * PF has been recovered properly.
	 */
	list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
		eeh_add_virt_device(edev, NULL);
		list_del(&edev->rmv_list);
	}

	/* Tell all device drivers that they can resume operations */
	pr_info("EEH: Notify device driver to resume\n");
	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);

	return false;

hard_fail:
	/*
	 * About 90% of all real-life EEH failures in the field
	 * are due to poorly seated PCI cards. Only 10% or so are
	 * due to actual, failed cards.
	 */
	pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
	       "Please try reseating or replacing it\n",
	       pe->phb->global_number, pe->addr);

	eeh_slot_error_detail(pe, EEH_LOG_PERM);

	/* Notify all devices that they're about to go down. */
	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);

	/* Mark the PE to be removed permanently */
	eeh_pe_state_mark(pe, EEH_PE_REMOVED);

	/*
	 * Shut down the device drivers for good. We mark
	 * all removed devices correctly so that their PCI
	 * config space is not accessed any more.
	 */
	if (frozen_bus) {
		if (pe->type & EEH_PE_VF) {
			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
		} else {
			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);

			pci_lock_rescan_remove();
			pci_hp_remove_devices(frozen_bus);
			pci_unlock_rescan_remove();

			/* The passed PE should no longer be used */
			return true;
		}
	}
	return false;
}

/**
 * eeh_handle_special_event - Handle EEH events without a specific failing PE
 *
 * Called when an EEH event is detected but can't be narrowed down to a
 * specific PE. Iterates through possible failures and handles them as
 * necessary.
 */
static void eeh_handle_special_event(void)
{
	struct eeh_pe *pe, *phb_pe;
	struct pci_bus *bus;
	struct pci_controller *hose;
	unsigned long flags;
	int rc;


	do {
		rc = eeh_ops->next_error(&pe);

		switch (rc) {
		case EEH_NEXT_ERR_DEAD_IOC:
			/* Mark all PHBs in dead state */
			eeh_serialize_lock(&flags);

			/* Purge all events */
			eeh_remove_event(NULL, true);

			list_for_each_entry(hose, &hose_list, list_node) {
				phb_pe = eeh_phb_pe_get(hose);
				if (!phb_pe) continue;

				eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
			}

			eeh_serialize_unlock(flags);

			break;
		case EEH_NEXT_ERR_FROZEN_PE:
		case EEH_NEXT_ERR_FENCED_PHB:
		case EEH_NEXT_ERR_DEAD_PHB:
			/* Mark the PE in fenced state */
			eeh_serialize_lock(&flags);

			/* Purge all events of the PHB */
			eeh_remove_event(pe, true);

			if (rc == EEH_NEXT_ERR_DEAD_PHB)
				eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
			else
				eeh_pe_state_mark(pe,
					EEH_PE_ISOLATED | EEH_PE_RECOVERING);

			eeh_serialize_unlock(flags);

			break;
		case EEH_NEXT_ERR_NONE:
			return;
		default:
			pr_warn("%s: Invalid value %d from next_error()\n",
				__func__, rc);
			return;
		}

		/*
		 * A fenced PHB or frozen PE is handled as a normal
		 * event. For a dead PHB or dead IOC we have to remove
		 * the affected PHBs instead.
		 */
		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
		    rc == EEH_NEXT_ERR_FENCED_PHB) {
			/*
			 * eeh_handle_normal_event() can make the PE stale if it
			 * determines that the PE cannot possibly be recovered.
			 * Don't modify the PE state if that's the case.
			 */
			if (eeh_handle_normal_event(pe))
				continue;

			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
		} else {
			pci_lock_rescan_remove();
			list_for_each_entry(hose, &hose_list, list_node) {
				phb_pe = eeh_phb_pe_get(hose);
				if (!phb_pe ||
				    !(phb_pe->state & EEH_PE_ISOLATED) ||
				    (phb_pe->state & EEH_PE_RECOVERING))
					continue;

				/* Notify all devices to be down */
				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
				eeh_pe_dev_traverse(pe,
					eeh_report_failure, NULL);
				bus = eeh_pe_bus_get(phb_pe);
				if (!bus) {
					pr_err("%s: Cannot find PCI bus for "
					       "PHB#%x-PE#%x\n",
					       __func__,
					       pe->phb->global_number,
					       pe->addr);
					break;
				}
				pci_hp_remove_devices(bus);
			}
			pci_unlock_rescan_remove();
		}

		/*
		 * If we have detected a dead IOC, we needn't proceed
		 * any further since all PHBs would have been removed.
		 */
		if (rc == EEH_NEXT_ERR_DEAD_IOC)
			break;
	} while (rc != EEH_NEXT_ERR_NONE);
}

/**
 * eeh_handle_event - Reset a PCI device after hard lockup.
 * @pe: EEH PE
 *
 * When a PHB detects address or data parity errors on a particular PCI
 * slot, the associated PE will be frozen. Besides, DMAs to wild
 * addresses (which usually happen due to bugs in device drivers or in
 * PCI adapter firmware) can also cause EEH errors, as can #SERR, #PERR
 * or other miscellaneous PCI-related errors.
 *
 * The recovery process consists of unplugging the device driver (which
 * generates hotplug events to userspace), then issuing a PCI #RST to
 * the device, then reconfiguring the PCI config space for all bridges
 * & devices under this slot, and finally restarting the device
 * drivers (which causes a second set of hotplug events to go out to
 * userspace).
 */
void eeh_handle_event(struct eeh_pe *pe)
{
	if (pe)
		eeh_handle_normal_event(pe);
	else
		eeh_handle_special_event();
}
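
/*
 * For reference, a minimal sketch (not part of this file's build) of
 * the driver-side hooks that the eeh_report_*() callbacks above invoke.
 * The names foo_error_detected, foo_slot_reset, foo_resume,
 * foo_err_handler and foo_driver are hypothetical; a real driver wires
 * its own functions into struct pci_error_handlers:
 *
 *	static pci_ers_result_t foo_error_detected(struct pci_dev *pdev,
 *					enum pci_channel_state state)
 *	{
 *		if (state == pci_channel_io_perm_failure)
 *			return PCI_ERS_RESULT_DISCONNECT;
 *		... stop I/O and wait for slot_reset() ...
 *		return PCI_ERS_RESULT_NEED_RESET;
 *	}
 *
 *	static pci_ers_result_t foo_slot_reset(struct pci_dev *pdev)
 *	{
 *		... re-initialize the device after the PE reset ...
 *		return PCI_ERS_RESULT_RECOVERED;
 *	}
 *
 *	static void foo_resume(struct pci_dev *pdev)
 *	{
 *		... restart normal I/O ...
 *	}
 *
 *	static const struct pci_error_handlers foo_err_handler = {
 *		.error_detected	= foo_error_detected,
 *		.slot_reset	= foo_slot_reset,
 *		.resume		= foo_resume,
 *	};
 *
 *	static struct pci_driver foo_driver = {
 *		...
 *		.err_handler	= &foo_err_handler,
 *	};
 */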