1 /* 2 * PCI Error Recovery Driver for RPA-compliant PPC64 platform. 3 * Copyright IBM Corp. 2004 2005 4 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005 5 * 6 * All rights reserved. 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or (at 11 * your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 16 * NON INFRINGEMENT. See the GNU General Public License for more 17 * details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * 23 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com> 24 */ 25 #include <linux/delay.h> 26 #include <linux/interrupt.h> 27 #include <linux/irq.h> 28 #include <linux/module.h> 29 #include <linux/pci.h> 30 #include <asm/eeh.h> 31 #include <asm/eeh_event.h> 32 #include <asm/ppc-pci.h> 33 #include <asm/pci-bridge.h> 34 #include <asm/prom.h> 35 #include <asm/rtas.h> 36 37 struct eeh_rmv_data { 38 struct list_head edev_list; 39 int removed; 40 }; 41 42 /** 43 * eeh_pcid_name - Retrieve name of PCI device driver 44 * @pdev: PCI device 45 * 46 * This routine is used to retrieve the name of PCI device driver 47 * if that's valid. 48 */ 49 static inline const char *eeh_pcid_name(struct pci_dev *pdev) 50 { 51 if (pdev && pdev->dev.driver) 52 return pdev->dev.driver->name; 53 return ""; 54 } 55 56 /** 57 * eeh_pcid_get - Get the PCI device driver 58 * @pdev: PCI device 59 * 60 * The function is used to retrieve the PCI device driver for 61 * the indicated PCI device. Besides, we will increase the reference 62 * of the PCI device driver to prevent that being unloaded on 63 * the fly. Otherwise, kernel crash would be seen. 64 */ 65 static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) 66 { 67 if (!pdev || !pdev->driver) 68 return NULL; 69 70 if (!try_module_get(pdev->driver->driver.owner)) 71 return NULL; 72 73 return pdev->driver; 74 } 75 76 /** 77 * eeh_pcid_put - Dereference on the PCI device driver 78 * @pdev: PCI device 79 * 80 * The function is called to do dereference on the PCI device 81 * driver of the indicated PCI device. 82 */ 83 static inline void eeh_pcid_put(struct pci_dev *pdev) 84 { 85 if (!pdev || !pdev->driver) 86 return; 87 88 module_put(pdev->driver->driver.owner); 89 } 90 91 /** 92 * eeh_disable_irq - Disable interrupt for the recovering device 93 * @dev: PCI device 94 * 95 * This routine must be called when reporting temporary or permanent 96 * error to the particular PCI device to disable interrupt of that 97 * device. If the device has enabled MSI or MSI-X interrupt, we needn't 98 * do real work because EEH should freeze DMA transfers for those PCI 99 * devices encountering EEH errors, which includes MSI or MSI-X. 100 */ 101 static void eeh_disable_irq(struct pci_dev *dev) 102 { 103 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 104 105 /* Don't disable MSI and MSI-X interrupts. They are 106 * effectively disabled by the DMA Stopped state 107 * when an EEH error occurs. 108 */ 109 if (dev->msi_enabled || dev->msix_enabled) 110 return; 111 112 if (!irq_has_action(dev->irq)) 113 return; 114 115 edev->mode |= EEH_DEV_IRQ_DISABLED; 116 disable_irq_nosync(dev->irq); 117 } 118 119 /** 120 * eeh_enable_irq - Enable interrupt for the recovering device 121 * @dev: PCI device 122 * 123 * This routine must be called to enable interrupt while failed 124 * device could be resumed. 125 */ 126 static void eeh_enable_irq(struct pci_dev *dev) 127 { 128 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 129 130 if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { 131 edev->mode &= ~EEH_DEV_IRQ_DISABLED; 132 /* 133 * FIXME !!!!! 134 * 135 * This is just ass backwards. This maze has 136 * unbalanced irq_enable/disable calls. So instead of 137 * finding the root cause it works around the warning 138 * in the irq_enable code by conditionally calling 139 * into it. 140 * 141 * That's just wrong.The warning in the core code is 142 * there to tell people to fix their asymmetries in 143 * their own code, not by abusing the core information 144 * to avoid it. 145 * 146 * I so wish that the assymetry would be the other way 147 * round and a few more irq_disable calls render that 148 * shit unusable forever. 149 * 150 * tglx 151 */ 152 if (irqd_irq_disabled(irq_get_irq_data(dev->irq))) 153 enable_irq(dev->irq); 154 } 155 } 156 157 static bool eeh_dev_removed(struct eeh_dev *edev) 158 { 159 /* EEH device removed ? */ 160 if (!edev || (edev->mode & EEH_DEV_REMOVED)) 161 return true; 162 163 return false; 164 } 165 166 static void *eeh_dev_save_state(void *data, void *userdata) 167 { 168 struct eeh_dev *edev = data; 169 struct pci_dev *pdev; 170 171 if (!edev) 172 return NULL; 173 174 /* 175 * We cannot access the config space on some adapters. 176 * Otherwise, it will cause fenced PHB. We don't save 177 * the content in their config space and will restore 178 * from the initial config space saved when the EEH 179 * device is created. 180 */ 181 if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) 182 return NULL; 183 184 pdev = eeh_dev_to_pci_dev(edev); 185 if (!pdev) 186 return NULL; 187 188 pci_save_state(pdev); 189 return NULL; 190 } 191 192 /** 193 * eeh_report_error - Report pci error to each device driver 194 * @data: eeh device 195 * @userdata: return value 196 * 197 * Report an EEH error to each device driver, collect up and 198 * merge the device driver responses. Cumulative response 199 * passed back in "userdata". 200 */ 201 static void *eeh_report_error(void *data, void *userdata) 202 { 203 struct eeh_dev *edev = (struct eeh_dev *)data; 204 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 205 enum pci_ers_result rc, *res = userdata; 206 struct pci_driver *driver; 207 208 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 209 return NULL; 210 dev->error_state = pci_channel_io_frozen; 211 212 driver = eeh_pcid_get(dev); 213 if (!driver) return NULL; 214 215 eeh_disable_irq(dev); 216 217 if (!driver->err_handler || 218 !driver->err_handler->error_detected) { 219 eeh_pcid_put(dev); 220 return NULL; 221 } 222 223 rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); 224 225 /* A driver that needs a reset trumps all others */ 226 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 227 if (*res == PCI_ERS_RESULT_NONE) *res = rc; 228 229 edev->in_error = true; 230 eeh_pcid_put(dev); 231 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); 232 return NULL; 233 } 234 235 /** 236 * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled 237 * @data: eeh device 238 * @userdata: return value 239 * 240 * Tells each device driver that IO ports, MMIO and config space I/O 241 * are now enabled. Collects up and merges the device driver responses. 242 * Cumulative response passed back in "userdata". 243 */ 244 static void *eeh_report_mmio_enabled(void *data, void *userdata) 245 { 246 struct eeh_dev *edev = (struct eeh_dev *)data; 247 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 248 enum pci_ers_result rc, *res = userdata; 249 struct pci_driver *driver; 250 251 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 252 return NULL; 253 254 driver = eeh_pcid_get(dev); 255 if (!driver) return NULL; 256 257 if (!driver->err_handler || 258 !driver->err_handler->mmio_enabled || 259 (edev->mode & EEH_DEV_NO_HANDLER)) { 260 eeh_pcid_put(dev); 261 return NULL; 262 } 263 264 rc = driver->err_handler->mmio_enabled(dev); 265 266 /* A driver that needs a reset trumps all others */ 267 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 268 if (*res == PCI_ERS_RESULT_NONE) *res = rc; 269 270 eeh_pcid_put(dev); 271 return NULL; 272 } 273 274 /** 275 * eeh_report_reset - Tell device that slot has been reset 276 * @data: eeh device 277 * @userdata: return value 278 * 279 * This routine must be called while EEH tries to reset particular 280 * PCI device so that the associated PCI device driver could take 281 * some actions, usually to save data the driver needs so that the 282 * driver can work again while the device is recovered. 283 */ 284 static void *eeh_report_reset(void *data, void *userdata) 285 { 286 struct eeh_dev *edev = (struct eeh_dev *)data; 287 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 288 enum pci_ers_result rc, *res = userdata; 289 struct pci_driver *driver; 290 291 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 292 return NULL; 293 dev->error_state = pci_channel_io_normal; 294 295 driver = eeh_pcid_get(dev); 296 if (!driver) return NULL; 297 298 eeh_enable_irq(dev); 299 300 if (!driver->err_handler || 301 !driver->err_handler->slot_reset || 302 (edev->mode & EEH_DEV_NO_HANDLER) || 303 (!edev->in_error)) { 304 eeh_pcid_put(dev); 305 return NULL; 306 } 307 308 rc = driver->err_handler->slot_reset(dev); 309 if ((*res == PCI_ERS_RESULT_NONE) || 310 (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; 311 if (*res == PCI_ERS_RESULT_DISCONNECT && 312 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 313 314 eeh_pcid_put(dev); 315 return NULL; 316 } 317 318 static void *eeh_dev_restore_state(void *data, void *userdata) 319 { 320 struct eeh_dev *edev = data; 321 struct pci_dev *pdev; 322 323 if (!edev) 324 return NULL; 325 326 /* 327 * The content in the config space isn't saved because 328 * the blocked config space on some adapters. We have 329 * to restore the initial saved config space when the 330 * EEH device is created. 331 */ 332 if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) { 333 if (list_is_last(&edev->list, &edev->pe->edevs)) 334 eeh_pe_restore_bars(edev->pe); 335 336 return NULL; 337 } 338 339 pdev = eeh_dev_to_pci_dev(edev); 340 if (!pdev) 341 return NULL; 342 343 pci_restore_state(pdev); 344 return NULL; 345 } 346 347 /** 348 * eeh_report_resume - Tell device to resume normal operations 349 * @data: eeh device 350 * @userdata: return value 351 * 352 * This routine must be called to notify the device driver that it 353 * could resume so that the device driver can do some initialization 354 * to make the recovered device work again. 355 */ 356 static void *eeh_report_resume(void *data, void *userdata) 357 { 358 struct eeh_dev *edev = (struct eeh_dev *)data; 359 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 360 bool was_in_error; 361 struct pci_driver *driver; 362 363 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 364 return NULL; 365 dev->error_state = pci_channel_io_normal; 366 367 driver = eeh_pcid_get(dev); 368 if (!driver) return NULL; 369 370 was_in_error = edev->in_error; 371 edev->in_error = false; 372 eeh_enable_irq(dev); 373 374 if (!driver->err_handler || 375 !driver->err_handler->resume || 376 (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { 377 edev->mode &= ~EEH_DEV_NO_HANDLER; 378 eeh_pcid_put(dev); 379 return NULL; 380 } 381 382 driver->err_handler->resume(dev); 383 384 eeh_pcid_put(dev); 385 pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); 386 #ifdef CONFIG_PCI_IOV 387 eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); 388 #endif 389 return NULL; 390 } 391 392 /** 393 * eeh_report_failure - Tell device driver that device is dead. 394 * @data: eeh device 395 * @userdata: return value 396 * 397 * This informs the device driver that the device is permanently 398 * dead, and that no further recovery attempts will be made on it. 399 */ 400 static void *eeh_report_failure(void *data, void *userdata) 401 { 402 struct eeh_dev *edev = (struct eeh_dev *)data; 403 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 404 struct pci_driver *driver; 405 406 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 407 return NULL; 408 dev->error_state = pci_channel_io_perm_failure; 409 410 driver = eeh_pcid_get(dev); 411 if (!driver) return NULL; 412 413 eeh_disable_irq(dev); 414 415 if (!driver->err_handler || 416 !driver->err_handler->error_detected) { 417 eeh_pcid_put(dev); 418 return NULL; 419 } 420 421 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); 422 423 eeh_pcid_put(dev); 424 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); 425 return NULL; 426 } 427 428 static void *eeh_add_virt_device(void *data, void *userdata) 429 { 430 struct pci_driver *driver; 431 struct eeh_dev *edev = (struct eeh_dev *)data; 432 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 433 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 434 435 if (!(edev->physfn)) { 436 pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", 437 __func__, pdn->phb->global_number, pdn->busno, 438 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 439 return NULL; 440 } 441 442 driver = eeh_pcid_get(dev); 443 if (driver) { 444 eeh_pcid_put(dev); 445 if (driver->err_handler) 446 return NULL; 447 } 448 449 #ifdef CONFIG_PCI_IOV 450 pci_iov_add_virtfn(edev->physfn, pdn->vf_index); 451 #endif 452 return NULL; 453 } 454 455 static void *eeh_rmv_device(void *data, void *userdata) 456 { 457 struct pci_driver *driver; 458 struct eeh_dev *edev = (struct eeh_dev *)data; 459 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 460 struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; 461 int *removed = rmv_data ? &rmv_data->removed : NULL; 462 463 /* 464 * Actually, we should remove the PCI bridges as well. 465 * However, that's lots of complexity to do that, 466 * particularly some of devices under the bridge might 467 * support EEH. So we just care about PCI devices for 468 * simplicity here. 469 */ 470 if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) 471 return NULL; 472 473 /* 474 * We rely on count-based pcibios_release_device() to 475 * detach permanently offlined PEs. Unfortunately, that's 476 * not reliable enough. We might have the permanently 477 * offlined PEs attached, but we needn't take care of 478 * them and their child devices. 479 */ 480 if (eeh_dev_removed(edev)) 481 return NULL; 482 483 driver = eeh_pcid_get(dev); 484 if (driver) { 485 eeh_pcid_put(dev); 486 if (removed && 487 eeh_pe_passed(edev->pe)) 488 return NULL; 489 if (removed && 490 driver->err_handler && 491 driver->err_handler->error_detected && 492 driver->err_handler->slot_reset) 493 return NULL; 494 } 495 496 /* Remove it from PCI subsystem */ 497 pr_debug("EEH: Removing %s without EEH sensitive driver\n", 498 pci_name(dev)); 499 edev->bus = dev->bus; 500 edev->mode |= EEH_DEV_DISCONNECTED; 501 if (removed) 502 (*removed)++; 503 504 if (edev->physfn) { 505 #ifdef CONFIG_PCI_IOV 506 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 507 508 pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); 509 edev->pdev = NULL; 510 511 /* 512 * We have to set the VF PE number to invalid one, which is 513 * required to plug the VF successfully. 514 */ 515 pdn->pe_number = IODA_INVALID_PE; 516 #endif 517 if (rmv_data) 518 list_add(&edev->rmv_list, &rmv_data->edev_list); 519 } else { 520 pci_lock_rescan_remove(); 521 pci_stop_and_remove_bus_device(dev); 522 pci_unlock_rescan_remove(); 523 } 524 525 return NULL; 526 } 527 528 static void *eeh_pe_detach_dev(void *data, void *userdata) 529 { 530 struct eeh_pe *pe = (struct eeh_pe *)data; 531 struct eeh_dev *edev, *tmp; 532 533 eeh_pe_for_each_dev(pe, edev, tmp) { 534 if (!(edev->mode & EEH_DEV_DISCONNECTED)) 535 continue; 536 537 edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); 538 eeh_rmv_from_parent_pe(edev); 539 } 540 541 return NULL; 542 } 543 544 /* 545 * Explicitly clear PE's frozen state for PowerNV where 546 * we have frozen PE until BAR restore is completed. It's 547 * harmless to clear it for pSeries. To be consistent with 548 * PE reset (for 3 times), we try to clear the frozen state 549 * for 3 times as well. 550 */ 551 static void *__eeh_clear_pe_frozen_state(void *data, void *flag) 552 { 553 struct eeh_pe *pe = (struct eeh_pe *)data; 554 bool clear_sw_state = *(bool *)flag; 555 int i, rc = 1; 556 557 for (i = 0; rc && i < 3; i++) 558 rc = eeh_unfreeze_pe(pe, clear_sw_state); 559 560 /* Stop immediately on any errors */ 561 if (rc) { 562 pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n", 563 __func__, rc, pe->phb->global_number, pe->addr); 564 return (void *)pe; 565 } 566 567 return NULL; 568 } 569 570 static int eeh_clear_pe_frozen_state(struct eeh_pe *pe, 571 bool clear_sw_state) 572 { 573 void *rc; 574 575 rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state); 576 if (!rc) 577 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 578 579 return rc ? -EIO : 0; 580 } 581 582 int eeh_pe_reset_and_recover(struct eeh_pe *pe) 583 { 584 int ret; 585 586 /* Bail if the PE is being recovered */ 587 if (pe->state & EEH_PE_RECOVERING) 588 return 0; 589 590 /* Put the PE into recovery mode */ 591 eeh_pe_state_mark(pe, EEH_PE_RECOVERING); 592 593 /* Save states */ 594 eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); 595 596 /* Issue reset */ 597 ret = eeh_pe_reset_full(pe); 598 if (ret) { 599 eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 600 return ret; 601 } 602 603 /* Unfreeze the PE */ 604 ret = eeh_clear_pe_frozen_state(pe, true); 605 if (ret) { 606 eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 607 return ret; 608 } 609 610 /* Restore device state */ 611 eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL); 612 613 /* Clear recovery mode */ 614 eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 615 616 return 0; 617 } 618 619 /** 620 * eeh_reset_device - Perform actual reset of a pci slot 621 * @pe: EEH PE 622 * @bus: PCI bus corresponding to the isolcated slot 623 * 624 * This routine must be called to do reset on the indicated PE. 625 * During the reset, udev might be invoked because those affected 626 * PCI devices will be removed and then added. 627 */ 628 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, 629 struct eeh_rmv_data *rmv_data) 630 { 631 struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); 632 time64_t tstamp; 633 int cnt, rc; 634 struct eeh_dev *edev; 635 636 /* pcibios will clear the counter; save the value */ 637 cnt = pe->freeze_count; 638 tstamp = pe->tstamp; 639 640 /* 641 * We don't remove the corresponding PE instances because 642 * we need the information afterwords. The attached EEH 643 * devices are expected to be attached soon when calling 644 * into pci_hp_add_devices(). 645 */ 646 eeh_pe_state_mark(pe, EEH_PE_KEEP); 647 if (bus) { 648 if (pe->type & EEH_PE_VF) { 649 eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 650 } else { 651 pci_lock_rescan_remove(); 652 pci_hp_remove_devices(bus); 653 pci_unlock_rescan_remove(); 654 } 655 } else if (frozen_bus) { 656 eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); 657 } 658 659 /* 660 * Reset the pci controller. (Asserts RST#; resets config space). 661 * Reconfigure bridges and devices. Don't try to bring the system 662 * up if the reset failed for some reason. 663 * 664 * During the reset, it's very dangerous to have uncontrolled PCI 665 * config accesses. So we prefer to block them. However, controlled 666 * PCI config accesses initiated from EEH itself are allowed. 667 */ 668 rc = eeh_pe_reset_full(pe); 669 if (rc) 670 return rc; 671 672 pci_lock_rescan_remove(); 673 674 /* Restore PE */ 675 eeh_ops->configure_bridge(pe); 676 eeh_pe_restore_bars(pe); 677 678 /* Clear frozen state */ 679 rc = eeh_clear_pe_frozen_state(pe, false); 680 if (rc) { 681 pci_unlock_rescan_remove(); 682 return rc; 683 } 684 685 /* Give the system 5 seconds to finish running the user-space 686 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, 687 * this is a hack, but if we don't do this, and try to bring 688 * the device up before the scripts have taken it down, 689 * potentially weird things happen. 690 */ 691 if (bus) { 692 pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); 693 ssleep(5); 694 695 /* 696 * The EEH device is still connected with its parent 697 * PE. We should disconnect it so the binding can be 698 * rebuilt when adding PCI devices. 699 */ 700 edev = list_first_entry(&pe->edevs, struct eeh_dev, list); 701 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); 702 if (pe->type & EEH_PE_VF) { 703 eeh_add_virt_device(edev, NULL); 704 } else { 705 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 706 pci_hp_add_devices(bus); 707 } 708 } else if (frozen_bus && rmv_data->removed) { 709 pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); 710 ssleep(5); 711 712 edev = list_first_entry(&pe->edevs, struct eeh_dev, list); 713 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); 714 if (pe->type & EEH_PE_VF) 715 eeh_add_virt_device(edev, NULL); 716 else 717 pci_hp_add_devices(frozen_bus); 718 } 719 eeh_pe_state_clear(pe, EEH_PE_KEEP); 720 721 pe->tstamp = tstamp; 722 pe->freeze_count = cnt; 723 724 pci_unlock_rescan_remove(); 725 return 0; 726 } 727 728 /* The longest amount of time to wait for a pci device 729 * to come back on line, in seconds. 730 */ 731 #define MAX_WAIT_FOR_RECOVERY 300 732 733 /** 734 * eeh_handle_normal_event - Handle EEH events on a specific PE 735 * @pe: EEH PE 736 * 737 * Attempts to recover the given PE. If recovery fails or the PE has failed 738 * too many times, remove the PE. 739 * 740 * Returns true if @pe should no longer be used, else false. 741 */ 742 static bool eeh_handle_normal_event(struct eeh_pe *pe) 743 { 744 struct pci_bus *frozen_bus; 745 struct eeh_dev *edev, *tmp; 746 int rc = 0; 747 enum pci_ers_result result = PCI_ERS_RESULT_NONE; 748 struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; 749 750 frozen_bus = eeh_pe_bus_get(pe); 751 if (!frozen_bus) { 752 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", 753 __func__, pe->phb->global_number, pe->addr); 754 return false; 755 } 756 757 eeh_pe_update_time_stamp(pe); 758 pe->freeze_count++; 759 if (pe->freeze_count > eeh_max_freezes) { 760 pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" 761 "last hour and has been permanently disabled.\n", 762 pe->phb->global_number, pe->addr, 763 pe->freeze_count); 764 goto hard_fail; 765 } 766 pr_warn("EEH: This PCI device has failed %d times in the last hour\n", 767 pe->freeze_count); 768 769 /* Walk the various device drivers attached to this slot through 770 * a reset sequence, giving each an opportunity to do what it needs 771 * to accomplish the reset. Each child gets a report of the 772 * status ... if any child can't handle the reset, then the entire 773 * slot is dlpar removed and added. 774 * 775 * When the PHB is fenced, we have to issue a reset to recover from 776 * the error. Override the result if necessary to have partially 777 * hotplug for this case. 778 */ 779 pr_info("EEH: Notify device drivers to shutdown\n"); 780 eeh_pe_dev_traverse(pe, eeh_report_error, &result); 781 if ((pe->type & EEH_PE_PHB) && 782 result != PCI_ERS_RESULT_NONE && 783 result != PCI_ERS_RESULT_NEED_RESET) 784 result = PCI_ERS_RESULT_NEED_RESET; 785 786 /* Get the current PCI slot state. This can take a long time, 787 * sometimes over 300 seconds for certain systems. 788 */ 789 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); 790 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 791 pr_warn("EEH: Permanent failure\n"); 792 goto hard_fail; 793 } 794 795 /* Since rtas may enable MMIO when posting the error log, 796 * don't post the error log until after all dev drivers 797 * have been informed. 798 */ 799 pr_info("EEH: Collect temporary log\n"); 800 eeh_slot_error_detail(pe, EEH_LOG_TEMP); 801 802 /* If all device drivers were EEH-unaware, then shut 803 * down all of the device drivers, and hope they 804 * go down willingly, without panicing the system. 805 */ 806 if (result == PCI_ERS_RESULT_NONE) { 807 pr_info("EEH: Reset with hotplug activity\n"); 808 rc = eeh_reset_device(pe, frozen_bus, NULL); 809 if (rc) { 810 pr_warn("%s: Unable to reset, err=%d\n", 811 __func__, rc); 812 goto hard_fail; 813 } 814 } 815 816 /* If all devices reported they can proceed, then re-enable MMIO */ 817 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 818 pr_info("EEH: Enable I/O for affected devices\n"); 819 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 820 821 if (rc < 0) 822 goto hard_fail; 823 if (rc) { 824 result = PCI_ERS_RESULT_NEED_RESET; 825 } else { 826 pr_info("EEH: Notify device drivers to resume I/O\n"); 827 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); 828 } 829 } 830 831 /* If all devices reported they can proceed, then re-enable DMA */ 832 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 833 pr_info("EEH: Enabled DMA for affected devices\n"); 834 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 835 836 if (rc < 0) 837 goto hard_fail; 838 if (rc) { 839 result = PCI_ERS_RESULT_NEED_RESET; 840 } else { 841 /* 842 * We didn't do PE reset for the case. The PE 843 * is still in frozen state. Clear it before 844 * resuming the PE. 845 */ 846 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 847 result = PCI_ERS_RESULT_RECOVERED; 848 } 849 } 850 851 /* If any device has a hard failure, then shut off everything. */ 852 if (result == PCI_ERS_RESULT_DISCONNECT) { 853 pr_warn("EEH: Device driver gave up\n"); 854 goto hard_fail; 855 } 856 857 /* If any device called out for a reset, then reset the slot */ 858 if (result == PCI_ERS_RESULT_NEED_RESET) { 859 pr_info("EEH: Reset without hotplug activity\n"); 860 rc = eeh_reset_device(pe, NULL, &rmv_data); 861 if (rc) { 862 pr_warn("%s: Cannot reset, err=%d\n", 863 __func__, rc); 864 goto hard_fail; 865 } 866 867 pr_info("EEH: Notify device drivers " 868 "the completion of reset\n"); 869 result = PCI_ERS_RESULT_NONE; 870 eeh_pe_dev_traverse(pe, eeh_report_reset, &result); 871 } 872 873 /* All devices should claim they have recovered by now. */ 874 if ((result != PCI_ERS_RESULT_RECOVERED) && 875 (result != PCI_ERS_RESULT_NONE)) { 876 pr_warn("EEH: Not recovered\n"); 877 goto hard_fail; 878 } 879 880 /* 881 * For those hot removed VFs, we should add back them after PF get 882 * recovered properly. 883 */ 884 list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { 885 eeh_add_virt_device(edev, NULL); 886 list_del(&edev->rmv_list); 887 } 888 889 /* Tell all device drivers that they can resume operations */ 890 pr_info("EEH: Notify device driver to resume\n"); 891 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); 892 893 return false; 894 895 hard_fail: 896 /* 897 * About 90% of all real-life EEH failures in the field 898 * are due to poorly seated PCI cards. Only 10% or so are 899 * due to actual, failed cards. 900 */ 901 pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" 902 "Please try reseating or replacing it\n", 903 pe->phb->global_number, pe->addr); 904 905 eeh_slot_error_detail(pe, EEH_LOG_PERM); 906 907 /* Notify all devices that they're about to go down. */ 908 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); 909 910 /* Mark the PE to be removed permanently */ 911 eeh_pe_state_mark(pe, EEH_PE_REMOVED); 912 913 /* 914 * Shut down the device drivers for good. We mark 915 * all removed devices correctly to avoid access 916 * the their PCI config any more. 917 */ 918 if (frozen_bus) { 919 if (pe->type & EEH_PE_VF) { 920 eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 921 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 922 } else { 923 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 924 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 925 926 pci_lock_rescan_remove(); 927 pci_hp_remove_devices(frozen_bus); 928 pci_unlock_rescan_remove(); 929 930 /* The passed PE should no longer be used */ 931 return true; 932 } 933 } 934 return false; 935 } 936 937 /** 938 * eeh_handle_special_event - Handle EEH events without a specific failing PE 939 * 940 * Called when an EEH event is detected but can't be narrowed down to a 941 * specific PE. Iterates through possible failures and handles them as 942 * necessary. 943 */ 944 static void eeh_handle_special_event(void) 945 { 946 struct eeh_pe *pe, *phb_pe; 947 struct pci_bus *bus; 948 struct pci_controller *hose; 949 unsigned long flags; 950 int rc; 951 952 953 do { 954 rc = eeh_ops->next_error(&pe); 955 956 switch (rc) { 957 case EEH_NEXT_ERR_DEAD_IOC: 958 /* Mark all PHBs in dead state */ 959 eeh_serialize_lock(&flags); 960 961 /* Purge all events */ 962 eeh_remove_event(NULL, true); 963 964 list_for_each_entry(hose, &hose_list, list_node) { 965 phb_pe = eeh_phb_pe_get(hose); 966 if (!phb_pe) continue; 967 968 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 969 } 970 971 eeh_serialize_unlock(flags); 972 973 break; 974 case EEH_NEXT_ERR_FROZEN_PE: 975 case EEH_NEXT_ERR_FENCED_PHB: 976 case EEH_NEXT_ERR_DEAD_PHB: 977 /* Mark the PE in fenced state */ 978 eeh_serialize_lock(&flags); 979 980 /* Purge all events of the PHB */ 981 eeh_remove_event(pe, true); 982 983 if (rc == EEH_NEXT_ERR_DEAD_PHB) 984 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 985 else 986 eeh_pe_state_mark(pe, 987 EEH_PE_ISOLATED | EEH_PE_RECOVERING); 988 989 eeh_serialize_unlock(flags); 990 991 break; 992 case EEH_NEXT_ERR_NONE: 993 return; 994 default: 995 pr_warn("%s: Invalid value %d from next_error()\n", 996 __func__, rc); 997 return; 998 } 999 1000 /* 1001 * For fenced PHB and frozen PE, it's handled as normal 1002 * event. We have to remove the affected PHBs for dead 1003 * PHB and IOC 1004 */ 1005 if (rc == EEH_NEXT_ERR_FROZEN_PE || 1006 rc == EEH_NEXT_ERR_FENCED_PHB) { 1007 /* 1008 * eeh_handle_normal_event() can make the PE stale if it 1009 * determines that the PE cannot possibly be recovered. 1010 * Don't modify the PE state if that's the case. 1011 */ 1012 if (eeh_handle_normal_event(pe)) 1013 continue; 1014 1015 eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 1016 } else { 1017 pci_lock_rescan_remove(); 1018 list_for_each_entry(hose, &hose_list, list_node) { 1019 phb_pe = eeh_phb_pe_get(hose); 1020 if (!phb_pe || 1021 !(phb_pe->state & EEH_PE_ISOLATED) || 1022 (phb_pe->state & EEH_PE_RECOVERING)) 1023 continue; 1024 1025 /* Notify all devices to be down */ 1026 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 1027 eeh_pe_dev_traverse(pe, 1028 eeh_report_failure, NULL); 1029 bus = eeh_pe_bus_get(phb_pe); 1030 if (!bus) { 1031 pr_err("%s: Cannot find PCI bus for " 1032 "PHB#%x-PE#%x\n", 1033 __func__, 1034 pe->phb->global_number, 1035 pe->addr); 1036 break; 1037 } 1038 pci_hp_remove_devices(bus); 1039 } 1040 pci_unlock_rescan_remove(); 1041 } 1042 1043 /* 1044 * If we have detected dead IOC, we needn't proceed 1045 * any more since all PHBs would have been removed 1046 */ 1047 if (rc == EEH_NEXT_ERR_DEAD_IOC) 1048 break; 1049 } while (rc != EEH_NEXT_ERR_NONE); 1050 } 1051 1052 /** 1053 * eeh_handle_event - Reset a PCI device after hard lockup. 1054 * @pe: EEH PE 1055 * 1056 * While PHB detects address or data parity errors on particular PCI 1057 * slot, the associated PE will be frozen. Besides, DMA's occurring 1058 * to wild addresses (which usually happen due to bugs in device 1059 * drivers or in PCI adapter firmware) can cause EEH error. #SERR, 1060 * #PERR or other misc PCI-related errors also can trigger EEH errors. 1061 * 1062 * Recovery process consists of unplugging the device driver (which 1063 * generated hotplug events to userspace), then issuing a PCI #RST to 1064 * the device, then reconfiguring the PCI config space for all bridges 1065 * & devices under this slot, and then finally restarting the device 1066 * drivers (which cause a second set of hotplug events to go out to 1067 * userspace). 1068 */ 1069 void eeh_handle_event(struct eeh_pe *pe) 1070 { 1071 if (pe) 1072 eeh_handle_normal_event(pe); 1073 else 1074 eeh_handle_special_event(); 1075 } 1076