1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * The file intends to implement the platform dependent EEH operations on 4 * powernv platform. Actually, the powernv was created in order to fully 5 * hypervisor support. 6 * 7 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. 8 */ 9 10 #include <linux/atomic.h> 11 #include <linux/debugfs.h> 12 #include <linux/delay.h> 13 #include <linux/export.h> 14 #include <linux/init.h> 15 #include <linux/interrupt.h> 16 #include <linux/list.h> 17 #include <linux/msi.h> 18 #include <linux/of.h> 19 #include <linux/pci.h> 20 #include <linux/proc_fs.h> 21 #include <linux/rbtree.h> 22 #include <linux/sched.h> 23 #include <linux/seq_file.h> 24 #include <linux/spinlock.h> 25 26 #include <asm/eeh.h> 27 #include <asm/eeh_event.h> 28 #include <asm/firmware.h> 29 #include <asm/io.h> 30 #include <asm/iommu.h> 31 #include <asm/machdep.h> 32 #include <asm/msi_bitmap.h> 33 #include <asm/opal.h> 34 #include <asm/ppc-pci.h> 35 #include <asm/pnv-pci.h> 36 37 #include "powernv.h" 38 #include "pci.h" 39 40 static int eeh_event_irq = -EINVAL; 41 42 void pnv_pcibios_bus_add_device(struct pci_dev *pdev) 43 { 44 struct pci_dn *pdn = pci_get_pdn(pdev); 45 46 if (!pdev->is_virtfn) 47 return; 48 49 /* 50 * The following operations will fail if VF's sysfs files 51 * aren't created or its resources aren't finalized. 52 */ 53 eeh_add_device_early(pdn); 54 eeh_add_device_late(pdev); 55 eeh_sysfs_add_device(pdev); 56 } 57 58 static int pnv_eeh_init(void) 59 { 60 struct pci_controller *hose; 61 struct pnv_phb *phb; 62 int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; 63 64 if (!firmware_has_feature(FW_FEATURE_OPAL)) { 65 pr_warn("%s: OPAL is required !\n", 66 __func__); 67 return -EINVAL; 68 } 69 70 /* Set probe mode */ 71 eeh_add_flag(EEH_PROBE_MODE_DEV); 72 73 /* 74 * P7IOC blocks PCI config access to frozen PE, but PHB3 75 * doesn't do that. So we have to selectively enable I/O 76 * prior to collecting error log. 77 */ 78 list_for_each_entry(hose, &hose_list, list_node) { 79 phb = hose->private_data; 80 81 if (phb->model == PNV_PHB_MODEL_P7IOC) 82 eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); 83 84 if (phb->diag_data_size > max_diag_size) 85 max_diag_size = phb->diag_data_size; 86 87 /* 88 * PE#0 should be regarded as valid by EEH core 89 * if it's not the reserved one. Currently, we 90 * have the reserved PE#255 and PE#127 for PHB3 91 * and P7IOC separately. So we should regard 92 * PE#0 as valid for PHB3 and P7IOC. 93 */ 94 if (phb->ioda.reserved_pe_idx != 0) 95 eeh_add_flag(EEH_VALID_PE_ZERO); 96 97 break; 98 } 99 100 eeh_set_pe_aux_size(max_diag_size); 101 ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; 102 103 return 0; 104 } 105 106 static irqreturn_t pnv_eeh_event(int irq, void *data) 107 { 108 /* 109 * We simply send a special EEH event if EEH has been 110 * enabled. We don't care about EEH events until we've 111 * finished processing the outstanding ones. Event processing 112 * gets unmasked in next_error() if EEH is enabled. 113 */ 114 disable_irq_nosync(irq); 115 116 if (eeh_enabled()) 117 eeh_send_failure_event(NULL); 118 119 return IRQ_HANDLED; 120 } 121 122 #ifdef CONFIG_DEBUG_FS 123 static ssize_t pnv_eeh_ei_write(struct file *filp, 124 const char __user *user_buf, 125 size_t count, loff_t *ppos) 126 { 127 struct pci_controller *hose = filp->private_data; 128 struct eeh_pe *pe; 129 int pe_no, type, func; 130 unsigned long addr, mask; 131 char buf[50]; 132 int ret; 133 134 if (!eeh_ops || !eeh_ops->err_inject) 135 return -ENXIO; 136 137 /* Copy over argument buffer */ 138 ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); 139 if (!ret) 140 return -EFAULT; 141 142 /* Retrieve parameters */ 143 ret = sscanf(buf, "%x:%x:%x:%lx:%lx", 144 &pe_no, &type, &func, &addr, &mask); 145 if (ret != 5) 146 return -EINVAL; 147 148 /* Retrieve PE */ 149 pe = eeh_pe_get(hose, pe_no, 0); 150 if (!pe) 151 return -ENODEV; 152 153 /* Do error injection */ 154 ret = eeh_ops->err_inject(pe, type, func, addr, mask); 155 return ret < 0 ? ret : count; 156 } 157 158 static const struct file_operations pnv_eeh_ei_fops = { 159 .open = simple_open, 160 .llseek = no_llseek, 161 .write = pnv_eeh_ei_write, 162 }; 163 164 static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val) 165 { 166 struct pci_controller *hose = data; 167 struct pnv_phb *phb = hose->private_data; 168 169 out_be64(phb->regs + offset, val); 170 return 0; 171 } 172 173 static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) 174 { 175 struct pci_controller *hose = data; 176 struct pnv_phb *phb = hose->private_data; 177 178 *val = in_be64(phb->regs + offset); 179 return 0; 180 } 181 182 #define PNV_EEH_DBGFS_ENTRY(name, reg) \ 183 static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \ 184 { \ 185 return pnv_eeh_dbgfs_set(data, reg, val); \ 186 } \ 187 \ 188 static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \ 189 { \ 190 return pnv_eeh_dbgfs_get(data, reg, val); \ 191 } \ 192 \ 193 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \ 194 pnv_eeh_dbgfs_get_##name, \ 195 pnv_eeh_dbgfs_set_##name, \ 196 "0x%llx\n") 197 198 PNV_EEH_DBGFS_ENTRY(outb, 0xD10); 199 PNV_EEH_DBGFS_ENTRY(inbA, 0xD90); 200 PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); 201 202 #endif /* CONFIG_DEBUG_FS */ 203 204 /** 205 * pnv_eeh_post_init - EEH platform dependent post initialization 206 * 207 * EEH platform dependent post initialization on powernv. When 208 * the function is called, the EEH PEs and devices should have 209 * been built. If the I/O cache staff has been built, EEH is 210 * ready to supply service. 211 */ 212 int pnv_eeh_post_init(void) 213 { 214 struct pci_controller *hose; 215 struct pnv_phb *phb; 216 int ret = 0; 217 218 /* Probe devices & build address cache */ 219 eeh_probe_devices(); 220 eeh_addr_cache_build(); 221 222 /* Register OPAL event notifier */ 223 eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); 224 if (eeh_event_irq < 0) { 225 pr_err("%s: Can't register OPAL event interrupt (%d)\n", 226 __func__, eeh_event_irq); 227 return eeh_event_irq; 228 } 229 230 ret = request_irq(eeh_event_irq, pnv_eeh_event, 231 IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); 232 if (ret < 0) { 233 irq_dispose_mapping(eeh_event_irq); 234 pr_err("%s: Can't request OPAL event interrupt (%d)\n", 235 __func__, eeh_event_irq); 236 return ret; 237 } 238 239 if (!eeh_enabled()) 240 disable_irq(eeh_event_irq); 241 242 list_for_each_entry(hose, &hose_list, list_node) { 243 phb = hose->private_data; 244 245 /* 246 * If EEH is enabled, we're going to rely on that. 247 * Otherwise, we restore to conventional mechanism 248 * to clear frozen PE during PCI config access. 249 */ 250 if (eeh_enabled()) 251 phb->flags |= PNV_PHB_FLAG_EEH; 252 else 253 phb->flags &= ~PNV_PHB_FLAG_EEH; 254 255 /* Create debugfs entries */ 256 #ifdef CONFIG_DEBUG_FS 257 if (phb->has_dbgfs || !phb->dbgfs) 258 continue; 259 260 phb->has_dbgfs = 1; 261 debugfs_create_file("err_injct", 0200, 262 phb->dbgfs, hose, 263 &pnv_eeh_ei_fops); 264 265 debugfs_create_file("err_injct_outbound", 0600, 266 phb->dbgfs, hose, 267 &pnv_eeh_dbgfs_ops_outb); 268 debugfs_create_file("err_injct_inboundA", 0600, 269 phb->dbgfs, hose, 270 &pnv_eeh_dbgfs_ops_inbA); 271 debugfs_create_file("err_injct_inboundB", 0600, 272 phb->dbgfs, hose, 273 &pnv_eeh_dbgfs_ops_inbB); 274 #endif /* CONFIG_DEBUG_FS */ 275 } 276 277 return ret; 278 } 279 280 static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap) 281 { 282 int pos = PCI_CAPABILITY_LIST; 283 int cnt = 48; /* Maximal number of capabilities */ 284 u32 status, id; 285 286 if (!pdn) 287 return 0; 288 289 /* Check if the device supports capabilities */ 290 pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status); 291 if (!(status & PCI_STATUS_CAP_LIST)) 292 return 0; 293 294 while (cnt--) { 295 pnv_pci_cfg_read(pdn, pos, 1, &pos); 296 if (pos < 0x40) 297 break; 298 299 pos &= ~3; 300 pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id); 301 if (id == 0xff) 302 break; 303 304 /* Found */ 305 if (id == cap) 306 return pos; 307 308 /* Next one */ 309 pos += PCI_CAP_LIST_NEXT; 310 } 311 312 return 0; 313 } 314 315 static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) 316 { 317 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 318 u32 header; 319 int pos = 256, ttl = (4096 - 256) / 8; 320 321 if (!edev || !edev->pcie_cap) 322 return 0; 323 if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) 324 return 0; 325 else if (!header) 326 return 0; 327 328 while (ttl-- > 0) { 329 if (PCI_EXT_CAP_ID(header) == cap && pos) 330 return pos; 331 332 pos = PCI_EXT_CAP_NEXT(header); 333 if (pos < 256) 334 break; 335 336 if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) 337 break; 338 } 339 340 return 0; 341 } 342 343 /** 344 * pnv_eeh_probe - Do probe on PCI device 345 * @pdn: PCI device node 346 * @data: unused 347 * 348 * When EEH module is installed during system boot, all PCI devices 349 * are checked one by one to see if it supports EEH. The function 350 * is introduced for the purpose. By default, EEH has been enabled 351 * on all PCI devices. That's to say, we only need do necessary 352 * initialization on the corresponding eeh device and create PE 353 * accordingly. 354 * 355 * It's notable that's unsafe to retrieve the EEH device through 356 * the corresponding PCI device. During the PCI device hotplug, which 357 * was possiblly triggered by EEH core, the binding between EEH device 358 * and the PCI device isn't built yet. 359 */ 360 static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) 361 { 362 struct pci_controller *hose = pdn->phb; 363 struct pnv_phb *phb = hose->private_data; 364 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 365 uint32_t pcie_flags; 366 int ret; 367 int config_addr = (pdn->busno << 8) | (pdn->devfn); 368 369 /* 370 * When probing the root bridge, which doesn't have any 371 * subordinate PCI devices. We don't have OF node for 372 * the root bridge. So it's not reasonable to continue 373 * the probing. 374 */ 375 if (!edev || edev->pe) 376 return NULL; 377 378 /* Skip for PCI-ISA bridge */ 379 if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) 380 return NULL; 381 382 /* Initialize eeh device */ 383 edev->class_code = pdn->class_code; 384 edev->mode &= 0xFFFFFF00; 385 edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); 386 edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); 387 edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); 388 edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); 389 if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { 390 edev->mode |= EEH_DEV_BRIDGE; 391 if (edev->pcie_cap) { 392 pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, 393 2, &pcie_flags); 394 pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; 395 if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) 396 edev->mode |= EEH_DEV_ROOT_PORT; 397 else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) 398 edev->mode |= EEH_DEV_DS_PORT; 399 } 400 } 401 402 edev->pe_config_addr = phb->ioda.pe_rmap[config_addr]; 403 404 /* Create PE */ 405 ret = eeh_add_to_parent_pe(edev); 406 if (ret) { 407 pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%x)\n", 408 __func__, hose->global_number, pdn->busno, 409 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret); 410 return NULL; 411 } 412 413 /* 414 * If the PE contains any one of following adapters, the 415 * PCI config space can't be accessed when dumping EEH log. 416 * Otherwise, we will run into fenced PHB caused by shortage 417 * of outbound credits in the adapter. The PCI config access 418 * should be blocked until PE reset. MMIO access is dropped 419 * by hardware certainly. In order to drop PCI config requests, 420 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which 421 * will be checked in the backend for PE state retrival. If 422 * the PE becomes frozen for the first time and the flag has 423 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for 424 * that PE to block its config space. 425 * 426 * Broadcom BCM5718 2-ports NICs (14e4:1656) 427 * Broadcom Austin 4-ports NICs (14e4:1657) 428 * Broadcom Shiner 4-ports 1G NICs (14e4:168a) 429 * Broadcom Shiner 2-ports 10G NICs (14e4:168e) 430 */ 431 if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && 432 pdn->device_id == 0x1656) || 433 (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && 434 pdn->device_id == 0x1657) || 435 (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && 436 pdn->device_id == 0x168a) || 437 (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && 438 pdn->device_id == 0x168e)) 439 edev->pe->state |= EEH_PE_CFG_RESTRICTED; 440 441 /* 442 * Cache the PE primary bus, which can't be fetched when 443 * full hotplug is in progress. In that case, all child 444 * PCI devices of the PE are expected to be removed prior 445 * to PE reset. 446 */ 447 if (!(edev->pe->state & EEH_PE_PRI_BUS)) { 448 edev->pe->bus = pci_find_bus(hose->global_number, 449 pdn->busno); 450 if (edev->pe->bus) 451 edev->pe->state |= EEH_PE_PRI_BUS; 452 } 453 454 /* 455 * Enable EEH explicitly so that we will do EEH check 456 * while accessing I/O stuff 457 */ 458 eeh_add_flag(EEH_ENABLED); 459 460 /* Save memory bars */ 461 eeh_save_bars(edev); 462 463 return NULL; 464 } 465 466 /** 467 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable 468 * @pe: EEH PE 469 * @option: operation to be issued 470 * 471 * The function is used to control the EEH functionality globally. 472 * Currently, following options are support according to PAPR: 473 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA 474 */ 475 static int pnv_eeh_set_option(struct eeh_pe *pe, int option) 476 { 477 struct pci_controller *hose = pe->phb; 478 struct pnv_phb *phb = hose->private_data; 479 bool freeze_pe = false; 480 int opt; 481 s64 rc; 482 483 switch (option) { 484 case EEH_OPT_DISABLE: 485 return -EPERM; 486 case EEH_OPT_ENABLE: 487 return 0; 488 case EEH_OPT_THAW_MMIO: 489 opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; 490 break; 491 case EEH_OPT_THAW_DMA: 492 opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; 493 break; 494 case EEH_OPT_FREEZE_PE: 495 freeze_pe = true; 496 opt = OPAL_EEH_ACTION_SET_FREEZE_ALL; 497 break; 498 default: 499 pr_warn("%s: Invalid option %d\n", __func__, option); 500 return -EINVAL; 501 } 502 503 /* Freeze master and slave PEs if PHB supports compound PEs */ 504 if (freeze_pe) { 505 if (phb->freeze_pe) { 506 phb->freeze_pe(phb, pe->addr); 507 return 0; 508 } 509 510 rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt); 511 if (rc != OPAL_SUCCESS) { 512 pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", 513 __func__, rc, phb->hose->global_number, 514 pe->addr); 515 return -EIO; 516 } 517 518 return 0; 519 } 520 521 /* Unfreeze master and slave PEs if PHB supports */ 522 if (phb->unfreeze_pe) 523 return phb->unfreeze_pe(phb, pe->addr, opt); 524 525 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt); 526 if (rc != OPAL_SUCCESS) { 527 pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n", 528 __func__, rc, option, phb->hose->global_number, 529 pe->addr); 530 return -EIO; 531 } 532 533 return 0; 534 } 535 536 /** 537 * pnv_eeh_get_pe_addr - Retrieve PE address 538 * @pe: EEH PE 539 * 540 * Retrieve the PE address according to the given tranditional 541 * PCI BDF (Bus/Device/Function) address. 542 */ 543 static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) 544 { 545 return pe->addr; 546 } 547 548 static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) 549 { 550 struct pnv_phb *phb = pe->phb->private_data; 551 s64 rc; 552 553 rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, 554 phb->diag_data_size); 555 if (rc != OPAL_SUCCESS) 556 pr_warn("%s: Failure %lld getting PHB#%x diag-data\n", 557 __func__, rc, pe->phb->global_number); 558 } 559 560 static int pnv_eeh_get_phb_state(struct eeh_pe *pe) 561 { 562 struct pnv_phb *phb = pe->phb->private_data; 563 u8 fstate = 0; 564 __be16 pcierr = 0; 565 s64 rc; 566 int result = 0; 567 568 rc = opal_pci_eeh_freeze_status(phb->opal_id, 569 pe->addr, 570 &fstate, 571 &pcierr, 572 NULL); 573 if (rc != OPAL_SUCCESS) { 574 pr_warn("%s: Failure %lld getting PHB#%x state\n", 575 __func__, rc, phb->hose->global_number); 576 return EEH_STATE_NOT_SUPPORT; 577 } 578 579 /* 580 * Check PHB state. If the PHB is frozen for the 581 * first time, to dump the PHB diag-data. 582 */ 583 if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { 584 result = (EEH_STATE_MMIO_ACTIVE | 585 EEH_STATE_DMA_ACTIVE | 586 EEH_STATE_MMIO_ENABLED | 587 EEH_STATE_DMA_ENABLED); 588 } else if (!(pe->state & EEH_PE_ISOLATED)) { 589 eeh_pe_mark_isolated(pe); 590 pnv_eeh_get_phb_diag(pe); 591 592 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 593 pnv_pci_dump_phb_diag_data(pe->phb, pe->data); 594 } 595 596 return result; 597 } 598 599 static int pnv_eeh_get_pe_state(struct eeh_pe *pe) 600 { 601 struct pnv_phb *phb = pe->phb->private_data; 602 u8 fstate = 0; 603 __be16 pcierr = 0; 604 s64 rc; 605 int result; 606 607 /* 608 * We don't clobber hardware frozen state until PE 609 * reset is completed. In order to keep EEH core 610 * moving forward, we have to return operational 611 * state during PE reset. 612 */ 613 if (pe->state & EEH_PE_RESET) { 614 result = (EEH_STATE_MMIO_ACTIVE | 615 EEH_STATE_DMA_ACTIVE | 616 EEH_STATE_MMIO_ENABLED | 617 EEH_STATE_DMA_ENABLED); 618 return result; 619 } 620 621 /* 622 * Fetch PE state from hardware. If the PHB 623 * supports compound PE, let it handle that. 624 */ 625 if (phb->get_pe_state) { 626 fstate = phb->get_pe_state(phb, pe->addr); 627 } else { 628 rc = opal_pci_eeh_freeze_status(phb->opal_id, 629 pe->addr, 630 &fstate, 631 &pcierr, 632 NULL); 633 if (rc != OPAL_SUCCESS) { 634 pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", 635 __func__, rc, phb->hose->global_number, 636 pe->addr); 637 return EEH_STATE_NOT_SUPPORT; 638 } 639 } 640 641 /* Figure out state */ 642 switch (fstate) { 643 case OPAL_EEH_STOPPED_NOT_FROZEN: 644 result = (EEH_STATE_MMIO_ACTIVE | 645 EEH_STATE_DMA_ACTIVE | 646 EEH_STATE_MMIO_ENABLED | 647 EEH_STATE_DMA_ENABLED); 648 break; 649 case OPAL_EEH_STOPPED_MMIO_FREEZE: 650 result = (EEH_STATE_DMA_ACTIVE | 651 EEH_STATE_DMA_ENABLED); 652 break; 653 case OPAL_EEH_STOPPED_DMA_FREEZE: 654 result = (EEH_STATE_MMIO_ACTIVE | 655 EEH_STATE_MMIO_ENABLED); 656 break; 657 case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: 658 result = 0; 659 break; 660 case OPAL_EEH_STOPPED_RESET: 661 result = EEH_STATE_RESET_ACTIVE; 662 break; 663 case OPAL_EEH_STOPPED_TEMP_UNAVAIL: 664 result = EEH_STATE_UNAVAILABLE; 665 break; 666 case OPAL_EEH_STOPPED_PERM_UNAVAIL: 667 result = EEH_STATE_NOT_SUPPORT; 668 break; 669 default: 670 result = EEH_STATE_NOT_SUPPORT; 671 pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", 672 __func__, phb->hose->global_number, 673 pe->addr, fstate); 674 } 675 676 /* 677 * If PHB supports compound PE, to freeze all 678 * slave PEs for consistency. 679 * 680 * If the PE is switching to frozen state for the 681 * first time, to dump the PHB diag-data. 682 */ 683 if (!(result & EEH_STATE_NOT_SUPPORT) && 684 !(result & EEH_STATE_UNAVAILABLE) && 685 !(result & EEH_STATE_MMIO_ACTIVE) && 686 !(result & EEH_STATE_DMA_ACTIVE) && 687 !(pe->state & EEH_PE_ISOLATED)) { 688 if (phb->freeze_pe) 689 phb->freeze_pe(phb, pe->addr); 690 691 eeh_pe_mark_isolated(pe); 692 pnv_eeh_get_phb_diag(pe); 693 694 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 695 pnv_pci_dump_phb_diag_data(pe->phb, pe->data); 696 } 697 698 return result; 699 } 700 701 /** 702 * pnv_eeh_get_state - Retrieve PE state 703 * @pe: EEH PE 704 * @delay: delay while PE state is temporarily unavailable 705 * 706 * Retrieve the state of the specified PE. For IODA-compitable 707 * platform, it should be retrieved from IODA table. Therefore, 708 * we prefer passing down to hardware implementation to handle 709 * it. 710 */ 711 static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) 712 { 713 int ret; 714 715 if (pe->type & EEH_PE_PHB) 716 ret = pnv_eeh_get_phb_state(pe); 717 else 718 ret = pnv_eeh_get_pe_state(pe); 719 720 if (!delay) 721 return ret; 722 723 /* 724 * If the PE state is temporarily unavailable, 725 * to inform the EEH core delay for default 726 * period (1 second) 727 */ 728 *delay = 0; 729 if (ret & EEH_STATE_UNAVAILABLE) 730 *delay = 1000; 731 732 return ret; 733 } 734 735 static s64 pnv_eeh_poll(unsigned long id) 736 { 737 s64 rc = OPAL_HARDWARE; 738 739 while (1) { 740 rc = opal_pci_poll(id); 741 if (rc <= 0) 742 break; 743 744 if (system_state < SYSTEM_RUNNING) 745 udelay(1000 * rc); 746 else 747 msleep(rc); 748 } 749 750 return rc; 751 } 752 753 int pnv_eeh_phb_reset(struct pci_controller *hose, int option) 754 { 755 struct pnv_phb *phb = hose->private_data; 756 s64 rc = OPAL_HARDWARE; 757 758 pr_debug("%s: Reset PHB#%x, option=%d\n", 759 __func__, hose->global_number, option); 760 761 /* Issue PHB complete reset request */ 762 if (option == EEH_RESET_FUNDAMENTAL || 763 option == EEH_RESET_HOT) 764 rc = opal_pci_reset(phb->opal_id, 765 OPAL_RESET_PHB_COMPLETE, 766 OPAL_ASSERT_RESET); 767 else if (option == EEH_RESET_DEACTIVATE) 768 rc = opal_pci_reset(phb->opal_id, 769 OPAL_RESET_PHB_COMPLETE, 770 OPAL_DEASSERT_RESET); 771 if (rc < 0) 772 goto out; 773 774 /* 775 * Poll state of the PHB until the request is done 776 * successfully. The PHB reset is usually PHB complete 777 * reset followed by hot reset on root bus. So we also 778 * need the PCI bus settlement delay. 779 */ 780 if (rc > 0) 781 rc = pnv_eeh_poll(phb->opal_id); 782 if (option == EEH_RESET_DEACTIVATE) { 783 if (system_state < SYSTEM_RUNNING) 784 udelay(1000 * EEH_PE_RST_SETTLE_TIME); 785 else 786 msleep(EEH_PE_RST_SETTLE_TIME); 787 } 788 out: 789 if (rc != OPAL_SUCCESS) 790 return -EIO; 791 792 return 0; 793 } 794 795 static int pnv_eeh_root_reset(struct pci_controller *hose, int option) 796 { 797 struct pnv_phb *phb = hose->private_data; 798 s64 rc = OPAL_HARDWARE; 799 800 pr_debug("%s: Reset PHB#%x, option=%d\n", 801 __func__, hose->global_number, option); 802 803 /* 804 * During the reset deassert time, we needn't care 805 * the reset scope because the firmware does nothing 806 * for fundamental or hot reset during deassert phase. 807 */ 808 if (option == EEH_RESET_FUNDAMENTAL) 809 rc = opal_pci_reset(phb->opal_id, 810 OPAL_RESET_PCI_FUNDAMENTAL, 811 OPAL_ASSERT_RESET); 812 else if (option == EEH_RESET_HOT) 813 rc = opal_pci_reset(phb->opal_id, 814 OPAL_RESET_PCI_HOT, 815 OPAL_ASSERT_RESET); 816 else if (option == EEH_RESET_DEACTIVATE) 817 rc = opal_pci_reset(phb->opal_id, 818 OPAL_RESET_PCI_HOT, 819 OPAL_DEASSERT_RESET); 820 if (rc < 0) 821 goto out; 822 823 /* Poll state of the PHB until the request is done */ 824 if (rc > 0) 825 rc = pnv_eeh_poll(phb->opal_id); 826 if (option == EEH_RESET_DEACTIVATE) 827 msleep(EEH_PE_RST_SETTLE_TIME); 828 out: 829 if (rc != OPAL_SUCCESS) 830 return -EIO; 831 832 return 0; 833 } 834 835 static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) 836 { 837 struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 838 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 839 int aer = edev ? edev->aer_cap : 0; 840 u32 ctrl; 841 842 pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", 843 __func__, pci_domain_nr(dev->bus), 844 dev->bus->number, option); 845 846 switch (option) { 847 case EEH_RESET_FUNDAMENTAL: 848 case EEH_RESET_HOT: 849 /* Don't report linkDown event */ 850 if (aer) { 851 eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, 852 4, &ctrl); 853 ctrl |= PCI_ERR_UNC_SURPDN; 854 eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, 855 4, ctrl); 856 } 857 858 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); 859 ctrl |= PCI_BRIDGE_CTL_BUS_RESET; 860 eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); 861 862 msleep(EEH_PE_RST_HOLD_TIME); 863 break; 864 case EEH_RESET_DEACTIVATE: 865 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); 866 ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; 867 eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); 868 869 msleep(EEH_PE_RST_SETTLE_TIME); 870 871 /* Continue reporting linkDown event */ 872 if (aer) { 873 eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, 874 4, &ctrl); 875 ctrl &= ~PCI_ERR_UNC_SURPDN; 876 eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, 877 4, ctrl); 878 } 879 880 break; 881 } 882 883 return 0; 884 } 885 886 static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option) 887 { 888 struct pci_controller *hose = pci_bus_to_host(pdev->bus); 889 struct pnv_phb *phb = hose->private_data; 890 struct device_node *dn = pci_device_to_OF_node(pdev); 891 uint64_t id = PCI_SLOT_ID(phb->opal_id, 892 (pdev->bus->number << 8) | pdev->devfn); 893 uint8_t scope; 894 int64_t rc; 895 896 /* Hot reset to the bus if firmware cannot handle */ 897 if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL)) 898 return __pnv_eeh_bridge_reset(pdev, option); 899 900 switch (option) { 901 case EEH_RESET_FUNDAMENTAL: 902 scope = OPAL_RESET_PCI_FUNDAMENTAL; 903 break; 904 case EEH_RESET_HOT: 905 scope = OPAL_RESET_PCI_HOT; 906 break; 907 case EEH_RESET_DEACTIVATE: 908 return 0; 909 default: 910 dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n", 911 __func__, option); 912 return -EINVAL; 913 } 914 915 rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET); 916 if (rc <= OPAL_SUCCESS) 917 goto out; 918 919 rc = pnv_eeh_poll(id); 920 out: 921 return (rc == OPAL_SUCCESS) ? 0 : -EIO; 922 } 923 924 void pnv_pci_reset_secondary_bus(struct pci_dev *dev) 925 { 926 struct pci_controller *hose; 927 928 if (pci_is_root_bus(dev->bus)) { 929 hose = pci_bus_to_host(dev->bus); 930 pnv_eeh_root_reset(hose, EEH_RESET_HOT); 931 pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); 932 } else { 933 pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); 934 pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); 935 } 936 } 937 938 static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, 939 int pos, u16 mask) 940 { 941 int i, status = 0; 942 943 /* Wait for Transaction Pending bit to be cleared */ 944 for (i = 0; i < 4; i++) { 945 eeh_ops->read_config(pdn, pos, 2, &status); 946 if (!(status & mask)) 947 return; 948 949 msleep((1 << i) * 100); 950 } 951 952 pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n", 953 __func__, type, 954 pdn->phb->global_number, pdn->busno, 955 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 956 } 957 958 static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) 959 { 960 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 961 u32 reg = 0; 962 963 if (WARN_ON(!edev->pcie_cap)) 964 return -ENOTTY; 965 966 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); 967 if (!(reg & PCI_EXP_DEVCAP_FLR)) 968 return -ENOTTY; 969 970 switch (option) { 971 case EEH_RESET_HOT: 972 case EEH_RESET_FUNDAMENTAL: 973 pnv_eeh_wait_for_pending(pdn, "", 974 edev->pcie_cap + PCI_EXP_DEVSTA, 975 PCI_EXP_DEVSTA_TRPND); 976 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 977 4, ®); 978 reg |= PCI_EXP_DEVCTL_BCR_FLR; 979 eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 980 4, reg); 981 msleep(EEH_PE_RST_HOLD_TIME); 982 break; 983 case EEH_RESET_DEACTIVATE: 984 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 985 4, ®); 986 reg &= ~PCI_EXP_DEVCTL_BCR_FLR; 987 eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 988 4, reg); 989 msleep(EEH_PE_RST_SETTLE_TIME); 990 break; 991 } 992 993 return 0; 994 } 995 996 static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) 997 { 998 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 999 u32 cap = 0; 1000 1001 if (WARN_ON(!edev->af_cap)) 1002 return -ENOTTY; 1003 1004 eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); 1005 if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) 1006 return -ENOTTY; 1007 1008 switch (option) { 1009 case EEH_RESET_HOT: 1010 case EEH_RESET_FUNDAMENTAL: 1011 /* 1012 * Wait for Transaction Pending bit to clear. A word-aligned 1013 * test is used, so we use the conrol offset rather than status 1014 * and shift the test bit to match. 1015 */ 1016 pnv_eeh_wait_for_pending(pdn, "AF", 1017 edev->af_cap + PCI_AF_CTRL, 1018 PCI_AF_STATUS_TP << 8); 1019 eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1020 1, PCI_AF_CTRL_FLR); 1021 msleep(EEH_PE_RST_HOLD_TIME); 1022 break; 1023 case EEH_RESET_DEACTIVATE: 1024 eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); 1025 msleep(EEH_PE_RST_SETTLE_TIME); 1026 break; 1027 } 1028 1029 return 0; 1030 } 1031 1032 static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) 1033 { 1034 struct eeh_dev *edev; 1035 struct pci_dn *pdn; 1036 int ret; 1037 1038 /* The VF PE should have only one child device */ 1039 edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); 1040 pdn = eeh_dev_to_pdn(edev); 1041 if (!pdn) 1042 return -ENXIO; 1043 1044 ret = pnv_eeh_do_flr(pdn, option); 1045 if (!ret) 1046 return ret; 1047 1048 return pnv_eeh_do_af_flr(pdn, option); 1049 } 1050 1051 /** 1052 * pnv_eeh_reset - Reset the specified PE 1053 * @pe: EEH PE 1054 * @option: reset option 1055 * 1056 * Do reset on the indicated PE. For PCI bus sensitive PE, 1057 * we need to reset the parent p2p bridge. The PHB has to 1058 * be reinitialized if the p2p bridge is root bridge. For 1059 * PCI device sensitive PE, we will try to reset the device 1060 * through FLR. For now, we don't have OPAL APIs to do HARD 1061 * reset yet, so all reset would be SOFT (HOT) reset. 1062 */ 1063 static int pnv_eeh_reset(struct eeh_pe *pe, int option) 1064 { 1065 struct pci_controller *hose = pe->phb; 1066 struct pnv_phb *phb; 1067 struct pci_bus *bus; 1068 int64_t rc; 1069 1070 /* 1071 * For PHB reset, we always have complete reset. For those PEs whose 1072 * primary bus derived from root complex (root bus) or root port 1073 * (usually bus#1), we apply hot or fundamental reset on the root port. 1074 * For other PEs, we always have hot reset on the PE primary bus. 1075 * 1076 * Here, we have different design to pHyp, which always clear the 1077 * frozen state during PE reset. However, the good idea here from 1078 * benh is to keep frozen state before we get PE reset done completely 1079 * (until BAR restore). With the frozen state, HW drops illegal IO 1080 * or MMIO access, which can incur recrusive frozen PE during PE 1081 * reset. The side effect is that EEH core has to clear the frozen 1082 * state explicitly after BAR restore. 1083 */ 1084 if (pe->type & EEH_PE_PHB) 1085 return pnv_eeh_phb_reset(hose, option); 1086 1087 /* 1088 * The frozen PE might be caused by PAPR error injection 1089 * registers, which are expected to be cleared after hitting 1090 * frozen PE as stated in the hardware spec. Unfortunately, 1091 * that's not true on P7IOC. So we have to clear it manually 1092 * to avoid recursive EEH errors during recovery. 1093 */ 1094 phb = hose->private_data; 1095 if (phb->model == PNV_PHB_MODEL_P7IOC && 1096 (option == EEH_RESET_HOT || 1097 option == EEH_RESET_FUNDAMENTAL)) { 1098 rc = opal_pci_reset(phb->opal_id, 1099 OPAL_RESET_PHB_ERROR, 1100 OPAL_ASSERT_RESET); 1101 if (rc != OPAL_SUCCESS) { 1102 pr_warn("%s: Failure %lld clearing error injection registers\n", 1103 __func__, rc); 1104 return -EIO; 1105 } 1106 } 1107 1108 if (pe->type & EEH_PE_VF) 1109 return pnv_eeh_reset_vf_pe(pe, option); 1110 1111 bus = eeh_pe_bus_get(pe); 1112 if (!bus) { 1113 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", 1114 __func__, pe->phb->global_number, pe->addr); 1115 return -EIO; 1116 } 1117 1118 /* 1119 * If dealing with the root bus (or the bus underneath the 1120 * root port), we reset the bus underneath the root port. 1121 * 1122 * The cxl driver depends on this behaviour for bi-modal card 1123 * switching. 1124 */ 1125 if (pci_is_root_bus(bus) || 1126 pci_is_root_bus(bus->parent)) 1127 return pnv_eeh_root_reset(hose, option); 1128 1129 return pnv_eeh_bridge_reset(bus->self, option); 1130 } 1131 1132 /** 1133 * pnv_eeh_get_log - Retrieve error log 1134 * @pe: EEH PE 1135 * @severity: temporary or permanent error log 1136 * @drv_log: driver log to be combined with retrieved error log 1137 * @len: length of driver log 1138 * 1139 * Retrieve the temporary or permanent error from the PE. 1140 */ 1141 static int pnv_eeh_get_log(struct eeh_pe *pe, int severity, 1142 char *drv_log, unsigned long len) 1143 { 1144 if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) 1145 pnv_pci_dump_phb_diag_data(pe->phb, pe->data); 1146 1147 return 0; 1148 } 1149 1150 /** 1151 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE 1152 * @pe: EEH PE 1153 * 1154 * The function will be called to reconfigure the bridges included 1155 * in the specified PE so that the mulfunctional PE would be recovered 1156 * again. 1157 */ 1158 static int pnv_eeh_configure_bridge(struct eeh_pe *pe) 1159 { 1160 return 0; 1161 } 1162 1163 /** 1164 * pnv_pe_err_inject - Inject specified error to the indicated PE 1165 * @pe: the indicated PE 1166 * @type: error type 1167 * @func: specific error type 1168 * @addr: address 1169 * @mask: address mask 1170 * 1171 * The routine is called to inject specified error, which is 1172 * determined by @type and @func, to the indicated PE for 1173 * testing purpose. 1174 */ 1175 static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func, 1176 unsigned long addr, unsigned long mask) 1177 { 1178 struct pci_controller *hose = pe->phb; 1179 struct pnv_phb *phb = hose->private_data; 1180 s64 rc; 1181 1182 if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && 1183 type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { 1184 pr_warn("%s: Invalid error type %d\n", 1185 __func__, type); 1186 return -ERANGE; 1187 } 1188 1189 if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || 1190 func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { 1191 pr_warn("%s: Invalid error function %d\n", 1192 __func__, func); 1193 return -ERANGE; 1194 } 1195 1196 /* Firmware supports error injection ? */ 1197 if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { 1198 pr_warn("%s: Firmware doesn't support error injection\n", 1199 __func__); 1200 return -ENXIO; 1201 } 1202 1203 /* Do error injection */ 1204 rc = opal_pci_err_inject(phb->opal_id, pe->addr, 1205 type, func, addr, mask); 1206 if (rc != OPAL_SUCCESS) { 1207 pr_warn("%s: Failure %lld injecting error " 1208 "%d-%d to PHB#%x-PE#%x\n", 1209 __func__, rc, type, func, 1210 hose->global_number, pe->addr); 1211 return -EIO; 1212 } 1213 1214 return 0; 1215 } 1216 1217 static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) 1218 { 1219 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1220 1221 if (!edev || !edev->pe) 1222 return false; 1223 1224 /* 1225 * We will issue FLR or AF FLR to all VFs, which are contained 1226 * in VF PE. It relies on the EEH PCI config accessors. So we 1227 * can't block them during the window. 1228 */ 1229 if (edev->physfn && (edev->pe->state & EEH_PE_RESET)) 1230 return false; 1231 1232 if (edev->pe->state & EEH_PE_CFG_BLOCKED) 1233 return true; 1234 1235 return false; 1236 } 1237 1238 static int pnv_eeh_read_config(struct pci_dn *pdn, 1239 int where, int size, u32 *val) 1240 { 1241 if (!pdn) 1242 return PCIBIOS_DEVICE_NOT_FOUND; 1243 1244 if (pnv_eeh_cfg_blocked(pdn)) { 1245 *val = 0xFFFFFFFF; 1246 return PCIBIOS_SET_FAILED; 1247 } 1248 1249 return pnv_pci_cfg_read(pdn, where, size, val); 1250 } 1251 1252 static int pnv_eeh_write_config(struct pci_dn *pdn, 1253 int where, int size, u32 val) 1254 { 1255 if (!pdn) 1256 return PCIBIOS_DEVICE_NOT_FOUND; 1257 1258 if (pnv_eeh_cfg_blocked(pdn)) 1259 return PCIBIOS_SET_FAILED; 1260 1261 return pnv_pci_cfg_write(pdn, where, size, val); 1262 } 1263 1264 static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data) 1265 { 1266 /* GEM */ 1267 if (data->gemXfir || data->gemRfir || 1268 data->gemRirqfir || data->gemMask || data->gemRwof) 1269 pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", 1270 be64_to_cpu(data->gemXfir), 1271 be64_to_cpu(data->gemRfir), 1272 be64_to_cpu(data->gemRirqfir), 1273 be64_to_cpu(data->gemMask), 1274 be64_to_cpu(data->gemRwof)); 1275 1276 /* LEM */ 1277 if (data->lemFir || data->lemErrMask || 1278 data->lemAction0 || data->lemAction1 || data->lemWof) 1279 pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", 1280 be64_to_cpu(data->lemFir), 1281 be64_to_cpu(data->lemErrMask), 1282 be64_to_cpu(data->lemAction0), 1283 be64_to_cpu(data->lemAction1), 1284 be64_to_cpu(data->lemWof)); 1285 } 1286 1287 static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) 1288 { 1289 struct pnv_phb *phb = hose->private_data; 1290 struct OpalIoP7IOCErrorData *data = 1291 (struct OpalIoP7IOCErrorData*)phb->diag_data; 1292 long rc; 1293 1294 rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); 1295 if (rc != OPAL_SUCCESS) { 1296 pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", 1297 __func__, phb->hub_id, rc); 1298 return; 1299 } 1300 1301 switch (be16_to_cpu(data->type)) { 1302 case OPAL_P7IOC_DIAG_TYPE_RGC: 1303 pr_info("P7IOC diag-data for RGC\n\n"); 1304 pnv_eeh_dump_hub_diag_common(data); 1305 if (data->rgc.rgcStatus || data->rgc.rgcLdcp) 1306 pr_info(" RGC: %016llx %016llx\n", 1307 be64_to_cpu(data->rgc.rgcStatus), 1308 be64_to_cpu(data->rgc.rgcLdcp)); 1309 break; 1310 case OPAL_P7IOC_DIAG_TYPE_BI: 1311 pr_info("P7IOC diag-data for BI %s\n\n", 1312 data->bi.biDownbound ? "Downbound" : "Upbound"); 1313 pnv_eeh_dump_hub_diag_common(data); 1314 if (data->bi.biLdcp0 || data->bi.biLdcp1 || 1315 data->bi.biLdcp2 || data->bi.biFenceStatus) 1316 pr_info(" BI: %016llx %016llx %016llx %016llx\n", 1317 be64_to_cpu(data->bi.biLdcp0), 1318 be64_to_cpu(data->bi.biLdcp1), 1319 be64_to_cpu(data->bi.biLdcp2), 1320 be64_to_cpu(data->bi.biFenceStatus)); 1321 break; 1322 case OPAL_P7IOC_DIAG_TYPE_CI: 1323 pr_info("P7IOC diag-data for CI Port %d\n\n", 1324 data->ci.ciPort); 1325 pnv_eeh_dump_hub_diag_common(data); 1326 if (data->ci.ciPortStatus || data->ci.ciPortLdcp) 1327 pr_info(" CI: %016llx %016llx\n", 1328 be64_to_cpu(data->ci.ciPortStatus), 1329 be64_to_cpu(data->ci.ciPortLdcp)); 1330 break; 1331 case OPAL_P7IOC_DIAG_TYPE_MISC: 1332 pr_info("P7IOC diag-data for MISC\n\n"); 1333 pnv_eeh_dump_hub_diag_common(data); 1334 break; 1335 case OPAL_P7IOC_DIAG_TYPE_I2C: 1336 pr_info("P7IOC diag-data for I2C\n\n"); 1337 pnv_eeh_dump_hub_diag_common(data); 1338 break; 1339 default: 1340 pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", 1341 __func__, phb->hub_id, data->type); 1342 } 1343 } 1344 1345 static int pnv_eeh_get_pe(struct pci_controller *hose, 1346 u16 pe_no, struct eeh_pe **pe) 1347 { 1348 struct pnv_phb *phb = hose->private_data; 1349 struct pnv_ioda_pe *pnv_pe; 1350 struct eeh_pe *dev_pe; 1351 1352 /* 1353 * If PHB supports compound PE, to fetch 1354 * the master PE because slave PE is invisible 1355 * to EEH core. 1356 */ 1357 pnv_pe = &phb->ioda.pe_array[pe_no]; 1358 if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { 1359 pnv_pe = pnv_pe->master; 1360 WARN_ON(!pnv_pe || 1361 !(pnv_pe->flags & PNV_IODA_PE_MASTER)); 1362 pe_no = pnv_pe->pe_number; 1363 } 1364 1365 /* Find the PE according to PE# */ 1366 dev_pe = eeh_pe_get(hose, pe_no, 0); 1367 if (!dev_pe) 1368 return -EEXIST; 1369 1370 /* Freeze the (compound) PE */ 1371 *pe = dev_pe; 1372 if (!(dev_pe->state & EEH_PE_ISOLATED)) 1373 phb->freeze_pe(phb, pe_no); 1374 1375 /* 1376 * At this point, we're sure the (compound) PE should 1377 * have been frozen. However, we still need poke until 1378 * hitting the frozen PE on top level. 1379 */ 1380 dev_pe = dev_pe->parent; 1381 while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { 1382 int ret; 1383 ret = eeh_ops->get_state(dev_pe, NULL); 1384 if (ret <= 0 || eeh_state_active(ret)) { 1385 dev_pe = dev_pe->parent; 1386 continue; 1387 } 1388 1389 /* Frozen parent PE */ 1390 *pe = dev_pe; 1391 if (!(dev_pe->state & EEH_PE_ISOLATED)) 1392 phb->freeze_pe(phb, dev_pe->addr); 1393 1394 /* Next one */ 1395 dev_pe = dev_pe->parent; 1396 } 1397 1398 return 0; 1399 } 1400 1401 /** 1402 * pnv_eeh_next_error - Retrieve next EEH error to handle 1403 * @pe: Affected PE 1404 * 1405 * The function is expected to be called by EEH core while it gets 1406 * special EEH event (without binding PE). The function calls to 1407 * OPAL APIs for next error to handle. The informational error is 1408 * handled internally by platform. However, the dead IOC, dead PHB, 1409 * fenced PHB and frozen PE should be handled by EEH core eventually. 1410 */ 1411 static int pnv_eeh_next_error(struct eeh_pe **pe) 1412 { 1413 struct pci_controller *hose; 1414 struct pnv_phb *phb; 1415 struct eeh_pe *phb_pe, *parent_pe; 1416 __be64 frozen_pe_no; 1417 __be16 err_type, severity; 1418 long rc; 1419 int state, ret = EEH_NEXT_ERR_NONE; 1420 1421 /* 1422 * While running here, it's safe to purge the event queue. The 1423 * event should still be masked. 1424 */ 1425 eeh_remove_event(NULL, false); 1426 1427 list_for_each_entry(hose, &hose_list, list_node) { 1428 /* 1429 * If the subordinate PCI buses of the PHB has been 1430 * removed or is exactly under error recovery, we 1431 * needn't take care of it any more. 1432 */ 1433 phb = hose->private_data; 1434 phb_pe = eeh_phb_pe_get(hose); 1435 if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) 1436 continue; 1437 1438 rc = opal_pci_next_error(phb->opal_id, 1439 &frozen_pe_no, &err_type, &severity); 1440 if (rc != OPAL_SUCCESS) { 1441 pr_devel("%s: Invalid return value on " 1442 "PHB#%x (0x%lx) from opal_pci_next_error", 1443 __func__, hose->global_number, rc); 1444 continue; 1445 } 1446 1447 /* If the PHB doesn't have error, stop processing */ 1448 if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || 1449 be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { 1450 pr_devel("%s: No error found on PHB#%x\n", 1451 __func__, hose->global_number); 1452 continue; 1453 } 1454 1455 /* 1456 * Processing the error. We're expecting the error with 1457 * highest priority reported upon multiple errors on the 1458 * specific PHB. 1459 */ 1460 pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", 1461 __func__, be16_to_cpu(err_type), 1462 be16_to_cpu(severity), be64_to_cpu(frozen_pe_no), 1463 hose->global_number); 1464 switch (be16_to_cpu(err_type)) { 1465 case OPAL_EEH_IOC_ERROR: 1466 if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { 1467 pr_err("EEH: dead IOC detected\n"); 1468 ret = EEH_NEXT_ERR_DEAD_IOC; 1469 } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { 1470 pr_info("EEH: IOC informative error " 1471 "detected\n"); 1472 pnv_eeh_get_and_dump_hub_diag(hose); 1473 ret = EEH_NEXT_ERR_NONE; 1474 } 1475 1476 break; 1477 case OPAL_EEH_PHB_ERROR: 1478 if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { 1479 *pe = phb_pe; 1480 pr_err("EEH: dead PHB#%x detected, " 1481 "location: %s\n", 1482 hose->global_number, 1483 eeh_pe_loc_get(phb_pe)); 1484 ret = EEH_NEXT_ERR_DEAD_PHB; 1485 } else if (be16_to_cpu(severity) == 1486 OPAL_EEH_SEV_PHB_FENCED) { 1487 *pe = phb_pe; 1488 pr_err("EEH: Fenced PHB#%x detected, " 1489 "location: %s\n", 1490 hose->global_number, 1491 eeh_pe_loc_get(phb_pe)); 1492 ret = EEH_NEXT_ERR_FENCED_PHB; 1493 } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { 1494 pr_info("EEH: PHB#%x informative error " 1495 "detected, location: %s\n", 1496 hose->global_number, 1497 eeh_pe_loc_get(phb_pe)); 1498 pnv_eeh_get_phb_diag(phb_pe); 1499 pnv_pci_dump_phb_diag_data(hose, phb_pe->data); 1500 ret = EEH_NEXT_ERR_NONE; 1501 } 1502 1503 break; 1504 case OPAL_EEH_PE_ERROR: 1505 /* 1506 * If we can't find the corresponding PE, we 1507 * just try to unfreeze. 1508 */ 1509 if (pnv_eeh_get_pe(hose, 1510 be64_to_cpu(frozen_pe_no), pe)) { 1511 pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", 1512 hose->global_number, be64_to_cpu(frozen_pe_no)); 1513 pr_info("EEH: PHB location: %s\n", 1514 eeh_pe_loc_get(phb_pe)); 1515 1516 /* Dump PHB diag-data */ 1517 rc = opal_pci_get_phb_diag_data2(phb->opal_id, 1518 phb->diag_data, phb->diag_data_size); 1519 if (rc == OPAL_SUCCESS) 1520 pnv_pci_dump_phb_diag_data(hose, 1521 phb->diag_data); 1522 1523 /* Try best to clear it */ 1524 opal_pci_eeh_freeze_clear(phb->opal_id, 1525 be64_to_cpu(frozen_pe_no), 1526 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 1527 ret = EEH_NEXT_ERR_NONE; 1528 } else if ((*pe)->state & EEH_PE_ISOLATED || 1529 eeh_pe_passed(*pe)) { 1530 ret = EEH_NEXT_ERR_NONE; 1531 } else { 1532 pr_err("EEH: Frozen PE#%x " 1533 "on PHB#%x detected\n", 1534 (*pe)->addr, 1535 (*pe)->phb->global_number); 1536 pr_err("EEH: PE location: %s, " 1537 "PHB location: %s\n", 1538 eeh_pe_loc_get(*pe), 1539 eeh_pe_loc_get(phb_pe)); 1540 ret = EEH_NEXT_ERR_FROZEN_PE; 1541 } 1542 1543 break; 1544 default: 1545 pr_warn("%s: Unexpected error type %d\n", 1546 __func__, be16_to_cpu(err_type)); 1547 } 1548 1549 /* 1550 * EEH core will try recover from fenced PHB or 1551 * frozen PE. In the time for frozen PE, EEH core 1552 * enable IO path for that before collecting logs, 1553 * but it ruins the site. So we have to dump the 1554 * log in advance here. 1555 */ 1556 if ((ret == EEH_NEXT_ERR_FROZEN_PE || 1557 ret == EEH_NEXT_ERR_FENCED_PHB) && 1558 !((*pe)->state & EEH_PE_ISOLATED)) { 1559 eeh_pe_mark_isolated(*pe); 1560 pnv_eeh_get_phb_diag(*pe); 1561 1562 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 1563 pnv_pci_dump_phb_diag_data((*pe)->phb, 1564 (*pe)->data); 1565 } 1566 1567 /* 1568 * We probably have the frozen parent PE out there and 1569 * we need have to handle frozen parent PE firstly. 1570 */ 1571 if (ret == EEH_NEXT_ERR_FROZEN_PE) { 1572 parent_pe = (*pe)->parent; 1573 while (parent_pe) { 1574 /* Hit the ceiling ? */ 1575 if (parent_pe->type & EEH_PE_PHB) 1576 break; 1577 1578 /* Frozen parent PE ? */ 1579 state = eeh_ops->get_state(parent_pe, NULL); 1580 if (state > 0 && !eeh_state_active(state)) 1581 *pe = parent_pe; 1582 1583 /* Next parent level */ 1584 parent_pe = parent_pe->parent; 1585 } 1586 1587 /* We possibly migrate to another PE */ 1588 eeh_pe_mark_isolated(*pe); 1589 } 1590 1591 /* 1592 * If we have no errors on the specific PHB or only 1593 * informative error there, we continue poking it. 1594 * Otherwise, we need actions to be taken by upper 1595 * layer. 1596 */ 1597 if (ret > EEH_NEXT_ERR_INF) 1598 break; 1599 } 1600 1601 /* Unmask the event */ 1602 if (ret == EEH_NEXT_ERR_NONE && eeh_enabled()) 1603 enable_irq(eeh_event_irq); 1604 1605 return ret; 1606 } 1607 1608 static int pnv_eeh_restore_config(struct pci_dn *pdn) 1609 { 1610 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1611 struct pnv_phb *phb; 1612 s64 ret = 0; 1613 int config_addr = (pdn->busno << 8) | (pdn->devfn); 1614 1615 if (!edev) 1616 return -EEXIST; 1617 1618 /* 1619 * We have to restore the PCI config space after reset since the 1620 * firmware can't see SRIOV VFs. 1621 * 1622 * FIXME: The MPS, error routing rules, timeout setting are worthy 1623 * to be exported by firmware in extendible way. 1624 */ 1625 if (edev->physfn) { 1626 ret = eeh_restore_vf_config(pdn); 1627 } else { 1628 phb = pdn->phb->private_data; 1629 ret = opal_pci_reinit(phb->opal_id, 1630 OPAL_REINIT_PCI_DEV, config_addr); 1631 } 1632 1633 if (ret) { 1634 pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", 1635 __func__, config_addr, ret); 1636 return -EIO; 1637 } 1638 1639 return ret; 1640 } 1641 1642 static struct eeh_ops pnv_eeh_ops = { 1643 .name = "powernv", 1644 .init = pnv_eeh_init, 1645 .probe = pnv_eeh_probe, 1646 .set_option = pnv_eeh_set_option, 1647 .get_pe_addr = pnv_eeh_get_pe_addr, 1648 .get_state = pnv_eeh_get_state, 1649 .reset = pnv_eeh_reset, 1650 .get_log = pnv_eeh_get_log, 1651 .configure_bridge = pnv_eeh_configure_bridge, 1652 .err_inject = pnv_eeh_err_inject, 1653 .read_config = pnv_eeh_read_config, 1654 .write_config = pnv_eeh_write_config, 1655 .next_error = pnv_eeh_next_error, 1656 .restore_config = pnv_eeh_restore_config, 1657 .notify_resume = NULL 1658 }; 1659 1660 #ifdef CONFIG_PCI_IOV 1661 static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev) 1662 { 1663 struct pci_dn *pdn = pci_get_pdn(pdev); 1664 int parent_mps; 1665 1666 if (!pdev->is_virtfn) 1667 return; 1668 1669 /* Synchronize MPS for VF and PF */ 1670 parent_mps = pcie_get_mps(pdev->physfn); 1671 if ((128 << pdev->pcie_mpss) >= parent_mps) 1672 pcie_set_mps(pdev, parent_mps); 1673 pdn->mps = pcie_get_mps(pdev); 1674 } 1675 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps); 1676 #endif /* CONFIG_PCI_IOV */ 1677 1678 /** 1679 * eeh_powernv_init - Register platform dependent EEH operations 1680 * 1681 * EEH initialization on powernv platform. This function should be 1682 * called before any EEH related functions. 1683 */ 1684 static int __init eeh_powernv_init(void) 1685 { 1686 int ret = -EINVAL; 1687 1688 ret = eeh_ops_register(&pnv_eeh_ops); 1689 if (!ret) 1690 pr_info("EEH: PowerNV platform initialized\n"); 1691 else 1692 pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); 1693 1694 return ret; 1695 } 1696 machine_early_initcall(powernv, eeh_powernv_init); 1697