/*
 * This file implements the platform-dependent EEH operations for the
 * powernv platform, on which the kernel runs natively with full
 * hypervisor support.
 *
 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/atomic.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>

#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/opal.h>
#include <asm/ppc-pci.h>

#include "powernv.h"
#include "pci.h"

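/* Set once the OPAL event notifier below has been registered */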
static bool pnv_eeh_nb_init = false;

/**
 * pnv_eeh_init - EEH platform dependent initialization
 *
 * EEH platform dependent initialization on powernv
 */
static int pnv_eeh_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;

	/* We require OPALv3 */
	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
		pr_warn("%s: OPALv3 is required!\n",
			__func__);
		return -EINVAL;
	}

	/* Set probe mode */
	eeh_add_flag(EEH_PROBE_MODE_DEV);

	/*
	 * P7IOC blocks PCI config access to a frozen PE, but PHB3
	 * doesn't. So we have to selectively enable I/O before
	 * collecting the error log.
	 */
	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		if (phb->model == PNV_PHB_MODEL_P7IOC)
			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);

		/*
		 * PE#0 should be regarded as valid by the EEH core
		 * if it's not the reserved one. Currently, PE#0 is
		 * reserved on PHB3 and PE#127 on P7IOC, so PE#0
		 * should be treated as valid for P7IOC.
		 */
		if (phb->ioda.reserved_pe != 0)
			eeh_add_flag(EEH_VALID_PE_ZERO);

		break;
	}

	return 0;
}

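/*
 * OPAL event notifier callback, invoked when the set of pending OPAL
 * events changes. Only OPAL_EVENT_PCI_ERROR is of interest here.
 */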
static int pnv_eeh_event(struct notifier_block *nb,
			 unsigned long events, void *change)
{
	uint64_t changed_evts = (uint64_t)change;

	/*
	 * Simply send a special EEH event if EEH has been
	 * enabled, or clear the pending events in case
	 * EEH is enabled soon.
	 */
	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
	    !(events & OPAL_EVENT_PCI_ERROR))
		return 0;

	if (eeh_enabled())
		eeh_send_failure_event(NULL);
	else
		opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);

	return 0;
}

static struct notifier_block pnv_eeh_nb = {
	.notifier_call	= pnv_eeh_event,
	.next		= NULL,
	.priority	= 0
};

#ifdef CONFIG_DEBUG_FS
static ssize_t pnv_eeh_ei_write(struct file *filp,
				const char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct pci_controller *hose = filp->private_data;
	struct eeh_dev *edev;
	struct eeh_pe *pe;
	int pe_no, type, func;
	unsigned long addr, mask;
	char buf[50];
	int ret;

	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENXIO;

	/* Copy over argument buffer */
	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
	if (!ret)
		return -EFAULT;

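	/*
	 * The expected input is five colon-separated hex fields,
	 * "pe_no:type:func:addr:mask", matching the sscanf() format
	 * below, e.g. (illustrative values only):
	 *     echo 0:0:0x11:0x0:0x0 > .../err_injct
	 */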
	/* Retrieve parameters */
	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
		     &pe_no, &type, &func, &addr, &mask);
	if (ret != 5)
		return -EINVAL;

	/* Retrieve PE */
	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
	if (!edev)
		return -ENOMEM;
	edev->phb = hose;
	edev->pe_config_addr = pe_no;
	pe = eeh_pe_get(edev);
	kfree(edev);
	if (!pe)
		return -ENODEV;

	/* Do error injection */
	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
	return ret < 0 ? ret : count;
}

static const struct file_operations pnv_eeh_ei_fops = {
	.open	= simple_open,
	.llseek	= no_llseek,
	.write	= pnv_eeh_ei_write,
};

static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	out_be64(phb->regs + offset, val);
	return 0;
}

static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	*val = in_be64(phb->regs + offset);
	return 0;
}

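/*
 * Accessors for the error-injection debugfs files below, each bound to
 * a fixed offset (0xD10/0xD90/0xE10) in the memory-mapped PHB register
 * space.
 */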
static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xD10, val);
}

static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xD10, val);
}

static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xD90, val);
}

static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xD90, val);
}

static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xE10, val);
}

static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xE10, val);
}

DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
			pnv_eeh_outb_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
			pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
			pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
#endif /* CONFIG_DEBUG_FS */

/**
 * pnv_eeh_post_init - EEH platform dependent post initialization
 *
 * EEH platform dependent post initialization on powernv. When
 * this function is called, the EEH PEs and devices should have
 * been built. If the I/O address cache has been built, EEH is
 * ready to provide service.
 */
static int pnv_eeh_post_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	int ret = 0;

	/* Register OPAL event notifier */
	if (!pnv_eeh_nb_init) {
		ret = opal_notifier_register(&pnv_eeh_nb);
		if (ret) {
			pr_warn("%s: Can't register OPAL event notifier (%d)\n",
				__func__, ret);
			return ret;
		}

		pnv_eeh_nb_init = true;
	}

	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		/*
		 * If EEH is enabled, we're going to rely on it.
		 * Otherwise, we fall back to the conventional
		 * mechanism of clearing frozen PEs during PCI
		 * config accesses.
		 */
		if (eeh_enabled())
			phb->flags |= PNV_PHB_FLAG_EEH;
		else
			phb->flags &= ~PNV_PHB_FLAG_EEH;

		/* Create debugfs entries */
#ifdef CONFIG_DEBUG_FS
		if (phb->has_dbgfs || !phb->dbgfs)
			continue;

		phb->has_dbgfs = 1;
		debugfs_create_file("err_injct", 0200,
				    phb->dbgfs, hose,
				    &pnv_eeh_ei_fops);

		debugfs_create_file("err_injct_outbound", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_outb_dbgfs_ops);
		debugfs_create_file("err_injct_inboundA", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_inbA_dbgfs_ops);
		debugfs_create_file("err_injct_inboundB", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_inbB_dbgfs_ops);
#endif /* CONFIG_DEBUG_FS */
	}

	return ret;
}

/**
 * pnv_eeh_dev_probe - Do probe on PCI device
 * @dev: PCI device
 * @flag: unused
 *
 * When the EEH module is installed during system boot, all PCI devices
 * are checked one by one to see if they support EEH. This function
 * is introduced for that purpose. By default, EEH is enabled on all
 * PCI devices, so we only need to do the necessary initialization on
 * the corresponding EEH device and create the PE accordingly.
 *
 * Note that it's unsafe to retrieve the EEH device through the
 * corresponding PCI device. During a PCI device hotplug, which was
 * possibly triggered by the EEH core, the binding between the EEH
 * device and the PCI device isn't built yet.
 */
static int pnv_eeh_dev_probe(struct pci_dev *dev, void *flag)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct device_node *dn = pci_device_to_OF_node(dev);
	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
	int ret;

	/*
	 * The root bridge doesn't have any subordinate PCI devices
	 * and we don't have an OF node for it, so it's not
	 * reasonable to continue probing in that case.
	 */
	if (!dn || !edev || edev->pe)
		return 0;

	/* Skip for PCI-ISA bridge */
	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
		return 0;

	/* Initialize eeh device */
	edev->class_code = dev->class;
	edev->mode	&= 0xFFFFFF00;
	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
		edev->mode |= EEH_DEV_BRIDGE;
	edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
	if (pci_is_pcie(dev)) {
		edev->pcie_cap = pci_pcie_cap(dev);

		if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
			edev->mode |= EEH_DEV_ROOT_PORT;
		else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
			edev->mode |= EEH_DEV_DS_PORT;

		edev->aer_cap = pci_find_ext_capability(dev,
							PCI_EXT_CAP_ID_ERR);
	}

	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);
	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);

	/* Create PE */
	ret = eeh_add_to_parent_pe(edev);
	if (ret) {
		pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n",
			__func__, pci_name(dev), ret);
		return ret;
	}

	/*
	 * If the PE contains any of the following adapters, the
	 * PCI config space can't be accessed when dumping the EEH log.
	 * Otherwise, we will run into a fenced PHB caused by a shortage
	 * of outbound credits in the adapter. PCI config access should
	 * be blocked until the PE is reset. MMIO access is dropped
	 * by hardware anyway. In order to drop PCI config requests,
	 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
	 * is checked in the backend when the PE state is retrieved. If
	 * the PE becomes frozen for the first time and the flag has
	 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
	 * that PE to block its config space.
	 *
	 * Broadcom Austin 4-port NICs (14e4:1657)
	 * Broadcom Shiner 2-port 10G NICs (14e4:168e)
	 */
	if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) ||
	    (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e))
		edev->pe->state |= EEH_PE_CFG_RESTRICTED;

	/*
	 * Cache the PE primary bus, which can't be fetched when
	 * full hotplug is in progress. In that case, all child
	 * PCI devices of the PE are expected to be removed prior
	 * to the PE reset.
	 */
	if (!edev->pe->bus)
		edev->pe->bus = dev->bus;

	/*
	 * Enable EEH explicitly so that we will do the EEH check
	 * while accessing I/O resources.
	 */
	eeh_add_flag(EEH_ENABLED);

	/* Save memory bars */
	eeh_save_bars(edev);

	return 0;
}

/**
 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
 * @pe: EEH PE
 * @option: operation to be issued
 *
 * This function is used to control the EEH functionality globally.
 * Currently, the following options are supported, according to PAPR:
 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA.
 */
static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	bool freeze_pe = false;
	int opt, ret = 0;
	s64 rc;

	/* Sanity check on option */
	switch (option) {
	case EEH_OPT_DISABLE:
		return -EPERM;
	case EEH_OPT_ENABLE:
		return 0;
	case EEH_OPT_THAW_MMIO:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
		break;
	case EEH_OPT_THAW_DMA:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
		break;
	case EEH_OPT_FREEZE_PE:
		freeze_pe = true;
		opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
		break;
	default:
		pr_warn("%s: Invalid option %d\n", __func__, option);
		return -EINVAL;
	}

	/* If the PHB supports compound PEs, let it handle the request */
	if (freeze_pe) {
		if (phb->freeze_pe) {
			phb->freeze_pe(phb, pe->addr);
		} else {
			rc = opal_pci_eeh_freeze_set(phb->opal_id,
						     pe->addr, opt);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld freezing "
					"PHB#%x-PE#%x\n",
					__func__, rc,
					phb->hose->global_number, pe->addr);
				ret = -EIO;
			}
		}
	} else {
		if (phb->unfreeze_pe) {
			ret = phb->unfreeze_pe(phb, pe->addr, opt);
		} else {
			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
						       pe->addr, opt);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld enabling %d "
					"for PHB#%x-PE#%x\n",
					__func__, rc, option,
					phb->hose->global_number, pe->addr);
				ret = -EIO;
			}
		}
	}

	return ret;
}

/**
 * pnv_eeh_get_pe_addr - Retrieve PE address
 * @pe: EEH PE
 *
 * Retrieve the PE address according to the given traditional
 * PCI BDF (Bus/Device/Function) address.
 */
static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
{
	return pe->addr;
}

static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	s64 rc;

	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
					 PNV_PCI_DIAG_BUF_SIZE);
	if (rc != OPAL_SUCCESS)
		pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
			__func__, rc, pe->phb->global_number);
}

static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result = 0;

	rc = opal_pci_eeh_freeze_status(phb->opal_id,
					pe->addr,
					&fstate,
					&pcierr,
					NULL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld getting PHB#%x state\n",
			__func__, rc, phb->hose->global_number);
		return EEH_STATE_NOT_SUPPORT;
	}

	/*
	 * Check the PHB state. If the PHB is frozen for the
	 * first time, dump the PHB diag-data.
	 */
	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
	} else if (!(pe->state & EEH_PE_ISOLATED)) {
		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}

static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result;

	/*
	 * We don't clobber the hardware frozen state until the PE
	 * reset is completed. In order to keep the EEH core
	 * moving forward, we have to return an operational
	 * state during the PE reset.
	 */
	if (pe->state & EEH_PE_RESET) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		return result;
	}

	/*
	 * Fetch the PE state from hardware. If the PHB
	 * supports compound PEs, let it handle the query.
	 */
	if (phb->get_pe_state) {
		fstate = phb->get_pe_state(phb, pe->addr);
	} else {
		rc = opal_pci_eeh_freeze_status(phb->opal_id,
						pe->addr,
						&fstate,
						&pcierr,
						NULL);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
				__func__, rc, phb->hose->global_number,
				pe->addr);
			return EEH_STATE_NOT_SUPPORT;
		}
	}

	/* Figure out state */
	switch (fstate) {
	case OPAL_EEH_STOPPED_NOT_FROZEN:
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_FREEZE:
		result = (EEH_STATE_DMA_ACTIVE |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_DMA_FREEZE:
		result = (EEH_STATE_MMIO_ACTIVE |
			  EEH_STATE_MMIO_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
		result = 0;
		break;
	case OPAL_EEH_STOPPED_RESET:
		result = EEH_STATE_RESET_ACTIVE;
		break;
	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
		result = EEH_STATE_UNAVAILABLE;
		break;
	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
		result = EEH_STATE_NOT_SUPPORT;
		break;
	default:
		result = EEH_STATE_NOT_SUPPORT;
		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
			__func__, phb->hose->global_number,
			pe->addr, fstate);
	}

	/*
	 * If the PHB supports compound PEs, freeze all
	 * slave PEs for consistency.
	 *
	 * If the PE is switching to the frozen state for the
	 * first time, dump the PHB diag-data.
	 */
	if (!(result & EEH_STATE_NOT_SUPPORT) &&
	    !(result & EEH_STATE_UNAVAILABLE) &&
	    !(result & EEH_STATE_MMIO_ACTIVE) &&
	    !(result & EEH_STATE_DMA_ACTIVE)  &&
	    !(pe->state & EEH_PE_ISOLATED)) {
		if (phb->freeze_pe)
			phb->freeze_pe(phb, pe->addr);

		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}

/**
 * pnv_eeh_get_state - Retrieve PE state
 * @pe: EEH PE
 * @delay: delay while PE state is temporarily unavailable
 *
 * Retrieve the state of the specified PE. On an IODA-compatible
 * platform it should be retrieved from the IODA table, so we
 * prefer passing the request down to the hardware implementation
 * to handle it.
 */
static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
{
	int ret;

	if (pe->type & EEH_PE_PHB)
		ret = pnv_eeh_get_phb_state(pe);
	else
		ret = pnv_eeh_get_pe_state(pe);

	if (!delay)
		return ret;

	/*
	 * If the PE state is temporarily unavailable,
	 * inform the EEH core to delay for the default
	 * period (1 second).
	 */
	*delay = 0;
	if (ret & EEH_STATE_UNAVAILABLE)
		*delay = 1000;

	return ret;
}

static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
{
	s64 rc = OPAL_HARDWARE;

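	/*
	 * A positive return value from opal_pci_poll() is the number
	 * of milliseconds to wait before polling again; zero means the
	 * operation has completed, and a negative value is an error.
	 */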
	while (1) {
		rc = opal_pci_poll(phb->opal_id);
		if (rc <= 0)
			break;

		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * rc);
		else
			msleep(rc);
	}

	return rc;
}

int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/* Issue PHB complete reset request */
	if (option == EEH_RESET_FUNDAMENTAL ||
	    option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/*
	 * Poll the state of the PHB until the request completes
	 * successfully. A PHB reset is usually a PHB complete
	 * reset followed by a hot reset on the root bus, so we
	 * also need the PCI bus settlement delay.
	 */
	rc = pnv_eeh_phb_poll(phb);
	if (option == EEH_RESET_DEACTIVATE) {
		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
		else
			msleep(EEH_PE_RST_SETTLE_TIME);
	}
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}

static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/*
	 * During the reset deassert time, we needn't care about
	 * the reset scope because the firmware does nothing for
	 * fundamental or hot resets during the deassert phase.
	 */
	if (option == EEH_RESET_FUNDAMENTAL)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_FUNDAMENTAL,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/* Poll the state of the PHB until the request completes */
	rc = pnv_eeh_phb_poll(phb);
	if (option == EEH_RESET_DEACTIVATE)
		msleep(EEH_PE_RST_SETTLE_TIME);
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}

static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
{
	struct device_node *dn = pci_device_to_OF_node(dev);
	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
	int aer = edev ? edev->aer_cap : 0;
	u32 ctrl;

	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
		 __func__, pci_domain_nr(dev->bus),
		 dev->bus->number, option);

	switch (option) {
	case EEH_RESET_FUNDAMENTAL:
	case EEH_RESET_HOT:
		/* Don't report linkDown event */
		if (aer) {
			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl |= PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_SETTLE_TIME);

		/* Continue reporting linkDown event */
		if (aer) {
			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl &= ~PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		break;
	}

	return 0;
}

void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
{
	struct pci_controller *hose;

	if (pci_is_root_bus(dev->bus)) {
		hose = pci_bus_to_host(dev->bus);
		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
	} else {
		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
	}
}

/**
 * pnv_eeh_reset - Reset the specified PE
 * @pe: EEH PE
 * @option: reset option
 *
 * Do a reset on the indicated PE. For a PCI-bus-sensitive PE,
 * we need to reset the parent p2p bridge. The PHB has to be
 * reinitialized if the p2p bridge is the root bridge. For a
 * PCI-device-sensitive PE, we will try to reset the device
 * through FLR. For now, we don't have OPAL APIs to do a HARD
 * reset yet, so all resets are SOFT (HOT) resets.
 */
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pci_bus *bus;
	int ret;

	/*
	 * For a PHB reset, we always do a complete reset. For those PEs
	 * whose primary bus is derived from the root complex (root bus)
	 * or the root port (usually bus#1), we apply a hot or fundamental
	 * reset on the root port. For other PEs, we always do a hot
	 * reset on the PE's primary bus.
	 *
	 * Here our design differs from pHyp, which always clears the
	 * frozen state during a PE reset. The idea here, from benh, is
	 * to keep the frozen state until the PE reset is completely done
	 * (until BAR restore). With the frozen state, the hardware drops
	 * illegal I/O or MMIO accesses, which could otherwise refreeze
	 * the PE recursively during the reset. The side effect is that
	 * the EEH core has to clear the frozen state explicitly after
	 * the BAR restore.
	 */
	if (pe->type & EEH_PE_PHB) {
		ret = pnv_eeh_phb_reset(hose, option);
	} else {
		struct pnv_phb *phb;
		s64 rc;

		/*
		 * The frozen PE might be caused by the PAPR error injection
		 * registers, which are expected to be cleared after hitting
		 * a frozen PE as stated in the hardware spec. Unfortunately,
		 * that's not true on P7IOC. So we have to clear them manually
		 * to avoid recursive EEH errors during recovery.
		 */
		phb = hose->private_data;
		if (phb->model == PNV_PHB_MODEL_P7IOC &&
		    (option == EEH_RESET_HOT ||
		    option == EEH_RESET_FUNDAMENTAL)) {
			rc = opal_pci_reset(phb->opal_id,
					    OPAL_RESET_PHB_ERROR,
					    OPAL_ASSERT_RESET);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld clearing "
					"error injection registers\n",
					__func__, rc);
				return -EIO;
			}
		}

		bus = eeh_pe_bus_get(pe);
		if (pci_is_root_bus(bus) ||
			pci_is_root_bus(bus->parent))
			ret = pnv_eeh_root_reset(hose, option);
		else
			ret = pnv_eeh_bridge_reset(bus->self, option);
	}

	return ret;
}

/**
 * pnv_eeh_wait_state - Wait for PE state
 * @pe: EEH PE
 * @max_wait: maximal period in milliseconds
 *
 * Wait for the state of the associated PE. It might take some time
 * to retrieve the PE's state.
 */
static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
{
	int ret;
	int mwait;

	while (1) {
		ret = pnv_eeh_get_state(pe, &mwait);

		/*
		 * If the PE's state is temporarily unavailable,
		 * we have to wait for the specified time. Otherwise,
		 * the PE's state is returned immediately.
		 */
		if (ret != EEH_STATE_UNAVAILABLE)
			return ret;

		max_wait -= mwait;
		if (max_wait <= 0) {
			pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
				__func__, pe->addr, max_wait);
			return EEH_STATE_NOT_SUPPORT;
		}

		msleep(mwait);
	}

	return EEH_STATE_NOT_SUPPORT;
}

/**
 * pnv_eeh_get_log - Retrieve error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 * @drv_log: driver log to be combined with retrieved error log
 * @len: length of driver log
 *
 * Retrieve the temporary or permanent error from the PE.
 */
static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
			   char *drv_log, unsigned long len)
{
	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);

	return 0;
}

/**
 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
 * @pe: EEH PE
 *
 * This function is called to reconfigure the bridges included
 * in the specified PE so that the malfunctioning PE can be
 * recovered.
 */
static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
{
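	/* No platform-specific action is needed here on powernv */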
	return 0;
}

/**
 * pnv_eeh_err_inject - Inject specified error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: specific error type
 * @addr: address
 * @mask: address mask
 *
 * This routine is called to inject the specified error, which is
 * determined by @type and @func, into the indicated PE for
 * testing purposes.
 */
static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
			      unsigned long addr, unsigned long mask)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	s64 rc;

	/* Sanity check on error type */
	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
		pr_warn("%s: Invalid error type %d\n",
			__func__, type);
		return -ERANGE;
	}

	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
		pr_warn("%s: Invalid error function %d\n",
			__func__, func);
		return -ERANGE;
	}

	/* Does the firmware support error injection? */
	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
		pr_warn("%s: Firmware doesn't support error injection\n",
			__func__);
		return -ENXIO;
	}

	/* Do error injection */
	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
				 type, func, addr, mask);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld injecting error "
			"%d-%d to PHB#%x-PE#%x\n",
			__func__, rc, type, func,
			hose->global_number, pe->addr);
		return -EIO;
	}

	return 0;
}

static inline bool pnv_eeh_cfg_blocked(struct device_node *dn)
{
	struct eeh_dev *edev = of_node_to_eeh_dev(dn);

	if (!edev || !edev->pe)
		return false;

	if (edev->pe->state & EEH_PE_CFG_BLOCKED)
		return true;

	return false;
}

static int pnv_eeh_read_config(struct device_node *dn,
			       int where, int size, u32 *val)
{
	if (pnv_eeh_cfg_blocked(dn)) {
		*val = 0xFFFFFFFF;
		return PCIBIOS_SET_FAILED;
	}

	return pnv_pci_cfg_read(dn, where, size, val);
}

static int pnv_eeh_write_config(struct device_node *dn,
				int where, int size, u32 val)
{
	if (pnv_eeh_cfg_blocked(dn))
		return PCIBIOS_SET_FAILED;

	return pnv_pci_cfg_write(dn, where, size, val);
}

static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
{
	/* GEM */
	if (data->gemXfir || data->gemRfir ||
	    data->gemRirqfir || data->gemMask || data->gemRwof)
		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->gemXfir),
			be64_to_cpu(data->gemRfir),
			be64_to_cpu(data->gemRirqfir),
			be64_to_cpu(data->gemMask),
			be64_to_cpu(data->gemRwof));

	/* LEM */
	if (data->lemFir || data->lemErrMask ||
	    data->lemAction0 || data->lemAction1 || data->lemWof)
		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->lemFir),
			be64_to_cpu(data->lemErrMask),
			be64_to_cpu(data->lemAction0),
			be64_to_cpu(data->lemAction1),
			be64_to_cpu(data->lemWof));
}

static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
{
	struct pnv_phb *phb = hose->private_data;
	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
	long rc;

	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
			__func__, phb->hub_id, rc);
		return;
	}

	switch (data->type) {
	case OPAL_P7IOC_DIAG_TYPE_RGC:
		pr_info("P7IOC diag-data for RGC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
			pr_info("  RGC: %016llx %016llx\n",
				be64_to_cpu(data->rgc.rgcStatus),
				be64_to_cpu(data->rgc.rgcLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_BI:
		pr_info("P7IOC diag-data for BI %s\n\n",
			data->bi.biDownbound ? "Downbound" : "Upbound");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
		    data->bi.biLdcp2 || data->bi.biFenceStatus)
			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
				be64_to_cpu(data->bi.biLdcp0),
				be64_to_cpu(data->bi.biLdcp1),
				be64_to_cpu(data->bi.biLdcp2),
				be64_to_cpu(data->bi.biFenceStatus));
		break;
	case OPAL_P7IOC_DIAG_TYPE_CI:
		pr_info("P7IOC diag-data for CI Port %d\n\n",
			data->ci.ciPort);
		pnv_eeh_dump_hub_diag_common(data);
		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
			pr_info("  CI:  %016llx %016llx\n",
				be64_to_cpu(data->ci.ciPortStatus),
				be64_to_cpu(data->ci.ciPortLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_MISC:
		pr_info("P7IOC diag-data for MISC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	case OPAL_P7IOC_DIAG_TYPE_I2C:
		pr_info("P7IOC diag-data for I2C\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	default:
		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
			__func__, phb->hub_id, data->type);
	}
}

static int pnv_eeh_get_pe(struct pci_controller *hose,
			  u16 pe_no, struct eeh_pe **pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pnv_pe;
	struct eeh_pe *dev_pe;
	struct eeh_dev edev;

	/*
	 * If the PHB supports compound PEs, fetch the
	 * master PE because the slave PE is invisible
	 * to the EEH core.
	 */
	pnv_pe = &phb->ioda.pe_array[pe_no];
	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
		pnv_pe = pnv_pe->master;
		WARN_ON(!pnv_pe ||
			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pnv_pe->pe_number;
	}

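	/*
	 * Build a temporary on-stack eeh_dev as the lookup key;
	 * eeh_pe_get() matches on the PHB and PE config address.
	 */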
	/* Find the PE according to PE# */
	memset(&edev, 0, sizeof(struct eeh_dev));
	edev.phb = hose;
	edev.pe_config_addr = pe_no;
	dev_pe = eeh_pe_get(&edev);
	if (!dev_pe)
		return -EEXIST;

	/* Freeze the (compound) PE */
	*pe = dev_pe;
	if (!(dev_pe->state & EEH_PE_ISOLATED))
		phb->freeze_pe(phb, pe_no);

	/*
	 * At this point, we're sure the (compound) PE should
	 * have been frozen. However, we still need to poke
	 * upward until we hit the topmost frozen PE.
	 */
	dev_pe = dev_pe->parent;
	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
		int ret;
		int active_flags = (EEH_STATE_MMIO_ACTIVE |
				    EEH_STATE_DMA_ACTIVE);

		ret = eeh_ops->get_state(dev_pe, NULL);
		if (ret <= 0 || (ret & active_flags) == active_flags) {
			dev_pe = dev_pe->parent;
			continue;
		}

		/* Frozen parent PE */
		*pe = dev_pe;
		if (!(dev_pe->state & EEH_PE_ISOLATED))
			phb->freeze_pe(phb, dev_pe->addr);

		/* Next one */
		dev_pe = dev_pe->parent;
	}

	return 0;
}

/**
 * pnv_eeh_next_error - Retrieve next EEH error to handle
 * @pe: Affected PE
 *
 * This function is expected to be called by the EEH core when it gets
 * a special EEH event (without a bound PE). It calls the OPAL APIs
 * for the next error to handle. Informational errors are handled
 * internally by the platform; however, a dead IOC, a dead PHB, a
 * fenced PHB and frozen PEs should eventually be handled by the
 * EEH core.
 */
static int pnv_eeh_next_error(struct eeh_pe **pe)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct eeh_pe *phb_pe, *parent_pe;
	__be64 frozen_pe_no;
	__be16 err_type, severity;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	long rc;
	int state, ret = EEH_NEXT_ERR_NONE;

	/*
	 * While running here, it's safe to purge the event queue.
	 * We should also keep the cached OPAL notifier event
	 * synchronized between the kernel and firmware.
	 */
	eeh_remove_event(NULL, false);
	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);

	list_for_each_entry(hose, &hose_list, list_node) {
		/*
		 * If the subordinate PCI buses of the PHB have been
		 * removed or are undergoing error recovery, we
		 * needn't take care of them any more.
		 */
		phb = hose->private_data;
		phb_pe = eeh_phb_pe_get(hose);
		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
			continue;

		rc = opal_pci_next_error(phb->opal_id,
					 &frozen_pe_no, &err_type, &severity);
		if (rc != OPAL_SUCCESS) {
			pr_devel("%s: Invalid return value on "
				 "PHB#%x (0x%lx) from opal_pci_next_error\n",
				 __func__, hose->global_number, rc);
			continue;
		}

		/* If the PHB doesn't have an error, stop processing */
		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
			pr_devel("%s: No error found on PHB#%x\n",
				 __func__, hose->global_number);
			continue;
		}

		/*
		 * Process the error. When there are multiple errors on
		 * a specific PHB, we expect the one with the highest
		 * priority to be reported.
		 */
		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
			__func__, be16_to_cpu(err_type),
			be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
			hose->global_number);
		switch (be16_to_cpu(err_type)) {
		case OPAL_EEH_IOC_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
				pr_err("EEH: dead IOC detected\n");
				ret = EEH_NEXT_ERR_DEAD_IOC;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: IOC informative error "
					"detected\n");
				pnv_eeh_get_and_dump_hub_diag(hose);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PHB_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
				*pe = phb_pe;
				pr_err("EEH: dead PHB#%x detected, "
				       "location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_DEAD_PHB;
			} else if (be16_to_cpu(severity) ==
				   OPAL_EEH_SEV_PHB_FENCED) {
				*pe = phb_pe;
				pr_err("EEH: Fenced PHB#%x detected, "
				       "location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FENCED_PHB;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: PHB#%x informative error "
					"detected, location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				pnv_eeh_get_phb_diag(phb_pe);
				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PE_ERROR:
			/*
			 * If we can't find the corresponding PE, we
			 * just try to unfreeze.
			 */
			if (pnv_eeh_get_pe(hose,
				be64_to_cpu(frozen_pe_no), pe)) {
				/* Try best to clear it */
				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
					hose->global_number,
					be64_to_cpu(frozen_pe_no));
				pr_info("EEH: PHB location: %s\n",
					eeh_pe_loc_get(phb_pe));
				opal_pci_eeh_freeze_clear(phb->opal_id,
					be64_to_cpu(frozen_pe_no),
					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
				ret = EEH_NEXT_ERR_NONE;
			} else if ((*pe)->state & EEH_PE_ISOLATED ||
				   eeh_pe_passed(*pe)) {
				ret = EEH_NEXT_ERR_NONE;
			} else {
				pr_err("EEH: Frozen PE#%x "
				       "on PHB#%x detected\n",
				       (*pe)->addr,
					(*pe)->phb->global_number);
				pr_err("EEH: PE location: %s, "
				       "PHB location: %s\n",
				       eeh_pe_loc_get(*pe),
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FROZEN_PE;
			}

			break;
		default:
			pr_warn("%s: Unexpected error type %d\n",
				__func__, be16_to_cpu(err_type));
		}

		/*
		 * The EEH core will try to recover from a fenced PHB or
		 * a frozen PE. For a frozen PE, the EEH core enables the
		 * I/O path before collecting logs, but that disturbs
		 * the error site. So we have to dump the log in advance
		 * here.
		 */
		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
		    !((*pe)->state & EEH_PE_ISOLATED)) {
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
			pnv_eeh_get_phb_diag(*pe);

			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				pnv_pci_dump_phb_diag_data((*pe)->phb,
							   (*pe)->data);
		}

		/*
		 * A frozen parent PE may exist out there, and
		 * it has to be handled first.
		 */
		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
			parent_pe = (*pe)->parent;
			while (parent_pe) {
				/* Hit the ceiling ? */
				if (parent_pe->type & EEH_PE_PHB)
					break;

				/* Frozen parent PE ? */
				state = eeh_ops->get_state(parent_pe, NULL);
				if (state > 0 &&
				    (state & active_flags) != active_flags)
					*pe = parent_pe;

				/* Next parent level */
				parent_pe = parent_pe->parent;
			}

			/* We possibly migrated to another PE */
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
		}

		/*
		 * If there are no errors on the specific PHB, or only
		 * informative errors there, we continue poking it.
		 * Otherwise, actions need to be taken by an upper
		 * layer.
		 */
		if (ret > EEH_NEXT_ERR_INF)
			break;
	}

	return ret;
}

static int pnv_eeh_restore_config(struct device_node *dn)
{
	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
	struct pnv_phb *phb;
	s64 ret;

	if (!edev)
		return -EEXIST;

	phb = edev->phb->private_data;
	ret = opal_pci_reinit(phb->opal_id,
			      OPAL_REINIT_PCI_DEV, edev->config_addr);
	if (ret) {
		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
			__func__, edev->config_addr, ret);
		return -EIO;
	}

	return 0;
}

static struct eeh_ops pnv_eeh_ops = {
	.name                   = "powernv",
	.init                   = pnv_eeh_init,
	.post_init              = pnv_eeh_post_init,
	.of_probe               = NULL,
	.dev_probe              = pnv_eeh_dev_probe,
	.set_option             = pnv_eeh_set_option,
	.get_pe_addr            = pnv_eeh_get_pe_addr,
	.get_state              = pnv_eeh_get_state,
	.reset                  = pnv_eeh_reset,
	.wait_state             = pnv_eeh_wait_state,
	.get_log                = pnv_eeh_get_log,
	.configure_bridge       = pnv_eeh_configure_bridge,
	.err_inject		= pnv_eeh_err_inject,
	.read_config            = pnv_eeh_read_config,
	.write_config           = pnv_eeh_write_config,
	.next_error		= pnv_eeh_next_error,
	.restore_config		= pnv_eeh_restore_config
};

/**
 * eeh_powernv_init - Register platform dependent EEH operations
 *
 * EEH initialization on the powernv platform. This function should
 * be called before any other EEH-related functions.
 */
static int __init eeh_powernv_init(void)
{
	int ret = -EINVAL;

	eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
	ret = eeh_ops_register(&pnv_eeh_ops);
	if (!ret)
		pr_info("EEH: PowerNV platform initialized\n");
	else
		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);

	return ret;
}
machine_early_initcall(powernv, eeh_powernv_init);