/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
#include <asm/debug.h>

#include "powernv.h"
#include "pci.h"

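/*
 * define_pe_printk_level() generates the pe_err/pe_warn/pe_info
 * helpers, which prefix each message with the device (or bus) name
 * and the PE number.
 */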
#define define_pe_printk_level(func, kern_level)		\
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)	\
{								\
	struct va_format vaf;					\
	va_list args;						\
	char pfix[32];						\
	int r;							\
								\
	va_start(args, fmt);					\
								\
	vaf.fmt = fmt;						\
	vaf.va = &args;						\
								\
	if (pe->pdev)						\
		strlcpy(pfix, dev_name(&pe->pdev->dev),		\
			sizeof(pfix));				\
	else							\
		sprintf(pfix, "%04x:%02x     ",			\
			pci_domain_nr(pe->pbus),		\
			pe->pbus->number);			\
	r = printk(kern_level "pci %s: [PE# %.3d] %pV",		\
		   pfix, pe->pe_number, &vaf);			\
								\
	va_end(args);						\
								\
	return r;						\
}								\

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);

/*
 * stdcix is only supposed to be used in hypervisor real mode as per
 * the architecture spec
 */
static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
{
	__asm__ __volatile__("stdcix %0,0,%1"
		: : "r" (val), "r" (paddr) : "memory");
}

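/*
 * Allocate a PE number from the PHB bitmap. No lock is taken here:
 * find_next_zero_bit() and the atomic test_and_set_bit() are simply
 * retried if somebody else grabs the bit first.
 */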
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
	unsigned long pe;

	do {
		pe = find_next_zero_bit(phb->ioda.pe_alloc,
					phb->ioda.total_pe, 0);
		if (pe >= phb->ioda.total_pe)
			return IODA_INVALID_PE;
	} while (test_and_set_bit(pe, phb->ioda.pe_alloc));

	phb->ioda.pe_array[pe].phb = phb;
	phb->ioda.pe_array[pe].pe_number = pe;
	return pe;
}

static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
	WARN_ON(phb->ioda.pe_array[pe].pdev);

	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
	clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently this is only used when MSIs are enabled; this will change,
 * but in the meantime we need to protect it to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);

	if (!pdn)
		return NULL;
	if (pdn->pe_number == IODA_INVALID_PE)
		return NULL;
	return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

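/*
 * Tell OPAL which RIDs (bus/device/function numbers) this PE owns and
 * hook it into its parents' PELT-V so that error freezes propagate
 * down to it. The bus-number compare mask must cover a power-of-two
 * number of buses, hence the switch on count below.
 */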
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	/* Bus validation ? */
	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch (count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate buses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/* Associate PE in PELT */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}
	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

	/* Add to all parents' PELT-V */
	while (parent) {
		struct pci_dn *pdn = pci_get_pdn(parent);
		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
						pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
			/* XXX What to do in case of error ? */
		}
		parent = parent->bus->self;
	}
	/* Setup reverse map */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Setup one MVE on IODA1 */
	if (phb->type == PNV_PHB_IODA1) {
		pe->mve_number = pe->pe_number;
		rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
				      pe->pe_number);
		if (rc) {
			pe_err(pe, "OPAL error %ld setting up MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		} else {
			rc = opal_pci_set_mve_enable(phb->opal_id,
						     pe->mve_number, OPAL_ENABLE_MVE);
			if (rc) {
				pe_err(pe, "OPAL error %ld enabling MVE %d\n",
				       rc, pe->mve_number);
				pe->mve_number = -1;
			}
		}
	} else if (phb->type == PNV_PHB_IODA2)
		pe->mve_number = 0;

	return 0;
}

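/*
 * Keep the PHB's DMA PE list sorted by descending DMA weight;
 * pnv_ioda_setup_dma() walks it in that order when handing out
 * TCE32 segments, so heavier PEs are served first.
 */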
static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct pnv_ioda_pe *lpe;

	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
		if (lpe->dma_weight < pe->dma_weight) {
			list_add_tail(&pe->dma_link, &lpe->dma_link);
			return;
		}
	}
	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
	/* This is quite simplistic. The "base" weight of a device
	 * is 10. A weight of 0 means no DMA is to be accounted for it.
	 */

	/* If it's a bridge, no DMA */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	/* Reduce the weight of slow USB controllers */
	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		return 3;

	/* Increase the weight of RAID (includes Obsidian) */
	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		return 15;

	/* Default */
	return 10;
}

#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
			   pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyway
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */

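/*
 * Attach every device on @bus to @pe and accumulate the devices' DMA
 * weights into the PE. For "all" PEs (PNV_IODA_PE_BUS_ALL) this
 * recurses down the subordinate buses as well.
 */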
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pci_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device !\n",
				pci_name(dev));
			continue;
		}
		pci_dev_get(dev);
		pdn->pcidev = dev;
		pdn->pe_number = pe->pe_number;
		pe->dma_weight += pnv_ioda_dma_weight(dev);
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}

/*
 * There are 2 types of PCI-bus-sensitive PEs: one comprises a single
 * PCI bus, the other contains the primary PCI bus plus its subordinate
 * PCI devices and buses. The second type of PE is normally originated
 * by a PCIe-to-PCI bridge or a PLX switch downstream port.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int pe_num;

	pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			__func__, pci_domain_nr(bus), bus->number);
		return;
	}

	pe = &phb->ioda.pe_array[pe_num];
	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;
	pe->dma_weight = 0;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
			bus->busn_res.start, bus->busn_res.end, pe_num);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
			bus->busn_res.start, pe_num);

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pe->pbus = NULL;
		return;
	}

	/* Associate it with all child devices */
	pnv_ioda_setup_same_PE(bus, pe);

	/* Put PE to the list */
	list_add_tail(&pe->list, &phb->ioda.pe_list);

	/* Account for one DMA PE if at least one DMA capable device exists
	 * below the bridge
	 */
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);
}

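/*
 * Walk the bus tree and create PEs: one PE per bus, except that a bus
 * behind a PCIe-to-PCI bridge gets a single PE covering the bridge's
 * whole subordinate range.
 */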
static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	pnv_ioda_setup_bus_PE(bus, 0);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->subordinate) {
			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
			else
				pnv_ioda_setup_PEs(dev->subordinate);
		}
	}
}

/*
 * Configure PEs so that the downstream PCI buses and devices
 * have their associated PE#. Unfortunately, we haven't figured
 * out a way to identify PLX bridges yet, so we simply put the
 * PCI bus and the subordinates behind the root port into PEs
 * here. This rule is expected to change as soon as we can
 * detect PLX bridges correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_PEs(hose->bus);
	}
}

static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;

	/*
	 * The function can be called while the PE#
	 * hasn't been assigned. Do nothing in that
	 * case.
	 */
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}

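/* Point every device below @bus at the PE's 32-bit TCE table. */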
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		set_iommu_table_base(&dev->dev, &pe->tce32_table);
		if (dev->subordinate)
			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
	}
}

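/*
 * Invalidate the hardware-cached TCEs covering [startp, endp]. On
 * IODA1 (p7ioc) the kill register is fed the real addresses of the
 * TCE entries; @rm selects the cache-inhibited real-mode store for
 * use from hypervisor real mode.
 */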
static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 u64 *startp, u64 *endp, bool rm)
{
	u64 __iomem *invalidate = rm ?
		(u64 __iomem *)pe->tce_inval_reg_phys :
		(u64 __iomem *)tbl->it_index;
	unsigned long start, end, inc;

	start = __pa(startp);
	end = __pa(endp);

	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
	if (tbl->it_busno) {
		start <<= 12;
		end <<= 12;
		inc = 128 << 12;
		start |= tbl->it_busno;
		end |= tbl->it_busno;
	} else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
		/* p7ioc-style invalidation, 2 TCEs per write */
		start |= (1ull << 63);
		end |= (1ull << 63);
		inc = 16;
	} else {
		/* Default (older HW) */
		inc = 128;
	}

	end |= inc - 1;	/* round up end to be different than start */

	mb(); /* Ensure above stores are visible */
	while (start <= end) {
		if (rm)
			__raw_rm_writeq(start, invalidate);
		else
			__raw_writeq(start, invalidate);
		start += inc;
	}

	/*
	 * The iommu layer will do another mb() for us on build()
	 * and we don't care on free()
	 */
}

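/*
 * IODA2 (PHB3) variant: the kill register is fed DMA addresses scoped
 * to the PE (the 0x2 in the top nibble selects PE-scoped invalidation),
 * so the start/end pointers are converted back to bus DMA addresses
 * via the table's it_offset/it_base.
 */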
static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 u64 *startp, u64 *endp, bool rm)
{
	unsigned long start, end, inc;
	u64 __iomem *invalidate = rm ?
		(u64 __iomem *)pe->tce_inval_reg_phys :
		(u64 __iomem *)tbl->it_index;

	/* We'll invalidate DMA addresses in PE scope */
	start = 0x2ul << 60;
	start |= (pe->pe_number & 0xFF);
	end = start;

	/* Figure out the start, end and step */
	inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
	start |= (inc << 12);
	inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
	end |= (inc << 12);
	inc = (0x1ul << 12);
	mb();

	while (start <= end) {
		if (rm)
			__raw_rm_writeq(start, invalidate);
		else
			__raw_writeq(start, invalidate);
		start += inc;
	}
}

void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
				 u64 *startp, u64 *endp, bool rm)
{
	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
					      tce32_table);
	struct pnv_phb *phb = pe->phb;

	if (phb->type == PNV_PHB_IODA1)
		pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
	else
		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
}

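/*
 * Set up an IODA1 32-bit DMA window for @pe: @segs 256MB TCE segments
 * starting at segment @base, backed by one contiguous TCE table.
 */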
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
				      struct pnv_ioda_pe *pe, unsigned int base,
				      unsigned int segs)
{
	struct page *tce_mem = NULL;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int i;
	int64_t rc;
	void *addr;

	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)

	/* XXX FIXME: Handle 64-bit only DMA devices */
	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
	/* XXX FIXME: Allocate multi-level tables on PHB3 */

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* Grab a 32-bit TCE table */
	pe->tce32_seg = base;
	pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
		(base << 28), ((base + segs) << 28) - 1);

	/* XXX Currently, we allocate one big contiguous table for the
	 * TCEs. We only really need one chunk per 256M of TCE space
	 * (ie per segment) but that's an optimization for later, it
	 * requires some added smarts with our get/put_tce implementation
	 */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(TCE32_TABLE_SIZE * segs));
	if (!tce_mem) {
		pe_err(pe, " Failed to allocate 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, TCE32_TABLE_SIZE * segs);

	/* Configure HW */
	for (i = 0; i < segs; i++) {
		rc = opal_pci_map_pe_dma_window(phb->opal_id,
					      pe->pe_number,
					      base + i, 1,
					      __pa(addr) + TCE32_TABLE_SIZE * i,
					      TCE32_TABLE_SIZE, 0x1000);
		if (rc) {
			pe_err(pe, " Failed to configure 32-bit TCE table,"
			       " err %ld\n", rc);
			goto fail;
		}
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
				  base << 28);

	/* OPAL variant of P7IOC SW invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to OR. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
				8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
			       TCE_PCI_SWINV_PAIR;
	}
	iommu_init_table(tbl, phb->hose->node);
	iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

	if (pe->pdev)
		set_iommu_table_base(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus);

	return;
 fail:
	/* XXX Failure: Try to fallback to 64-bit only ? */
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

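/*
 * IODA2 (PHB3) variant: a single TVE (TVT entry at PE# << 1) maps the
 * whole 32-bit DMA space with one TCE table, so no per-segment
 * accounting is needed.
 */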
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct page *tce_mem = NULL;
	void *addr;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int tce_table_size, end;
	int64_t rc;

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* The PE will reserve all of the possible 32-bit space */
	pe->tce32_seg = 0;
	end = (1 << ilog2(phb->ioda.m32_pci_base));
	tce_table_size = (end / 0x1000) * 8;
	pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
		end);

	/* Allocate TCE table */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(tce_table_size));
	if (!tce_mem) {
		pe_err(pe, "Failed to allocate 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, tce_table_size);

	/*
	 * Map TCE table through TVT. The TVE index is the PE number
	 * shifted by 1 bit for 32-bit DMA space.
	 */
	rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
					pe->pe_number << 1, 1, __pa(addr),
					tce_table_size, 0x1000);
	if (rc) {
		pe_err(pe, "Failed to configure 32-bit TCE table,"
		       " err %ld\n", rc);
		goto fail;
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);

	/* OPAL variant of PHB3 invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to OR. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
				8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
	}
	iommu_init_table(tbl, phb->hose->node);

	if (pe->pdev)
		set_iommu_table_base(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus);

	return;
fail:
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(tce_table_size));
}

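/*
 * Worked example of the segment distribution below: with 16 TCE32
 * segments, 4 DMA-capable PEs and a total weight of 40, the residual
 * is 12, so a PE of weight 10 receives 1 + (10 * 12 + 20) / 40 = 4
 * segments.
 */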
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	unsigned int residual, remaining, segs, tw, base;
	struct pnv_ioda_pe *pe;

	/* If we have more PE# than segments available, hand out one
	 * per PE until we run out and let the rest fail. If not,
	 * then we assign at least one segment per PE, plus more based
	 * on the number of devices under that PE
	 */
	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
		residual = 0;
	else
		residual = phb->ioda.tce32_count -
			phb->ioda.dma_pe_count;

	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
		hose->global_number, phb->ioda.tce32_count);
	pr_info("PCI: %d PE# for a total weight of %d\n",
		phb->ioda.dma_pe_count, phb->ioda.dma_weight);

	/* Walk our PE list and configure their DMA segments, hand them
	 * out one base segment plus any residual segments based on
	 * weight
	 */
	remaining = phb->ioda.tce32_count;
	tw = phb->ioda.dma_weight;
	base = 0;
	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
		if (!pe->dma_weight)
			continue;
		if (!remaining) {
			pe_warn(pe, "No DMA32 resources available\n");
			continue;
		}
		segs = 1;
		if (residual) {
			segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
			if (segs > remaining)
				segs = remaining;
		}

		/*
		 * For the IODA2-compliant PHB3, we needn't care about
		 * the weight. All of the available 32-bit DMA space is
		 * assigned to the PE.
		 */
		if (phb->type == PNV_PHB_IODA1) {
			pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
				pe->dma_weight, segs);
			pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
		} else {
			pe_info(pe, "Assign DMA32 space\n");
			segs = 0;
			pnv_pci_ioda2_setup_dma_pe(phb, pe);
		}

		remaining -= segs;
		base += segs;
	}
}

#ifdef CONFIG_PCI_MSI
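/*
 * EOI handler for PHB3 MSIs: tell OPAL the MSI has been handled
 * before performing the normal XICS EOI.
 */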
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	struct irq_chip *chip = irq_data_get_irq_chip(d);
	struct pnv_phb *phb = container_of(chip, struct pnv_phb,
					   ioda.irq_chip);
	int64_t rc;

	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
	WARN_ON_ONCE(rc);

	icp_native_eoi(d);
}

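/*
 * Program one MSI for @dev: bind the XIVE to the device's PE, query
 * OPAL for the MSI address/data pair, and on PHB3 swap in an IRQ chip
 * whose EOI also goes through OPAL.
 */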
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int virq,
				  unsigned int is_64, struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct irq_data *idata;
	struct irq_chip *ichip;
	unsigned int xive_num = hwirq - phb->msi_base;
	uint64_t addr64;
	uint32_t addr32, data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Force 32-bit MSI on some broken devices */
	if (pdn && pdn->force_32bit_msi)
		is_64 = 0;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = addr64 >> 32;
		msg->address_lo = addr64 & 0xfffffffful;
	} else {
		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = addr32;
	}
	msg->data = data;

	/*
	 * Change the IRQ chip for the MSI interrupts on PHB3.
	 * The corresponding IRQ chip is populated the first
	 * time through here.
	 */
	if (phb->type == PNV_PHB_IODA2) {
		if (!phb->ioda.irq_chip_init) {
			idata = irq_get_irq_data(virq);
			ichip = irq_data_get_irq_chip(idata);
			phb->ioda.irq_chip_init = 1;
			phb->ioda.irq_chip = *ichip;
			phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
		}

		irq_set_chip(virq, &phb->ioda.irq_chip);
	}

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
		 msg->address_hi, msg->address_lo, data, pe->pe_number);

	return 0;
}

static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
	unsigned int count;
	const __be32 *prop = of_get_property(phb->hose->dn,
					     "ibm,opal-msi-ranges", NULL);
	if (!prop) {
		/* BML Fallback */
		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
	}
	if (!prop)
		return;

	phb->msi_base = be32_to_cpup(prop);
	count = be32_to_cpup(prop + 1);
	if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
		       phb->hose->global_number);
		return;
	}

	phb->msi_setup = pnv_pci_ioda_msi_setup;
	phb->msi32_support = 1;
	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
		count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called on a PE basis, from top to
 * bottom, so that the I/O or MMIO segments assigned to a parent PE
 * can be overridden by its child PEs if necessary.
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
				  struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pci_bus_region region;
	struct resource *res;
	int i, index;
	int rc;

	/*
	 * NOTE: We only care about PCI-bus-based PEs for now. PCI-device-
	 * based PEs, for example SR-IOV sensitive VFs, will be figured
	 * out later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

	pci_bus_for_each_resource(pe->pbus, res, i) {
		if (!res || !res->flags ||
		    res->start > res->end)
			continue;

		if (res->flags & IORESOURCE_IO) {
			region.start = res->start - phb->ioda.io_pci_base;
			region.end   = res->end - phb->ioda.io_pci_base;
			index = region.start / phb->ioda.io_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.io_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping IO "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.io_segsize;
				index++;
			}
		} else if (res->flags & IORESOURCE_MEM) {
			/* WARNING: Assumes M32 is mem region 0 in PHB. We need to
			 * harden that algorithm when we start supporting M64
			 */
			region.start = res->start -
				       hose->mem_offset[0] -
				       phb->ioda.m32_pci_base;
			region.end   = res->end -
				       hose->mem_offset[0] -
				       phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}

static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;
	char name[16];

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;

		sprintf(name, "PCI%04x", hose->global_number);
		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
		if (!phb->dbgfs)
			pr_warning("%s: Error on creating debugfs on PHB#%x\n",
				__func__, hose->global_number);
	}
#endif /* CONFIG_DEBUG_FS */
}

static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();

	pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
	eeh_addr_cache_build();
	eeh_init();
#endif
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return I/O or M32 segment size for PE sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * needn't enlarge the alignment, which saves some resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We'll need to support the prefetchable memory window later */
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* The function is probably called while the PEs have
	 * not been created yet. For example, resource reassignment
	 * during PCI probe time. We just skip the check if the
	 * PEs aren't ready.
	 */
	if (!phb->initialized)
		return 0;

	pdn = pci_get_pdn(dev);
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return -EINVAL;

	return 0;
}

static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
			       u32 devfn)
{
	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
	opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
		       OPAL_ASSERT_RESET);
}

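/*
 * Probe a single IODA PHB: read the OPAL id and bus range from the
 * device tree, carve the M32 and I/O spaces into total_pe segments,
 * allocate the PE arrays and install the powernv PCI callbacks.
 */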
void __init pnv_pci_init_ioda_phb(struct device_node *np,
				  u64 hub_id, int ioda_type)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, iomap_off, pemap_off;
	const u64 *prop64;
	const u32 *prop32;
	int len;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err("  Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (!phb) {
		pr_err("  Out of memory !\n");
		return;
	}

	/* Allocate PCI controller */
	memset(phb, 0, sizeof(struct pnv_phb));
	phb->hose = hose = pcibios_alloc_controller(np);
	if (!phb->hose) {
		pr_err("  Can't allocate PCI controller for %s\n",
		       np->full_name);
		free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
		return;
	}

	spin_lock_init(&phb->lock);
	prop32 = of_get_property(np, "bus-range", &len);
	if (prop32 && len == 8) {
		hose->first_busno = prop32[0];
		hose->last_busno = prop32[1];
	} else {
		pr_warn("  Broken <bus-range> on %s\n", np->full_name);
		hose->first_busno = 0;
		hose->last_busno = 0xff;
	}
	hose->private_data = phb;
	phb->hub_id = hub_id;
	phb->opal_id = phb_id;
	phb->type = ioda_type;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
		phb->model = PNV_PHB_MODEL_PHB3;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* Parse 32-bit and IO ranges (if any) */
	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);

	/* Get registers */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err("  Failed to map registers !\n");

	/* Initialize more IODA stuff */
	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
	if (!prop32)
		phb->ioda.total_pe = 1;
	else
		phb->ioda.total_pe = *prop32;

	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* FW has already carved the top 64K (MSI space) off the M32
	 * window, add it back here
	 */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	iomap_off = size;
	if (phb->type == PNV_PHB_IODA1)
		size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	if (phb->type == PNV_PHB_IODA1)
		phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	set_bit(0, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ... */
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize,
		phb->ioda.io_size, phb->ioda.io_segsize);

	phb->hose->ops = &pnv_pci_ops;
#ifdef CONFIG_EEH
	phb->eeh_ops = &ioda_eeh_ops;
#endif

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

	/* Setup shutdown function for kexec */
	phb->shutdown = pnv_pci_ioda_shutdown;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. The
	 * expectation is that the PCI core will do correct I/O
	 * and MMIO alignment for the P2P bridge BARs so that
	 * each PCI bus (excluding the child P2P bridges) can
	 * form an individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);

	/*
	 * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
	 * has cleared the RTT which has the same effect
	 */
	if (ioda_type == PNV_PHB_IODA1)
		opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1, OPAL_MAP_PE);
}

void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
	struct device_node *phbn;
	const u64 *prop64;
	u64 hub_id;

	pr_info("Probing IODA IO-Hub %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
		return;
	}
	hub_id = be64_to_cpup(prop64);
	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

	/* Count child PHBs */
	for_each_child_of_node(np, phbn) {
		/* Look for IODA1 PHBs */
		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
			pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
	}
}