xref: /openbmc/qemu/hw/pci-host/pnv_phb4.c (revision e068b57d)
1 /*
2  * QEMU PowerPC PowerNV (POWER9) PHB4 model
3  *
4  * Copyright (c) 2018-2020, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qapi/visitor.h"
12 #include "qapi/error.h"
13 #include "qemu-common.h"
14 #include "monitor/monitor.h"
15 #include "target/ppc/cpu.h"
16 #include "hw/pci-host/pnv_phb4_regs.h"
17 #include "hw/pci-host/pnv_phb4.h"
18 #include "hw/pci/pcie_host.h"
19 #include "hw/pci/pcie_port.h"
20 #include "hw/ppc/pnv.h"
21 #include "hw/ppc/pnv_xscom.h"
22 #include "hw/irq.h"
23 #include "hw/qdev-properties.h"
24 #include "qom/object.h"
25 #include "trace.h"
26 
/*
 * Log a guest-triggerable PHB error, tagged with the chip and PHB ids.
 * Appends its own '\n' — callers should not include one in @fmt.
 */
#define phb_error(phb, fmt, ...)                                        \
    qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n",            \
                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
30 
31 /*
32  * QEMU version of the GETFIELD/SETFIELD macros
33  *
34  * These are common with the PnvXive model.
35  */
static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
{
    /* Extract the field selected by @mask from @word, right-justified */
    unsigned int shift = ctz64(mask);

    return (word & mask) >> shift;
}
40 
static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
                                uint64_t value)
{
    /* Replace the field selected by @mask in @word with @value */
    unsigned int shift = ctz64(mask);

    return (word & ~mask) | ((value << shift) & mask);
}
46 
47 static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
48 {
49     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
50     uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
51     uint8_t bus, devfn;
52 
53     if (!(addr >> 63)) {
54         return NULL;
55     }
56     bus = (addr >> 52) & 0xff;
57     devfn = (addr >> 44) & 0xff;
58 
59     /* We don't access the root complex this way */
60     if (bus == 0 && devfn == 0) {
61         return NULL;
62     }
63     return pci_find_device(pci->bus, bus, devfn);
64 }
65 
66 /*
67  * The CONFIG_DATA register expects little endian accesses, but as the
68  * region is big endian, we have to swap the value.
69  */
70 static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
71                                   unsigned size, uint64_t val)
72 {
73     uint32_t cfg_addr, limit;
74     PCIDevice *pdev;
75 
76     pdev = pnv_phb4_find_cfg_dev(phb);
77     if (!pdev) {
78         return;
79     }
80     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
81     cfg_addr |= off;
82     limit = pci_config_size(pdev);
83     if (limit <= cfg_addr) {
84         /*
85          * conventional pci device can be behind pcie-to-pci bridge.
86          * 256 <= addr < 4K has no effects.
87          */
88         return;
89     }
90     switch (size) {
91     case 1:
92         break;
93     case 2:
94         val = bswap16(val);
95         break;
96     case 4:
97         val = bswap32(val);
98         break;
99     default:
100         g_assert_not_reached();
101     }
102     pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
103 }
104 
105 static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
106                                      unsigned size)
107 {
108     uint32_t cfg_addr, limit;
109     PCIDevice *pdev;
110     uint64_t val;
111 
112     pdev = pnv_phb4_find_cfg_dev(phb);
113     if (!pdev) {
114         return ~0ull;
115     }
116     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
117     cfg_addr |= off;
118     limit = pci_config_size(pdev);
119     if (limit <= cfg_addr) {
120         /*
121          * conventional pci device can be behind pcie-to-pci bridge.
122          * 256 <= addr < 4K has no effects.
123          */
124         return ~0ull;
125     }
126     val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
127     switch (size) {
128     case 1:
129         return val;
130     case 2:
131         return bswap16(val);
132     case 4:
133         return bswap32(val);
134     default:
135         g_assert_not_reached();
136     }
137 }
138 
139 /*
140  * Root complex register accesses are memory mapped.
141  */
142 static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
143                                      unsigned size, uint64_t val)
144 {
145     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
146     PCIDevice *pdev;
147 
148     if (size != 4) {
149         phb_error(phb, "rc_config_write invalid size %d\n", size);
150         return;
151     }
152 
153     pdev = pci_find_device(pci->bus, 0, 0);
154     assert(pdev);
155 
156     pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
157                                  bswap32(val), 4);
158 }
159 
160 static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
161                                         unsigned size)
162 {
163     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
164     PCIDevice *pdev;
165     uint64_t val;
166 
167     if (size != 4) {
168         phb_error(phb, "rc_config_read invalid size %d\n", size);
169         return ~0ull;
170     }
171 
172     pdev = pci_find_device(pci->bus, 0, 0);
173     assert(pdev);
174 
175     val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
176     return bswap32(val);
177 }
178 
/*
 * Re-evaluate MMIO BAR table (MBT) entry @index: tear down any
 * existing mapping, then, if the entry is enabled, map a window of
 * PCI MMIO space as an alias inside whichever PEC stack BAR
 * (mmbar0/mmbar1) contains it.
 */
static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
{
    uint64_t base, start, size, mbe0, mbe1;
    MemoryRegion *parent;
    char name[64];

    /* Unmap first */
    if (memory_region_is_mapped(&phb->mr_mmio[index])) {
        /* Should we destroy it in RCU friendly way... ? */
        memory_region_del_subregion(phb->mr_mmio[index].container,
                                    &phb->mr_mmio[index]);
    }

    /* Get table entry: each MBT entry is a pair of 64-bit words */
    mbe0 = phb->ioda_MBT[(index << 1)];
    mbe1 = phb->ioda_MBT[(index << 1) + 1];

    if (!(mbe0 & IODA3_MBT0_ENABLE)) {
        return;
    }

    /* Grab geometry from registers */
    base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
    size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
    /* The mask field is inverted: sign-extend and negate to get a size */
    size |= 0xff00000000000000ull;
    size = ~size + 1;

    /* Calculate PCI side start address based on M32/M64 window type */
    if (mbe0 & IODA3_MBT0_TYPE_M32) {
        start = phb->regs[PHB_M32_START_ADDR >> 3];
        if ((start + size) > 0x100000000ull) {
            phb_error(phb, "M32 set beyond 4GB boundary !");
            size = 0x100000000 - start;
        }
    } else {
        start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
    }

    /* TODO: Figure out how to implement/decode AOMASK */

    /* Check if it matches an enabled MMIO region in the PEC stack */
    if (memory_region_is_mapped(&phb->stack->mmbar0) &&
        base >= phb->stack->mmio0_base &&
        (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) {
        parent = &phb->stack->mmbar0;
        base -= phb->stack->mmio0_base;
    } else if (memory_region_is_mapped(&phb->stack->mmbar1) &&
        base >= phb->stack->mmio1_base &&
        (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) {
        parent = &phb->stack->mmbar1;
        base -= phb->stack->mmio1_base;
    } else {
        phb_error(phb, "PHB MBAR %d out of parent bounds", index);
        return;
    }

    /* Create alias (better name ?) */
    snprintf(name, sizeof(name), "phb4-mbar%d", index);
    memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
                             &phb->pci_mmio, start, size);
    memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
}
241 
242 static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
243 {
244     uint64_t i;
245     uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
246         PNV_PHB4_MIN_MMIO_WINDOWS;
247 
248     for (i = 0; i < num_windows; i++) {
249         pnv_phb4_check_mbt(phb, i);
250     }
251 }
252 
/*
 * Decode PHB_IODA_ADDR and return a pointer into the selected IODA3
 * table at the selected index, or NULL for tables that have no
 * backing storage in this model (RCAM, MRT, PESTA/B, TCR, TDR).
 *
 * @out_table/@out_idx return the decoded selector and (masked) index
 * so callers can special-case PESTA/B.  When auto-increment is set,
 * the bumped index is written back to the register cache.
 */
static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
                                      unsigned *out_table, unsigned *out_idx)
{
    uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
    unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
    unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
    unsigned int mask;
    uint64_t *tptr = NULL;

    /* Each table's size (hence index mask) depends on big vs small PHB */
    switch (table) {
    case IODA3_TBL_LIST:
        tptr = phb->ioda_LIST;
        mask = 7;
        break;
    case IODA3_TBL_MIST:
        tptr = phb->ioda_MIST;
        mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
        mask -= 1;
        break;
    case IODA3_TBL_RCAM:
        mask = phb->big_phb ? 127 : 63;
        break;
    case IODA3_TBL_MRT:
        mask = phb->big_phb ? 15 : 7;
        break;
    case IODA3_TBL_PESTA:
    case IODA3_TBL_PESTB:
        mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
        mask -= 1;
        break;
    case IODA3_TBL_TVT:
        tptr = phb->ioda_TVT;
        mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
        mask -= 1;
        break;
    case IODA3_TBL_TCR:
    case IODA3_TBL_TDR:
        mask = phb->big_phb ? 1023 : 511;
        break;
    case IODA3_TBL_MBT:
        tptr = phb->ioda_MBT;
        mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
        mask -= 1;
        break;
    case IODA3_TBL_MDT:
        tptr = phb->ioda_MDT;
        mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
        mask -= 1;
        break;
    case IODA3_TBL_PEEV:
        tptr = phb->ioda_PEEV;
        mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
        mask -= 1;
        break;
    default:
        phb_error(phb, "invalid IODA table %d", table);
        return NULL;
    }
    index &= mask;
    if (out_idx) {
        *out_idx = index;
    }
    if (out_table) {
        *out_table = table;
    }
    if (tptr) {
        tptr += index;
    }
    /* Auto-increment: advance the index (with wrap) for the next access */
    if (adreg & PHB_IODA_AD_AUTOINC) {
        index = (index + 1) & mask;
        adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
    }

    phb->regs[PHB_IODA_ADDR >> 3] = adreg;
    return tptr;
}
329 
330 static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
331 {
332     unsigned table, idx;
333     uint64_t *tptr;
334 
335     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
336     if (!tptr) {
337         /* Special PESTA case */
338         if (table == IODA3_TBL_PESTA) {
339             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63;
340         } else if (table == IODA3_TBL_PESTB) {
341             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62;
342         }
343         /* Return 0 on unsupported tables, not ff's */
344         return 0;
345     }
346     return *tptr;
347 }
348 
/*
 * Write to the IODA table currently selected by PHB_IODA_ADDR.
 *
 * PESTA/PESTB only model their top bit (kept in ioda_PEST_AB); MIST
 * honours the partial-write mask from PHB_IODA_ADDR; MBT writes
 * mirror the valid bit into the entry's other half and retrigger the
 * MMIO window mapping check.
 */
static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
{
    unsigned table, idx;
    uint64_t *tptr;

    tptr = pnv_phb4_ioda_access(phb, &table, &idx);
    if (!tptr) {
        /* Special PESTA case */
        if (table == IODA3_TBL_PESTA) {
            phb->ioda_PEST_AB[idx] &= ~1;
            phb->ioda_PEST_AB[idx] |= (val >> 63) & 1;
        } else if (table == IODA3_TBL_PESTB) {
            phb->ioda_PEST_AB[idx] &= ~2;
            phb->ioda_PEST_AB[idx] |= (val >> 62) & 2;
        }
        return;
    }

    /* Handle side effects */
    switch (table) {
    case IODA3_TBL_LIST:
        break;
    case IODA3_TBL_MIST: {
        /* Special mask for MIST partial write */
        uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
        uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
        uint64_t v = *tptr;
        /* A zero mask means "update all four 16-bit fields" */
        if (mmask == 0) {
            mmask = 0xf;
        }
        /*
         * NOTE(review): the 0xcfff update masks leave bits 12-13 of
         * each 16-bit field untouched — presumably reserved bits;
         * confirm against the PHB4 specification.
         */
        if (mmask & 8) {
            v &= 0x0000ffffffffffffull;
            v |= 0xcfff000000000000ull & val;
        }
        if (mmask & 4) {
            v &= 0xffff0000ffffffffull;
            v |= 0x0000cfff00000000ull & val;
        }
        if (mmask & 2) {
            v &= 0xffffffff0000ffffull;
            v |= 0x00000000cfff0000ull & val;
        }
        if (mmask & 1) {
            v &= 0xffffffffffff0000ull;
            v |= 0x000000000000cfffull & val;
        }
        *tptr = v;
        break;
    }
    case IODA3_TBL_MBT:
        *tptr = val;

        /* Copy across the valid bit to the other half */
        phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
        phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;

        /* Update mappings */
        pnv_phb4_check_mbt(phb, idx >> 1);
        break;
    default:
        *tptr = val;
    }
}
412 
413 static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
414 {
415     PnvPhb4DMASpace *ds;
416 
417     /* Always invalidate all for now ... */
418     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
419         ds->pe_num = PHB_INVALID_PE;
420     }
421 }
422 
423 static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
424 {
425     uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
426 
427     if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
428         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
429             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
430                                         0xffff0000, &ds->msi32_mr);
431         }
432     } else {
433         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
434             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
435                                         &ds->msi32_mr);
436         }
437     }
438 
439     if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
440         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
441             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
442                                         (1ull << 60), &ds->msi64_mr);
443         }
444     } else {
445         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
446             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
447                                         &ds->msi64_mr);
448         }
449     }
450 }
451 
452 static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
453 {
454     PnvPhb4DMASpace *ds;
455 
456     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
457         pnv_phb4_update_msi_regions(ds);
458     }
459 }
460 
461 static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
462 {
463     int shift, flags, i, lsi_base;
464     XiveSource *xsrc = &phb->xsrc;
465 
466     /* The XIVE source characteristics can be set at run time */
467     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
468         shift = XIVE_ESB_64K;
469     } else {
470         shift = XIVE_ESB_4K;
471     }
472     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
473         flags = XIVE_SRC_STORE_EOI;
474     } else {
475         flags = 0;
476     }
477 
478     phb->xsrc.esb_shift = shift;
479     phb->xsrc.esb_flags = flags;
480 
481     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
482     lsi_base <<= 3;
483 
484     /* TODO: handle reset values of PHB_LSI_SRC_ID */
485     if (!lsi_base) {
486         return;
487     }
488 
489     /* TODO: need a xive_source_irq_reset_lsi() */
490     bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
491 
492     for (i = 0; i < xsrc->nr_irqs; i++) {
493         bool msi = (i < lsi_base || i >= (lsi_base + 8));
494         if (!msi) {
495             xive_source_irq_set_lsi(xsrc, i);
496         }
497     }
498 }
499 
/*
 * MMIO write handler for the PHB4 register space.
 *
 * The embedded CONFIG_DATA and root complex config windows are routed
 * to the PCI config helpers and accept sub-8-byte accesses; every
 * other register requires an aligned 8-byte access.  The value is
 * masked per-register, cached in phb->regs[], and side effects (MSI
 * windows, MBT mappings, IODA, RTC invalidation, XIVE source update)
 * are applied afterwards.
 */
static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
                               unsigned size)
{
    PnvPHB4 *phb = PNV_PHB4(opaque);
    bool changed;

    /* Special case outbound configuration data */
    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
        pnv_phb4_config_write(phb, off & 0x3, size, val);
        return;
    }

    /* Special case RC configuration space */
    if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
        pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
        return;
    }

    /* Other registers are 64-bit only */
    if (size != 8 || off & 0x7) {
        phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
                   off, size);
        return;
    }

    /* Handle masking */
    switch (off) {
    case PHB_LSI_SOURCE_ID:
        val &= PHB_LSI_SRC_ID;
        break;
    case PHB_M64_UPPER_BITS:
        val &= 0xff00000000000000ull;
        break;
    /* TCE Kill */
    case PHB_TCE_KILL:
        /* Clear top 3 bits which HW does to indicate successful queuing */
        val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
        break;
    case PHB_Q_DMA_R:
        /*
         * This is enough logic to make SW happy but we aren't
         * actually quiescing the DMAs
         */
        if (val & PHB_Q_DMA_R_AUTORESET) {
            val = 0;
        } else {
            val &= PHB_Q_DMA_R_QUIESCE_DMA;
        }
        break;
    /* LEM stuff: AND/OR masks update the target register and return early */
    case PHB_LEM_FIR_AND_MASK:
        phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
        return;
    case PHB_LEM_FIR_OR_MASK:
        phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
        return;
    case PHB_LEM_ERROR_AND_MASK:
        phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
        return;
    case PHB_LEM_ERROR_OR_MASK:
        phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
        return;
    case PHB_LEM_WOF:
        val = 0;
        break;
    /* TODO: More regs ..., maybe create a table with masks... */

    /* Read only registers */
    case PHB_CPU_LOADSTORE_STATUS:
    case PHB_ETU_ERR_SUMMARY:
    case PHB_PHB4_GEN_CAP:
    case PHB_PHB4_TCE_CAP:
    case PHB_PHB4_IRQ_CAP:
    case PHB_PHB4_EEH_CAP:
        return;
    }

    /* Record whether it changed */
    changed = phb->regs[off >> 3] != val;

    /* Store in register cache first */
    phb->regs[off >> 3] = val;

    /* Handle side effects */
    switch (off) {
    case PHB_PHB4_CONFIG:
        if (changed) {
            pnv_phb4_update_all_msi_regions(phb);
        }
        break;
    case PHB_M32_START_ADDR:
    case PHB_M64_UPPER_BITS:
        if (changed) {
            pnv_phb4_check_all_mbt(phb);
        }
        break;

    /* IODA table accesses */
    case PHB_IODA_DATA0:
        pnv_phb4_ioda_write(phb, val);
        break;

    /* RTC invalidation */
    case PHB_RTC_INVALIDATE:
        pnv_phb4_rtc_invalidate(phb, val);
        break;

    /* PHB Control (Affects XIVE source) */
    case PHB_CTRLR:
    case PHB_LSI_SOURCE_ID:
        pnv_phb4_update_xsrc(phb);
        break;

    /* Silent simple writes */
    case PHB_ASN_CMPM:
    case PHB_CONFIG_ADDRESS:
    case PHB_IODA_ADDR:
    case PHB_TCE_KILL:
    case PHB_TCE_SPEC_CTL:
    case PHB_PEST_BAR:
    case PHB_PELTV_BAR:
    case PHB_RTT_BAR:
    case PHB_LEM_FIR_ACCUM:
    case PHB_LEM_ERROR_MASK:
    case PHB_LEM_ACTION0:
    case PHB_LEM_ACTION1:
    case PHB_TCE_TAG_ENABLE:
    case PHB_INT_NOTIFY_ADDR:
    case PHB_INT_NOTIFY_INDEX:
    case PHB_DMARD_SYNC:
       break;

    /* Noise on anything else */
    default:
        qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
                      off, val);
    }
}
638 
/*
 * MMIO read handler for the PHB4 register space.
 *
 * CONFIG_DATA and RC config windows are forwarded to the PCI config
 * helpers; other registers are 8-byte only and served from the
 * phb->regs[] cache, with a few values synthesized on the fly
 * (version, capabilities, link state, DMA-read sync).
 */
static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
{
    PnvPHB4 *phb = PNV_PHB4(opaque);
    uint64_t val;

    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
        return pnv_phb4_config_read(phb, off & 0x3, size);
    }

    /* Special case RC configuration space */
    if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
        return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
    }

    /* Other registers are 64-bit only */
    if (size != 8 || off & 0x7) {
        phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
                   off, size);
        return ~0ull;
    }

    /* Default read from cache */
    val = phb->regs[off >> 3];

    switch (off) {
    case PHB_VERSION:
        return phb->version;

        /* Read-only */
    case PHB_PHB4_GEN_CAP:
        return 0xe4b8000000000000ull;
    case PHB_PHB4_TCE_CAP:
        return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
    case PHB_PHB4_IRQ_CAP:
        return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
    case PHB_PHB4_EEH_CAP:
        return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;

    /* IODA table accesses */
    case PHB_IODA_DATA0:
        return pnv_phb4_ioda_read(phb);

    /* Link training always appears trained */
    case PHB_PCIE_DLP_TRAIN_CTL:
        /* TODO: Do something sensible with speed ? */
        return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;

    /* DMA read sync: make it look like it's complete */
    case PHB_DMARD_SYNC:
        return PHB_DMARD_SYNC_COMPLETE;

    /* Silent simple reads */
    case PHB_LSI_SOURCE_ID:
    case PHB_CPU_LOADSTORE_STATUS:
    case PHB_ASN_CMPM:
    case PHB_PHB4_CONFIG:
    case PHB_M32_START_ADDR:
    case PHB_CONFIG_ADDRESS:
    case PHB_IODA_ADDR:
    case PHB_RTC_INVALIDATE:
    case PHB_TCE_KILL:
    case PHB_TCE_SPEC_CTL:
    case PHB_PEST_BAR:
    case PHB_PELTV_BAR:
    case PHB_RTT_BAR:
    case PHB_M64_UPPER_BITS:
    case PHB_CTRLR:
    case PHB_LEM_FIR_ACCUM:
    case PHB_LEM_ERROR_MASK:
    case PHB_LEM_ACTION0:
    case PHB_LEM_ACTION1:
    case PHB_TCE_TAG_ENABLE:
    case PHB_INT_NOTIFY_ADDR:
    case PHB_INT_NOTIFY_INDEX:
    case PHB_Q_DMA_R:
    case PHB_ETU_ERR_SUMMARY:
        break;

    /* Noise on anything else */
    default:
        qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
                      off, val);
    }
    return val;
}
724 
/*
 * PHB register space ops.  Access sizes 1-8 are accepted here; the
 * handlers themselves reject sub-8-byte accesses outside the
 * CONFIG_DATA and RC config windows.
 */
static const MemoryRegionOps pnv_phb4_reg_ops = {
    .read = pnv_phb4_reg_read,
    .write = pnv_phb4_reg_write,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .endianness = DEVICE_BIG_ENDIAN,
};
734 
/*
 * XSCOM read access to the PHB.  Most PHB registers are reached
 * indirectly: PHB_SCOM_HV_IND_ADDR latches an offset into the PHB
 * register space and PHB_SCOM_HV_IND_DATA reads it, with optional
 * 4-byte mode and address auto-increment.  A few ETU LEM/PMON
 * registers are mapped directly onto PHB register offsets.
 */
static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
{
    PnvPHB4 *phb = PNV_PHB4(opaque);
    uint32_t reg = addr >> 3;
    uint64_t val;
    hwaddr offset;

    switch (reg) {
    case PHB_SCOM_HV_IND_ADDR:
        return phb->scom_hv_ind_addr_reg;

    case PHB_SCOM_HV_IND_DATA:
        if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
            phb_error(phb, "Invalid indirect address");
            return ~0ull;
        }
        size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
        offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
        val = pnv_phb4_reg_read(phb, offset, size);
        /* Auto-increment wraps within the 16K register space */
        if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
            offset += size;
            offset &= 0x3fff;
            phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
                                                 phb->scom_hv_ind_addr_reg,
                                                 offset);
        }
        return val;
    case PHB_SCOM_ETU_LEM_FIR:
    case PHB_SCOM_ETU_LEM_FIR_AND:
    case PHB_SCOM_ETU_LEM_FIR_OR:
    case PHB_SCOM_ETU_LEM_FIR_MSK:
    case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
    case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
    case PHB_SCOM_ETU_LEM_ACT0:
    case PHB_SCOM_ETU_LEM_ACT1:
    case PHB_SCOM_ETU_LEM_WOF:
        offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
        return pnv_phb4_reg_read(phb, offset, size);
    case PHB_SCOM_ETU_PMON_CONFIG:
    case PHB_SCOM_ETU_PMON_CTR0:
    case PHB_SCOM_ETU_PMON_CTR1:
    case PHB_SCOM_ETU_PMON_CTR2:
    case PHB_SCOM_ETU_PMON_CTR3:
        offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
        return pnv_phb4_reg_read(phb, offset, size);

    default:
        qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
        return ~0ull;
    }
}
786 
/*
 * XSCOM write access to the PHB — mirror of pnv_phb4_xscom_read():
 * indirect writes through PHB_SCOM_HV_IND_ADDR/DATA plus a few
 * directly-mapped ETU LEM/PMON registers.
 */
static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
    PnvPHB4 *phb = PNV_PHB4(opaque);
    uint32_t reg = addr >> 3;
    hwaddr offset;

    switch (reg) {
    case PHB_SCOM_HV_IND_ADDR:
        /* Keep only the control bits and the 13-bit address field */
        phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff;
        break;
    case PHB_SCOM_HV_IND_DATA:
        if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
            phb_error(phb, "Invalid indirect address");
            break;
        }
        size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
        offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
        pnv_phb4_reg_write(phb, offset, val, size);
        /* Auto-increment wraps within the 16K register space */
        if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
            offset += size;
            offset &= 0x3fff;
            phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
                                                 phb->scom_hv_ind_addr_reg,
                                                 offset);
        }
        break;
    case PHB_SCOM_ETU_LEM_FIR:
    case PHB_SCOM_ETU_LEM_FIR_AND:
    case PHB_SCOM_ETU_LEM_FIR_OR:
    case PHB_SCOM_ETU_LEM_FIR_MSK:
    case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
    case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
    case PHB_SCOM_ETU_LEM_ACT0:
    case PHB_SCOM_ETU_LEM_ACT1:
    case PHB_SCOM_ETU_LEM_WOF:
        offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
        pnv_phb4_reg_write(phb, offset, val, size);
        break;
    case PHB_SCOM_ETU_PMON_CONFIG:
    case PHB_SCOM_ETU_PMON_CTR0:
    case PHB_SCOM_ETU_PMON_CTR1:
    case PHB_SCOM_ETU_PMON_CTR2:
    case PHB_SCOM_ETU_PMON_CTR3:
        offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
        pnv_phb4_reg_write(phb, offset, val, size);
        break;
    default:
        qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
                      "=%"PRIx64"\n", addr, val);
    }
}
839 
/* XSCOM access is always 8 bytes wide */
const MemoryRegionOps pnv_phb4_xscom_ops = {
    .read = pnv_phb4_xscom_read,
    .write = pnv_phb4_xscom_write,
    .valid.min_access_size = 8,
    .valid.max_access_size = 8,
    .impl.min_access_size = 8,
    .impl.max_access_size = 8,
    .endianness = DEVICE_BIG_ENDIAN,
};
849 
850 static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
851 {
852     /* Check that out properly ... */
853     return irq_num & 3;
854 }
855 
856 static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
857 {
858     PnvPHB4 *phb = PNV_PHB4(opaque);
859     uint32_t lsi_base;
860 
861     /* LSI only ... */
862     if (irq_num > 3) {
863         phb_error(phb, "IRQ %x is not an LSI", irq_num);
864     }
865     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
866     lsi_base <<= 3;
867     qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
868 }
869 
870 static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
871 {
872     uint64_t rtt, addr;
873     uint16_t rte;
874     int bus_num;
875     int num_PEs;
876 
877     /* Already resolved ? */
878     if (ds->pe_num != PHB_INVALID_PE) {
879         return true;
880     }
881 
882     /* We need to lookup the RTT */
883     rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
884     if (!(rtt & PHB_RTT_BAR_ENABLE)) {
885         phb_error(ds->phb, "DMA with RTT BAR disabled !");
886         /* Set error bits ? fence ? ... */
887         return false;
888     }
889 
890     /* Read RTE */
891     bus_num = pci_bus_num(ds->bus);
892     addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
893     addr += 2 * PCI_BUILD_BDF(bus_num, ds->devfn);
894     if (dma_memory_read(&address_space_memory, addr, &rte,
895                         sizeof(rte), MEMTXATTRS_UNSPECIFIED)) {
896         phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
897         /* Set error bits ? fence ? ... */
898         return false;
899     }
900     rte = be16_to_cpu(rte);
901 
902     /* Fail upon reading of invalid PE# */
903     num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
904     if (rte >= num_PEs) {
905         phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
906         rte &= num_PEs - 1;
907     }
908     ds->pe_num = rte;
909     return true;
910 }
911 
912 static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
913                                    bool is_write, uint64_t tve,
914                                    IOMMUTLBEntry *tlb)
915 {
916     uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
917     int32_t  lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
918     uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
919     uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
920 
921     /* Invalid levels */
922     if (lev > 4) {
923         phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
924         return;
925     }
926 
927     /* Invalid entry */
928     if (tts == 0) {
929         phb_error(ds->phb, "Access to invalid TVE");
930         return;
931     }
932 
933     /* IO Page Size of 0 means untranslated, else use TCEs */
934     if (tps == 0) {
935         /* TODO: Handle boundaries */
936 
937         /* Use 4k pages like q35 ... for now */
938         tlb->iova = addr & 0xfffffffffffff000ull;
939         tlb->translated_addr = addr & 0x0003fffffffff000ull;
940         tlb->addr_mask = 0xfffull;
941         tlb->perm = IOMMU_RW;
942     } else {
943         uint32_t tce_shift, tbl_shift, sh;
944         uint64_t base, taddr, tce, tce_mask;
945 
946         /* Address bits per bottom level TCE entry */
947         tce_shift = tps + 11;
948 
949         /* Address bits per table level */
950         tbl_shift = tts + 8;
951 
952         /* Top level table base address */
953         base = tta << 12;
954 
955         /* Total shift to first level */
956         sh = tbl_shift * lev + tce_shift;
957 
958         /* TODO: Limit to support IO page sizes */
959 
960         /* TODO: Multi-level untested */
961         while ((lev--) >= 0) {
962             /* Grab the TCE address */
963             taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
964             if (dma_memory_read(&address_space_memory, taddr, &tce,
965                                 sizeof(tce), MEMTXATTRS_UNSPECIFIED)) {
966                 phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
967                 return;
968             }
969             tce = be64_to_cpu(tce);
970 
971             /* Check permission for indirect TCE */
972             if ((lev >= 0) && !(tce & 3)) {
973                 phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
974                 phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
975                            is_write ? 'W' : 'R', tve);
976                 phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
977                            tta, lev, tts, tps);
978                 return;
979             }
980             sh -= tbl_shift;
981             base = tce & ~0xfffull;
982         }
983 
984         /* We exit the loop with TCE being the final TCE */
985         tce_mask = ~((1ull << tce_shift) - 1);
986         tlb->iova = addr & tce_mask;
987         tlb->translated_addr = tce & tce_mask;
988         tlb->addr_mask = ~tce_mask;
989         tlb->perm = tce & 3;
990         if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
991             phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
992             phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
993                        is_write ? 'W' : 'R', tve);
994             phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
995                        tta, lev, tts, tps);
996         }
997     }
998 }
999 
1000 static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
1001                                               hwaddr addr,
1002                                               IOMMUAccessFlags flag,
1003                                               int iommu_idx)
1004 {
1005     PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
1006     int tve_sel;
1007     uint64_t tve, cfg;
1008     IOMMUTLBEntry ret = {
1009         .target_as = &address_space_memory,
1010         .iova = addr,
1011         .translated_addr = 0,
1012         .addr_mask = ~(hwaddr)0,
1013         .perm = IOMMU_NONE,
1014     };
1015 
1016     /* Resolve PE# */
1017     if (!pnv_phb4_resolve_pe(ds)) {
1018         phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1019                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1020         return ret;
1021     }
1022 
1023     /* Check top bits */
1024     switch (addr >> 60) {
1025     case 00:
1026         /* DMA or 32-bit MSI ? */
1027         cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
1028         if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
1029             ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
1030             phb_error(ds->phb, "xlate on 32-bit MSI region");
1031             return ret;
1032         }
1033         /* Choose TVE XXX Use PHB4 Control Register */
1034         tve_sel = (addr >> 59) & 1;
1035         tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
1036         pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
1037         break;
1038     case 01:
1039         phb_error(ds->phb, "xlate on 64-bit MSI region");
1040         break;
1041     default:
1042         phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
1043     }
1044     return ret;
1045 }
1046 
1047 #define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region"
1048 DECLARE_INSTANCE_CHECKER(IOMMUMemoryRegion, PNV_PHB4_IOMMU_MEMORY_REGION,
1049                          TYPE_PNV_PHB4_IOMMU_MEMORY_REGION)
1050 
1051 static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
1052                                                     void *data)
1053 {
1054     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1055 
1056     imrc->translate = pnv_phb4_translate_iommu;
1057 }
1058 
/* QOM registration of the PHB4 IOMMU memory region type */
static const TypeInfo pnv_phb4_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
    .class_init = pnv_phb4_iommu_memory_region_class_init,
};
1064 
1065 /*
1066  * MSI/MSIX memory region implementation.
1067  * The handler handles both MSI and MSIX.
1068  */
1069 static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
1070                                uint64_t data, unsigned size)
1071 {
1072     PnvPhb4DMASpace *ds = opaque;
1073     PnvPHB4 *phb = ds->phb;
1074 
1075     uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
1076 
1077     /* Resolve PE# */
1078     if (!pnv_phb4_resolve_pe(ds)) {
1079         phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1080                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1081         return;
1082     }
1083 
1084     /* TODO: Check it doesn't collide with LSIs */
1085     if (src >= phb->xsrc.nr_irqs) {
1086         phb_error(phb, "MSI %d out of bounds", src);
1087         return;
1088     }
1089 
1090     /* TODO: check PE/MSI assignement */
1091 
1092     qemu_irq_pulse(phb->qirqs[src]);
1093 }
1094 
1095 /* There is no .read as the read result is undefined by PCI spec */
1096 static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
1097 {
1098     PnvPhb4DMASpace *ds = opaque;
1099 
1100     phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
1101     return -1;
1102 }
1103 
/* Shared MMIO ops for the per-device 32-bit and 64-bit MSI windows */
static const MemoryRegionOps pnv_phb4_msi_ops = {
    .read = pnv_phb4_msi_read,
    .write = pnv_phb4_msi_write,
    .endianness = DEVICE_LITTLE_ENDIAN
};
1109 
1110 static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
1111 {
1112     PnvPhb4DMASpace *ds;
1113 
1114     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
1115         if (ds->bus == bus && ds->devfn == devfn) {
1116             break;
1117         }
1118     }
1119     return ds;
1120 }
1121 
1122 static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1123 {
1124     PnvPHB4 *phb = opaque;
1125     PnvPhb4DMASpace *ds;
1126     char name[32];
1127 
1128     ds = pnv_phb4_dma_find(phb, bus, devfn);
1129 
1130     if (ds == NULL) {
1131         ds = g_malloc0(sizeof(PnvPhb4DMASpace));
1132         ds->bus = bus;
1133         ds->devfn = devfn;
1134         ds->pe_num = PHB_INVALID_PE;
1135         ds->phb = phb;
1136         snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
1137                  phb->phb_id);
1138         memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
1139                                  TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1140                                  OBJECT(phb), name, UINT64_MAX);
1141         address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
1142                            name);
1143         memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1144                               ds, "msi32", 0x10000);
1145         memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1146                               ds, "msi64", 0x100000);
1147         pnv_phb4_update_msi_regions(ds);
1148 
1149         QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
1150     }
1151     return &ds->dma_as;
1152 }
1153 
/*
 * Instance init: create the child objects (XIVE source and PCIe root
 * port) and preset the root port's bus address properties.  Actual
 * realization happens later in pnv_phb4_realize().
 */
static void pnv_phb4_instance_init(Object *obj)
{
    PnvPHB4 *phb = PNV_PHB4(obj);

    QLIST_INIT(&phb->dma_spaces);

    /* XIVE interrupt source object */
    object_initialize_child(obj, "source", &phb->xsrc, TYPE_XIVE_SOURCE);

    /* Root Port */
    object_initialize_child(obj, "root", &phb->root, TYPE_PNV_PHB4_ROOT_PORT);

    /* Single-function root port at devfn 0 */
    qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
    qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
}
1169 
/*
 * Realize the PHB4: set up the register MMIO space, the PCI root bus
 * with its IOMMU hook and single root port, and the XIVE interrupt
 * source that backs the PHB's interrupts.
 */
static void pnv_phb4_realize(DeviceState *dev, Error **errp)
{
    PnvPHB4 *phb = PNV_PHB4(dev);
    PCIHostState *pci = PCI_HOST_BRIDGE(dev);
    XiveSource *xsrc = &phb->xsrc;
    int nr_irqs;
    char name[32];

    /* The "stack" link property must have been set by the creator */
    assert(phb->stack);

    /* Set the "big_phb" flag: PHBs 0 and 3 get the larger resources */
    phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;

    /* Controller Registers */
    snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
             phb->phb_id);
    memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
                          name, 0x2000);

    /*
     * PHB4 doesn't support IO space. However, qemu gets very upset if
     * we don't have an IO region to anchor IO BARs onto so we just
     * initialize one which we never hook up to anything
     */

    snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
             phb->phb_id);
    memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);

    snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
             phb->phb_id);
    memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
                       PCI_MMIO_TOTAL_SIZE);

    /* Root bus with per-device IOMMU address spaces */
    pci->bus = pci_register_root_bus(dev, dev->id,
                                     pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
                                     &phb->pci_mmio, &phb->pci_io,
                                     0, 4, TYPE_PNV_PHB4_ROOT_BUS);
    pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
    pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;

    /* Add a single Root port */
    qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
    qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
    qdev_realize(DEVICE(&phb->root), BUS(pci->bus), &error_fatal);

    /* Setup XIVE Source: big PHBs get twice the interrupt count */
    if (phb->big_phb) {
        nr_irqs = PNV_PHB4_MAX_INTs;
    } else {
        nr_irqs = PNV_PHB4_MAX_INTs >> 1;
    }
    object_property_set_int(OBJECT(xsrc), "nr-irqs", nr_irqs, &error_fatal);
    object_property_set_link(OBJECT(xsrc), "xive", OBJECT(phb), &error_fatal);
    if (!qdev_realize(DEVICE(xsrc), NULL, errp)) {
        return;
    }

    pnv_phb4_update_xsrc(phb);

    /* One qemu_irq per source; triggered by the MSI/LSI paths above */
    phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
}
1232 
1233 static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
1234                                           PCIBus *rootbus)
1235 {
1236     PnvPHB4 *phb = PNV_PHB4(host_bridge);
1237 
1238     snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
1239              phb->chip_id, phb->phb_id);
1240     return phb->bus_path;
1241 }
1242 
1243 static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno)
1244 {
1245     PnvPHB4 *phb = PNV_PHB4(xf);
1246     uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
1247     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1248     uint64_t data = XIVE_TRIGGER_PQ | offset | srcno;
1249     MemTxResult result;
1250 
1251     trace_pnv_phb4_xive_notify(notif_port, data);
1252 
1253     address_space_stq_be(&address_space_memory, notif_port, data,
1254                          MEMTXATTRS_UNSPECIFIED, &result);
1255     if (result != MEMTX_OK) {
1256         phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port);
1257         return;
1258     }
1259 }
1260 
/* Creator-settable properties of the PHB4 device */
static Property pnv_phb4_properties[] = {
        /* PHB index within the chip */
        DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
        /* Owning chip id */
        DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
        /* PHB version register value */
        DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
        /* Back-link to the PEC stack hosting this PHB (required) */
        DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
                         PnvPhb4PecStack *),
        DEFINE_PROP_END_OF_LIST(),
};
1269 
1270 static void pnv_phb4_class_init(ObjectClass *klass, void *data)
1271 {
1272     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1273     DeviceClass *dc = DEVICE_CLASS(klass);
1274     XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass);
1275 
1276     hc->root_bus_path   = pnv_phb4_root_bus_path;
1277     dc->realize         = pnv_phb4_realize;
1278     device_class_set_props(dc, pnv_phb4_properties);
1279     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1280     dc->user_creatable  = false;
1281 
1282     xfc->notify         = pnv_phb4_xive_notify;
1283 }
1284 
/* QOM registration of the PHB4 device; implements the XIVE notifier */
static const TypeInfo pnv_phb4_type_info = {
    .name          = TYPE_PNV_PHB4,
    .parent        = TYPE_PCIE_HOST_BRIDGE,
    .instance_init = pnv_phb4_instance_init,
    .instance_size = sizeof(PnvPHB4),
    .class_init    = pnv_phb4_class_init,
    .interfaces = (InterfaceInfo[]) {
            { TYPE_XIVE_NOTIFIER },
            { },
    }
};
1296 
1297 static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
1298 {
1299     BusClass *k = BUS_CLASS(klass);
1300 
1301     /*
1302      * PHB4 has only a single root complex. Enforce the limit on the
1303      * parent bus
1304      */
1305     k->max_dev = 1;
1306 }
1307 
1308 static const TypeInfo pnv_phb4_root_bus_info = {
1309     .name = TYPE_PNV_PHB4_ROOT_BUS,
1310     .parent = TYPE_PCIE_BUS,
1311     .class_init = pnv_phb4_root_bus_class_init,
1312     .interfaces = (InterfaceInfo[]) {
1313         { INTERFACE_PCIE_DEVICE },
1314         { }
1315     },
1316 };
1317 
/*
 * Reset hook for the PHB4 root port: run the generic PCIe root port
 * reset, then override the bridge window registers with the values
 * the PHB4 firmware/hardware expects.
 */
static void pnv_phb4_root_port_reset(DeviceState *dev)
{
    PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
    PCIDevice *d = PCI_DEVICE(dev);
    uint8_t *conf = d->config;

    rpc->parent_reset(dev);

    /* Advertise the I/O range capability bits in base/limit */
    pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
                               PCI_IO_RANGE_MASK & 0xff);
    pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
                                 PCI_IO_RANGE_MASK & 0xff);
    /* Non-prefetchable memory window base/limit */
    pci_set_word(conf + PCI_MEMORY_BASE, 0);
    pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
    /* Prefetchable window; low bit set flags 64-bit addressing support */
    pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
    pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
    pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
    pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
}
1337 
1338 static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
1339 {
1340     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1341     Error *local_err = NULL;
1342 
1343     rpc->parent_realize(dev, &local_err);
1344     if (local_err) {
1345         error_propagate(errp, local_err);
1346         return;
1347     }
1348 }
1349 
1350 static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
1351 {
1352     DeviceClass *dc = DEVICE_CLASS(klass);
1353     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1354     PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
1355 
1356     dc->desc     = "IBM PHB4 PCIE Root Port";
1357     dc->user_creatable = false;
1358 
1359     device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
1360                                     &rpc->parent_realize);
1361     device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
1362                                   &rpc->parent_reset);
1363 
1364     k->vendor_id = PCI_VENDOR_ID_IBM;
1365     k->device_id = PNV_PHB4_DEVICE_ID;
1366     k->revision  = 0;
1367 
1368     rpc->exp_offset = 0x48;
1369     rpc->aer_offset = 0x100;
1370 
1371     dc->reset = &pnv_phb4_root_port_reset;
1372 }
1373 
/* QOM registration of the PHB4 root port device */
static const TypeInfo pnv_phb4_root_port_info = {
    .name          = TYPE_PNV_PHB4_ROOT_PORT,
    .parent        = TYPE_PCIE_ROOT_PORT,
    .instance_size = sizeof(PnvPHB4RootPort),
    .class_init    = pnv_phb4_root_port_class_init,
};
1380 
1381 static void pnv_phb4_register_types(void)
1382 {
1383     type_register_static(&pnv_phb4_root_bus_info);
1384     type_register_static(&pnv_phb4_root_port_info);
1385     type_register_static(&pnv_phb4_type_info);
1386     type_register_static(&pnv_phb4_iommu_memory_region_info);
1387 }
1388 
1389 type_init(pnv_phb4_register_types);
1390 
/*
 * Re-sync the PHB register space and the XIVE ESB pages with the
 * stack's PHB/INT BAR regions: everything is unmapped first, then
 * re-added only where the corresponding BAR is currently mapped.
 */
void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
{
    PnvPHB4 *phb = &stack->phb;

    /* Unmap first always */
    if (memory_region_is_mapped(&phb->mr_regs)) {
        memory_region_del_subregion(&stack->phbbar, &phb->mr_regs);
    }
    if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
        memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio);
    }

    /* Map registers if enabled */
    if (memory_region_is_mapped(&stack->phbbar)) {
        memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs);
    }

    /* Map ESB if enabled */
    if (memory_region_is_mapped(&stack->intbar)) {
        memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio);
    }

    /* Check/update m32 */
    pnv_phb4_check_all_mbt(phb);
}
1416 
1417 void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
1418 {
1419     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1420 
1421     monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n",
1422                    phb->chip_id, phb->phb_id,
1423                    offset, offset + phb->xsrc.nr_irqs - 1);
1424     xive_source_pic_print_info(&phb->xsrc, 0, mon);
1425 }
1426