xref: /openbmc/qemu/hw/pci-host/pnv_phb4.c (revision 93dd625f)
1 /*
2  * QEMU PowerPC PowerNV (POWER9) PHB4 model
3  *
4  * Copyright (c) 2018-2020, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qapi/visitor.h"
12 #include "qapi/error.h"
13 #include "qemu-common.h"
14 #include "monitor/monitor.h"
15 #include "target/ppc/cpu.h"
16 #include "hw/pci-host/pnv_phb4_regs.h"
17 #include "hw/pci-host/pnv_phb4.h"
18 #include "hw/pci/pcie_host.h"
19 #include "hw/pci/pcie_port.h"
20 #include "hw/ppc/pnv.h"
21 #include "hw/ppc/pnv_xscom.h"
22 #include "hw/irq.h"
23 #include "hw/qdev-properties.h"
24 
/*
 * Log a guest error through qemu_log_mask(), prefixed with the chip and
 * PHB ids of @phb. A trailing newline is appended to @fmt.
 */
#define phb_error(phb, fmt, ...)                                  \
    qemu_log_mask(LOG_GUEST_ERROR,                                \
                  "phb4[%d:%d]: " fmt "\n",                       \
                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
28 
29 /*
30  * QEMU version of the GETFIELD/SETFIELD macros
31  *
32  * These are common with the PnvXive model.
33  */
static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
{
    /* Isolate the field, then shift it down to bit 0 */
    uint64_t field = word & mask;

    return field >> ctz64(mask);
}
38 
static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
                                uint64_t value)
{
    /* Bits of @word outside the field are kept as they are */
    uint64_t kept = word & ~mask;
    /* @value is moved into position and truncated to the field width */
    uint64_t placed = (value << ctz64(mask)) & mask;

    return kept | placed;
}
44 
45 static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
46 {
47     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
48     uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
49     uint8_t bus, devfn;
50 
51     if (!(addr >> 63)) {
52         return NULL;
53     }
54     bus = (addr >> 52) & 0xff;
55     devfn = (addr >> 44) & 0xff;
56 
57     /* We don't access the root complex this way */
58     if (bus == 0 && devfn == 0) {
59         return NULL;
60     }
61     return pci_find_device(pci->bus, bus, devfn);
62 }
63 
64 /*
65  * The CONFIG_DATA register expects little endian accesses, but as the
66  * region is big endian, we have to swap the value.
67  */
68 static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
69                                   unsigned size, uint64_t val)
70 {
71     uint32_t cfg_addr, limit;
72     PCIDevice *pdev;
73 
74     pdev = pnv_phb4_find_cfg_dev(phb);
75     if (!pdev) {
76         return;
77     }
78     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
79     cfg_addr |= off;
80     limit = pci_config_size(pdev);
81     if (limit <= cfg_addr) {
82         /*
83          * conventional pci device can be behind pcie-to-pci bridge.
84          * 256 <= addr < 4K has no effects.
85          */
86         return;
87     }
88     switch (size) {
89     case 1:
90         break;
91     case 2:
92         val = bswap16(val);
93         break;
94     case 4:
95         val = bswap32(val);
96         break;
97     default:
98         g_assert_not_reached();
99     }
100     pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
101 }
102 
103 static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
104                                      unsigned size)
105 {
106     uint32_t cfg_addr, limit;
107     PCIDevice *pdev;
108     uint64_t val;
109 
110     pdev = pnv_phb4_find_cfg_dev(phb);
111     if (!pdev) {
112         return ~0ull;
113     }
114     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
115     cfg_addr |= off;
116     limit = pci_config_size(pdev);
117     if (limit <= cfg_addr) {
118         /*
119          * conventional pci device can be behind pcie-to-pci bridge.
120          * 256 <= addr < 4K has no effects.
121          */
122         return ~0ull;
123     }
124     val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
125     switch (size) {
126     case 1:
127         return val;
128     case 2:
129         return bswap16(val);
130     case 4:
131         return bswap32(val);
132     default:
133         g_assert_not_reached();
134     }
135 }
136 
137 /*
138  * Root complex register accesses are memory mapped.
139  */
140 static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
141                                      unsigned size, uint64_t val)
142 {
143     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
144     PCIDevice *pdev;
145 
146     if (size != 4) {
147         phb_error(phb, "rc_config_write invalid size %d\n", size);
148         return;
149     }
150 
151     pdev = pci_find_device(pci->bus, 0, 0);
152     assert(pdev);
153 
154     pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
155                                  bswap32(val), 4);
156 }
157 
158 static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
159                                         unsigned size)
160 {
161     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
162     PCIDevice *pdev;
163     uint64_t val;
164 
165     if (size != 4) {
166         phb_error(phb, "rc_config_read invalid size %d\n", size);
167         return ~0ull;
168     }
169 
170     pdev = pci_find_device(pci->bus, 0, 0);
171     assert(pdev);
172 
173     val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
174     return bswap32(val);
175 }
176 
177 static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
178 {
179     uint64_t base, start, size, mbe0, mbe1;
180     MemoryRegion *parent;
181     char name[64];
182 
183     /* Unmap first */
184     if (memory_region_is_mapped(&phb->mr_mmio[index])) {
185         /* Should we destroy it in RCU friendly way... ? */
186         memory_region_del_subregion(phb->mr_mmio[index].container,
187                                     &phb->mr_mmio[index]);
188     }
189 
190     /* Get table entry */
191     mbe0 = phb->ioda_MBT[(index << 1)];
192     mbe1 = phb->ioda_MBT[(index << 1) + 1];
193 
194     if (!(mbe0 & IODA3_MBT0_ENABLE)) {
195         return;
196     }
197 
198     /* Grab geometry from registers */
199     base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
200     size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
201     size |= 0xff00000000000000ull;
202     size = ~size + 1;
203 
204     /* Calculate PCI side start address based on M32/M64 window type */
205     if (mbe0 & IODA3_MBT0_TYPE_M32) {
206         start = phb->regs[PHB_M32_START_ADDR >> 3];
207         if ((start + size) > 0x100000000ull) {
208             phb_error(phb, "M32 set beyond 4GB boundary !");
209             size = 0x100000000 - start;
210         }
211     } else {
212         start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
213     }
214 
215     /* TODO: Figure out how to implemet/decode AOMASK */
216 
217     /* Check if it matches an enabled MMIO region in the PEC stack */
218     if (memory_region_is_mapped(&phb->stack->mmbar0) &&
219         base >= phb->stack->mmio0_base &&
220         (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) {
221         parent = &phb->stack->mmbar0;
222         base -= phb->stack->mmio0_base;
223     } else if (memory_region_is_mapped(&phb->stack->mmbar1) &&
224         base >= phb->stack->mmio1_base &&
225         (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) {
226         parent = &phb->stack->mmbar1;
227         base -= phb->stack->mmio1_base;
228     } else {
229         phb_error(phb, "PHB MBAR %d out of parent bounds", index);
230         return;
231     }
232 
233     /* Create alias (better name ?) */
234     snprintf(name, sizeof(name), "phb4-mbar%d", index);
235     memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
236                              &phb->pci_mmio, start, size);
237     memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
238 }
239 
240 static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
241 {
242     uint64_t i;
243     uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
244         PNV_PHB4_MIN_MMIO_WINDOWS;
245 
246     for (i = 0; i < num_windows; i++) {
247         pnv_phb4_check_mbt(phb, i);
248     }
249 }
250 
251 static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
252                                       unsigned *out_table, unsigned *out_idx)
253 {
254     uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
255     unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
256     unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
257     unsigned int mask;
258     uint64_t *tptr = NULL;
259 
260     switch (table) {
261     case IODA3_TBL_LIST:
262         tptr = phb->ioda_LIST;
263         mask = 7;
264         break;
265     case IODA3_TBL_MIST:
266         tptr = phb->ioda_MIST;
267         mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
268         mask -= 1;
269         break;
270     case IODA3_TBL_RCAM:
271         mask = phb->big_phb ? 127 : 63;
272         break;
273     case IODA3_TBL_MRT:
274         mask = phb->big_phb ? 15 : 7;
275         break;
276     case IODA3_TBL_PESTA:
277     case IODA3_TBL_PESTB:
278         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
279         mask -= 1;
280         break;
281     case IODA3_TBL_TVT:
282         tptr = phb->ioda_TVT;
283         mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
284         mask -= 1;
285         break;
286     case IODA3_TBL_TCR:
287     case IODA3_TBL_TDR:
288         mask = phb->big_phb ? 1023 : 511;
289         break;
290     case IODA3_TBL_MBT:
291         tptr = phb->ioda_MBT;
292         mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
293         mask -= 1;
294         break;
295     case IODA3_TBL_MDT:
296         tptr = phb->ioda_MDT;
297         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
298         mask -= 1;
299         break;
300     case IODA3_TBL_PEEV:
301         tptr = phb->ioda_PEEV;
302         mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
303         mask -= 1;
304         break;
305     default:
306         phb_error(phb, "invalid IODA table %d", table);
307         return NULL;
308     }
309     index &= mask;
310     if (out_idx) {
311         *out_idx = index;
312     }
313     if (out_table) {
314         *out_table = table;
315     }
316     if (tptr) {
317         tptr += index;
318     }
319     if (adreg & PHB_IODA_AD_AUTOINC) {
320         index = (index + 1) & mask;
321         adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
322     }
323 
324     phb->regs[PHB_IODA_ADDR >> 3] = adreg;
325     return tptr;
326 }
327 
328 static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
329 {
330     unsigned table, idx;
331     uint64_t *tptr;
332 
333     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
334     if (!tptr) {
335         /* Special PESTA case */
336         if (table == IODA3_TBL_PESTA) {
337             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63;
338         } else if (table == IODA3_TBL_PESTB) {
339             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62;
340         }
341         /* Return 0 on unsupported tables, not ff's */
342         return 0;
343     }
344     return *tptr;
345 }
346 
347 static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
348 {
349     unsigned table, idx;
350     uint64_t *tptr;
351 
352     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
353     if (!tptr) {
354         /* Special PESTA case */
355         if (table == IODA3_TBL_PESTA) {
356             phb->ioda_PEST_AB[idx] &= ~1;
357             phb->ioda_PEST_AB[idx] |= (val >> 63) & 1;
358         } else if (table == IODA3_TBL_PESTB) {
359             phb->ioda_PEST_AB[idx] &= ~2;
360             phb->ioda_PEST_AB[idx] |= (val >> 62) & 2;
361         }
362         return;
363     }
364 
365     /* Handle side effects */
366     switch (table) {
367     case IODA3_TBL_LIST:
368         break;
369     case IODA3_TBL_MIST: {
370         /* Special mask for MIST partial write */
371         uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
372         uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
373         uint64_t v = *tptr;
374         if (mmask == 0) {
375             mmask = 0xf;
376         }
377         if (mmask & 8) {
378             v &= 0x0000ffffffffffffull;
379             v |= 0xcfff000000000000ull & val;
380         }
381         if (mmask & 4) {
382             v &= 0xffff0000ffffffffull;
383             v |= 0x0000cfff00000000ull & val;
384         }
385         if (mmask & 2) {
386             v &= 0xffffffff0000ffffull;
387             v |= 0x00000000cfff0000ull & val;
388         }
389         if (mmask & 1) {
390             v &= 0xffffffffffff0000ull;
391             v |= 0x000000000000cfffull & val;
392         }
393         *tptr = val;
394         break;
395     }
396     case IODA3_TBL_MBT:
397         *tptr = val;
398 
399         /* Copy accross the valid bit to the other half */
400         phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
401         phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
402 
403         /* Update mappings */
404         pnv_phb4_check_mbt(phb, idx >> 1);
405         break;
406     default:
407         *tptr = val;
408     }
409 }
410 
411 static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
412 {
413     PnvPhb4DMASpace *ds;
414 
415     /* Always invalidate all for now ... */
416     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
417         ds->pe_num = PHB_INVALID_PE;
418     }
419 }
420 
421 static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
422 {
423     uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
424 
425     if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
426         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
427             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
428                                         0xffff0000, &ds->msi32_mr);
429         }
430     } else {
431         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
432             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
433                                         &ds->msi32_mr);
434         }
435     }
436 
437     if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
438         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
439             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
440                                         (1ull << 60), &ds->msi64_mr);
441         }
442     } else {
443         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
444             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
445                                         &ds->msi64_mr);
446         }
447     }
448 }
449 
450 static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
451 {
452     PnvPhb4DMASpace *ds;
453 
454     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
455         pnv_phb4_update_msi_regions(ds);
456     }
457 }
458 
459 static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
460 {
461     int shift, flags, i, lsi_base;
462     XiveSource *xsrc = &phb->xsrc;
463 
464     /* The XIVE source characteristics can be set at run time */
465     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
466         shift = XIVE_ESB_64K;
467     } else {
468         shift = XIVE_ESB_4K;
469     }
470     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
471         flags = XIVE_SRC_STORE_EOI;
472     } else {
473         flags = 0;
474     }
475 
476     phb->xsrc.esb_shift = shift;
477     phb->xsrc.esb_flags = flags;
478 
479     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
480     lsi_base <<= 3;
481 
482     /* TODO: handle reset values of PHB_LSI_SRC_ID */
483     if (!lsi_base) {
484         return;
485     }
486 
487     /* TODO: need a xive_source_irq_reset_lsi() */
488     bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
489 
490     for (i = 0; i < xsrc->nr_irqs; i++) {
491         bool msi = (i < lsi_base || i >= (lsi_base + 8));
492         if (!msi) {
493             xive_source_irq_set_lsi(xsrc, i);
494         }
495     }
496 }
497 
498 static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
499                                unsigned size)
500 {
501     PnvPHB4 *phb = PNV_PHB4(opaque);
502     bool changed;
503 
504     /* Special case outbound configuration data */
505     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
506         pnv_phb4_config_write(phb, off & 0x3, size, val);
507         return;
508     }
509 
510     /* Special case RC configuration space */
511     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
512         pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
513         return;
514     }
515 
516     /* Other registers are 64-bit only */
517     if (size != 8 || off & 0x7) {
518         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
519                    off, size);
520         return;
521     }
522 
523     /* Handle masking */
524     switch (off) {
525     case PHB_LSI_SOURCE_ID:
526         val &= PHB_LSI_SRC_ID;
527         break;
528     case PHB_M64_UPPER_BITS:
529         val &= 0xff00000000000000ull;
530         break;
531     /* TCE Kill */
532     case PHB_TCE_KILL:
533         /* Clear top 3 bits which HW does to indicate successful queuing */
534         val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
535         break;
536     case PHB_Q_DMA_R:
537         /*
538          * This is enough logic to make SW happy but we aren't
539          * actually quiescing the DMAs
540          */
541         if (val & PHB_Q_DMA_R_AUTORESET) {
542             val = 0;
543         } else {
544             val &= PHB_Q_DMA_R_QUIESCE_DMA;
545         }
546         break;
547     /* LEM stuff */
548     case PHB_LEM_FIR_AND_MASK:
549         phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
550         return;
551     case PHB_LEM_FIR_OR_MASK:
552         phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
553         return;
554     case PHB_LEM_ERROR_AND_MASK:
555         phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
556         return;
557     case PHB_LEM_ERROR_OR_MASK:
558         phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
559         return;
560     case PHB_LEM_WOF:
561         val = 0;
562         break;
563     /* TODO: More regs ..., maybe create a table with masks... */
564 
565     /* Read only registers */
566     case PHB_CPU_LOADSTORE_STATUS:
567     case PHB_ETU_ERR_SUMMARY:
568     case PHB_PHB4_GEN_CAP:
569     case PHB_PHB4_TCE_CAP:
570     case PHB_PHB4_IRQ_CAP:
571     case PHB_PHB4_EEH_CAP:
572         return;
573     }
574 
575     /* Record whether it changed */
576     changed = phb->regs[off >> 3] != val;
577 
578     /* Store in register cache first */
579     phb->regs[off >> 3] = val;
580 
581     /* Handle side effects */
582     switch (off) {
583     case PHB_PHB4_CONFIG:
584         if (changed) {
585             pnv_phb4_update_all_msi_regions(phb);
586         }
587         break;
588     case PHB_M32_START_ADDR:
589     case PHB_M64_UPPER_BITS:
590         if (changed) {
591             pnv_phb4_check_all_mbt(phb);
592         }
593         break;
594 
595     /* IODA table accesses */
596     case PHB_IODA_DATA0:
597         pnv_phb4_ioda_write(phb, val);
598         break;
599 
600     /* RTC invalidation */
601     case PHB_RTC_INVALIDATE:
602         pnv_phb4_rtc_invalidate(phb, val);
603         break;
604 
605     /* PHB Control (Affects XIVE source) */
606     case PHB_CTRLR:
607     case PHB_LSI_SOURCE_ID:
608         pnv_phb4_update_xsrc(phb);
609         break;
610 
611     /* Silent simple writes */
612     case PHB_ASN_CMPM:
613     case PHB_CONFIG_ADDRESS:
614     case PHB_IODA_ADDR:
615     case PHB_TCE_KILL:
616     case PHB_TCE_SPEC_CTL:
617     case PHB_PEST_BAR:
618     case PHB_PELTV_BAR:
619     case PHB_RTT_BAR:
620     case PHB_LEM_FIR_ACCUM:
621     case PHB_LEM_ERROR_MASK:
622     case PHB_LEM_ACTION0:
623     case PHB_LEM_ACTION1:
624     case PHB_TCE_TAG_ENABLE:
625     case PHB_INT_NOTIFY_ADDR:
626     case PHB_INT_NOTIFY_INDEX:
627     case PHB_DMARD_SYNC:
628        break;
629 
630     /* Noise on anything else */
631     default:
632         qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
633                       off, val);
634     }
635 }
636 
637 static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
638 {
639     PnvPHB4 *phb = PNV_PHB4(opaque);
640     uint64_t val;
641 
642     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
643         return pnv_phb4_config_read(phb, off & 0x3, size);
644     }
645 
646     /* Special case RC configuration space */
647     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
648         return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
649     }
650 
651     /* Other registers are 64-bit only */
652     if (size != 8 || off & 0x7) {
653         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
654                    off, size);
655         return ~0ull;
656     }
657 
658     /* Default read from cache */
659     val = phb->regs[off >> 3];
660 
661     switch (off) {
662     case PHB_VERSION:
663         return phb->version;
664 
665         /* Read-only */
666     case PHB_PHB4_GEN_CAP:
667         return 0xe4b8000000000000ull;
668     case PHB_PHB4_TCE_CAP:
669         return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
670     case PHB_PHB4_IRQ_CAP:
671         return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
672     case PHB_PHB4_EEH_CAP:
673         return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;
674 
675     /* IODA table accesses */
676     case PHB_IODA_DATA0:
677         return pnv_phb4_ioda_read(phb);
678 
679     /* Link training always appears trained */
680     case PHB_PCIE_DLP_TRAIN_CTL:
681         /* TODO: Do something sensible with speed ? */
682         return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
683 
684     /* DMA read sync: make it look like it's complete */
685     case PHB_DMARD_SYNC:
686         return PHB_DMARD_SYNC_COMPLETE;
687 
688     /* Silent simple reads */
689     case PHB_LSI_SOURCE_ID:
690     case PHB_CPU_LOADSTORE_STATUS:
691     case PHB_ASN_CMPM:
692     case PHB_PHB4_CONFIG:
693     case PHB_M32_START_ADDR:
694     case PHB_CONFIG_ADDRESS:
695     case PHB_IODA_ADDR:
696     case PHB_RTC_INVALIDATE:
697     case PHB_TCE_KILL:
698     case PHB_TCE_SPEC_CTL:
699     case PHB_PEST_BAR:
700     case PHB_PELTV_BAR:
701     case PHB_RTT_BAR:
702     case PHB_M64_UPPER_BITS:
703     case PHB_CTRLR:
704     case PHB_LEM_FIR_ACCUM:
705     case PHB_LEM_ERROR_MASK:
706     case PHB_LEM_ACTION0:
707     case PHB_LEM_ACTION1:
708     case PHB_TCE_TAG_ENABLE:
709     case PHB_INT_NOTIFY_ADDR:
710     case PHB_INT_NOTIFY_INDEX:
711     case PHB_Q_DMA_R:
712     case PHB_ETU_ERR_SUMMARY:
713         break;
714 
715     /* Noise on anything else */
716     default:
717         qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
718                       off, val);
719     }
720     return val;
721 }
722 
723 static const MemoryRegionOps pnv_phb4_reg_ops = {
724     .read = pnv_phb4_reg_read,
725     .write = pnv_phb4_reg_write,
726     .valid.min_access_size = 1,
727     .valid.max_access_size = 8,
728     .impl.min_access_size = 1,
729     .impl.max_access_size = 8,
730     .endianness = DEVICE_BIG_ENDIAN,
731 };
732 
733 static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
734 {
735     PnvPHB4 *phb = PNV_PHB4(opaque);
736     uint32_t reg = addr >> 3;
737     uint64_t val;
738     hwaddr offset;
739 
740     switch (reg) {
741     case PHB_SCOM_HV_IND_ADDR:
742         return phb->scom_hv_ind_addr_reg;
743 
744     case PHB_SCOM_HV_IND_DATA:
745         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
746             phb_error(phb, "Invalid indirect address");
747             return ~0ull;
748         }
749         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
750         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
751         val = pnv_phb4_reg_read(phb, offset, size);
752         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
753             offset += size;
754             offset &= 0x3fff;
755             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
756                                                  phb->scom_hv_ind_addr_reg,
757                                                  offset);
758         }
759         return val;
760     case PHB_SCOM_ETU_LEM_FIR:
761     case PHB_SCOM_ETU_LEM_FIR_AND:
762     case PHB_SCOM_ETU_LEM_FIR_OR:
763     case PHB_SCOM_ETU_LEM_FIR_MSK:
764     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
765     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
766     case PHB_SCOM_ETU_LEM_ACT0:
767     case PHB_SCOM_ETU_LEM_ACT1:
768     case PHB_SCOM_ETU_LEM_WOF:
769         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
770         return pnv_phb4_reg_read(phb, offset, size);
771     case PHB_SCOM_ETU_PMON_CONFIG:
772     case PHB_SCOM_ETU_PMON_CTR0:
773     case PHB_SCOM_ETU_PMON_CTR1:
774     case PHB_SCOM_ETU_PMON_CTR2:
775     case PHB_SCOM_ETU_PMON_CTR3:
776         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
777         return pnv_phb4_reg_read(phb, offset, size);
778 
779     default:
780         qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
781         return ~0ull;
782     }
783 }
784 
785 static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
786                                  uint64_t val, unsigned size)
787 {
788     PnvPHB4 *phb = PNV_PHB4(opaque);
789     uint32_t reg = addr >> 3;
790     hwaddr offset;
791 
792     switch (reg) {
793     case PHB_SCOM_HV_IND_ADDR:
794         phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff;
795         break;
796     case PHB_SCOM_HV_IND_DATA:
797         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
798             phb_error(phb, "Invalid indirect address");
799             break;
800         }
801         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
802         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
803         pnv_phb4_reg_write(phb, offset, val, size);
804         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
805             offset += size;
806             offset &= 0x3fff;
807             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
808                                                  phb->scom_hv_ind_addr_reg,
809                                                  offset);
810         }
811         break;
812     case PHB_SCOM_ETU_LEM_FIR:
813     case PHB_SCOM_ETU_LEM_FIR_AND:
814     case PHB_SCOM_ETU_LEM_FIR_OR:
815     case PHB_SCOM_ETU_LEM_FIR_MSK:
816     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
817     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
818     case PHB_SCOM_ETU_LEM_ACT0:
819     case PHB_SCOM_ETU_LEM_ACT1:
820     case PHB_SCOM_ETU_LEM_WOF:
821         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
822         pnv_phb4_reg_write(phb, offset, val, size);
823         break;
824     case PHB_SCOM_ETU_PMON_CONFIG:
825     case PHB_SCOM_ETU_PMON_CTR0:
826     case PHB_SCOM_ETU_PMON_CTR1:
827     case PHB_SCOM_ETU_PMON_CTR2:
828     case PHB_SCOM_ETU_PMON_CTR3:
829         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
830         pnv_phb4_reg_write(phb, offset, val, size);
831         break;
832     default:
833         qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
834                       "=%"PRIx64"\n", addr, val);
835     }
836 }
837 
838 const MemoryRegionOps pnv_phb4_xscom_ops = {
839     .read = pnv_phb4_xscom_read,
840     .write = pnv_phb4_xscom_write,
841     .valid.min_access_size = 8,
842     .valid.max_access_size = 8,
843     .impl.min_access_size = 8,
844     .impl.max_access_size = 8,
845     .endianness = DEVICE_BIG_ENDIAN,
846 };
847 
848 static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
849 {
850     /* Check that out properly ... */
851     return irq_num & 3;
852 }
853 
854 static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
855 {
856     PnvPHB4 *phb = PNV_PHB4(opaque);
857     uint32_t lsi_base;
858 
859     /* LSI only ... */
860     if (irq_num > 3) {
861         phb_error(phb, "IRQ %x is not an LSI", irq_num);
862     }
863     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
864     lsi_base <<= 3;
865     qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
866 }
867 
868 static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
869 {
870     uint64_t rtt, addr;
871     uint16_t rte;
872     int bus_num;
873     int num_PEs;
874 
875     /* Already resolved ? */
876     if (ds->pe_num != PHB_INVALID_PE) {
877         return true;
878     }
879 
880     /* We need to lookup the RTT */
881     rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
882     if (!(rtt & PHB_RTT_BAR_ENABLE)) {
883         phb_error(ds->phb, "DMA with RTT BAR disabled !");
884         /* Set error bits ? fence ? ... */
885         return false;
886     }
887 
888     /* Read RTE */
889     bus_num = pci_bus_num(ds->bus);
890     addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
891     addr += 2 * ((bus_num << 8) | ds->devfn);
892     if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
893         phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
894         /* Set error bits ? fence ? ... */
895         return false;
896     }
897     rte = be16_to_cpu(rte);
898 
899     /* Fail upon reading of invalid PE# */
900     num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
901     if (rte >= num_PEs) {
902         phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
903         rte &= num_PEs - 1;
904     }
905     ds->pe_num = rte;
906     return true;
907 }
908 
909 static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
910                                    bool is_write, uint64_t tve,
911                                    IOMMUTLBEntry *tlb)
912 {
913     uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
914     int32_t  lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
915     uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
916     uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
917 
918     /* Invalid levels */
919     if (lev > 4) {
920         phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
921         return;
922     }
923 
924     /* Invalid entry */
925     if (tts == 0) {
926         phb_error(ds->phb, "Access to invalid TVE");
927         return;
928     }
929 
930     /* IO Page Size of 0 means untranslated, else use TCEs */
931     if (tps == 0) {
932         /* TODO: Handle boundaries */
933 
934         /* Use 4k pages like q35 ... for now */
935         tlb->iova = addr & 0xfffffffffffff000ull;
936         tlb->translated_addr = addr & 0x0003fffffffff000ull;
937         tlb->addr_mask = 0xfffull;
938         tlb->perm = IOMMU_RW;
939     } else {
940         uint32_t tce_shift, tbl_shift, sh;
941         uint64_t base, taddr, tce, tce_mask;
942 
943         /* Address bits per bottom level TCE entry */
944         tce_shift = tps + 11;
945 
946         /* Address bits per table level */
947         tbl_shift = tts + 8;
948 
949         /* Top level table base address */
950         base = tta << 12;
951 
952         /* Total shift to first level */
953         sh = tbl_shift * lev + tce_shift;
954 
955         /* TODO: Limit to support IO page sizes */
956 
957         /* TODO: Multi-level untested */
958         while ((lev--) >= 0) {
959             /* Grab the TCE address */
960             taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
961             if (dma_memory_read(&address_space_memory, taddr, &tce,
962                                 sizeof(tce))) {
963                 phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
964                 return;
965             }
966             tce = be64_to_cpu(tce);
967 
968             /* Check permission for indirect TCE */
969             if ((lev >= 0) && !(tce & 3)) {
970                 phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
971                 phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
972                            is_write ? 'W' : 'R', tve);
973                 phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
974                            tta, lev, tts, tps);
975                 return;
976             }
977             sh -= tbl_shift;
978             base = tce & ~0xfffull;
979         }
980 
981         /* We exit the loop with TCE being the final TCE */
982         tce_mask = ~((1ull << tce_shift) - 1);
983         tlb->iova = addr & tce_mask;
984         tlb->translated_addr = tce & tce_mask;
985         tlb->addr_mask = ~tce_mask;
986         tlb->perm = tce & 3;
987         if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
988             phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
989             phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
990                        is_write ? 'W' : 'R', tve);
991             phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
992                        tta, lev, tts, tps);
993         }
994     }
995 }
996 
997 static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
998                                               hwaddr addr,
999                                               IOMMUAccessFlags flag,
1000                                               int iommu_idx)
1001 {
1002     PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
1003     int tve_sel;
1004     uint64_t tve, cfg;
1005     IOMMUTLBEntry ret = {
1006         .target_as = &address_space_memory,
1007         .iova = addr,
1008         .translated_addr = 0,
1009         .addr_mask = ~(hwaddr)0,
1010         .perm = IOMMU_NONE,
1011     };
1012 
1013     /* Resolve PE# */
1014     if (!pnv_phb4_resolve_pe(ds)) {
1015         phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1016                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1017         return ret;
1018     }
1019 
1020     /* Check top bits */
1021     switch (addr >> 60) {
1022     case 00:
1023         /* DMA or 32-bit MSI ? */
1024         cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
1025         if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
1026             ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
1027             phb_error(ds->phb, "xlate on 32-bit MSI region");
1028             return ret;
1029         }
1030         /* Choose TVE XXX Use PHB4 Control Register */
1031         tve_sel = (addr >> 59) & 1;
1032         tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
1033         pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
1034         break;
1035     case 01:
1036         phb_error(ds->phb, "xlate on 64-bit MSI region");
1037         break;
1038     default:
1039         phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
1040     }
1041     return ret;
1042 }
1043 
/* QOM type name and OBJECT_CHECK-based cast for the PHB4 IOMMU region */
#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region"
#define PNV_PHB4_IOMMU_MEMORY_REGION(obj)                               \
    OBJECT_CHECK(IOMMUMemoryRegion, (obj),                              \
                 TYPE_PNV_PHB4_IOMMU_MEMORY_REGION)
1047 
1048 static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
1049                                                     void *data)
1050 {
1051     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1052 
1053     imrc->translate = pnv_phb4_translate_iommu;
1054 }
1055 
1056 static const TypeInfo pnv_phb4_iommu_memory_region_info = {
1057     .parent = TYPE_IOMMU_MEMORY_REGION,
1058     .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1059     .class_init = pnv_phb4_iommu_memory_region_class_init,
1060 };
1061 
1062 /*
1063  * MSI/MSIX memory region implementation.
1064  * The handler handles both MSI and MSIX.
1065  */
1066 static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
1067                                uint64_t data, unsigned size)
1068 {
1069     PnvPhb4DMASpace *ds = opaque;
1070     PnvPHB4 *phb = ds->phb;
1071 
1072     uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
1073 
1074     /* Resolve PE# */
1075     if (!pnv_phb4_resolve_pe(ds)) {
1076         phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1077                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1078         return;
1079     }
1080 
1081     /* TODO: Check it doesn't collide with LSIs */
1082     if (src >= phb->xsrc.nr_irqs) {
1083         phb_error(phb, "MSI %d out of bounds", src);
1084         return;
1085     }
1086 
1087     /* TODO: check PE/MSI assignement */
1088 
1089     qemu_irq_pulse(phb->qirqs[src]);
1090 }
1091 
1092 /* There is no .read as the read result is undefined by PCI spec */
1093 static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
1094 {
1095     PnvPhb4DMASpace *ds = opaque;
1096 
1097     phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
1098     return -1;
1099 }
1100 
1101 static const MemoryRegionOps pnv_phb4_msi_ops = {
1102     .read = pnv_phb4_msi_read,
1103     .write = pnv_phb4_msi_write,
1104     .endianness = DEVICE_LITTLE_ENDIAN
1105 };
1106 
1107 static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
1108 {
1109     PnvPhb4DMASpace *ds;
1110 
1111     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
1112         if (ds->bus == bus && ds->devfn == devfn) {
1113             break;
1114         }
1115     }
1116     return ds;
1117 }
1118 
1119 static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1120 {
1121     PnvPHB4 *phb = opaque;
1122     PnvPhb4DMASpace *ds;
1123     char name[32];
1124 
1125     ds = pnv_phb4_dma_find(phb, bus, devfn);
1126 
1127     if (ds == NULL) {
1128         ds = g_malloc0(sizeof(PnvPhb4DMASpace));
1129         ds->bus = bus;
1130         ds->devfn = devfn;
1131         ds->pe_num = PHB_INVALID_PE;
1132         ds->phb = phb;
1133         snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
1134                  phb->phb_id);
1135         memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
1136                                  TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1137                                  OBJECT(phb), name, UINT64_MAX);
1138         address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
1139                            name);
1140         memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1141                               ds, "msi32", 0x10000);
1142         memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1143                               ds, "msi64", 0x100000);
1144         pnv_phb4_update_msi_regions(ds);
1145 
1146         QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
1147     }
1148     return &ds->dma_as;
1149 }
1150 
1151 static void pnv_phb4_instance_init(Object *obj)
1152 {
1153     PnvPHB4 *phb = PNV_PHB4(obj);
1154 
1155     QLIST_INIT(&phb->dma_spaces);
1156 
1157     /* XIVE interrupt source object */
1158     object_initialize_child(obj, "source", &phb->xsrc, TYPE_XIVE_SOURCE);
1159 
1160     /* Root Port */
1161     object_initialize_child(obj, "root", &phb->root, TYPE_PNV_PHB4_ROOT_PORT);
1162 
1163     qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
1164     qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
1165 }
1166 
1167 static void pnv_phb4_realize(DeviceState *dev, Error **errp)
1168 {
1169     PnvPHB4 *phb = PNV_PHB4(dev);
1170     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
1171     XiveSource *xsrc = &phb->xsrc;
1172     Error *local_err = NULL;
1173     int nr_irqs;
1174     char name[32];
1175 
1176     assert(phb->stack);
1177 
1178     /* Set the "big_phb" flag */
1179     phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
1180 
1181     /* Controller Registers */
1182     snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
1183              phb->phb_id);
1184     memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
1185                           name, 0x2000);
1186 
1187     /*
1188      * PHB4 doesn't support IO space. However, qemu gets very upset if
1189      * we don't have an IO region to anchor IO BARs onto so we just
1190      * initialize one which we never hook up to anything
1191      */
1192 
1193     snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
1194              phb->phb_id);
1195     memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);
1196 
1197     snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
1198              phb->phb_id);
1199     memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
1200                        PCI_MMIO_TOTAL_SIZE);
1201 
1202     pci->bus = pci_register_root_bus(dev, "root-bus",
1203                                      pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
1204                                      &phb->pci_mmio, &phb->pci_io,
1205                                      0, 4, TYPE_PNV_PHB4_ROOT_BUS);
1206     pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
1207 
1208     /* Add a single Root port */
1209     qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
1210     qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
1211     qdev_realize(DEVICE(&phb->root), BUS(pci->bus), &error_fatal);
1212 
1213     /* Setup XIVE Source */
1214     if (phb->big_phb) {
1215         nr_irqs = PNV_PHB4_MAX_INTs;
1216     } else {
1217         nr_irqs = PNV_PHB4_MAX_INTs >> 1;
1218     }
1219     object_property_set_int(OBJECT(xsrc), nr_irqs, "nr-irqs", &error_fatal);
1220     object_property_set_link(OBJECT(xsrc), OBJECT(phb), "xive", &error_fatal);
1221     qdev_realize(DEVICE(xsrc), NULL, &local_err);
1222     if (local_err) {
1223         error_propagate(errp, local_err);
1224         return;
1225     }
1226 
1227     pnv_phb4_update_xsrc(phb);
1228 
1229     phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
1230 }
1231 
1232 static void pnv_phb4_reset(DeviceState *dev)
1233 {
1234     PnvPHB4 *phb = PNV_PHB4(dev);
1235     PCIDevice *root_dev = PCI_DEVICE(&phb->root);
1236 
1237     /*
1238      * Configure PCI device id at reset using a property.
1239      */
1240     pci_config_set_vendor_id(root_dev->config, PCI_VENDOR_ID_IBM);
1241     pci_config_set_device_id(root_dev->config, phb->device_id);
1242 }
1243 
1244 static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
1245                                           PCIBus *rootbus)
1246 {
1247     PnvPHB4 *phb = PNV_PHB4(host_bridge);
1248 
1249     snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
1250              phb->chip_id, phb->phb_id);
1251     return phb->bus_path;
1252 }
1253 
1254 static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno)
1255 {
1256     PnvPHB4 *phb = PNV_PHB4(xf);
1257     uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
1258     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1259     uint64_t data = XIVE_TRIGGER_PQ | offset | srcno;
1260     MemTxResult result;
1261 
1262     address_space_stq_be(&address_space_memory, notif_port, data,
1263                          MEMTXATTRS_UNSPECIFIED, &result);
1264     if (result != MEMTX_OK) {
1265         phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port);
1266         return;
1267     }
1268 }
1269 
1270 static Property pnv_phb4_properties[] = {
1271         DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
1272         DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
1273         DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
1274         DEFINE_PROP_UINT16("device-id", PnvPHB4, device_id, 0),
1275         DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
1276                          PnvPhb4PecStack *),
1277         DEFINE_PROP_END_OF_LIST(),
1278 };
1279 
1280 static void pnv_phb4_class_init(ObjectClass *klass, void *data)
1281 {
1282     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1283     DeviceClass *dc = DEVICE_CLASS(klass);
1284     XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass);
1285 
1286     hc->root_bus_path   = pnv_phb4_root_bus_path;
1287     dc->realize         = pnv_phb4_realize;
1288     device_class_set_props(dc, pnv_phb4_properties);
1289     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1290     dc->user_creatable  = false;
1291     dc->reset           = pnv_phb4_reset;
1292 
1293     xfc->notify         = pnv_phb4_xive_notify;
1294 }
1295 
1296 static const TypeInfo pnv_phb4_type_info = {
1297     .name          = TYPE_PNV_PHB4,
1298     .parent        = TYPE_PCIE_HOST_BRIDGE,
1299     .instance_init = pnv_phb4_instance_init,
1300     .instance_size = sizeof(PnvPHB4),
1301     .class_init    = pnv_phb4_class_init,
1302     .interfaces = (InterfaceInfo[]) {
1303             { TYPE_XIVE_NOTIFIER },
1304             { },
1305     }
1306 };
1307 
1308 static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
1309 {
1310     BusClass *k = BUS_CLASS(klass);
1311 
1312     /*
1313      * PHB4 has only a single root complex. Enforce the limit on the
1314      * parent bus
1315      */
1316     k->max_dev = 1;
1317 }
1318 
1319 static const TypeInfo pnv_phb4_root_bus_info = {
1320     .name = TYPE_PNV_PHB4_ROOT_BUS,
1321     .parent = TYPE_PCIE_BUS,
1322     .class_init = pnv_phb4_root_bus_class_init,
1323     .interfaces = (InterfaceInfo[]) {
1324         { INTERFACE_PCIE_DEVICE },
1325         { }
1326     },
1327 };
1328 
1329 static void pnv_phb4_root_port_reset(DeviceState *dev)
1330 {
1331     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1332     PCIDevice *d = PCI_DEVICE(dev);
1333     uint8_t *conf = d->config;
1334 
1335     rpc->parent_reset(dev);
1336 
1337     pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
1338                                PCI_IO_RANGE_MASK & 0xff);
1339     pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
1340                                  PCI_IO_RANGE_MASK & 0xff);
1341     pci_set_word(conf + PCI_MEMORY_BASE, 0);
1342     pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
1343     pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
1344     pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
1345     pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
1346     pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
1347 }
1348 
1349 static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
1350 {
1351     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1352     Error *local_err = NULL;
1353 
1354     rpc->parent_realize(dev, &local_err);
1355     if (local_err) {
1356         error_propagate(errp, local_err);
1357         return;
1358     }
1359 }
1360 
1361 static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
1362 {
1363     DeviceClass *dc = DEVICE_CLASS(klass);
1364     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1365     PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
1366 
1367     dc->desc     = "IBM PHB4 PCIE Root Port";
1368     dc->user_creatable = false;
1369 
1370     device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
1371                                     &rpc->parent_realize);
1372     device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
1373                                   &rpc->parent_reset);
1374 
1375     k->vendor_id = PCI_VENDOR_ID_IBM;
1376     k->device_id = PNV_PHB4_DEVICE_ID;
1377     k->revision  = 0;
1378 
1379     rpc->exp_offset = 0x48;
1380     rpc->aer_offset = 0x100;
1381 
1382     dc->reset = &pnv_phb4_root_port_reset;
1383 }
1384 
1385 static const TypeInfo pnv_phb4_root_port_info = {
1386     .name          = TYPE_PNV_PHB4_ROOT_PORT,
1387     .parent        = TYPE_PCIE_ROOT_PORT,
1388     .instance_size = sizeof(PnvPHB4RootPort),
1389     .class_init    = pnv_phb4_root_port_class_init,
1390 };
1391 
1392 static void pnv_phb4_register_types(void)
1393 {
1394     type_register_static(&pnv_phb4_root_bus_info);
1395     type_register_static(&pnv_phb4_root_port_info);
1396     type_register_static(&pnv_phb4_type_info);
1397     type_register_static(&pnv_phb4_iommu_memory_region_info);
1398 }
1399 
1400 type_init(pnv_phb4_register_types);
1401 
1402 void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
1403 {
1404     PnvPHB4 *phb = &stack->phb;
1405 
1406     /* Unmap first always */
1407     if (memory_region_is_mapped(&phb->mr_regs)) {
1408         memory_region_del_subregion(&stack->phbbar, &phb->mr_regs);
1409     }
1410     if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
1411         memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio);
1412     }
1413 
1414     /* Map registers if enabled */
1415     if (memory_region_is_mapped(&stack->phbbar)) {
1416         memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs);
1417     }
1418 
1419     /* Map ESB if enabled */
1420     if (memory_region_is_mapped(&stack->intbar)) {
1421         memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio);
1422     }
1423 
1424     /* Check/update m32 */
1425     pnv_phb4_check_all_mbt(phb);
1426 }
1427 
1428 void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
1429 {
1430     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1431 
1432     monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n",
1433                    phb->chip_id, phb->phb_id,
1434                    offset, offset + phb->xsrc.nr_irqs - 1);
1435     xive_source_pic_print_info(&phb->xsrc, 0, mon);
1436 }
1437