xref: /openbmc/qemu/hw/pci-host/pnv_phb4.c (revision 8d99713b)
1 /*
2  * QEMU PowerPC PowerNV (POWER9) PHB4 model
3  *
4  * Copyright (c) 2018-2020, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qapi/visitor.h"
12 #include "qapi/error.h"
13 #include "qemu-common.h"
14 #include "monitor/monitor.h"
15 #include "target/ppc/cpu.h"
16 #include "hw/pci-host/pnv_phb4_regs.h"
17 #include "hw/pci-host/pnv_phb4.h"
18 #include "hw/pci/pcie_host.h"
19 #include "hw/pci/pcie_port.h"
20 #include "hw/ppc/pnv.h"
21 #include "hw/ppc/pnv_xscom.h"
22 #include "hw/irq.h"
23 #include "hw/qdev-properties.h"
24 #include "qom/object.h"
25 #include "trace.h"
26 
/*
 * Log a guest error against a PHB. The message is prefixed with the
 * chip id and PHB id, and a newline is appended to @fmt.
 */
#define phb_error(phb, fmt, ...) \
    qemu_log_mask(LOG_GUEST_ERROR, \
                  "phb4[%d:%d]: " fmt "\n", \
                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
30 
/*
 * Log a guest error against a PEC. The message is prefixed with the
 * chip id and PEC index, and a newline is appended to @fmt.
 */
#define phb_pec_error(pec, fmt, ...) \
    qemu_log_mask(LOG_GUEST_ERROR, \
                  "phb4_pec[%d:%d]: " fmt "\n", \
                  (pec)->chip_id, (pec)->index, ## __VA_ARGS__)
34 
35 /*
36  * QEMU version of the GETFIELD/SETFIELD macros
37  *
38  * These are common with the PnvXive model.
39  */
/* Extract the bits of @word selected by @mask, shifted down to bit 0. */
static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
{
    uint64_t selected = word & mask;

    return selected >> ctz64(mask);
}
44 
/*
 * Return @word with the bits selected by @mask replaced by @value.
 * @value is shifted up to the lowest set bit of @mask and truncated to
 * the mask.
 */
static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
                                uint64_t value)
{
    uint64_t kept = word & ~mask;
    uint64_t placed = (value << ctz64(mask)) & mask;

    return kept | placed;
}
50 
51 static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
52 {
53     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
54     uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
55     uint8_t bus, devfn;
56 
57     if (!(addr >> 63)) {
58         return NULL;
59     }
60     bus = (addr >> 52) & 0xff;
61     devfn = (addr >> 44) & 0xff;
62 
63     /* We don't access the root complex this way */
64     if (bus == 0 && devfn == 0) {
65         return NULL;
66     }
67     return pci_find_device(pci->bus, bus, devfn);
68 }
69 
70 /*
71  * The CONFIG_DATA register expects little endian accesses, but as the
72  * region is big endian, we have to swap the value.
73  */
74 static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
75                                   unsigned size, uint64_t val)
76 {
77     uint32_t cfg_addr, limit;
78     PCIDevice *pdev;
79 
80     pdev = pnv_phb4_find_cfg_dev(phb);
81     if (!pdev) {
82         return;
83     }
84     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
85     cfg_addr |= off;
86     limit = pci_config_size(pdev);
87     if (limit <= cfg_addr) {
88         /*
89          * conventional pci device can be behind pcie-to-pci bridge.
90          * 256 <= addr < 4K has no effects.
91          */
92         return;
93     }
94     switch (size) {
95     case 1:
96         break;
97     case 2:
98         val = bswap16(val);
99         break;
100     case 4:
101         val = bswap32(val);
102         break;
103     default:
104         g_assert_not_reached();
105     }
106     pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
107 }
108 
109 static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
110                                      unsigned size)
111 {
112     uint32_t cfg_addr, limit;
113     PCIDevice *pdev;
114     uint64_t val;
115 
116     pdev = pnv_phb4_find_cfg_dev(phb);
117     if (!pdev) {
118         return ~0ull;
119     }
120     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
121     cfg_addr |= off;
122     limit = pci_config_size(pdev);
123     if (limit <= cfg_addr) {
124         /*
125          * conventional pci device can be behind pcie-to-pci bridge.
126          * 256 <= addr < 4K has no effects.
127          */
128         return ~0ull;
129     }
130     val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
131     switch (size) {
132     case 1:
133         return val;
134     case 2:
135         return bswap16(val);
136     case 4:
137         return bswap32(val);
138     default:
139         g_assert_not_reached();
140     }
141 }
142 
143 /*
144  * Root complex register accesses are memory mapped.
145  */
146 static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
147                                      unsigned size, uint64_t val)
148 {
149     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
150     PCIDevice *pdev;
151 
152     if (size != 4) {
153         phb_error(phb, "rc_config_write invalid size %d\n", size);
154         return;
155     }
156 
157     pdev = pci_find_device(pci->bus, 0, 0);
158     if (!pdev) {
159         phb_error(phb, "rc_config_write device not found\n");
160         return;
161     }
162 
163     pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
164                                  bswap32(val), 4);
165 }
166 
167 static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
168                                         unsigned size)
169 {
170     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
171     PCIDevice *pdev;
172     uint64_t val;
173 
174     if (size != 4) {
175         phb_error(phb, "rc_config_read invalid size %d\n", size);
176         return ~0ull;
177     }
178 
179     pdev = pci_find_device(pci->bus, 0, 0);
180     if (!pdev) {
181         phb_error(phb, "rc_config_read device not found\n");
182         return ~0ull;
183     }
184 
185     val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
186     return bswap32(val);
187 }
188 
189 static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
190 {
191     uint64_t base, start, size, mbe0, mbe1;
192     MemoryRegion *parent;
193     char name[64];
194 
195     /* Unmap first */
196     if (memory_region_is_mapped(&phb->mr_mmio[index])) {
197         /* Should we destroy it in RCU friendly way... ? */
198         memory_region_del_subregion(phb->mr_mmio[index].container,
199                                     &phb->mr_mmio[index]);
200     }
201 
202     /* Get table entry */
203     mbe0 = phb->ioda_MBT[(index << 1)];
204     mbe1 = phb->ioda_MBT[(index << 1) + 1];
205 
206     if (!(mbe0 & IODA3_MBT0_ENABLE)) {
207         return;
208     }
209 
210     /* Grab geometry from registers */
211     base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
212     size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
213     size |= 0xff00000000000000ull;
214     size = ~size + 1;
215 
216     /* Calculate PCI side start address based on M32/M64 window type */
217     if (mbe0 & IODA3_MBT0_TYPE_M32) {
218         start = phb->regs[PHB_M32_START_ADDR >> 3];
219         if ((start + size) > 0x100000000ull) {
220             phb_error(phb, "M32 set beyond 4GB boundary !");
221             size = 0x100000000 - start;
222         }
223     } else {
224         start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
225     }
226 
227     /* TODO: Figure out how to implemet/decode AOMASK */
228 
229     /* Check if it matches an enabled MMIO region in the PEC stack */
230     if (memory_region_is_mapped(&phb->mmbar0) &&
231         base >= phb->mmio0_base &&
232         (base + size) <= (phb->mmio0_base + phb->mmio0_size)) {
233         parent = &phb->mmbar0;
234         base -= phb->mmio0_base;
235     } else if (memory_region_is_mapped(&phb->mmbar1) &&
236         base >= phb->mmio1_base &&
237         (base + size) <= (phb->mmio1_base + phb->mmio1_size)) {
238         parent = &phb->mmbar1;
239         base -= phb->mmio1_base;
240     } else {
241         phb_error(phb, "PHB MBAR %d out of parent bounds", index);
242         return;
243     }
244 
245     /* Create alias (better name ?) */
246     snprintf(name, sizeof(name), "phb4-mbar%d", index);
247     memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
248                              &phb->pci_mmio, start, size);
249     memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
250 }
251 
252 static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
253 {
254     uint64_t i;
255     uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
256         PNV_PHB4_MIN_MMIO_WINDOWS;
257 
258     for (i = 0; i < num_windows; i++) {
259         pnv_phb4_check_mbt(phb, i);
260     }
261 }
262 
263 static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
264                                       unsigned *out_table, unsigned *out_idx)
265 {
266     uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
267     unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
268     unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
269     unsigned int mask;
270     uint64_t *tptr = NULL;
271 
272     switch (table) {
273     case IODA3_TBL_LIST:
274         tptr = phb->ioda_LIST;
275         mask = 7;
276         break;
277     case IODA3_TBL_MIST:
278         tptr = phb->ioda_MIST;
279         mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
280         mask -= 1;
281         break;
282     case IODA3_TBL_RCAM:
283         mask = phb->big_phb ? 127 : 63;
284         break;
285     case IODA3_TBL_MRT:
286         mask = phb->big_phb ? 15 : 7;
287         break;
288     case IODA3_TBL_PESTA:
289     case IODA3_TBL_PESTB:
290         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
291         mask -= 1;
292         break;
293     case IODA3_TBL_TVT:
294         tptr = phb->ioda_TVT;
295         mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
296         mask -= 1;
297         break;
298     case IODA3_TBL_TCR:
299     case IODA3_TBL_TDR:
300         mask = phb->big_phb ? 1023 : 511;
301         break;
302     case IODA3_TBL_MBT:
303         tptr = phb->ioda_MBT;
304         mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
305         mask -= 1;
306         break;
307     case IODA3_TBL_MDT:
308         tptr = phb->ioda_MDT;
309         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
310         mask -= 1;
311         break;
312     case IODA3_TBL_PEEV:
313         tptr = phb->ioda_PEEV;
314         mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
315         mask -= 1;
316         break;
317     default:
318         phb_error(phb, "invalid IODA table %d", table);
319         return NULL;
320     }
321     index &= mask;
322     if (out_idx) {
323         *out_idx = index;
324     }
325     if (out_table) {
326         *out_table = table;
327     }
328     if (tptr) {
329         tptr += index;
330     }
331     if (adreg & PHB_IODA_AD_AUTOINC) {
332         index = (index + 1) & mask;
333         adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
334     }
335 
336     phb->regs[PHB_IODA_ADDR >> 3] = adreg;
337     return tptr;
338 }
339 
340 static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
341 {
342     unsigned table, idx;
343     uint64_t *tptr;
344 
345     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
346     if (!tptr) {
347         /* Special PESTA case */
348         if (table == IODA3_TBL_PESTA) {
349             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63;
350         } else if (table == IODA3_TBL_PESTB) {
351             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62;
352         }
353         /* Return 0 on unsupported tables, not ff's */
354         return 0;
355     }
356     return *tptr;
357 }
358 
359 static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
360 {
361     unsigned table, idx;
362     uint64_t *tptr;
363 
364     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
365     if (!tptr) {
366         /* Special PESTA case */
367         if (table == IODA3_TBL_PESTA) {
368             phb->ioda_PEST_AB[idx] &= ~1;
369             phb->ioda_PEST_AB[idx] |= (val >> 63) & 1;
370         } else if (table == IODA3_TBL_PESTB) {
371             phb->ioda_PEST_AB[idx] &= ~2;
372             phb->ioda_PEST_AB[idx] |= (val >> 62) & 2;
373         }
374         return;
375     }
376 
377     /* Handle side effects */
378     switch (table) {
379     case IODA3_TBL_LIST:
380         break;
381     case IODA3_TBL_MIST: {
382         /* Special mask for MIST partial write */
383         uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
384         uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
385         uint64_t v = *tptr;
386         if (mmask == 0) {
387             mmask = 0xf;
388         }
389         if (mmask & 8) {
390             v &= 0x0000ffffffffffffull;
391             v |= 0xcfff000000000000ull & val;
392         }
393         if (mmask & 4) {
394             v &= 0xffff0000ffffffffull;
395             v |= 0x0000cfff00000000ull & val;
396         }
397         if (mmask & 2) {
398             v &= 0xffffffff0000ffffull;
399             v |= 0x00000000cfff0000ull & val;
400         }
401         if (mmask & 1) {
402             v &= 0xffffffffffff0000ull;
403             v |= 0x000000000000cfffull & val;
404         }
405         *tptr = v;
406         break;
407     }
408     case IODA3_TBL_MBT:
409         *tptr = val;
410 
411         /* Copy accross the valid bit to the other half */
412         phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
413         phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
414 
415         /* Update mappings */
416         pnv_phb4_check_mbt(phb, idx >> 1);
417         break;
418     default:
419         *tptr = val;
420     }
421 }
422 
423 static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
424 {
425     PnvPhb4DMASpace *ds;
426 
427     /* Always invalidate all for now ... */
428     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
429         ds->pe_num = PHB_INVALID_PE;
430     }
431 }
432 
433 static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
434 {
435     uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
436 
437     if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
438         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
439             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
440                                         0xffff0000, &ds->msi32_mr);
441         }
442     } else {
443         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
444             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
445                                         &ds->msi32_mr);
446         }
447     }
448 
449     if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
450         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
451             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
452                                         (1ull << 60), &ds->msi64_mr);
453         }
454     } else {
455         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
456             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
457                                         &ds->msi64_mr);
458         }
459     }
460 }
461 
462 static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
463 {
464     PnvPhb4DMASpace *ds;
465 
466     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
467         pnv_phb4_update_msi_regions(ds);
468     }
469 }
470 
471 static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
472 {
473     int shift, flags, i, lsi_base;
474     XiveSource *xsrc = &phb->xsrc;
475 
476     /* The XIVE source characteristics can be set at run time */
477     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
478         shift = XIVE_ESB_64K;
479     } else {
480         shift = XIVE_ESB_4K;
481     }
482     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
483         flags = XIVE_SRC_STORE_EOI;
484     } else {
485         flags = 0;
486     }
487 
488     /*
489      * When the PQ disable configuration bit is set, the check on the
490      * PQ state bits is disabled on the PHB side (for MSI only) and it
491      * is performed on the IC side instead.
492      */
493     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PQ_DISABLE) {
494         flags |= XIVE_SRC_PQ_DISABLE;
495     }
496 
497     phb->xsrc.esb_shift = shift;
498     phb->xsrc.esb_flags = flags;
499 
500     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
501     lsi_base <<= 3;
502 
503     /* TODO: handle reset values of PHB_LSI_SRC_ID */
504     if (!lsi_base) {
505         return;
506     }
507 
508     /* TODO: need a xive_source_irq_reset_lsi() */
509     bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
510 
511     for (i = 0; i < xsrc->nr_irqs; i++) {
512         bool msi = (i < lsi_base || i >= (lsi_base + 8));
513         if (!msi) {
514             xive_source_irq_set_lsi(xsrc, i);
515         }
516     }
517 }
518 
519 static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
520                                unsigned size)
521 {
522     PnvPHB4 *phb = PNV_PHB4(opaque);
523     bool changed;
524 
525     /* Special case outbound configuration data */
526     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
527         pnv_phb4_config_write(phb, off & 0x3, size, val);
528         return;
529     }
530 
531     /* Special case RC configuration space */
532     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
533         pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
534         return;
535     }
536 
537     /* Other registers are 64-bit only */
538     if (size != 8 || off & 0x7) {
539         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
540                    off, size);
541         return;
542     }
543 
544     /* Handle masking */
545     switch (off) {
546     case PHB_LSI_SOURCE_ID:
547         val &= PHB_LSI_SRC_ID;
548         break;
549     case PHB_M64_UPPER_BITS:
550         val &= 0xff00000000000000ull;
551         break;
552     /* TCE Kill */
553     case PHB_TCE_KILL:
554         /* Clear top 3 bits which HW does to indicate successful queuing */
555         val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
556         break;
557     case PHB_Q_DMA_R:
558         /*
559          * This is enough logic to make SW happy but we aren't
560          * actually quiescing the DMAs
561          */
562         if (val & PHB_Q_DMA_R_AUTORESET) {
563             val = 0;
564         } else {
565             val &= PHB_Q_DMA_R_QUIESCE_DMA;
566         }
567         break;
568     /* LEM stuff */
569     case PHB_LEM_FIR_AND_MASK:
570         phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
571         return;
572     case PHB_LEM_FIR_OR_MASK:
573         phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
574         return;
575     case PHB_LEM_ERROR_AND_MASK:
576         phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
577         return;
578     case PHB_LEM_ERROR_OR_MASK:
579         phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
580         return;
581     case PHB_LEM_WOF:
582         val = 0;
583         break;
584     /* TODO: More regs ..., maybe create a table with masks... */
585 
586     /* Read only registers */
587     case PHB_CPU_LOADSTORE_STATUS:
588     case PHB_ETU_ERR_SUMMARY:
589     case PHB_PHB4_GEN_CAP:
590     case PHB_PHB4_TCE_CAP:
591     case PHB_PHB4_IRQ_CAP:
592     case PHB_PHB4_EEH_CAP:
593         return;
594     }
595 
596     /* Record whether it changed */
597     changed = phb->regs[off >> 3] != val;
598 
599     /* Store in register cache first */
600     phb->regs[off >> 3] = val;
601 
602     /* Handle side effects */
603     switch (off) {
604     case PHB_PHB4_CONFIG:
605         if (changed) {
606             pnv_phb4_update_all_msi_regions(phb);
607         }
608         break;
609     case PHB_M32_START_ADDR:
610     case PHB_M64_UPPER_BITS:
611         if (changed) {
612             pnv_phb4_check_all_mbt(phb);
613         }
614         break;
615 
616     /* IODA table accesses */
617     case PHB_IODA_DATA0:
618         pnv_phb4_ioda_write(phb, val);
619         break;
620 
621     /* RTC invalidation */
622     case PHB_RTC_INVALIDATE:
623         pnv_phb4_rtc_invalidate(phb, val);
624         break;
625 
626     /* PHB Control (Affects XIVE source) */
627     case PHB_CTRLR:
628     case PHB_LSI_SOURCE_ID:
629         pnv_phb4_update_xsrc(phb);
630         break;
631 
632     /* Silent simple writes */
633     case PHB_ASN_CMPM:
634     case PHB_CONFIG_ADDRESS:
635     case PHB_IODA_ADDR:
636     case PHB_TCE_KILL:
637     case PHB_TCE_SPEC_CTL:
638     case PHB_PEST_BAR:
639     case PHB_PELTV_BAR:
640     case PHB_RTT_BAR:
641     case PHB_LEM_FIR_ACCUM:
642     case PHB_LEM_ERROR_MASK:
643     case PHB_LEM_ACTION0:
644     case PHB_LEM_ACTION1:
645     case PHB_TCE_TAG_ENABLE:
646     case PHB_INT_NOTIFY_ADDR:
647     case PHB_INT_NOTIFY_INDEX:
648     case PHB_DMARD_SYNC:
649        break;
650 
651     /* Noise on anything else */
652     default:
653         qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
654                       off, val);
655     }
656 }
657 
658 static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
659 {
660     PnvPHB4 *phb = PNV_PHB4(opaque);
661     uint64_t val;
662 
663     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
664         return pnv_phb4_config_read(phb, off & 0x3, size);
665     }
666 
667     /* Special case RC configuration space */
668     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
669         return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
670     }
671 
672     /* Other registers are 64-bit only */
673     if (size != 8 || off & 0x7) {
674         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
675                    off, size);
676         return ~0ull;
677     }
678 
679     /* Default read from cache */
680     val = phb->regs[off >> 3];
681 
682     switch (off) {
683     case PHB_VERSION:
684         return PNV_PHB4_PEC_GET_CLASS(phb->pec)->version;
685 
686         /* Read-only */
687     case PHB_PHB4_GEN_CAP:
688         return 0xe4b8000000000000ull;
689     case PHB_PHB4_TCE_CAP:
690         return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
691     case PHB_PHB4_IRQ_CAP:
692         return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
693     case PHB_PHB4_EEH_CAP:
694         return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;
695 
696     /* IODA table accesses */
697     case PHB_IODA_DATA0:
698         return pnv_phb4_ioda_read(phb);
699 
700     /* Link training always appears trained */
701     case PHB_PCIE_DLP_TRAIN_CTL:
702         /* TODO: Do something sensible with speed ? */
703         return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
704 
705     /* DMA read sync: make it look like it's complete */
706     case PHB_DMARD_SYNC:
707         return PHB_DMARD_SYNC_COMPLETE;
708 
709     /* Silent simple reads */
710     case PHB_LSI_SOURCE_ID:
711     case PHB_CPU_LOADSTORE_STATUS:
712     case PHB_ASN_CMPM:
713     case PHB_PHB4_CONFIG:
714     case PHB_M32_START_ADDR:
715     case PHB_CONFIG_ADDRESS:
716     case PHB_IODA_ADDR:
717     case PHB_RTC_INVALIDATE:
718     case PHB_TCE_KILL:
719     case PHB_TCE_SPEC_CTL:
720     case PHB_PEST_BAR:
721     case PHB_PELTV_BAR:
722     case PHB_RTT_BAR:
723     case PHB_M64_UPPER_BITS:
724     case PHB_CTRLR:
725     case PHB_LEM_FIR_ACCUM:
726     case PHB_LEM_ERROR_MASK:
727     case PHB_LEM_ACTION0:
728     case PHB_LEM_ACTION1:
729     case PHB_TCE_TAG_ENABLE:
730     case PHB_INT_NOTIFY_ADDR:
731     case PHB_INT_NOTIFY_INDEX:
732     case PHB_Q_DMA_R:
733     case PHB_ETU_ERR_SUMMARY:
734         break;
735 
736     /* Noise on anything else */
737     default:
738         qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
739                       off, val);
740     }
741     return val;
742 }
743 
744 static const MemoryRegionOps pnv_phb4_reg_ops = {
745     .read = pnv_phb4_reg_read,
746     .write = pnv_phb4_reg_write,
747     .valid.min_access_size = 1,
748     .valid.max_access_size = 8,
749     .impl.min_access_size = 1,
750     .impl.max_access_size = 8,
751     .endianness = DEVICE_BIG_ENDIAN,
752 };
753 
754 static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
755 {
756     PnvPHB4 *phb = PNV_PHB4(opaque);
757     uint32_t reg = addr >> 3;
758     uint64_t val;
759     hwaddr offset;
760 
761     switch (reg) {
762     case PHB_SCOM_HV_IND_ADDR:
763         return phb->scom_hv_ind_addr_reg;
764 
765     case PHB_SCOM_HV_IND_DATA:
766         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
767             phb_error(phb, "Invalid indirect address");
768             return ~0ull;
769         }
770         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
771         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
772         val = pnv_phb4_reg_read(phb, offset, size);
773         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
774             offset += size;
775             offset &= 0x3fff;
776             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
777                                                  phb->scom_hv_ind_addr_reg,
778                                                  offset);
779         }
780         return val;
781     case PHB_SCOM_ETU_LEM_FIR:
782     case PHB_SCOM_ETU_LEM_FIR_AND:
783     case PHB_SCOM_ETU_LEM_FIR_OR:
784     case PHB_SCOM_ETU_LEM_FIR_MSK:
785     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
786     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
787     case PHB_SCOM_ETU_LEM_ACT0:
788     case PHB_SCOM_ETU_LEM_ACT1:
789     case PHB_SCOM_ETU_LEM_WOF:
790         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
791         return pnv_phb4_reg_read(phb, offset, size);
792     case PHB_SCOM_ETU_PMON_CONFIG:
793     case PHB_SCOM_ETU_PMON_CTR0:
794     case PHB_SCOM_ETU_PMON_CTR1:
795     case PHB_SCOM_ETU_PMON_CTR2:
796     case PHB_SCOM_ETU_PMON_CTR3:
797         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
798         return pnv_phb4_reg_read(phb, offset, size);
799 
800     default:
801         qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
802         return ~0ull;
803     }
804 }
805 
806 static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
807                                  uint64_t val, unsigned size)
808 {
809     PnvPHB4 *phb = PNV_PHB4(opaque);
810     uint32_t reg = addr >> 3;
811     hwaddr offset;
812 
813     switch (reg) {
814     case PHB_SCOM_HV_IND_ADDR:
815         phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff;
816         break;
817     case PHB_SCOM_HV_IND_DATA:
818         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
819             phb_error(phb, "Invalid indirect address");
820             break;
821         }
822         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
823         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
824         pnv_phb4_reg_write(phb, offset, val, size);
825         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
826             offset += size;
827             offset &= 0x3fff;
828             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
829                                                  phb->scom_hv_ind_addr_reg,
830                                                  offset);
831         }
832         break;
833     case PHB_SCOM_ETU_LEM_FIR:
834     case PHB_SCOM_ETU_LEM_FIR_AND:
835     case PHB_SCOM_ETU_LEM_FIR_OR:
836     case PHB_SCOM_ETU_LEM_FIR_MSK:
837     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
838     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
839     case PHB_SCOM_ETU_LEM_ACT0:
840     case PHB_SCOM_ETU_LEM_ACT1:
841     case PHB_SCOM_ETU_LEM_WOF:
842         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
843         pnv_phb4_reg_write(phb, offset, val, size);
844         break;
845     case PHB_SCOM_ETU_PMON_CONFIG:
846     case PHB_SCOM_ETU_PMON_CTR0:
847     case PHB_SCOM_ETU_PMON_CTR1:
848     case PHB_SCOM_ETU_PMON_CTR2:
849     case PHB_SCOM_ETU_PMON_CTR3:
850         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
851         pnv_phb4_reg_write(phb, offset, val, size);
852         break;
853     default:
854         qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
855                       "=%"PRIx64"\n", addr, val);
856     }
857 }
858 
859 const MemoryRegionOps pnv_phb4_xscom_ops = {
860     .read = pnv_phb4_xscom_read,
861     .write = pnv_phb4_xscom_write,
862     .valid.min_access_size = 8,
863     .valid.max_access_size = 8,
864     .impl.min_access_size = 8,
865     .impl.max_access_size = 8,
866     .endianness = DEVICE_BIG_ENDIAN,
867 };
868 
869 static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr,
870                                             unsigned size)
871 {
872     PnvPHB4 *phb = PNV_PHB4(opaque);
873     uint32_t reg = addr >> 3;
874 
875     /* TODO: add list of allowed registers and error out if not */
876     return phb->nest_regs[reg];
877 }
878 
879 /*
880  * Return the 'stack_no' of a PHB4. 'stack_no' is the order
881  * the PHB4 occupies in the PEC. This is the reverse of what
882  * pnv_phb4_pec_get_phb_id() does.
883  *
884  * E.g. a phb with phb_id = 4 and pec->index = 1 (PEC1) will
885  * be the second phb (stack_no = 1) of the PEC.
886  */
887 static int pnv_phb4_get_phb_stack_no(PnvPHB4 *phb)
888 {
889     PnvPhb4PecState *pec = phb->pec;
890     PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
891     int index = pec->index;
892     int stack_no = phb->phb_id;
893 
894     while (index--) {
895         stack_no -= pecc->num_phbs[index];
896     }
897 
898     return stack_no;
899 }
900 
901 static void pnv_phb4_update_regions(PnvPHB4 *phb)
902 {
903     /* Unmap first always */
904     if (memory_region_is_mapped(&phb->mr_regs)) {
905         memory_region_del_subregion(&phb->phbbar, &phb->mr_regs);
906     }
907     if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
908         memory_region_del_subregion(&phb->intbar, &phb->xsrc.esb_mmio);
909     }
910 
911     /* Map registers if enabled */
912     if (memory_region_is_mapped(&phb->phbbar)) {
913         memory_region_add_subregion(&phb->phbbar, 0, &phb->mr_regs);
914     }
915 
916     /* Map ESB if enabled */
917     if (memory_region_is_mapped(&phb->intbar)) {
918         memory_region_add_subregion(&phb->intbar, 0, &phb->xsrc.esb_mmio);
919     }
920 
921     /* Check/update m32 */
922     pnv_phb4_check_all_mbt(phb);
923 }
924 
925 static void pnv_pec_phb_update_map(PnvPHB4 *phb)
926 {
927     PnvPhb4PecState *pec = phb->pec;
928     MemoryRegion *sysmem = get_system_memory();
929     uint64_t bar_en = phb->nest_regs[PEC_NEST_STK_BAR_EN];
930     int stack_no = pnv_phb4_get_phb_stack_no(phb);
931     uint64_t bar, mask, size;
932     char name[64];
933 
934     /*
935      * NOTE: This will really not work well if those are remapped
936      * after the PHB has created its sub regions. We could do better
937      * if we had a way to resize regions but we don't really care
938      * that much in practice as the stuff below really only happens
939      * once early during boot
940      */
941 
942     /* Handle unmaps */
943     if (memory_region_is_mapped(&phb->mmbar0) &&
944         !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
945         memory_region_del_subregion(sysmem, &phb->mmbar0);
946     }
947     if (memory_region_is_mapped(&phb->mmbar1) &&
948         !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
949         memory_region_del_subregion(sysmem, &phb->mmbar1);
950     }
951     if (memory_region_is_mapped(&phb->phbbar) &&
952         !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
953         memory_region_del_subregion(sysmem, &phb->phbbar);
954     }
955     if (memory_region_is_mapped(&phb->intbar) &&
956         !(bar_en & PEC_NEST_STK_BAR_EN_INT)) {
957         memory_region_del_subregion(sysmem, &phb->intbar);
958     }
959 
960     /* Update PHB */
961     pnv_phb4_update_regions(phb);
962 
963     /* Handle maps */
964     if (!memory_region_is_mapped(&phb->mmbar0) &&
965         (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
966         bar = phb->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8;
967         mask = phb->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK];
968         size = ((~mask) >> 8) + 1;
969         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio0",
970                  pec->chip_id, pec->index, stack_no);
971         memory_region_init(&phb->mmbar0, OBJECT(phb), name, size);
972         memory_region_add_subregion(sysmem, bar, &phb->mmbar0);
973         phb->mmio0_base = bar;
974         phb->mmio0_size = size;
975     }
976     if (!memory_region_is_mapped(&phb->mmbar1) &&
977         (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
978         bar = phb->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8;
979         mask = phb->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK];
980         size = ((~mask) >> 8) + 1;
981         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio1",
982                  pec->chip_id, pec->index, stack_no);
983         memory_region_init(&phb->mmbar1, OBJECT(phb), name, size);
984         memory_region_add_subregion(sysmem, bar, &phb->mmbar1);
985         phb->mmio1_base = bar;
986         phb->mmio1_size = size;
987     }
988     if (!memory_region_is_mapped(&phb->phbbar) &&
989         (bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
990         bar = phb->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8;
991         size = PNV_PHB4_NUM_REGS << 3;
992         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d",
993                  pec->chip_id, pec->index, stack_no);
994         memory_region_init(&phb->phbbar, OBJECT(phb), name, size);
995         memory_region_add_subregion(sysmem, bar, &phb->phbbar);
996     }
997     if (!memory_region_is_mapped(&phb->intbar) &&
998         (bar_en & PEC_NEST_STK_BAR_EN_INT)) {
999         bar = phb->nest_regs[PEC_NEST_STK_INT_BAR] >> 8;
1000         size = PNV_PHB4_MAX_INTs << 16;
1001         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-int",
1002                  phb->pec->chip_id, phb->pec->index, stack_no);
1003         memory_region_init(&phb->intbar, OBJECT(phb), name, size);
1004         memory_region_add_subregion(sysmem, bar, &phb->intbar);
1005     }
1006 
1007     /* Update PHB */
1008     pnv_phb4_update_regions(phb);
1009 }
1010 
1011 static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr,
1012                                          uint64_t val, unsigned size)
1013 {
1014     PnvPHB4 *phb = PNV_PHB4(opaque);
1015     PnvPhb4PecState *pec = phb->pec;
1016     uint32_t reg = addr >> 3;
1017 
1018     switch (reg) {
1019     case PEC_NEST_STK_PCI_NEST_FIR:
1020         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val;
1021         break;
1022     case PEC_NEST_STK_PCI_NEST_FIR_CLR:
1023         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val;
1024         break;
1025     case PEC_NEST_STK_PCI_NEST_FIR_SET:
1026         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val;
1027         break;
1028     case PEC_NEST_STK_PCI_NEST_FIR_MSK:
1029         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val;
1030         break;
1031     case PEC_NEST_STK_PCI_NEST_FIR_MSKC:
1032         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val;
1033         break;
1034     case PEC_NEST_STK_PCI_NEST_FIR_MSKS:
1035         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val;
1036         break;
1037     case PEC_NEST_STK_PCI_NEST_FIR_ACT0:
1038     case PEC_NEST_STK_PCI_NEST_FIR_ACT1:
1039         phb->nest_regs[reg] = val;
1040         break;
1041     case PEC_NEST_STK_PCI_NEST_FIR_WOF:
1042         phb->nest_regs[reg] = 0;
1043         break;
1044     case PEC_NEST_STK_ERR_REPORT_0:
1045     case PEC_NEST_STK_ERR_REPORT_1:
1046     case PEC_NEST_STK_PBCQ_GNRL_STATUS:
1047         /* Flag error ? */
1048         break;
1049     case PEC_NEST_STK_PBCQ_MODE:
1050         phb->nest_regs[reg] = val & 0xff00000000000000ull;
1051         break;
1052     case PEC_NEST_STK_MMIO_BAR0:
1053     case PEC_NEST_STK_MMIO_BAR0_MASK:
1054     case PEC_NEST_STK_MMIO_BAR1:
1055     case PEC_NEST_STK_MMIO_BAR1_MASK:
1056         if (phb->nest_regs[PEC_NEST_STK_BAR_EN] &
1057             (PEC_NEST_STK_BAR_EN_MMIO0 |
1058              PEC_NEST_STK_BAR_EN_MMIO1)) {
1059             phb_pec_error(pec, "Changing enabled BAR unsupported\n");
1060         }
1061         phb->nest_regs[reg] = val & 0xffffffffff000000ull;
1062         break;
1063     case PEC_NEST_STK_PHB_REGS_BAR:
1064         if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) {
1065             phb_pec_error(pec, "Changing enabled BAR unsupported\n");
1066         }
1067         phb->nest_regs[reg] = val & 0xffffffffffc00000ull;
1068         break;
1069     case PEC_NEST_STK_INT_BAR:
1070         if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) {
1071             phb_pec_error(pec, "Changing enabled BAR unsupported\n");
1072         }
1073         phb->nest_regs[reg] = val & 0xfffffff000000000ull;
1074         break;
1075     case PEC_NEST_STK_BAR_EN:
1076         phb->nest_regs[reg] = val & 0xf000000000000000ull;
1077         pnv_pec_phb_update_map(phb);
1078         break;
1079     case PEC_NEST_STK_DATA_FRZ_TYPE:
1080     case PEC_NEST_STK_PBCQ_TUN_BAR:
1081         /* Not used for now */
1082         phb->nest_regs[reg] = val;
1083         break;
1084     default:
1085         qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx
1086                       "=%"PRIx64"\n", addr, val);
1087     }
1088 }
1089 
1090 static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = {
1091     .read = pnv_pec_stk_nest_xscom_read,
1092     .write = pnv_pec_stk_nest_xscom_write,
1093     .valid.min_access_size = 8,
1094     .valid.max_access_size = 8,
1095     .impl.min_access_size = 8,
1096     .impl.max_access_size = 8,
1097     .endianness = DEVICE_BIG_ENDIAN,
1098 };
1099 
1100 static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr,
1101                                            unsigned size)
1102 {
1103     PnvPHB4 *phb = PNV_PHB4(opaque);
1104     uint32_t reg = addr >> 3;
1105 
1106     /* TODO: add list of allowed registers and error out if not */
1107     return phb->pci_regs[reg];
1108 }
1109 
1110 static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr,
1111                                         uint64_t val, unsigned size)
1112 {
1113     PnvPHB4 *phb = PNV_PHB4(opaque);
1114     uint32_t reg = addr >> 3;
1115 
1116     switch (reg) {
1117     case PEC_PCI_STK_PCI_FIR:
1118         phb->pci_regs[reg] = val;
1119         break;
1120     case PEC_PCI_STK_PCI_FIR_CLR:
1121         phb->pci_regs[PEC_PCI_STK_PCI_FIR] &= val;
1122         break;
1123     case PEC_PCI_STK_PCI_FIR_SET:
1124         phb->pci_regs[PEC_PCI_STK_PCI_FIR] |= val;
1125         break;
1126     case PEC_PCI_STK_PCI_FIR_MSK:
1127         phb->pci_regs[reg] = val;
1128         break;
1129     case PEC_PCI_STK_PCI_FIR_MSKC:
1130         phb->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val;
1131         break;
1132     case PEC_PCI_STK_PCI_FIR_MSKS:
1133         phb->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val;
1134         break;
1135     case PEC_PCI_STK_PCI_FIR_ACT0:
1136     case PEC_PCI_STK_PCI_FIR_ACT1:
1137         phb->pci_regs[reg] = val;
1138         break;
1139     case PEC_PCI_STK_PCI_FIR_WOF:
1140         phb->pci_regs[reg] = 0;
1141         break;
1142     case PEC_PCI_STK_ETU_RESET:
1143         phb->pci_regs[reg] = val & 0x8000000000000000ull;
1144         /* TODO: Implement reset */
1145         break;
1146     case PEC_PCI_STK_PBAIB_ERR_REPORT:
1147         break;
1148     case PEC_PCI_STK_PBAIB_TX_CMD_CRED:
1149     case PEC_PCI_STK_PBAIB_TX_DAT_CRED:
1150         phb->pci_regs[reg] = val;
1151         break;
1152     default:
1153         qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx
1154                       "=%"PRIx64"\n", addr, val);
1155     }
1156 }
1157 
1158 static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = {
1159     .read = pnv_pec_stk_pci_xscom_read,
1160     .write = pnv_pec_stk_pci_xscom_write,
1161     .valid.min_access_size = 8,
1162     .valid.max_access_size = 8,
1163     .impl.min_access_size = 8,
1164     .impl.max_access_size = 8,
1165     .endianness = DEVICE_BIG_ENDIAN,
1166 };
1167 
1168 static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
1169 {
1170     /* Check that out properly ... */
1171     return irq_num & 3;
1172 }
1173 
1174 static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
1175 {
1176     PnvPHB4 *phb = PNV_PHB4(opaque);
1177     uint32_t lsi_base;
1178 
1179     /* LSI only ... */
1180     if (irq_num > 3) {
1181         phb_error(phb, "IRQ %x is not an LSI", irq_num);
1182     }
1183     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
1184     lsi_base <<= 3;
1185     qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
1186 }
1187 
1188 static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
1189 {
1190     uint64_t rtt, addr;
1191     uint16_t rte;
1192     int bus_num;
1193     int num_PEs;
1194 
1195     /* Already resolved ? */
1196     if (ds->pe_num != PHB_INVALID_PE) {
1197         return true;
1198     }
1199 
1200     /* We need to lookup the RTT */
1201     rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
1202     if (!(rtt & PHB_RTT_BAR_ENABLE)) {
1203         phb_error(ds->phb, "DMA with RTT BAR disabled !");
1204         /* Set error bits ? fence ? ... */
1205         return false;
1206     }
1207 
1208     /* Read RTE */
1209     bus_num = pci_bus_num(ds->bus);
1210     addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
1211     addr += 2 * PCI_BUILD_BDF(bus_num, ds->devfn);
1212     if (dma_memory_read(&address_space_memory, addr, &rte,
1213                         sizeof(rte), MEMTXATTRS_UNSPECIFIED)) {
1214         phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
1215         /* Set error bits ? fence ? ... */
1216         return false;
1217     }
1218     rte = be16_to_cpu(rte);
1219 
1220     /* Fail upon reading of invalid PE# */
1221     num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
1222     if (rte >= num_PEs) {
1223         phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
1224         rte &= num_PEs - 1;
1225     }
1226     ds->pe_num = rte;
1227     return true;
1228 }
1229 
1230 static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
1231                                    bool is_write, uint64_t tve,
1232                                    IOMMUTLBEntry *tlb)
1233 {
1234     uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
1235     int32_t  lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
1236     uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
1237     uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
1238 
1239     /* Invalid levels */
1240     if (lev > 4) {
1241         phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
1242         return;
1243     }
1244 
1245     /* Invalid entry */
1246     if (tts == 0) {
1247         phb_error(ds->phb, "Access to invalid TVE");
1248         return;
1249     }
1250 
1251     /* IO Page Size of 0 means untranslated, else use TCEs */
1252     if (tps == 0) {
1253         /* TODO: Handle boundaries */
1254 
1255         /* Use 4k pages like q35 ... for now */
1256         tlb->iova = addr & 0xfffffffffffff000ull;
1257         tlb->translated_addr = addr & 0x0003fffffffff000ull;
1258         tlb->addr_mask = 0xfffull;
1259         tlb->perm = IOMMU_RW;
1260     } else {
1261         uint32_t tce_shift, tbl_shift, sh;
1262         uint64_t base, taddr, tce, tce_mask;
1263 
1264         /* Address bits per bottom level TCE entry */
1265         tce_shift = tps + 11;
1266 
1267         /* Address bits per table level */
1268         tbl_shift = tts + 8;
1269 
1270         /* Top level table base address */
1271         base = tta << 12;
1272 
1273         /* Total shift to first level */
1274         sh = tbl_shift * lev + tce_shift;
1275 
1276         /* TODO: Limit to support IO page sizes */
1277 
1278         /* TODO: Multi-level untested */
1279         do {
1280             lev--;
1281 
1282             /* Grab the TCE address */
1283             taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
1284             if (dma_memory_read(&address_space_memory, taddr, &tce,
1285                                 sizeof(tce), MEMTXATTRS_UNSPECIFIED)) {
1286                 phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
1287                 return;
1288             }
1289             tce = be64_to_cpu(tce);
1290 
1291             /* Check permission for indirect TCE */
1292             if ((lev >= 0) && !(tce & 3)) {
1293                 phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
1294                 phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
1295                            is_write ? 'W' : 'R', tve);
1296                 phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
1297                            tta, lev, tts, tps);
1298                 return;
1299             }
1300             sh -= tbl_shift;
1301             base = tce & ~0xfffull;
1302         } while (lev >= 0);
1303 
1304         /* We exit the loop with TCE being the final TCE */
1305         if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
1306             phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
1307             phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
1308                        is_write ? 'W' : 'R', tve);
1309             phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
1310                        tta, lev, tts, tps);
1311             return;
1312         }
1313         tce_mask = ~((1ull << tce_shift) - 1);
1314         tlb->iova = addr & tce_mask;
1315         tlb->translated_addr = tce & tce_mask;
1316         tlb->addr_mask = ~tce_mask;
1317         tlb->perm = tce & 3;
1318     }
1319 }
1320 
1321 static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
1322                                               hwaddr addr,
1323                                               IOMMUAccessFlags flag,
1324                                               int iommu_idx)
1325 {
1326     PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
1327     int tve_sel;
1328     uint64_t tve, cfg;
1329     IOMMUTLBEntry ret = {
1330         .target_as = &address_space_memory,
1331         .iova = addr,
1332         .translated_addr = 0,
1333         .addr_mask = ~(hwaddr)0,
1334         .perm = IOMMU_NONE,
1335     };
1336 
1337     /* Resolve PE# */
1338     if (!pnv_phb4_resolve_pe(ds)) {
1339         phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1340                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1341         return ret;
1342     }
1343 
1344     /* Check top bits */
1345     switch (addr >> 60) {
1346     case 00:
1347         /* DMA or 32-bit MSI ? */
1348         cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
1349         if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
1350             ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
1351             phb_error(ds->phb, "xlate on 32-bit MSI region");
1352             return ret;
1353         }
1354         /* Choose TVE XXX Use PHB4 Control Register */
1355         tve_sel = (addr >> 59) & 1;
1356         tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
1357         pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
1358         break;
1359     case 01:
1360         phb_error(ds->phb, "xlate on 64-bit MSI region");
1361         break;
1362     default:
1363         phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
1364     }
1365     return ret;
1366 }
1367 
/*
 * QOM type name of the PHB4 IOMMU memory region, together with the
 * instance checker that casts an object to IOMMUMemoryRegion.
 */
#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region"
DECLARE_INSTANCE_CHECKER(IOMMUMemoryRegion, PNV_PHB4_IOMMU_MEMORY_REGION,
                         TYPE_PNV_PHB4_IOMMU_MEMORY_REGION)
1371 
1372 static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
1373                                                     void *data)
1374 {
1375     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1376 
1377     imrc->translate = pnv_phb4_translate_iommu;
1378 }
1379 
1380 static const TypeInfo pnv_phb4_iommu_memory_region_info = {
1381     .parent = TYPE_IOMMU_MEMORY_REGION,
1382     .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1383     .class_init = pnv_phb4_iommu_memory_region_class_init,
1384 };
1385 
1386 /*
1387  * Return the index/phb-id of a PHB4 that belongs to a
1388  * pec->stacks[stack_index] stack.
1389  */
1390 int pnv_phb4_pec_get_phb_id(PnvPhb4PecState *pec, int stack_index)
1391 {
1392     PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
1393     int index = pec->index;
1394     int offset = 0;
1395 
1396     while (index--) {
1397         offset += pecc->num_phbs[index];
1398     }
1399 
1400     return offset + stack_index;
1401 }
1402 
1403 /*
1404  * MSI/MSIX memory region implementation.
1405  * The handler handles both MSI and MSIX.
1406  */
1407 static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
1408                                uint64_t data, unsigned size)
1409 {
1410     PnvPhb4DMASpace *ds = opaque;
1411     PnvPHB4 *phb = ds->phb;
1412 
1413     uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
1414 
1415     /* Resolve PE# */
1416     if (!pnv_phb4_resolve_pe(ds)) {
1417         phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1418                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1419         return;
1420     }
1421 
1422     /* TODO: Check it doesn't collide with LSIs */
1423     if (src >= phb->xsrc.nr_irqs) {
1424         phb_error(phb, "MSI %d out of bounds", src);
1425         return;
1426     }
1427 
1428     /* TODO: check PE/MSI assignement */
1429 
1430     qemu_irq_pulse(phb->qirqs[src]);
1431 }
1432 
1433 /* There is no .read as the read result is undefined by PCI spec */
1434 static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
1435 {
1436     PnvPhb4DMASpace *ds = opaque;
1437 
1438     phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
1439     return -1;
1440 }
1441 
1442 static const MemoryRegionOps pnv_phb4_msi_ops = {
1443     .read = pnv_phb4_msi_read,
1444     .write = pnv_phb4_msi_write,
1445     .endianness = DEVICE_LITTLE_ENDIAN
1446 };
1447 
1448 static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
1449 {
1450     PnvPhb4DMASpace *ds;
1451 
1452     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
1453         if (ds->bus == bus && ds->devfn == devfn) {
1454             break;
1455         }
1456     }
1457     return ds;
1458 }
1459 
1460 static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1461 {
1462     PnvPHB4 *phb = opaque;
1463     PnvPhb4DMASpace *ds;
1464     char name[32];
1465 
1466     ds = pnv_phb4_dma_find(phb, bus, devfn);
1467 
1468     if (ds == NULL) {
1469         ds = g_malloc0(sizeof(PnvPhb4DMASpace));
1470         ds->bus = bus;
1471         ds->devfn = devfn;
1472         ds->pe_num = PHB_INVALID_PE;
1473         ds->phb = phb;
1474         snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
1475                  phb->phb_id);
1476         memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
1477                                  TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1478                                  OBJECT(phb), name, UINT64_MAX);
1479         address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
1480                            name);
1481         memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1482                               ds, "msi32", 0x10000);
1483         memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1484                               ds, "msi64", 0x100000);
1485         pnv_phb4_update_msi_regions(ds);
1486 
1487         QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
1488     }
1489     return &ds->dma_as;
1490 }
1491 
1492 static void pnv_phb4_xscom_realize(PnvPHB4 *phb)
1493 {
1494     PnvPhb4PecState *pec = phb->pec;
1495     PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
1496     int stack_no = pnv_phb4_get_phb_stack_no(phb);
1497     uint32_t pec_nest_base;
1498     uint32_t pec_pci_base;
1499     char name[64];
1500 
1501     assert(pec);
1502 
1503     /* Initialize the XSCOM regions for the stack registers */
1504     snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-phb-%d",
1505              pec->chip_id, pec->index, stack_no);
1506     pnv_xscom_region_init(&phb->nest_regs_mr, OBJECT(phb),
1507                           &pnv_pec_stk_nest_xscom_ops, phb, name,
1508                           PHB4_PEC_NEST_STK_REGS_COUNT);
1509 
1510     snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-phb-%d",
1511              pec->chip_id, pec->index, stack_no);
1512     pnv_xscom_region_init(&phb->pci_regs_mr, OBJECT(phb),
1513                           &pnv_pec_stk_pci_xscom_ops, phb, name,
1514                           PHB4_PEC_PCI_STK_REGS_COUNT);
1515 
1516     /* PHB pass-through */
1517     snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-phb-%d",
1518              pec->chip_id, pec->index, stack_no);
1519     pnv_xscom_region_init(&phb->phb_regs_mr, OBJECT(phb),
1520                           &pnv_phb4_xscom_ops, phb, name, 0x40);
1521 
1522     pec_nest_base = pecc->xscom_nest_base(pec);
1523     pec_pci_base = pecc->xscom_pci_base(pec);
1524 
1525     /* Populate the XSCOM address space. */
1526     pnv_xscom_add_subregion(pec->chip,
1527                             pec_nest_base + 0x40 * (stack_no + 1),
1528                             &phb->nest_regs_mr);
1529     pnv_xscom_add_subregion(pec->chip,
1530                             pec_pci_base + 0x40 * (stack_no + 1),
1531                             &phb->pci_regs_mr);
1532     pnv_xscom_add_subregion(pec->chip,
1533                             pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 +
1534                             0x40 * stack_no,
1535                             &phb->phb_regs_mr);
1536 }
1537 
1538 static void pnv_phb4_instance_init(Object *obj)
1539 {
1540     PnvPHB4 *phb = PNV_PHB4(obj);
1541 
1542     QLIST_INIT(&phb->dma_spaces);
1543 
1544     /* XIVE interrupt source object */
1545     object_initialize_child(obj, "source", &phb->xsrc, TYPE_XIVE_SOURCE);
1546 }
1547 
1548 static PnvPhb4PecState *pnv_phb4_get_pec(PnvChip *chip, PnvPHB4 *phb,
1549                                          Error **errp)
1550 {
1551     Pnv9Chip *chip9 = PNV9_CHIP(chip);
1552     int chip_id = phb->chip_id;
1553     int index = phb->phb_id;
1554     int i, j;
1555 
1556     for (i = 0; i < chip->num_pecs; i++) {
1557         /*
1558          * For each PEC, check the amount of phbs it supports
1559          * and see if the given phb4 index matches an index.
1560          */
1561         PnvPhb4PecState *pec = &chip9->pecs[i];
1562 
1563         for (j = 0; j < pec->num_phbs; j++) {
1564             if (index == pnv_phb4_pec_get_phb_id(pec, j)) {
1565                 return pec;
1566             }
1567         }
1568     }
1569 
1570     error_setg(errp,
1571                "pnv-phb4 chip-id %d index %d didn't match any existing PEC",
1572                chip_id, index);
1573 
1574     return NULL;
1575 }
1576 
1577 static void pnv_phb4_realize(DeviceState *dev, Error **errp)
1578 {
1579     PnvPHB4 *phb = PNV_PHB4(dev);
1580     PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
1581     PnvChip *chip = pnv_get_chip(pnv, phb->chip_id);
1582     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
1583     XiveSource *xsrc = &phb->xsrc;
1584     BusState *s;
1585     Error *local_err = NULL;
1586     int nr_irqs;
1587     char name[32];
1588 
1589     if (!chip) {
1590         error_setg(errp, "invalid chip id: %d", phb->chip_id);
1591         return;
1592     }
1593 
1594     /* User created PHBs need to be assigned to a PEC */
1595     if (!phb->pec) {
1596         phb->pec = pnv_phb4_get_pec(chip, phb, &local_err);
1597         if (local_err) {
1598             error_propagate(errp, local_err);
1599             return;
1600         }
1601     }
1602 
1603     /* Reparent the PHB to the chip to build the device tree */
1604     pnv_chip_parent_fixup(chip, OBJECT(phb), phb->phb_id);
1605 
1606     s = qdev_get_parent_bus(DEVICE(chip));
1607     if (!qdev_set_parent_bus(DEVICE(phb), s, &local_err)) {
1608         error_propagate(errp, local_err);
1609         return;
1610     }
1611 
1612     /* Set the "big_phb" flag */
1613     phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
1614 
1615     /* Controller Registers */
1616     snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
1617              phb->phb_id);
1618     memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
1619                           name, 0x2000);
1620 
1621     /*
1622      * PHB4 doesn't support IO space. However, qemu gets very upset if
1623      * we don't have an IO region to anchor IO BARs onto so we just
1624      * initialize one which we never hook up to anything
1625      */
1626 
1627     snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
1628              phb->phb_id);
1629     memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);
1630 
1631     snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
1632              phb->phb_id);
1633     memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
1634                        PCI_MMIO_TOTAL_SIZE);
1635 
1636     pci->bus = pci_register_root_bus(dev, dev->id,
1637                                      pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
1638                                      &phb->pci_mmio, &phb->pci_io,
1639                                      0, 4, TYPE_PNV_PHB4_ROOT_BUS);
1640     pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
1641     pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
1642 
1643     /* Setup XIVE Source */
1644     if (phb->big_phb) {
1645         nr_irqs = PNV_PHB4_MAX_INTs;
1646     } else {
1647         nr_irqs = PNV_PHB4_MAX_INTs >> 1;
1648     }
1649     object_property_set_int(OBJECT(xsrc), "nr-irqs", nr_irqs, &error_fatal);
1650     object_property_set_link(OBJECT(xsrc), "xive", OBJECT(phb), &error_fatal);
1651     if (!qdev_realize(DEVICE(xsrc), NULL, errp)) {
1652         return;
1653     }
1654 
1655     pnv_phb4_update_xsrc(phb);
1656 
1657     phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
1658 
1659     pnv_phb4_xscom_realize(phb);
1660 }
1661 
1662 static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
1663                                           PCIBus *rootbus)
1664 {
1665     PnvPHB4 *phb = PNV_PHB4(host_bridge);
1666 
1667     snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
1668              phb->chip_id, phb->phb_id);
1669     return phb->bus_path;
1670 }
1671 
1672 /*
1673  * Address base trigger mode (POWER10)
1674  *
1675  * Trigger directly the IC ESB page
1676  */
1677 static void pnv_phb4_xive_notify_abt(PnvPHB4 *phb, uint32_t srcno,
1678                                      bool pq_checked)
1679 {
1680     uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
1681     uint64_t data = 0; /* trigger data : don't care */
1682     hwaddr addr;
1683     MemTxResult result;
1684     int esb_shift;
1685 
1686     if (notif_port & PHB_INT_NOTIFY_ADDR_64K) {
1687         esb_shift = 16;
1688     } else {
1689         esb_shift = 12;
1690     }
1691 
1692     /* Compute the address of the IC ESB management page */
1693     addr = (notif_port & ~PHB_INT_NOTIFY_ADDR_64K);
1694     addr |= (1ull << (esb_shift + 1)) * srcno;
1695     addr |= (1ull << esb_shift);
1696 
1697     /*
1698      * When the PQ state bits are checked on the PHB, the associated
1699      * PQ state bits on the IC should be ignored. Use the unconditional
1700      * trigger offset to inject a trigger on the IC. This is always
1701      * the case for LSIs
1702      */
1703     if (pq_checked) {
1704         addr |= XIVE_ESB_INJECT;
1705     }
1706 
1707     trace_pnv_phb4_xive_notify_ic(addr, data);
1708 
1709     address_space_stq_be(&address_space_memory, addr, data,
1710                          MEMTXATTRS_UNSPECIFIED, &result);
1711     if (result != MEMTX_OK) {
1712         phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", addr);
1713         return;
1714     }
1715 }
1716 
1717 static void pnv_phb4_xive_notify_ic(PnvPHB4 *phb, uint32_t srcno,
1718                                     bool pq_checked)
1719 {
1720     uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
1721     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1722     uint64_t data = offset | srcno;
1723     MemTxResult result;
1724 
1725     if (pq_checked) {
1726         data |= XIVE_TRIGGER_PQ;
1727     }
1728 
1729     trace_pnv_phb4_xive_notify_ic(notif_port, data);
1730 
1731     address_space_stq_be(&address_space_memory, notif_port, data,
1732                          MEMTXATTRS_UNSPECIFIED, &result);
1733     if (result != MEMTX_OK) {
1734         phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port);
1735         return;
1736     }
1737 }
1738 
1739 static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno,
1740                                  bool pq_checked)
1741 {
1742     PnvPHB4 *phb = PNV_PHB4(xf);
1743 
1744     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_ABT_MODE) {
1745         pnv_phb4_xive_notify_abt(phb, srcno, pq_checked);
1746     } else {
1747         pnv_phb4_xive_notify_ic(phb, srcno, pq_checked);
1748     }
1749 }
1750 
/*
 * qdev properties of the PHB4 device:
 *   "index"   - PHB number, stored in phb_id (default 0)
 *   "chip-id" - id of the chip, stored in chip_id (default 0)
 *   "pec"     - link to the PEC object, stored in pec
 */
static Property pnv_phb4_properties[] = {
        DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
        DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
        DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC,
                         PnvPhb4PecState *),
        DEFINE_PROP_END_OF_LIST(),
};
1758 
1759 static void pnv_phb4_class_init(ObjectClass *klass, void *data)
1760 {
1761     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1762     DeviceClass *dc = DEVICE_CLASS(klass);
1763     XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass);
1764 
1765     hc->root_bus_path   = pnv_phb4_root_bus_path;
1766     dc->realize         = pnv_phb4_realize;
1767     device_class_set_props(dc, pnv_phb4_properties);
1768     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1769     dc->user_creatable  = true;
1770 
1771     xfc->notify         = pnv_phb4_xive_notify;
1772 }
1773 
1774 static const TypeInfo pnv_phb4_type_info = {
1775     .name          = TYPE_PNV_PHB4,
1776     .parent        = TYPE_PCIE_HOST_BRIDGE,
1777     .instance_init = pnv_phb4_instance_init,
1778     .instance_size = sizeof(PnvPHB4),
1779     .class_init    = pnv_phb4_class_init,
1780     .interfaces = (InterfaceInfo[]) {
1781             { TYPE_XIVE_NOTIFIER },
1782             { },
1783     }
1784 };
1785 
1786 static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
1787 {
1788     BusClass *k = BUS_CLASS(klass);
1789 
1790     /*
1791      * PHB4 has only a single root complex. Enforce the limit on the
1792      * parent bus
1793      */
1794     k->max_dev = 1;
1795 }
1796 
1797 static const TypeInfo pnv_phb4_root_bus_info = {
1798     .name = TYPE_PNV_PHB4_ROOT_BUS,
1799     .parent = TYPE_PCIE_BUS,
1800     .class_init = pnv_phb4_root_bus_class_init,
1801     .interfaces = (InterfaceInfo[]) {
1802         { INTERFACE_PCIE_DEVICE },
1803         { }
1804     },
1805 };
1806 
1807 static void pnv_phb4_root_port_reset(DeviceState *dev)
1808 {
1809     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1810     PCIDevice *d = PCI_DEVICE(dev);
1811     uint8_t *conf = d->config;
1812 
1813     rpc->parent_reset(dev);
1814 
1815     pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
1816                                PCI_IO_RANGE_MASK & 0xff);
1817     pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
1818                                  PCI_IO_RANGE_MASK & 0xff);
1819     pci_set_word(conf + PCI_MEMORY_BASE, 0);
1820     pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
1821     pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
1822     pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
1823     pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
1824     pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
1825 }
1826 
1827 static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
1828 {
1829     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1830     PCIDevice *pci = PCI_DEVICE(dev);
1831     PCIBus *bus = pci_get_bus(pci);
1832     PnvPHB4 *phb = NULL;
1833     Error *local_err = NULL;
1834 
1835     phb = (PnvPHB4 *) object_dynamic_cast(OBJECT(bus->qbus.parent),
1836                                           TYPE_PNV_PHB4);
1837 
1838     if (!phb) {
1839         error_setg(errp, "%s must be connected to pnv-phb4 buses", dev->id);
1840         return;
1841     }
1842 
1843     /* Set unique chassis/slot values for the root port */
1844     qdev_prop_set_uint8(&pci->qdev, "chassis", phb->chip_id);
1845     qdev_prop_set_uint16(&pci->qdev, "slot", phb->phb_id);
1846 
1847     rpc->parent_realize(dev, &local_err);
1848     if (local_err) {
1849         error_propagate(errp, local_err);
1850         return;
1851     }
1852 }
1853 
1854 static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
1855 {
1856     DeviceClass *dc = DEVICE_CLASS(klass);
1857     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1858     PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
1859 
1860     dc->desc     = "IBM PHB4 PCIE Root Port";
1861     dc->user_creatable = true;
1862 
1863     device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
1864                                     &rpc->parent_realize);
1865     device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
1866                                   &rpc->parent_reset);
1867 
1868     k->vendor_id = PCI_VENDOR_ID_IBM;
1869     k->device_id = PNV_PHB4_DEVICE_ID;
1870     k->revision  = 0;
1871 
1872     rpc->exp_offset = 0x48;
1873     rpc->aer_offset = 0x100;
1874 
1875     dc->reset = &pnv_phb4_root_port_reset;
1876 }
1877 
1878 static const TypeInfo pnv_phb4_root_port_info = {
1879     .name          = TYPE_PNV_PHB4_ROOT_PORT,
1880     .parent        = TYPE_PCIE_ROOT_PORT,
1881     .instance_size = sizeof(PnvPHB4RootPort),
1882     .class_init    = pnv_phb4_root_port_class_init,
1883 };
1884 
1885 static void pnv_phb5_root_port_class_init(ObjectClass *klass, void *data)
1886 {
1887     DeviceClass *dc = DEVICE_CLASS(klass);
1888     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1889 
1890     dc->desc     = "IBM PHB5 PCIE Root Port";
1891     dc->user_creatable = true;
1892 
1893     k->vendor_id = PCI_VENDOR_ID_IBM;
1894     k->device_id = PNV_PHB5_DEVICE_ID;
1895 }
1896 
1897 static const TypeInfo pnv_phb5_root_port_info = {
1898     .name          = TYPE_PNV_PHB5_ROOT_PORT,
1899     .parent        = TYPE_PNV_PHB4_ROOT_PORT,
1900     .instance_size = sizeof(PnvPHB4RootPort),
1901     .class_init    = pnv_phb5_root_port_class_init,
1902 };
1903 
1904 static void pnv_phb4_register_types(void)
1905 {
1906     type_register_static(&pnv_phb4_root_bus_info);
1907     type_register_static(&pnv_phb5_root_port_info);
1908     type_register_static(&pnv_phb4_root_port_info);
1909     type_register_static(&pnv_phb4_type_info);
1910     type_register_static(&pnv_phb4_iommu_memory_region_info);
1911 }
1912 
1913 type_init(pnv_phb4_register_types);
1914 
1915 void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
1916 {
1917     uint64_t notif_port =
1918         phb->regs[PHB_INT_NOTIFY_ADDR >> 3] & ~PHB_INT_NOTIFY_ADDR_64K;
1919     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1920     bool abt = !!(phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_ABT_MODE);
1921 
1922     monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x %s @%"HWADDR_PRIx"\n",
1923                    phb->chip_id, phb->phb_id,
1924                    offset, offset + phb->xsrc.nr_irqs - 1,
1925                    abt ? "ABT" : "",
1926                    notif_port);
1927     xive_source_pic_print_info(&phb->xsrc, 0, mon);
1928 }
1929