xref: /openbmc/qemu/hw/pci-host/pnv_phb3.c (revision 19f70347)
1 /*
2  * QEMU PowerPC PowerNV (POWER8) PHB3 model
3  *
4  * Copyright (c) 2014-2020, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qapi/visitor.h"
12 #include "qapi/error.h"
13 #include "qemu-common.h"
14 #include "hw/pci-host/pnv_phb3_regs.h"
15 #include "hw/pci-host/pnv_phb3.h"
16 #include "hw/pci/pcie_host.h"
17 #include "hw/pci/pcie_port.h"
18 #include "hw/ppc/pnv.h"
19 #include "hw/irq.h"
20 #include "hw/qdev-properties.h"
21 
/* Log a guest error, prefixed with the chip and PHB identifiers of @phb */
#define phb3_error(phb, fmt, ...)                                       \
    qemu_log_mask(LOG_GUEST_ERROR,                                      \
                  "phb3[%d:%d]: " fmt "\n",                             \
                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
25 
26 static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb)
27 {
28     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
29     uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
30     uint8_t bus, devfn;
31 
32     if (!(addr >> 63)) {
33         return NULL;
34     }
35     bus = (addr >> 52) & 0xff;
36     devfn = (addr >> 44) & 0xff;
37 
38     return pci_find_device(pci->bus, bus, devfn);
39 }
40 
41 /*
42  * The CONFIG_DATA register expects little endian accesses, but as the
43  * region is big endian, we have to swap the value.
44  */
45 static void pnv_phb3_config_write(PnvPHB3 *phb, unsigned off,
46                                   unsigned size, uint64_t val)
47 {
48     uint32_t cfg_addr, limit;
49     PCIDevice *pdev;
50 
51     pdev = pnv_phb3_find_cfg_dev(phb);
52     if (!pdev) {
53         return;
54     }
55     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
56     cfg_addr |= off;
57     limit = pci_config_size(pdev);
58     if (limit <= cfg_addr) {
59         /*
60          * conventional pci device can be behind pcie-to-pci bridge.
61          * 256 <= addr < 4K has no effects.
62          */
63         return;
64     }
65     switch (size) {
66     case 1:
67         break;
68     case 2:
69         val = bswap16(val);
70         break;
71     case 4:
72         val = bswap32(val);
73         break;
74     default:
75         g_assert_not_reached();
76     }
77     pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
78 }
79 
80 static uint64_t pnv_phb3_config_read(PnvPHB3 *phb, unsigned off,
81                                      unsigned size)
82 {
83     uint32_t cfg_addr, limit;
84     PCIDevice *pdev;
85     uint64_t val;
86 
87     pdev = pnv_phb3_find_cfg_dev(phb);
88     if (!pdev) {
89         return ~0ull;
90     }
91     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
92     cfg_addr |= off;
93     limit = pci_config_size(pdev);
94     if (limit <= cfg_addr) {
95         /*
96          * conventional pci device can be behind pcie-to-pci bridge.
97          * 256 <= addr < 4K has no effects.
98          */
99         return ~0ull;
100     }
101     val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
102     switch (size) {
103     case 1:
104         return val;
105     case 2:
106         return bswap16(val);
107     case 4:
108         return bswap32(val);
109     default:
110         g_assert_not_reached();
111     }
112 }
113 
114 static void pnv_phb3_check_m32(PnvPHB3 *phb)
115 {
116     uint64_t base, start, size;
117     MemoryRegion *parent;
118     PnvPBCQState *pbcq = &phb->pbcq;
119 
120     if (memory_region_is_mapped(&phb->mr_m32)) {
121         memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32);
122     }
123 
124     if (!(phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN)) {
125         return;
126     }
127 
128     /* Grab geometry from registers */
129     base = phb->regs[PHB_M32_BASE_ADDR >> 3];
130     start = phb->regs[PHB_M32_START_ADDR >> 3];
131     size = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1;
132 
133     /* Check if it matches an enabled MMIO region in the PBCQ */
134     if (memory_region_is_mapped(&pbcq->mmbar0) &&
135         base >= pbcq->mmio0_base &&
136         (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
137         parent = &pbcq->mmbar0;
138         base -= pbcq->mmio0_base;
139     } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
140                base >= pbcq->mmio1_base &&
141                (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
142         parent = &pbcq->mmbar1;
143         base -= pbcq->mmio1_base;
144     } else {
145         return;
146     }
147 
148     /* Create alias */
149     memory_region_init_alias(&phb->mr_m32, OBJECT(phb), "phb3-m32",
150                              &phb->pci_mmio, start, size);
151     memory_region_add_subregion(parent, base, &phb->mr_m32);
152 }
153 
154 static void pnv_phb3_check_m64(PnvPHB3 *phb, uint32_t index)
155 {
156     uint64_t base, start, size, m64;
157     MemoryRegion *parent;
158     PnvPBCQState *pbcq = &phb->pbcq;
159 
160     if (memory_region_is_mapped(&phb->mr_m64[index])) {
161         /* Should we destroy it in RCU friendly way... ? */
162         memory_region_del_subregion(phb->mr_m64[index].container,
163                                     &phb->mr_m64[index]);
164     }
165 
166     /* Get table entry */
167     m64 = phb->ioda_M64BT[index];
168 
169     if (!(m64 & IODA2_M64BT_ENABLE)) {
170         return;
171     }
172 
173     /* Grab geometry from registers */
174     base = GETFIELD(IODA2_M64BT_BASE, m64) << 20;
175     if (m64 & IODA2_M64BT_SINGLE_PE) {
176         base &= ~0x1ffffffull;
177     }
178     size = GETFIELD(IODA2_M64BT_MASK, m64) << 20;
179     size |= 0xfffc000000000000ull;
180     size = ~size + 1;
181     start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
182 
183     /* Check if it matches an enabled MMIO region in the PBCQ */
184     if (memory_region_is_mapped(&pbcq->mmbar0) &&
185         base >= pbcq->mmio0_base &&
186         (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
187         parent = &pbcq->mmbar0;
188         base -= pbcq->mmio0_base;
189     } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
190                base >= pbcq->mmio1_base &&
191                (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
192         parent = &pbcq->mmbar1;
193         base -= pbcq->mmio1_base;
194     } else {
195         return;
196     }
197 
198     /* Create alias */
199     memory_region_init_alias(&phb->mr_m64[index], OBJECT(phb), "phb3-m64",
200                              &phb->pci_mmio, start, size);
201     memory_region_add_subregion(parent, base, &phb->mr_m64[index]);
202 }
203 
204 static void pnv_phb3_check_all_m64s(PnvPHB3 *phb)
205 {
206     uint64_t i;
207 
208     for (i = 0; i < PNV_PHB3_NUM_M64; i++) {
209         pnv_phb3_check_m64(phb, i);
210     }
211 }
212 
213 static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t val)
214 {
215     uint8_t server, prio;
216 
217     phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER |
218                                   IODA2_LXIVT_PRIORITY |
219                                   IODA2_LXIVT_NODE_ID);
220     server = GETFIELD(IODA2_LXIVT_SERVER, val);
221     prio = GETFIELD(IODA2_LXIVT_PRIORITY, val);
222 
223     /*
224      * The low order 2 bits are the link pointer (Type II interrupts).
225      * Shift back to get a valid IRQ server.
226      */
227     server >>= 2;
228 
229     ics_write_xive(&phb->lsis, idx, server, prio, prio);
230 }
231 
232 static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb,
233                                       unsigned *out_table, unsigned *out_idx)
234 {
235     uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
236     unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
237     unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
238     unsigned int mask;
239     uint64_t *tptr = NULL;
240 
241     switch (table) {
242     case IODA2_TBL_LIST:
243         tptr = phb->ioda_LIST;
244         mask = 7;
245         break;
246     case IODA2_TBL_LXIVT:
247         tptr = phb->ioda_LXIVT;
248         mask = 7;
249         break;
250     case IODA2_TBL_IVC_CAM:
251     case IODA2_TBL_RBA:
252         mask = 31;
253         break;
254     case IODA2_TBL_RCAM:
255         mask = 63;
256         break;
257     case IODA2_TBL_MRT:
258         mask = 7;
259         break;
260     case IODA2_TBL_PESTA:
261     case IODA2_TBL_PESTB:
262         mask = 255;
263         break;
264     case IODA2_TBL_TVT:
265         tptr = phb->ioda_TVT;
266         mask = 511;
267         break;
268     case IODA2_TBL_TCAM:
269     case IODA2_TBL_TDR:
270         mask = 63;
271         break;
272     case IODA2_TBL_M64BT:
273         tptr = phb->ioda_M64BT;
274         mask = 15;
275         break;
276     case IODA2_TBL_M32DT:
277         tptr = phb->ioda_MDT;
278         mask = 255;
279         break;
280     case IODA2_TBL_PEEV:
281         tptr = phb->ioda_PEEV;
282         mask = 3;
283         break;
284     default:
285         phb3_error(phb, "invalid IODA table %d", table);
286         return NULL;
287     }
288     index &= mask;
289     if (out_idx) {
290         *out_idx = index;
291     }
292     if (out_table) {
293         *out_table = table;
294     }
295     if (tptr) {
296         tptr += index;
297     }
298     if (adreg & PHB_IODA_AD_AUTOINC) {
299         index = (index + 1) & mask;
300         adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
301     }
302     phb->regs[PHB_IODA_ADDR >> 3] = adreg;
303     return tptr;
304 }
305 
306 static uint64_t pnv_phb3_ioda_read(PnvPHB3 *phb)
307 {
308         unsigned table;
309         uint64_t *tptr;
310 
311         tptr = pnv_phb3_ioda_access(phb, &table, NULL);
312         if (!tptr) {
313             /* Return 0 on unsupported tables, not ff's */
314             return 0;
315         }
316         return *tptr;
317 }
318 
319 static void pnv_phb3_ioda_write(PnvPHB3 *phb, uint64_t val)
320 {
321         unsigned table, idx;
322         uint64_t *tptr;
323 
324         tptr = pnv_phb3_ioda_access(phb, &table, &idx);
325         if (!tptr) {
326             return;
327         }
328 
329         /* Handle side effects */
330         switch (table) {
331         case IODA2_TBL_LXIVT:
332             pnv_phb3_lxivt_write(phb, idx, val);
333             break;
334         case IODA2_TBL_M64BT:
335             *tptr = val;
336             pnv_phb3_check_m64(phb, idx);
337             break;
338         default:
339             *tptr = val;
340         }
341 }
342 
343 /*
344  * This is called whenever the PHB LSI, MSI source ID register or
345  * the PBCQ irq filters are written.
346  */
347 void pnv_phb3_remap_irqs(PnvPHB3 *phb)
348 {
349     ICSState *ics = &phb->lsis;
350     uint32_t local, global, count, mask, comp;
351     uint64_t baren;
352     PnvPBCQState *pbcq = &phb->pbcq;
353 
354     /*
355      * First check if we are enabled. Unlike real HW we don't separate
356      * TX and RX so we enable if both are set
357      */
358     baren = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
359     if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) ||
360         !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) {
361         ics->offset = 0;
362         return;
363     }
364 
365     /* Grab local LSI source ID */
366     local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3;
367 
368     /* Grab global one and compare */
369     global = GETFIELD(PBCQ_NEST_LSI_SRC,
370                       pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3;
371     if (global != local) {
372         /*
373          * This happens during initialization, let's come back when we
374          * are properly configured
375          */
376         ics->offset = 0;
377         return;
378     }
379 
380     /* Get the base on the powerbus */
381     comp = GETFIELD(PBCQ_NEST_IRSN_COMP,
382                     pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]);
383     mask = GETFIELD(PBCQ_NEST_IRSN_COMP,
384                     pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]);
385     count = ((~mask) + 1) & 0x7ffff;
386     phb->total_irq = count;
387 
388     /* Sanity checks */
389     if ((global + PNV_PHB3_NUM_LSI) > count) {
390         phb3_error(phb, "LSIs out of reach: LSI base=%d total irq=%d", global,
391                    count);
392     }
393 
394     if (count > 2048) {
395         phb3_error(phb, "More interrupts than supported: %d", count);
396     }
397 
398     if ((comp & mask) != comp) {
399         phb3_error(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x",
400                    comp, mask);
401         comp &= mask;
402     }
403     /* Setup LSI offset */
404     ics->offset = comp + global;
405 
406     /* Setup MSI offset */
407     pnv_phb3_msi_update_config(&phb->msis, comp, count - PNV_PHB3_NUM_LSI);
408 }
409 
410 static void pnv_phb3_lsi_src_id_write(PnvPHB3 *phb, uint64_t val)
411 {
412     /* Sanitize content */
413     val &= PHB_LSI_SRC_ID;
414     phb->regs[PHB_LSI_SOURCE_ID >> 3] = val;
415     pnv_phb3_remap_irqs(phb);
416 }
417 
418 static void pnv_phb3_rtc_invalidate(PnvPHB3 *phb, uint64_t val)
419 {
420     PnvPhb3DMASpace *ds;
421 
422     /* Always invalidate all for now ... */
423     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
424         ds->pe_num = PHB_INVALID_PE;
425     }
426 }
427 
428 
429 static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds)
430 {
431     uint64_t cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
432 
433     if (cfg & PHB_PHB3C_32BIT_MSI_EN) {
434         if (!memory_region_is_mapped(&ds->msi32_mr)) {
435             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
436                                         0xffff0000, &ds->msi32_mr);
437         }
438     } else {
439         if (memory_region_is_mapped(&ds->msi32_mr)) {
440             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
441                                         &ds->msi32_mr);
442         }
443     }
444 
445     if (cfg & PHB_PHB3C_64BIT_MSI_EN) {
446         if (!memory_region_is_mapped(&ds->msi64_mr)) {
447             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
448                                         (1ull << 60), &ds->msi64_mr);
449         }
450     } else {
451         if (memory_region_is_mapped(&ds->msi64_mr)) {
452             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
453                                         &ds->msi64_mr);
454         }
455     }
456 }
457 
458 static void pnv_phb3_update_all_msi_regions(PnvPHB3 *phb)
459 {
460     PnvPhb3DMASpace *ds;
461 
462     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
463         pnv_phb3_update_msi_regions(ds);
464     }
465 }
466 
467 void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size)
468 {
469     PnvPHB3 *phb = opaque;
470     bool changed;
471 
472     /* Special case configuration data */
473     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
474         pnv_phb3_config_write(phb, off & 0x3, size, val);
475         return;
476     }
477 
478     /* Other registers are 64-bit only */
479     if (size != 8 || off & 0x7) {
480         phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
481                    off, size);
482         return;
483     }
484 
485     /* Handle masking & filtering */
486     switch (off) {
487     case PHB_M64_UPPER_BITS:
488         val &= 0xfffc000000000000ull;
489         break;
490     case PHB_Q_DMA_R:
491         /*
492          * This is enough logic to make SW happy but we aren't actually
493          * quiescing the DMAs
494          */
495         if (val & PHB_Q_DMA_R_AUTORESET) {
496             val = 0;
497         } else {
498             val &= PHB_Q_DMA_R_QUIESCE_DMA;
499         }
500         break;
501     /* LEM stuff */
502     case PHB_LEM_FIR_AND_MASK:
503         phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
504         return;
505     case PHB_LEM_FIR_OR_MASK:
506         phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
507         return;
508     case PHB_LEM_ERROR_AND_MASK:
509         phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
510         return;
511     case PHB_LEM_ERROR_OR_MASK:
512         phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
513         return;
514     case PHB_LEM_WOF:
515         val = 0;
516         break;
517     }
518 
519     /* Record whether it changed */
520     changed = phb->regs[off >> 3] != val;
521 
522     /* Store in register cache first */
523     phb->regs[off >> 3] = val;
524 
525     /* Handle side effects */
526     switch (off) {
527     case PHB_PHB3_CONFIG:
528         if (changed) {
529             pnv_phb3_update_all_msi_regions(phb);
530         }
531         /* fall through */
532     case PHB_M32_BASE_ADDR:
533     case PHB_M32_BASE_MASK:
534     case PHB_M32_START_ADDR:
535         if (changed) {
536             pnv_phb3_check_m32(phb);
537         }
538         break;
539     case PHB_M64_UPPER_BITS:
540         if (changed) {
541             pnv_phb3_check_all_m64s(phb);
542         }
543         break;
544     case PHB_LSI_SOURCE_ID:
545         if (changed) {
546             pnv_phb3_lsi_src_id_write(phb, val);
547         }
548         break;
549 
550     /* IODA table accesses */
551     case PHB_IODA_DATA0:
552         pnv_phb3_ioda_write(phb, val);
553         break;
554 
555     /* RTC invalidation */
556     case PHB_RTC_INVALIDATE:
557         pnv_phb3_rtc_invalidate(phb, val);
558         break;
559 
560     /* FFI request */
561     case PHB_FFI_REQUEST:
562         pnv_phb3_msi_ffi(&phb->msis, val);
563         break;
564 
565     /* Silent simple writes */
566     case PHB_CONFIG_ADDRESS:
567     case PHB_IODA_ADDR:
568     case PHB_TCE_KILL:
569     case PHB_TCE_SPEC_CTL:
570     case PHB_PEST_BAR:
571     case PHB_PELTV_BAR:
572     case PHB_RTT_BAR:
573     case PHB_RBA_BAR:
574     case PHB_IVT_BAR:
575     case PHB_FFI_LOCK:
576     case PHB_LEM_FIR_ACCUM:
577     case PHB_LEM_ERROR_MASK:
578     case PHB_LEM_ACTION0:
579     case PHB_LEM_ACTION1:
580         break;
581 
582     /* Noise on anything else */
583     default:
584         qemu_log_mask(LOG_UNIMP, "phb3: reg_write 0x%"PRIx64"=%"PRIx64"\n",
585                       off, val);
586     }
587 }
588 
589 uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size)
590 {
591     PnvPHB3 *phb = opaque;
592     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
593     uint64_t val;
594 
595     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
596         return pnv_phb3_config_read(phb, off & 0x3, size);
597     }
598 
599     /* Other registers are 64-bit only */
600     if (size != 8 || off & 0x7) {
601         phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
602                    off, size);
603         return ~0ull;
604     }
605 
606     /* Default read from cache */
607     val = phb->regs[off >> 3];
608 
609     switch (off) {
610     /* Simulate venice DD2.0 */
611     case PHB_VERSION:
612         return 0x000000a300000005ull;
613     case PHB_PCIE_SYSTEM_CONFIG:
614         return 0x441100fc30000000;
615 
616     /* IODA table accesses */
617     case PHB_IODA_DATA0:
618         return pnv_phb3_ioda_read(phb);
619 
620     /* Link training always appears trained */
621     case PHB_PCIE_DLP_TRAIN_CTL:
622         if (!pci_find_device(pci->bus, 1, 0)) {
623             return 0;
624         }
625         return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT;
626 
627     /* FFI Lock */
628     case PHB_FFI_LOCK:
629         /* Set lock and return previous value */
630         phb->regs[off >> 3] |= PHB_FFI_LOCK_STATE;
631         return val;
632 
633     /* DMA read sync: make it look like it's complete */
634     case PHB_DMARD_SYNC:
635         return PHB_DMARD_SYNC_COMPLETE;
636 
637     /* Silent simple reads */
638     case PHB_PHB3_CONFIG:
639     case PHB_M32_BASE_ADDR:
640     case PHB_M32_BASE_MASK:
641     case PHB_M32_START_ADDR:
642     case PHB_CONFIG_ADDRESS:
643     case PHB_IODA_ADDR:
644     case PHB_RTC_INVALIDATE:
645     case PHB_TCE_KILL:
646     case PHB_TCE_SPEC_CTL:
647     case PHB_PEST_BAR:
648     case PHB_PELTV_BAR:
649     case PHB_RTT_BAR:
650     case PHB_RBA_BAR:
651     case PHB_IVT_BAR:
652     case PHB_M64_UPPER_BITS:
653     case PHB_LEM_FIR_ACCUM:
654     case PHB_LEM_ERROR_MASK:
655     case PHB_LEM_ACTION0:
656     case PHB_LEM_ACTION1:
657         break;
658 
659     /* Noise on anything else */
660     default:
661         qemu_log_mask(LOG_UNIMP, "phb3: reg_read 0x%"PRIx64"=%"PRIx64"\n",
662                       off, val);
663     }
664     return val;
665 }
666 
667 static const MemoryRegionOps pnv_phb3_reg_ops = {
668     .read = pnv_phb3_reg_read,
669     .write = pnv_phb3_reg_write,
670     .valid.min_access_size = 1,
671     .valid.max_access_size = 8,
672     .impl.min_access_size = 1,
673     .impl.max_access_size = 8,
674     .endianness = DEVICE_BIG_ENDIAN,
675 };
676 
677 static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num)
678 {
679     /* Check that out properly ... */
680     return irq_num & 3;
681 }
682 
683 static void pnv_phb3_set_irq(void *opaque, int irq_num, int level)
684 {
685     PnvPHB3 *phb = opaque;
686 
687     /* LSI only ... */
688     if (irq_num > 3) {
689         phb3_error(phb, "Unknown IRQ to set %d", irq_num);
690     }
691     qemu_set_irq(phb->qirqs[irq_num], level);
692 }
693 
694 static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds)
695 {
696     uint64_t rtt, addr;
697     uint16_t rte;
698     int bus_num;
699 
700     /* Already resolved ? */
701     if (ds->pe_num != PHB_INVALID_PE) {
702         return true;
703     }
704 
705     /* We need to lookup the RTT */
706     rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
707     if (!(rtt & PHB_RTT_BAR_ENABLE)) {
708         phb3_error(ds->phb, "DMA with RTT BAR disabled !");
709         /* Set error bits ? fence ? ... */
710         return false;
711     }
712 
713     /* Read RTE */
714     bus_num = pci_bus_num(ds->bus);
715     addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
716     addr += 2 * ((bus_num << 8) | ds->devfn);
717     if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
718         phb3_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
719         /* Set error bits ? fence ? ... */
720         return false;
721     }
722     rte = be16_to_cpu(rte);
723 
724     /* Fail upon reading of invalid PE# */
725     if (rte >= PNV_PHB3_NUM_PE) {
726         phb3_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
727         /* Set error bits ? fence ? ... */
728         return false;
729     }
730     ds->pe_num = rte;
731     return true;
732 }
733 
734 static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr,
735                                    bool is_write, uint64_t tve,
736                                    IOMMUTLBEntry *tlb)
737 {
738     uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve);
739     int32_t  lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve);
740     uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve);
741     uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve);
742     PnvPHB3 *phb = ds->phb;
743 
744     /* Invalid levels */
745     if (lev > 4) {
746         phb3_error(phb, "Invalid #levels in TVE %d", lev);
747         return;
748     }
749 
750     /* IO Page Size of 0 means untranslated, else use TCEs */
751     if (tps == 0) {
752         /*
753          * We only support non-translate in top window.
754          *
755          * TODO: Venice/Murano support it on bottom window above 4G and
756          * Naples suports it on everything
757          */
758         if (!(tve & PPC_BIT(51))) {
759             phb3_error(phb, "xlate for invalid non-translate TVE");
760             return;
761         }
762         /* TODO: Handle boundaries */
763 
764         /* Use 4k pages like q35 ... for now */
765         tlb->iova = addr & 0xfffffffffffff000ull;
766         tlb->translated_addr = addr & 0x0003fffffffff000ull;
767         tlb->addr_mask = 0xfffull;
768         tlb->perm = IOMMU_RW;
769     } else {
770         uint32_t tce_shift, tbl_shift, sh;
771         uint64_t base, taddr, tce, tce_mask;
772 
773         /* TVE disabled ? */
774         if (tts == 0) {
775             phb3_error(phb, "xlate for invalid translated TVE");
776             return;
777         }
778 
779         /* Address bits per bottom level TCE entry */
780         tce_shift = tps + 11;
781 
782         /* Address bits per table level */
783         tbl_shift = tts + 8;
784 
785         /* Top level table base address */
786         base = tta << 12;
787 
788         /* Total shift to first level */
789         sh = tbl_shift * lev + tce_shift;
790 
791         /* TODO: Multi-level untested */
792         while ((lev--) >= 0) {
793             /* Grab the TCE address */
794             taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
795             if (dma_memory_read(&address_space_memory, taddr, &tce,
796                                 sizeof(tce))) {
797                 phb3_error(phb, "Failed to read TCE at 0x%"PRIx64, taddr);
798                 return;
799             }
800             tce = be64_to_cpu(tce);
801 
802             /* Check permission for indirect TCE */
803             if ((lev >= 0) && !(tce & 3)) {
804                 phb3_error(phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
805                 phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
806                            is_write ? 'W' : 'R', tve);
807                 phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
808                            tta, lev, tts, tps);
809                 return;
810             }
811             sh -= tbl_shift;
812             base = tce & ~0xfffull;
813         }
814 
815         /* We exit the loop with TCE being the final TCE */
816         tce_mask = ~((1ull << tce_shift) - 1);
817         tlb->iova = addr & tce_mask;
818         tlb->translated_addr = tce & tce_mask;
819         tlb->addr_mask = ~tce_mask;
820         tlb->perm = tce & 3;
821         if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
822             phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr);
823             phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
824                        is_write ? 'W' : 'R', tve);
825             phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
826                        tta, lev, tts, tps);
827         }
828     }
829 }
830 
831 static IOMMUTLBEntry pnv_phb3_translate_iommu(IOMMUMemoryRegion *iommu,
832                                               hwaddr addr,
833                                               IOMMUAccessFlags flag,
834                                               int iommu_idx)
835 {
836     PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr);
837     int tve_sel;
838     uint64_t tve, cfg;
839     IOMMUTLBEntry ret = {
840         .target_as = &address_space_memory,
841         .iova = addr,
842         .translated_addr = 0,
843         .addr_mask = ~(hwaddr)0,
844         .perm = IOMMU_NONE,
845     };
846     PnvPHB3 *phb = ds->phb;
847 
848     /* Resolve PE# */
849     if (!pnv_phb3_resolve_pe(ds)) {
850         phb3_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
851                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
852         return ret;
853     }
854 
855     /* Check top bits */
856     switch (addr >> 60) {
857     case 00:
858         /* DMA or 32-bit MSI ? */
859         cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
860         if ((cfg & PHB_PHB3C_32BIT_MSI_EN) &&
861             ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
862             phb3_error(phb, "xlate on 32-bit MSI region");
863             return ret;
864         }
865         /* Choose TVE XXX Use PHB3 Control Register */
866         tve_sel = (addr >> 59) & 1;
867         tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
868         pnv_phb3_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
869         break;
870     case 01:
871         phb3_error(phb, "xlate on 64-bit MSI region");
872         break;
873     default:
874         phb3_error(phb, "xlate on unsupported address 0x%"PRIx64, addr);
875     }
876     return ret;
877 }
878 
/* Type name and checked cast of the PHB3 IOMMU memory region */
#define TYPE_PNV_PHB3_IOMMU_MEMORY_REGION "pnv-phb3-iommu-memory-region"
#define PNV_PHB3_IOMMU_MEMORY_REGION(obj)                               \
    OBJECT_CHECK(IOMMUMemoryRegion, (obj),                              \
                 TYPE_PNV_PHB3_IOMMU_MEMORY_REGION)
882 
883 static void pnv_phb3_iommu_memory_region_class_init(ObjectClass *klass,
884                                                     void *data)
885 {
886     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
887 
888     imrc->translate = pnv_phb3_translate_iommu;
889 }
890 
891 static const TypeInfo pnv_phb3_iommu_memory_region_info = {
892     .parent = TYPE_IOMMU_MEMORY_REGION,
893     .name = TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
894     .class_init = pnv_phb3_iommu_memory_region_class_init,
895 };
896 
897 /*
898  * MSI/MSIX memory region implementation.
899  * The handler handles both MSI and MSIX.
900  */
901 static void pnv_phb3_msi_write(void *opaque, hwaddr addr,
902                                uint64_t data, unsigned size)
903 {
904     PnvPhb3DMASpace *ds = opaque;
905 
906     /* Resolve PE# */
907     if (!pnv_phb3_resolve_pe(ds)) {
908         phb3_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
909                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
910         return;
911     }
912 
913     pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num);
914 }
915 
916 /* There is no .read as the read result is undefined by PCI spec */
917 static uint64_t pnv_phb3_msi_read(void *opaque, hwaddr addr, unsigned size)
918 {
919     PnvPhb3DMASpace *ds = opaque;
920 
921     phb3_error(ds->phb, "invalid read @ 0x%" HWADDR_PRIx, addr);
922     return -1;
923 }
924 
925 static const MemoryRegionOps pnv_phb3_msi_ops = {
926     .read = pnv_phb3_msi_read,
927     .write = pnv_phb3_msi_write,
928     .endianness = DEVICE_LITTLE_ENDIAN
929 };
930 
931 static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn)
932 {
933     PnvPHB3 *phb = opaque;
934     PnvPhb3DMASpace *ds;
935 
936     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
937         if (ds->bus == bus && ds->devfn == devfn) {
938             break;
939         }
940     }
941 
942     if (ds == NULL) {
943         ds = g_malloc0(sizeof(PnvPhb3DMASpace));
944         ds->bus = bus;
945         ds->devfn = devfn;
946         ds->pe_num = PHB_INVALID_PE;
947         ds->phb = phb;
948         memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
949                                  TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
950                                  OBJECT(phb), "phb3_iommu", UINT64_MAX);
951         address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
952                            "phb3_iommu");
953         memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops,
954                               ds, "msi32", 0x10000);
955         memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops,
956                               ds, "msi64", 0x100000);
957         pnv_phb3_update_msi_regions(ds);
958 
959         QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
960     }
961     return &ds->dma_as;
962 }
963 
964 static void pnv_phb3_instance_init(Object *obj)
965 {
966     PnvPHB3 *phb = PNV_PHB3(obj);
967 
968     QLIST_INIT(&phb->dma_spaces);
969 
970     /* LSI sources */
971     object_initialize_child(obj, "lsi", &phb->lsis, sizeof(phb->lsis),
972                              TYPE_ICS, &error_abort, NULL);
973 
974     /* Default init ... will be fixed by HW inits */
975     phb->lsis.offset = 0;
976 
977     /* MSI sources */
978     object_initialize_child(obj, "msi", &phb->msis, sizeof(phb->msis),
979                             TYPE_PHB3_MSI, &error_abort, NULL);
980 
981     /* Power Bus Common Queue */
982     object_initialize_child(obj, "pbcq", &phb->pbcq, sizeof(phb->pbcq),
983                             TYPE_PNV_PBCQ, &error_abort, NULL);
984 
985     /* Root Port */
986     object_initialize_child(obj, "root", &phb->root, sizeof(phb->root),
987                             TYPE_PNV_PHB3_ROOT_PORT, &error_abort, NULL);
988     qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
989     qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
990 }
991 
992 static void pnv_phb3_realize(DeviceState *dev, Error **errp)
993 {
994     PnvPHB3 *phb = PNV_PHB3(dev);
995     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
996     PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
997     Error *local_err = NULL;
998     int i;
999 
1000     if (phb->phb_id >= PNV8_CHIP_PHB3_MAX) {
1001         error_setg(errp, "invalid PHB index: %d", phb->phb_id);
1002         return;
1003     }
1004 
1005     /* LSI sources */
1006     object_property_set_link(OBJECT(&phb->lsis), OBJECT(pnv), "xics",
1007                                    &error_abort);
1008     object_property_set_int(OBJECT(&phb->lsis), PNV_PHB3_NUM_LSI, "nr-irqs",
1009                             &error_abort);
1010     object_property_set_bool(OBJECT(&phb->lsis), true, "realized", &local_err);
1011     if (local_err) {
1012         error_propagate(errp, local_err);
1013         return;
1014     }
1015 
1016     for (i = 0; i < phb->lsis.nr_irqs; i++) {
1017         ics_set_irq_type(&phb->lsis, i, true);
1018     }
1019 
1020     phb->qirqs = qemu_allocate_irqs(ics_set_irq, &phb->lsis, phb->lsis.nr_irqs);
1021 
1022     /* MSI sources */
1023     object_property_set_link(OBJECT(&phb->msis), OBJECT(phb), "phb",
1024                                    &error_abort);
1025     object_property_set_link(OBJECT(&phb->msis), OBJECT(pnv), "xics",
1026                                    &error_abort);
1027     object_property_set_int(OBJECT(&phb->msis), PHB3_MAX_MSI, "nr-irqs",
1028                             &error_abort);
1029     object_property_set_bool(OBJECT(&phb->msis), true, "realized", &local_err);
1030     if (local_err) {
1031         error_propagate(errp, local_err);
1032         return;
1033     }
1034 
1035     /* Power Bus Common Queue */
1036     object_property_set_link(OBJECT(&phb->pbcq), OBJECT(phb), "phb",
1037                                    &error_abort);
1038     object_property_set_bool(OBJECT(&phb->pbcq), true, "realized", &local_err);
1039     if (local_err) {
1040         error_propagate(errp, local_err);
1041         return;
1042     }
1043 
1044     /* Controller Registers */
1045     memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb,
1046                           "phb3-regs", 0x1000);
1047 
1048     /*
1049      * PHB3 doesn't support IO space. However, qemu gets very upset if
1050      * we don't have an IO region to anchor IO BARs onto so we just
1051      * initialize one which we never hook up to anything
1052      */
1053     memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000);
1054     memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio",
1055                        PCI_MMIO_TOTAL_SIZE);
1056 
1057     pci->bus = pci_register_root_bus(dev, "root-bus",
1058                                      pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
1059                                      &phb->pci_mmio, &phb->pci_io,
1060                                      0, 4, TYPE_PNV_PHB3_ROOT_BUS);
1061 
1062     pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
1063 
1064     /* Add a single Root port */
1065     qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
1066     qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
1067     qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus));
1068     qdev_init_nofail(DEVICE(&phb->root));
1069 }
1070 
1071 void pnv_phb3_update_regions(PnvPHB3 *phb)
1072 {
1073     PnvPBCQState *pbcq = &phb->pbcq;
1074 
1075     /* Unmap first always */
1076     if (memory_region_is_mapped(&phb->mr_regs)) {
1077         memory_region_del_subregion(&pbcq->phbbar, &phb->mr_regs);
1078     }
1079 
1080     /* Map registers if enabled */
1081     if (memory_region_is_mapped(&pbcq->phbbar)) {
1082         /* TODO: We should use the PHB BAR 2 register but we don't ... */
1083         memory_region_add_subregion(&pbcq->phbbar, 0, &phb->mr_regs);
1084     }
1085 
1086     /* Check/update m32 */
1087     if (memory_region_is_mapped(&phb->mr_m32)) {
1088         pnv_phb3_check_m32(phb);
1089     }
1090     pnv_phb3_check_all_m64s(phb);
1091 }
1092 
1093 static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge,
1094                                           PCIBus *rootbus)
1095 {
1096     PnvPHB3 *phb = PNV_PHB3(host_bridge);
1097 
1098     snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
1099              phb->chip_id, phb->phb_id);
1100     return phb->bus_path;
1101 }
1102 
1103 static Property pnv_phb3_properties[] = {
1104         DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0),
1105         DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0),
1106         DEFINE_PROP_END_OF_LIST(),
1107 };
1108 
1109 static void pnv_phb3_class_init(ObjectClass *klass, void *data)
1110 {
1111     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1112     DeviceClass *dc = DEVICE_CLASS(klass);
1113 
1114     hc->root_bus_path = pnv_phb3_root_bus_path;
1115     dc->realize = pnv_phb3_realize;
1116     device_class_set_props(dc, pnv_phb3_properties);
1117     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1118     dc->user_creatable = false;
1119 }
1120 
1121 static const TypeInfo pnv_phb3_type_info = {
1122     .name          = TYPE_PNV_PHB3,
1123     .parent        = TYPE_PCIE_HOST_BRIDGE,
1124     .instance_size = sizeof(PnvPHB3),
1125     .class_init    = pnv_phb3_class_init,
1126     .instance_init = pnv_phb3_instance_init,
1127 };
1128 
1129 static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data)
1130 {
1131     BusClass *k = BUS_CLASS(klass);
1132 
1133     /*
1134      * PHB3 has only a single root complex. Enforce the limit on the
1135      * parent bus
1136      */
1137     k->max_dev = 1;
1138 }
1139 
1140 static const TypeInfo pnv_phb3_root_bus_info = {
1141     .name = TYPE_PNV_PHB3_ROOT_BUS,
1142     .parent = TYPE_PCIE_BUS,
1143     .class_init = pnv_phb3_root_bus_class_init,
1144     .interfaces = (InterfaceInfo[]) {
1145         { INTERFACE_PCIE_DEVICE },
1146         { }
1147     },
1148 };
1149 
1150 static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp)
1151 {
1152     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1153     Error *local_err = NULL;
1154 
1155     rpc->parent_realize(dev, &local_err);
1156     if (local_err) {
1157         error_propagate(errp, local_err);
1158         return;
1159     }
1160 }
1161 
1162 static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data)
1163 {
1164     DeviceClass *dc = DEVICE_CLASS(klass);
1165     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1166     PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
1167 
1168     dc->desc     = "IBM PHB3 PCIE Root Port";
1169 
1170     device_class_set_parent_realize(dc, pnv_phb3_root_port_realize,
1171                                     &rpc->parent_realize);
1172     dc->user_creatable = false;
1173 
1174     k->vendor_id = PCI_VENDOR_ID_IBM;
1175     k->device_id = 0x03dc;
1176     k->revision  = 0;
1177 
1178     rpc->exp_offset = 0x48;
1179     rpc->aer_offset = 0x100;
1180 }
1181 
1182 static const TypeInfo pnv_phb3_root_port_info = {
1183     .name          = TYPE_PNV_PHB3_ROOT_PORT,
1184     .parent        = TYPE_PCIE_ROOT_PORT,
1185     .instance_size = sizeof(PnvPHB3RootPort),
1186     .class_init    = pnv_phb3_root_port_class_init,
1187 };
1188 
1189 static void pnv_phb3_register_types(void)
1190 {
1191     type_register_static(&pnv_phb3_root_bus_info);
1192     type_register_static(&pnv_phb3_root_port_info);
1193     type_register_static(&pnv_phb3_type_info);
1194     type_register_static(&pnv_phb3_iommu_memory_region_info);
1195 }
1196 
1197 type_init(pnv_phb3_register_types)
1198