xref: /openbmc/qemu/hw/pci-host/pnv_phb3.c (revision f7160f32)
1 /*
2  * QEMU PowerPC PowerNV (POWER8) PHB3 model
3  *
4  * Copyright (c) 2014-2020, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qapi/visitor.h"
12 #include "qapi/error.h"
13 #include "qemu-common.h"
14 #include "hw/pci-host/pnv_phb3_regs.h"
15 #include "hw/pci-host/pnv_phb3.h"
16 #include "hw/pci/pcie_host.h"
17 #include "hw/pci/pcie_port.h"
18 #include "hw/ppc/pnv.h"
19 #include "hw/irq.h"
20 #include "hw/qdev-properties.h"
21 
/*
 * Report a guest error for @phb through qemu_log_mask(LOG_GUEST_ERROR).
 * The message is prefixed with "phb3[<chip_id>:<phb_id>]: " and a newline
 * is appended. @fmt is pasted between string literals, so it must itself
 * be a string literal.
 */
#define phb3_error(phb, fmt, ...)                                       \
    qemu_log_mask(LOG_GUEST_ERROR, "phb3[%d:%d]: " fmt "\n",            \
                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
25 
26 static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb)
27 {
28     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
29     uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
30     uint8_t bus, devfn;
31 
32     if (!(addr >> 63)) {
33         return NULL;
34     }
35     bus = (addr >> 52) & 0xff;
36     devfn = (addr >> 44) & 0xff;
37 
38     return pci_find_device(pci->bus, bus, devfn);
39 }
40 
41 /*
42  * The CONFIG_DATA register expects little endian accesses, but as the
43  * region is big endian, we have to swap the value.
44  */
45 static void pnv_phb3_config_write(PnvPHB3 *phb, unsigned off,
46                                   unsigned size, uint64_t val)
47 {
48     uint32_t cfg_addr, limit;
49     PCIDevice *pdev;
50 
51     pdev = pnv_phb3_find_cfg_dev(phb);
52     if (!pdev) {
53         return;
54     }
55     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
56     cfg_addr |= off;
57     limit = pci_config_size(pdev);
58     if (limit <= cfg_addr) {
59         /*
60          * conventional pci device can be behind pcie-to-pci bridge.
61          * 256 <= addr < 4K has no effects.
62          */
63         return;
64     }
65     switch (size) {
66     case 1:
67         break;
68     case 2:
69         val = bswap16(val);
70         break;
71     case 4:
72         val = bswap32(val);
73         break;
74     default:
75         g_assert_not_reached();
76     }
77     pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
78 }
79 
80 static uint64_t pnv_phb3_config_read(PnvPHB3 *phb, unsigned off,
81                                      unsigned size)
82 {
83     uint32_t cfg_addr, limit;
84     PCIDevice *pdev;
85     uint64_t val;
86 
87     pdev = pnv_phb3_find_cfg_dev(phb);
88     if (!pdev) {
89         return ~0ull;
90     }
91     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
92     cfg_addr |= off;
93     limit = pci_config_size(pdev);
94     if (limit <= cfg_addr) {
95         /*
96          * conventional pci device can be behind pcie-to-pci bridge.
97          * 256 <= addr < 4K has no effects.
98          */
99         return ~0ull;
100     }
101     val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
102     switch (size) {
103     case 1:
104         return val;
105     case 2:
106         return bswap16(val);
107     case 4:
108         return bswap32(val);
109     default:
110         g_assert_not_reached();
111     }
112 }
113 
114 static void pnv_phb3_check_m32(PnvPHB3 *phb)
115 {
116     uint64_t base, start, size;
117     MemoryRegion *parent;
118     PnvPBCQState *pbcq = &phb->pbcq;
119 
120     if (memory_region_is_mapped(&phb->mr_m32)) {
121         memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32);
122     }
123 
124     if (!(phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN)) {
125         return;
126     }
127 
128     /* Grab geometry from registers */
129     base = phb->regs[PHB_M32_BASE_ADDR >> 3];
130     start = phb->regs[PHB_M32_START_ADDR >> 3];
131     size = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1;
132 
133     /* Check if it matches an enabled MMIO region in the PBCQ */
134     if (memory_region_is_mapped(&pbcq->mmbar0) &&
135         base >= pbcq->mmio0_base &&
136         (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
137         parent = &pbcq->mmbar0;
138         base -= pbcq->mmio0_base;
139     } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
140                base >= pbcq->mmio1_base &&
141                (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
142         parent = &pbcq->mmbar1;
143         base -= pbcq->mmio1_base;
144     } else {
145         return;
146     }
147 
148     /* Create alias */
149     memory_region_init_alias(&phb->mr_m32, OBJECT(phb), "phb3-m32",
150                              &phb->pci_mmio, start, size);
151     memory_region_add_subregion(parent, base, &phb->mr_m32);
152 }
153 
154 static void pnv_phb3_check_m64(PnvPHB3 *phb, uint32_t index)
155 {
156     uint64_t base, start, size, m64;
157     MemoryRegion *parent;
158     PnvPBCQState *pbcq = &phb->pbcq;
159 
160     if (memory_region_is_mapped(&phb->mr_m64[index])) {
161         /* Should we destroy it in RCU friendly way... ? */
162         memory_region_del_subregion(phb->mr_m64[index].container,
163                                     &phb->mr_m64[index]);
164     }
165 
166     /* Get table entry */
167     m64 = phb->ioda_M64BT[index];
168 
169     if (!(m64 & IODA2_M64BT_ENABLE)) {
170         return;
171     }
172 
173     /* Grab geometry from registers */
174     base = GETFIELD(IODA2_M64BT_BASE, m64) << 20;
175     if (m64 & IODA2_M64BT_SINGLE_PE) {
176         base &= ~0x1ffffffull;
177     }
178     size = GETFIELD(IODA2_M64BT_MASK, m64) << 20;
179     size |= 0xfffc000000000000ull;
180     size = ~size + 1;
181     start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
182 
183     /* Check if it matches an enabled MMIO region in the PBCQ */
184     if (memory_region_is_mapped(&pbcq->mmbar0) &&
185         base >= pbcq->mmio0_base &&
186         (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
187         parent = &pbcq->mmbar0;
188         base -= pbcq->mmio0_base;
189     } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
190                base >= pbcq->mmio1_base &&
191                (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
192         parent = &pbcq->mmbar1;
193         base -= pbcq->mmio1_base;
194     } else {
195         return;
196     }
197 
198     /* Create alias */
199     memory_region_init_alias(&phb->mr_m64[index], OBJECT(phb), "phb3-m64",
200                              &phb->pci_mmio, start, size);
201     memory_region_add_subregion(parent, base, &phb->mr_m64[index]);
202 }
203 
204 static void pnv_phb3_check_all_m64s(PnvPHB3 *phb)
205 {
206     uint64_t i;
207 
208     for (i = 0; i < PNV_PHB3_NUM_M64; i++) {
209         pnv_phb3_check_m64(phb, i);
210     }
211 }
212 
213 static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t val)
214 {
215     uint8_t server, prio;
216 
217     phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER |
218                                   IODA2_LXIVT_PRIORITY |
219                                   IODA2_LXIVT_NODE_ID);
220     server = GETFIELD(IODA2_LXIVT_SERVER, val);
221     prio = GETFIELD(IODA2_LXIVT_PRIORITY, val);
222 
223     /*
224      * The low order 2 bits are the link pointer (Type II interrupts).
225      * Shift back to get a valid IRQ server.
226      */
227     server >>= 2;
228 
229     ics_write_xive(&phb->lsis, idx, server, prio, prio);
230 }
231 
232 static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb,
233                                       unsigned *out_table, unsigned *out_idx)
234 {
235     uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
236     unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
237     unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
238     unsigned int mask;
239     uint64_t *tptr = NULL;
240 
241     switch (table) {
242     case IODA2_TBL_LIST:
243         tptr = phb->ioda_LIST;
244         mask = 7;
245         break;
246     case IODA2_TBL_LXIVT:
247         tptr = phb->ioda_LXIVT;
248         mask = 7;
249         break;
250     case IODA2_TBL_IVC_CAM:
251     case IODA2_TBL_RBA:
252         mask = 31;
253         break;
254     case IODA2_TBL_RCAM:
255         mask = 63;
256         break;
257     case IODA2_TBL_MRT:
258         mask = 7;
259         break;
260     case IODA2_TBL_PESTA:
261     case IODA2_TBL_PESTB:
262         mask = 255;
263         break;
264     case IODA2_TBL_TVT:
265         tptr = phb->ioda_TVT;
266         mask = 511;
267         break;
268     case IODA2_TBL_TCAM:
269     case IODA2_TBL_TDR:
270         mask = 63;
271         break;
272     case IODA2_TBL_M64BT:
273         tptr = phb->ioda_M64BT;
274         mask = 15;
275         break;
276     case IODA2_TBL_M32DT:
277         tptr = phb->ioda_MDT;
278         mask = 255;
279         break;
280     case IODA2_TBL_PEEV:
281         tptr = phb->ioda_PEEV;
282         mask = 3;
283         break;
284     default:
285         phb3_error(phb, "invalid IODA table %d", table);
286         return NULL;
287     }
288     index &= mask;
289     if (out_idx) {
290         *out_idx = index;
291     }
292     if (out_table) {
293         *out_table = table;
294     }
295     if (tptr) {
296         tptr += index;
297     }
298     if (adreg & PHB_IODA_AD_AUTOINC) {
299         index = (index + 1) & mask;
300         adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
301     }
302     phb->regs[PHB_IODA_ADDR >> 3] = adreg;
303     return tptr;
304 }
305 
306 static uint64_t pnv_phb3_ioda_read(PnvPHB3 *phb)
307 {
308         unsigned table;
309         uint64_t *tptr;
310 
311         tptr = pnv_phb3_ioda_access(phb, &table, NULL);
312         if (!tptr) {
313             /* Return 0 on unsupported tables, not ff's */
314             return 0;
315         }
316         return *tptr;
317 }
318 
319 static void pnv_phb3_ioda_write(PnvPHB3 *phb, uint64_t val)
320 {
321         unsigned table, idx;
322         uint64_t *tptr;
323 
324         tptr = pnv_phb3_ioda_access(phb, &table, &idx);
325         if (!tptr) {
326             return;
327         }
328 
329         /* Handle side effects */
330         switch (table) {
331         case IODA2_TBL_LXIVT:
332             pnv_phb3_lxivt_write(phb, idx, val);
333             break;
334         case IODA2_TBL_M64BT:
335             *tptr = val;
336             pnv_phb3_check_m64(phb, idx);
337             break;
338         default:
339             *tptr = val;
340         }
341 }
342 
343 /*
344  * This is called whenever the PHB LSI, MSI source ID register or
345  * the PBCQ irq filters are written.
346  */
347 void pnv_phb3_remap_irqs(PnvPHB3 *phb)
348 {
349     ICSState *ics = &phb->lsis;
350     uint32_t local, global, count, mask, comp;
351     uint64_t baren;
352     PnvPBCQState *pbcq = &phb->pbcq;
353 
354     /*
355      * First check if we are enabled. Unlike real HW we don't separate
356      * TX and RX so we enable if both are set
357      */
358     baren = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
359     if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) ||
360         !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) {
361         ics->offset = 0;
362         return;
363     }
364 
365     /* Grab local LSI source ID */
366     local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3;
367 
368     /* Grab global one and compare */
369     global = GETFIELD(PBCQ_NEST_LSI_SRC,
370                       pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3;
371     if (global != local) {
372         /*
373          * This happens during initialization, let's come back when we
374          * are properly configured
375          */
376         ics->offset = 0;
377         return;
378     }
379 
380     /* Get the base on the powerbus */
381     comp = GETFIELD(PBCQ_NEST_IRSN_COMP,
382                     pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]);
383     mask = GETFIELD(PBCQ_NEST_IRSN_COMP,
384                     pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]);
385     count = ((~mask) + 1) & 0x7ffff;
386     phb->total_irq = count;
387 
388     /* Sanity checks */
389     if ((global + PNV_PHB3_NUM_LSI) > count) {
390         phb3_error(phb, "LSIs out of reach: LSI base=%d total irq=%d", global,
391                    count);
392     }
393 
394     if (count > 2048) {
395         phb3_error(phb, "More interrupts than supported: %d", count);
396     }
397 
398     if ((comp & mask) != comp) {
399         phb3_error(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x",
400                    comp, mask);
401         comp &= mask;
402     }
403     /* Setup LSI offset */
404     ics->offset = comp + global;
405 
406     /* Setup MSI offset */
407     pnv_phb3_msi_update_config(&phb->msis, comp, count - PNV_PHB3_NUM_LSI);
408 }
409 
410 static void pnv_phb3_lsi_src_id_write(PnvPHB3 *phb, uint64_t val)
411 {
412     /* Sanitize content */
413     val &= PHB_LSI_SRC_ID;
414     phb->regs[PHB_LSI_SOURCE_ID >> 3] = val;
415     pnv_phb3_remap_irqs(phb);
416 }
417 
418 static void pnv_phb3_rtc_invalidate(PnvPHB3 *phb, uint64_t val)
419 {
420     PnvPhb3DMASpace *ds;
421 
422     /* Always invalidate all for now ... */
423     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
424         ds->pe_num = PHB_INVALID_PE;
425     }
426 }
427 
428 
429 static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds)
430 {
431     uint64_t cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
432 
433     if (cfg & PHB_PHB3C_32BIT_MSI_EN) {
434         if (!memory_region_is_mapped(&ds->msi32_mr)) {
435             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
436                                         0xffff0000, &ds->msi32_mr);
437         }
438     } else {
439         if (memory_region_is_mapped(&ds->msi32_mr)) {
440             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
441                                         &ds->msi32_mr);
442         }
443     }
444 
445     if (cfg & PHB_PHB3C_64BIT_MSI_EN) {
446         if (!memory_region_is_mapped(&ds->msi64_mr)) {
447             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
448                                         (1ull << 60), &ds->msi64_mr);
449         }
450     } else {
451         if (memory_region_is_mapped(&ds->msi64_mr)) {
452             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
453                                         &ds->msi64_mr);
454         }
455     }
456 }
457 
458 static void pnv_phb3_update_all_msi_regions(PnvPHB3 *phb)
459 {
460     PnvPhb3DMASpace *ds;
461 
462     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
463         pnv_phb3_update_msi_regions(ds);
464     }
465 }
466 
/*
 * MMIO write handler for the PHB3 register space.
 *
 * @opaque: the PnvPHB3 instance
 * @off:    byte offset of the access in the register space
 * @val:    value written
 * @size:   access size in bytes
 *
 * Accesses to the CONFIG_DATA word are forwarded to PCI config space.
 * Every other register must be accessed as an aligned 64-bit quantity;
 * anything else is logged as a guest error and dropped.
 *
 * Processing happens in three steps whose order matters: the value is
 * first masked/filtered, then stored in the register cache (recording
 * whether it changed), and finally register specific side effects run.
 */
void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size)
{
    PnvPHB3 *phb = opaque;
    bool changed;

    /* Special case configuration data */
    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
        pnv_phb3_config_write(phb, off & 0x3, size, val);
        return;
    }

    /* Other registers are 64-bit only */
    if (size != 8 || off & 0x7) {
        phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
                   off, size);
        return;
    }

    /* Handle masking & filtering */
    switch (off) {
    case PHB_M64_UPPER_BITS:
        val &= 0xfffc000000000000ull;
        break;
    case PHB_Q_DMA_R:
        /*
         * This is enough logic to make SW happy but we aren't actually
         * quiescing the DMAs
         */
        if (val & PHB_Q_DMA_R_AUTORESET) {
            val = 0;
        } else {
            val &= PHB_Q_DMA_R_QUIESCE_DMA;
        }
        break;
    /*
     * LEM stuff: the AND/OR mask registers modify the FIR accumulator or
     * the error mask in place and are not stored in the cache themselves.
     */
    case PHB_LEM_FIR_AND_MASK:
        phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
        return;
    case PHB_LEM_FIR_OR_MASK:
        phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
        return;
    case PHB_LEM_ERROR_AND_MASK:
        phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
        return;
    case PHB_LEM_ERROR_OR_MASK:
        phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
        return;
    case PHB_LEM_WOF:
        /* Any write clears the register */
        val = 0;
        break;
    }

    /* Record whether it changed */
    changed = phb->regs[off >> 3] != val;

    /* Store in register cache first */
    phb->regs[off >> 3] = val;

    /* Handle side effects */
    switch (off) {
    case PHB_PHB3_CONFIG:
        if (changed) {
            pnv_phb3_update_all_msi_regions(phb);
        }
        /* fall through: the config register also holds the M32 enable bit */
    case PHB_M32_BASE_ADDR:
    case PHB_M32_BASE_MASK:
    case PHB_M32_START_ADDR:
        if (changed) {
            pnv_phb3_check_m32(phb);
        }
        break;
    case PHB_M64_UPPER_BITS:
        if (changed) {
            pnv_phb3_check_all_m64s(phb);
        }
        break;
    case PHB_LSI_SOURCE_ID:
        if (changed) {
            /* Re-stores a sanitized value and remaps the interrupts */
            pnv_phb3_lsi_src_id_write(phb, val);
        }
        break;

    /* IODA table accesses */
    case PHB_IODA_DATA0:
        pnv_phb3_ioda_write(phb, val);
        break;

    /* RTC invalidation */
    case PHB_RTC_INVALIDATE:
        pnv_phb3_rtc_invalidate(phb, val);
        break;

    /* FFI request */
    case PHB_FFI_REQUEST:
        pnv_phb3_msi_ffi(&phb->msis, val);
        break;

    /* Silent simple writes */
    case PHB_CONFIG_ADDRESS:
    case PHB_IODA_ADDR:
    case PHB_TCE_KILL:
    case PHB_TCE_SPEC_CTL:
    case PHB_PEST_BAR:
    case PHB_PELTV_BAR:
    case PHB_RTT_BAR:
    case PHB_RBA_BAR:
    case PHB_IVT_BAR:
    case PHB_FFI_LOCK:
    case PHB_LEM_FIR_ACCUM:
    case PHB_LEM_ERROR_MASK:
    case PHB_LEM_ACTION0:
    case PHB_LEM_ACTION1:
        break;

    /* Noise on anything else */
    default:
        qemu_log_mask(LOG_UNIMP, "phb3: reg_write 0x%"PRIx64"=%"PRIx64"\n",
                      off, val);
    }
}
588 
589 uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size)
590 {
591     PnvPHB3 *phb = opaque;
592     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
593     uint64_t val;
594 
595     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
596         return pnv_phb3_config_read(phb, off & 0x3, size);
597     }
598 
599     /* Other registers are 64-bit only */
600     if (size != 8 || off & 0x7) {
601         phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
602                    off, size);
603         return ~0ull;
604     }
605 
606     /* Default read from cache */
607     val = phb->regs[off >> 3];
608 
609     switch (off) {
610     /* Simulate venice DD2.0 */
611     case PHB_VERSION:
612         return 0x000000a300000005ull;
613     case PHB_PCIE_SYSTEM_CONFIG:
614         return 0x441100fc30000000;
615 
616     /* IODA table accesses */
617     case PHB_IODA_DATA0:
618         return pnv_phb3_ioda_read(phb);
619 
620     /* Link training always appears trained */
621     case PHB_PCIE_DLP_TRAIN_CTL:
622         if (!pci_find_device(pci->bus, 1, 0)) {
623             return 0;
624         }
625         return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT;
626 
627     /* FFI Lock */
628     case PHB_FFI_LOCK:
629         /* Set lock and return previous value */
630         phb->regs[off >> 3] |= PHB_FFI_LOCK_STATE;
631         return val;
632 
633     /* DMA read sync: make it look like it's complete */
634     case PHB_DMARD_SYNC:
635         return PHB_DMARD_SYNC_COMPLETE;
636 
637     /* Silent simple reads */
638     case PHB_PHB3_CONFIG:
639     case PHB_M32_BASE_ADDR:
640     case PHB_M32_BASE_MASK:
641     case PHB_M32_START_ADDR:
642     case PHB_CONFIG_ADDRESS:
643     case PHB_IODA_ADDR:
644     case PHB_RTC_INVALIDATE:
645     case PHB_TCE_KILL:
646     case PHB_TCE_SPEC_CTL:
647     case PHB_PEST_BAR:
648     case PHB_PELTV_BAR:
649     case PHB_RTT_BAR:
650     case PHB_RBA_BAR:
651     case PHB_IVT_BAR:
652     case PHB_M64_UPPER_BITS:
653     case PHB_LEM_FIR_ACCUM:
654     case PHB_LEM_ERROR_MASK:
655     case PHB_LEM_ACTION0:
656     case PHB_LEM_ACTION1:
657         break;
658 
659     /* Noise on anything else */
660     default:
661         qemu_log_mask(LOG_UNIMP, "phb3: reg_read 0x%"PRIx64"=%"PRIx64"\n",
662                       off, val);
663     }
664     return val;
665 }
666 
/*
 * Access callbacks for the PHB3 register region. The region is big endian
 * and lets 1 to 8 byte accesses through to the handlers, which do their
 * own size checking.
 */
static const MemoryRegionOps pnv_phb3_reg_ops = {
    .read = pnv_phb3_reg_read,
    .write = pnv_phb3_reg_write,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .endianness = DEVICE_BIG_ENDIAN,
};
676 
677 static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num)
678 {
679     /* Check that out properly ... */
680     return irq_num & 3;
681 }
682 
683 static void pnv_phb3_set_irq(void *opaque, int irq_num, int level)
684 {
685     PnvPHB3 *phb = opaque;
686 
687     /* LSI only ... */
688     if (irq_num > 3) {
689         phb3_error(phb, "Unknown IRQ to set %d", irq_num);
690     }
691     qemu_set_irq(phb->qirqs[irq_num], level);
692 }
693 
694 static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds)
695 {
696     uint64_t rtt, addr;
697     uint16_t rte;
698     int bus_num;
699 
700     /* Already resolved ? */
701     if (ds->pe_num != PHB_INVALID_PE) {
702         return true;
703     }
704 
705     /* We need to lookup the RTT */
706     rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
707     if (!(rtt & PHB_RTT_BAR_ENABLE)) {
708         phb3_error(ds->phb, "DMA with RTT BAR disabled !");
709         /* Set error bits ? fence ? ... */
710         return false;
711     }
712 
713     /* Read RTE */
714     bus_num = pci_bus_num(ds->bus);
715     addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
716     addr += 2 * ((bus_num << 8) | ds->devfn);
717     if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
718         phb3_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
719         /* Set error bits ? fence ? ... */
720         return false;
721     }
722     rte = be16_to_cpu(rte);
723 
724     /* Fail upon reading of invalid PE# */
725     if (rte >= PNV_PHB3_NUM_PE) {
726         phb3_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
727         /* Set error bits ? fence ? ... */
728         return false;
729     }
730     ds->pe_num = rte;
731     return true;
732 }
733 
734 static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr,
735                                    bool is_write, uint64_t tve,
736                                    IOMMUTLBEntry *tlb)
737 {
738     uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve);
739     int32_t  lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve);
740     uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve);
741     uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve);
742     PnvPHB3 *phb = ds->phb;
743 
744     /* Invalid levels */
745     if (lev > 4) {
746         phb3_error(phb, "Invalid #levels in TVE %d", lev);
747         return;
748     }
749 
750     /* IO Page Size of 0 means untranslated, else use TCEs */
751     if (tps == 0) {
752         /*
753          * We only support non-translate in top window.
754          *
755          * TODO: Venice/Murano support it on bottom window above 4G and
756          * Naples suports it on everything
757          */
758         if (!(tve & PPC_BIT(51))) {
759             phb3_error(phb, "xlate for invalid non-translate TVE");
760             return;
761         }
762         /* TODO: Handle boundaries */
763 
764         /* Use 4k pages like q35 ... for now */
765         tlb->iova = addr & 0xfffffffffffff000ull;
766         tlb->translated_addr = addr & 0x0003fffffffff000ull;
767         tlb->addr_mask = 0xfffull;
768         tlb->perm = IOMMU_RW;
769     } else {
770         uint32_t tce_shift, tbl_shift, sh;
771         uint64_t base, taddr, tce, tce_mask;
772 
773         /* TVE disabled ? */
774         if (tts == 0) {
775             phb3_error(phb, "xlate for invalid translated TVE");
776             return;
777         }
778 
779         /* Address bits per bottom level TCE entry */
780         tce_shift = tps + 11;
781 
782         /* Address bits per table level */
783         tbl_shift = tts + 8;
784 
785         /* Top level table base address */
786         base = tta << 12;
787 
788         /* Total shift to first level */
789         sh = tbl_shift * lev + tce_shift;
790 
791         /* TODO: Multi-level untested */
792         while ((lev--) >= 0) {
793             /* Grab the TCE address */
794             taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
795             if (dma_memory_read(&address_space_memory, taddr, &tce,
796                                 sizeof(tce))) {
797                 phb3_error(phb, "Failed to read TCE at 0x%"PRIx64, taddr);
798                 return;
799             }
800             tce = be64_to_cpu(tce);
801 
802             /* Check permission for indirect TCE */
803             if ((lev >= 0) && !(tce & 3)) {
804                 phb3_error(phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
805                 phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
806                            is_write ? 'W' : 'R', tve);
807                 phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
808                            tta, lev, tts, tps);
809                 return;
810             }
811             sh -= tbl_shift;
812             base = tce & ~0xfffull;
813         }
814 
815         /* We exit the loop with TCE being the final TCE */
816         tce_mask = ~((1ull << tce_shift) - 1);
817         tlb->iova = addr & tce_mask;
818         tlb->translated_addr = tce & tce_mask;
819         tlb->addr_mask = ~tce_mask;
820         tlb->perm = tce & 3;
821         if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
822             phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr);
823             phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
824                        is_write ? 'W' : 'R', tve);
825             phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
826                        tta, lev, tts, tps);
827         }
828     }
829 }
830 
831 static IOMMUTLBEntry pnv_phb3_translate_iommu(IOMMUMemoryRegion *iommu,
832                                               hwaddr addr,
833                                               IOMMUAccessFlags flag,
834                                               int iommu_idx)
835 {
836     PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr);
837     int tve_sel;
838     uint64_t tve, cfg;
839     IOMMUTLBEntry ret = {
840         .target_as = &address_space_memory,
841         .iova = addr,
842         .translated_addr = 0,
843         .addr_mask = ~(hwaddr)0,
844         .perm = IOMMU_NONE,
845     };
846     PnvPHB3 *phb = ds->phb;
847 
848     /* Resolve PE# */
849     if (!pnv_phb3_resolve_pe(ds)) {
850         phb3_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
851                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
852         return ret;
853     }
854 
855     /* Check top bits */
856     switch (addr >> 60) {
857     case 00:
858         /* DMA or 32-bit MSI ? */
859         cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
860         if ((cfg & PHB_PHB3C_32BIT_MSI_EN) &&
861             ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
862             phb3_error(phb, "xlate on 32-bit MSI region");
863             return ret;
864         }
865         /* Choose TVE XXX Use PHB3 Control Register */
866         tve_sel = (addr >> 59) & 1;
867         tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
868         pnv_phb3_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
869         break;
870     case 01:
871         phb3_error(phb, "xlate on 64-bit MSI region");
872         break;
873     default:
874         phb3_error(phb, "xlate on unsupported address 0x%"PRIx64, addr);
875     }
876     return ret;
877 }
878 
/*
 * QOM type name of the PHB3 IOMMU memory region, and the checked cast from
 * an object to the IOMMUMemoryRegion of that type.
 */
#define TYPE_PNV_PHB3_IOMMU_MEMORY_REGION "pnv-phb3-iommu-memory-region"
#define PNV_PHB3_IOMMU_MEMORY_REGION(obj) \
    OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB3_IOMMU_MEMORY_REGION)
882 
883 static void pnv_phb3_iommu_memory_region_class_init(ObjectClass *klass,
884                                                     void *data)
885 {
886     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
887 
888     imrc->translate = pnv_phb3_translate_iommu;
889 }
890 
/*
 * Type description of the PHB3 IOMMU memory region, derived from the
 * generic IOMMU memory region type.
 */
static const TypeInfo pnv_phb3_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
    .class_init = pnv_phb3_iommu_memory_region_class_init,
};
896 
897 /*
898  * MSI/MSIX memory region implementation.
899  * The handler handles both MSI and MSIX.
900  */
901 static void pnv_phb3_msi_write(void *opaque, hwaddr addr,
902                                uint64_t data, unsigned size)
903 {
904     PnvPhb3DMASpace *ds = opaque;
905 
906     /* Resolve PE# */
907     if (!pnv_phb3_resolve_pe(ds)) {
908         phb3_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
909                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
910         return;
911     }
912 
913     pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num);
914 }
915 
916 /* There is no .read as the read result is undefined by PCI spec */
917 static uint64_t pnv_phb3_msi_read(void *opaque, hwaddr addr, unsigned size)
918 {
919     PnvPhb3DMASpace *ds = opaque;
920 
921     phb3_error(ds->phb, "invalid read @ 0x%" HWADDR_PRIx, addr);
922     return -1;
923 }
924 
/* Access callbacks shared by the 32-bit and 64-bit MSI regions. */
static const MemoryRegionOps pnv_phb3_msi_ops = {
    .read = pnv_phb3_msi_read,
    .write = pnv_phb3_msi_write,
    .endianness = DEVICE_LITTLE_ENDIAN
};
930 
931 static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn)
932 {
933     PnvPHB3 *phb = opaque;
934     PnvPhb3DMASpace *ds;
935 
936     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
937         if (ds->bus == bus && ds->devfn == devfn) {
938             break;
939         }
940     }
941 
942     if (ds == NULL) {
943         ds = g_malloc0(sizeof(PnvPhb3DMASpace));
944         ds->bus = bus;
945         ds->devfn = devfn;
946         ds->pe_num = PHB_INVALID_PE;
947         ds->phb = phb;
948         memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
949                                  TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
950                                  OBJECT(phb), "phb3_iommu", UINT64_MAX);
951         address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
952                            "phb3_iommu");
953         memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops,
954                               ds, "msi32", 0x10000);
955         memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops,
956                               ds, "msi64", 0x100000);
957         pnv_phb3_update_msi_regions(ds);
958 
959         QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
960     }
961     return &ds->dma_as;
962 }
963 
964 static void pnv_phb3_instance_init(Object *obj)
965 {
966     PnvPHB3 *phb = PNV_PHB3(obj);
967 
968     QLIST_INIT(&phb->dma_spaces);
969 
970     /* LSI sources */
971     object_initialize_child(obj, "lsi", &phb->lsis, TYPE_ICS);
972 
973     /* Default init ... will be fixed by HW inits */
974     phb->lsis.offset = 0;
975 
976     /* MSI sources */
977     object_initialize_child(obj, "msi", &phb->msis, TYPE_PHB3_MSI);
978 
979     /* Power Bus Common Queue */
980     object_initialize_child(obj, "pbcq", &phb->pbcq, TYPE_PNV_PBCQ);
981 
982     /* Root Port */
983     object_initialize_child(obj, "root", &phb->root, TYPE_PNV_PHB3_ROOT_PORT);
984     qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
985     qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
986 }
987 
988 static void pnv_phb3_realize(DeviceState *dev, Error **errp)
989 {
990     PnvPHB3 *phb = PNV_PHB3(dev);
991     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
992     PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
993     int i;
994 
995     if (phb->phb_id >= PNV8_CHIP_PHB3_MAX) {
996         error_setg(errp, "invalid PHB index: %d", phb->phb_id);
997         return;
998     }
999 
1000     /* LSI sources */
1001     object_property_set_link(OBJECT(&phb->lsis), "xics", OBJECT(pnv),
1002                              &error_abort);
1003     object_property_set_int(OBJECT(&phb->lsis), "nr-irqs", PNV_PHB3_NUM_LSI,
1004                             &error_abort);
1005     if (!qdev_realize(DEVICE(&phb->lsis), NULL, errp)) {
1006         return;
1007     }
1008 
1009     for (i = 0; i < phb->lsis.nr_irqs; i++) {
1010         ics_set_irq_type(&phb->lsis, i, true);
1011     }
1012 
1013     phb->qirqs = qemu_allocate_irqs(ics_set_irq, &phb->lsis, phb->lsis.nr_irqs);
1014 
1015     /* MSI sources */
1016     object_property_set_link(OBJECT(&phb->msis), "phb", OBJECT(phb),
1017                              &error_abort);
1018     object_property_set_link(OBJECT(&phb->msis), "xics", OBJECT(pnv),
1019                              &error_abort);
1020     object_property_set_int(OBJECT(&phb->msis), "nr-irqs", PHB3_MAX_MSI,
1021                             &error_abort);
1022     if (!qdev_realize(DEVICE(&phb->msis), NULL, errp)) {
1023         return;
1024     }
1025 
1026     /* Power Bus Common Queue */
1027     object_property_set_link(OBJECT(&phb->pbcq), "phb", OBJECT(phb),
1028                              &error_abort);
1029     if (!qdev_realize(DEVICE(&phb->pbcq), NULL, errp)) {
1030         return;
1031     }
1032 
1033     /* Controller Registers */
1034     memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb,
1035                           "phb3-regs", 0x1000);
1036 
1037     /*
1038      * PHB3 doesn't support IO space. However, qemu gets very upset if
1039      * we don't have an IO region to anchor IO BARs onto so we just
1040      * initialize one which we never hook up to anything
1041      */
1042     memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000);
1043     memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio",
1044                        PCI_MMIO_TOTAL_SIZE);
1045 
1046     pci->bus = pci_register_root_bus(dev, "root-bus",
1047                                      pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
1048                                      &phb->pci_mmio, &phb->pci_io,
1049                                      0, 4, TYPE_PNV_PHB3_ROOT_BUS);
1050 
1051     pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
1052 
1053     /* Add a single Root port */
1054     qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
1055     qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
1056     qdev_realize(DEVICE(&phb->root), BUS(pci->bus), &error_fatal);
1057 }
1058 
1059 void pnv_phb3_update_regions(PnvPHB3 *phb)
1060 {
1061     PnvPBCQState *pbcq = &phb->pbcq;
1062 
1063     /* Unmap first always */
1064     if (memory_region_is_mapped(&phb->mr_regs)) {
1065         memory_region_del_subregion(&pbcq->phbbar, &phb->mr_regs);
1066     }
1067 
1068     /* Map registers if enabled */
1069     if (memory_region_is_mapped(&pbcq->phbbar)) {
1070         /* TODO: We should use the PHB BAR 2 register but we don't ... */
1071         memory_region_add_subregion(&pbcq->phbbar, 0, &phb->mr_regs);
1072     }
1073 
1074     /* Check/update m32 */
1075     if (memory_region_is_mapped(&phb->mr_m32)) {
1076         pnv_phb3_check_m32(phb);
1077     }
1078     pnv_phb3_check_all_m64s(phb);
1079 }
1080 
1081 static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge,
1082                                           PCIBus *rootbus)
1083 {
1084     PnvPHB3 *phb = PNV_PHB3(host_bridge);
1085 
1086     snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
1087              phb->chip_id, phb->phb_id);
1088     return phb->bus_path;
1089 }
1090 
1091 static Property pnv_phb3_properties[] = {
1092         DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0),
1093         DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0),
1094         DEFINE_PROP_END_OF_LIST(),
1095 };
1096 
1097 static void pnv_phb3_class_init(ObjectClass *klass, void *data)
1098 {
1099     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1100     DeviceClass *dc = DEVICE_CLASS(klass);
1101 
1102     hc->root_bus_path = pnv_phb3_root_bus_path;
1103     dc->realize = pnv_phb3_realize;
1104     device_class_set_props(dc, pnv_phb3_properties);
1105     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1106     dc->user_creatable = false;
1107 }
1108 
1109 static const TypeInfo pnv_phb3_type_info = {
1110     .name          = TYPE_PNV_PHB3,
1111     .parent        = TYPE_PCIE_HOST_BRIDGE,
1112     .instance_size = sizeof(PnvPHB3),
1113     .class_init    = pnv_phb3_class_init,
1114     .instance_init = pnv_phb3_instance_init,
1115 };
1116 
1117 static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data)
1118 {
1119     BusClass *k = BUS_CLASS(klass);
1120 
1121     /*
1122      * PHB3 has only a single root complex. Enforce the limit on the
1123      * parent bus
1124      */
1125     k->max_dev = 1;
1126 }
1127 
1128 static const TypeInfo pnv_phb3_root_bus_info = {
1129     .name = TYPE_PNV_PHB3_ROOT_BUS,
1130     .parent = TYPE_PCIE_BUS,
1131     .class_init = pnv_phb3_root_bus_class_init,
1132     .interfaces = (InterfaceInfo[]) {
1133         { INTERFACE_PCIE_DEVICE },
1134         { }
1135     },
1136 };
1137 
1138 static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp)
1139 {
1140     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1141     Error *local_err = NULL;
1142 
1143     rpc->parent_realize(dev, &local_err);
1144     if (local_err) {
1145         error_propagate(errp, local_err);
1146         return;
1147     }
1148 }
1149 
1150 static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data)
1151 {
1152     DeviceClass *dc = DEVICE_CLASS(klass);
1153     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1154     PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
1155 
1156     dc->desc     = "IBM PHB3 PCIE Root Port";
1157 
1158     device_class_set_parent_realize(dc, pnv_phb3_root_port_realize,
1159                                     &rpc->parent_realize);
1160     dc->user_creatable = false;
1161 
1162     k->vendor_id = PCI_VENDOR_ID_IBM;
1163     k->device_id = 0x03dc;
1164     k->revision  = 0;
1165 
1166     rpc->exp_offset = 0x48;
1167     rpc->aer_offset = 0x100;
1168 }
1169 
1170 static const TypeInfo pnv_phb3_root_port_info = {
1171     .name          = TYPE_PNV_PHB3_ROOT_PORT,
1172     .parent        = TYPE_PCIE_ROOT_PORT,
1173     .instance_size = sizeof(PnvPHB3RootPort),
1174     .class_init    = pnv_phb3_root_port_class_init,
1175 };
1176 
1177 static void pnv_phb3_register_types(void)
1178 {
1179     type_register_static(&pnv_phb3_root_bus_info);
1180     type_register_static(&pnv_phb3_root_port_info);
1181     type_register_static(&pnv_phb3_type_info);
1182     type_register_static(&pnv_phb3_iommu_memory_region_info);
1183 }
1184 
1185 type_init(pnv_phb3_register_types)
1186