xref: /openbmc/qemu/hw/ppc/spapr_pci.c (revision acb0ef58)
1 /*
2  * QEMU sPAPR PCI host originated from Uninorth PCI host
3  *
4  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
5  * Copyright (C) 2011 David Gibson, IBM Corporation.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 #include "hw/hw.h"
26 #include "hw/pci/pci.h"
27 #include "hw/pci/msi.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/pci_host.h"
30 #include "hw/ppc/spapr.h"
31 #include "hw/pci-host/spapr.h"
32 #include "exec/address-spaces.h"
33 #include <libfdt.h>
34 #include "trace.h"
35 #include "qemu/error-report.h"
36 
37 #include "hw/pci/pci_bus.h"
38 
/*
 * Function codes understood by the "ibm,change-msi" RTAS call.
 * The values mirror the kernel's arch/powerpc/platforms/pseries/msi.c.
 */
#define RTAS_QUERY_FN           0
#define RTAS_CHANGE_FN          1
#define RTAS_RESET_FN           2
#define RTAS_CHANGE_MSI_FN      3
#define RTAS_CHANGE_MSIX_FN     4

/* Interrupt type reported back to the guest for the RTAS_CHANGE_* codes */
#define RTAS_TYPE_MSI           1
#define RTAS_TYPE_MSIX          2
49 
50 static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
51 {
52     sPAPRPHBState *sphb;
53 
54     QLIST_FOREACH(sphb, &spapr->phbs, list) {
55         if (sphb->buid != buid) {
56             continue;
57         }
58         return sphb;
59     }
60 
61     return NULL;
62 }
63 
64 static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
65                            uint32_t config_addr)
66 {
67     sPAPRPHBState *sphb = find_phb(spapr, buid);
68     PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
69     int bus_num = (config_addr >> 16) & 0xFF;
70     int devfn = (config_addr >> 8) & 0xFF;
71 
72     if (!phb) {
73         return NULL;
74     }
75 
76     return pci_find_device(phb->bus, bus_num, devfn);
77 }
78 
/*
 * Convert an RTAS config address argument into a config space register
 * offset.  The low byte is the register number; bits 31:28 of @arg carry
 * the extended config space bits and end up in bits 11:8 of the result.
 */
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    uint32_t ext_bits = (arg >> 20) & 0xf00;
    uint32_t reg_bits = arg & 0xff;

    return ext_bits | reg_bits;
}
84 
85 static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
86                                    uint32_t addr, uint32_t size,
87                                    target_ulong rets)
88 {
89     PCIDevice *pci_dev;
90     uint32_t val;
91 
92     if ((size != 1) && (size != 2) && (size != 4)) {
93         /* access must be 1, 2 or 4 bytes */
94         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
95         return;
96     }
97 
98     pci_dev = find_dev(spapr, buid, addr);
99     addr = rtas_pci_cfgaddr(addr);
100 
101     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
102         /* Access must be to a valid device, within bounds and
103          * naturally aligned */
104         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
105         return;
106     }
107 
108     val = pci_host_config_read_common(pci_dev, addr,
109                                       pci_config_size(pci_dev), size);
110 
111     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
112     rtas_st(rets, 1, val);
113 }
114 
115 static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
116                                      uint32_t token, uint32_t nargs,
117                                      target_ulong args,
118                                      uint32_t nret, target_ulong rets)
119 {
120     uint64_t buid;
121     uint32_t size, addr;
122 
123     if ((nargs != 4) || (nret != 2)) {
124         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
125         return;
126     }
127 
128     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
129     size = rtas_ld(args, 3);
130     addr = rtas_ld(args, 0);
131 
132     finish_read_pci_config(spapr, buid, addr, size, rets);
133 }
134 
135 static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
136                                  uint32_t token, uint32_t nargs,
137                                  target_ulong args,
138                                  uint32_t nret, target_ulong rets)
139 {
140     uint32_t size, addr;
141 
142     if ((nargs != 2) || (nret != 2)) {
143         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
144         return;
145     }
146 
147     size = rtas_ld(args, 1);
148     addr = rtas_ld(args, 0);
149 
150     finish_read_pci_config(spapr, 0, addr, size, rets);
151 }
152 
153 static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
154                                     uint32_t addr, uint32_t size,
155                                     uint32_t val, target_ulong rets)
156 {
157     PCIDevice *pci_dev;
158 
159     if ((size != 1) && (size != 2) && (size != 4)) {
160         /* access must be 1, 2 or 4 bytes */
161         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
162         return;
163     }
164 
165     pci_dev = find_dev(spapr, buid, addr);
166     addr = rtas_pci_cfgaddr(addr);
167 
168     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
169         /* Access must be to a valid device, within bounds and
170          * naturally aligned */
171         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
172         return;
173     }
174 
175     pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
176                                  val, size);
177 
178     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
179 }
180 
181 static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
182                                       uint32_t token, uint32_t nargs,
183                                       target_ulong args,
184                                       uint32_t nret, target_ulong rets)
185 {
186     uint64_t buid;
187     uint32_t val, size, addr;
188 
189     if ((nargs != 5) || (nret != 1)) {
190         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
191         return;
192     }
193 
194     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
195     val = rtas_ld(args, 4);
196     size = rtas_ld(args, 3);
197     addr = rtas_ld(args, 0);
198 
199     finish_write_pci_config(spapr, buid, addr, size, val, rets);
200 }
201 
202 static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
203                                   uint32_t token, uint32_t nargs,
204                                   target_ulong args,
205                                   uint32_t nret, target_ulong rets)
206 {
207     uint32_t val, size, addr;
208 
209     if ((nargs != 3) || (nret != 1)) {
210         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
211         return;
212     }
213 
214 
215     val = rtas_ld(args, 2);
216     size = rtas_ld(args, 1);
217     addr = rtas_ld(args, 0);
218 
219     finish_write_pci_config(spapr, 0, addr, size, val, rets);
220 }
221 
222 /*
223  * Find an entry with config_addr or returns the empty one if not found AND
224  * alloc_new is set.
225  * At the moment the msi_table entries are never released so there is
226  * no point to look till the end of the list if we need to find the free entry.
227  */
228 static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
229                              bool alloc_new)
230 {
231     int i;
232 
233     for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
234         if (!phb->msi_table[i].nvec) {
235             break;
236         }
237         if (phb->msi_table[i].config_addr == config_addr) {
238             return i;
239         }
240     }
241     if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
242         trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
243         return i;
244     }
245 
246     return -1;
247 }
248 
249 /*
250  * Set MSI/MSIX message data.
251  * This is required for msi_notify()/msix_notify() which
252  * will write at the addresses via spapr_msi_write().
253  */
254 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
255                              unsigned first_irq, unsigned req_num)
256 {
257     unsigned i;
258     MSIMessage msg = { .address = addr, .data = first_irq };
259 
260     if (!msix) {
261         msi_set_message(pdev, msg);
262         trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
263         return;
264     }
265 
266     for (i = 0; i < req_num; ++i, ++msg.data) {
267         msix_set_message(pdev, i, msg);
268         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
269     }
270 }
271 
272 static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
273                                 uint32_t token, uint32_t nargs,
274                                 target_ulong args, uint32_t nret,
275                                 target_ulong rets)
276 {
277     uint32_t config_addr = rtas_ld(args, 0);
278     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
279     unsigned int func = rtas_ld(args, 3);
280     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
281     unsigned int seq_num = rtas_ld(args, 5);
282     unsigned int ret_intr_type;
283     int ndev, irq, max_irqs = 0;
284     sPAPRPHBState *phb = NULL;
285     PCIDevice *pdev = NULL;
286 
287     switch (func) {
288     case RTAS_CHANGE_MSI_FN:
289     case RTAS_CHANGE_FN:
290         ret_intr_type = RTAS_TYPE_MSI;
291         break;
292     case RTAS_CHANGE_MSIX_FN:
293         ret_intr_type = RTAS_TYPE_MSIX;
294         break;
295     default:
296         error_report("rtas_ibm_change_msi(%u) is not implemented", func);
297         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
298         return;
299     }
300 
301     /* Fins sPAPRPHBState */
302     phb = find_phb(spapr, buid);
303     if (phb) {
304         pdev = find_dev(spapr, buid, config_addr);
305     }
306     if (!phb || !pdev) {
307         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
308         return;
309     }
310 
311     /* Releasing MSIs */
312     if (!req_num) {
313         ndev = spapr_msicfg_find(phb, config_addr, false);
314         if (ndev < 0) {
315             trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
316             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
317             return;
318         }
319         trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
320         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
321         rtas_st(rets, 1, 0);
322         return;
323     }
324 
325     /* Enabling MSI */
326 
327     /* Find a device number in the map to add or reuse the existing one */
328     ndev = spapr_msicfg_find(phb, config_addr, true);
329     if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
330         error_report("No free entry for a new MSI device");
331         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
332         return;
333     }
334     trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
335 
336     /* Check if the device supports as many IRQs as requested */
337     if (ret_intr_type == RTAS_TYPE_MSI) {
338         max_irqs = msi_nr_vectors_allocated(pdev);
339     } else if (ret_intr_type == RTAS_TYPE_MSIX) {
340         max_irqs = pdev->msix_entries_nr;
341     }
342     if (!max_irqs) {
343         error_report("Requested interrupt type %d is not enabled for device#%d",
344                      ret_intr_type, ndev);
345         rtas_st(rets, 0, -1); /* Hardware error */
346         return;
347     }
348     /* Correct the number if the guest asked for too many */
349     if (req_num > max_irqs) {
350         req_num = max_irqs;
351     }
352 
353     /* Check if there is an old config and MSI number has not changed */
354     if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
355         /* Unexpected behaviour */
356         error_report("Cannot reuse MSI config for device#%d", ndev);
357         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
358         return;
359     }
360 
361     /* There is no cached config, allocate MSIs */
362     if (!phb->msi_table[ndev].nvec) {
363         irq = spapr_allocate_irq_block(req_num, false,
364                                        ret_intr_type == RTAS_TYPE_MSI);
365         if (irq < 0) {
366             error_report("Cannot allocate MSIs for device#%d", ndev);
367             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
368             return;
369         }
370         phb->msi_table[ndev].irq = irq;
371         phb->msi_table[ndev].nvec = req_num;
372         phb->msi_table[ndev].config_addr = config_addr;
373     }
374 
375     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
376     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
377                      phb->msi_table[ndev].irq, req_num);
378 
379     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
380     rtas_st(rets, 1, req_num);
381     rtas_st(rets, 2, ++seq_num);
382     rtas_st(rets, 3, ret_intr_type);
383 
384     trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
385 }
386 
387 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
388                                                    sPAPREnvironment *spapr,
389                                                    uint32_t token,
390                                                    uint32_t nargs,
391                                                    target_ulong args,
392                                                    uint32_t nret,
393                                                    target_ulong rets)
394 {
395     uint32_t config_addr = rtas_ld(args, 0);
396     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
397     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
398     int ndev;
399     sPAPRPHBState *phb = NULL;
400 
401     /* Fins sPAPRPHBState */
402     phb = find_phb(spapr, buid);
403     if (!phb) {
404         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
405         return;
406     }
407 
408     /* Find device descriptor and start IRQ */
409     ndev = spapr_msicfg_find(phb, config_addr, false);
410     if (ndev < 0) {
411         trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
412         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
413         return;
414     }
415 
416     intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
417     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
418                                                            intr_src_num);
419 
420     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
421     rtas_st(rets, 1, intr_src_num);
422     rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
423 }
424 
425 static int pci_spapr_swizzle(int slot, int pin)
426 {
427     return (slot + pin) % PCI_NUM_PINS;
428 }
429 
430 static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
431 {
432     /*
433      * Here we need to convert pci_dev + irq_num to some unique value
434      * which is less than number of IRQs on the specific bus (4).  We
435      * use standard PCI swizzling, that is (slot number + pin number)
436      * % 4.
437      */
438     return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
439 }
440 
441 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
442 {
443     /*
444      * Here we use the number returned by pci_spapr_map_irq to find a
445      * corresponding qemu_irq.
446      */
447     sPAPRPHBState *phb = opaque;
448 
449     trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
450     qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
451 }
452 
453 static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
454 {
455     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
456     PCIINTxRoute route;
457 
458     route.mode = PCI_INTX_ENABLED;
459     route.irq = sphb->lsi_table[pin].irq;
460 
461     return route;
462 }
463 
464 /*
465  * MSI/MSIX memory region implementation.
466  * The handler handles both MSI and MSIX.
467  * For MSI-X, the vector number is encoded as a part of the address,
468  * data is set to 0.
469  * For MSI, the vector number is encoded in least bits in data.
470  */
471 static void spapr_msi_write(void *opaque, hwaddr addr,
472                             uint64_t data, unsigned size)
473 {
474     uint32_t irq = data;
475 
476     trace_spapr_pci_msi_write(addr, data, irq);
477 
478     qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
479 }
480 
481 static const MemoryRegionOps spapr_msi_ops = {
482     /* There is no .read as the read result is undefined by PCI spec */
483     .read = NULL,
484     .write = spapr_msi_write,
485     .endianness = DEVICE_LITTLE_ENDIAN
486 };
487 
488 void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
489 {
490     uint64_t window_size = 4096;
491 
492     /*
493      * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
494      * we need to allocate some memory to catch those writes coming
495      * from msi_notify()/msix_notify().
496      * As MSIMessage:addr is going to be the same and MSIMessage:data
497      * is going to be a VIRQ number, 4 bytes of the MSI MR will only
498      * be used.
499      *
500      * For KVM we want to ensure that this memory is a full page so that
501      * our memory slot is of page size granularity.
502      */
503 #ifdef CONFIG_KVM
504     if (kvm_enabled()) {
505         window_size = getpagesize();
506     }
507 #endif
508 
509     spapr->msi_win_addr = addr;
510     memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
511                           "msi", window_size);
512     memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
513                                 &spapr->msiwindow);
514 }
515 
516 /*
517  * PHB PCI device
518  */
519 static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
520 {
521     sPAPRPHBState *phb = opaque;
522 
523     return &phb->iommu_as;
524 }
525 
526 static void spapr_phb_realize(DeviceState *dev, Error **errp)
527 {
528     SysBusDevice *s = SYS_BUS_DEVICE(dev);
529     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
530     PCIHostState *phb = PCI_HOST_BRIDGE(s);
531     char *namebuf;
532     int i;
533     PCIBus *bus;
534 
535     if (sphb->index != -1) {
536         hwaddr windows_base;
537 
538         if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
539             || (sphb->mem_win_addr != -1)
540             || (sphb->io_win_addr != -1)) {
541             error_setg(errp, "Either \"index\" or other parameters must"
542                        " be specified for PAPR PHB, not both");
543             return;
544         }
545 
546         sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
547         sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index;
548 
549         windows_base = SPAPR_PCI_WINDOW_BASE
550             + sphb->index * SPAPR_PCI_WINDOW_SPACING;
551         sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
552         sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
553     }
554 
555     if (sphb->buid == -1) {
556         error_setg(errp, "BUID not specified for PHB");
557         return;
558     }
559 
560     if (sphb->dma_liobn == -1) {
561         error_setg(errp, "LIOBN not specified for PHB");
562         return;
563     }
564 
565     if (sphb->mem_win_addr == -1) {
566         error_setg(errp, "Memory window address not specified for PHB");
567         return;
568     }
569 
570     if (sphb->io_win_addr == -1) {
571         error_setg(errp, "IO window address not specified for PHB");
572         return;
573     }
574 
575     if (find_phb(spapr, sphb->buid)) {
576         error_setg(errp, "PCI host bridges must have unique BUIDs");
577         return;
578     }
579 
580     sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
581 
582     namebuf = alloca(strlen(sphb->dtbusname) + 32);
583 
584     /* Initialize memory regions */
585     sprintf(namebuf, "%s.mmio", sphb->dtbusname);
586     memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
587 
588     sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname);
589     memory_region_init_alias(&sphb->memwindow, OBJECT(sphb),
590                              namebuf, &sphb->memspace,
591                              SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
592     memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
593                                 &sphb->memwindow);
594 
595     /* Initialize IO regions */
596     sprintf(namebuf, "%s.io", sphb->dtbusname);
597     memory_region_init(&sphb->iospace, OBJECT(sphb),
598                        namebuf, SPAPR_PCI_IO_WIN_SIZE);
599 
600     sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
601     memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
602                              &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
603     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
604                                 &sphb->iowindow);
605 
606     bus = pci_register_bus(dev, NULL,
607                            pci_spapr_set_irq, pci_spapr_map_irq, sphb,
608                            &sphb->memspace, &sphb->iospace,
609                            PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
610     phb->bus = bus;
611 
612     sphb->dma_window_start = 0;
613     sphb->dma_window_size = 0x40000000;
614     sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn,
615                                      sphb->dma_window_size);
616     if (!sphb->tcet) {
617         error_setg(errp, "Unable to create TCE table for %s",
618                    sphb->dtbusname);
619         return;
620     }
621     address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
622                        sphb->dtbusname);
623 
624     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
625 
626     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
627 
628     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
629 
630     /* Initialize the LSI table */
631     for (i = 0; i < PCI_NUM_PINS; i++) {
632         uint32_t irq;
633 
634         irq = spapr_allocate_lsi(0);
635         if (!irq) {
636             error_setg(errp, "spapr_allocate_lsi failed");
637             return;
638         }
639 
640         sphb->lsi_table[i].irq = irq;
641     }
642 }
643 
644 static void spapr_phb_reset(DeviceState *qdev)
645 {
646     SysBusDevice *s = SYS_BUS_DEVICE(qdev);
647     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
648 
649     /* Reset the IOMMU state */
650     device_reset(DEVICE(sphb->tcet));
651 }
652 
653 static Property spapr_phb_properties[] = {
654     DEFINE_PROP_INT32("index", sPAPRPHBState, index, -1),
655     DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
656     DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
657     DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
658     DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
659                        SPAPR_PCI_MMIO_WIN_SIZE),
660     DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
661     DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
662                        SPAPR_PCI_IO_WIN_SIZE),
663     DEFINE_PROP_END_OF_LIST(),
664 };
665 
666 static const VMStateDescription vmstate_spapr_pci_lsi = {
667     .name = "spapr_pci/lsi",
668     .version_id = 1,
669     .minimum_version_id = 1,
670     .fields = (VMStateField[]) {
671         VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),
672 
673         VMSTATE_END_OF_LIST()
674     },
675 };
676 
677 static const VMStateDescription vmstate_spapr_pci_msi = {
678     .name = "spapr_pci/lsi",
679     .version_id = 1,
680     .minimum_version_id = 1,
681     .fields = (VMStateField[]) {
682         VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
683         VMSTATE_UINT32(irq, struct spapr_pci_msi),
684         VMSTATE_UINT32(nvec, struct spapr_pci_msi),
685 
686         VMSTATE_END_OF_LIST()
687     },
688 };
689 
690 static const VMStateDescription vmstate_spapr_pci = {
691     .name = "spapr_pci",
692     .version_id = 1,
693     .minimum_version_id = 1,
694     .fields = (VMStateField[]) {
695         VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
696         VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
697         VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
698         VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
699         VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
700         VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
701         VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
702                              vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
703         VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
704                              vmstate_spapr_pci_msi, struct spapr_pci_msi),
705 
706         VMSTATE_END_OF_LIST()
707     },
708 };
709 
710 static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
711                                            PCIBus *rootbus)
712 {
713     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
714 
715     return sphb->dtbusname;
716 }
717 
718 static void spapr_phb_class_init(ObjectClass *klass, void *data)
719 {
720     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
721     DeviceClass *dc = DEVICE_CLASS(klass);
722 
723     hc->root_bus_path = spapr_phb_root_bus_path;
724     dc->realize = spapr_phb_realize;
725     dc->props = spapr_phb_properties;
726     dc->reset = spapr_phb_reset;
727     dc->vmsd = &vmstate_spapr_pci;
728     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
729     dc->cannot_instantiate_with_device_add_yet = false;
730 }
731 
732 static const TypeInfo spapr_phb_info = {
733     .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
734     .parent        = TYPE_PCI_HOST_BRIDGE,
735     .instance_size = sizeof(sPAPRPHBState),
736     .class_init    = spapr_phb_class_init,
737 };
738 
739 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
740 {
741     DeviceState *dev;
742 
743     dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
744     qdev_prop_set_uint32(dev, "index", index);
745     qdev_init_nofail(dev);
746 
747     return PCI_HOST_BRIDGE(dev);
748 }
749 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) takes the low @l bits of @x and places them at bit
 * position @p.  The mask is built from an unsigned 1 so that the whole
 * expression is unsigned arithmetic; with a signed 1 a field reaching
 * bit 31 (b_n) would shift into the sign bit.
 */
#define b_x(x, p, l)    (((x) & ((1U << (l)) - 1)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
760 
761 int spapr_populate_pci_dt(sPAPRPHBState *phb,
762                           uint32_t xics_phandle,
763                           void *fdt)
764 {
765     int bus_off, i, j;
766     char nodename[256];
767     uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
768     struct {
769         uint32_t hi;
770         uint64_t child;
771         uint64_t parent;
772         uint64_t size;
773     } QEMU_PACKED ranges[] = {
774         {
775             cpu_to_be32(b_ss(1)), cpu_to_be64(0),
776             cpu_to_be64(phb->io_win_addr),
777             cpu_to_be64(memory_region_size(&phb->iospace)),
778         },
779         {
780             cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
781             cpu_to_be64(phb->mem_win_addr),
782             cpu_to_be64(memory_region_size(&phb->memwindow)),
783         },
784     };
785     uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
786     uint32_t interrupt_map_mask[] = {
787         cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
788     uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
789 
790     /* Start populating the FDT */
791     sprintf(nodename, "pci@%" PRIx64, phb->buid);
792     bus_off = fdt_add_subnode(fdt, 0, nodename);
793     if (bus_off < 0) {
794         return bus_off;
795     }
796 
797 #define _FDT(exp) \
798     do { \
799         int ret = (exp);                                           \
800         if (ret < 0) {                                             \
801             return ret;                                            \
802         }                                                          \
803     } while (0)
804 
805     /* Write PHB properties */
806     _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
807     _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
808     _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
809     _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
810     _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
811     _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
812     _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
813     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
814     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
815     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
816 
817     /* Build the interrupt-map, this must matches what is done
818      * in pci_spapr_map_irq
819      */
820     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
821                      &interrupt_map_mask, sizeof(interrupt_map_mask)));
822     for (i = 0; i < PCI_SLOT_MAX; i++) {
823         for (j = 0; j < PCI_NUM_PINS; j++) {
824             uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
825             int lsi_num = pci_spapr_swizzle(i, j);
826 
827             irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
828             irqmap[1] = 0;
829             irqmap[2] = 0;
830             irqmap[3] = cpu_to_be32(j+1);
831             irqmap[4] = cpu_to_be32(xics_phandle);
832             irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
833             irqmap[6] = cpu_to_be32(0x8);
834         }
835     }
836     /* Write interrupt map */
837     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
838                      sizeof(interrupt_map)));
839 
840     spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
841                  phb->dma_liobn, phb->dma_window_start,
842                  phb->dma_window_size);
843 
844     return 0;
845 }
846 
847 void spapr_pci_rtas_init(void)
848 {
849     spapr_rtas_register("read-pci-config", rtas_read_pci_config);
850     spapr_rtas_register("write-pci-config", rtas_write_pci_config);
851     spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
852     spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
853     if (msi_supported) {
854         spapr_rtas_register("ibm,query-interrupt-source-number",
855                             rtas_ibm_query_interrupt_source_number);
856         spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
857     }
858 }
859 
860 static void spapr_pci_register_types(void)
861 {
862     type_register_static(&spapr_phb_info);
863 }
864 
865 type_init(spapr_pci_register_types)
866