xref: /openbmc/qemu/hw/ppc/spapr_pci.c (revision e38e943a)
1 /*
2  * QEMU sPAPR PCI host originated from Uninorth PCI host
3  *
4  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
5  * Copyright (C) 2011 David Gibson, IBM Corporation.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 #include "hw/hw.h"
26 #include "hw/pci/pci.h"
27 #include "hw/pci/msi.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/pci_host.h"
30 #include "hw/ppc/spapr.h"
31 #include "hw/pci-host/spapr.h"
32 #include "exec/address-spaces.h"
33 #include <libfdt.h>
34 #include "trace.h"
35 #include "qemu/error-report.h"
36 
37 #include "hw/pci/pci_bus.h"
38 
/*
 * Function selectors taken by the "ibm,change-msi" RTAS call.
 * The numeric values are copied from the kernel
 * (arch/powerpc/platforms/pseries/msi.c) and must not change.
 */
enum {
    RTAS_QUERY_FN       = 0,
    RTAS_CHANGE_FN      = 1,
    RTAS_RESET_FN       = 2,
    RTAS_CHANGE_MSI_FN  = 3,
    RTAS_CHANGE_MSIX_FN = 4,
};
45 
/* Interrupt type codes reported back to the guest on RTAS_CHANGE_* */
enum {
    RTAS_TYPE_MSI  = 1,
    RTAS_TYPE_MSIX = 2,
};
49 
50 static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
51 {
52     sPAPRPHBState *sphb;
53 
54     QLIST_FOREACH(sphb, &spapr->phbs, list) {
55         if (sphb->buid != buid) {
56             continue;
57         }
58         return sphb;
59     }
60 
61     return NULL;
62 }
63 
64 static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
65                            uint32_t config_addr)
66 {
67     sPAPRPHBState *sphb = find_phb(spapr, buid);
68     PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
69     int bus_num = (config_addr >> 16) & 0xFF;
70     int devfn = (config_addr >> 8) & 0xFF;
71 
72     if (!phb) {
73         return NULL;
74     }
75 
76     return pci_find_device(phb->bus, bus_num, devfn);
77 }
78 
/*
 * Convert an RTAS config address argument into a config space offset.
 *
 * The low byte of @arg is the classic register number; the top nibble
 * (bits 31:28) carries the extended register bits and ends up in
 * bits 11:8 of the result.
 */
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    uint32_t ext_bits = (arg >> 20) & 0xf00;
    uint32_t reg_bits = arg & 0xff;

    return ext_bits | reg_bits;
}
84 
85 static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
86                                    uint32_t addr, uint32_t size,
87                                    target_ulong rets)
88 {
89     PCIDevice *pci_dev;
90     uint32_t val;
91 
92     if ((size != 1) && (size != 2) && (size != 4)) {
93         /* access must be 1, 2 or 4 bytes */
94         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
95         return;
96     }
97 
98     pci_dev = find_dev(spapr, buid, addr);
99     addr = rtas_pci_cfgaddr(addr);
100 
101     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
102         /* Access must be to a valid device, within bounds and
103          * naturally aligned */
104         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
105         return;
106     }
107 
108     val = pci_host_config_read_common(pci_dev, addr,
109                                       pci_config_size(pci_dev), size);
110 
111     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
112     rtas_st(rets, 1, val);
113 }
114 
115 static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
116                                      uint32_t token, uint32_t nargs,
117                                      target_ulong args,
118                                      uint32_t nret, target_ulong rets)
119 {
120     uint64_t buid;
121     uint32_t size, addr;
122 
123     if ((nargs != 4) || (nret != 2)) {
124         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
125         return;
126     }
127 
128     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
129     size = rtas_ld(args, 3);
130     addr = rtas_ld(args, 0);
131 
132     finish_read_pci_config(spapr, buid, addr, size, rets);
133 }
134 
135 static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
136                                  uint32_t token, uint32_t nargs,
137                                  target_ulong args,
138                                  uint32_t nret, target_ulong rets)
139 {
140     uint32_t size, addr;
141 
142     if ((nargs != 2) || (nret != 2)) {
143         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
144         return;
145     }
146 
147     size = rtas_ld(args, 1);
148     addr = rtas_ld(args, 0);
149 
150     finish_read_pci_config(spapr, 0, addr, size, rets);
151 }
152 
153 static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
154                                     uint32_t addr, uint32_t size,
155                                     uint32_t val, target_ulong rets)
156 {
157     PCIDevice *pci_dev;
158 
159     if ((size != 1) && (size != 2) && (size != 4)) {
160         /* access must be 1, 2 or 4 bytes */
161         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
162         return;
163     }
164 
165     pci_dev = find_dev(spapr, buid, addr);
166     addr = rtas_pci_cfgaddr(addr);
167 
168     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
169         /* Access must be to a valid device, within bounds and
170          * naturally aligned */
171         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
172         return;
173     }
174 
175     pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
176                                  val, size);
177 
178     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
179 }
180 
181 static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
182                                       uint32_t token, uint32_t nargs,
183                                       target_ulong args,
184                                       uint32_t nret, target_ulong rets)
185 {
186     uint64_t buid;
187     uint32_t val, size, addr;
188 
189     if ((nargs != 5) || (nret != 1)) {
190         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
191         return;
192     }
193 
194     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
195     val = rtas_ld(args, 4);
196     size = rtas_ld(args, 3);
197     addr = rtas_ld(args, 0);
198 
199     finish_write_pci_config(spapr, buid, addr, size, val, rets);
200 }
201 
202 static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
203                                   uint32_t token, uint32_t nargs,
204                                   target_ulong args,
205                                   uint32_t nret, target_ulong rets)
206 {
207     uint32_t val, size, addr;
208 
209     if ((nargs != 3) || (nret != 1)) {
210         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
211         return;
212     }
213 
214 
215     val = rtas_ld(args, 2);
216     size = rtas_ld(args, 1);
217     addr = rtas_ld(args, 0);
218 
219     finish_write_pci_config(spapr, 0, addr, size, val, rets);
220 }
221 
222 /*
223  * Set MSI/MSIX message data.
224  * This is required for msi_notify()/msix_notify() which
225  * will write at the addresses via spapr_msi_write().
226  *
227  * If hwaddr == 0, all entries will have .data == first_irq i.e.
228  * table will be reset.
229  */
230 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
231                              unsigned first_irq, unsigned req_num)
232 {
233     unsigned i;
234     MSIMessage msg = { .address = addr, .data = first_irq };
235 
236     if (!msix) {
237         msi_set_message(pdev, msg);
238         trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
239         return;
240     }
241 
242     for (i = 0; i < req_num; ++i) {
243         msix_set_message(pdev, i, msg);
244         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
245         if (addr) {
246             ++msg.data;
247         }
248     }
249 }
250 
251 static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
252                                 uint32_t token, uint32_t nargs,
253                                 target_ulong args, uint32_t nret,
254                                 target_ulong rets)
255 {
256     uint32_t config_addr = rtas_ld(args, 0);
257     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
258     unsigned int func = rtas_ld(args, 3);
259     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
260     unsigned int seq_num = rtas_ld(args, 5);
261     unsigned int ret_intr_type;
262     unsigned int irq, max_irqs = 0, num = 0;
263     sPAPRPHBState *phb = NULL;
264     PCIDevice *pdev = NULL;
265     bool msix = false;
266     spapr_pci_msi *msi;
267     int *config_addr_key;
268 
269     switch (func) {
270     case RTAS_CHANGE_MSI_FN:
271     case RTAS_CHANGE_FN:
272         ret_intr_type = RTAS_TYPE_MSI;
273         break;
274     case RTAS_CHANGE_MSIX_FN:
275         ret_intr_type = RTAS_TYPE_MSIX;
276         break;
277     default:
278         error_report("rtas_ibm_change_msi(%u) is not implemented", func);
279         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
280         return;
281     }
282 
283     /* Fins sPAPRPHBState */
284     phb = find_phb(spapr, buid);
285     if (phb) {
286         pdev = find_dev(spapr, buid, config_addr);
287     }
288     if (!phb || !pdev) {
289         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
290         return;
291     }
292 
293     /* Releasing MSIs */
294     if (!req_num) {
295         msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
296         if (!msi) {
297             trace_spapr_pci_msi("Releasing wrong config", config_addr);
298             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
299             return;
300         }
301 
302         xics_free(spapr->icp, msi->first_irq, msi->num);
303         spapr_msi_setmsg(pdev, 0, msix, 0, num);
304         g_hash_table_remove(phb->msi, &config_addr);
305 
306         trace_spapr_pci_msi("Released MSIs", config_addr);
307         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
308         rtas_st(rets, 1, 0);
309         return;
310     }
311 
312     /* Enabling MSI */
313 
314     /* Check if the device supports as many IRQs as requested */
315     if (ret_intr_type == RTAS_TYPE_MSI) {
316         max_irqs = msi_nr_vectors_allocated(pdev);
317     } else if (ret_intr_type == RTAS_TYPE_MSIX) {
318         max_irqs = pdev->msix_entries_nr;
319     }
320     if (!max_irqs) {
321         error_report("Requested interrupt type %d is not enabled for device %x",
322                      ret_intr_type, config_addr);
323         rtas_st(rets, 0, -1); /* Hardware error */
324         return;
325     }
326     /* Correct the number if the guest asked for too many */
327     if (req_num > max_irqs) {
328         trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
329         req_num = max_irqs;
330         irq = 0; /* to avoid misleading trace */
331         goto out;
332     }
333 
334     /* Allocate MSIs */
335     irq = xics_alloc_block(spapr->icp, 0, req_num, false,
336                            ret_intr_type == RTAS_TYPE_MSI);
337     if (!irq) {
338         error_report("Cannot allocate MSIs for device %x", config_addr);
339         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
340         return;
341     }
342 
343     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
344     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
345                      irq, req_num);
346 
347     /* Add MSI device to cache */
348     msi = g_new(spapr_pci_msi, 1);
349     msi->first_irq = irq;
350     msi->num = req_num;
351     config_addr_key = g_new(int, 1);
352     *config_addr_key = config_addr;
353     g_hash_table_insert(phb->msi, config_addr_key, msi);
354 
355 out:
356     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
357     rtas_st(rets, 1, req_num);
358     rtas_st(rets, 2, ++seq_num);
359     rtas_st(rets, 3, ret_intr_type);
360 
361     trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
362 }
363 
364 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
365                                                    sPAPREnvironment *spapr,
366                                                    uint32_t token,
367                                                    uint32_t nargs,
368                                                    target_ulong args,
369                                                    uint32_t nret,
370                                                    target_ulong rets)
371 {
372     uint32_t config_addr = rtas_ld(args, 0);
373     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
374     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
375     sPAPRPHBState *phb = NULL;
376     PCIDevice *pdev = NULL;
377     spapr_pci_msi *msi;
378 
379     /* Find sPAPRPHBState */
380     phb = find_phb(spapr, buid);
381     if (phb) {
382         pdev = find_dev(spapr, buid, config_addr);
383     }
384     if (!phb || !pdev) {
385         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
386         return;
387     }
388 
389     /* Find device descriptor and start IRQ */
390     msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
391     if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
392         trace_spapr_pci_msi("Failed to return vector", config_addr);
393         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
394         return;
395     }
396     intr_src_num = msi->first_irq + ioa_intr_num;
397     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
398                                                            intr_src_num);
399 
400     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
401     rtas_st(rets, 1, intr_src_num);
402     rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
403 }
404 
405 static int pci_spapr_swizzle(int slot, int pin)
406 {
407     return (slot + pin) % PCI_NUM_PINS;
408 }
409 
410 static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
411 {
412     /*
413      * Here we need to convert pci_dev + irq_num to some unique value
414      * which is less than number of IRQs on the specific bus (4).  We
415      * use standard PCI swizzling, that is (slot number + pin number)
416      * % 4.
417      */
418     return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
419 }
420 
421 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
422 {
423     /*
424      * Here we use the number returned by pci_spapr_map_irq to find a
425      * corresponding qemu_irq.
426      */
427     sPAPRPHBState *phb = opaque;
428 
429     trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
430     qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
431 }
432 
433 static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
434 {
435     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
436     PCIINTxRoute route;
437 
438     route.mode = PCI_INTX_ENABLED;
439     route.irq = sphb->lsi_table[pin].irq;
440 
441     return route;
442 }
443 
444 /*
445  * MSI/MSIX memory region implementation.
446  * The handler handles both MSI and MSIX.
447  * For MSI-X, the vector number is encoded as a part of the address,
448  * data is set to 0.
449  * For MSI, the vector number is encoded in least bits in data.
450  */
451 static void spapr_msi_write(void *opaque, hwaddr addr,
452                             uint64_t data, unsigned size)
453 {
454     uint32_t irq = data;
455 
456     trace_spapr_pci_msi_write(addr, data, irq);
457 
458     qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
459 }
460 
/*
 * Access callbacks for the MSI window: writes are routed to
 * spapr_msi_write(), reads are not implemented.
 */
static const MemoryRegionOps spapr_msi_ops = {
    /* There is no .read as the read result is undefined by PCI spec */
    .read = NULL,
    .write = spapr_msi_write,
    .endianness = DEVICE_LITTLE_ENDIAN
};
467 
468 void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
469 {
470     uint64_t window_size = 4096;
471 
472     /*
473      * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
474      * we need to allocate some memory to catch those writes coming
475      * from msi_notify()/msix_notify().
476      * As MSIMessage:addr is going to be the same and MSIMessage:data
477      * is going to be a VIRQ number, 4 bytes of the MSI MR will only
478      * be used.
479      *
480      * For KVM we want to ensure that this memory is a full page so that
481      * our memory slot is of page size granularity.
482      */
483 #ifdef CONFIG_KVM
484     if (kvm_enabled()) {
485         window_size = getpagesize();
486     }
487 #endif
488 
489     spapr->msi_win_addr = addr;
490     memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
491                           "msi", window_size);
492     memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
493                                 &spapr->msiwindow);
494 }
495 
496 /*
497  * PHB PCI device
498  */
499 static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
500 {
501     sPAPRPHBState *phb = opaque;
502 
503     return &phb->iommu_as;
504 }
505 
/*
 * Realize a sPAPR PCI host bridge.
 *
 * Derives or validates the PHB parameters, creates the MMIO and IO
 * regions and their windows in system memory, registers the PCI bus,
 * sets up the IOMMU address space, allocates one LSI per INTx pin,
 * calls the class finish_realize hook and finally creates the MSI
 * lookup table.  Failures are reported through @errp.
 */
static void spapr_phb_realize(DeviceState *dev, Error **errp)
{
    SysBusDevice *s = SYS_BUS_DEVICE(dev);
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
    PCIHostState *phb = PCI_HOST_BRIDGE(s);
    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
    char *namebuf;
    int i;
    PCIBus *bus;

    /*
     * "index" is a shorthand: when given, BUID, LIOBN and both window
     * addresses are computed from it and must not be set explicitly.
     */
    if (sphb->index != -1) {
        hwaddr windows_base;

        if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
            || (sphb->mem_win_addr != -1)
            || (sphb->io_win_addr != -1)) {
            error_setg(errp, "Either \"index\" or other parameters must"
                       " be specified for PAPR PHB, not both");
            return;
        }

        sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
        sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index;

        windows_base = SPAPR_PCI_WINDOW_BASE
            + sphb->index * SPAPR_PCI_WINDOW_SPACING;
        sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
        sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
    }

    /* -1 is the property default, i.e. "not specified" */
    if (sphb->buid == -1) {
        error_setg(errp, "BUID not specified for PHB");
        return;
    }

    if (sphb->dma_liobn == -1) {
        error_setg(errp, "LIOBN not specified for PHB");
        return;
    }

    if (sphb->mem_win_addr == -1) {
        error_setg(errp, "Memory window address not specified for PHB");
        return;
    }

    if (sphb->io_win_addr == -1) {
        error_setg(errp, "IO window address not specified for PHB");
        return;
    }

    if (find_phb(spapr, sphb->buid)) {
        error_setg(errp, "PCI host bridges must have unique BUIDs");
        return;
    }

    sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);

    /* Scratch buffer for region names: bus name plus a short suffix */
    namebuf = alloca(strlen(sphb->dtbusname) + 32);

    /* Initialize memory regions */
    sprintf(namebuf, "%s.mmio", sphb->dtbusname);
    memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);

    sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname);
    memory_region_init_alias(&sphb->memwindow, OBJECT(sphb),
                             namebuf, &sphb->memspace,
                             SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
    memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
                                &sphb->memwindow);

    /* Initialize IO regions */
    /*
     * NOTE(review): both IO regions are sized with SPAPR_PCI_IO_WIN_SIZE,
     * not with the io_win_size field - confirm this is intended.
     */
    sprintf(namebuf, "%s.io", sphb->dtbusname);
    memory_region_init(&sphb->iospace, OBJECT(sphb),
                       namebuf, SPAPR_PCI_IO_WIN_SIZE);

    sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
    memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
                             &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
    memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
                                &sphb->iowindow);

    bus = pci_register_bus(dev, NULL,
                           pci_spapr_set_irq, pci_spapr_map_irq, sphb,
                           &sphb->memspace, &sphb->iospace,
                           PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
    phb->bus = bus;

    /*
     * Initialize PHB address space.
     * By default there will be at least one subregion for default
     * 32bit DMA window.
     * Later the guest might want to create another DMA window
     * which will become another memory subregion.
     */
    sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);

    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
                       namebuf, UINT64_MAX);
    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
                       sphb->dtbusname);

    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);

    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);

    /* From here on find_phb() can see this bridge */
    QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);

    /* Initialize the LSI table */
    for (i = 0; i < PCI_NUM_PINS; i++) {
        uint32_t irq;

        irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
        if (!irq) {
            error_setg(errp, "spapr_allocate_lsi failed");
            return;
        }

        sphb->lsi_table[i].irq = irq;
    }

    if (!info->finish_realize) {
        error_setg(errp, "finish_realize not defined");
        return;
    }

    info->finish_realize(sphb, errp);

    /* config address -> spapr_pci_msi; keys and values are g_free()d */
    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
}
635 
636 static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
637 {
638     sPAPRTCETable *tcet;
639 
640     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
641                                0,
642                                SPAPR_TCE_PAGE_SHIFT,
643                                0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
644     if (!tcet) {
645         error_setg(errp, "Unable to create TCE table for %s",
646                    sphb->dtbusname);
647         return ;
648     }
649 
650     /* Register default 32bit DMA window */
651     memory_region_add_subregion(&sphb->iommu_root, 0,
652                                 spapr_tce_get_iommu(tcet));
653 }
654 
655 static int spapr_phb_children_reset(Object *child, void *opaque)
656 {
657     DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
658 
659     if (dev) {
660         device_reset(dev);
661     }
662 
663     return 0;
664 }
665 
666 static void spapr_phb_reset(DeviceState *qdev)
667 {
668     /* Reset the IOMMU state */
669     object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
670 }
671 
/*
 * User-settable PHB properties.  A default of -1 means "not specified";
 * spapr_phb_realize() either derives such values from "index" or
 * rejects the device.
 */
static Property spapr_phb_properties[] = {
    DEFINE_PROP_INT32("index", sPAPRPHBState, index, -1),
    DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
    DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
    DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
    DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
                       SPAPR_PCI_MMIO_WIN_SIZE),
    DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
    DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
    DEFINE_PROP_END_OF_LIST(),
};
684 
/* Migration description of one LSI table entry (its irq field). */
static const VMStateDescription vmstate_spapr_pci_lsi = {
    .name = "spapr_pci/lsi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),

        VMSTATE_END_OF_LIST()
    },
};
695 
/*
 * Migration description of one flattened MSI table entry: the hash key
 * plus the first_irq/num pair stored under it.
 */
static const VMStateDescription vmstate_spapr_pci_msi = {
    .name = "spapr_pci/msi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(key, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};
707 
708 static void spapr_pci_pre_save(void *opaque)
709 {
710     sPAPRPHBState *sphb = opaque;
711     GHashTableIter iter;
712     gpointer key, value;
713     int i;
714 
715     if (sphb->msi_devs) {
716         g_free(sphb->msi_devs);
717         sphb->msi_devs = NULL;
718     }
719     sphb->msi_devs_num = g_hash_table_size(sphb->msi);
720     if (!sphb->msi_devs_num) {
721         return;
722     }
723     sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
724 
725     g_hash_table_iter_init(&iter, sphb->msi);
726     for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
727         sphb->msi_devs[i].key = *(uint32_t *) key;
728         sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
729     }
730 }
731 
732 static int spapr_pci_post_load(void *opaque, int version_id)
733 {
734     sPAPRPHBState *sphb = opaque;
735     gpointer key, value;
736     int i;
737 
738     for (i = 0; i < sphb->msi_devs_num; ++i) {
739         key = g_memdup(&sphb->msi_devs[i].key,
740                        sizeof(sphb->msi_devs[i].key));
741         value = g_memdup(&sphb->msi_devs[i].value,
742                          sizeof(sphb->msi_devs[i].value));
743         g_hash_table_insert(sphb->msi, key, value);
744     }
745     if (sphb->msi_devs) {
746         g_free(sphb->msi_devs);
747         sphb->msi_devs = NULL;
748     }
749     sphb->msi_devs_num = 0;
750 
751     return 0;
752 }
753 
/*
 * Migration description of the PHB.  The configuration fields use the
 * _EQUAL variants; the LSI table and the flattened MSI array
 * (prepared by spapr_pci_pre_save(), consumed by spapr_pci_post_load())
 * follow.
 */
static const VMStateDescription vmstate_spapr_pci = {
    .name = "spapr_pci",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_save = spapr_pci_pre_save,
    .post_load = spapr_pci_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
        VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
        VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                             vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
        VMSTATE_INT32(msi_devs_num, sPAPRPHBState),
        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0,
                                    vmstate_spapr_pci_msi, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};
775 
776 static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
777                                            PCIBus *rootbus)
778 {
779     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
780 
781     return sphb->dtbusname;
782 }
783 
784 static void spapr_phb_class_init(ObjectClass *klass, void *data)
785 {
786     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
787     DeviceClass *dc = DEVICE_CLASS(klass);
788     sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
789 
790     hc->root_bus_path = spapr_phb_root_bus_path;
791     dc->realize = spapr_phb_realize;
792     dc->props = spapr_phb_properties;
793     dc->reset = spapr_phb_reset;
794     dc->vmsd = &vmstate_spapr_pci;
795     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
796     dc->cannot_instantiate_with_device_add_yet = false;
797     spc->finish_realize = spapr_phb_finish_realize;
798 }
799 
/* QOM type description of the sPAPR PCI host bridge. */
static const TypeInfo spapr_phb_info = {
    .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
    .parent        = TYPE_PCI_HOST_BRIDGE,
    .instance_size = sizeof(sPAPRPHBState),
    .class_init    = spapr_phb_class_init,
    .class_size    = sizeof(sPAPRPHBClass),
};
807 
808 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
809 {
810     DeviceState *dev;
811 
812     dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
813     qdev_prop_set_uint32(dev, "index", index);
814     qdev_init_nofail(dev);
815 
816     return PCI_HOST_BRIDGE(dev);
817 }
818 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) takes the low @l bits of @x and places them at bit
 * position @p.  The mask is built from an unsigned constant so the
 * whole expression is unsigned; this keeps b_n(), which shifts into
 * bit 31, free of signed-shift overflow.
 */
#define b_x(x, p, l)    (((x) & ((1u << (l)) - 1)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
829 
/* Context handed to spapr_phb_children_dt() through object_child_foreach(). */
typedef struct sPAPRTCEDT {
    void *fdt;      /* device tree blob being populated */
    int node_off;   /* offset of the PHB node inside fdt */
} sPAPRTCEDT;
834 
835 static int spapr_phb_children_dt(Object *child, void *opaque)
836 {
837     sPAPRTCEDT *p = opaque;
838     sPAPRTCETable *tcet;
839 
840     tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
841     if (!tcet) {
842         return 0;
843     }
844 
845     spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window",
846                  tcet->liobn, tcet->bus_offset,
847                  tcet->nb_table << tcet->page_shift);
848     /* Stop after the first window */
849 
850     return 1;
851 }
852 
853 int spapr_populate_pci_dt(sPAPRPHBState *phb,
854                           uint32_t xics_phandle,
855                           void *fdt)
856 {
857     int bus_off, i, j;
858     char nodename[256];
859     uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
860     struct {
861         uint32_t hi;
862         uint64_t child;
863         uint64_t parent;
864         uint64_t size;
865     } QEMU_PACKED ranges[] = {
866         {
867             cpu_to_be32(b_ss(1)), cpu_to_be64(0),
868             cpu_to_be64(phb->io_win_addr),
869             cpu_to_be64(memory_region_size(&phb->iospace)),
870         },
871         {
872             cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
873             cpu_to_be64(phb->mem_win_addr),
874             cpu_to_be64(memory_region_size(&phb->memwindow)),
875         },
876     };
877     uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
878     uint32_t interrupt_map_mask[] = {
879         cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
880     uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
881 
882     /* Start populating the FDT */
883     sprintf(nodename, "pci@%" PRIx64, phb->buid);
884     bus_off = fdt_add_subnode(fdt, 0, nodename);
885     if (bus_off < 0) {
886         return bus_off;
887     }
888 
889 #define _FDT(exp) \
890     do { \
891         int ret = (exp);                                           \
892         if (ret < 0) {                                             \
893             return ret;                                            \
894         }                                                          \
895     } while (0)
896 
897     /* Write PHB properties */
898     _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
899     _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
900     _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
901     _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
902     _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
903     _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
904     _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
905     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
906     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
907     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
908     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));
909 
910     /* Build the interrupt-map, this must matches what is done
911      * in pci_spapr_map_irq
912      */
913     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
914                      &interrupt_map_mask, sizeof(interrupt_map_mask)));
915     for (i = 0; i < PCI_SLOT_MAX; i++) {
916         for (j = 0; j < PCI_NUM_PINS; j++) {
917             uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
918             int lsi_num = pci_spapr_swizzle(i, j);
919 
920             irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
921             irqmap[1] = 0;
922             irqmap[2] = 0;
923             irqmap[3] = cpu_to_be32(j+1);
924             irqmap[4] = cpu_to_be32(xics_phandle);
925             irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
926             irqmap[6] = cpu_to_be32(0x8);
927         }
928     }
929     /* Write interrupt map */
930     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
931                      sizeof(interrupt_map)));
932 
933     object_child_foreach(OBJECT(phb), spapr_phb_children_dt,
934                          &((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off }));
935 
936     return 0;
937 }
938 
939 void spapr_pci_rtas_init(void)
940 {
941     spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
942                         rtas_read_pci_config);
943     spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
944                         rtas_write_pci_config);
945     spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
946                         rtas_ibm_read_pci_config);
947     spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
948                         rtas_ibm_write_pci_config);
949     if (msi_supported) {
950         spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
951                             "ibm,query-interrupt-source-number",
952                             rtas_ibm_query_interrupt_source_number);
953         spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
954                             rtas_ibm_change_msi);
955     }
956 }
957 
/* Register the sPAPR PHB type with QOM. */
static void spapr_pci_register_types(void)
{
    type_register_static(&spapr_phb_info);
}

/* Hook the registration function into type initialization */
type_init(spapr_pci_register_types)
964