xref: /openbmc/qemu/hw/ppc/spapr_pci.c (revision a75ed3c43064528f3409f0be286b62b9c3a47218)
1  /*
2   * QEMU sPAPR PCI host originated from Uninorth PCI host
3   *
4   * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
5   * Copyright (C) 2011 David Gibson, IBM Corporation.
6   *
7   * Permission is hereby granted, free of charge, to any person obtaining a copy
8   * of this software and associated documentation files (the "Software"), to deal
9   * in the Software without restriction, including without limitation the rights
10   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11   * copies of the Software, and to permit persons to whom the Software is
12   * furnished to do so, subject to the following conditions:
13   *
14   * The above copyright notice and this permission notice shall be included in
15   * all copies or substantial portions of the Software.
16   *
17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23   * THE SOFTWARE.
24   */
25  
26  #include "qemu/osdep.h"
27  #include "qapi/error.h"
28  #include "hw/irq.h"
29  #include "hw/sysbus.h"
30  #include "migration/vmstate.h"
31  #include "hw/pci/pci.h"
32  #include "hw/pci/msi.h"
33  #include "hw/pci/msix.h"
34  #include "hw/pci/pci_host.h"
35  #include "hw/ppc/spapr.h"
36  #include "hw/pci-host/spapr.h"
37  #include "exec/ram_addr.h"
38  #include <libfdt.h>
39  #include "trace.h"
40  #include "qemu/error-report.h"
41  #include "qemu/module.h"
42  #include "qapi/qmp/qerror.h"
43  #include "hw/ppc/fdt.h"
44  #include "hw/pci/pci_bridge.h"
45  #include "hw/pci/pci_bus.h"
46  #include "hw/pci/pci_ids.h"
47  #include "hw/ppc/spapr_drc.h"
48  #include "hw/qdev-properties.h"
49  #include "sysemu/device_tree.h"
50  #include "sysemu/kvm.h"
51  #include "sysemu/hostmem.h"
52  #include "sysemu/numa.h"
53  #include "hw/ppc/spapr_numa.h"
54  #include "qemu/log.h"
55  
/*
 * Function codes, as taken from the kernel's
 * arch/powerpc/platforms/pseries/msi.c
 */
#define RTAS_QUERY_FN           0
#define RTAS_CHANGE_FN          1
#define RTAS_RESET_FN           2
#define RTAS_CHANGE_MSI_FN      3
#define RTAS_CHANGE_MSIX_FN     4

/* Interrupt type reported back to the guest for the RTAS_CHANGE_* codes */
#define RTAS_TYPE_MSI           1
#define RTAS_TYPE_MSIX          2
66  
67  SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid)
68  {
69      SpaprPhbState *sphb;
70  
71      QLIST_FOREACH(sphb, &spapr->phbs, list) {
72          if (sphb->buid != buid) {
73              continue;
74          }
75          return sphb;
76      }
77  
78      return NULL;
79  }
80  
81  PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid,
82                                uint32_t config_addr)
83  {
84      SpaprPhbState *sphb = spapr_pci_find_phb(spapr, buid);
85      PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
86      int bus_num = (config_addr >> 16) & 0xFF;
87      int devfn = (config_addr >> 8) & 0xFF;
88  
89      if (!phb) {
90          return NULL;
91      }
92  
93      return pci_find_device(phb->bus, bus_num, devfn);
94  }
95  
/*
 * Convert an RTAS config address argument into a config space offset.
 *
 * Bits 7:0 of @arg are the low part of the register number; bits 31:28
 * carry the extended register bits and end up in bits 11:8 of the result.
 */
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    uint32_t extended = (arg >> 20) & 0xf00;
    uint32_t reg = arg & 0xff;

    return extended | reg;
}
101  
102  static void finish_read_pci_config(SpaprMachineState *spapr, uint64_t buid,
103                                     uint32_t addr, uint32_t size,
104                                     target_ulong rets)
105  {
106      PCIDevice *pci_dev;
107      uint32_t val;
108  
109      if ((size != 1) && (size != 2) && (size != 4)) {
110          /* access must be 1, 2 or 4 bytes */
111          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
112          return;
113      }
114  
115      pci_dev = spapr_pci_find_dev(spapr, buid, addr);
116      addr = rtas_pci_cfgaddr(addr);
117  
118      if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
119          /* Access must be to a valid device, within bounds and
120           * naturally aligned */
121          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
122          return;
123      }
124  
125      val = pci_host_config_read_common(pci_dev, addr,
126                                        pci_config_size(pci_dev), size);
127  
128      rtas_st(rets, 0, RTAS_OUT_SUCCESS);
129      rtas_st(rets, 1, val);
130  }
131  
132  static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
133                                       uint32_t token, uint32_t nargs,
134                                       target_ulong args,
135                                       uint32_t nret, target_ulong rets)
136  {
137      uint64_t buid;
138      uint32_t size, addr;
139  
140      if ((nargs != 4) || (nret != 2)) {
141          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
142          return;
143      }
144  
145      buid = rtas_ldq(args, 1);
146      size = rtas_ld(args, 3);
147      addr = rtas_ld(args, 0);
148  
149      finish_read_pci_config(spapr, buid, addr, size, rets);
150  }
151  
152  static void rtas_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
153                                   uint32_t token, uint32_t nargs,
154                                   target_ulong args,
155                                   uint32_t nret, target_ulong rets)
156  {
157      uint32_t size, addr;
158  
159      if ((nargs != 2) || (nret != 2)) {
160          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
161          return;
162      }
163  
164      size = rtas_ld(args, 1);
165      addr = rtas_ld(args, 0);
166  
167      finish_read_pci_config(spapr, 0, addr, size, rets);
168  }
169  
170  static void finish_write_pci_config(SpaprMachineState *spapr, uint64_t buid,
171                                      uint32_t addr, uint32_t size,
172                                      uint32_t val, target_ulong rets)
173  {
174      PCIDevice *pci_dev;
175  
176      if ((size != 1) && (size != 2) && (size != 4)) {
177          /* access must be 1, 2 or 4 bytes */
178          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
179          return;
180      }
181  
182      pci_dev = spapr_pci_find_dev(spapr, buid, addr);
183      addr = rtas_pci_cfgaddr(addr);
184  
185      if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
186          /* Access must be to a valid device, within bounds and
187           * naturally aligned */
188          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
189          return;
190      }
191  
192      pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
193                                   val, size);
194  
195      rtas_st(rets, 0, RTAS_OUT_SUCCESS);
196  }
197  
198  static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
199                                        uint32_t token, uint32_t nargs,
200                                        target_ulong args,
201                                        uint32_t nret, target_ulong rets)
202  {
203      uint64_t buid;
204      uint32_t val, size, addr;
205  
206      if ((nargs != 5) || (nret != 1)) {
207          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
208          return;
209      }
210  
211      buid = rtas_ldq(args, 1);
212      val = rtas_ld(args, 4);
213      size = rtas_ld(args, 3);
214      addr = rtas_ld(args, 0);
215  
216      finish_write_pci_config(spapr, buid, addr, size, val, rets);
217  }
218  
219  static void rtas_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
220                                    uint32_t token, uint32_t nargs,
221                                    target_ulong args,
222                                    uint32_t nret, target_ulong rets)
223  {
224      uint32_t val, size, addr;
225  
226      if ((nargs != 3) || (nret != 1)) {
227          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
228          return;
229      }
230  
231  
232      val = rtas_ld(args, 2);
233      size = rtas_ld(args, 1);
234      addr = rtas_ld(args, 0);
235  
236      finish_write_pci_config(spapr, 0, addr, size, val, rets);
237  }
238  
239  /*
240   * Set MSI/MSIX message data.
241   * This is required for msi_notify()/msix_notify() which
242   * will write at the addresses via spapr_msi_write().
243   *
244   * If hwaddr == 0, all entries will have .data == first_irq i.e.
245   * table will be reset.
246   */
247  static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
248                               unsigned first_irq, unsigned req_num)
249  {
250      unsigned i;
251      MSIMessage msg = { .address = addr, .data = first_irq };
252  
253      if (!msix) {
254          msi_set_message(pdev, msg);
255          trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
256          return;
257      }
258  
259      for (i = 0; i < req_num; ++i) {
260          msix_set_message(pdev, i, msg);
261          trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
262          if (addr) {
263              ++msg.data;
264          }
265      }
266  }
267  
268  static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
269                                  uint32_t token, uint32_t nargs,
270                                  target_ulong args, uint32_t nret,
271                                  target_ulong rets)
272  {
273      SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
274      uint32_t config_addr = rtas_ld(args, 0);
275      uint64_t buid = rtas_ldq(args, 1);
276      unsigned int func = rtas_ld(args, 3);
277      unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
278      unsigned int seq_num = rtas_ld(args, 5);
279      unsigned int ret_intr_type;
280      unsigned int irq, max_irqs = 0;
281      SpaprPhbState *phb = NULL;
282      PCIDevice *pdev = NULL;
283      SpaprPciMsi *msi;
284      int *config_addr_key;
285      Error *err = NULL;
286      int i;
287  
288      /* Fins SpaprPhbState */
289      phb = spapr_pci_find_phb(spapr, buid);
290      if (phb) {
291          pdev = spapr_pci_find_dev(spapr, buid, config_addr);
292      }
293      if (!phb || !pdev) {
294          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
295          return;
296      }
297  
298      switch (func) {
299      case RTAS_CHANGE_FN:
300          if (msi_present(pdev)) {
301              ret_intr_type = RTAS_TYPE_MSI;
302          } else if (msix_present(pdev)) {
303              ret_intr_type = RTAS_TYPE_MSIX;
304          } else {
305              rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
306              return;
307          }
308          break;
309      case RTAS_CHANGE_MSI_FN:
310          if (msi_present(pdev)) {
311              ret_intr_type = RTAS_TYPE_MSI;
312          } else {
313              rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
314              return;
315          }
316          break;
317      case RTAS_CHANGE_MSIX_FN:
318          if (msix_present(pdev)) {
319              ret_intr_type = RTAS_TYPE_MSIX;
320          } else {
321              rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
322              return;
323          }
324          break;
325      default:
326          error_report("rtas_ibm_change_msi(%u) is not implemented", func);
327          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
328          return;
329      }
330  
331      msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr);
332  
333      /* Releasing MSIs */
334      if (!req_num) {
335          if (!msi) {
336              trace_spapr_pci_msi("Releasing wrong config", config_addr);
337              rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
338              return;
339          }
340  
341          if (msi_present(pdev)) {
342              spapr_msi_setmsg(pdev, 0, false, 0, 0);
343          }
344          if (msix_present(pdev)) {
345              spapr_msi_setmsg(pdev, 0, true, 0, 0);
346          }
347          g_hash_table_remove(phb->msi, &config_addr);
348  
349          trace_spapr_pci_msi("Released MSIs", config_addr);
350          rtas_st(rets, 0, RTAS_OUT_SUCCESS);
351          rtas_st(rets, 1, 0);
352          return;
353      }
354  
355      /* Enabling MSI */
356  
357      /* Check if the device supports as many IRQs as requested */
358      if (ret_intr_type == RTAS_TYPE_MSI) {
359          max_irqs = msi_nr_vectors_allocated(pdev);
360      } else if (ret_intr_type == RTAS_TYPE_MSIX) {
361          max_irqs = pdev->msix_entries_nr;
362      }
363      if (!max_irqs) {
364          error_report("Requested interrupt type %d is not enabled for device %x",
365                       ret_intr_type, config_addr);
366          rtas_st(rets, 0, -1); /* Hardware error */
367          return;
368      }
369      /* Correct the number if the guest asked for too many */
370      if (req_num > max_irqs) {
371          trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
372          req_num = max_irqs;
373          irq = 0; /* to avoid misleading trace */
374          goto out;
375      }
376  
377      /* Allocate MSIs */
378      if (smc->legacy_irq_allocation) {
379          irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
380                               &err);
381      } else {
382          irq = spapr_irq_msi_alloc(spapr, req_num,
383                                    ret_intr_type == RTAS_TYPE_MSI, &err);
384      }
385      if (err) {
386          error_reportf_err(err, "Can't allocate MSIs for device %x: ",
387                            config_addr);
388          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
389          return;
390      }
391  
392      for (i = 0; i < req_num; i++) {
393          spapr_irq_claim(spapr, irq + i, false, &err);
394          if (err) {
395              if (i) {
396                  spapr_irq_free(spapr, irq, i);
397              }
398              if (!smc->legacy_irq_allocation) {
399                  spapr_irq_msi_free(spapr, irq, req_num);
400              }
401              error_reportf_err(err, "Can't allocate MSIs for device %x: ",
402                                config_addr);
403              rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
404              return;
405          }
406      }
407  
408      /* Release previous MSIs */
409      if (msi) {
410          g_hash_table_remove(phb->msi, &config_addr);
411      }
412  
413      /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
414      spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
415                       irq, req_num);
416  
417      /* Add MSI device to cache */
418      msi = g_new(SpaprPciMsi, 1);
419      msi->first_irq = irq;
420      msi->num = req_num;
421      config_addr_key = g_new(int, 1);
422      *config_addr_key = config_addr;
423      g_hash_table_insert(phb->msi, config_addr_key, msi);
424  
425  out:
426      rtas_st(rets, 0, RTAS_OUT_SUCCESS);
427      rtas_st(rets, 1, req_num);
428      rtas_st(rets, 2, ++seq_num);
429      if (nret > 3) {
430          rtas_st(rets, 3, ret_intr_type);
431      }
432  
433      trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
434  }
435  
436  static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
437                                                     SpaprMachineState *spapr,
438                                                     uint32_t token,
439                                                     uint32_t nargs,
440                                                     target_ulong args,
441                                                     uint32_t nret,
442                                                     target_ulong rets)
443  {
444      uint32_t config_addr = rtas_ld(args, 0);
445      uint64_t buid = rtas_ldq(args, 1);
446      unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
447      SpaprPhbState *phb = NULL;
448      PCIDevice *pdev = NULL;
449      SpaprPciMsi *msi;
450  
451      /* Find SpaprPhbState */
452      phb = spapr_pci_find_phb(spapr, buid);
453      if (phb) {
454          pdev = spapr_pci_find_dev(spapr, buid, config_addr);
455      }
456      if (!phb || !pdev) {
457          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
458          return;
459      }
460  
461      /* Find device descriptor and start IRQ */
462      msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr);
463      if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
464          trace_spapr_pci_msi("Failed to return vector", config_addr);
465          rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
466          return;
467      }
468      intr_src_num = msi->first_irq + ioa_intr_num;
469      trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
470                                                             intr_src_num);
471  
472      rtas_st(rets, 0, RTAS_OUT_SUCCESS);
473      rtas_st(rets, 1, intr_src_num);
474      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
475  }
476  
477  static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
478                                      SpaprMachineState *spapr,
479                                      uint32_t token, uint32_t nargs,
480                                      target_ulong args, uint32_t nret,
481                                      target_ulong rets)
482  {
483      SpaprPhbState *sphb;
484      uint32_t addr, option;
485      uint64_t buid;
486      int ret;
487  
488      if ((nargs != 4) || (nret != 1)) {
489          goto param_error_exit;
490      }
491  
492      buid = rtas_ldq(args, 1);
493      addr = rtas_ld(args, 0);
494      option = rtas_ld(args, 3);
495  
496      sphb = spapr_pci_find_phb(spapr, buid);
497      if (!sphb) {
498          goto param_error_exit;
499      }
500  
501      if (!spapr_phb_eeh_available(sphb)) {
502          goto param_error_exit;
503      }
504  
505      ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option);
506      rtas_st(rets, 0, ret);
507      return;
508  
509  param_error_exit:
510      rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
511  }
512  
513  static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
514                                             SpaprMachineState *spapr,
515                                             uint32_t token, uint32_t nargs,
516                                             target_ulong args, uint32_t nret,
517                                             target_ulong rets)
518  {
519      SpaprPhbState *sphb;
520      PCIDevice *pdev;
521      uint32_t addr, option;
522      uint64_t buid;
523  
524      if ((nargs != 4) || (nret != 2)) {
525          goto param_error_exit;
526      }
527  
528      buid = rtas_ldq(args, 1);
529      sphb = spapr_pci_find_phb(spapr, buid);
530      if (!sphb) {
531          goto param_error_exit;
532      }
533  
534      if (!spapr_phb_eeh_available(sphb)) {
535          goto param_error_exit;
536      }
537  
538      /*
539       * We always have PE address of form "00BB0001". "BB"
540       * represents the bus number of PE's primary bus.
541       */
542      option = rtas_ld(args, 3);
543      switch (option) {
544      case RTAS_GET_PE_ADDR:
545          addr = rtas_ld(args, 0);
546          pdev = spapr_pci_find_dev(spapr, buid, addr);
547          if (!pdev) {
548              goto param_error_exit;
549          }
550  
551          rtas_st(rets, 1, (pci_bus_num(pci_get_bus(pdev)) << 16) + 1);
552          break;
553      case RTAS_GET_PE_MODE:
554          rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
555          break;
556      default:
557          goto param_error_exit;
558      }
559  
560      rtas_st(rets, 0, RTAS_OUT_SUCCESS);
561      return;
562  
563  param_error_exit:
564      rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
565  }
566  
567  static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
568                                              SpaprMachineState *spapr,
569                                              uint32_t token, uint32_t nargs,
570                                              target_ulong args, uint32_t nret,
571                                              target_ulong rets)
572  {
573      SpaprPhbState *sphb;
574      uint64_t buid;
575      int state, ret;
576  
577      if ((nargs != 3) || (nret != 4 && nret != 5)) {
578          goto param_error_exit;
579      }
580  
581      buid = rtas_ldq(args, 1);
582      sphb = spapr_pci_find_phb(spapr, buid);
583      if (!sphb) {
584          goto param_error_exit;
585      }
586  
587      if (!spapr_phb_eeh_available(sphb)) {
588          goto param_error_exit;
589      }
590  
591      ret = spapr_phb_vfio_eeh_get_state(sphb, &state);
592      rtas_st(rets, 0, ret);
593      if (ret != RTAS_OUT_SUCCESS) {
594          return;
595      }
596  
597      rtas_st(rets, 1, state);
598      rtas_st(rets, 2, RTAS_EEH_SUPPORT);
599      rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO);
600      if (nret >= 5) {
601          rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO);
602      }
603      return;
604  
605  param_error_exit:
606      rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
607  }
608  
609  static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
610                                      SpaprMachineState *spapr,
611                                      uint32_t token, uint32_t nargs,
612                                      target_ulong args, uint32_t nret,
613                                      target_ulong rets)
614  {
615      SpaprPhbState *sphb;
616      uint32_t option;
617      uint64_t buid;
618      int ret;
619  
620      if ((nargs != 4) || (nret != 1)) {
621          goto param_error_exit;
622      }
623  
624      buid = rtas_ldq(args, 1);
625      option = rtas_ld(args, 3);
626      sphb = spapr_pci_find_phb(spapr, buid);
627      if (!sphb) {
628          goto param_error_exit;
629      }
630  
631      if (!spapr_phb_eeh_available(sphb)) {
632          goto param_error_exit;
633      }
634  
635      ret = spapr_phb_vfio_eeh_reset(sphb, option);
636      rtas_st(rets, 0, ret);
637      return;
638  
639  param_error_exit:
640      rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
641  }
642  
643  static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
644                                    SpaprMachineState *spapr,
645                                    uint32_t token, uint32_t nargs,
646                                    target_ulong args, uint32_t nret,
647                                    target_ulong rets)
648  {
649      SpaprPhbState *sphb;
650      uint64_t buid;
651      int ret;
652  
653      if ((nargs != 3) || (nret != 1)) {
654          goto param_error_exit;
655      }
656  
657      buid = rtas_ldq(args, 1);
658      sphb = spapr_pci_find_phb(spapr, buid);
659      if (!sphb) {
660          goto param_error_exit;
661      }
662  
663      if (!spapr_phb_eeh_available(sphb)) {
664          goto param_error_exit;
665      }
666  
667      ret = spapr_phb_vfio_eeh_configure(sphb);
668      rtas_st(rets, 0, ret);
669      return;
670  
671  param_error_exit:
672      rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
673  }
674  
675  /* To support it later */
676  static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
677                                         SpaprMachineState *spapr,
678                                         uint32_t token, uint32_t nargs,
679                                         target_ulong args, uint32_t nret,
680                                         target_ulong rets)
681  {
682      SpaprPhbState *sphb;
683      int option;
684      uint64_t buid;
685  
686      if ((nargs != 8) || (nret != 1)) {
687          goto param_error_exit;
688      }
689  
690      buid = rtas_ldq(args, 1);
691      sphb = spapr_pci_find_phb(spapr, buid);
692      if (!sphb) {
693          goto param_error_exit;
694      }
695  
696      if (!spapr_phb_eeh_available(sphb)) {
697          goto param_error_exit;
698      }
699  
700      option = rtas_ld(args, 7);
701      switch (option) {
702      case RTAS_SLOT_TEMP_ERR_LOG:
703      case RTAS_SLOT_PERM_ERR_LOG:
704          break;
705      default:
706          goto param_error_exit;
707      }
708  
709      /* We don't have error log yet */
710      rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
711      return;
712  
713  param_error_exit:
714      rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
715  }
716  
717  static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
718  {
719      /*
720       * Here we use the number returned by pci_swizzle_map_irq_fn to find a
721       * corresponding qemu_irq.
722       */
723      SpaprPhbState *phb = opaque;
724      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
725  
726      trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
727      qemu_set_irq(spapr_qirq(spapr, phb->lsi_table[irq_num].irq), level);
728  }
729  
730  static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
731  {
732      SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
733      PCIINTxRoute route;
734  
735      route.mode = PCI_INTX_ENABLED;
736      route.irq = sphb->lsi_table[pin].irq;
737  
738      return route;
739  }
740  
741  static uint64_t spapr_msi_read(void *opaque, hwaddr addr, unsigned size)
742  {
743      qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__);
744      return 0;
745  }
746  
747  /*
748   * MSI/MSIX memory region implementation.
749   * The handler handles both MSI and MSIX.
750   * The vector number is encoded in least bits in data.
751   */
752  static void spapr_msi_write(void *opaque, hwaddr addr,
753                              uint64_t data, unsigned size)
754  {
755      SpaprMachineState *spapr = opaque;
756      uint32_t irq = data;
757  
758      trace_spapr_pci_msi_write(addr, data, irq);
759  
760      qemu_irq_pulse(spapr_qirq(spapr, irq));
761  }
762  
763  static const MemoryRegionOps spapr_msi_ops = {
764      /*
765       * .read result is undefined by PCI spec.
766       * define .read method to avoid assert failure in memory_region_init_io
767       */
768      .read = spapr_msi_read,
769      .write = spapr_msi_write,
770      .endianness = DEVICE_LITTLE_ENDIAN
771  };
772  
773  /*
774   * PHB PCI device
775   */
776  static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
777  {
778      SpaprPhbState *phb = opaque;
779  
780      return &phb->iommu_as;
781  }
782  
783  static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb,  PCIDevice *pdev)
784  {
785      g_autofree char *path = NULL;
786      g_autofree char *host = NULL;
787      g_autofree char *devspec = NULL;
788      char *buf = NULL;
789  
790      /* Get the PCI VFIO host id */
791      host = object_property_get_str(OBJECT(pdev), "host", NULL);
792      if (!host) {
793          return NULL;
794      }
795  
796      /* Construct the path of the file that will give us the DT location */
797      path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host);
798      if (!g_file_get_contents(path, &devspec, NULL, NULL)) {
799          return NULL;
800      }
801  
802      /* Construct and read from host device tree the loc-code */
803      g_free(path);
804      path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", devspec);
805      if (!g_file_get_contents(path, &buf, NULL, NULL)) {
806          return NULL;
807      }
808      return buf;
809  }
810  
811  static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev)
812  {
813      char *buf;
814      const char *devtype = "qemu";
815      uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
816  
817      if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
818          buf = spapr_phb_vfio_get_loc_code(sphb, pdev);
819          if (buf) {
820              return buf;
821          }
822          devtype = "vfio";
823      }
824      /*
825       * For emulated devices and VFIO-failure case, make up
826       * the loc-code.
827       */
828      buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x",
829                            devtype, pdev->name, sphb->index, busnr,
830                            PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
831      return buf;
832  }
833  
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) keeps the low @l bits of @x and places them at bit
 * position @p.  The arithmetic is done on unsigned 32-bit values so that
 * filling bit 31 (b_n) never shifts into the sign bit of a signed int.
 * @l must be smaller than 32.
 */
#define b_x(x, p, l)    ((((uint32_t)(x)) & ((1U << (l)) - 1U)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
844  
845  /* for 'reg' OF properties */
846  #define RESOURCE_CELLS_SIZE 2
847  #define RESOURCE_CELLS_ADDRESS 3
848  
849  typedef struct ResourceFields {
850      uint32_t phys_hi;
851      uint32_t phys_mid;
852      uint32_t phys_lo;
853      uint32_t size_hi;
854      uint32_t size_lo;
855  } QEMU_PACKED ResourceFields;
856  
857  typedef struct ResourceProps {
858      ResourceFields reg[8];
859      uint32_t reg_len;
860  } ResourceProps;
861  
862  /* fill in the 'reg' OF properties for
863   * a PCI device. 'reg' describes resource requirements for a
864   * device's IO/MEM regions.
865   *
866   * the property is an array of ('phys-addr', 'size') pairs describing
867   * the addressable regions of the PCI device, where 'phys-addr' is a
868   * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to
869   * (phys.hi, phys.mid, phys.lo), and 'size' is a
870   * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo).
871   *
872   * phys.hi = 0xYYXXXXZZ, where:
873   *   0xYY = npt000ss
874   *          |||   |
875   *          |||   +-- space code
876   *          |||               |
877   *          |||               +  00 if configuration space
878   *          |||               +  01 if IO region,
879   *          |||               +  10 if 32-bit MEM region
880   *          |||               +  11 if 64-bit MEM region
881   *          |||
882   *          ||+------ for non-relocatable IO: 1 if aliased
883   *          ||        for relocatable IO: 1 if below 64KB
884   *          ||        for MEM: 1 if below 1MB
885   *          |+------- 1 if region is prefetchable
886   *          +-------- 1 if region is non-relocatable
887   *   0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function
888   *            bits respectively
889   *   0xZZ = rrrrrrrr, the register number of the BAR corresponding
890   *          to the region
891   *
892   * phys.mid and phys.lo correspond respectively to the hi/lo portions
893   * of the actual address of the region.
894   *
895   * note also that addresses defined in this property are, at least
896   * for PAPR guests, relative to the PHBs IO/MEM windows, and
897   * correspond directly to the addresses in the BARs.
898   *
899   * in accordance with PCI Bus Binding to Open Firmware,
900   * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7,
901   * Appendix C.
902   */
903  static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
904  {
905      int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d))));
906      uint32_t dev_id = (b_bbbbbbbb(bus_num) |
907                         b_ddddd(PCI_SLOT(d->devfn)) |
908                         b_fff(PCI_FUNC(d->devfn)));
909      ResourceFields *reg;
910      int i, reg_idx = 0;
911  
912      /* config space region */
913      reg = &rp->reg[reg_idx++];
914      reg->phys_hi = cpu_to_be32(dev_id);
915      reg->phys_mid = 0;
916      reg->phys_lo = 0;
917      reg->size_hi = 0;
918      reg->size_lo = 0;
919  
920      for (i = 0; i < PCI_NUM_REGIONS; i++) {
921          if (!d->io_regions[i].size) {
922              continue;
923          }
924  
925          reg = &rp->reg[reg_idx++];
926  
927          reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
928          if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
929              reg->phys_hi |= cpu_to_be32(b_ss(1));
930          } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
931              reg->phys_hi |= cpu_to_be32(b_ss(3));
932          } else {
933              reg->phys_hi |= cpu_to_be32(b_ss(2));
934          }
935          reg->phys_mid = 0;
936          reg->phys_lo = 0;
937          reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32);
938          reg->size_lo = cpu_to_be32(d->io_regions[i].size);
939      }
940  
941      rp->reg_len = reg_idx * sizeof(ResourceFields);
942  }
943  
/* Maps a programming-interface value to a device tree node name */
typedef struct PCIIFace {
    int iface;
    const char *name;
} PCIIFace;

/*
 * Maps a sub-class value to a device tree node name; @iface optionally
 * points to a table refining the name by programming interface.
 */
typedef struct PCISubClass {
    int subclass;
    const char *name;
    const PCIIFace *iface;
} PCISubClass;

/* Default node name of a class plus its optional sub-class table */
typedef struct PCIClass {
    const char *name;
    const PCISubClass *subc;
} PCIClass;
963  
964  static const PCISubClass undef_subclass[] = {
965      { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
966      { 0xFF, NULL, NULL },
967  };
968  
969  static const PCISubClass mass_subclass[] = {
970      { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
971      { PCI_CLASS_STORAGE_IDE, "ide", NULL },
972      { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
973      { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
974      { PCI_CLASS_STORAGE_RAID, "raid", NULL },
975      { PCI_CLASS_STORAGE_ATA, "ata", NULL },
976      { PCI_CLASS_STORAGE_SATA, "sata", NULL },
977      { PCI_CLASS_STORAGE_SAS, "sas", NULL },
978      { 0xFF, NULL, NULL },
979  };
980  
981  static const PCISubClass net_subclass[] = {
982      { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
983      { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
984      { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
985      { PCI_CLASS_NETWORK_ATM, "atm", NULL },
986      { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
987      { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
988      { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
989      { 0xFF, NULL, NULL },
990  };
991  
992  static const PCISubClass displ_subclass[] = {
993      { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
994      { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
995      { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
996      { 0xFF, NULL, NULL },
997  };
998  
999  static const PCISubClass media_subclass[] = {
1000      { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
1001      { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
1002      { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
1003      { 0xFF, NULL, NULL },
1004  };
1005  
1006  static const PCISubClass mem_subclass[] = {
1007      { PCI_CLASS_MEMORY_RAM, "memory", NULL },
1008      { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
1009      { 0xFF, NULL, NULL },
1010  };
1011  
1012  static const PCISubClass bridg_subclass[] = {
1013      { PCI_CLASS_BRIDGE_HOST, "host", NULL },
1014      { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
1015      { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
1016      { PCI_CLASS_BRIDGE_MC, "mca", NULL },
1017      { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
1018      { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
1019      { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
1020      { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
1021      { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
1022      { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
1023      { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
1024      { 0xFF, NULL, NULL },
1025  };
1026  
1027  static const PCISubClass comm_subclass[] = {
1028      { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
1029      { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
1030      { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
1031      { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
1032      { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
1033      { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
1034      { 0xFF, NULL, NULL, },
1035  };
1036  
1037  static const PCIIFace pic_iface[] = {
1038      { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
1039      { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
1040      { 0xFF, NULL },
1041  };
1042  
1043  static const PCISubClass sys_subclass[] = {
1044      { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
1045      { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
1046      { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
1047      { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
1048      { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
1049      { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
1050      { 0xFF, NULL, NULL },
1051  };
1052  
1053  static const PCISubClass inp_subclass[] = {
1054      { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
1055      { PCI_CLASS_INPUT_PEN, "pen", NULL },
1056      { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
1057      { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
1058      { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
1059      { 0xFF, NULL, NULL },
1060  };
1061  
1062  static const PCISubClass dock_subclass[] = {
1063      { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
1064      { 0xFF, NULL, NULL },
1065  };
1066  
1067  static const PCISubClass cpu_subclass[] = {
1068      { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
1069      { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
1070      { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
1071      { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
1072      { 0xFF, NULL, NULL },
1073  };
1074  
1075  static const PCIIFace usb_iface[] = {
1076      { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
1077      { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
1078      { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
1079      { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
1080      { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
1081      { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
1082      { 0xFF, NULL },
1083  };
1084  
1085  static const PCISubClass ser_subclass[] = {
1086      { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
1087      { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
1088      { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
1089      { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
1090      { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
1091      { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
1092      { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
1093      { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
1094      { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
1095      { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
1096      { 0xFF, NULL, NULL },
1097  };
1098  
1099  static const PCISubClass wrl_subclass[] = {
1100      { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
1101      { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
1102      { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
1103      { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
1104      { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
1105      { 0xFF, NULL, NULL },
1106  };
1107  
1108  static const PCISubClass sat_subclass[] = {
1109      { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
1110      { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
1111      { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
1112      { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
1113      { 0xFF, NULL, NULL },
1114  };
1115  
1116  static const PCISubClass crypt_subclass[] = {
1117      { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
1118      { PCI_CLASS_CRYPT_ENTERTAINMENT,
1119        "entertainment-encryption", NULL },
1120      { 0xFF, NULL, NULL },
1121  };
1122  
1123  static const PCISubClass spc_subclass[] = {
1124      { PCI_CLASS_SP_DPIO, "dpio", NULL },
1125      { PCI_CLASS_SP_PERF, "counter", NULL },
1126      { PCI_CLASS_SP_SYNCH, "measurement", NULL },
1127      { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
1128      { 0xFF, NULL, NULL },
1129  };
1130  
1131  static const PCIClass pci_classes[] = {
1132      { "legacy-device", undef_subclass },
1133      { "mass-storage",  mass_subclass },
1134      { "network", net_subclass },
1135      { "display", displ_subclass, },
1136      { "multimedia-device", media_subclass },
1137      { "memory-controller", mem_subclass },
1138      { "unknown-bridge", bridg_subclass },
1139      { "communication-controller", comm_subclass},
1140      { "system-peripheral", sys_subclass },
1141      { "input-controller", inp_subclass },
1142      { "docking-station", dock_subclass },
1143      { "cpu", cpu_subclass },
1144      { "serial-bus", ser_subclass },
1145      { "wireless-controller", wrl_subclass },
1146      { "intelligent-io", NULL },
1147      { "satellite-device", sat_subclass },
1148      { "encryption", crypt_subclass },
1149      { "data-processing-controller", spc_subclass },
1150  };
1151  
1152  static const char *dt_name_from_class(uint8_t class, uint8_t subclass,
1153                                        uint8_t iface)
1154  {
1155      const PCIClass *pclass;
1156      const PCISubClass *psubclass;
1157      const PCIIFace *piface;
1158      const char *name;
1159  
1160      if (class >= ARRAY_SIZE(pci_classes)) {
1161          return "pci";
1162      }
1163  
1164      pclass = pci_classes + class;
1165      name = pclass->name;
1166  
1167      if (pclass->subc == NULL) {
1168          return name;
1169      }
1170  
1171      psubclass = pclass->subc;
1172      while ((psubclass->subclass & 0xff) != 0xff) {
1173          if ((psubclass->subclass & 0xff) == subclass) {
1174              name = psubclass->name;
1175              break;
1176          }
1177          psubclass++;
1178      }
1179  
1180      piface = psubclass->iface;
1181      if (piface == NULL) {
1182          return name;
1183      }
1184      while ((piface->iface & 0xff) != 0xff) {
1185          if ((piface->iface & 0xff) == iface) {
1186              name = piface->name;
1187              break;
1188          }
1189          piface++;
1190      }
1191  
1192      return name;
1193  }
1194  
1195  /*
1196   * DRC helper functions
1197   */
1198  
1199  static uint32_t drc_id_from_devfn(SpaprPhbState *phb,
1200                                    uint8_t chassis, int32_t devfn)
1201  {
1202      return (phb->index << 16) | (chassis << 8) | devfn;
1203  }
1204  
1205  static SpaprDrc *drc_from_devfn(SpaprPhbState *phb,
1206                                  uint8_t chassis, int32_t devfn)
1207  {
1208      return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI,
1209                             drc_id_from_devfn(phb, chassis, devfn));
1210  }
1211  
1212  static uint8_t chassis_from_bus(PCIBus *bus)
1213  {
1214      if (pci_bus_is_root(bus)) {
1215          return 0;
1216      } else {
1217          PCIDevice *bridge = pci_bridge_get_device(bus);
1218  
1219          return object_property_get_uint(OBJECT(bridge), "chassis_nr",
1220                                          &error_abort);
1221      }
1222  }
1223  
1224  static SpaprDrc *drc_from_dev(SpaprPhbState *phb, PCIDevice *dev)
1225  {
1226      uint8_t chassis = chassis_from_bus(pci_get_bus(dev));
1227  
1228      return drc_from_devfn(phb, chassis, dev->devfn);
1229  }
1230  
1231  static void add_drcs(SpaprPhbState *phb, PCIBus *bus)
1232  {
1233      Object *owner;
1234      int i;
1235      uint8_t chassis;
1236  
1237      if (!phb->dr_enabled) {
1238          return;
1239      }
1240  
1241      chassis = chassis_from_bus(bus);
1242  
1243      if (pci_bus_is_root(bus)) {
1244          owner = OBJECT(phb);
1245      } else {
1246          owner = OBJECT(pci_bridge_get_device(bus));
1247      }
1248  
1249      for (i = 0; i < PCI_SLOT_MAX * PCI_FUNC_MAX; i++) {
1250          spapr_dr_connector_new(owner, TYPE_SPAPR_DRC_PCI,
1251                                 drc_id_from_devfn(phb, chassis, i));
1252      }
1253  }
1254  
1255  static void remove_drcs(SpaprPhbState *phb, PCIBus *bus)
1256  {
1257      int i;
1258      uint8_t chassis;
1259  
1260      if (!phb->dr_enabled) {
1261          return;
1262      }
1263  
1264      chassis = chassis_from_bus(bus);
1265  
1266      for (i = PCI_SLOT_MAX * PCI_FUNC_MAX - 1; i >= 0; i--) {
1267          SpaprDrc *drc = drc_from_devfn(phb, chassis, i);
1268  
1269          if (drc) {
1270              object_unparent(OBJECT(drc));
1271          }
1272      }
1273  }
1274  
1275  typedef struct PciWalkFdt {
1276      void *fdt;
1277      int offset;
1278      SpaprPhbState *sphb;
1279      int err;
1280  } PciWalkFdt;
1281  
1282  static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
1283                                 void *fdt, int parent_offset);
1284  
1285  static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
1286                                     void *opaque)
1287  {
1288      PciWalkFdt *p = opaque;
1289      int err;
1290  
1291      if (p->err) {
1292          /* Something's already broken, don't keep going */
1293          return;
1294      }
1295  
1296      err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset);
1297      if (err < 0) {
1298          p->err = err;
1299      }
1300  }
1301  
1302  /* Augment PCI device node with bridge specific information */
1303  static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus,
1304                                 void *fdt, int offset)
1305  {
1306      Object *owner;
1307      PciWalkFdt cbinfo = {
1308          .fdt = fdt,
1309          .offset = offset,
1310          .sphb = sphb,
1311          .err = 0,
1312      };
1313      int ret;
1314  
1315      _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
1316                            RESOURCE_CELLS_ADDRESS));
1317      _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
1318                            RESOURCE_CELLS_SIZE));
1319  
1320      assert(bus);
1321      pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &cbinfo);
1322      if (cbinfo.err) {
1323          return cbinfo.err;
1324      }
1325  
1326      if (pci_bus_is_root(bus)) {
1327          owner = OBJECT(sphb);
1328      } else {
1329          owner = OBJECT(pci_bridge_get_device(bus));
1330      }
1331  
1332      ret = spapr_dt_drc(fdt, offset, owner,
1333                         SPAPR_DR_CONNECTOR_TYPE_PCI);
1334      if (ret) {
1335          return ret;
1336      }
1337  
1338      return offset;
1339  }
1340  
1341  char *spapr_pci_fw_dev_name(PCIDevice *dev)
1342  {
1343      const gchar *basename;
1344      int slot = PCI_SLOT(dev->devfn);
1345      int func = PCI_FUNC(dev->devfn);
1346      uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
1347  
1348      basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
1349                                    ccode & 0xff);
1350  
1351      if (func != 0) {
1352          return g_strdup_printf("%s@%x,%x", basename, slot, func);
1353      } else {
1354          return g_strdup_printf("%s@%x", basename, slot);
1355      }
1356  }
1357  
1358  /* create OF node for pci device and required OF DT properties */
1359  static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
1360                                 void *fdt, int parent_offset)
1361  {
1362      int offset;
1363      g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev);
1364      ResourceProps rp;
1365      SpaprDrc *drc = drc_from_dev(sphb, dev);
1366      uint32_t vendor_id = pci_default_read_config(dev, PCI_VENDOR_ID, 2);
1367      uint32_t device_id = pci_default_read_config(dev, PCI_DEVICE_ID, 2);
1368      uint32_t revision_id = pci_default_read_config(dev, PCI_REVISION_ID, 1);
1369      uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
1370      uint32_t irq_pin = pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1);
1371      uint32_t subsystem_id = pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2);
1372      uint32_t subsystem_vendor_id =
1373          pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2);
1374      uint32_t cache_line_size =
1375          pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1);
1376      uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
1377      gchar *loc_code;
1378  
1379      _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename));
1380  
1381      /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */
1382      _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id));
1383      _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id));
1384      _FDT(fdt_setprop_cell(fdt, offset, "revision-id", revision_id));
1385  
1386      _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
1387      if (irq_pin) {
1388          _FDT(fdt_setprop_cell(fdt, offset, "interrupts", irq_pin));
1389      }
1390  
1391      if (subsystem_id) {
1392          _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", subsystem_id));
1393      }
1394  
1395      if (subsystem_vendor_id) {
1396          _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
1397                                subsystem_vendor_id));
1398      }
1399  
1400      _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", cache_line_size));
1401  
1402  
1403      /* the following fdt cells are masked off the pci status register */
1404      _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
1405                            PCI_STATUS_DEVSEL_MASK & pci_status));
1406  
1407      if (pci_status & PCI_STATUS_FAST_BACK) {
1408          _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0));
1409      }
1410      if (pci_status & PCI_STATUS_66MHZ) {
1411          _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0));
1412      }
1413      if (pci_status & PCI_STATUS_UDF) {
1414          _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
1415      }
1416  
1417      loc_code = spapr_phb_get_loc_code(sphb, dev);
1418      _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", loc_code));
1419      g_free(loc_code);
1420  
1421      if (drc) {
1422          _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index",
1423                                spapr_drc_index(drc)));
1424      }
1425  
1426      if (msi_present(dev)) {
1427          uint32_t max_msi = msi_nr_vectors_allocated(dev);
1428          if (max_msi) {
1429              _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi));
1430          }
1431      }
1432      if (msix_present(dev)) {
1433          uint32_t max_msix = dev->msix_entries_nr;
1434          if (max_msix) {
1435              _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix));
1436          }
1437      }
1438  
1439      populate_resource_props(dev, &rp);
1440      _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len));
1441  
1442      if (sphb->pcie_ecs && pci_is_express(dev)) {
1443          _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1));
1444      }
1445  
1446      spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb);
1447  
1448      if (!IS_PCI_BRIDGE(dev)) {
1449          /* Properties only for non-bridges */
1450          uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1);
1451          uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT, 1);
1452          _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant));
1453          _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency));
1454          return offset;
1455      } else {
1456          PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
1457  
1458          return spapr_dt_pci_bus(sphb, sec_bus, fdt, offset);
1459      }
1460  }
1461  
1462  /* Callback to be called during DRC release. */
1463  void spapr_phb_remove_pci_device_cb(DeviceState *dev)
1464  {
1465      HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
1466  
1467      hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
1468      object_unparent(OBJECT(dev));
1469  }
1470  
1471  int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
1472                            void *fdt, int *fdt_start_offset, Error **errp)
1473  {
1474      HotplugHandler *plug_handler = qdev_get_hotplug_handler(drc->dev);
1475      SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(plug_handler);
1476      PCIDevice *pdev = PCI_DEVICE(drc->dev);
1477  
1478      *fdt_start_offset = spapr_dt_pci_device(sphb, pdev, fdt, 0);
1479      return 0;
1480  }
1481  
1482  static void spapr_pci_bridge_plug(SpaprPhbState *phb,
1483                                    PCIBridge *bridge)
1484  {
1485      PCIBus *bus = pci_bridge_get_sec_bus(bridge);
1486  
1487      add_drcs(phb, bus);
1488  }
1489  
1490  /* Returns non-zero if the value of "chassis_nr" is already in use */
1491  static int check_chassis_nr(Object *obj, void *opaque)
1492  {
1493      int new_chassis_nr =
1494          object_property_get_uint(opaque, "chassis_nr", &error_abort);
1495      int chassis_nr =
1496          object_property_get_uint(obj, "chassis_nr", NULL);
1497  
1498      if (!object_dynamic_cast(obj, TYPE_PCI_BRIDGE)) {
1499          return 0;
1500      }
1501  
1502      /* Skip unsupported bridge types */
1503      if (!chassis_nr) {
1504          return 0;
1505      }
1506  
1507      /* Skip self */
1508      if (obj == opaque) {
1509          return 0;
1510      }
1511  
1512      return chassis_nr == new_chassis_nr;
1513  }
1514  
1515  static bool bridge_has_valid_chassis_nr(Object *bridge, Error **errp)
1516  {
1517      int chassis_nr =
1518          object_property_get_uint(bridge, "chassis_nr", NULL);
1519  
1520      /*
1521       * slotid_cap_init() already ensures that "chassis_nr" isn't null for
1522       * standard PCI bridges, so this really tells if "chassis_nr" is present
1523       * or not.
1524       */
1525      if (!chassis_nr) {
1526          error_setg(errp, "PCI Bridge lacks a \"chassis_nr\" property");
1527          error_append_hint(errp, "Try -device pci-bridge instead.\n");
1528          return false;
1529      }
1530  
1531      /* We want unique values for "chassis_nr" */
1532      if (object_child_foreach_recursive(object_get_root(), check_chassis_nr,
1533                                         bridge)) {
1534          error_setg(errp, "Bridge chassis %d already in use", chassis_nr);
1535          return false;
1536      }
1537  
1538      return true;
1539  }
1540  
1541  static void spapr_pci_pre_plug(HotplugHandler *plug_handler,
1542                                 DeviceState *plugged_dev, Error **errp)
1543  {
1544      SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1545      PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1546      SpaprDrc *drc = drc_from_dev(phb, pdev);
1547      PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
1548      uint32_t slotnr = PCI_SLOT(pdev->devfn);
1549  
1550      if (!phb->dr_enabled) {
1551          /* if this is a hotplug operation initiated by the user
1552           * we need to let them know it's not enabled
1553           */
1554          if (plugged_dev->hotplugged) {
1555              error_setg(errp, QERR_BUS_NO_HOTPLUG,
1556                         object_get_typename(OBJECT(phb)));
1557              return;
1558          }
1559      }
1560  
1561      if (IS_PCI_BRIDGE(plugged_dev)) {
1562          if (!bridge_has_valid_chassis_nr(OBJECT(plugged_dev), errp)) {
1563              return;
1564          }
1565      }
1566  
1567      /* Following the QEMU convention used for PCIe multifunction
1568       * hotplug, we do not allow functions to be hotplugged to a
1569       * slot that already has function 0 present
1570       */
1571      if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
1572          PCI_FUNC(pdev->devfn) != 0) {
1573          error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
1574                     " additional functions can no longer be exposed to guest.",
1575                     slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
1576      }
1577  
1578      if (drc && drc->dev) {
1579          error_setg(errp, "PCI: slot %d already occupied by %s", slotnr,
1580                     pci_get_function_0(PCI_DEVICE(drc->dev))->name);
1581          return;
1582      }
1583  }
1584  
1585  static void spapr_pci_plug(HotplugHandler *plug_handler,
1586                             DeviceState *plugged_dev, Error **errp)
1587  {
1588      SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1589      PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1590      SpaprDrc *drc = drc_from_dev(phb, pdev);
1591      uint32_t slotnr = PCI_SLOT(pdev->devfn);
1592  
1593      /*
1594       * If DR is disabled we don't need to do anything in the case of
1595       * hotplug or coldplug callbacks.
1596       */
1597      if (!phb->dr_enabled) {
1598          return;
1599      }
1600  
1601      g_assert(drc);
1602  
1603      if (IS_PCI_BRIDGE(plugged_dev)) {
1604          spapr_pci_bridge_plug(phb, PCI_BRIDGE(plugged_dev));
1605      }
1606  
1607      /* spapr_pci_pre_plug() already checked the DRC is attachable */
1608      spapr_drc_attach(drc, DEVICE(pdev));
1609  
1610      /* If this is function 0, signal hotplug for all the device functions.
1611       * Otherwise defer sending the hotplug event.
1612       */
1613      if (!spapr_drc_hotplugged(plugged_dev)) {
1614          spapr_drc_reset(drc);
1615      } else if (PCI_FUNC(pdev->devfn) == 0) {
1616          int i;
1617          uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
1618  
1619          for (i = 0; i < 8; i++) {
1620              SpaprDrc *func_drc;
1621              SpaprDrcClass *func_drck;
1622              SpaprDREntitySense state;
1623  
1624              func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
1625              func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1626              state = func_drck->dr_entity_sense(func_drc);
1627  
1628              if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1629                  spapr_hotplug_req_add_by_index(func_drc);
1630              }
1631          }
1632      }
1633  }
1634  
1635  static void spapr_pci_bridge_unplug(SpaprPhbState *phb,
1636                                      PCIBridge *bridge)
1637  {
1638      PCIBus *bus = pci_bridge_get_sec_bus(bridge);
1639  
1640      remove_drcs(phb, bus);
1641  }
1642  
1643  static void spapr_pci_unplug(HotplugHandler *plug_handler,
1644                               DeviceState *plugged_dev, Error **errp)
1645  {
1646      SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1647  
1648      /* some version guests do not wait for completion of a device
1649       * cleanup (generally done asynchronously by the kernel) before
1650       * signaling to QEMU that the device is safe, but instead sleep
1651       * for some 'safe' period of time. unfortunately on a busy host
1652       * this sleep isn't guaranteed to be long enough, resulting in
1653       * bad things like IRQ lines being left asserted during final
1654       * device removal. to deal with this we call reset just prior
1655       * to finalizing the device, which will put the device back into
1656       * an 'idle' state, as the device cleanup code expects.
1657       */
1658      pci_device_reset(PCI_DEVICE(plugged_dev));
1659  
1660      if (IS_PCI_BRIDGE(plugged_dev)) {
1661          spapr_pci_bridge_unplug(phb, PCI_BRIDGE(plugged_dev));
1662          return;
1663      }
1664  
1665      qdev_unrealize(plugged_dev);
1666  }
1667  
1668  static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
1669                                       DeviceState *plugged_dev, Error **errp)
1670  {
1671      SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1672      PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1673      SpaprDrc *drc = drc_from_dev(phb, pdev);
1674  
1675      if (!phb->dr_enabled) {
1676          error_setg(errp, QERR_BUS_NO_HOTPLUG,
1677                     object_get_typename(OBJECT(phb)));
1678          return;
1679      }
1680  
1681      g_assert(drc);
1682      g_assert(drc->dev == plugged_dev);
1683  
1684      if (!spapr_drc_unplug_requested(drc)) {
1685          uint32_t slotnr = PCI_SLOT(pdev->devfn);
1686          SpaprDrc *func_drc;
1687          SpaprDrcClass *func_drck;
1688          SpaprDREntitySense state;
1689          int i;
1690          uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
1691  
1692          if (IS_PCI_BRIDGE(plugged_dev)) {
1693              error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
1694              return;
1695          }
1696          if (object_property_get_uint(OBJECT(pdev), "nvlink2-tgt", NULL)) {
1697              error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
1698              return;
1699          }
1700  
1701          /* ensure any other present functions are pending unplug */
1702          if (PCI_FUNC(pdev->devfn) == 0) {
1703              for (i = 1; i < 8; i++) {
1704                  func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
1705                  func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1706                  state = func_drck->dr_entity_sense(func_drc);
1707                  if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
1708                      && !spapr_drc_unplug_requested(func_drc)) {
1709                      /*
1710                       * Attempting to remove function 0 of a multifunction
1711                       * device will will cascade into removing all child
1712                       * functions, even if their unplug weren't requested
1713                       * beforehand.
1714                       */
1715                      spapr_drc_unplug_request(func_drc);
1716                  }
1717              }
1718          }
1719  
1720          spapr_drc_unplug_request(drc);
1721  
1722          /* if this isn't func 0, defer unplug event. otherwise signal removal
1723           * for all present functions
1724           */
1725          if (PCI_FUNC(pdev->devfn) == 0) {
1726              for (i = 7; i >= 0; i--) {
1727                  func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
1728                  func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1729                  state = func_drck->dr_entity_sense(func_drc);
1730                  if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1731                      spapr_hotplug_req_remove_by_index(func_drc);
1732                  }
1733              }
1734          }
1735      } else {
1736          error_setg(errp,
1737                     "PCI device unplug already in progress for device %s",
1738                     drc->dev->id);
1739      }
1740  }
1741  
1742  static void spapr_phb_finalizefn(Object *obj)
1743  {
1744      SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(obj);
1745  
1746      g_free(sphb->dtbusname);
1747      sphb->dtbusname = NULL;
1748  }
1749  
1750  static void spapr_phb_unrealize(DeviceState *dev)
1751  {
1752      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1753      SysBusDevice *s = SYS_BUS_DEVICE(dev);
1754      PCIHostState *phb = PCI_HOST_BRIDGE(s);
1755      SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(phb);
1756      SpaprTceTable *tcet;
1757      int i;
1758      const unsigned windows_supported = spapr_phb_windows_supported(sphb);
1759  
1760      spapr_phb_nvgpu_free(sphb);
1761  
1762      if (sphb->msi) {
1763          g_hash_table_unref(sphb->msi);
1764          sphb->msi = NULL;
1765      }
1766  
1767      /*
1768       * Remove IO/MMIO subregions and aliases, rest should get cleaned
1769       * via PHB's unrealize->object_finalize
1770       */
1771      for (i = windows_supported - 1; i >= 0; i--) {
1772          tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
1773          if (tcet) {
1774              memory_region_del_subregion(&sphb->iommu_root,
1775                                          spapr_tce_get_iommu(tcet));
1776          }
1777      }
1778  
1779      remove_drcs(sphb, phb->bus);
1780  
1781      for (i = PCI_NUM_PINS - 1; i >= 0; i--) {
1782          if (sphb->lsi_table[i].irq) {
1783              spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1);
1784              sphb->lsi_table[i].irq = 0;
1785          }
1786      }
1787  
1788      QLIST_REMOVE(sphb, list);
1789  
1790      memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow);
1791  
1792      /*
1793       * An attached PCI device may have memory listeners, eg. VFIO PCI. We have
1794       * unmapped all sections. Remove the listeners now, before destroying the
1795       * address space.
1796       */
1797      address_space_remove_listeners(&sphb->iommu_as);
1798      address_space_destroy(&sphb->iommu_as);
1799  
1800      qbus_set_hotplug_handler(BUS(phb->bus), NULL);
1801      pci_unregister_root_bus(phb->bus);
1802  
1803      memory_region_del_subregion(get_system_memory(), &sphb->iowindow);
1804      if (sphb->mem64_win_pciaddr != (hwaddr)-1) {
1805          memory_region_del_subregion(get_system_memory(), &sphb->mem64window);
1806      }
1807      memory_region_del_subregion(get_system_memory(), &sphb->mem32window);
1808  }
1809  
1810  static void spapr_phb_destroy_msi(gpointer opaque)
1811  {
1812      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1813      SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
1814      SpaprPciMsi *msi = opaque;
1815  
1816      if (!smc->legacy_irq_allocation) {
1817          spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
1818      }
1819      spapr_irq_free(spapr, msi->first_irq, msi->num);
1820      g_free(msi);
1821  }
1822  
1823  static void spapr_phb_realize(DeviceState *dev, Error **errp)
1824  {
1825      ERRP_GUARD();
1826      /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user
1827       * tries to add a sPAPR PHB to a non-pseries machine.
1828       */
1829      SpaprMachineState *spapr =
1830          (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(),
1831                                                    TYPE_SPAPR_MACHINE);
1832      SpaprMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL;
1833      SysBusDevice *s = SYS_BUS_DEVICE(dev);
1834      SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
1835      PCIHostState *phb = PCI_HOST_BRIDGE(s);
1836      MachineState *ms = MACHINE(spapr);
1837      char *namebuf;
1838      int i;
1839      PCIBus *bus;
1840      uint64_t msi_window_size = 4096;
1841      SpaprTceTable *tcet;
1842      const unsigned windows_supported = spapr_phb_windows_supported(sphb);
1843  
1844      if (!spapr) {
1845          error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine");
1846          return;
1847      }
1848  
1849      assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */
1850  
1851      if (sphb->mem64_win_size != 0) {
1852          if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1853              error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx
1854                         " (max 2 GiB)", sphb->mem_win_size);
1855              return;
1856          }
1857  
1858          /* 64-bit window defaults to identity mapping */
1859          sphb->mem64_win_pciaddr = sphb->mem64_win_addr;
1860      } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1861          /*
1862           * For compatibility with old configuration, if no 64-bit MMIO
1863           * window is specified, but the ordinary (32-bit) memory
1864           * window is specified as > 2GiB, we treat it as a 2GiB 32-bit
1865           * window, with a 64-bit MMIO window following on immediately
1866           * afterwards
1867           */
1868          sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE;
1869          sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE;
1870          sphb->mem64_win_pciaddr =
1871              SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE;
1872          sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE;
1873      }
1874  
1875      if (spapr_pci_find_phb(spapr, sphb->buid)) {
1876          SpaprPhbState *s;
1877  
1878          error_setg(errp, "PCI host bridges must have unique indexes");
1879          error_append_hint(errp, "The following indexes are already in use:");
1880          QLIST_FOREACH(s, &spapr->phbs, list) {
1881              error_append_hint(errp, " %d", s->index);
1882          }
1883          error_append_hint(errp, "\nTry another value for the index property\n");
1884          return;
1885      }
1886  
1887      if (sphb->numa_node != -1 &&
1888          (sphb->numa_node >= MAX_NODES ||
1889           !ms->numa_state->nodes[sphb->numa_node].present)) {
1890          error_setg(errp, "Invalid NUMA node ID for PCI host bridge");
1891          return;
1892      }
1893  
1894      sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
1895  
1896      /* Initialize memory regions */
1897      namebuf = g_strdup_printf("%s.mmio", sphb->dtbusname);
1898      memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
1899      g_free(namebuf);
1900  
1901      namebuf = g_strdup_printf("%s.mmio32-alias", sphb->dtbusname);
1902      memory_region_init_alias(&sphb->mem32window, OBJECT(sphb),
1903                               namebuf, &sphb->memspace,
1904                               SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
1905      g_free(namebuf);
1906      memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
1907                                  &sphb->mem32window);
1908  
1909      if (sphb->mem64_win_size != 0) {
1910          namebuf = g_strdup_printf("%s.mmio64-alias", sphb->dtbusname);
1911          memory_region_init_alias(&sphb->mem64window, OBJECT(sphb),
1912                                   namebuf, &sphb->memspace,
1913                                   sphb->mem64_win_pciaddr, sphb->mem64_win_size);
1914          g_free(namebuf);
1915  
1916          memory_region_add_subregion(get_system_memory(),
1917                                      sphb->mem64_win_addr,
1918                                      &sphb->mem64window);
1919      }
1920  
1921      /* Initialize IO regions */
1922      namebuf = g_strdup_printf("%s.io", sphb->dtbusname);
1923      memory_region_init(&sphb->iospace, OBJECT(sphb),
1924                         namebuf, SPAPR_PCI_IO_WIN_SIZE);
1925      g_free(namebuf);
1926  
1927      namebuf = g_strdup_printf("%s.io-alias", sphb->dtbusname);
1928      memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
1929                               &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
1930      g_free(namebuf);
1931      memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
1932                                  &sphb->iowindow);
1933  
1934      bus = pci_register_root_bus(dev, NULL,
1935                                  pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
1936                                  &sphb->memspace, &sphb->iospace,
1937                                  PCI_DEVFN(0, 0), PCI_NUM_PINS,
1938                                  TYPE_PCI_BUS);
1939  
1940      /*
1941       * Despite resembling a vanilla PCI bus in most ways, the PAPR
1942       * para-virtualized PCI bus *does* permit PCI-E extended config
1943       * space access
1944       */
1945      if (sphb->pcie_ecs) {
1946          bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
1947      }
1948      phb->bus = bus;
1949      qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb));
1950  
1951      /*
1952       * Initialize PHB address space.
1953       * By default there will be at least one subregion for default
1954       * 32bit DMA window.
1955       * Later the guest might want to create another DMA window
1956       * which will become another memory subregion.
1957       */
1958      namebuf = g_strdup_printf("%s.iommu-root", sphb->dtbusname);
1959      memory_region_init(&sphb->iommu_root, OBJECT(sphb),
1960                         namebuf, UINT64_MAX);
1961      g_free(namebuf);
1962      address_space_init(&sphb->iommu_as, &sphb->iommu_root,
1963                         sphb->dtbusname);
1964  
1965      /*
1966       * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
1967       * we need to allocate some memory to catch those writes coming
1968       * from msi_notify()/msix_notify().
1969       * As MSIMessage:addr is going to be the same and MSIMessage:data
1970       * is going to be a VIRQ number, 4 bytes of the MSI MR will only
1971       * be used.
1972       *
1973       * For KVM we want to ensure that this memory is a full page so that
1974       * our memory slot is of page size granularity.
1975       */
1976      if (kvm_enabled()) {
1977          msi_window_size = qemu_real_host_page_size();
1978      }
1979  
1980      memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), &spapr_msi_ops, spapr,
1981                            "msi", msi_window_size);
1982      memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
1983                                  &sphb->msiwindow);
1984  
1985      pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
1986  
1987      pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
1988  
1989      QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
1990  
1991      /* Initialize the LSI table */
1992      for (i = 0; i < PCI_NUM_PINS; i++) {
1993          int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
1994  
1995          if (smc->legacy_irq_allocation) {
1996              irq = spapr_irq_findone(spapr, errp);
1997              if (irq < 0) {
1998                  error_prepend(errp, "can't allocate LSIs: ");
1999                  /*
2000                   * Older machines will never support PHB hotplug, ie, this is an
2001                   * init only path and QEMU will terminate. No need to rollback.
2002                   */
2003                  return;
2004              }
2005          }
2006  
2007          if (spapr_irq_claim(spapr, irq, true, errp) < 0) {
2008              error_prepend(errp, "can't allocate LSIs: ");
2009              goto unrealize;
2010          }
2011  
2012          sphb->lsi_table[i].irq = irq;
2013      }
2014  
2015      /* allocate connectors for child PCI devices */
2016      add_drcs(sphb, phb->bus);
2017  
2018      /* DMA setup */
2019      for (i = 0; i < windows_supported; ++i) {
2020          tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]);
2021          if (!tcet) {
2022              error_setg(errp, "Creating window#%d failed for %s",
2023                         i, sphb->dtbusname);
2024              goto unrealize;
2025          }
2026          memory_region_add_subregion(&sphb->iommu_root, 0,
2027                                      spapr_tce_get_iommu(tcet));
2028      }
2029  
2030      sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free,
2031                                        spapr_phb_destroy_msi);
2032      return;
2033  
2034  unrealize:
2035      spapr_phb_unrealize(dev);
2036  }
2037  
2038  static int spapr_phb_children_reset(Object *child, void *opaque)
2039  {
2040      DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
2041  
2042      if (dev) {
2043          device_cold_reset(dev);
2044      }
2045  
2046      return 0;
2047  }
2048  
2049  void spapr_phb_dma_reset(SpaprPhbState *sphb)
2050  {
2051      int i;
2052      SpaprTceTable *tcet;
2053  
2054      for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) {
2055          tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
2056  
2057          if (tcet && tcet->nb_table) {
2058              spapr_tce_table_disable(tcet);
2059          }
2060      }
2061  
2062      /* Register default 32bit DMA window */
2063      tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]);
2064      spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
2065                             sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
2066      tcet->def_win = true;
2067  }
2068  
2069  static void spapr_phb_reset(DeviceState *qdev)
2070  {
2071      SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
2072      Error *err = NULL;
2073  
2074      spapr_phb_dma_reset(sphb);
2075      spapr_phb_nvgpu_free(sphb);
2076      spapr_phb_nvgpu_setup(sphb, &err);
2077      if (err) {
2078          error_report_err(err);
2079      }
2080  
2081      /* Reset the IOMMU state */
2082      object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
2083  
2084      if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) {
2085          spapr_phb_vfio_reset(qdev);
2086      }
2087  
2088      g_hash_table_remove_all(sphb->msi);
2089  }
2090  
/*
 * qdev properties of the sPAPR PCI host bridge. They are attached to the
 * device class in spapr_phb_class_init().
 */
static Property spapr_phb_properties[] = {
    /* PHB index; defaults to -1, spapr_phb_realize() asserts it was set */
    DEFINE_PROP_UINT32("index", SpaprPhbState, index, -1),
    /* Sizes of the 32-bit MMIO, 64-bit MMIO and IO windows */
    DEFINE_PROP_UINT64("mem_win_size", SpaprPhbState, mem_win_size,
                       SPAPR_PCI_MEM32_WIN_SIZE),
    DEFINE_PROP_UINT64("mem64_win_size", SpaprPhbState, mem64_win_size,
                       SPAPR_PCI_MEM64_WIN_SIZE),
    DEFINE_PROP_UINT64("io_win_size", SpaprPhbState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
    DEFINE_PROP_BOOL("dynamic-reconfiguration", SpaprPhbState, dr_enabled,
                     true),
    /* Default DMA window is 0..1GB */
    DEFINE_PROP_UINT64("dma_win_addr", SpaprPhbState, dma_win_addr, 0),
    DEFINE_PROP_UINT64("dma_win_size", SpaprPhbState, dma_win_size, 0x40000000),
    DEFINE_PROP_UINT64("dma64_win_addr", SpaprPhbState, dma64_win_addr,
                       0x800000000000000ULL),
    /* Dynamic DMA windows: enable switch and mask of page sizes (bit N set
     * for a page size of 1 << N; default 4K, 64K, 2M and 16M) */
    DEFINE_PROP_BOOL("ddw", SpaprPhbState, ddw_enabled, true),
    DEFINE_PROP_UINT64("pgsz", SpaprPhbState, page_size_mask,
                       (1ULL << 12) | (1ULL << 16)
                       | (1ULL << 21) | (1ULL << 24)),
    /* NUMA node of the PHB; -1 means none (see spapr_phb_realize()) */
    DEFINE_PROP_UINT32("numa_node", SpaprPhbState, numa_node, -1),
    /* Selects the extra migration fields tested by pre_2_8_migration() */
    DEFINE_PROP_BOOL("pre-2.8-migration", SpaprPhbState,
                     pre_2_8_migration, false),
    /* When set, realize flags the bus PCI_BUS_EXTENDED_CONFIG_SPACE */
    DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState,
                     pcie_ecs, true),
    /* NVLink2 window addresses (fields nv2_gpa_win_addr/nv2_atsd_win_addr) */
    DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
    DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
    DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState,
                     pre_5_1_assoc, false),
    DEFINE_PROP_END_OF_LIST(),
};
2121  
/*
 * Migration description of one LSI table entry (SpaprPciLsi). The irq
 * field uses the _EQUAL variant; it is embedded per PCI pin in
 * vmstate_spapr_pci via lsi_table.
 */
static const VMStateDescription vmstate_spapr_pci_lsi = {
    .name = "spapr_pci/lsi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_EQUAL(irq, SpaprPciLsi, NULL),

        VMSTATE_END_OF_LIST()
    },
};
2132  
/*
 * Migration description of one flattened MSI hash table entry
 * (SpaprPciMsiMig): the hash key plus the first IRQ and IRQ count of the
 * associated SpaprPciMsi value. The array of these is built in
 * spapr_pci_pre_save() and consumed in spapr_pci_post_load().
 */
static const VMStateDescription vmstate_spapr_pci_msi = {
    .name = "spapr_pci/msi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(key, SpaprPciMsiMig),
        VMSTATE_UINT32(value.first_irq, SpaprPciMsiMig),
        VMSTATE_UINT32(value.num, SpaprPciMsiMig),
        VMSTATE_END_OF_LIST()
    },
};
2144  
2145  static int spapr_pci_pre_save(void *opaque)
2146  {
2147      SpaprPhbState *sphb = opaque;
2148      GHashTableIter iter;
2149      gpointer key, value;
2150      int i;
2151  
2152      if (sphb->pre_2_8_migration) {
2153          sphb->mig_liobn = sphb->dma_liobn[0];
2154          sphb->mig_mem_win_addr = sphb->mem_win_addr;
2155          sphb->mig_mem_win_size = sphb->mem_win_size;
2156          sphb->mig_io_win_addr = sphb->io_win_addr;
2157          sphb->mig_io_win_size = sphb->io_win_size;
2158  
2159          if ((sphb->mem64_win_size != 0)
2160              && (sphb->mem64_win_addr
2161                  == (sphb->mem_win_addr + sphb->mem_win_size))) {
2162              sphb->mig_mem_win_size += sphb->mem64_win_size;
2163          }
2164      }
2165  
2166      g_free(sphb->msi_devs);
2167      sphb->msi_devs = NULL;
2168      sphb->msi_devs_num = g_hash_table_size(sphb->msi);
2169      if (!sphb->msi_devs_num) {
2170          return 0;
2171      }
2172      sphb->msi_devs = g_new(SpaprPciMsiMig, sphb->msi_devs_num);
2173  
2174      g_hash_table_iter_init(&iter, sphb->msi);
2175      for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
2176          sphb->msi_devs[i].key = *(uint32_t *) key;
2177          sphb->msi_devs[i].value = *(SpaprPciMsi *) value;
2178      }
2179  
2180      return 0;
2181  }
2182  
2183  static int spapr_pci_post_save(void *opaque)
2184  {
2185      SpaprPhbState *sphb = opaque;
2186  
2187      g_free(sphb->msi_devs);
2188      sphb->msi_devs = NULL;
2189      sphb->msi_devs_num = 0;
2190      return 0;
2191  }
2192  
2193  static int spapr_pci_post_load(void *opaque, int version_id)
2194  {
2195      SpaprPhbState *sphb = opaque;
2196      gpointer key, value;
2197      int i;
2198  
2199      for (i = 0; i < sphb->msi_devs_num; ++i) {
2200          key = g_memdup(&sphb->msi_devs[i].key,
2201                         sizeof(sphb->msi_devs[i].key));
2202          value = g_memdup(&sphb->msi_devs[i].value,
2203                           sizeof(sphb->msi_devs[i].value));
2204          g_hash_table_insert(sphb->msi, key, value);
2205      }
2206      g_free(sphb->msi_devs);
2207      sphb->msi_devs = NULL;
2208      sphb->msi_devs_num = 0;
2209  
2210      return 0;
2211  }
2212  
2213  static bool pre_2_8_migration(void *opaque, int version_id)
2214  {
2215      SpaprPhbState *sphb = opaque;
2216  
2217      return sphb->pre_2_8_migration;
2218  }
2219  
/*
 * Top-level migration description of a sPAPR PHB (installed as dc->vmsd
 * in spapr_phb_class_init()).
 *
 * The MSI hash table cannot be migrated directly: spapr_pci_pre_save()
 * flattens it into msi_devs/msi_devs_num, spapr_pci_post_save() frees
 * that array, and spapr_pci_post_load() rebuilds the hash table on the
 * destination.
 */
static const VMStateDescription vmstate_spapr_pci = {
    .name = "spapr_pci",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_save = spapr_pci_pre_save,
    .post_save = spapr_pci_post_save,
    .post_load = spapr_pci_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL),
        /* Compatibility fields, only present when pre_2_8_migration() */
        VMSTATE_UINT32_TEST(mig_liobn, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_mem_win_addr, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_mem_win_size, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration),
        /* One LSI entry per PCI pin */
        VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0,
                             vmstate_spapr_pci_lsi, SpaprPciLsi),
        /* Flattened MSI table: element count followed by the elements */
        VMSTATE_INT32(msi_devs_num, SpaprPhbState),
        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0,
                                    vmstate_spapr_pci_msi, SpaprPciMsiMig),
        VMSTATE_END_OF_LIST()
    },
};
2242  
2243  static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
2244                                             PCIBus *rootbus)
2245  {
2246      SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
2247  
2248      return sphb->dtbusname;
2249  }
2250  
2251  static void spapr_phb_class_init(ObjectClass *klass, void *data)
2252  {
2253      PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
2254      DeviceClass *dc = DEVICE_CLASS(klass);
2255      HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
2256  
2257      hc->root_bus_path = spapr_phb_root_bus_path;
2258      dc->realize = spapr_phb_realize;
2259      dc->unrealize = spapr_phb_unrealize;
2260      device_class_set_props(dc, spapr_phb_properties);
2261      dc->reset = spapr_phb_reset;
2262      dc->vmsd = &vmstate_spapr_pci;
2263      /* Supported by TYPE_SPAPR_MACHINE */
2264      dc->user_creatable = true;
2265      set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
2266      hp->pre_plug = spapr_pci_pre_plug;
2267      hp->plug = spapr_pci_plug;
2268      hp->unplug = spapr_pci_unplug;
2269      hp->unplug_request = spapr_pci_unplug_request;
2270  }
2271  
/*
 * QOM type description of the sPAPR PCI host bridge: a PCI host bridge
 * subtype that also implements the hotplug handler interface. There is
 * no instance_init; instance cleanup is done by spapr_phb_finalizefn().
 */
static const TypeInfo spapr_phb_info = {
    .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
    .parent        = TYPE_PCI_HOST_BRIDGE,
    .instance_size = sizeof(SpaprPhbState),
    .instance_finalize = spapr_phb_finalizefn,
    .class_init    = spapr_phb_class_init,
    .interfaces    = (InterfaceInfo[]) {
        { TYPE_HOTPLUG_HANDLER },
        { }
    }
};
2283  
2284  static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
2285                                             void *opaque)
2286  {
2287      unsigned int *bus_no = opaque;
2288      PCIBus *sec_bus = NULL;
2289  
2290      if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
2291           PCI_HEADER_TYPE_BRIDGE)) {
2292          return;
2293      }
2294  
2295      (*bus_no)++;
2296      pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1);
2297      pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1);
2298      pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
2299  
2300      sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
2301      if (!sec_bus) {
2302          return;
2303      }
2304  
2305      pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge,
2306                                    bus_no);
2307      pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
2308  }
2309  
2310  static void spapr_phb_pci_enumerate(SpaprPhbState *phb)
2311  {
2312      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
2313      unsigned int bus_no = 0;
2314  
2315      pci_for_each_device_under_bus(bus, spapr_phb_pci_enumerate_bridge,
2316                                    &bus_no);
2317  
2318  }
2319  
/*
 * Create the device tree node of a PHB under the root of @fdt and
 * populate it, including the nodes of the PCI devices below it.
 *
 * @spapr:        the machine, used for MSI count and NUMA associativity
 * @phb:          the PHB to describe
 * @intc_phandle: phandle of the interrupt controller referenced by the
 *                interrupt-map entries
 * @fdt:          flattened device tree being built
 * @node_offset:  if non-NULL, receives the offset of the new PHB node
 *
 * Returns 0 on success, -1 if the PHB has no TCE table for its first
 * LIOBN, or the negative value returned by spapr_dt_pci_bus().
 * NOTE(review): libfdt failures are handled by the _FDT() macro, which is
 * defined outside this function - confirm its behaviour there.
 *
 * As a side effect, bridge bus numbers are (re)programmed through
 * spapr_phb_pci_enumerate().
 */
int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
                 uint32_t intc_phandle, void *fdt, int *node_offset)
{
    int bus_off, i, j, ret;
    uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
    /* "ranges" entries, stored big-endian and packed as the FDT expects */
    struct {
        uint32_t hi;
        uint64_t child;
        uint64_t parent;
        uint64_t size;
    } QEMU_PACKED ranges[] = {
        {
            /* IO window */
            cpu_to_be32(b_ss(1)), cpu_to_be64(0),
            cpu_to_be64(phb->io_win_addr),
            cpu_to_be64(memory_region_size(&phb->iospace)),
        },
        {
            /* 32-bit MMIO window */
            cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
            cpu_to_be64(phb->mem_win_addr),
            cpu_to_be64(phb->mem_win_size),
        },
        {
            /* 64-bit MMIO window, only emitted when it has a size */
            cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr),
            cpu_to_be64(phb->mem64_win_addr),
            cpu_to_be64(phb->mem64_win_size),
        },
    };
    /* Drop the third entry when there is no 64-bit MMIO window */
    const unsigned sizeof_ranges =
        (phb->mem64_win_size ? 3 : 2) * sizeof(ranges[0]);
    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
    uint32_t interrupt_map_mask[] = {
        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
    /* One 7-cell entry per (slot, pin) pair */
    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
    uint32_t ddw_applicable[] = {
        cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
        cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
        cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
    };
    uint32_t ddw_extensions[] = {
        cpu_to_be32(2),
        cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW),
        cpu_to_be32(1), /* 1: ibm,query-pe-dma-window 6 outputs, PAPR 2.8 */
    };
    SpaprTceTable *tcet;
    SpaprDrc *drc;
    Error *err = NULL;

    /* Start populating the FDT */
    _FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname));
    if (node_offset) {
        *node_offset = bus_off;
    }

    /* Write PHB properties */
    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi",
                          spapr_irq_nr_msis(spapr)));

    /* Dynamic DMA window */
    if (phb->ddw_enabled) {
        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
                         sizeof(ddw_applicable)));
        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
                         &ddw_extensions, sizeof(ddw_extensions)));
    }

    /* Advertise NUMA via ibm,associativity */
    if (phb->numa_node != -1) {
        spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node);
    }

    /* Build the interrupt-map, this must match what is done
     * in pci_swizzle_map_irq_fn
     */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
    for (i = 0; i < PCI_SLOT_MAX; i++) {
        for (j = 0; j < PCI_NUM_PINS; j++) {
            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
            /* LSI used by pin j of slot i after swizzling */
            int lsi_num = pci_swizzle(i, j);

            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
            irqmap[1] = 0;
            irqmap[2] = 0;
            irqmap[3] = cpu_to_be32(j+1);
            irqmap[4] = cpu_to_be32(intc_phandle);
            /* Cells 5 and 6 are filled in by spapr_dt_irq() */
            spapr_dt_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true);
        }
    }
    /* Write interrupt map */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                     sizeof(interrupt_map)));

    /* Describe the default DMA window from the first LIOBN's TCE table */
    tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
    if (!tcet) {
        return -1;
    }
    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
                 tcet->liobn, tcet->bus_offset,
                 tcet->nb_table << tcet->page_shift);

    /* Only PHBs that have a DR connector advertise their DRC index */
    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index);
    if (drc) {
        uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc));

        _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index,
                         sizeof(drc_index)));
    }

    /* Walk the bridges and program the bus numbers*/
    spapr_phb_pci_enumerate(phb);
    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));

    /* Walk the bridge and subordinate buses */
    ret = spapr_dt_pci_bus(phb, PCI_HOST_BRIDGE(phb)->bus, fdt, bus_off);
    if (ret < 0) {
        return ret;
    }

    /* NVLink2 GPU nodes: an error here is reported but not fatal */
    spapr_phb_nvgpu_populate_dt(phb, fdt, bus_off, &err);
    if (err) {
        error_report_err(err);
    }
    spapr_phb_nvgpu_ram_populate_dt(phb, fdt);

    return 0;
}
2454  
2455  void spapr_pci_rtas_init(void)
2456  {
2457      spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
2458                          rtas_read_pci_config);
2459      spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
2460                          rtas_write_pci_config);
2461      spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
2462                          rtas_ibm_read_pci_config);
2463      spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
2464                          rtas_ibm_write_pci_config);
2465      if (msi_nonbroken) {
2466          spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
2467                              "ibm,query-interrupt-source-number",
2468                              rtas_ibm_query_interrupt_source_number);
2469          spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
2470                              rtas_ibm_change_msi);
2471      }
2472  
2473      spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION,
2474                          "ibm,set-eeh-option",
2475                          rtas_ibm_set_eeh_option);
2476      spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2,
2477                          "ibm,get-config-addr-info2",
2478                          rtas_ibm_get_config_addr_info2);
2479      spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2,
2480                          "ibm,read-slot-reset-state2",
2481                          rtas_ibm_read_slot_reset_state2);
2482      spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET,
2483                          "ibm,set-slot-reset",
2484                          rtas_ibm_set_slot_reset);
2485      spapr_rtas_register(RTAS_IBM_CONFIGURE_PE,
2486                          "ibm,configure-pe",
2487                          rtas_ibm_configure_pe);
2488      spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL,
2489                          "ibm,slot-error-detail",
2490                          rtas_ibm_slot_error_detail);
2491  }
2492  
/* Register the sPAPR PCI host bridge QOM type described by spapr_phb_info. */
static void spapr_pci_register_types(void)
{
    type_register_static(&spapr_phb_info);
}

/* Hand the registration function to QEMU's type_init() mechanism */
type_init(spapr_pci_register_types)
2499  
2500  static int spapr_switch_one_vga(DeviceState *dev, void *opaque)
2501  {
2502      bool be = *(bool *)opaque;
2503  
2504      if (object_dynamic_cast(OBJECT(dev), "VGA")
2505          || object_dynamic_cast(OBJECT(dev), "secondary-vga")
2506          || object_dynamic_cast(OBJECT(dev), "bochs-display")
2507          || object_dynamic_cast(OBJECT(dev), "virtio-vga")) {
2508          object_property_set_bool(OBJECT(dev), "big-endian-framebuffer", be,
2509                                   &error_abort);
2510      }
2511      return 0;
2512  }
2513  
2514  void spapr_pci_switch_vga(SpaprMachineState *spapr, bool big_endian)
2515  {
2516      SpaprPhbState *sphb;
2517  
2518      /*
2519       * For backward compatibility with existing guests, we switch
2520       * the endianness of the VGA controller when changing the guest
2521       * interrupt mode
2522       */
2523      QLIST_FOREACH(sphb, &spapr->phbs, list) {
2524          BusState *bus = &PCI_HOST_BRIDGE(sphb)->bus->qbus;
2525          qbus_walk_children(bus, spapr_switch_one_vga, NULL, NULL, NULL,
2526                             &big_endian);
2527      }
2528  }
2529