xref: /openbmc/qemu/hw/riscv/virt.c (revision feb58e3b)
1 /*
2  * QEMU RISC-V VirtIO Board
3  *
4  * Copyright (c) 2017 SiFive, Inc.
5  *
6  * RISC-V machine with 16550a UART and VirtIO MMIO
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2 or later, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/units.h"
23 #include "qemu/error-report.h"
24 #include "qemu/guest-random.h"
25 #include "qapi/error.h"
26 #include "hw/boards.h"
27 #include "hw/loader.h"
28 #include "hw/sysbus.h"
29 #include "hw/qdev-properties.h"
30 #include "hw/char/serial-mm.h"
31 #include "target/riscv/cpu.h"
32 #include "hw/core/sysbus-fdt.h"
33 #include "target/riscv/pmu.h"
34 #include "hw/riscv/riscv_hart.h"
35 #include "hw/riscv/iommu.h"
36 #include "hw/riscv/virt.h"
37 #include "hw/riscv/boot.h"
38 #include "hw/riscv/numa.h"
39 #include "kvm/kvm_riscv.h"
40 #include "hw/firmware/smbios.h"
41 #include "hw/intc/riscv_aclint.h"
42 #include "hw/intc/riscv_aplic.h"
43 #include "hw/intc/sifive_plic.h"
44 #include "hw/misc/sifive_test.h"
45 #include "hw/platform-bus.h"
46 #include "chardev/char.h"
47 #include "sysemu/device_tree.h"
48 #include "sysemu/sysemu.h"
49 #include "sysemu/tcg.h"
50 #include "sysemu/kvm.h"
51 #include "sysemu/tpm.h"
52 #include "sysemu/qtest.h"
53 #include "hw/pci/pci.h"
54 #include "hw/pci-host/gpex.h"
55 #include "hw/display/ramfb.h"
56 #include "hw/acpi/aml-build.h"
57 #include "qapi/qapi-visit-common.h"
58 #include "hw/virtio/virtio-iommu.h"
59 
60 /* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */
61 static bool virt_use_kvm_aia(RISCVVirtState *s)
62 {
63     return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
64 }
65 
/* ACLINT may be used only when running under TCG or under qtest. */
static bool virt_aclint_allowed(void)
{
    if (tcg_enabled()) {
        return true;
    }

    return qtest_enabled();
}
70 
/*
 * Fixed guest-physical memory map of the virt machine, indexed by the
 * VIRT_* region identifiers. Each entry is { base address, size in bytes }.
 * Per-socket devices (CLINT, ACLINT SSWI, PLIC, APLIC) are placed at
 * base + size * socket by the FDT helpers in this file.
 */
static const MemMapEntry virt_memmap[] = {
    [VIRT_DEBUG] =        {        0x0,         0x100 },
    [VIRT_MROM] =         {     0x1000,        0xf000 },
    [VIRT_TEST] =         {   0x100000,        0x1000 },
    [VIRT_RTC] =          {   0x101000,        0x1000 },
    [VIRT_CLINT] =        {  0x2000000,       0x10000 },
    [VIRT_ACLINT_SSWI] =  {  0x2F00000,        0x4000 },
    [VIRT_PCIE_PIO] =     {  0x3000000,       0x10000 },
    [VIRT_PLATFORM_BUS] = {  0x4000000,     0x2000000 },
    /*
     * VIRT_PLIC and VIRT_APLIC_M share the same base address.
     * NOTE(review): presumably only one of the two is instantiated for a
     * given interrupt-controller configuration - confirm in machine init.
     */
    [VIRT_PLIC] =         {  0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
    [VIRT_APLIC_M] =      {  0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) },
    [VIRT_APLIC_S] =      {  0xd000000, APLIC_SIZE(VIRT_CPUS_MAX) },
    [VIRT_UART0] =        { 0x10000000,         0x100 },
    [VIRT_VIRTIO] =       { 0x10001000,        0x1000 },
    [VIRT_FW_CFG] =       { 0x10100000,          0x18 },
    /* Split into two equally sized flash banks by virt_flash_map() */
    [VIRT_FLASH] =        { 0x20000000,     0x4000000 },
    [VIRT_IMSIC_M] =      { 0x24000000, VIRT_IMSIC_MAX_SIZE },
    [VIRT_IMSIC_S] =      { 0x28000000, VIRT_IMSIC_MAX_SIZE },
    [VIRT_PCIE_ECAM] =    { 0x30000000,    0x10000000 },
    [VIRT_PCIE_MMIO] =    { 0x40000000,    0x40000000 },
    /*
     * Only the DRAM base is fixed here; the size entry is 0. The FDT memory
     * nodes take their size from riscv_socket_mem_size() instead.
     */
    [VIRT_DRAM] =         { 0x80000000,           0x0 },
};
93 
/* PCIe high mmio is fixed for RV32 */
#define VIRT32_HIGH_PCIE_MMIO_BASE  0x300000000ULL
#define VIRT32_HIGH_PCIE_MMIO_SIZE  (4 * GiB)

/* PCIe high mmio for RV64, size is fixed but base depends on top of RAM */
#define VIRT64_HIGH_PCIE_MMIO_SIZE  (16 * GiB)

/*
 * High PCIe MMIO window of the running machine.
 * NOTE(review): presumably filled in during machine init from the RV32/RV64
 * values above - confirm against the code that assigns it.
 */
static MemMapEntry virt_high_pcie_memmap;

/*
 * Sector length programmed into each pflash device; virt_flash_map1() also
 * uses it as the unit for computing the device's "num-blocks".
 */
#define VIRT_FLASH_SECTOR_SIZE (256 * KiB)
104 
105 static PFlashCFI01 *virt_flash_create1(RISCVVirtState *s,
106                                        const char *name,
107                                        const char *alias_prop_name)
108 {
109     /*
110      * Create a single flash device.  We use the same parameters as
111      * the flash devices on the ARM virt board.
112      */
113     DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01);
114 
115     qdev_prop_set_uint64(dev, "sector-length", VIRT_FLASH_SECTOR_SIZE);
116     qdev_prop_set_uint8(dev, "width", 4);
117     qdev_prop_set_uint8(dev, "device-width", 2);
118     qdev_prop_set_bit(dev, "big-endian", false);
119     qdev_prop_set_uint16(dev, "id0", 0x89);
120     qdev_prop_set_uint16(dev, "id1", 0x18);
121     qdev_prop_set_uint16(dev, "id2", 0x00);
122     qdev_prop_set_uint16(dev, "id3", 0x00);
123     qdev_prop_set_string(dev, "name", name);
124 
125     object_property_add_child(OBJECT(s), name, OBJECT(dev));
126     object_property_add_alias(OBJECT(s), alias_prop_name,
127                               OBJECT(dev), "drive");
128 
129     return PFLASH_CFI01(dev);
130 }
131 
132 static void virt_flash_create(RISCVVirtState *s)
133 {
134     s->flash[0] = virt_flash_create1(s, "virt.flash0", "pflash0");
135     s->flash[1] = virt_flash_create1(s, "virt.flash1", "pflash1");
136 }
137 
138 static void virt_flash_map1(PFlashCFI01 *flash,
139                             hwaddr base, hwaddr size,
140                             MemoryRegion *sysmem)
141 {
142     DeviceState *dev = DEVICE(flash);
143 
144     assert(QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE));
145     assert(size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX);
146     qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE);
147     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
148 
149     memory_region_add_subregion(sysmem, base,
150                                 sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
151                                                        0));
152 }
153 
154 static void virt_flash_map(RISCVVirtState *s,
155                            MemoryRegion *sysmem)
156 {
157     hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
158     hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
159 
160     virt_flash_map1(s->flash[0], flashbase, flashsize,
161                     sysmem);
162     virt_flash_map1(s->flash[1], flashbase + flashsize, flashsize,
163                     sysmem);
164 }
165 
166 static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename,
167                                 uint32_t irqchip_phandle)
168 {
169     int pin, dev;
170     uint32_t irq_map_stride = 0;
171     uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
172                           FDT_MAX_INT_MAP_WIDTH] = {};
173     uint32_t *irq_map = full_irq_map;
174 
175     /* This code creates a standard swizzle of interrupts such that
176      * each device's first interrupt is based on it's PCI_SLOT number.
177      * (See pci_swizzle_map_irq_fn())
178      *
179      * We only need one entry per interrupt in the table (not one per
180      * possible slot) seeing the interrupt-map-mask will allow the table
181      * to wrap to any number of devices.
182      */
183     for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
184         int devfn = dev * 0x8;
185 
186         for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
187             int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
188             int i = 0;
189 
190             /* Fill PCI address cells */
191             irq_map[i] = cpu_to_be32(devfn << 8);
192             i += FDT_PCI_ADDR_CELLS;
193 
194             /* Fill PCI Interrupt cells */
195             irq_map[i] = cpu_to_be32(pin + 1);
196             i += FDT_PCI_INT_CELLS;
197 
198             /* Fill interrupt controller phandle and cells */
199             irq_map[i++] = cpu_to_be32(irqchip_phandle);
200             irq_map[i++] = cpu_to_be32(irq_nr);
201             if (s->aia_type != VIRT_AIA_TYPE_NONE) {
202                 irq_map[i++] = cpu_to_be32(0x4);
203             }
204 
205             if (!irq_map_stride) {
206                 irq_map_stride = i;
207             }
208             irq_map += irq_map_stride;
209         }
210     }
211 
212     qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
213                      GPEX_NUM_IRQS * GPEX_NUM_IRQS *
214                      irq_map_stride * sizeof(uint32_t));
215 
216     qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
217                            0x1800, 0, 0, 0x7);
218 }
219 
/*
 * Add the FDT nodes describing every hart of @socket.
 *
 * For each hart this creates "/cpus/cpu@<hartid>", a nested
 * "interrupt-controller" (riscv,cpu-intc) node and a "<clust_name>/core<N>"
 * entry that refers back to the cpu node.
 *
 * @clust_name:    path of the already created cpu-map cluster node
 * @phandle:       next free phandle; advanced by two per hart (cpu + intc)
 * @intc_phandles: output array indexed by the hart's index within the
 *                 socket; receives the phandle of each hart's interrupt
 *                 controller node
 */
static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
                                   char *clust_name, uint32_t *phandle,
                                   uint32_t *intc_phandles)
{
    int cpu;
    uint32_t cpu_phandle;
    MachineState *ms = MACHINE(s);
    /* Bitness is taken from socket 0 and applied to the harts of @socket */
    bool is_32_bit = riscv_is_32bit(&s->soc[0]);
    uint8_t satp_mode_max;

    /* Harts are visited from the highest index down to 0 */
    for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
        RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
        g_autofree char *cpu_name = NULL;
        g_autofree char *core_name = NULL;
        g_autofree char *intc_name = NULL;
        g_autofree char *sv_name = NULL;

        cpu_phandle = (*phandle)++;

        /* The node's unit address is the global hart id */
        cpu_name = g_strdup_printf("/cpus/cpu@%d",
            s->soc[socket].hartid_base + cpu);
        qemu_fdt_add_subnode(ms->fdt, cpu_name);

        /* "mmu-type" is only emitted when some satp mode is supported */
        if (cpu_ptr->cfg.satp_mode.supported != 0) {
            satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
            sv_name = g_strdup_printf("riscv,%s",
                                      satp_mode_str(satp_mode_max, is_32_bit));
            qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
        }

        riscv_isa_write_fdt(cpu_ptr, ms->fdt, cpu_name);

        /* Cache-block sizes are advertised per enabled Zicbo* extension */
        if (cpu_ptr->cfg.ext_zicbom) {
            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
                                  cpu_ptr->cfg.cbom_blocksize);
        }

        if (cpu_ptr->cfg.ext_zicboz) {
            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size",
                                  cpu_ptr->cfg.cboz_blocksize);
        }

        if (cpu_ptr->cfg.ext_zicbop) {
            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbop-block-size",
                                  cpu_ptr->cfg.cbop_blocksize);
        }

        qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv");
        qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay");
        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg",
            s->soc[socket].hartid_base + cpu);
        qemu_fdt_setprop_string(ms->fdt, cpu_name, "device_type", "cpu");
        riscv_socket_fdt_write_id(ms, cpu_name, socket);
        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "phandle", cpu_phandle);

        /* The per-hart interrupt controller gets the next phandle */
        intc_phandles[cpu] = (*phandle)++;

        intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name);
        qemu_fdt_add_subnode(ms->fdt, intc_name);
        qemu_fdt_setprop_cell(ms->fdt, intc_name, "phandle",
            intc_phandles[cpu]);
        qemu_fdt_setprop_string(ms->fdt, intc_name, "compatible",
            "riscv,cpu-intc");
        qemu_fdt_setprop(ms->fdt, intc_name, "interrupt-controller", NULL, 0);
        qemu_fdt_setprop_cell(ms->fdt, intc_name, "#interrupt-cells", 1);

        /* cpu-map entry: core<N> within the cluster points at the cpu node */
        core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
        qemu_fdt_add_subnode(ms->fdt, core_name);
        qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
    }
}
291 
292 static void create_fdt_socket_memory(RISCVVirtState *s,
293                                      const MemMapEntry *memmap, int socket)
294 {
295     g_autofree char *mem_name = NULL;
296     uint64_t addr, size;
297     MachineState *ms = MACHINE(s);
298 
299     addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket);
300     size = riscv_socket_mem_size(ms, socket);
301     mem_name = g_strdup_printf("/memory@%lx", (long)addr);
302     qemu_fdt_add_subnode(ms->fdt, mem_name);
303     qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
304         addr >> 32, addr, size >> 32, size);
305     qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
306     riscv_socket_fdt_write_id(ms, mem_name, socket);
307 }
308 
309 static void create_fdt_socket_clint(RISCVVirtState *s,
310                                     const MemMapEntry *memmap, int socket,
311                                     uint32_t *intc_phandles)
312 {
313     int cpu;
314     g_autofree char *clint_name = NULL;
315     g_autofree uint32_t *clint_cells = NULL;
316     unsigned long clint_addr;
317     MachineState *ms = MACHINE(s);
318     static const char * const clint_compat[2] = {
319         "sifive,clint0", "riscv,clint0"
320     };
321 
322     clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
323 
324     for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
325         clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]);
326         clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT);
327         clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]);
328         clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER);
329     }
330 
331     clint_addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket);
332     clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr);
333     qemu_fdt_add_subnode(ms->fdt, clint_name);
334     qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible",
335                                   (char **)&clint_compat,
336                                   ARRAY_SIZE(clint_compat));
337     qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg",
338         0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size);
339     qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended",
340         clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
341     riscv_socket_fdt_write_id(ms, clint_name, socket);
342 }
343 
344 static void create_fdt_socket_aclint(RISCVVirtState *s,
345                                      const MemMapEntry *memmap, int socket,
346                                      uint32_t *intc_phandles)
347 {
348     int cpu;
349     char *name;
350     unsigned long addr, size;
351     uint32_t aclint_cells_size;
352     g_autofree uint32_t *aclint_mswi_cells = NULL;
353     g_autofree uint32_t *aclint_sswi_cells = NULL;
354     g_autofree uint32_t *aclint_mtimer_cells = NULL;
355     MachineState *ms = MACHINE(s);
356 
357     aclint_mswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
358     aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
359     aclint_sswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
360 
361     for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
362         aclint_mswi_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
363         aclint_mswi_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_SOFT);
364         aclint_mtimer_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
365         aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER);
366         aclint_sswi_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
367         aclint_sswi_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_SOFT);
368     }
369     aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
370 
371     if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) {
372         addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket);
373         name = g_strdup_printf("/soc/mswi@%lx", addr);
374         qemu_fdt_add_subnode(ms->fdt, name);
375         qemu_fdt_setprop_string(ms->fdt, name, "compatible",
376             "riscv,aclint-mswi");
377         qemu_fdt_setprop_cells(ms->fdt, name, "reg",
378             0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE);
379         qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
380             aclint_mswi_cells, aclint_cells_size);
381         qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0);
382         qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells", 0);
383         riscv_socket_fdt_write_id(ms, name, socket);
384         g_free(name);
385     }
386 
387     if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
388         addr = memmap[VIRT_CLINT].base +
389                (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
390         size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
391     } else {
392         addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE +
393             (memmap[VIRT_CLINT].size * socket);
394         size = memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE;
395     }
396     name = g_strdup_printf("/soc/mtimer@%lx", addr);
397     qemu_fdt_add_subnode(ms->fdt, name);
398     qemu_fdt_setprop_string(ms->fdt, name, "compatible",
399         "riscv,aclint-mtimer");
400     qemu_fdt_setprop_cells(ms->fdt, name, "reg",
401         0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
402         0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
403         0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
404         0x0, RISCV_ACLINT_DEFAULT_MTIME);
405     qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
406         aclint_mtimer_cells, aclint_cells_size);
407     riscv_socket_fdt_write_id(ms, name, socket);
408     g_free(name);
409 
410     if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) {
411         addr = memmap[VIRT_ACLINT_SSWI].base +
412             (memmap[VIRT_ACLINT_SSWI].size * socket);
413         name = g_strdup_printf("/soc/sswi@%lx", addr);
414         qemu_fdt_add_subnode(ms->fdt, name);
415         qemu_fdt_setprop_string(ms->fdt, name, "compatible",
416             "riscv,aclint-sswi");
417         qemu_fdt_setprop_cells(ms->fdt, name, "reg",
418             0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size);
419         qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
420             aclint_sswi_cells, aclint_cells_size);
421         qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0);
422         qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells", 0);
423         riscv_socket_fdt_write_id(ms, name, socket);
424         g_free(name);
425     }
426 }
427 
428 static void create_fdt_socket_plic(RISCVVirtState *s,
429                                    const MemMapEntry *memmap, int socket,
430                                    uint32_t *phandle, uint32_t *intc_phandles,
431                                    uint32_t *plic_phandles)
432 {
433     int cpu;
434     g_autofree char *plic_name = NULL;
435     g_autofree uint32_t *plic_cells;
436     unsigned long plic_addr;
437     MachineState *ms = MACHINE(s);
438     static const char * const plic_compat[2] = {
439         "sifive,plic-1.0.0", "riscv,plic0"
440     };
441 
442     plic_phandles[socket] = (*phandle)++;
443     plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket);
444     plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr);
445     qemu_fdt_add_subnode(ms->fdt, plic_name);
446     qemu_fdt_setprop_cell(ms->fdt, plic_name,
447         "#interrupt-cells", FDT_PLIC_INT_CELLS);
448     qemu_fdt_setprop_cell(ms->fdt, plic_name,
449         "#address-cells", FDT_PLIC_ADDR_CELLS);
450     qemu_fdt_setprop_string_array(ms->fdt, plic_name, "compatible",
451                                   (char **)&plic_compat,
452                                   ARRAY_SIZE(plic_compat));
453     qemu_fdt_setprop(ms->fdt, plic_name, "interrupt-controller", NULL, 0);
454 
455     if (kvm_enabled()) {
456         plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
457 
458         for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
459             plic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
460             plic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
461         }
462 
463         qemu_fdt_setprop(ms->fdt, plic_name, "interrupts-extended",
464                          plic_cells,
465                          s->soc[socket].num_harts * sizeof(uint32_t) * 2);
466    } else {
467         plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
468 
469         for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
470             plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]);
471             plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT);
472             plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]);
473             plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT);
474         }
475 
476         qemu_fdt_setprop(ms->fdt, plic_name, "interrupts-extended",
477                          plic_cells,
478                          s->soc[socket].num_harts * sizeof(uint32_t) * 4);
479     }
480 
481     qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg",
482         0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size);
483     qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev",
484                           VIRT_IRQCHIP_NUM_SOURCES - 1);
485     riscv_socket_fdt_write_id(ms, plic_name, socket);
486     qemu_fdt_setprop_cell(ms->fdt, plic_name, "phandle",
487         plic_phandles[socket]);
488 
489     if (!socket) {
490         platform_bus_add_all_fdt_nodes(ms->fdt, plic_name,
491                                        memmap[VIRT_PLATFORM_BUS].base,
492                                        memmap[VIRT_PLATFORM_BUS].size,
493                                        VIRT_PLATFORM_BUS_IRQ);
494     }
495 }
496 
/*
 * Return the number of bits needed to index @count items, i.e. the
 * smallest n such that 2^n >= @count. Both 0 and 1 yield 0.
 *
 * The comparison is done on a 64-bit value so that the shift stays defined
 * for every 32-bit @count, including counts above 2^31 where n reaches 32.
 */
uint32_t imsic_num_bits(uint32_t count)
{
    uint32_t ret = 0;

    while ((UINT64_C(1) << ret) < count) {
        ret++;
    }

    return ret;
}
507 
/*
 * Add one IMSIC node, "/soc/interrupt-controller@<base_addr>", covering
 * all sockets of the machine.
 *
 * @base_addr:        base address of the IMSIC region (group 0)
 * @intc_phandles:    interrupt controller phandles of all CPUs of the
 *                    machine, indexed from 0 to ms->smp.cpus - 1
 * @msi_phandle:      phandle assigned to the new node
 * @m_mode:           true to target the M-mode external interrupt of each
 *                    CPU, false to target the S-mode one
 * @imsic_guest_bits: value for "riscv,guest-index-bits" (omitted when 0);
 *                    also determines the per-hart size of each "reg" entry
 */
static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
                                 uint32_t *intc_phandles, uint32_t msi_phandle,
                                 bool m_mode, uint32_t imsic_guest_bits)
{
    int cpu, socket;
    g_autofree char *imsic_name = NULL;
    MachineState *ms = MACHINE(s);
    int socket_count = riscv_socket_count(ms);
    uint32_t imsic_max_hart_per_socket, imsic_addr, imsic_size;
    g_autofree uint32_t *imsic_cells = NULL;
    g_autofree uint32_t *imsic_regs = NULL;
    static const char * const imsic_compat[2] = {
        "qemu,imsics", "riscv,imsics"
    };

    /* Two cells per CPU for "interrupts-extended", four per socket for "reg" */
    imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
    imsic_regs = g_new0(uint32_t, socket_count * 4);

    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
    }

    /*
     * One "reg" entry per socket: each socket's group starts
     * VIRT_IMSIC_GROUP_MAX_SIZE after the previous one and is sized by its
     * hart count. The upper address and size cells are always written as 0.
     * The largest per-socket hart count is tracked for "riscv,hart-index-bits".
     */
    imsic_max_hart_per_socket = 0;
    for (socket = 0; socket < socket_count; socket++) {
        imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
        imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
                     s->soc[socket].num_harts;
        imsic_regs[socket * 4 + 0] = 0;
        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
        imsic_regs[socket * 4 + 2] = 0;
        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
            imsic_max_hart_per_socket = s->soc[socket].num_harts;
        }
    }

    imsic_name = g_strdup_printf("/soc/interrupt-controller@%lx",
                                 (unsigned long)base_addr);
    qemu_fdt_add_subnode(ms->fdt, imsic_name);
    qemu_fdt_setprop_string_array(ms->fdt, imsic_name, "compatible",
                                  (char **)&imsic_compat,
                                  ARRAY_SIZE(imsic_compat));

    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
                          FDT_IMSIC_INT_CELLS);
    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
                     socket_count * sizeof(uint32_t) * 4);
    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
                     VIRT_IRQCHIP_NUM_MSIS);

    if (imsic_guest_bits) {
        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits",
                              imsic_guest_bits);
    }

    /* The hart/group index layout is only described for multi-socket setups */
    if (socket_count > 1) {
        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
                              imsic_num_bits(imsic_max_hart_per_socket));
        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
                              imsic_num_bits(socket_count));
        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
                              IMSIC_MMIO_GROUP_MIN_SHIFT);
    }
    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
}
578 
579 static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
580                              uint32_t *phandle, uint32_t *intc_phandles,
581                              uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
582 {
583     *msi_m_phandle = (*phandle)++;
584     *msi_s_phandle = (*phandle)++;
585 
586     if (!kvm_enabled()) {
587         /* M-level IMSIC node */
588         create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles,
589                              *msi_m_phandle, true, 0);
590     }
591 
592     /* S-level IMSIC node */
593     create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles,
594                          *msi_s_phandle, false,
595                          imsic_num_bits(s->aia_guests + 1));
596 
597 }
598 
/*
 * Build the FDT path of the APLIC node located at @aplic_addr.
 * The returned string is newly allocated; the caller must free it.
 */
static char *fdt_get_aplic_nodename(unsigned long aplic_addr)
{
    char *nodename;

    nodename = g_strdup_printf("/soc/interrupt-controller@%lx", aplic_addr);

    return nodename;
}
604 
605 static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
606                                  unsigned long aplic_addr, uint32_t aplic_size,
607                                  uint32_t msi_phandle,
608                                  uint32_t *intc_phandles,
609                                  uint32_t aplic_phandle,
610                                  uint32_t aplic_child_phandle,
611                                  bool m_mode, int num_harts)
612 {
613     int cpu;
614     g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr);
615     g_autofree uint32_t *aplic_cells = g_new0(uint32_t, num_harts * 2);
616     MachineState *ms = MACHINE(s);
617     static const char * const aplic_compat[2] = {
618         "qemu,aplic", "riscv,aplic"
619     };
620 
621     for (cpu = 0; cpu < num_harts; cpu++) {
622         aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
623         aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
624     }
625 
626     qemu_fdt_add_subnode(ms->fdt, aplic_name);
627     qemu_fdt_setprop_string_array(ms->fdt, aplic_name, "compatible",
628                                   (char **)&aplic_compat,
629                                   ARRAY_SIZE(aplic_compat));
630     qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#address-cells",
631                           FDT_APLIC_ADDR_CELLS);
632     qemu_fdt_setprop_cell(ms->fdt, aplic_name,
633                           "#interrupt-cells", FDT_APLIC_INT_CELLS);
634     qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
635 
636     if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
637         qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
638                          aplic_cells, num_harts * sizeof(uint32_t) * 2);
639     } else {
640         qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
641     }
642 
643     qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
644                            0x0, aplic_addr, 0x0, aplic_size);
645     qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
646                           VIRT_IRQCHIP_NUM_SOURCES);
647 
648     if (aplic_child_phandle) {
649         qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
650                               aplic_child_phandle);
651         qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegation",
652                                aplic_child_phandle, 0x1,
653                                VIRT_IRQCHIP_NUM_SOURCES);
654         /*
655          * DEPRECATED_9.1: Compat property kept temporarily
656          * to allow old firmwares to work with AIA. Do *not*
657          * use 'riscv,delegate' in new code: use
658          * 'riscv,delegation' instead.
659          */
660         qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
661                                aplic_child_phandle, 0x1,
662                                VIRT_IRQCHIP_NUM_SOURCES);
663     }
664 
665     riscv_socket_fdt_write_id(ms, aplic_name, socket);
666     qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle);
667 }
668 
669 static void create_fdt_socket_aplic(RISCVVirtState *s,
670                                     const MemMapEntry *memmap, int socket,
671                                     uint32_t msi_m_phandle,
672                                     uint32_t msi_s_phandle,
673                                     uint32_t *phandle,
674                                     uint32_t *intc_phandles,
675                                     uint32_t *aplic_phandles,
676                                     int num_harts)
677 {
678     unsigned long aplic_addr;
679     MachineState *ms = MACHINE(s);
680     uint32_t aplic_m_phandle, aplic_s_phandle;
681 
682     aplic_m_phandle = (*phandle)++;
683     aplic_s_phandle = (*phandle)++;
684 
685     if (!kvm_enabled()) {
686         /* M-level APLIC node */
687         aplic_addr = memmap[VIRT_APLIC_M].base +
688                      (memmap[VIRT_APLIC_M].size * socket);
689         create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size,
690                              msi_m_phandle, intc_phandles,
691                              aplic_m_phandle, aplic_s_phandle,
692                              true, num_harts);
693     }
694 
695     /* S-level APLIC node */
696     aplic_addr = memmap[VIRT_APLIC_S].base +
697                  (memmap[VIRT_APLIC_S].size * socket);
698     create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size,
699                          msi_s_phandle, intc_phandles,
700                          aplic_s_phandle, 0,
701                          false, num_harts);
702 
703     if (!socket) {
704         g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr);
705         platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name,
706                                        memmap[VIRT_PLATFORM_BUS].base,
707                                        memmap[VIRT_PLATFORM_BUS].size,
708                                        VIRT_PLATFORM_BUS_IRQ);
709     }
710 
711     aplic_phandles[socket] = aplic_s_phandle;
712 }
713 
714 static void create_fdt_pmu(RISCVVirtState *s)
715 {
716     g_autofree char *pmu_name = g_strdup_printf("/pmu");
717     MachineState *ms = MACHINE(s);
718     RISCVCPU hart = s->soc[0].harts[0];
719 
720     qemu_fdt_add_subnode(ms->fdt, pmu_name);
721     qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
722     riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
723 }
724 
725 static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
726                                uint32_t *phandle,
727                                uint32_t *irq_mmio_phandle,
728                                uint32_t *irq_pcie_phandle,
729                                uint32_t *irq_virtio_phandle,
730                                uint32_t *msi_pcie_phandle)
731 {
732     int socket, phandle_pos;
733     MachineState *ms = MACHINE(s);
734     uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
735     uint32_t xplic_phandles[MAX_NODES];
736     g_autofree uint32_t *intc_phandles = NULL;
737     int socket_count = riscv_socket_count(ms);
738 
739     qemu_fdt_add_subnode(ms->fdt, "/cpus");
740     qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency",
741                           kvm_enabled() ?
742                           kvm_riscv_get_timebase_frequency(first_cpu) :
743                           RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ);
744     qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
745     qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
746     qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
747 
748     intc_phandles = g_new0(uint32_t, ms->smp.cpus);
749 
750     phandle_pos = ms->smp.cpus;
751     for (socket = (socket_count - 1); socket >= 0; socket--) {
752         g_autofree char *clust_name = NULL;
753         phandle_pos -= s->soc[socket].num_harts;
754 
755         clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
756         qemu_fdt_add_subnode(ms->fdt, clust_name);
757 
758         create_fdt_socket_cpus(s, socket, clust_name, phandle,
759                                &intc_phandles[phandle_pos]);
760 
761         create_fdt_socket_memory(s, memmap, socket);
762 
763         if (virt_aclint_allowed() && s->have_aclint) {
764             create_fdt_socket_aclint(s, memmap, socket,
765                                      &intc_phandles[phandle_pos]);
766         } else if (tcg_enabled()) {
767             create_fdt_socket_clint(s, memmap, socket,
768                                     &intc_phandles[phandle_pos]);
769         }
770     }
771 
772     if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
773         create_fdt_imsic(s, memmap, phandle, intc_phandles,
774             &msi_m_phandle, &msi_s_phandle);
775         *msi_pcie_phandle = msi_s_phandle;
776     }
777 
778     /* KVM AIA only has one APLIC instance */
779     if (kvm_enabled() && virt_use_kvm_aia(s)) {
780         create_fdt_socket_aplic(s, memmap, 0,
781                                 msi_m_phandle, msi_s_phandle, phandle,
782                                 &intc_phandles[0], xplic_phandles,
783                                 ms->smp.cpus);
784     } else {
785         phandle_pos = ms->smp.cpus;
786         for (socket = (socket_count - 1); socket >= 0; socket--) {
787             phandle_pos -= s->soc[socket].num_harts;
788 
789             if (s->aia_type == VIRT_AIA_TYPE_NONE) {
790                 create_fdt_socket_plic(s, memmap, socket, phandle,
791                                        &intc_phandles[phandle_pos],
792                                        xplic_phandles);
793             } else {
794                 create_fdt_socket_aplic(s, memmap, socket,
795                                         msi_m_phandle, msi_s_phandle, phandle,
796                                         &intc_phandles[phandle_pos],
797                                         xplic_phandles,
798                                         s->soc[socket].num_harts);
799             }
800         }
801     }
802 
803     if (kvm_enabled() && virt_use_kvm_aia(s)) {
804         *irq_mmio_phandle = xplic_phandles[0];
805         *irq_virtio_phandle = xplic_phandles[0];
806         *irq_pcie_phandle = xplic_phandles[0];
807     } else {
808         for (socket = 0; socket < socket_count; socket++) {
809             if (socket == 0) {
810                 *irq_mmio_phandle = xplic_phandles[socket];
811                 *irq_virtio_phandle = xplic_phandles[socket];
812                 *irq_pcie_phandle = xplic_phandles[socket];
813             }
814             if (socket == 1) {
815                 *irq_virtio_phandle = xplic_phandles[socket];
816                 *irq_pcie_phandle = xplic_phandles[socket];
817             }
818             if (socket == 2) {
819                 *irq_pcie_phandle = xplic_phandles[socket];
820             }
821         }
822     }
823 
824     riscv_socket_fdt_write_distance_matrix(ms);
825 }
826 
827 static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap,
828                               uint32_t irq_virtio_phandle)
829 {
830     int i;
831     MachineState *ms = MACHINE(s);
832 
833     for (i = 0; i < VIRTIO_COUNT; i++) {
834         g_autofree char *name =  g_strdup_printf("/soc/virtio_mmio@%lx",
835             (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size));
836 
837         qemu_fdt_add_subnode(ms->fdt, name);
838         qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio");
839         qemu_fdt_setprop_cells(ms->fdt, name, "reg",
840             0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size,
841             0x0, memmap[VIRT_VIRTIO].size);
842         qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
843             irq_virtio_phandle);
844         if (s->aia_type == VIRT_AIA_TYPE_NONE) {
845             qemu_fdt_setprop_cell(ms->fdt, name, "interrupts",
846                                   VIRTIO_IRQ + i);
847         } else {
848             qemu_fdt_setprop_cells(ms->fdt, name, "interrupts",
849                                    VIRTIO_IRQ + i, 0x4);
850         }
851     }
852 }
853 
854 static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap,
855                             uint32_t irq_pcie_phandle,
856                             uint32_t msi_pcie_phandle)
857 {
858     g_autofree char *name = NULL;
859     MachineState *ms = MACHINE(s);
860 
861     name = g_strdup_printf("/soc/pci@%lx",
862         (long) memmap[VIRT_PCIE_ECAM].base);
863     qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells",
864         FDT_PCI_ADDR_CELLS);
865     qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells",
866         FDT_PCI_INT_CELLS);
867     qemu_fdt_setprop_cell(ms->fdt, name, "#size-cells", 0x2);
868     qemu_fdt_setprop_string(ms->fdt, name, "compatible",
869         "pci-host-ecam-generic");
870     qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci");
871     qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0);
872     qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0,
873         memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
874     qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0);
875     if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
876         qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle);
877     }
878     qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
879         memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size);
880     qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
881         1, FDT_PCI_RANGE_IOPORT, 2, 0,
882         2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size,
883         1, FDT_PCI_RANGE_MMIO,
884         2, memmap[VIRT_PCIE_MMIO].base,
885         2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].size,
886         1, FDT_PCI_RANGE_MMIO_64BIT,
887         2, virt_high_pcie_memmap.base,
888         2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size);
889 
890     create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle);
891 }
892 
893 static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap,
894                              uint32_t *phandle)
895 {
896     char *name;
897     uint32_t test_phandle;
898     MachineState *ms = MACHINE(s);
899 
900     test_phandle = (*phandle)++;
901     name = g_strdup_printf("/soc/test@%lx",
902         (long)memmap[VIRT_TEST].base);
903     qemu_fdt_add_subnode(ms->fdt, name);
904     {
905         static const char * const compat[3] = {
906             "sifive,test1", "sifive,test0", "syscon"
907         };
908         qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
909                                       (char **)&compat, ARRAY_SIZE(compat));
910     }
911     qemu_fdt_setprop_cells(ms->fdt, name, "reg",
912         0x0, memmap[VIRT_TEST].base, 0x0, memmap[VIRT_TEST].size);
913     qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
914     test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
915     g_free(name);
916 
917     name = g_strdup_printf("/reboot");
918     qemu_fdt_add_subnode(ms->fdt, name);
919     qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-reboot");
920     qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
921     qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
922     qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_RESET);
923     g_free(name);
924 
925     name = g_strdup_printf("/poweroff");
926     qemu_fdt_add_subnode(ms->fdt, name);
927     qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-poweroff");
928     qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
929     qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
930     qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_PASS);
931     g_free(name);
932 }
933 
934 static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap,
935                             uint32_t irq_mmio_phandle)
936 {
937     g_autofree char *name = NULL;
938     MachineState *ms = MACHINE(s);
939 
940     name = g_strdup_printf("/soc/serial@%lx", (long)memmap[VIRT_UART0].base);
941     qemu_fdt_add_subnode(ms->fdt, name);
942     qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
943     qemu_fdt_setprop_cells(ms->fdt, name, "reg",
944         0x0, memmap[VIRT_UART0].base,
945         0x0, memmap[VIRT_UART0].size);
946     qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
947     qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle);
948     if (s->aia_type == VIRT_AIA_TYPE_NONE) {
949         qemu_fdt_setprop_cell(ms->fdt, name, "interrupts", UART0_IRQ);
950     } else {
951         qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", UART0_IRQ, 0x4);
952     }
953 
954     qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
955 }
956 
957 static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap,
958                            uint32_t irq_mmio_phandle)
959 {
960     g_autofree char *name = NULL;
961     MachineState *ms = MACHINE(s);
962 
963     name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base);
964     qemu_fdt_add_subnode(ms->fdt, name);
965     qemu_fdt_setprop_string(ms->fdt, name, "compatible",
966         "google,goldfish-rtc");
967     qemu_fdt_setprop_cells(ms->fdt, name, "reg",
968         0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size);
969     qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
970         irq_mmio_phandle);
971     if (s->aia_type == VIRT_AIA_TYPE_NONE) {
972         qemu_fdt_setprop_cell(ms->fdt, name, "interrupts", RTC_IRQ);
973     } else {
974         qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RTC_IRQ, 0x4);
975     }
976 }
977 
978 static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap)
979 {
980     MachineState *ms = MACHINE(s);
981     hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
982     hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
983     g_autofree char *name = g_strdup_printf("/flash@%" PRIx64, flashbase);
984 
985     qemu_fdt_add_subnode(ms->fdt, name);
986     qemu_fdt_setprop_string(ms->fdt, name, "compatible", "cfi-flash");
987     qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
988                                  2, flashbase, 2, flashsize,
989                                  2, flashbase + flashsize, 2, flashsize);
990     qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4);
991 }
992 
993 static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap)
994 {
995     MachineState *ms = MACHINE(s);
996     hwaddr base = memmap[VIRT_FW_CFG].base;
997     hwaddr size = memmap[VIRT_FW_CFG].size;
998     g_autofree char *nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
999 
1000     qemu_fdt_add_subnode(ms->fdt, nodename);
1001     qemu_fdt_setprop_string(ms->fdt, nodename,
1002                             "compatible", "qemu,fw-cfg-mmio");
1003     qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg",
1004                                  2, base, 2, size);
1005     qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
1006 }
1007 
1008 static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf)
1009 {
1010     const char compat[] = "virtio,pci-iommu\0pci1af4,1057";
1011     void *fdt = MACHINE(s)->fdt;
1012     uint32_t iommu_phandle;
1013     g_autofree char *iommu_node = NULL;
1014     g_autofree char *pci_node = NULL;
1015 
1016     pci_node = g_strdup_printf("/soc/pci@%lx",
1017                                (long) virt_memmap[VIRT_PCIE_ECAM].base);
1018     iommu_node = g_strdup_printf("%s/virtio_iommu@%x,%x", pci_node,
1019                                  PCI_SLOT(bdf), PCI_FUNC(bdf));
1020     iommu_phandle = qemu_fdt_alloc_phandle(fdt);
1021 
1022     qemu_fdt_add_subnode(fdt, iommu_node);
1023 
1024     qemu_fdt_setprop(fdt, iommu_node, "compatible", compat, sizeof(compat));
1025     qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg",
1026                                  1, bdf << 8, 1, 0, 1, 0,
1027                                  1, 0, 1, 0);
1028     qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1);
1029     qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle);
1030 
1031     qemu_fdt_setprop_cells(fdt, pci_node, "iommu-map",
1032                            0, iommu_phandle, 0, bdf,
1033                            bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf);
1034 }
1035 
1036 static void create_fdt_iommu(RISCVVirtState *s, uint16_t bdf)
1037 {
1038     const char comp[] = "riscv,pci-iommu";
1039     void *fdt = MACHINE(s)->fdt;
1040     uint32_t iommu_phandle;
1041     g_autofree char *iommu_node = NULL;
1042     g_autofree char *pci_node = NULL;
1043 
1044     pci_node = g_strdup_printf("/soc/pci@%lx",
1045                                (long) virt_memmap[VIRT_PCIE_ECAM].base);
1046     iommu_node = g_strdup_printf("%s/iommu@%x", pci_node, bdf);
1047     iommu_phandle = qemu_fdt_alloc_phandle(fdt);
1048     qemu_fdt_add_subnode(fdt, iommu_node);
1049 
1050     qemu_fdt_setprop(fdt, iommu_node, "compatible", comp, sizeof(comp));
1051     qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1);
1052     qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle);
1053     qemu_fdt_setprop_cells(fdt, iommu_node, "reg",
1054                            bdf << 8, 0, 0, 0, 0);
1055     qemu_fdt_setprop_cells(fdt, pci_node, "iommu-map",
1056                            0, iommu_phandle, 0, bdf,
1057                            bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf);
1058 }
1059 
1060 static void finalize_fdt(RISCVVirtState *s)
1061 {
1062     uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
1063     uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1;
1064 
1065     create_fdt_sockets(s, virt_memmap, &phandle, &irq_mmio_phandle,
1066                        &irq_pcie_phandle, &irq_virtio_phandle,
1067                        &msi_pcie_phandle);
1068 
1069     create_fdt_virtio(s, virt_memmap, irq_virtio_phandle);
1070 
1071     create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle);
1072 
1073     create_fdt_reset(s, virt_memmap, &phandle);
1074 
1075     create_fdt_uart(s, virt_memmap, irq_mmio_phandle);
1076 
1077     create_fdt_rtc(s, virt_memmap, irq_mmio_phandle);
1078 }
1079 
1080 static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap)
1081 {
1082     MachineState *ms = MACHINE(s);
1083     uint8_t rng_seed[32];
1084     g_autofree char *name = NULL;
1085 
1086     ms->fdt = create_device_tree(&s->fdt_size);
1087     if (!ms->fdt) {
1088         error_report("create_device_tree() failed");
1089         exit(1);
1090     }
1091 
1092     qemu_fdt_setprop_string(ms->fdt, "/", "model", "riscv-virtio,qemu");
1093     qemu_fdt_setprop_string(ms->fdt, "/", "compatible", "riscv-virtio");
1094     qemu_fdt_setprop_cell(ms->fdt, "/", "#size-cells", 0x2);
1095     qemu_fdt_setprop_cell(ms->fdt, "/", "#address-cells", 0x2);
1096 
1097     qemu_fdt_add_subnode(ms->fdt, "/soc");
1098     qemu_fdt_setprop(ms->fdt, "/soc", "ranges", NULL, 0);
1099     qemu_fdt_setprop_string(ms->fdt, "/soc", "compatible", "simple-bus");
1100     qemu_fdt_setprop_cell(ms->fdt, "/soc", "#size-cells", 0x2);
1101     qemu_fdt_setprop_cell(ms->fdt, "/soc", "#address-cells", 0x2);
1102 
1103     /*
1104      * The "/soc/pci@..." node is needed for PCIE hotplugs
1105      * that might happen before finalize_fdt().
1106      */
1107     name = g_strdup_printf("/soc/pci@%lx", (long) memmap[VIRT_PCIE_ECAM].base);
1108     qemu_fdt_add_subnode(ms->fdt, name);
1109 
1110     qemu_fdt_add_subnode(ms->fdt, "/chosen");
1111 
1112     /* Pass seed to RNG */
1113     qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
1114     qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
1115                      rng_seed, sizeof(rng_seed));
1116 
1117     create_fdt_flash(s, memmap);
1118     create_fdt_fw_cfg(s, memmap);
1119     create_fdt_pmu(s);
1120 }
1121 
1122 static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
1123                                           DeviceState *irqchip,
1124                                           RISCVVirtState *s)
1125 {
1126     DeviceState *dev;
1127     MemoryRegion *ecam_alias, *ecam_reg;
1128     MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
1129     hwaddr ecam_base = s->memmap[VIRT_PCIE_ECAM].base;
1130     hwaddr ecam_size = s->memmap[VIRT_PCIE_ECAM].size;
1131     hwaddr mmio_base = s->memmap[VIRT_PCIE_MMIO].base;
1132     hwaddr mmio_size = s->memmap[VIRT_PCIE_MMIO].size;
1133     hwaddr high_mmio_base = virt_high_pcie_memmap.base;
1134     hwaddr high_mmio_size = virt_high_pcie_memmap.size;
1135     hwaddr pio_base = s->memmap[VIRT_PCIE_PIO].base;
1136     hwaddr pio_size = s->memmap[VIRT_PCIE_PIO].size;
1137     qemu_irq irq;
1138     int i;
1139 
1140     dev = qdev_new(TYPE_GPEX_HOST);
1141 
1142     /* Set GPEX object properties for the virt machine */
1143     object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
1144                             ecam_base, NULL);
1145     object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
1146                             ecam_size, NULL);
1147     object_property_set_uint(OBJECT(GPEX_HOST(dev)),
1148                              PCI_HOST_BELOW_4G_MMIO_BASE,
1149                              mmio_base, NULL);
1150     object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
1151                             mmio_size, NULL);
1152     object_property_set_uint(OBJECT(GPEX_HOST(dev)),
1153                              PCI_HOST_ABOVE_4G_MMIO_BASE,
1154                              high_mmio_base, NULL);
1155     object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
1156                             high_mmio_size, NULL);
1157     object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
1158                             pio_base, NULL);
1159     object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
1160                             pio_size, NULL);
1161 
1162     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
1163 
1164     ecam_alias = g_new0(MemoryRegion, 1);
1165     ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
1166     memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
1167                              ecam_reg, 0, ecam_size);
1168     memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
1169 
1170     mmio_alias = g_new0(MemoryRegion, 1);
1171     mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
1172     memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
1173                              mmio_reg, mmio_base, mmio_size);
1174     memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
1175 
1176     /* Map high MMIO space */
1177     high_mmio_alias = g_new0(MemoryRegion, 1);
1178     memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
1179                              mmio_reg, high_mmio_base, high_mmio_size);
1180     memory_region_add_subregion(get_system_memory(), high_mmio_base,
1181                                 high_mmio_alias);
1182 
1183     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
1184 
1185     for (i = 0; i < GPEX_NUM_IRQS; i++) {
1186         irq = qdev_get_gpio_in(irqchip, PCIE_IRQ + i);
1187 
1188         sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
1189         gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i);
1190     }
1191 
1192     GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
1193     return dev;
1194 }
1195 
1196 static FWCfgState *create_fw_cfg(const MachineState *ms)
1197 {
1198     hwaddr base = virt_memmap[VIRT_FW_CFG].base;
1199     FWCfgState *fw_cfg;
1200 
1201     fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16,
1202                                   &address_space_memory);
1203     fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus);
1204 
1205     return fw_cfg;
1206 }
1207 
1208 static DeviceState *virt_create_plic(const MemMapEntry *memmap, int socket,
1209                                      int base_hartid, int hart_count)
1210 {
1211     DeviceState *ret;
1212     g_autofree char *plic_hart_config = NULL;
1213 
1214     /* Per-socket PLIC hart topology configuration string */
1215     plic_hart_config = riscv_plic_hart_config_string(hart_count);
1216 
1217     /* Per-socket PLIC */
1218     ret = sifive_plic_create(
1219             memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size,
1220             plic_hart_config, hart_count, base_hartid,
1221             VIRT_IRQCHIP_NUM_SOURCES,
1222             ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1),
1223             VIRT_PLIC_PRIORITY_BASE,
1224             VIRT_PLIC_PENDING_BASE,
1225             VIRT_PLIC_ENABLE_BASE,
1226             VIRT_PLIC_ENABLE_STRIDE,
1227             VIRT_PLIC_CONTEXT_BASE,
1228             VIRT_PLIC_CONTEXT_STRIDE,
1229             memmap[VIRT_PLIC].size);
1230 
1231     return ret;
1232 }
1233 
1234 static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
1235                                     const MemMapEntry *memmap, int socket,
1236                                     int base_hartid, int hart_count)
1237 {
1238     int i;
1239     hwaddr addr;
1240     uint32_t guest_bits;
1241     DeviceState *aplic_s = NULL;
1242     DeviceState *aplic_m = NULL;
1243     bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
1244 
1245     if (msimode) {
1246         if (!kvm_enabled()) {
1247             /* Per-socket M-level IMSICs */
1248             addr = memmap[VIRT_IMSIC_M].base +
1249                    socket * VIRT_IMSIC_GROUP_MAX_SIZE;
1250             for (i = 0; i < hart_count; i++) {
1251                 riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
1252                                    base_hartid + i, true, 1,
1253                                    VIRT_IRQCHIP_NUM_MSIS);
1254             }
1255         }
1256 
1257         /* Per-socket S-level IMSICs */
1258         guest_bits = imsic_num_bits(aia_guests + 1);
1259         addr = memmap[VIRT_IMSIC_S].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
1260         for (i = 0; i < hart_count; i++) {
1261             riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
1262                                base_hartid + i, false, 1 + aia_guests,
1263                                VIRT_IRQCHIP_NUM_MSIS);
1264         }
1265     }
1266 
1267     if (!kvm_enabled()) {
1268         /* Per-socket M-level APLIC */
1269         aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base +
1270                                      socket * memmap[VIRT_APLIC_M].size,
1271                                      memmap[VIRT_APLIC_M].size,
1272                                      (msimode) ? 0 : base_hartid,
1273                                      (msimode) ? 0 : hart_count,
1274                                      VIRT_IRQCHIP_NUM_SOURCES,
1275                                      VIRT_IRQCHIP_NUM_PRIO_BITS,
1276                                      msimode, true, NULL);
1277     }
1278 
1279     /* Per-socket S-level APLIC */
1280     aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base +
1281                                  socket * memmap[VIRT_APLIC_S].size,
1282                                  memmap[VIRT_APLIC_S].size,
1283                                  (msimode) ? 0 : base_hartid,
1284                                  (msimode) ? 0 : hart_count,
1285                                  VIRT_IRQCHIP_NUM_SOURCES,
1286                                  VIRT_IRQCHIP_NUM_PRIO_BITS,
1287                                  msimode, false, aplic_m);
1288 
1289     return kvm_enabled() ? aplic_s : aplic_m;
1290 }
1291 
1292 static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip)
1293 {
1294     DeviceState *dev;
1295     SysBusDevice *sysbus;
1296     const MemMapEntry *memmap = virt_memmap;
1297     int i;
1298     MemoryRegion *sysmem = get_system_memory();
1299 
1300     dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE);
1301     dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE);
1302     qdev_prop_set_uint32(dev, "num_irqs", VIRT_PLATFORM_BUS_NUM_IRQS);
1303     qdev_prop_set_uint32(dev, "mmio_size", memmap[VIRT_PLATFORM_BUS].size);
1304     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
1305     s->platform_bus_dev = dev;
1306 
1307     sysbus = SYS_BUS_DEVICE(dev);
1308     for (i = 0; i < VIRT_PLATFORM_BUS_NUM_IRQS; i++) {
1309         int irq = VIRT_PLATFORM_BUS_IRQ + i;
1310         sysbus_connect_irq(sysbus, i, qdev_get_gpio_in(irqchip, irq));
1311     }
1312 
1313     memory_region_add_subregion(sysmem,
1314                                 memmap[VIRT_PLATFORM_BUS].base,
1315                                 sysbus_mmio_get_region(sysbus, 0));
1316 }
1317 
1318 static void virt_build_smbios(RISCVVirtState *s)
1319 {
1320     MachineClass *mc = MACHINE_GET_CLASS(s);
1321     MachineState *ms = MACHINE(s);
1322     uint8_t *smbios_tables, *smbios_anchor;
1323     size_t smbios_tables_len, smbios_anchor_len;
1324     struct smbios_phys_mem_area mem_array;
1325     const char *product = "QEMU Virtual Machine";
1326 
1327     if (kvm_enabled()) {
1328         product = "KVM Virtual Machine";
1329     }
1330 
1331     smbios_set_defaults("QEMU", product, mc->name);
1332 
1333     if (riscv_is_32bit(&s->soc[0])) {
1334         smbios_set_default_processor_family(0x200);
1335     } else {
1336         smbios_set_default_processor_family(0x201);
1337     }
1338 
1339     /* build the array of physical mem area from base_memmap */
1340     mem_array.address = s->memmap[VIRT_DRAM].base;
1341     mem_array.length = ms->ram_size;
1342 
1343     smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64,
1344                       &mem_array, 1,
1345                       &smbios_tables, &smbios_tables_len,
1346                       &smbios_anchor, &smbios_anchor_len,
1347                       &error_fatal);
1348 
1349     if (smbios_anchor) {
1350         fw_cfg_add_file(s->fw_cfg, "etc/smbios/smbios-tables",
1351                         smbios_tables, smbios_tables_len);
1352         fw_cfg_add_file(s->fw_cfg, "etc/smbios/smbios-anchor",
1353                         smbios_anchor, smbios_anchor_len);
1354     }
1355 }
1356 
1357 static void virt_machine_done(Notifier *notifier, void *data)
1358 {
1359     RISCVVirtState *s = container_of(notifier, RISCVVirtState,
1360                                      machine_done);
1361     const MemMapEntry *memmap = virt_memmap;
1362     MachineState *machine = MACHINE(s);
1363     hwaddr start_addr = memmap[VIRT_DRAM].base;
1364     target_ulong firmware_end_addr, kernel_start_addr;
1365     const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
1366     uint64_t fdt_load_addr;
1367     uint64_t kernel_entry = 0;
1368     BlockBackend *pflash_blk0;
1369 
1370     /*
1371      * An user provided dtb must include everything, including
1372      * dynamic sysbus devices. Our FDT needs to be finalized.
1373      */
1374     if (machine->dtb == NULL) {
1375         finalize_fdt(s);
1376     }
1377 
1378     /*
1379      * Only direct boot kernel is currently supported for KVM VM,
1380      * so the "-bios" parameter is not supported when KVM is enabled.
1381      */
1382     if (kvm_enabled()) {
1383         if (machine->firmware) {
1384             if (strcmp(machine->firmware, "none")) {
1385                 error_report("Machine mode firmware is not supported in "
1386                              "combination with KVM.");
1387                 exit(1);
1388             }
1389         } else {
1390             machine->firmware = g_strdup("none");
1391         }
1392     }
1393 
1394     firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
1395                                                      &start_addr, NULL);
1396 
1397     pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
1398     if (pflash_blk0) {
1399         if (machine->firmware && !strcmp(machine->firmware, "none") &&
1400             !kvm_enabled()) {
1401             /*
1402              * Pflash was supplied but bios is none and not KVM guest,
1403              * let's overwrite the address we jump to after reset to
1404              * the base of the flash.
1405              */
1406             start_addr = virt_memmap[VIRT_FLASH].base;
1407         } else {
1408             /*
1409              * Pflash was supplied but either KVM guest or bios is not none.
1410              * In this case, base of the flash would contain S-mode payload.
1411              */
1412             riscv_setup_firmware_boot(machine);
1413             kernel_entry = virt_memmap[VIRT_FLASH].base;
1414         }
1415     }
1416 
1417     if (machine->kernel_filename && !kernel_entry) {
1418         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
1419                                                          firmware_end_addr);
1420 
1421         kernel_entry = riscv_load_kernel(machine, &s->soc[0],
1422                                          kernel_start_addr, true, NULL);
1423     }
1424 
1425     fdt_load_addr = riscv_compute_fdt_addr(memmap[VIRT_DRAM].base,
1426                                            memmap[VIRT_DRAM].size,
1427                                            machine);
1428     riscv_load_fdt(fdt_load_addr, machine->fdt);
1429 
1430     /* load the reset vector */
1431     riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
1432                               virt_memmap[VIRT_MROM].base,
1433                               virt_memmap[VIRT_MROM].size, kernel_entry,
1434                               fdt_load_addr);
1435 
1436     /*
1437      * Only direct boot kernel is currently supported for KVM VM,
1438      * So here setup kernel start address and fdt address.
1439      * TODO:Support firmware loading and integrate to TCG start
1440      */
1441     if (kvm_enabled()) {
1442         riscv_setup_direct_kernel(kernel_entry, fdt_load_addr);
1443     }
1444 
1445     virt_build_smbios(s);
1446 
1447     if (virt_is_acpi_enabled(s)) {
1448         virt_acpi_setup(s);
1449     }
1450 }
1451 
1452 static void virt_machine_init(MachineState *machine)
1453 {
1454     const MemMapEntry *memmap = virt_memmap;
1455     RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
1456     MemoryRegion *system_memory = get_system_memory();
1457     MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
1458     DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip;
1459     int i, base_hartid, hart_count;
1460     int socket_count = riscv_socket_count(machine);
1461 
1462     /* Check socket count limit */
1463     if (VIRT_SOCKETS_MAX < socket_count) {
1464         error_report("number of sockets/nodes should be less than %d",
1465             VIRT_SOCKETS_MAX);
1466         exit(1);
1467     }
1468 
1469     if (!virt_aclint_allowed() && s->have_aclint) {
1470         error_report("'aclint' is only available with TCG acceleration");
1471         exit(1);
1472     }
1473 
1474     /* Initialize sockets */
1475     mmio_irqchip = virtio_irqchip = pcie_irqchip = NULL;
1476     for (i = 0; i < socket_count; i++) {
1477         g_autofree char *soc_name = g_strdup_printf("soc%d", i);
1478 
1479         if (!riscv_socket_check_hartids(machine, i)) {
1480             error_report("discontinuous hartids in socket%d", i);
1481             exit(1);
1482         }
1483 
1484         base_hartid = riscv_socket_first_hartid(machine, i);
1485         if (base_hartid < 0) {
1486             error_report("can't find hartid base for socket%d", i);
1487             exit(1);
1488         }
1489 
1490         hart_count = riscv_socket_hart_count(machine, i);
1491         if (hart_count < 0) {
1492             error_report("can't find hart count for socket%d", i);
1493             exit(1);
1494         }
1495 
1496         object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
1497                                 TYPE_RISCV_HART_ARRAY);
1498         object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
1499                                 machine->cpu_type, &error_abort);
1500         object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
1501                                 base_hartid, &error_abort);
1502         object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
1503                                 hart_count, &error_abort);
1504         sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
1505 
1506         if (virt_aclint_allowed() && s->have_aclint) {
1507             if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
1508                 /* Per-socket ACLINT MTIMER */
1509                 riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
1510                             i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
1511                         RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
1512                         base_hartid, hart_count,
1513                         RISCV_ACLINT_DEFAULT_MTIMECMP,
1514                         RISCV_ACLINT_DEFAULT_MTIME,
1515                         RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
1516             } else {
1517                 /* Per-socket ACLINT MSWI, MTIMER, and SSWI */
1518                 riscv_aclint_swi_create(memmap[VIRT_CLINT].base +
1519                             i * memmap[VIRT_CLINT].size,
1520                         base_hartid, hart_count, false);
1521                 riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
1522                             i * memmap[VIRT_CLINT].size +
1523                             RISCV_ACLINT_SWI_SIZE,
1524                         RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
1525                         base_hartid, hart_count,
1526                         RISCV_ACLINT_DEFAULT_MTIMECMP,
1527                         RISCV_ACLINT_DEFAULT_MTIME,
1528                         RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
1529                 riscv_aclint_swi_create(memmap[VIRT_ACLINT_SSWI].base +
1530                             i * memmap[VIRT_ACLINT_SSWI].size,
1531                         base_hartid, hart_count, true);
1532             }
1533         } else if (tcg_enabled()) {
1534             /* Per-socket SiFive CLINT */
1535             riscv_aclint_swi_create(
1536                     memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size,
1537                     base_hartid, hart_count, false);
1538             riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
1539                         i * memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE,
1540                     RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count,
1541                     RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME,
1542                     RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
1543         }
1544 
1545         /* Per-socket interrupt controller */
1546         if (s->aia_type == VIRT_AIA_TYPE_NONE) {
1547             s->irqchip[i] = virt_create_plic(memmap, i,
1548                                              base_hartid, hart_count);
1549         } else {
1550             s->irqchip[i] = virt_create_aia(s->aia_type, s->aia_guests,
1551                                             memmap, i, base_hartid,
1552                                             hart_count);
1553         }
1554 
1555         /* Try to use different IRQCHIP instance based device type */
1556         if (i == 0) {
1557             mmio_irqchip = s->irqchip[i];
1558             virtio_irqchip = s->irqchip[i];
1559             pcie_irqchip = s->irqchip[i];
1560         }
1561         if (i == 1) {
1562             virtio_irqchip = s->irqchip[i];
1563             pcie_irqchip = s->irqchip[i];
1564         }
1565         if (i == 2) {
1566             pcie_irqchip = s->irqchip[i];
1567         }
1568     }
1569 
1570     if (kvm_enabled() && virt_use_kvm_aia(s)) {
1571         kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT,
1572                              VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS,
1573                              memmap[VIRT_APLIC_S].base,
1574                              memmap[VIRT_IMSIC_S].base,
1575                              s->aia_guests);
1576     }
1577 
1578     if (riscv_is_32bit(&s->soc[0])) {
1579 #if HOST_LONG_BITS == 64
1580         /* limit RAM size in a 32-bit system */
1581         if (machine->ram_size > 10 * GiB) {
1582             machine->ram_size = 10 * GiB;
1583             error_report("Limiting RAM size to 10 GiB");
1584         }
1585 #endif
1586         virt_high_pcie_memmap.base = VIRT32_HIGH_PCIE_MMIO_BASE;
1587         virt_high_pcie_memmap.size = VIRT32_HIGH_PCIE_MMIO_SIZE;
1588     } else {
1589         virt_high_pcie_memmap.size = VIRT64_HIGH_PCIE_MMIO_SIZE;
1590         virt_high_pcie_memmap.base = memmap[VIRT_DRAM].base + machine->ram_size;
1591         virt_high_pcie_memmap.base =
1592             ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size);
1593     }
1594 
1595     s->memmap = virt_memmap;
1596 
1597     /* register system main memory (actual RAM) */
1598     memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base,
1599         machine->ram);
1600 
1601     /* boot rom */
1602     memory_region_init_rom(mask_rom, NULL, "riscv_virt_board.mrom",
1603                            memmap[VIRT_MROM].size, &error_fatal);
1604     memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base,
1605                                 mask_rom);
1606 
1607     /*
1608      * Init fw_cfg. Must be done before riscv_load_fdt, otherwise the
1609      * device tree cannot be altered and we get FDT_ERR_NOSPACE.
1610      */
1611     s->fw_cfg = create_fw_cfg(machine);
1612     rom_set_fw(s->fw_cfg);
1613 
1614     /* SiFive Test MMIO device */
1615     sifive_test_create(memmap[VIRT_TEST].base);
1616 
1617     /* VirtIO MMIO devices */
1618     for (i = 0; i < VIRTIO_COUNT; i++) {
1619         sysbus_create_simple("virtio-mmio",
1620             memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size,
1621             qdev_get_gpio_in(virtio_irqchip, VIRTIO_IRQ + i));
1622     }
1623 
1624     gpex_pcie_init(system_memory, pcie_irqchip, s);
1625 
1626     create_platform_bus(s, mmio_irqchip);
1627 
1628     serial_mm_init(system_memory, memmap[VIRT_UART0].base,
1629         0, qdev_get_gpio_in(mmio_irqchip, UART0_IRQ), 399193,
1630         serial_hd(0), DEVICE_LITTLE_ENDIAN);
1631 
1632     sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base,
1633         qdev_get_gpio_in(mmio_irqchip, RTC_IRQ));
1634 
1635     for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
1636         /* Map legacy -drive if=pflash to machine properties */
1637         pflash_cfi01_legacy_drive(s->flash[i],
1638                                   drive_get(IF_PFLASH, 0, i));
1639     }
1640     virt_flash_map(s, system_memory);
1641 
1642     /* load/create device tree */
1643     if (machine->dtb) {
1644         machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
1645         if (!machine->fdt) {
1646             error_report("load_device_tree() failed");
1647             exit(1);
1648         }
1649     } else {
1650         create_fdt(s, memmap);
1651     }
1652 
1653     s->machine_done.notify = virt_machine_done;
1654     qemu_add_machine_init_done_notifier(&s->machine_done);
1655 }
1656 
1657 static void virt_machine_instance_init(Object *obj)
1658 {
1659     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1660 
1661     virt_flash_create(s);
1662 
1663     s->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
1664     s->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
1665     s->acpi = ON_OFF_AUTO_AUTO;
1666 }
1667 
1668 static char *virt_get_aia_guests(Object *obj, Error **errp)
1669 {
1670     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1671 
1672     return g_strdup_printf("%d", s->aia_guests);
1673 }
1674 
1675 static void virt_set_aia_guests(Object *obj, const char *val, Error **errp)
1676 {
1677     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1678 
1679     s->aia_guests = atoi(val);
1680     if (s->aia_guests < 0 || s->aia_guests > VIRT_IRQCHIP_MAX_GUESTS) {
1681         error_setg(errp, "Invalid number of AIA IMSIC guests");
1682         error_append_hint(errp, "Valid values be between 0 and %d.\n",
1683                           VIRT_IRQCHIP_MAX_GUESTS);
1684     }
1685 }
1686 
1687 static char *virt_get_aia(Object *obj, Error **errp)
1688 {
1689     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1690     const char *val;
1691 
1692     switch (s->aia_type) {
1693     case VIRT_AIA_TYPE_APLIC:
1694         val = "aplic";
1695         break;
1696     case VIRT_AIA_TYPE_APLIC_IMSIC:
1697         val = "aplic-imsic";
1698         break;
1699     default:
1700         val = "none";
1701         break;
1702     };
1703 
1704     return g_strdup(val);
1705 }
1706 
1707 static void virt_set_aia(Object *obj, const char *val, Error **errp)
1708 {
1709     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1710 
1711     if (!strcmp(val, "none")) {
1712         s->aia_type = VIRT_AIA_TYPE_NONE;
1713     } else if (!strcmp(val, "aplic")) {
1714         s->aia_type = VIRT_AIA_TYPE_APLIC;
1715     } else if (!strcmp(val, "aplic-imsic")) {
1716         s->aia_type = VIRT_AIA_TYPE_APLIC_IMSIC;
1717     } else {
1718         error_setg(errp, "Invalid AIA interrupt controller type");
1719         error_append_hint(errp, "Valid values are none, aplic, and "
1720                           "aplic-imsic.\n");
1721     }
1722 }
1723 
1724 static bool virt_get_aclint(Object *obj, Error **errp)
1725 {
1726     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1727 
1728     return s->have_aclint;
1729 }
1730 
1731 static void virt_set_aclint(Object *obj, bool value, Error **errp)
1732 {
1733     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1734 
1735     s->have_aclint = value;
1736 }
1737 
1738 bool virt_is_acpi_enabled(RISCVVirtState *s)
1739 {
1740     return s->acpi != ON_OFF_AUTO_OFF;
1741 }
1742 
1743 static void virt_get_acpi(Object *obj, Visitor *v, const char *name,
1744                           void *opaque, Error **errp)
1745 {
1746     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1747     OnOffAuto acpi = s->acpi;
1748 
1749     visit_type_OnOffAuto(v, name, &acpi, errp);
1750 }
1751 
1752 static void virt_set_acpi(Object *obj, Visitor *v, const char *name,
1753                           void *opaque, Error **errp)
1754 {
1755     RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
1756 
1757     visit_type_OnOffAuto(v, name, &s->acpi, errp);
1758 }
1759 
1760 static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
1761                                                         DeviceState *dev)
1762 {
1763     MachineClass *mc = MACHINE_GET_CLASS(machine);
1764 
1765     if (device_is_dynamic_sysbus(mc, dev) ||
1766         object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
1767         object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) {
1768         return HOTPLUG_HANDLER(machine);
1769     }
1770 
1771     return NULL;
1772 }
1773 
1774 static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
1775                                         DeviceState *dev, Error **errp)
1776 {
1777     RISCVVirtState *s = RISCV_VIRT_MACHINE(hotplug_dev);
1778 
1779     if (s->platform_bus_dev) {
1780         MachineClass *mc = MACHINE_GET_CLASS(s);
1781 
1782         if (device_is_dynamic_sysbus(mc, dev)) {
1783             platform_bus_link_device(PLATFORM_BUS_DEVICE(s->platform_bus_dev),
1784                                      SYS_BUS_DEVICE(dev));
1785         }
1786     }
1787 
1788     if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
1789         create_fdt_virtio_iommu(s, pci_get_bdf(PCI_DEVICE(dev)));
1790     }
1791 
1792     if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) {
1793         create_fdt_iommu(s, pci_get_bdf(PCI_DEVICE(dev)));
1794     }
1795 }
1796 
1797 static void virt_machine_class_init(ObjectClass *oc, void *data)
1798 {
1799     MachineClass *mc = MACHINE_CLASS(oc);
1800     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
1801 
1802     mc->desc = "RISC-V VirtIO board";
1803     mc->init = virt_machine_init;
1804     mc->max_cpus = VIRT_CPUS_MAX;
1805     mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
1806     mc->block_default_type = IF_VIRTIO;
1807     mc->no_cdrom = 1;
1808     mc->pci_allow_0_address = true;
1809     mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
1810     mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
1811     mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
1812     mc->numa_mem_supported = true;
1813     /* platform instead of architectural choice */
1814     mc->cpu_cluster_has_numa_boundary = true;
1815     mc->default_ram_id = "riscv_virt_board.ram";
1816     assert(!mc->get_hotplug_handler);
1817     mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
1818 
1819     hc->plug = virt_machine_device_plug_cb;
1820 
1821     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
1822 #ifdef CONFIG_TPM
1823     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
1824 #endif
1825 
1826     object_class_property_add_bool(oc, "aclint", virt_get_aclint,
1827                                    virt_set_aclint);
1828     object_class_property_set_description(oc, "aclint",
1829                                           "(TCG only) Set on/off to "
1830                                           "enable/disable emulating "
1831                                           "ACLINT devices");
1832 
1833     object_class_property_add_str(oc, "aia", virt_get_aia,
1834                                   virt_set_aia);
1835     object_class_property_set_description(oc, "aia",
1836                                           "Set type of AIA interrupt "
1837                                           "controller. Valid values are "
1838                                           "none, aplic, and aplic-imsic.");
1839 
1840     object_class_property_add_str(oc, "aia-guests",
1841                                   virt_get_aia_guests,
1842                                   virt_set_aia_guests);
1843     {
1844         g_autofree char *str =
1845             g_strdup_printf("Set number of guest MMIO pages for AIA IMSIC. "
1846                             "Valid value should be between 0 and %d.",
1847                             VIRT_IRQCHIP_MAX_GUESTS);
1848         object_class_property_set_description(oc, "aia-guests", str);
1849     }
1850 
1851     object_class_property_add(oc, "acpi", "OnOffAuto",
1852                               virt_get_acpi, virt_set_acpi,
1853                               NULL, NULL);
1854     object_class_property_set_description(oc, "acpi",
1855                                           "Enable ACPI");
1856 }
1857 
1858 static const TypeInfo virt_machine_typeinfo = {
1859     .name       = MACHINE_TYPE_NAME("virt"),
1860     .parent     = TYPE_MACHINE,
1861     .class_init = virt_machine_class_init,
1862     .instance_init = virt_machine_instance_init,
1863     .instance_size = sizeof(RISCVVirtState),
1864     .interfaces = (InterfaceInfo[]) {
1865          { TYPE_HOTPLUG_HANDLER },
1866          { }
1867     },
1868 };
1869 
1870 static void virt_machine_init_register_types(void)
1871 {
1872     type_register_static(&virt_machine_typeinfo);
1873 }
1874 
1875 type_init(virt_machine_init_register_types)
1876