xref: /openbmc/qemu/hw/ppc/pnv.c (revision 89854803)
1 /*
2  * QEMU PowerPC PowerNV machine model
3  *
4  * Copyright (c) 2016, IBM Corporation.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qapi/error.h"
22 #include "sysemu/sysemu.h"
23 #include "sysemu/numa.h"
24 #include "sysemu/cpus.h"
25 #include "hw/hw.h"
26 #include "target/ppc/cpu.h"
27 #include "qemu/log.h"
28 #include "hw/ppc/fdt.h"
29 #include "hw/ppc/ppc.h"
30 #include "hw/ppc/pnv.h"
31 #include "hw/ppc/pnv_core.h"
32 #include "hw/loader.h"
33 #include "exec/address-spaces.h"
34 #include "qemu/cutils.h"
35 #include "qapi/visitor.h"
36 #include "monitor/monitor.h"
37 #include "hw/intc/intc.h"
38 #include "hw/ipmi/ipmi.h"
39 #include "target/ppc/mmu-hash64.h"
40 
41 #include "hw/ppc/xics.h"
42 #include "hw/ppc/pnv_xscom.h"
43 
44 #include "hw/isa/isa.h"
45 #include "hw/char/serial.h"
46 #include "hw/timer/mc146818rtc.h"
47 
48 #include <libfdt.h>
49 
/* Size of the buffer allocated for the device tree in pnv_dt_create() */
#define FDT_MAX_SIZE            0x00100000

/*
 * Firmware image loaded by pnv_init(): default file name (used when no
 * bios name was given), load address and maximum image size.
 */
#define FW_FILE_NAME            "skiboot.lid"
#define FW_LOAD_ADDR            0x0
#define FW_MAX_SIZE             0x00400000

/* Addresses at which pnv_init() loads the kernel and the initrd */
#define KERNEL_LOAD_ADDR        0x20000000
#define INITRD_LOAD_ADDR        0x60000000
58 
59 static const char *pnv_chip_core_typename(const PnvChip *o)
60 {
61     const char *chip_type = object_class_get_name(object_get_class(OBJECT(o)));
62     int len = strlen(chip_type) - strlen(PNV_CHIP_TYPE_SUFFIX);
63     char *s = g_strdup_printf(PNV_CORE_TYPE_NAME("%.*s"), len, chip_type);
64     const char *core_type = object_class_get_name(object_class_by_name(s));
65     g_free(s);
66     return core_type;
67 }
68 
/*
 * On Power Systems E880 (POWER8), the max cpus (threads) should be :
 *     4 * 4 sockets * 12 cores * 8 threads = 1536
 * Let's round that up to the next power of two, 2^11 = 2048.
 */
#define MAX_CPUS                2048
75 
76 /*
77  * Memory nodes are created by hostboot, one for each range of memory
78  * that has a different "affinity". In practice, it means one range
79  * per chip.
80  */
81 static void pnv_dt_memory(void *fdt, int chip_id, hwaddr start, hwaddr size)
82 {
83     char *mem_name;
84     uint64_t mem_reg_property[2];
85     int off;
86 
87     mem_reg_property[0] = cpu_to_be64(start);
88     mem_reg_property[1] = cpu_to_be64(size);
89 
90     mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start);
91     off = fdt_add_subnode(fdt, 0, mem_name);
92     g_free(mem_name);
93 
94     _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
95     _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
96                        sizeof(mem_reg_property))));
97     _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id)));
98 }
99 
/*
 * Return the offset of the "/cpus" node, creating it if it does not
 * exist yet.
 *
 * A newly created node gets "#address-cells" = 1 and "#size-cells" = 0.
 * libfdt failures are routed through _FDT().
 */
static int get_cpus_node(void *fdt)
{
    int cpus_offset = fdt_path_offset(fdt, "/cpus");

    if (cpus_offset < 0) {
        cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
        /*
         * libfdt signals errors with negative values, so a plain truth
         * test cannot tell a valid offset from a failure. Validate the
         * result before it is used as a node offset.
         */
        _FDT(cpus_offset);
        _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
        _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
    }
    return cpus_offset;
}
114 
115 /*
116  * The PowerNV cores (and threads) need to use real HW ids and not an
117  * incremental index like it has been done on other platforms. This HW
118  * id is stored in the CPU PIR, it is used to create cpu nodes in the
119  * device tree, used in XSCOM to address cores and in interrupt
120  * servers.
121  */
122 static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
123 {
124     CPUState *cs = CPU(DEVICE(pc->threads));
125     DeviceClass *dc = DEVICE_GET_CLASS(cs);
126     PowerPCCPU *cpu = POWERPC_CPU(cs);
127     int smt_threads = CPU_CORE(pc)->nr_threads;
128     CPUPPCState *env = &cpu->env;
129     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
130     uint32_t servers_prop[smt_threads];
131     int i;
132     uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
133                        0xffffffff, 0xffffffff};
134     uint32_t tbfreq = PNV_TIMEBASE_FREQ;
135     uint32_t cpufreq = 1000000000;
136     uint32_t page_sizes_prop[64];
137     size_t page_sizes_prop_size;
138     const uint8_t pa_features[] = { 24, 0,
139                                     0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0,
140                                     0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
141                                     0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
142                                     0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
143     int offset;
144     char *nodename;
145     int cpus_offset = get_cpus_node(fdt);
146 
147     nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);
148     offset = fdt_add_subnode(fdt, cpus_offset, nodename);
149     _FDT(offset);
150     g_free(nodename);
151 
152     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
153 
154     _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));
155     _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
156     _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
157 
158     _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
159     _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
160                             env->dcache_line_size)));
161     _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
162                             env->dcache_line_size)));
163     _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
164                             env->icache_line_size)));
165     _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
166                             env->icache_line_size)));
167 
168     if (pcc->l1_dcache_size) {
169         _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
170                                pcc->l1_dcache_size)));
171     } else {
172         warn_report("Unknown L1 dcache size for cpu");
173     }
174     if (pcc->l1_icache_size) {
175         _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
176                                pcc->l1_icache_size)));
177     } else {
178         warn_report("Unknown L1 icache size for cpu");
179     }
180 
181     _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
182     _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
183     _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", cpu->hash64_opts->slb_size)));
184     _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
185     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
186 
187     if (env->spr_cb[SPR_PURR].oea_read) {
188         _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
189     }
190 
191     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
192         _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
193                            segs, sizeof(segs))));
194     }
195 
196     /* Advertise VMX/VSX (vector extensions) if available
197      *   0 / no property == no vector extensions
198      *   1               == VMX / Altivec available
199      *   2               == VSX available */
200     if (env->insns_flags & PPC_ALTIVEC) {
201         uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
202 
203         _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
204     }
205 
206     /* Advertise DFP (Decimal Floating Point) if available
207      *   0 / no property == no DFP
208      *   1               == DFP available */
209     if (env->insns_flags2 & PPC2_DFP) {
210         _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
211     }
212 
213     page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
214                                                       sizeof(page_sizes_prop));
215     if (page_sizes_prop_size) {
216         _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
217                            page_sizes_prop, page_sizes_prop_size)));
218     }
219 
220     _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
221                        pa_features, sizeof(pa_features))));
222 
223     /* Build interrupt servers properties */
224     for (i = 0; i < smt_threads; i++) {
225         servers_prop[i] = cpu_to_be32(pc->pir + i);
226     }
227     _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
228                        servers_prop, sizeof(servers_prop))));
229 }
230 
231 static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,
232                        uint32_t nr_threads)
233 {
234     uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12);
235     char *name;
236     const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
237     uint32_t irange[2], i, rsize;
238     uint64_t *reg;
239     int offset;
240 
241     irange[0] = cpu_to_be32(pir);
242     irange[1] = cpu_to_be32(nr_threads);
243 
244     rsize = sizeof(uint64_t) * 2 * nr_threads;
245     reg = g_malloc(rsize);
246     for (i = 0; i < nr_threads; i++) {
247         reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
248         reg[i * 2 + 1] = cpu_to_be64(0x1000);
249     }
250 
251     name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
252     offset = fdt_add_subnode(fdt, 0, name);
253     _FDT(offset);
254     g_free(name);
255 
256     _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
257     _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize)));
258     _FDT((fdt_setprop_string(fdt, offset, "device_type",
259                               "PowerPC-External-Interrupt-Presentation")));
260     _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0)));
261     _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges",
262                        irange, sizeof(irange))));
263     _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1)));
264     _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0)));
265     g_free(reg);
266 }
267 
268 static int pnv_chip_lpc_offset(PnvChip *chip, void *fdt)
269 {
270     char *name;
271     int offset;
272 
273     name = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x",
274                            (uint64_t) PNV_XSCOM_BASE(chip), PNV_XSCOM_LPC_BASE);
275     offset = fdt_path_offset(fdt, name);
276     g_free(name);
277     return offset;
278 }
279 
280 static void pnv_dt_chip(PnvChip *chip, void *fdt)
281 {
282     const char *typename = pnv_chip_core_typename(chip);
283     size_t typesize = object_type_get_instance_size(typename);
284     int i;
285 
286     pnv_dt_xscom(chip, fdt, 0);
287 
288     /* The default LPC bus of a multichip system is on chip 0. It's
289      * recognized by the firmware (skiboot) using a "primary"
290      * property.
291      */
292     if (chip->chip_id == 0x0) {
293         int lpc_offset = pnv_chip_lpc_offset(chip, fdt);
294 
295         _FDT((fdt_setprop(fdt, lpc_offset, "primary", NULL, 0)));
296     }
297 
298     for (i = 0; i < chip->nr_cores; i++) {
299         PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
300 
301         pnv_dt_core(chip, pnv_core, fdt);
302 
303         /* Interrupt Control Presenters (ICP). One per core. */
304         pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads);
305     }
306 
307     if (chip->ram_size) {
308         pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size);
309     }
310 }
311 
312 static void pnv_dt_rtc(ISADevice *d, void *fdt, int lpc_off)
313 {
314     uint32_t io_base = d->ioport_id;
315     uint32_t io_regs[] = {
316         cpu_to_be32(1),
317         cpu_to_be32(io_base),
318         cpu_to_be32(2)
319     };
320     char *name;
321     int node;
322 
323     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
324     node = fdt_add_subnode(fdt, lpc_off, name);
325     _FDT(node);
326     g_free(name);
327 
328     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
329     _FDT((fdt_setprop_string(fdt, node, "compatible", "pnpPNP,b00")));
330 }
331 
332 static void pnv_dt_serial(ISADevice *d, void *fdt, int lpc_off)
333 {
334     const char compatible[] = "ns16550\0pnpPNP,501";
335     uint32_t io_base = d->ioport_id;
336     uint32_t io_regs[] = {
337         cpu_to_be32(1),
338         cpu_to_be32(io_base),
339         cpu_to_be32(8)
340     };
341     char *name;
342     int node;
343 
344     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
345     node = fdt_add_subnode(fdt, lpc_off, name);
346     _FDT(node);
347     g_free(name);
348 
349     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
350     _FDT((fdt_setprop(fdt, node, "compatible", compatible,
351                       sizeof(compatible))));
352 
353     _FDT((fdt_setprop_cell(fdt, node, "clock-frequency", 1843200)));
354     _FDT((fdt_setprop_cell(fdt, node, "current-speed", 115200)));
355     _FDT((fdt_setprop_cell(fdt, node, "interrupts", d->isairq[0])));
356     _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent",
357                            fdt_get_phandle(fdt, lpc_off))));
358 
359     /* This is needed by Linux */
360     _FDT((fdt_setprop_string(fdt, node, "device_type", "serial")));
361 }
362 
363 static void pnv_dt_ipmi_bt(ISADevice *d, void *fdt, int lpc_off)
364 {
365     const char compatible[] = "bt\0ipmi-bt";
366     uint32_t io_base;
367     uint32_t io_regs[] = {
368         cpu_to_be32(1),
369         0, /* 'io_base' retrieved from the 'ioport' property of 'isa-ipmi-bt' */
370         cpu_to_be32(3)
371     };
372     uint32_t irq;
373     char *name;
374     int node;
375 
376     io_base = object_property_get_int(OBJECT(d), "ioport", &error_fatal);
377     io_regs[1] = cpu_to_be32(io_base);
378 
379     irq = object_property_get_int(OBJECT(d), "irq", &error_fatal);
380 
381     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
382     node = fdt_add_subnode(fdt, lpc_off, name);
383     _FDT(node);
384     g_free(name);
385 
386     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
387     _FDT((fdt_setprop(fdt, node, "compatible", compatible,
388                       sizeof(compatible))));
389 
390     /* Mark it as reserved to avoid Linux trying to claim it */
391     _FDT((fdt_setprop_string(fdt, node, "status", "reserved")));
392     _FDT((fdt_setprop_cell(fdt, node, "interrupts", irq)));
393     _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent",
394                            fdt_get_phandle(fdt, lpc_off))));
395 }
396 
/*
 * Arguments handed to pnv_dt_isa_device() through the opaque pointer
 * of qbus_walk_children().
 */
typedef struct ForeachPopulateArgs {
    void *fdt;      /* device tree being built */
    int offset;     /* offset of the node the ISA devices are added to */
} ForeachPopulateArgs;
401 
402 static int pnv_dt_isa_device(DeviceState *dev, void *opaque)
403 {
404     ForeachPopulateArgs *args = opaque;
405     ISADevice *d = ISA_DEVICE(dev);
406 
407     if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
408         pnv_dt_rtc(d, args->fdt, args->offset);
409     } else if (object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL)) {
410         pnv_dt_serial(d, args->fdt, args->offset);
411     } else if (object_dynamic_cast(OBJECT(dev), "isa-ipmi-bt")) {
412         pnv_dt_ipmi_bt(d, args->fdt, args->offset);
413     } else {
414         error_report("unknown isa device %s@i%x", qdev_fw_name(dev),
415                      d->ioport_id);
416     }
417 
418     return 0;
419 }
420 
421 static void pnv_dt_isa(ISABus *bus, void *fdt, int lpc_offset)
422 {
423     ForeachPopulateArgs args = {
424         .fdt = fdt,
425         .offset = lpc_offset,
426     };
427 
428     /* ISA devices are not necessarily parented to the ISA bus so we
429      * can not use object_child_foreach() */
430     qbus_walk_children(BUS(bus), pnv_dt_isa_device, NULL, NULL, NULL, &args);
431 }
432 
433 static void *pnv_dt_create(MachineState *machine)
434 {
435     const char plat_compat[] = "qemu,powernv\0ibm,powernv";
436     PnvMachineState *pnv = PNV_MACHINE(machine);
437     void *fdt;
438     char *buf;
439     int off;
440     int i;
441     int lpc_offset;
442 
443     fdt = g_malloc0(FDT_MAX_SIZE);
444     _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
445 
446     /* Root node */
447     _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2)));
448     _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
449     _FDT((fdt_setprop_string(fdt, 0, "model",
450                              "IBM PowerNV (emulated by qemu)")));
451     _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat,
452                       sizeof(plat_compat))));
453 
454     buf =  qemu_uuid_unparse_strdup(&qemu_uuid);
455     _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
456     if (qemu_uuid_set) {
457         _FDT((fdt_property_string(fdt, "system-id", buf)));
458     }
459     g_free(buf);
460 
461     off = fdt_add_subnode(fdt, 0, "chosen");
462     if (machine->kernel_cmdline) {
463         _FDT((fdt_setprop_string(fdt, off, "bootargs",
464                                  machine->kernel_cmdline)));
465     }
466 
467     if (pnv->initrd_size) {
468         uint32_t start_prop = cpu_to_be32(pnv->initrd_base);
469         uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
470 
471         _FDT((fdt_setprop(fdt, off, "linux,initrd-start",
472                                &start_prop, sizeof(start_prop))));
473         _FDT((fdt_setprop(fdt, off, "linux,initrd-end",
474                                &end_prop, sizeof(end_prop))));
475     }
476 
477     /* Populate device tree for each chip */
478     for (i = 0; i < pnv->num_chips; i++) {
479         pnv_dt_chip(pnv->chips[i], fdt);
480     }
481 
482     /* Populate ISA devices on chip 0 */
483     lpc_offset = pnv_chip_lpc_offset(pnv->chips[0], fdt);
484     pnv_dt_isa(pnv->isa_bus, fdt, lpc_offset);
485 
486     if (pnv->bmc) {
487         pnv_dt_bmc_sensors(pnv->bmc, fdt);
488     }
489 
490     return fdt;
491 }
492 
493 static void pnv_powerdown_notify(Notifier *n, void *opaque)
494 {
495     PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
496 
497     if (pnv->bmc) {
498         pnv_bmc_powerdown(pnv->bmc);
499     }
500 }
501 
502 static void pnv_reset(void)
503 {
504     MachineState *machine = MACHINE(qdev_get_machine());
505     PnvMachineState *pnv = PNV_MACHINE(machine);
506     void *fdt;
507     Object *obj;
508 
509     qemu_devices_reset();
510 
511     /* OpenPOWER systems have a BMC, which can be defined on the
512      * command line with:
513      *
514      *   -device ipmi-bmc-sim,id=bmc0
515      *
516      * This is the internal simulator but it could also be an external
517      * BMC.
518      */
519     obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL);
520     if (obj) {
521         pnv->bmc = IPMI_BMC(obj);
522     }
523 
524     fdt = pnv_dt_create(machine);
525 
526     /* Pack resulting tree */
527     _FDT((fdt_pack(fdt)));
528 
529     cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
530 }
531 
532 static ISABus *pnv_isa_create(PnvChip *chip)
533 {
534     PnvLpcController *lpc = &chip->lpc;
535     ISABus *isa_bus;
536     qemu_irq *irqs;
537     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
538 
539     /* let isa_bus_new() create its own bridge on SysBus otherwise
540      * devices speficied on the command line won't find the bus and
541      * will fail to create.
542      */
543     isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io,
544                           &error_fatal);
545 
546     irqs = pnv_lpc_isa_irq_create(lpc, pcc->chip_type, ISA_NUM_IRQS);
547 
548     isa_bus_irqs(isa_bus, irqs);
549     return isa_bus;
550 }
551 
552 static void pnv_init(MachineState *machine)
553 {
554     PnvMachineState *pnv = PNV_MACHINE(machine);
555     MemoryRegion *ram;
556     char *fw_filename;
557     long fw_size;
558     int i;
559     char *chip_typename;
560 
561     /* allocate RAM */
562     if (machine->ram_size < (1 * G_BYTE)) {
563         warn_report("skiboot may not work with < 1GB of RAM");
564     }
565 
566     ram = g_new(MemoryRegion, 1);
567     memory_region_allocate_system_memory(ram, NULL, "pnv.ram",
568                                          machine->ram_size);
569     memory_region_add_subregion(get_system_memory(), 0, ram);
570 
571     /* load skiboot firmware  */
572     if (bios_name == NULL) {
573         bios_name = FW_FILE_NAME;
574     }
575 
576     fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
577     if (!fw_filename) {
578         error_report("Could not find OPAL firmware '%s'", bios_name);
579         exit(1);
580     }
581 
582     fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
583     if (fw_size < 0) {
584         error_report("Could not load OPAL firmware '%s'", fw_filename);
585         exit(1);
586     }
587     g_free(fw_filename);
588 
589     /* load kernel */
590     if (machine->kernel_filename) {
591         long kernel_size;
592 
593         kernel_size = load_image_targphys(machine->kernel_filename,
594                                           KERNEL_LOAD_ADDR, 0x2000000);
595         if (kernel_size < 0) {
596             error_report("Could not load kernel '%s'",
597                          machine->kernel_filename);
598             exit(1);
599         }
600     }
601 
602     /* load initrd */
603     if (machine->initrd_filename) {
604         pnv->initrd_base = INITRD_LOAD_ADDR;
605         pnv->initrd_size = load_image_targphys(machine->initrd_filename,
606                                   pnv->initrd_base, 0x10000000); /* 128MB max */
607         if (pnv->initrd_size < 0) {
608             error_report("Could not load initial ram disk '%s'",
609                          machine->initrd_filename);
610             exit(1);
611         }
612     }
613 
614     /* Create the processor chips */
615     i = strlen(machine->cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
616     chip_typename = g_strdup_printf(PNV_CHIP_TYPE_NAME("%.*s"),
617                                     i, machine->cpu_type);
618     if (!object_class_by_name(chip_typename)) {
619         error_report("invalid CPU model '%.*s' for %s machine",
620                      i, machine->cpu_type, MACHINE_GET_CLASS(machine)->name);
621         exit(1);
622     }
623 
624     pnv->chips = g_new0(PnvChip *, pnv->num_chips);
625     for (i = 0; i < pnv->num_chips; i++) {
626         char chip_name[32];
627         Object *chip = object_new(chip_typename);
628 
629         pnv->chips[i] = PNV_CHIP(chip);
630 
631         /* TODO: put all the memory in one node on chip 0 until we find a
632          * way to specify different ranges for each chip
633          */
634         if (i == 0) {
635             object_property_set_int(chip, machine->ram_size, "ram-size",
636                                     &error_fatal);
637         }
638 
639         snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i));
640         object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal);
641         object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
642                                 &error_fatal);
643         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
644         object_property_set_bool(chip, true, "realized", &error_fatal);
645     }
646     g_free(chip_typename);
647 
648     /* Instantiate ISA bus on chip 0 */
649     pnv->isa_bus = pnv_isa_create(pnv->chips[0]);
650 
651     /* Create serial port */
652     serial_hds_isa_init(pnv->isa_bus, 0, MAX_ISA_SERIAL_PORTS);
653 
654     /* Create an RTC ISA device too */
655     mc146818_rtc_init(pnv->isa_bus, 2000, NULL);
656 
657     /* OpenPOWER systems use a IPMI SEL Event message to notify the
658      * host to powerdown */
659     pnv->powerdown_notifier.notify = pnv_powerdown_notify;
660     qemu_register_powerdown_notifier(&pnv->powerdown_notifier);
661 }
662 
663 /*
664  *    0:21  Reserved - Read as zeros
665  *   22:24  Chip ID
666  *   25:28  Core number
667  *   29:31  Thread ID
668  */
669 static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id)
670 {
671     return (chip->chip_id << 7) | (core_id << 3);
672 }
673 
674 /*
675  *    0:48  Reserved - Read as zeroes
676  *   49:52  Node ID
677  *   53:55  Chip ID
678  *   56     Reserved - Read as zero
679  *   57:61  Core number
680  *   62:63  Thread ID
681  *
682  * We only care about the lower bits. uint32_t is fine for the moment.
683  */
684 static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id)
685 {
686     return (chip->chip_id << 8) | (core_id << 2);
687 }
688 
/* Allowed core identifiers on a POWER8 Processor Chip :
 *
 * <EX0 reserved>
 *  EX1  - Venice only
 *  EX2  - Venice only
 *  EX3  - Venice only
 *  EX4
 *  EX5
 *  EX6
 * <EX7,8 reserved> <reserved>
 *  EX9  - Venice only
 *  EX10 - Venice only
 *  EX11 - Venice only
 *  EX12
 *  EX13
 *  EX14
 * <EX15 reserved>
 *
 * In the masks below, bit N is set when core id N (EXN) is allowed:
 * 0x7070 selects EX4-6 and EX12-14, 0x7e7e selects EX1-6 and EX9-14.
 */
#define POWER8E_CORE_MASK  (0x7070ull)
#define POWER8_CORE_MASK   (0x7e7eull)

/*
 * POWER9 has 24 cores, ids starting at 0x0
 *
 * NOTE(review): the mask below has 56 bits set, which is more than the
 * 24 cores mentioned above - confirm which one is intended.
 */
#define POWER9_CORE_MASK   (0xffffffffffffffull)
714 
715 static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
716 {
717     DeviceClass *dc = DEVICE_CLASS(klass);
718     PnvChipClass *k = PNV_CHIP_CLASS(klass);
719 
720     k->chip_type = PNV_CHIP_POWER8E;
721     k->chip_cfam_id = 0x221ef04980000000ull;  /* P8 Murano DD2.1 */
722     k->cores_mask = POWER8E_CORE_MASK;
723     k->core_pir = pnv_chip_core_pir_p8;
724     k->xscom_base = 0x003fc0000000000ull;
725     dc->desc = "PowerNV Chip POWER8E";
726 }
727 
728 static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
729 {
730     DeviceClass *dc = DEVICE_CLASS(klass);
731     PnvChipClass *k = PNV_CHIP_CLASS(klass);
732 
733     k->chip_type = PNV_CHIP_POWER8;
734     k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
735     k->cores_mask = POWER8_CORE_MASK;
736     k->core_pir = pnv_chip_core_pir_p8;
737     k->xscom_base = 0x003fc0000000000ull;
738     dc->desc = "PowerNV Chip POWER8";
739 }
740 
741 static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
742 {
743     DeviceClass *dc = DEVICE_CLASS(klass);
744     PnvChipClass *k = PNV_CHIP_CLASS(klass);
745 
746     k->chip_type = PNV_CHIP_POWER8NVL;
747     k->chip_cfam_id = 0x120d304980000000ull;  /* P8 Naples DD1.0 */
748     k->cores_mask = POWER8_CORE_MASK;
749     k->core_pir = pnv_chip_core_pir_p8;
750     k->xscom_base = 0x003fc0000000000ull;
751     dc->desc = "PowerNV Chip POWER8NVL";
752 }
753 
754 static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
755 {
756     DeviceClass *dc = DEVICE_CLASS(klass);
757     PnvChipClass *k = PNV_CHIP_CLASS(klass);
758 
759     k->chip_type = PNV_CHIP_POWER9;
760     k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */
761     k->cores_mask = POWER9_CORE_MASK;
762     k->core_pir = pnv_chip_core_pir_p9;
763     k->xscom_base = 0x00603fc00000000ull;
764     dc->desc = "PowerNV Chip POWER9";
765 }
766 
767 static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp)
768 {
769     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
770     int cores_max;
771 
772     /*
773      * No custom mask for this chip, let's use the default one from *
774      * the chip class
775      */
776     if (!chip->cores_mask) {
777         chip->cores_mask = pcc->cores_mask;
778     }
779 
780     /* filter alien core ids ! some are reserved */
781     if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) {
782         error_setg(errp, "warning: invalid core mask for chip Ox%"PRIx64" !",
783                    chip->cores_mask);
784         return;
785     }
786     chip->cores_mask &= pcc->cores_mask;
787 
788     /* now that we have a sane layout, let check the number of cores */
789     cores_max = ctpop64(chip->cores_mask);
790     if (chip->nr_cores > cores_max) {
791         error_setg(errp, "warning: too many cores for chip ! Limit is %d",
792                    cores_max);
793         return;
794     }
795 }
796 
797 static void pnv_chip_init(Object *obj)
798 {
799     PnvChip *chip = PNV_CHIP(obj);
800     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
801 
802     chip->xscom_base = pcc->xscom_base;
803 
804     object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC);
805     object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL);
806 
807     object_initialize(&chip->psi, sizeof(chip->psi), TYPE_PNV_PSI);
808     object_property_add_child(obj, "psi", OBJECT(&chip->psi), NULL);
809     object_property_add_const_link(OBJECT(&chip->psi), "xics",
810                                    OBJECT(qdev_get_machine()), &error_abort);
811 
812     object_initialize(&chip->occ, sizeof(chip->occ), TYPE_PNV_OCC);
813     object_property_add_child(obj, "occ", OBJECT(&chip->occ), NULL);
814     object_property_add_const_link(OBJECT(&chip->occ), "psi",
815                                    OBJECT(&chip->psi), &error_abort);
816 
817     /* The LPC controller needs PSI to generate interrupts */
818     object_property_add_const_link(OBJECT(&chip->lpc), "psi",
819                                    OBJECT(&chip->psi), &error_abort);
820 }
821 
822 static void pnv_chip_icp_realize(PnvChip *chip, Error **errp)
823 {
824     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
825     const char *typename = pnv_chip_core_typename(chip);
826     size_t typesize = object_type_get_instance_size(typename);
827     int i, j;
828     char *name;
829     XICSFabric *xi = XICS_FABRIC(qdev_get_machine());
830 
831     name = g_strdup_printf("icp-%x", chip->chip_id);
832     memory_region_init(&chip->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE);
833     sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip->icp_mmio);
834     g_free(name);
835 
836     sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip));
837 
838     /* Map the ICP registers for each thread */
839     for (i = 0; i < chip->nr_cores; i++) {
840         PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
841         int core_hwid = CPU_CORE(pnv_core)->core_id;
842 
843         for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) {
844             uint32_t pir = pcc->core_pir(chip, core_hwid) + j;
845             PnvICPState *icp = PNV_ICP(xics_icp_get(xi, pir));
846 
847             memory_region_add_subregion(&chip->icp_mmio, pir << 12, &icp->mmio);
848         }
849     }
850 }
851 
852 static void pnv_chip_realize(DeviceState *dev, Error **errp)
853 {
854     PnvChip *chip = PNV_CHIP(dev);
855     Error *error = NULL;
856     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
857     const char *typename = pnv_chip_core_typename(chip);
858     size_t typesize = object_type_get_instance_size(typename);
859     int i, core_hwid;
860 
861     if (!object_class_by_name(typename)) {
862         error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
863         return;
864     }
865 
866     /* XSCOM bridge */
867     pnv_xscom_realize(chip, &error);
868     if (error) {
869         error_propagate(errp, error);
870         return;
871     }
872     sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip));
873 
874     /* Cores */
875     pnv_chip_core_sanitize(chip, &error);
876     if (error) {
877         error_propagate(errp, error);
878         return;
879     }
880 
881     chip->cores = g_malloc0(typesize * chip->nr_cores);
882 
883     for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8)
884              && (i < chip->nr_cores); core_hwid++) {
885         char core_name[32];
886         void *pnv_core = chip->cores + i * typesize;
887         uint64_t xscom_core_base;
888 
889         if (!(chip->cores_mask & (1ull << core_hwid))) {
890             continue;
891         }
892 
893         object_initialize(pnv_core, typesize, typename);
894         snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid);
895         object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core),
896                                   &error_fatal);
897         object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads",
898                                 &error_fatal);
899         object_property_set_int(OBJECT(pnv_core), core_hwid,
900                                 CPU_CORE_PROP_CORE_ID, &error_fatal);
901         object_property_set_int(OBJECT(pnv_core),
902                                 pcc->core_pir(chip, core_hwid),
903                                 "pir", &error_fatal);
904         object_property_add_const_link(OBJECT(pnv_core), "xics",
905                                        qdev_get_machine(), &error_fatal);
906         object_property_set_bool(OBJECT(pnv_core), true, "realized",
907                                  &error_fatal);
908         object_unref(OBJECT(pnv_core));
909 
910         /* Each core has an XSCOM MMIO region */
911         if (!pnv_chip_is_power9(chip)) {
912             xscom_core_base = PNV_XSCOM_EX_BASE(core_hwid);
913         } else {
914             xscom_core_base = PNV_XSCOM_P9_EC_BASE(core_hwid);
915         }
916 
917         pnv_xscom_add_subregion(chip, xscom_core_base,
918                                 &PNV_CORE(pnv_core)->xscom_regs);
919         i++;
920     }
921 
922     /* Create LPC controller */
923     object_property_set_bool(OBJECT(&chip->lpc), true, "realized",
924                              &error_fatal);
925     pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs);
926 
927     /* Interrupt Management Area. This is the memory region holding
928      * all the Interrupt Control Presenter (ICP) registers */
929     pnv_chip_icp_realize(chip, &error);
930     if (error) {
931         error_propagate(errp, error);
932         return;
933     }
934 
935     /* Processor Service Interface (PSI) Host Bridge */
936     object_property_set_int(OBJECT(&chip->psi), PNV_PSIHB_BASE(chip),
937                             "bar", &error_fatal);
938     object_property_set_bool(OBJECT(&chip->psi), true, "realized", &error);
939     if (error) {
940         error_propagate(errp, error);
941         return;
942     }
943     pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, &chip->psi.xscom_regs);
944 
945     /* Create the simplified OCC model */
946     object_property_set_bool(OBJECT(&chip->occ), true, "realized", &error);
947     if (error) {
948         error_propagate(errp, error);
949         return;
950     }
951     pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip->occ.xscom_regs);
952 }
953 
954 static Property pnv_chip_properties[] = {
955     DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
956     DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0),
957     DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0),
958     DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
959     DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
960     DEFINE_PROP_END_OF_LIST(),
961 };
962 
963 static void pnv_chip_class_init(ObjectClass *klass, void *data)
964 {
965     DeviceClass *dc = DEVICE_CLASS(klass);
966 
967     set_bit(DEVICE_CATEGORY_CPU, dc->categories);
968     dc->realize = pnv_chip_realize;
969     dc->props = pnv_chip_properties;
970     dc->desc = "PowerNV Chip";
971 }
972 
973 static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
974 {
975     PnvMachineState *pnv = PNV_MACHINE(xi);
976     int i;
977 
978     for (i = 0; i < pnv->num_chips; i++) {
979         if (ics_valid_irq(&pnv->chips[i]->psi.ics, irq)) {
980             return &pnv->chips[i]->psi.ics;
981         }
982     }
983     return NULL;
984 }
985 
986 static void pnv_ics_resend(XICSFabric *xi)
987 {
988     PnvMachineState *pnv = PNV_MACHINE(xi);
989     int i;
990 
991     for (i = 0; i < pnv->num_chips; i++) {
992         ics_resend(&pnv->chips[i]->psi.ics);
993     }
994 }
995 
996 static PowerPCCPU *ppc_get_vcpu_by_pir(int pir)
997 {
998     CPUState *cs;
999 
1000     CPU_FOREACH(cs) {
1001         PowerPCCPU *cpu = POWERPC_CPU(cs);
1002         CPUPPCState *env = &cpu->env;
1003 
1004         if (env->spr_cb[SPR_PIR].default_value == pir) {
1005             return cpu;
1006         }
1007     }
1008 
1009     return NULL;
1010 }
1011 
1012 static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
1013 {
1014     PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir);
1015 
1016     return cpu ? ICP(cpu->intc) : NULL;
1017 }
1018 
1019 static void pnv_pic_print_info(InterruptStatsProvider *obj,
1020                                Monitor *mon)
1021 {
1022     PnvMachineState *pnv = PNV_MACHINE(obj);
1023     int i;
1024     CPUState *cs;
1025 
1026     CPU_FOREACH(cs) {
1027         PowerPCCPU *cpu = POWERPC_CPU(cs);
1028 
1029         icp_pic_print_info(ICP(cpu->intc), mon);
1030     }
1031 
1032     for (i = 0; i < pnv->num_chips; i++) {
1033         ics_pic_print_info(&pnv->chips[i]->psi.ics, mon);
1034     }
1035 }
1036 
1037 static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
1038                               void *opaque, Error **errp)
1039 {
1040     visit_type_uint32(v, name, &PNV_MACHINE(obj)->num_chips, errp);
1041 }
1042 
1043 static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name,
1044                               void *opaque, Error **errp)
1045 {
1046     PnvMachineState *pnv = PNV_MACHINE(obj);
1047     uint32_t num_chips;
1048     Error *local_err = NULL;
1049 
1050     visit_type_uint32(v, name, &num_chips, &local_err);
1051     if (local_err) {
1052         error_propagate(errp, local_err);
1053         return;
1054     }
1055 
1056     /*
1057      * TODO: should we decide on how many chips we can create based
1058      * on #cores and Venice vs. Murano vs. Naples chip type etc...,
1059      */
1060     if (!is_power_of_2(num_chips) || num_chips > 4) {
1061         error_setg(errp, "invalid number of chips: '%d'", num_chips);
1062         return;
1063     }
1064 
1065     pnv->num_chips = num_chips;
1066 }
1067 
1068 static void pnv_machine_initfn(Object *obj)
1069 {
1070     PnvMachineState *pnv = PNV_MACHINE(obj);
1071     pnv->num_chips = 1;
1072 }
1073 
1074 static void pnv_machine_class_props_init(ObjectClass *oc)
1075 {
1076     object_class_property_add(oc, "num-chips", "uint32",
1077                               pnv_get_num_chips, pnv_set_num_chips,
1078                               NULL, NULL, NULL);
1079     object_class_property_set_description(oc, "num-chips",
1080                               "Specifies the number of processor chips",
1081                               NULL);
1082 }
1083 
1084 static void pnv_machine_class_init(ObjectClass *oc, void *data)
1085 {
1086     MachineClass *mc = MACHINE_CLASS(oc);
1087     XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
1088     InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
1089 
1090     mc->desc = "IBM PowerNV (Non-Virtualized)";
1091     mc->init = pnv_init;
1092     mc->reset = pnv_reset;
1093     mc->max_cpus = MAX_CPUS;
1094     mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
1095     mc->block_default_type = IF_IDE; /* Pnv provides a AHCI device for
1096                                       * storage */
1097     mc->no_parallel = 1;
1098     mc->default_boot_order = NULL;
1099     mc->default_ram_size = 1 * G_BYTE;
1100     xic->icp_get = pnv_icp_get;
1101     xic->ics_get = pnv_ics_get;
1102     xic->ics_resend = pnv_ics_resend;
1103     ispc->print_info = pnv_pic_print_info;
1104 
1105     pnv_machine_class_props_init(oc);
1106 }
1107 
/*
 * Expands to the TypeInfo initializer of a concrete chip model: a type
 * named 'type', derived from TYPE_PNV_CHIP, whose class is set up by
 * 'class_initfn'.
 */
#define DEFINE_PNV_CHIP_TYPE(type, class_initfn) \
    {                                            \
        .parent        = TYPE_PNV_CHIP,          \
        .name          = type,                   \
        .class_init    = class_initfn,           \
    }
1114 
1115 static const TypeInfo types[] = {
1116     {
1117         .name          = TYPE_PNV_MACHINE,
1118         .parent        = TYPE_MACHINE,
1119         .instance_size = sizeof(PnvMachineState),
1120         .instance_init = pnv_machine_initfn,
1121         .class_init    = pnv_machine_class_init,
1122         .interfaces = (InterfaceInfo[]) {
1123             { TYPE_XICS_FABRIC },
1124             { TYPE_INTERRUPT_STATS_PROVIDER },
1125             { },
1126         },
1127     },
1128     {
1129         .name          = TYPE_PNV_CHIP,
1130         .parent        = TYPE_SYS_BUS_DEVICE,
1131         .class_init    = pnv_chip_class_init,
1132         .instance_init = pnv_chip_init,
1133         .instance_size = sizeof(PnvChip),
1134         .class_size    = sizeof(PnvChipClass),
1135         .abstract      = true,
1136     },
1137     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER9, pnv_chip_power9_class_init),
1138     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER8, pnv_chip_power8_class_init),
1139     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER8E, pnv_chip_power8e_class_init),
1140     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER8NVL,
1141                          pnv_chip_power8nvl_class_init),
1142 };
1143 
1144 DEFINE_TYPES(types)
1145