xref: /openbmc/qemu/hw/ppc/pnv.c (revision 4fe6d78b)
1 /*
2  * QEMU PowerPC PowerNV machine model
3  *
4  * Copyright (c) 2016, IBM Corporation.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qapi/error.h"
22 #include "sysemu/sysemu.h"
23 #include "sysemu/numa.h"
24 #include "sysemu/cpus.h"
25 #include "hw/hw.h"
26 #include "target/ppc/cpu.h"
27 #include "qemu/log.h"
28 #include "hw/ppc/fdt.h"
29 #include "hw/ppc/ppc.h"
30 #include "hw/ppc/pnv.h"
31 #include "hw/ppc/pnv_core.h"
32 #include "hw/loader.h"
33 #include "exec/address-spaces.h"
34 #include "qemu/cutils.h"
35 #include "qapi/visitor.h"
36 #include "monitor/monitor.h"
37 #include "hw/intc/intc.h"
38 #include "hw/ipmi/ipmi.h"
39 
40 #include "hw/ppc/xics.h"
41 #include "hw/ppc/pnv_xscom.h"
42 
43 #include "hw/isa/isa.h"
44 #include "hw/char/serial.h"
45 #include "hw/timer/mc146818rtc.h"
46 
47 #include <libfdt.h>
48 
/* Size of the buffer in which the flattened device tree is built */
#define FDT_MAX_SIZE        0x100000

/* Firmware image: default file name, load address and maximum size */
#define FW_FILE_NAME        "skiboot.lid"
#define FW_LOAD_ADDR        0x0
#define FW_MAX_SIZE         0x400000

/* Addresses at which the kernel and the initial ram disk are loaded */
#define KERNEL_LOAD_ADDR    0x20000000
#define INITRD_LOAD_ADDR    0x60000000
57 
58 static const char *pnv_chip_core_typename(const PnvChip *o)
59 {
60     const char *chip_type = object_class_get_name(object_get_class(OBJECT(o)));
61     int len = strlen(chip_type) - strlen(PNV_CHIP_TYPE_SUFFIX);
62     char *s = g_strdup_printf(PNV_CORE_TYPE_NAME("%.*s"), len, chip_type);
63     const char *core_type = object_class_get_name(object_class_by_name(s));
64     g_free(s);
65     return core_type;
66 }
67 
/*
 * On Power Systems E880 (POWER8), the maximum number of cpus (threads)
 * should be:
 *     4 * 4 sockets * 12 cores * 8 threads = 1536
 * Round it up to the next power of two, 2^11.
 */
#define MAX_CPUS    2048
74 
75 /*
76  * Memory nodes are created by hostboot, one for each range of memory
77  * that has a different "affinity". In practice, it means one range
78  * per chip.
79  */
80 static void pnv_dt_memory(void *fdt, int chip_id, hwaddr start, hwaddr size)
81 {
82     char *mem_name;
83     uint64_t mem_reg_property[2];
84     int off;
85 
86     mem_reg_property[0] = cpu_to_be64(start);
87     mem_reg_property[1] = cpu_to_be64(size);
88 
89     mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start);
90     off = fdt_add_subnode(fdt, 0, mem_name);
91     g_free(mem_name);
92 
93     _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
94     _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
95                        sizeof(mem_reg_property))));
96     _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id)));
97 }
98 
/*
 * Return the offset of the "/cpus" node of @fdt, creating it under the
 * root with "#address-cells" = 1 and "#size-cells" = 0 when the lookup
 * fails.
 */
static int get_cpus_node(void *fdt)
{
    int cpus_offset = fdt_path_offset(fdt, "/cpus");

    if (cpus_offset < 0) {
        cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
        /*
         * A negative value is an error code, not an offset: check it
         * before it is used to set properties.
         */
        _FDT(cpus_offset);
        _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
        _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
    }
    return cpus_offset;
}
113 
114 /*
115  * The PowerNV cores (and threads) need to use real HW ids and not an
116  * incremental index like it has been done on other platforms. This HW
117  * id is stored in the CPU PIR, it is used to create cpu nodes in the
118  * device tree, used in XSCOM to address cores and in interrupt
119  * servers.
120  */
121 static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
122 {
123     CPUState *cs = CPU(DEVICE(pc->threads));
124     DeviceClass *dc = DEVICE_GET_CLASS(cs);
125     PowerPCCPU *cpu = POWERPC_CPU(cs);
126     int smt_threads = CPU_CORE(pc)->nr_threads;
127     CPUPPCState *env = &cpu->env;
128     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
129     uint32_t servers_prop[smt_threads];
130     int i;
131     uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
132                        0xffffffff, 0xffffffff};
133     uint32_t tbfreq = PNV_TIMEBASE_FREQ;
134     uint32_t cpufreq = 1000000000;
135     uint32_t page_sizes_prop[64];
136     size_t page_sizes_prop_size;
137     const uint8_t pa_features[] = { 24, 0,
138                                     0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0,
139                                     0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
140                                     0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
141                                     0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
142     int offset;
143     char *nodename;
144     int cpus_offset = get_cpus_node(fdt);
145 
146     nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);
147     offset = fdt_add_subnode(fdt, cpus_offset, nodename);
148     _FDT(offset);
149     g_free(nodename);
150 
151     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
152 
153     _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));
154     _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
155     _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
156 
157     _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
158     _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
159                             env->dcache_line_size)));
160     _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
161                             env->dcache_line_size)));
162     _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
163                             env->icache_line_size)));
164     _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
165                             env->icache_line_size)));
166 
167     if (pcc->l1_dcache_size) {
168         _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
169                                pcc->l1_dcache_size)));
170     } else {
171         warn_report("Unknown L1 dcache size for cpu");
172     }
173     if (pcc->l1_icache_size) {
174         _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
175                                pcc->l1_icache_size)));
176     } else {
177         warn_report("Unknown L1 icache size for cpu");
178     }
179 
180     _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
181     _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
182     _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
183     _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
184     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
185 
186     if (env->spr_cb[SPR_PURR].oea_read) {
187         _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
188     }
189 
190     if (env->mmu_model & POWERPC_MMU_1TSEG) {
191         _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
192                            segs, sizeof(segs))));
193     }
194 
195     /* Advertise VMX/VSX (vector extensions) if available
196      *   0 / no property == no vector extensions
197      *   1               == VMX / Altivec available
198      *   2               == VSX available */
199     if (env->insns_flags & PPC_ALTIVEC) {
200         uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
201 
202         _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
203     }
204 
205     /* Advertise DFP (Decimal Floating Point) if available
206      *   0 / no property == no DFP
207      *   1               == DFP available */
208     if (env->insns_flags2 & PPC2_DFP) {
209         _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
210     }
211 
212     page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
213                                                   sizeof(page_sizes_prop));
214     if (page_sizes_prop_size) {
215         _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
216                            page_sizes_prop, page_sizes_prop_size)));
217     }
218 
219     _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
220                        pa_features, sizeof(pa_features))));
221 
222     /* Build interrupt servers properties */
223     for (i = 0; i < smt_threads; i++) {
224         servers_prop[i] = cpu_to_be32(pc->pir + i);
225     }
226     _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
227                        servers_prop, sizeof(servers_prop))));
228 }
229 
230 static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,
231                        uint32_t nr_threads)
232 {
233     uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12);
234     char *name;
235     const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
236     uint32_t irange[2], i, rsize;
237     uint64_t *reg;
238     int offset;
239 
240     irange[0] = cpu_to_be32(pir);
241     irange[1] = cpu_to_be32(nr_threads);
242 
243     rsize = sizeof(uint64_t) * 2 * nr_threads;
244     reg = g_malloc(rsize);
245     for (i = 0; i < nr_threads; i++) {
246         reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
247         reg[i * 2 + 1] = cpu_to_be64(0x1000);
248     }
249 
250     name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
251     offset = fdt_add_subnode(fdt, 0, name);
252     _FDT(offset);
253     g_free(name);
254 
255     _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
256     _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize)));
257     _FDT((fdt_setprop_string(fdt, offset, "device_type",
258                               "PowerPC-External-Interrupt-Presentation")));
259     _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0)));
260     _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges",
261                        irange, sizeof(irange))));
262     _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1)));
263     _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0)));
264     g_free(reg);
265 }
266 
267 static int pnv_chip_lpc_offset(PnvChip *chip, void *fdt)
268 {
269     char *name;
270     int offset;
271 
272     name = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x",
273                            (uint64_t) PNV_XSCOM_BASE(chip), PNV_XSCOM_LPC_BASE);
274     offset = fdt_path_offset(fdt, name);
275     g_free(name);
276     return offset;
277 }
278 
279 static void pnv_dt_chip(PnvChip *chip, void *fdt)
280 {
281     const char *typename = pnv_chip_core_typename(chip);
282     size_t typesize = object_type_get_instance_size(typename);
283     int i;
284 
285     pnv_dt_xscom(chip, fdt, 0);
286 
287     /* The default LPC bus of a multichip system is on chip 0. It's
288      * recognized by the firmware (skiboot) using a "primary"
289      * property.
290      */
291     if (chip->chip_id == 0x0) {
292         int lpc_offset = pnv_chip_lpc_offset(chip, fdt);
293 
294         _FDT((fdt_setprop(fdt, lpc_offset, "primary", NULL, 0)));
295     }
296 
297     for (i = 0; i < chip->nr_cores; i++) {
298         PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
299 
300         pnv_dt_core(chip, pnv_core, fdt);
301 
302         /* Interrupt Control Presenters (ICP). One per core. */
303         pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads);
304     }
305 
306     if (chip->ram_size) {
307         pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size);
308     }
309 }
310 
311 static void pnv_dt_rtc(ISADevice *d, void *fdt, int lpc_off)
312 {
313     uint32_t io_base = d->ioport_id;
314     uint32_t io_regs[] = {
315         cpu_to_be32(1),
316         cpu_to_be32(io_base),
317         cpu_to_be32(2)
318     };
319     char *name;
320     int node;
321 
322     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
323     node = fdt_add_subnode(fdt, lpc_off, name);
324     _FDT(node);
325     g_free(name);
326 
327     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
328     _FDT((fdt_setprop_string(fdt, node, "compatible", "pnpPNP,b00")));
329 }
330 
331 static void pnv_dt_serial(ISADevice *d, void *fdt, int lpc_off)
332 {
333     const char compatible[] = "ns16550\0pnpPNP,501";
334     uint32_t io_base = d->ioport_id;
335     uint32_t io_regs[] = {
336         cpu_to_be32(1),
337         cpu_to_be32(io_base),
338         cpu_to_be32(8)
339     };
340     char *name;
341     int node;
342 
343     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
344     node = fdt_add_subnode(fdt, lpc_off, name);
345     _FDT(node);
346     g_free(name);
347 
348     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
349     _FDT((fdt_setprop(fdt, node, "compatible", compatible,
350                       sizeof(compatible))));
351 
352     _FDT((fdt_setprop_cell(fdt, node, "clock-frequency", 1843200)));
353     _FDT((fdt_setprop_cell(fdt, node, "current-speed", 115200)));
354     _FDT((fdt_setprop_cell(fdt, node, "interrupts", d->isairq[0])));
355     _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent",
356                            fdt_get_phandle(fdt, lpc_off))));
357 
358     /* This is needed by Linux */
359     _FDT((fdt_setprop_string(fdt, node, "device_type", "serial")));
360 }
361 
362 static void pnv_dt_ipmi_bt(ISADevice *d, void *fdt, int lpc_off)
363 {
364     const char compatible[] = "bt\0ipmi-bt";
365     uint32_t io_base;
366     uint32_t io_regs[] = {
367         cpu_to_be32(1),
368         0, /* 'io_base' retrieved from the 'ioport' property of 'isa-ipmi-bt' */
369         cpu_to_be32(3)
370     };
371     uint32_t irq;
372     char *name;
373     int node;
374 
375     io_base = object_property_get_int(OBJECT(d), "ioport", &error_fatal);
376     io_regs[1] = cpu_to_be32(io_base);
377 
378     irq = object_property_get_int(OBJECT(d), "irq", &error_fatal);
379 
380     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
381     node = fdt_add_subnode(fdt, lpc_off, name);
382     _FDT(node);
383     g_free(name);
384 
385     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
386     _FDT((fdt_setprop(fdt, node, "compatible", compatible,
387                       sizeof(compatible))));
388 
389     /* Mark it as reserved to avoid Linux trying to claim it */
390     _FDT((fdt_setprop_string(fdt, node, "status", "reserved")));
391     _FDT((fdt_setprop_cell(fdt, node, "interrupts", irq)));
392     _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent",
393                            fdt_get_phandle(fdt, lpc_off))));
394 }
395 
/* Context passed to the callback that adds ISA devices to the device tree */
typedef struct ForeachPopulateArgs {
    void *fdt;      /* device tree being populated */
    int offset;     /* offset of the node the devices are added under */
} ForeachPopulateArgs;
400 
401 static int pnv_dt_isa_device(DeviceState *dev, void *opaque)
402 {
403     ForeachPopulateArgs *args = opaque;
404     ISADevice *d = ISA_DEVICE(dev);
405 
406     if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
407         pnv_dt_rtc(d, args->fdt, args->offset);
408     } else if (object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL)) {
409         pnv_dt_serial(d, args->fdt, args->offset);
410     } else if (object_dynamic_cast(OBJECT(dev), "isa-ipmi-bt")) {
411         pnv_dt_ipmi_bt(d, args->fdt, args->offset);
412     } else {
413         error_report("unknown isa device %s@i%x", qdev_fw_name(dev),
414                      d->ioport_id);
415     }
416 
417     return 0;
418 }
419 
420 static void pnv_dt_isa(ISABus *bus, void *fdt, int lpc_offset)
421 {
422     ForeachPopulateArgs args = {
423         .fdt = fdt,
424         .offset = lpc_offset,
425     };
426 
427     /* ISA devices are not necessarily parented to the ISA bus so we
428      * can not use object_child_foreach() */
429     qbus_walk_children(BUS(bus), pnv_dt_isa_device, NULL, NULL, NULL, &args);
430 }
431 
432 static void *pnv_dt_create(MachineState *machine)
433 {
434     const char plat_compat[] = "qemu,powernv\0ibm,powernv";
435     PnvMachineState *pnv = PNV_MACHINE(machine);
436     void *fdt;
437     char *buf;
438     int off;
439     int i;
440     int lpc_offset;
441 
442     fdt = g_malloc0(FDT_MAX_SIZE);
443     _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
444 
445     /* Root node */
446     _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2)));
447     _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
448     _FDT((fdt_setprop_string(fdt, 0, "model",
449                              "IBM PowerNV (emulated by qemu)")));
450     _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat,
451                       sizeof(plat_compat))));
452 
453     buf =  qemu_uuid_unparse_strdup(&qemu_uuid);
454     _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
455     if (qemu_uuid_set) {
456         _FDT((fdt_property_string(fdt, "system-id", buf)));
457     }
458     g_free(buf);
459 
460     off = fdt_add_subnode(fdt, 0, "chosen");
461     if (machine->kernel_cmdline) {
462         _FDT((fdt_setprop_string(fdt, off, "bootargs",
463                                  machine->kernel_cmdline)));
464     }
465 
466     if (pnv->initrd_size) {
467         uint32_t start_prop = cpu_to_be32(pnv->initrd_base);
468         uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
469 
470         _FDT((fdt_setprop(fdt, off, "linux,initrd-start",
471                                &start_prop, sizeof(start_prop))));
472         _FDT((fdt_setprop(fdt, off, "linux,initrd-end",
473                                &end_prop, sizeof(end_prop))));
474     }
475 
476     /* Populate device tree for each chip */
477     for (i = 0; i < pnv->num_chips; i++) {
478         pnv_dt_chip(pnv->chips[i], fdt);
479     }
480 
481     /* Populate ISA devices on chip 0 */
482     lpc_offset = pnv_chip_lpc_offset(pnv->chips[0], fdt);
483     pnv_dt_isa(pnv->isa_bus, fdt, lpc_offset);
484 
485     if (pnv->bmc) {
486         pnv_dt_bmc_sensors(pnv->bmc, fdt);
487     }
488 
489     return fdt;
490 }
491 
492 static void pnv_powerdown_notify(Notifier *n, void *opaque)
493 {
494     PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
495 
496     if (pnv->bmc) {
497         pnv_bmc_powerdown(pnv->bmc);
498     }
499 }
500 
501 static void pnv_reset(void)
502 {
503     MachineState *machine = MACHINE(qdev_get_machine());
504     PnvMachineState *pnv = PNV_MACHINE(machine);
505     void *fdt;
506     Object *obj;
507 
508     qemu_devices_reset();
509 
510     /* OpenPOWER systems have a BMC, which can be defined on the
511      * command line with:
512      *
513      *   -device ipmi-bmc-sim,id=bmc0
514      *
515      * This is the internal simulator but it could also be an external
516      * BMC.
517      */
518     obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL);
519     if (obj) {
520         pnv->bmc = IPMI_BMC(obj);
521     }
522 
523     fdt = pnv_dt_create(machine);
524 
525     /* Pack resulting tree */
526     _FDT((fdt_pack(fdt)));
527 
528     cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
529 }
530 
531 static ISABus *pnv_isa_create(PnvChip *chip)
532 {
533     PnvLpcController *lpc = &chip->lpc;
534     ISABus *isa_bus;
535     qemu_irq *irqs;
536     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
537 
538     /* let isa_bus_new() create its own bridge on SysBus otherwise
539      * devices speficied on the command line won't find the bus and
540      * will fail to create.
541      */
542     isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io,
543                           &error_fatal);
544 
545     irqs = pnv_lpc_isa_irq_create(lpc, pcc->chip_type, ISA_NUM_IRQS);
546 
547     isa_bus_irqs(isa_bus, irqs);
548     return isa_bus;
549 }
550 
551 static void pnv_init(MachineState *machine)
552 {
553     PnvMachineState *pnv = PNV_MACHINE(machine);
554     MemoryRegion *ram;
555     char *fw_filename;
556     long fw_size;
557     int i;
558     char *chip_typename;
559 
560     /* allocate RAM */
561     if (machine->ram_size < (1 * G_BYTE)) {
562         warn_report("skiboot may not work with < 1GB of RAM");
563     }
564 
565     ram = g_new(MemoryRegion, 1);
566     memory_region_allocate_system_memory(ram, NULL, "pnv.ram",
567                                          machine->ram_size);
568     memory_region_add_subregion(get_system_memory(), 0, ram);
569 
570     /* load skiboot firmware  */
571     if (bios_name == NULL) {
572         bios_name = FW_FILE_NAME;
573     }
574 
575     fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
576     if (!fw_filename) {
577         error_report("Could not find OPAL firmware '%s'", bios_name);
578         exit(1);
579     }
580 
581     fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
582     if (fw_size < 0) {
583         error_report("Could not load OPAL firmware '%s'", fw_filename);
584         exit(1);
585     }
586     g_free(fw_filename);
587 
588     /* load kernel */
589     if (machine->kernel_filename) {
590         long kernel_size;
591 
592         kernel_size = load_image_targphys(machine->kernel_filename,
593                                           KERNEL_LOAD_ADDR, 0x2000000);
594         if (kernel_size < 0) {
595             error_report("Could not load kernel '%s'",
596                          machine->kernel_filename);
597             exit(1);
598         }
599     }
600 
601     /* load initrd */
602     if (machine->initrd_filename) {
603         pnv->initrd_base = INITRD_LOAD_ADDR;
604         pnv->initrd_size = load_image_targphys(machine->initrd_filename,
605                                   pnv->initrd_base, 0x10000000); /* 128MB max */
606         if (pnv->initrd_size < 0) {
607             error_report("Could not load initial ram disk '%s'",
608                          machine->initrd_filename);
609             exit(1);
610         }
611     }
612 
613     /* Create the processor chips */
614     i = strlen(machine->cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
615     chip_typename = g_strdup_printf(PNV_CHIP_TYPE_NAME("%.*s"),
616                                     i, machine->cpu_type);
617     if (!object_class_by_name(chip_typename)) {
618         error_report("invalid CPU model '%.*s' for %s machine",
619                      i, machine->cpu_type, MACHINE_GET_CLASS(machine)->name);
620         exit(1);
621     }
622 
623     pnv->chips = g_new0(PnvChip *, pnv->num_chips);
624     for (i = 0; i < pnv->num_chips; i++) {
625         char chip_name[32];
626         Object *chip = object_new(chip_typename);
627 
628         pnv->chips[i] = PNV_CHIP(chip);
629 
630         /* TODO: put all the memory in one node on chip 0 until we find a
631          * way to specify different ranges for each chip
632          */
633         if (i == 0) {
634             object_property_set_int(chip, machine->ram_size, "ram-size",
635                                     &error_fatal);
636         }
637 
638         snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i));
639         object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal);
640         object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
641                                 &error_fatal);
642         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
643         object_property_set_bool(chip, true, "realized", &error_fatal);
644     }
645     g_free(chip_typename);
646 
647     /* Instantiate ISA bus on chip 0 */
648     pnv->isa_bus = pnv_isa_create(pnv->chips[0]);
649 
650     /* Create serial port */
651     serial_hds_isa_init(pnv->isa_bus, 0, MAX_SERIAL_PORTS);
652 
653     /* Create an RTC ISA device too */
654     mc146818_rtc_init(pnv->isa_bus, 2000, NULL);
655 
656     /* OpenPOWER systems use a IPMI SEL Event message to notify the
657      * host to powerdown */
658     pnv->powerdown_notifier.notify = pnv_powerdown_notify;
659     qemu_register_powerdown_notifier(&pnv->powerdown_notifier);
660 }
661 
662 /*
663  *    0:21  Reserved - Read as zeros
664  *   22:24  Chip ID
665  *   25:28  Core number
666  *   29:31  Thread ID
667  */
668 static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id)
669 {
670     return (chip->chip_id << 7) | (core_id << 3);
671 }
672 
673 /*
674  *    0:48  Reserved - Read as zeroes
675  *   49:52  Node ID
676  *   53:55  Chip ID
677  *   56     Reserved - Read as zero
678  *   57:61  Core number
679  *   62:63  Thread ID
680  *
681  * We only care about the lower bits. uint32_t is fine for the moment.
682  */
683 static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id)
684 {
685     return (chip->chip_id << 8) | (core_id << 2);
686 }
687 
/*
 * Allowed core identifiers on a POWER8 Processor Chip. Bit N of a mask
 * stands for core EXN.
 *
 * <EX0 reserved>
 *  EX1  - Venice only
 *  EX2  - Venice only
 *  EX3  - Venice only
 *  EX4
 *  EX5
 *  EX6
 * <EX7,8 reserved> <reserved>
 *  EX9  - Venice only
 *  EX10 - Venice only
 *  EX11 - Venice only
 *  EX12
 *  EX13
 *  EX14
 * <EX15 reserved>
 */
#define POWER8E_CORE_MASK   0x7070ull
#define POWER8_CORE_MASK    0x7e7eull

/*
 * POWER9 has 24 cores, ids starting at 0x0
 *
 * NOTE(review): the mask below has more than 24 bits set - confirm
 * this is intended.
 */
#define POWER9_CORE_MASK    0xffffffffffffffull
713 
714 static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
715 {
716     DeviceClass *dc = DEVICE_CLASS(klass);
717     PnvChipClass *k = PNV_CHIP_CLASS(klass);
718 
719     k->chip_type = PNV_CHIP_POWER8E;
720     k->chip_cfam_id = 0x221ef04980000000ull;  /* P8 Murano DD2.1 */
721     k->cores_mask = POWER8E_CORE_MASK;
722     k->core_pir = pnv_chip_core_pir_p8;
723     k->xscom_base = 0x003fc0000000000ull;
724     dc->desc = "PowerNV Chip POWER8E";
725 }
726 
727 static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
728 {
729     DeviceClass *dc = DEVICE_CLASS(klass);
730     PnvChipClass *k = PNV_CHIP_CLASS(klass);
731 
732     k->chip_type = PNV_CHIP_POWER8;
733     k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
734     k->cores_mask = POWER8_CORE_MASK;
735     k->core_pir = pnv_chip_core_pir_p8;
736     k->xscom_base = 0x003fc0000000000ull;
737     dc->desc = "PowerNV Chip POWER8";
738 }
739 
740 static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
741 {
742     DeviceClass *dc = DEVICE_CLASS(klass);
743     PnvChipClass *k = PNV_CHIP_CLASS(klass);
744 
745     k->chip_type = PNV_CHIP_POWER8NVL;
746     k->chip_cfam_id = 0x120d304980000000ull;  /* P8 Naples DD1.0 */
747     k->cores_mask = POWER8_CORE_MASK;
748     k->core_pir = pnv_chip_core_pir_p8;
749     k->xscom_base = 0x003fc0000000000ull;
750     dc->desc = "PowerNV Chip POWER8NVL";
751 }
752 
753 static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
754 {
755     DeviceClass *dc = DEVICE_CLASS(klass);
756     PnvChipClass *k = PNV_CHIP_CLASS(klass);
757 
758     k->chip_type = PNV_CHIP_POWER9;
759     k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */
760     k->cores_mask = POWER9_CORE_MASK;
761     k->core_pir = pnv_chip_core_pir_p9;
762     k->xscom_base = 0x00603fc00000000ull;
763     dc->desc = "PowerNV Chip POWER9";
764 }
765 
766 static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp)
767 {
768     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
769     int cores_max;
770 
771     /*
772      * No custom mask for this chip, let's use the default one from *
773      * the chip class
774      */
775     if (!chip->cores_mask) {
776         chip->cores_mask = pcc->cores_mask;
777     }
778 
779     /* filter alien core ids ! some are reserved */
780     if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) {
781         error_setg(errp, "warning: invalid core mask for chip Ox%"PRIx64" !",
782                    chip->cores_mask);
783         return;
784     }
785     chip->cores_mask &= pcc->cores_mask;
786 
787     /* now that we have a sane layout, let check the number of cores */
788     cores_max = ctpop64(chip->cores_mask);
789     if (chip->nr_cores > cores_max) {
790         error_setg(errp, "warning: too many cores for chip ! Limit is %d",
791                    cores_max);
792         return;
793     }
794 }
795 
796 static void pnv_chip_init(Object *obj)
797 {
798     PnvChip *chip = PNV_CHIP(obj);
799     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
800 
801     chip->xscom_base = pcc->xscom_base;
802 
803     object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC);
804     object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL);
805 
806     object_initialize(&chip->psi, sizeof(chip->psi), TYPE_PNV_PSI);
807     object_property_add_child(obj, "psi", OBJECT(&chip->psi), NULL);
808     object_property_add_const_link(OBJECT(&chip->psi), "xics",
809                                    OBJECT(qdev_get_machine()), &error_abort);
810 
811     object_initialize(&chip->occ, sizeof(chip->occ), TYPE_PNV_OCC);
812     object_property_add_child(obj, "occ", OBJECT(&chip->occ), NULL);
813     object_property_add_const_link(OBJECT(&chip->occ), "psi",
814                                    OBJECT(&chip->psi), &error_abort);
815 
816     /* The LPC controller needs PSI to generate interrupts */
817     object_property_add_const_link(OBJECT(&chip->lpc), "psi",
818                                    OBJECT(&chip->psi), &error_abort);
819 }
820 
821 static void pnv_chip_icp_realize(PnvChip *chip, Error **errp)
822 {
823     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
824     const char *typename = pnv_chip_core_typename(chip);
825     size_t typesize = object_type_get_instance_size(typename);
826     int i, j;
827     char *name;
828     XICSFabric *xi = XICS_FABRIC(qdev_get_machine());
829 
830     name = g_strdup_printf("icp-%x", chip->chip_id);
831     memory_region_init(&chip->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE);
832     sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip->icp_mmio);
833     g_free(name);
834 
835     sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip));
836 
837     /* Map the ICP registers for each thread */
838     for (i = 0; i < chip->nr_cores; i++) {
839         PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
840         int core_hwid = CPU_CORE(pnv_core)->core_id;
841 
842         for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) {
843             uint32_t pir = pcc->core_pir(chip, core_hwid) + j;
844             PnvICPState *icp = PNV_ICP(xics_icp_get(xi, pir));
845 
846             memory_region_add_subregion(&chip->icp_mmio, pir << 12, &icp->mmio);
847         }
848     }
849 }
850 
851 static void pnv_chip_realize(DeviceState *dev, Error **errp)
852 {
853     PnvChip *chip = PNV_CHIP(dev);
854     Error *error = NULL;
855     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
856     const char *typename = pnv_chip_core_typename(chip);
857     size_t typesize = object_type_get_instance_size(typename);
858     int i, core_hwid;
859 
860     if (!object_class_by_name(typename)) {
861         error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
862         return;
863     }
864 
865     /* XSCOM bridge */
866     pnv_xscom_realize(chip, &error);
867     if (error) {
868         error_propagate(errp, error);
869         return;
870     }
871     sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip));
872 
873     /* Cores */
874     pnv_chip_core_sanitize(chip, &error);
875     if (error) {
876         error_propagate(errp, error);
877         return;
878     }
879 
880     chip->cores = g_malloc0(typesize * chip->nr_cores);
881 
882     for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8)
883              && (i < chip->nr_cores); core_hwid++) {
884         char core_name[32];
885         void *pnv_core = chip->cores + i * typesize;
886         uint64_t xscom_core_base;
887 
888         if (!(chip->cores_mask & (1ull << core_hwid))) {
889             continue;
890         }
891 
892         object_initialize(pnv_core, typesize, typename);
893         snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid);
894         object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core),
895                                   &error_fatal);
896         object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads",
897                                 &error_fatal);
898         object_property_set_int(OBJECT(pnv_core), core_hwid,
899                                 CPU_CORE_PROP_CORE_ID, &error_fatal);
900         object_property_set_int(OBJECT(pnv_core),
901                                 pcc->core_pir(chip, core_hwid),
902                                 "pir", &error_fatal);
903         object_property_add_const_link(OBJECT(pnv_core), "xics",
904                                        qdev_get_machine(), &error_fatal);
905         object_property_set_bool(OBJECT(pnv_core), true, "realized",
906                                  &error_fatal);
907         object_unref(OBJECT(pnv_core));
908 
909         /* Each core has an XSCOM MMIO region */
910         if (!pnv_chip_is_power9(chip)) {
911             xscom_core_base = PNV_XSCOM_EX_BASE(core_hwid);
912         } else {
913             xscom_core_base = PNV_XSCOM_P9_EC_BASE(core_hwid);
914         }
915 
916         pnv_xscom_add_subregion(chip, xscom_core_base,
917                                 &PNV_CORE(pnv_core)->xscom_regs);
918         i++;
919     }
920 
921     /* Create LPC controller */
922     object_property_set_bool(OBJECT(&chip->lpc), true, "realized",
923                              &error_fatal);
924     pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs);
925 
926     /* Interrupt Management Area. This is the memory region holding
927      * all the Interrupt Control Presenter (ICP) registers */
928     pnv_chip_icp_realize(chip, &error);
929     if (error) {
930         error_propagate(errp, error);
931         return;
932     }
933 
934     /* Processor Service Interface (PSI) Host Bridge */
935     object_property_set_int(OBJECT(&chip->psi), PNV_PSIHB_BASE(chip),
936                             "bar", &error_fatal);
937     object_property_set_bool(OBJECT(&chip->psi), true, "realized", &error);
938     if (error) {
939         error_propagate(errp, error);
940         return;
941     }
942     pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, &chip->psi.xscom_regs);
943 
944     /* Create the simplified OCC model */
945     object_property_set_bool(OBJECT(&chip->occ), true, "realized", &error);
946     if (error) {
947         error_propagate(errp, error);
948         return;
949     }
950     pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip->occ.xscom_regs);
951 }
952 
953 static Property pnv_chip_properties[] = {
954     DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
955     DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0),
956     DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0),
957     DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
958     DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
959     DEFINE_PROP_END_OF_LIST(),
960 };
961 
962 static void pnv_chip_class_init(ObjectClass *klass, void *data)
963 {
964     DeviceClass *dc = DEVICE_CLASS(klass);
965 
966     set_bit(DEVICE_CATEGORY_CPU, dc->categories);
967     dc->realize = pnv_chip_realize;
968     dc->props = pnv_chip_properties;
969     dc->desc = "PowerNV Chip";
970 }
971 
972 static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
973 {
974     PnvMachineState *pnv = PNV_MACHINE(xi);
975     int i;
976 
977     for (i = 0; i < pnv->num_chips; i++) {
978         if (ics_valid_irq(&pnv->chips[i]->psi.ics, irq)) {
979             return &pnv->chips[i]->psi.ics;
980         }
981     }
982     return NULL;
983 }
984 
985 static void pnv_ics_resend(XICSFabric *xi)
986 {
987     PnvMachineState *pnv = PNV_MACHINE(xi);
988     int i;
989 
990     for (i = 0; i < pnv->num_chips; i++) {
991         ics_resend(&pnv->chips[i]->psi.ics);
992     }
993 }
994 
995 static PowerPCCPU *ppc_get_vcpu_by_pir(int pir)
996 {
997     CPUState *cs;
998 
999     CPU_FOREACH(cs) {
1000         PowerPCCPU *cpu = POWERPC_CPU(cs);
1001         CPUPPCState *env = &cpu->env;
1002 
1003         if (env->spr_cb[SPR_PIR].default_value == pir) {
1004             return cpu;
1005         }
1006     }
1007 
1008     return NULL;
1009 }
1010 
1011 static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
1012 {
1013     PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir);
1014 
1015     return cpu ? ICP(cpu->intc) : NULL;
1016 }
1017 
1018 static void pnv_pic_print_info(InterruptStatsProvider *obj,
1019                                Monitor *mon)
1020 {
1021     PnvMachineState *pnv = PNV_MACHINE(obj);
1022     int i;
1023     CPUState *cs;
1024 
1025     CPU_FOREACH(cs) {
1026         PowerPCCPU *cpu = POWERPC_CPU(cs);
1027 
1028         icp_pic_print_info(ICP(cpu->intc), mon);
1029     }
1030 
1031     for (i = 0; i < pnv->num_chips; i++) {
1032         ics_pic_print_info(&pnv->chips[i]->psi.ics, mon);
1033     }
1034 }
1035 
1036 static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
1037                               void *opaque, Error **errp)
1038 {
1039     visit_type_uint32(v, name, &PNV_MACHINE(obj)->num_chips, errp);
1040 }
1041 
1042 static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name,
1043                               void *opaque, Error **errp)
1044 {
1045     PnvMachineState *pnv = PNV_MACHINE(obj);
1046     uint32_t num_chips;
1047     Error *local_err = NULL;
1048 
1049     visit_type_uint32(v, name, &num_chips, &local_err);
1050     if (local_err) {
1051         error_propagate(errp, local_err);
1052         return;
1053     }
1054 
1055     /*
1056      * TODO: should we decide on how many chips we can create based
1057      * on #cores and Venice vs. Murano vs. Naples chip type etc...,
1058      */
1059     if (!is_power_of_2(num_chips) || num_chips > 4) {
1060         error_setg(errp, "invalid number of chips: '%d'", num_chips);
1061         return;
1062     }
1063 
1064     pnv->num_chips = num_chips;
1065 }
1066 
1067 static void pnv_machine_initfn(Object *obj)
1068 {
1069     PnvMachineState *pnv = PNV_MACHINE(obj);
1070     pnv->num_chips = 1;
1071 }
1072 
1073 static void pnv_machine_class_props_init(ObjectClass *oc)
1074 {
1075     object_class_property_add(oc, "num-chips", "uint32",
1076                               pnv_get_num_chips, pnv_set_num_chips,
1077                               NULL, NULL, NULL);
1078     object_class_property_set_description(oc, "num-chips",
1079                               "Specifies the number of processor chips",
1080                               NULL);
1081 }
1082 
1083 static void pnv_machine_class_init(ObjectClass *oc, void *data)
1084 {
1085     MachineClass *mc = MACHINE_CLASS(oc);
1086     XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
1087     InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
1088 
1089     mc->desc = "IBM PowerNV (Non-Virtualized)";
1090     mc->init = pnv_init;
1091     mc->reset = pnv_reset;
1092     mc->max_cpus = MAX_CPUS;
1093     mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
1094     mc->block_default_type = IF_IDE; /* Pnv provides a AHCI device for
1095                                       * storage */
1096     mc->no_parallel = 1;
1097     mc->default_boot_order = NULL;
1098     mc->default_ram_size = 1 * G_BYTE;
1099     xic->icp_get = pnv_icp_get;
1100     xic->ics_get = pnv_ics_get;
1101     xic->ics_resend = pnv_ics_resend;
1102     ispc->print_info = pnv_pic_print_info;
1103 
1104     pnv_machine_class_props_init(oc);
1105 }
1106 
/*
 * Expands to the TypeInfo initializer of a chip model named
 * @chip_typename, derived from TYPE_PNV_CHIP and using
 * @chip_class_initfn as its class initializer.
 */
#define DEFINE_PNV_CHIP_TYPE(chip_typename, chip_class_initfn) \
    {                                                          \
        .name          = (chip_typename),                      \
        .parent        = TYPE_PNV_CHIP,                        \
        .class_init    = (chip_class_initfn),                  \
    }
1113 
1114 static const TypeInfo types[] = {
1115     {
1116         .name          = TYPE_PNV_MACHINE,
1117         .parent        = TYPE_MACHINE,
1118         .instance_size = sizeof(PnvMachineState),
1119         .instance_init = pnv_machine_initfn,
1120         .class_init    = pnv_machine_class_init,
1121         .interfaces = (InterfaceInfo[]) {
1122             { TYPE_XICS_FABRIC },
1123             { TYPE_INTERRUPT_STATS_PROVIDER },
1124             { },
1125         },
1126     },
1127     {
1128         .name          = TYPE_PNV_CHIP,
1129         .parent        = TYPE_SYS_BUS_DEVICE,
1130         .class_init    = pnv_chip_class_init,
1131         .instance_init = pnv_chip_init,
1132         .instance_size = sizeof(PnvChip),
1133         .class_size    = sizeof(PnvChipClass),
1134         .abstract      = true,
1135     },
1136     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER9, pnv_chip_power9_class_init),
1137     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER8, pnv_chip_power8_class_init),
1138     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER8E, pnv_chip_power8e_class_init),
1139     DEFINE_PNV_CHIP_TYPE(TYPE_PNV_CHIP_POWER8NVL,
1140                          pnv_chip_power8nvl_class_init),
1141 };
1142 
1143 DEFINE_TYPES(types)
1144