xref: /openbmc/qemu/hw/ppc/spapr.c (revision 1d300b5f)
1 /*
2  * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
3  *
4  * Copyright (c) 2004-2007 Fabrice Bellard
5  * Copyright (c) 2007 Jocelyn Mayer
6  * Copyright (c) 2010 David Gibson, IBM Corporation.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24  * THE SOFTWARE.
25  *
26  */
27 #include "sysemu/sysemu.h"
28 #include "hw/hw.h"
29 #include "elf.h"
30 #include "net/net.h"
31 #include "sysemu/blockdev.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/kvm.h"
34 #include "kvm_ppc.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/boards.h"
38 #include "hw/ppc/ppc.h"
39 #include "hw/loader.h"
40 
41 #include "hw/ppc/spapr.h"
42 #include "hw/ppc/spapr_vio.h"
43 #include "hw/pci-host/spapr.h"
44 #include "hw/ppc/xics.h"
45 #include "hw/pci/msi.h"
46 
47 #include "hw/pci/pci.h"
48 
49 #include "exec/address-spaces.h"
50 #include "hw/usb.h"
51 #include "qemu/config-file.h"
52 
53 #include <libfdt.h>
54 
/* SLOF memory layout:
 *
 * The SLOF raw image is loaded at 0; it copies its romfs right below the
 * flat device-tree, then positions SLOF itself 31M below that.
 *
 * So we set FW_OVERHEAD to 40MB, which should account for all of that
 * and more.
 *
 * We load our kernel at 4M, leaving space for the SLOF initial image.
 */
/* Size of the buffers in which the flattened device tree is built */
#define FDT_MAX_SIZE            0x10000
#define RTAS_MAX_SIZE           0x10000
/* 4M: space left for the SLOF initial image (see above) */
#define FW_MAX_SIZE             0x400000
#define FW_FILE_NAME            "slof.bin"
/* 40M, see the layout description above */
#define FW_OVERHEAD             0x2800000
/* The kernel is loaded immediately after the firmware area */
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE

/* NOTE(review): unit is not visible here, presumably megabytes - confirm */
#define MIN_RMA_SLOF            128UL

/* Timebase frequency advertised in the device tree when KVM is not in use */
#define TIMEBASE_FREQ           512000000ULL

#define MAX_CPUS                256
#define XICS_IRQS               1024

/* phandle given to the interrupt-controller node of the device tree */
#define PHANDLE_XICP            0x00001111

/* Size in bytes of the hash page table: 2^htab_shift */
#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))

/* The global sPAPR environment used by the functions of this file */
sPAPREnvironment *spapr;
84 
85 int spapr_allocate_irq(int hint, bool lsi)
86 {
87     int irq;
88 
89     if (hint) {
90         irq = hint;
91         /* FIXME: we should probably check for collisions somehow */
92     } else {
93         irq = spapr->next_irq++;
94     }
95 
96     /* Configure irq type */
97     if (!xics_get_qirq(spapr->icp, irq)) {
98         return 0;
99     }
100 
101     xics_set_irq_type(spapr->icp, irq, lsi);
102 
103     return irq;
104 }
105 
/*
 * Allocate a block of @num consecutive IRQs of the given type.
 *
 * Returns the number of the first IRQ of the block, or -1 if any single
 * allocation fails (IRQs allocated up to that point are not released).
 * -1 is also returned when @num is not positive, since nothing is allocated.
 */
int spapr_allocate_irq_block(int num, bool lsi)
{
    int first = -1;
    int n = 0;

    while (n < num) {
        int irq = spapr_allocate_irq(0, lsi);

        if (irq == 0) {
            return -1;
        }

        if (n == 0) {
            first = irq;
        }

        /* If the above doesn't create a consecutive block then that's
         * an internal bug */
        assert(irq == (first + n));

        n++;
    }

    return first;
}
131 
132 static XICSState *try_create_xics(const char *type, int nr_servers,
133                                   int nr_irqs)
134 {
135     DeviceState *dev;
136 
137     dev = qdev_create(NULL, type);
138     qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
139     qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
140     if (qdev_init(dev) < 0) {
141         return NULL;
142     }
143 
144     return XICS(dev);
145 }
146 
147 static XICSState *xics_system_init(int nr_servers, int nr_irqs)
148 {
149     XICSState *icp = NULL;
150 
151     icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
152     if (!icp) {
153         perror("Failed to create XICS\n");
154         abort();
155     }
156 
157     return icp;
158 }
159 
160 static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
161 {
162     int ret = 0, offset;
163     CPUState *cpu;
164     char cpu_model[32];
165     int smt = kvmppc_smt_threads();
166     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
167 
168     assert(spapr->cpu_model);
169 
170     for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
171         uint32_t associativity[] = {cpu_to_be32(0x5),
172                                     cpu_to_be32(0x0),
173                                     cpu_to_be32(0x0),
174                                     cpu_to_be32(0x0),
175                                     cpu_to_be32(cpu->numa_node),
176                                     cpu_to_be32(cpu->cpu_index)};
177 
178         if ((cpu->cpu_index % smt) != 0) {
179             continue;
180         }
181 
182         snprintf(cpu_model, 32, "/cpus/%s@%x", spapr->cpu_model,
183                  cpu->cpu_index);
184 
185         offset = fdt_path_offset(fdt, cpu_model);
186         if (offset < 0) {
187             return offset;
188         }
189 
190         if (nb_numa_nodes > 1) {
191             ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
192                               sizeof(associativity));
193             if (ret < 0) {
194                 return ret;
195             }
196         }
197 
198         ret = fdt_setprop(fdt, offset, "ibm,pft-size",
199                           pft_size_prop, sizeof(pft_size_prop));
200         if (ret < 0) {
201             return ret;
202         }
203     }
204     return ret;
205 }
206 
207 
208 static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
209                                      size_t maxsize)
210 {
211     size_t maxcells = maxsize / sizeof(uint32_t);
212     int i, j, count;
213     uint32_t *p = prop;
214 
215     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
216         struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
217 
218         if (!sps->page_shift) {
219             break;
220         }
221         for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
222             if (sps->enc[count].page_shift == 0) {
223                 break;
224             }
225         }
226         if ((p - prop) >= (maxcells - 3 - count * 2)) {
227             break;
228         }
229         *(p++) = cpu_to_be32(sps->page_shift);
230         *(p++) = cpu_to_be32(sps->slb_enc);
231         *(p++) = cpu_to_be32(count);
232         for (j = 0; j < count; j++) {
233             *(p++) = cpu_to_be32(sps->enc[j].page_shift);
234             *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
235         }
236     }
237 
238     return (p - prop) * sizeof(uint32_t);
239 }
240 
/*
 * Evaluate the libfdt expression @exp once.  If the result is negative,
 * print the expression text and the libfdt error string to stderr and
 * terminate the process with exit status 1.
 */
#define _FDT(exp)                                                      \
    do {                                                               \
        int ret = (exp);                                               \
        if (ret >= 0) {                                                \
            break;                                                     \
        }                                                              \
        fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                #exp, fdt_strerror(ret));                              \
        exit(1);                                                       \
    } while (0)
250 
251 
252 static void *spapr_create_fdt_skel(const char *cpu_model,
253                                    hwaddr initrd_base,
254                                    hwaddr initrd_size,
255                                    hwaddr kernel_size,
256                                    const char *boot_device,
257                                    const char *kernel_cmdline,
258                                    uint32_t epow_irq)
259 {
260     void *fdt;
261     CPUState *cs;
262     uint32_t start_prop = cpu_to_be32(initrd_base);
263     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
264     char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
265         "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
266     char qemu_hypertas_prop[] = "hcall-memop1";
267     uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
268     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
269     char *modelname;
270     int i, smt = kvmppc_smt_threads();
271     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
272 
273     fdt = g_malloc0(FDT_MAX_SIZE);
274     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
275 
276     if (kernel_size) {
277         _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
278     }
279     if (initrd_size) {
280         _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
281     }
282     _FDT((fdt_finish_reservemap(fdt)));
283 
284     /* Root node */
285     _FDT((fdt_begin_node(fdt, "")));
286     _FDT((fdt_property_string(fdt, "device_type", "chrp")));
287     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
288     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
289 
290     _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
291     _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
292 
293     /* /chosen */
294     _FDT((fdt_begin_node(fdt, "chosen")));
295 
296     /* Set Form1_affinity */
297     _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
298 
299     _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
300     _FDT((fdt_property(fdt, "linux,initrd-start",
301                        &start_prop, sizeof(start_prop))));
302     _FDT((fdt_property(fdt, "linux,initrd-end",
303                        &end_prop, sizeof(end_prop))));
304     if (kernel_size) {
305         uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
306                               cpu_to_be64(kernel_size) };
307 
308         _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
309     }
310     if (boot_device) {
311         _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
312     }
313     _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
314     _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
315     _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
316 
317     _FDT((fdt_end_node(fdt)));
318 
319     /* cpus */
320     _FDT((fdt_begin_node(fdt, "cpus")));
321 
322     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
323     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
324 
325     modelname = g_strdup(cpu_model);
326 
327     for (i = 0; i < strlen(modelname); i++) {
328         modelname[i] = toupper(modelname[i]);
329     }
330 
331     /* This is needed during FDT finalization */
332     spapr->cpu_model = g_strdup(modelname);
333 
334     for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
335         PowerPCCPU *cpu = POWERPC_CPU(cs);
336         CPUPPCState *env = &cpu->env;
337         PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
338         int index = cs->cpu_index;
339         uint32_t servers_prop[smp_threads];
340         uint32_t gservers_prop[smp_threads * 2];
341         char *nodename;
342         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
343                            0xffffffff, 0xffffffff};
344         uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
345         uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
346         uint32_t page_sizes_prop[64];
347         size_t page_sizes_prop_size;
348 
349         if ((index % smt) != 0) {
350             continue;
351         }
352 
353         nodename = g_strdup_printf("%s@%x", modelname, index);
354 
355         _FDT((fdt_begin_node(fdt, nodename)));
356 
357         g_free(nodename);
358 
359         _FDT((fdt_property_cell(fdt, "reg", index)));
360         _FDT((fdt_property_string(fdt, "device_type", "cpu")));
361 
362         _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
363         _FDT((fdt_property_cell(fdt, "d-cache-block-size",
364                                 env->dcache_line_size)));
365         _FDT((fdt_property_cell(fdt, "d-cache-line-size",
366                                 env->dcache_line_size)));
367         _FDT((fdt_property_cell(fdt, "i-cache-block-size",
368                                 env->icache_line_size)));
369         _FDT((fdt_property_cell(fdt, "i-cache-line-size",
370                                 env->icache_line_size)));
371 
372         if (pcc->l1_dcache_size) {
373             _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
374         } else {
375             fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
376         }
377         if (pcc->l1_icache_size) {
378             _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
379         } else {
380             fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
381         }
382 
383         _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
384         _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
385         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
386         _FDT((fdt_property_string(fdt, "status", "okay")));
387         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
388 
389         /* Build interrupt servers and gservers properties */
390         for (i = 0; i < smp_threads; i++) {
391             servers_prop[i] = cpu_to_be32(index + i);
392             /* Hack, direct the group queues back to cpu 0 */
393             gservers_prop[i*2] = cpu_to_be32(index + i);
394             gservers_prop[i*2 + 1] = 0;
395         }
396         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
397                            servers_prop, sizeof(servers_prop))));
398         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
399                            gservers_prop, sizeof(gservers_prop))));
400 
401         if (env->mmu_model & POWERPC_MMU_1TSEG) {
402             _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
403                                segs, sizeof(segs))));
404         }
405 
406         /* Advertise VMX/VSX (vector extensions) if available
407          *   0 / no property == no vector extensions
408          *   1               == VMX / Altivec available
409          *   2               == VSX available */
410         if (env->insns_flags & PPC_ALTIVEC) {
411             uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
412 
413             _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
414         }
415 
416         /* Advertise DFP (Decimal Floating Point) if available
417          *   0 / no property == no DFP
418          *   1               == DFP available */
419         if (env->insns_flags2 & PPC2_DFP) {
420             _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
421         }
422 
423         page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
424                                                       sizeof(page_sizes_prop));
425         if (page_sizes_prop_size) {
426             _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
427                                page_sizes_prop, page_sizes_prop_size)));
428         }
429 
430         _FDT((fdt_end_node(fdt)));
431     }
432 
433     g_free(modelname);
434 
435     _FDT((fdt_end_node(fdt)));
436 
437     /* RTAS */
438     _FDT((fdt_begin_node(fdt, "rtas")));
439 
440     _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
441                        sizeof(hypertas_prop))));
442     _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
443                        sizeof(qemu_hypertas_prop))));
444 
445     _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
446         refpoints, sizeof(refpoints))));
447 
448     _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
449 
450     _FDT((fdt_end_node(fdt)));
451 
452     /* interrupt controller */
453     _FDT((fdt_begin_node(fdt, "interrupt-controller")));
454 
455     _FDT((fdt_property_string(fdt, "device_type",
456                               "PowerPC-External-Interrupt-Presentation")));
457     _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
458     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
459     _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
460                        interrupt_server_ranges_prop,
461                        sizeof(interrupt_server_ranges_prop))));
462     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
463     _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
464     _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
465 
466     _FDT((fdt_end_node(fdt)));
467 
468     /* vdevice */
469     _FDT((fdt_begin_node(fdt, "vdevice")));
470 
471     _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
472     _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
473     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
474     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
475     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
476     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
477 
478     _FDT((fdt_end_node(fdt)));
479 
480     /* event-sources */
481     spapr_events_fdt_skel(fdt, epow_irq);
482 
483     _FDT((fdt_end_node(fdt))); /* close root node */
484     _FDT((fdt_finish(fdt)));
485 
486     return fdt;
487 }
488 
489 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
490 {
491     uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
492                                 cpu_to_be32(0x0), cpu_to_be32(0x0),
493                                 cpu_to_be32(0x0)};
494     char mem_name[32];
495     hwaddr node0_size, mem_start;
496     uint64_t mem_reg_property[2];
497     int i, off;
498 
499     /* memory node(s) */
500     node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
501     if (spapr->rma_size > node0_size) {
502         spapr->rma_size = node0_size;
503     }
504 
505     /* RMA */
506     mem_reg_property[0] = 0;
507     mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
508     off = fdt_add_subnode(fdt, 0, "memory@0");
509     _FDT(off);
510     _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
511     _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
512                       sizeof(mem_reg_property))));
513     _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
514                       sizeof(associativity))));
515 
516     /* RAM: Node 0 */
517     if (node0_size > spapr->rma_size) {
518         mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
519         mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
520 
521         sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
522         off = fdt_add_subnode(fdt, 0, mem_name);
523         _FDT(off);
524         _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
525         _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
526                           sizeof(mem_reg_property))));
527         _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
528                           sizeof(associativity))));
529     }
530 
531     /* RAM: Node 1 and beyond */
532     mem_start = node0_size;
533     for (i = 1; i < nb_numa_nodes; i++) {
534         mem_reg_property[0] = cpu_to_be64(mem_start);
535         mem_reg_property[1] = cpu_to_be64(node_mem[i]);
536         associativity[3] = associativity[4] = cpu_to_be32(i);
537         sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
538         off = fdt_add_subnode(fdt, 0, mem_name);
539         _FDT(off);
540         _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
541         _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
542                           sizeof(mem_reg_property))));
543         _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
544                           sizeof(associativity))));
545         mem_start += node_mem[i];
546     }
547 
548     return 0;
549 }
550 
551 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
552                                hwaddr fdt_addr,
553                                hwaddr rtas_addr,
554                                hwaddr rtas_size)
555 {
556     int ret;
557     void *fdt;
558     sPAPRPHBState *phb;
559 
560     fdt = g_malloc(FDT_MAX_SIZE);
561 
562     /* open out the base tree into a temp buffer for the final tweaks */
563     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
564 
565     ret = spapr_populate_memory(spapr, fdt);
566     if (ret < 0) {
567         fprintf(stderr, "couldn't setup memory nodes in fdt\n");
568         exit(1);
569     }
570 
571     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
572     if (ret < 0) {
573         fprintf(stderr, "couldn't setup vio devices in fdt\n");
574         exit(1);
575     }
576 
577     QLIST_FOREACH(phb, &spapr->phbs, list) {
578         ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
579     }
580 
581     if (ret < 0) {
582         fprintf(stderr, "couldn't setup PCI devices in fdt\n");
583         exit(1);
584     }
585 
586     /* RTAS */
587     ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
588     if (ret < 0) {
589         fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
590     }
591 
592     /* Advertise NUMA via ibm,associativity */
593     ret = spapr_fixup_cpu_dt(fdt, spapr);
594     if (ret < 0) {
595         fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
596     }
597 
598     if (!spapr->has_graphics) {
599         spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
600     }
601 
602     _FDT((fdt_pack(fdt)));
603 
604     if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
605         hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
606                  fdt_totalsize(fdt), FDT_MAX_SIZE);
607         exit(1);
608     }
609 
610     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
611 
612     g_free(fdt);
613 }
614 
615 static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
616 {
617     return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
618 }
619 
620 static void emulate_spapr_hypercall(PowerPCCPU *cpu)
621 {
622     CPUPPCState *env = &cpu->env;
623 
624     if (msr_pr) {
625         hcall_dprintf("Hypercall made with MSR[PR]=1\n");
626         env->gpr[3] = H_PRIVILEGE;
627     } else {
628         env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
629     }
630 }
631 
632 static void spapr_reset_htab(sPAPREnvironment *spapr)
633 {
634     long shift;
635 
636     /* allocate hash page table.  For now we always make this 16mb,
637      * later we should probably make it scale to the size of guest
638      * RAM */
639 
640     shift = kvmppc_reset_htab(spapr->htab_shift);
641 
642     if (shift > 0) {
643         /* Kernel handles htab, we don't need to allocate one */
644         spapr->htab_shift = shift;
645     } else {
646         if (!spapr->htab) {
647             /* Allocate an htab if we don't yet have one */
648             spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
649         }
650 
651         /* And clear it */
652         memset(spapr->htab, 0, HTAB_SIZE(spapr));
653     }
654 
655     /* Update the RMA size if necessary */
656     if (spapr->vrma_adjust) {
657         spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
658     }
659 }
660 
661 static void ppc_spapr_reset(void)
662 {
663     PowerPCCPU *first_ppc_cpu;
664 
665     /* Reset the hash table & recalc the RMA */
666     spapr_reset_htab(spapr);
667 
668     qemu_devices_reset();
669 
670     /* Load the fdt */
671     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
672                        spapr->rtas_size);
673 
674     /* Set up the entry state */
675     first_ppc_cpu = POWERPC_CPU(first_cpu);
676     first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
677     first_ppc_cpu->env.gpr[5] = 0;
678     first_cpu->halted = 0;
679     first_ppc_cpu->env.nip = spapr->entry_point;
680 
681 }
682 
683 static void spapr_cpu_reset(void *opaque)
684 {
685     PowerPCCPU *cpu = opaque;
686     CPUState *cs = CPU(cpu);
687     CPUPPCState *env = &cpu->env;
688 
689     cpu_reset(cs);
690 
691     /* All CPUs start halted.  CPU0 is unhalted from the machine level
692      * reset code and the rest are explicitly started up by the guest
693      * using an RTAS call */
694     cs->halted = 1;
695 
696     env->spr[SPR_HIOR] = 0;
697 
698     env->external_htab = (uint8_t *)spapr->htab;
699     env->htab_base = -1;
700     env->htab_mask = HTAB_SIZE(spapr) - 1;
701     env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
702         (spapr->htab_shift - 18);
703 }
704 
705 static void spapr_create_nvram(sPAPREnvironment *spapr)
706 {
707     DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
708     const char *drivename = qemu_opt_get(qemu_get_machine_opts(), "nvram");
709 
710     if (drivename) {
711         BlockDriverState *bs;
712 
713         bs = bdrv_find(drivename);
714         if (!bs) {
715             fprintf(stderr, "No such block device \"%s\" for nvram\n",
716                     drivename);
717             exit(1);
718         }
719         qdev_prop_set_drive_nofail(dev, "drive", bs);
720     }
721 
722     qdev_init_nofail(dev);
723 
724     spapr->nvram = (struct sPAPRNVRAM *)dev;
725 }
726 
727 /* Returns whether we want to use VGA or not */
728 static int spapr_vga_init(PCIBus *pci_bus)
729 {
730     switch (vga_interface_type) {
731     case VGA_NONE:
732     case VGA_STD:
733         return pci_vga_init(pci_bus) != NULL;
734     default:
735         fprintf(stderr, "This vga model is not supported,"
736                 "currently it only supports -vga std\n");
737         exit(0);
738         break;
739     }
740 }
741 
/*
 * Migration description of the machine-wide sPAPR state: the next IRQ
 * number to hand out and the RTC offset.
 */
static const VMStateDescription vmstate_spapr = {
    .name = "spapr",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields      = (VMStateField []) {
        /* Next IRQ number, see spapr_allocate_irq() */
        VMSTATE_UINT32(next_irq, sPAPREnvironment),

        /* RTC offset */
        VMSTATE_UINT64(rtc_offset, sPAPREnvironment),

        VMSTATE_END_OF_LIST()
    },
};
756 
/*
 * Helpers for walking the hash page table as an array of HPTEs, each of
 * which occupies two consecutive 64-bit words.
 */
/* Pointer to the _i-th HPTE of _table */
#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
/* Non-zero if HPTE64_V_VALID is set in the first word of the HPTE */
#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
/* Non-zero if HPTE64_V_HPTE_DIRTY is set in the first word of the HPTE */
#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
/* Clear HPTE64_V_HPTE_DIRTY in the first word of the HPTE, in place */
#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
761 
762 static int htab_save_setup(QEMUFile *f, void *opaque)
763 {
764     sPAPREnvironment *spapr = opaque;
765 
766     /* "Iteration" header */
767     qemu_put_be32(f, spapr->htab_shift);
768 
769     if (spapr->htab) {
770         spapr->htab_save_index = 0;
771         spapr->htab_first_pass = true;
772     } else {
773         assert(kvm_enabled());
774 
775         spapr->htab_fd = kvmppc_get_htab_fd(false);
776         if (spapr->htab_fd < 0) {
777             fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
778                     strerror(errno));
779             return -1;
780         }
781     }
782 
783 
784     return 0;
785 }
786 
787 static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
788                                  int64_t max_ns)
789 {
790     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
791     int index = spapr->htab_save_index;
792     int64_t starttime = qemu_get_clock_ns(rt_clock);
793 
794     assert(spapr->htab_first_pass);
795 
796     do {
797         int chunkstart;
798 
799         /* Consume invalid HPTEs */
800         while ((index < htabslots)
801                && !HPTE_VALID(HPTE(spapr->htab, index))) {
802             index++;
803             CLEAN_HPTE(HPTE(spapr->htab, index));
804         }
805 
806         /* Consume valid HPTEs */
807         chunkstart = index;
808         while ((index < htabslots)
809                && HPTE_VALID(HPTE(spapr->htab, index))) {
810             index++;
811             CLEAN_HPTE(HPTE(spapr->htab, index));
812         }
813 
814         if (index > chunkstart) {
815             int n_valid = index - chunkstart;
816 
817             qemu_put_be32(f, chunkstart);
818             qemu_put_be16(f, n_valid);
819             qemu_put_be16(f, 0);
820             qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
821                             HASH_PTE_SIZE_64 * n_valid);
822 
823             if ((qemu_get_clock_ns(rt_clock) - starttime) > max_ns) {
824                 break;
825             }
826         }
827     } while ((index < htabslots) && !qemu_file_rate_limit(f));
828 
829     if (index >= htabslots) {
830         assert(index == htabslots);
831         index = 0;
832         spapr->htab_first_pass = false;
833     }
834     spapr->htab_save_index = index;
835 }
836 
837 static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
838                                 int64_t max_ns)
839 {
840     bool final = max_ns < 0;
841     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
842     int examined = 0, sent = 0;
843     int index = spapr->htab_save_index;
844     int64_t starttime = qemu_get_clock_ns(rt_clock);
845 
846     assert(!spapr->htab_first_pass);
847 
848     do {
849         int chunkstart, invalidstart;
850 
851         /* Consume non-dirty HPTEs */
852         while ((index < htabslots)
853                && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
854             index++;
855             examined++;
856         }
857 
858         chunkstart = index;
859         /* Consume valid dirty HPTEs */
860         while ((index < htabslots)
861                && HPTE_DIRTY(HPTE(spapr->htab, index))
862                && HPTE_VALID(HPTE(spapr->htab, index))) {
863             CLEAN_HPTE(HPTE(spapr->htab, index));
864             index++;
865             examined++;
866         }
867 
868         invalidstart = index;
869         /* Consume invalid dirty HPTEs */
870         while ((index < htabslots)
871                && HPTE_DIRTY(HPTE(spapr->htab, index))
872                && !HPTE_VALID(HPTE(spapr->htab, index))) {
873             CLEAN_HPTE(HPTE(spapr->htab, index));
874             index++;
875             examined++;
876         }
877 
878         if (index > chunkstart) {
879             int n_valid = invalidstart - chunkstart;
880             int n_invalid = index - invalidstart;
881 
882             qemu_put_be32(f, chunkstart);
883             qemu_put_be16(f, n_valid);
884             qemu_put_be16(f, n_invalid);
885             qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
886                             HASH_PTE_SIZE_64 * n_valid);
887             sent += index - chunkstart;
888 
889             if (!final && (qemu_get_clock_ns(rt_clock) - starttime) > max_ns) {
890                 break;
891             }
892         }
893 
894         if (examined >= htabslots) {
895             break;
896         }
897 
898         if (index >= htabslots) {
899             assert(index == htabslots);
900             index = 0;
901         }
902     } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));
903 
904     if (index >= htabslots) {
905         assert(index == htabslots);
906         index = 0;
907     }
908 
909     spapr->htab_save_index = index;
910 
911     return (examined >= htabslots) && (sent == 0) ? 1 : 0;
912 }
913 
/* Time budget, in nanoseconds, that htab_save_iterate() hands to the
 * hash table save helpers (htab_save_first_pass(), htab_save_later_pass()
 * and kvmppc_save_htab()) for a single migration iteration. */
914 #define MAX_ITERATION_NS    5000000 /* 5 ms */
/* Third argument given to kvmppc_save_htab() by the save handlers.
 * NOTE(review): presumably the size in bytes of the staging buffer used
 * when reading the table from KVM -- confirm against kvmppc_save_htab(). */
915 #define MAX_KVM_BUF_SIZE    2048
916 
917 static int htab_save_iterate(QEMUFile *f, void *opaque)
918 {
919     sPAPREnvironment *spapr = opaque;
920     int rc = 0;
921 
922     /* Iteration header */
923     qemu_put_be32(f, 0);
924 
925     if (!spapr->htab) {
926         assert(kvm_enabled());
927 
928         rc = kvmppc_save_htab(f, spapr->htab_fd,
929                               MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
930         if (rc < 0) {
931             return rc;
932         }
933     } else  if (spapr->htab_first_pass) {
934         htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
935     } else {
936         rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
937     }
938 
939     /* End marker */
940     qemu_put_be32(f, 0);
941     qemu_put_be16(f, 0);
942     qemu_put_be16(f, 0);
943 
944     return rc;
945 }
946 
947 static int htab_save_complete(QEMUFile *f, void *opaque)
948 {
949     sPAPREnvironment *spapr = opaque;
950 
951     /* Iteration header */
952     qemu_put_be32(f, 0);
953 
954     if (!spapr->htab) {
955         int rc;
956 
957         assert(kvm_enabled());
958 
959         rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
960         if (rc < 0) {
961             return rc;
962         }
963         close(spapr->htab_fd);
964         spapr->htab_fd = -1;
965     } else {
966         htab_save_later_pass(f, spapr, -1);
967     }
968 
969     /* End marker */
970     qemu_put_be32(f, 0);
971     qemu_put_be16(f, 0);
972     qemu_put_be16(f, 0);
973 
974     return 0;
975 }
976 
977 static int htab_load(QEMUFile *f, void *opaque, int version_id)
978 {
979     sPAPREnvironment *spapr = opaque;
980     uint32_t section_hdr;
981     int fd = -1;
982 
983     if (version_id < 1 || version_id > 1) {
984         fprintf(stderr, "htab_load() bad version\n");
985         return -EINVAL;
986     }
987 
988     section_hdr = qemu_get_be32(f);
989 
990     if (section_hdr) {
991         /* First section, just the hash shift */
992         if (spapr->htab_shift != section_hdr) {
993             return -EINVAL;
994         }
995         return 0;
996     }
997 
998     if (!spapr->htab) {
999         assert(kvm_enabled());
1000 
1001         fd = kvmppc_get_htab_fd(true);
1002         if (fd < 0) {
1003             fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
1004                     strerror(errno));
1005         }
1006     }
1007 
1008     while (true) {
1009         uint32_t index;
1010         uint16_t n_valid, n_invalid;
1011 
1012         index = qemu_get_be32(f);
1013         n_valid = qemu_get_be16(f);
1014         n_invalid = qemu_get_be16(f);
1015 
1016         if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
1017             /* End of Stream */
1018             break;
1019         }
1020 
1021         if ((index + n_valid + n_invalid) >
1022             (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
1023             /* Bad index in stream */
1024             fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
1025                     "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
1026                     spapr->htab_shift);
1027             return -EINVAL;
1028         }
1029 
1030         if (spapr->htab) {
1031             if (n_valid) {
1032                 qemu_get_buffer(f, HPTE(spapr->htab, index),
1033                                 HASH_PTE_SIZE_64 * n_valid);
1034             }
1035             if (n_invalid) {
1036                 memset(HPTE(spapr->htab, index + n_valid), 0,
1037                        HASH_PTE_SIZE_64 * n_invalid);
1038             }
1039         } else {
1040             int rc;
1041 
1042             assert(fd >= 0);
1043 
1044             rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
1045             if (rc < 0) {
1046                 return rc;
1047             }
1048         }
1049     }
1050 
1051     if (!spapr->htab) {
1052         assert(fd >= 0);
1053         close(fd);
1054     }
1055 
1056     return 0;
1057 }
1058 
/* Save/load callbacks for the guest hash page table.  ppc_spapr_init()
 * registers this table under the "spapr/htab" section name via
 * register_savevm_live(). */
1059 static SaveVMHandlers savevm_htab_handlers = {
1060     .save_live_setup = htab_save_setup,
1061     .save_live_iterate = htab_save_iterate,
1062     .save_live_complete = htab_save_complete,
1063     .load_state = htab_load,
1064 };
1065 
1066 /* pSeries LPAR / sPAPR hardware init */
1067 static void ppc_spapr_init(QEMUMachineInitArgs *args)
1068 {
1069     ram_addr_t ram_size = args->ram_size;
1070     const char *cpu_model = args->cpu_model;
1071     const char *kernel_filename = args->kernel_filename;
1072     const char *kernel_cmdline = args->kernel_cmdline;
1073     const char *initrd_filename = args->initrd_filename;
1074     const char *boot_device = args->boot_device;
1075     PowerPCCPU *cpu;
1076     CPUPPCState *env;
1077     PCIHostState *phb;
1078     int i;
1079     MemoryRegion *sysmem = get_system_memory();
1080     MemoryRegion *ram = g_new(MemoryRegion, 1);
1081     hwaddr rma_alloc_size;
1082     uint32_t initrd_base = 0;
1083     long kernel_size = 0, initrd_size = 0;
1084     long load_limit, rtas_limit, fw_size;
1085     char *filename;
1086 
1087     msi_supported = true;
1088 
1089     spapr = g_malloc0(sizeof(*spapr));
1090     QLIST_INIT(&spapr->phbs);
1091 
1092     cpu_ppc_hypercall = emulate_spapr_hypercall;
1093 
1094     /* Allocate RMA if necessary */
1095     rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
1096 
1097     if (rma_alloc_size == -1) {
1098         hw_error("qemu: Unable to create RMA\n");
1099         exit(1);
1100     }
1101 
1102     if (rma_alloc_size && (rma_alloc_size < ram_size)) {
1103         spapr->rma_size = rma_alloc_size;
1104     } else {
1105         spapr->rma_size = ram_size;
1106 
1107         /* With KVM, we don't actually know whether KVM supports an
1108          * unbounded RMA (PR KVM) or is limited by the hash table size
1109          * (HV KVM using VRMA), so we always assume the latter
1110          *
1111          * In that case, we also limit the initial allocations for RTAS
1112          * etc... to 256M since we have no way to know what the VRMA size
1113          * is going to be as it depends on the size of the hash table
1114          * isn't determined yet.
1115          */
1116         if (kvm_enabled()) {
1117             spapr->vrma_adjust = 1;
1118             spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
1119         }
1120     }
1121 
1122     /* We place the device tree and RTAS just below either the top of the RMA,
1123      * or just below 2GB, whichever is lowere, so that it can be
1124      * processed with 32-bit real mode code if necessary */
1125     rtas_limit = MIN(spapr->rma_size, 0x80000000);
1126     spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
1127     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
1128     load_limit = spapr->fdt_addr - FW_OVERHEAD;
1129 
1130     /* We aim for a hash table of size 1/128 the size of RAM.  The
1131      * normal rule of thumb is 1/64 the size of RAM, but that's much
1132      * more than needed for the Linux guests we support. */
1133     spapr->htab_shift = 18; /* Minimum architected size */
1134     while (spapr->htab_shift <= 46) {
1135         if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
1136             break;
1137         }
1138         spapr->htab_shift++;
1139     }
1140 
1141     /* Set up Interrupt Controller before we create the VCPUs */
1142     spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
1143                                   XICS_IRQS);
1144     spapr->next_irq = XICS_IRQ_BASE;
1145 
1146     /* init CPUs */
1147     if (cpu_model == NULL) {
1148         cpu_model = kvm_enabled() ? "host" : "POWER7";
1149     }
1150     for (i = 0; i < smp_cpus; i++) {
1151         cpu = cpu_ppc_init(cpu_model);
1152         if (cpu == NULL) {
1153             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
1154             exit(1);
1155         }
1156         env = &cpu->env;
1157 
1158         xics_cpu_setup(spapr->icp, cpu);
1159 
1160         /* Set time-base frequency to 512 MHz */
1161         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
1162 
1163         /* PAPR always has exception vectors in RAM not ROM. To ensure this,
1164          * MSR[IP] should never be set.
1165          */
1166         env->msr_mask &= ~(1 << 6);
1167 
1168         /* Tell KVM that we're in PAPR mode */
1169         if (kvm_enabled()) {
1170             kvmppc_set_papr(cpu);
1171         }
1172 
1173         qemu_register_reset(spapr_cpu_reset, cpu);
1174     }
1175 
1176     /* allocate RAM */
1177     spapr->ram_limit = ram_size;
1178     if (spapr->ram_limit > rma_alloc_size) {
1179         ram_addr_t nonrma_base = rma_alloc_size;
1180         ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
1181 
1182         memory_region_init_ram(ram, NULL, "ppc_spapr.ram", nonrma_size);
1183         vmstate_register_ram_global(ram);
1184         memory_region_add_subregion(sysmem, nonrma_base, ram);
1185     }
1186 
1187     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
1188     spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
1189                                            rtas_limit - spapr->rtas_addr);
1190     if (spapr->rtas_size < 0) {
1191         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
1192         exit(1);
1193     }
1194     if (spapr->rtas_size > RTAS_MAX_SIZE) {
1195         hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
1196                  spapr->rtas_size, RTAS_MAX_SIZE);
1197         exit(1);
1198     }
1199     g_free(filename);
1200 
1201     /* Set up EPOW events infrastructure */
1202     spapr_events_init(spapr);
1203 
1204     /* Set up VIO bus */
1205     spapr->vio_bus = spapr_vio_bus_init();
1206 
1207     for (i = 0; i < MAX_SERIAL_PORTS; i++) {
1208         if (serial_hds[i]) {
1209             spapr_vty_create(spapr->vio_bus, serial_hds[i]);
1210         }
1211     }
1212 
1213     /* We always have at least the nvram device on VIO */
1214     spapr_create_nvram(spapr);
1215 
1216     /* Set up PCI */
1217     spapr_pci_rtas_init();
1218 
1219     phb = spapr_create_phb(spapr, 0);
1220 
1221     for (i = 0; i < nb_nics; i++) {
1222         NICInfo *nd = &nd_table[i];
1223 
1224         if (!nd->model) {
1225             nd->model = g_strdup("ibmveth");
1226         }
1227 
1228         if (strcmp(nd->model, "ibmveth") == 0) {
1229             spapr_vlan_create(spapr->vio_bus, nd);
1230         } else {
1231             pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
1232         }
1233     }
1234 
1235     for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
1236         spapr_vscsi_create(spapr->vio_bus);
1237     }
1238 
1239     /* Graphics */
1240     if (spapr_vga_init(phb->bus)) {
1241         spapr->has_graphics = true;
1242     }
1243 
1244     if (usb_enabled(spapr->has_graphics)) {
1245         pci_create_simple(phb->bus, -1, "pci-ohci");
1246         if (spapr->has_graphics) {
1247             usbdevice_create("keyboard");
1248             usbdevice_create("mouse");
1249         }
1250     }
1251 
1252     if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
1253         fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
1254                 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
1255         exit(1);
1256     }
1257 
1258     if (kernel_filename) {
1259         uint64_t lowaddr = 0;
1260 
1261         kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
1262                                NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
1263         if (kernel_size < 0) {
1264             kernel_size = load_image_targphys(kernel_filename,
1265                                               KERNEL_LOAD_ADDR,
1266                                               load_limit - KERNEL_LOAD_ADDR);
1267         }
1268         if (kernel_size < 0) {
1269             fprintf(stderr, "qemu: could not load kernel '%s'\n",
1270                     kernel_filename);
1271             exit(1);
1272         }
1273 
1274         /* load initrd */
1275         if (initrd_filename) {
1276             /* Try to locate the initrd in the gap between the kernel
1277              * and the firmware. Add a bit of space just in case
1278              */
1279             initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
1280             initrd_size = load_image_targphys(initrd_filename, initrd_base,
1281                                               load_limit - initrd_base);
1282             if (initrd_size < 0) {
1283                 fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
1284                         initrd_filename);
1285                 exit(1);
1286             }
1287         } else {
1288             initrd_base = 0;
1289             initrd_size = 0;
1290         }
1291     }
1292 
1293     if (bios_name == NULL) {
1294         bios_name = FW_FILE_NAME;
1295     }
1296     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
1297     fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
1298     if (fw_size < 0) {
1299         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
1300         exit(1);
1301     }
1302     g_free(filename);
1303 
1304     spapr->entry_point = 0x100;
1305 
1306     vmstate_register(NULL, 0, &vmstate_spapr, spapr);
1307     register_savevm_live(NULL, "spapr/htab", -1, 1,
1308                          &savevm_htab_handlers, spapr);
1309 
1310     /* Prepare the device tree */
1311     spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
1312                                             initrd_base, initrd_size,
1313                                             kernel_size,
1314                                             boot_device, kernel_cmdline,
1315                                             spapr->epow_irq);
1316     assert(spapr->fdt_skel != NULL);
1317 }
1318 
/* Machine description for the "pseries" machine type.  It is marked as the
 * default machine, uses ppc_spapr_init() / ppc_spapr_reset() as its init
 * and reset hooks, and is registered by spapr_machine_init(). */
1319 static QEMUMachine spapr_machine = {
1320     .name = "pseries",
1321     .desc = "pSeries Logical Partition (PAPR compliant)",
1322     .is_default = 1,
1323     .init = ppc_spapr_init,
1324     .reset = ppc_spapr_reset,
1325     .block_default_type = IF_SCSI,
1326     .max_cpus = MAX_CPUS,
1327     .no_parallel = 1,
1328     .boot_order = NULL,
1329 };
1330 
/* Registers the "pseries" machine description with QEMU. */
1331 static void spapr_machine_init(void)
1332 {
1333     qemu_register_machine(&spapr_machine);
1334 }
1335 
/* Hook spapr_machine_init() into QEMU's machine_init() registration macro. */
1336 machine_init(spapr_machine_init);
1337