xref: /openbmc/qemu/hw/ppc/spapr.c (revision 5accc840)
1 /*
2  * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
3  *
4  * Copyright (c) 2004-2007 Fabrice Bellard
5  * Copyright (c) 2007 Jocelyn Mayer
6  * Copyright (c) 2010 David Gibson, IBM Corporation.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24  * THE SOFTWARE.
25  *
26  */
27 #include "sysemu/sysemu.h"
28 #include "hw/hw.h"
29 #include "elf.h"
30 #include "net/net.h"
31 #include "sysemu/blockdev.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/kvm.h"
34 #include "kvm_ppc.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/boards.h"
38 #include "hw/ppc/ppc.h"
39 #include "hw/loader.h"
40 
41 #include "hw/ppc/spapr.h"
42 #include "hw/ppc/spapr_vio.h"
43 #include "hw/pci-host/spapr.h"
44 #include "hw/ppc/xics.h"
45 #include "hw/pci/msi.h"
46 
47 #include "hw/pci/pci.h"
48 
49 #include "exec/address-spaces.h"
50 #include "hw/usb.h"
51 #include "qemu/config-file.h"
52 
53 #include <libfdt.h>
54 
55 /* SLOF memory layout:
56  *
57  * SLOF raw image loaded at 0, copies its romfs right below the flat
58  * device-tree, then position SLOF itself 31M below that
59  *
60  * So we set FW_OVERHEAD to 40MB which should account for all of that
61  * and more
62  *
63  * We load our kernel at 4M, leaving space for SLOF initial image
64  */
/* Size of the buffers allocated for the flattened device tree (64 KiB) */
#define FDT_MAX_SIZE            0x10000
#define RTAS_MAX_SIZE           0x10000
/* 4 MiB reserved at address 0 for the initial firmware image */
#define FW_MAX_SIZE             0x400000
#define FW_FILE_NAME            "slof.bin"
/* 40 MiB, see the memory layout description above */
#define FW_OVERHEAD             0x2800000
/* The kernel is loaded right after the space reserved for firmware */
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE

/* NOTE(review): unit is not visible here, presumably MB - confirm at users */
#define MIN_RMA_SLOF            128UL

/* Timebase frequency advertised in the device tree when KVM is not in use */
#define TIMEBASE_FREQ           512000000ULL

#define MAX_CPUS                256
#define XICS_IRQS               1024

/* phandle value given to the interrupt-controller node in the device tree */
#define PHANDLE_XICP            0x00001111

/* Size in bytes of the hash page table described by (spapr)->htab_shift */
#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
82 
/* Global machine state, used directly by the IRQ allocator, the reset
 * handlers and the device tree construction code in this file. */
sPAPREnvironment *spapr;
84 
85 int spapr_allocate_irq(int hint, bool lsi)
86 {
87     int irq;
88 
89     if (hint) {
90         irq = hint;
91         if (hint >= spapr->next_irq) {
92             spapr->next_irq = hint + 1;
93         }
94         /* FIXME: we should probably check for collisions somehow */
95     } else {
96         irq = spapr->next_irq++;
97     }
98 
99     /* Configure irq type */
100     if (!xics_get_qirq(spapr->icp, irq)) {
101         return 0;
102     }
103 
104     xics_set_irq_type(spapr->icp, irq, lsi);
105 
106     return irq;
107 }
108 
109 /*
110  * Allocate block of consequtive IRQs, returns a number of the first.
111  * If msi==true, aligns the first IRQ number to num.
112  */
113 int spapr_allocate_irq_block(int num, bool lsi, bool msi)
114 {
115     int first = -1;
116     int i, hint = 0;
117 
118     /*
119      * MSIMesage::data is used for storing VIRQ so
120      * it has to be aligned to num to support multiple
121      * MSI vectors. MSI-X is not affected by this.
122      * The hint is used for the first IRQ, the rest should
123      * be allocated continously.
124      */
125     if (msi) {
126         assert((num == 1) || (num == 2) || (num == 4) ||
127                (num == 8) || (num == 16) || (num == 32));
128         hint = (spapr->next_irq + num - 1) & ~(num - 1);
129     }
130 
131     for (i = 0; i < num; ++i) {
132         int irq;
133 
134         irq = spapr_allocate_irq(hint, lsi);
135         if (!irq) {
136             return -1;
137         }
138 
139         if (0 == i) {
140             first = irq;
141             hint = 0;
142         }
143 
144         /* If the above doesn't create a consecutive block then that's
145          * an internal bug */
146         assert(irq == (first + i));
147     }
148 
149     return first;
150 }
151 
152 static XICSState *try_create_xics(const char *type, int nr_servers,
153                                   int nr_irqs)
154 {
155     DeviceState *dev;
156 
157     dev = qdev_create(NULL, type);
158     qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
159     qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
160     if (qdev_init(dev) < 0) {
161         return NULL;
162     }
163 
164     return XICS(dev);
165 }
166 
167 static XICSState *xics_system_init(int nr_servers, int nr_irqs)
168 {
169     XICSState *icp = NULL;
170 
171     icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
172     if (!icp) {
173         perror("Failed to create XICS\n");
174         abort();
175     }
176 
177     return icp;
178 }
179 
180 static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
181 {
182     int ret = 0, offset;
183     CPUState *cpu;
184     char cpu_model[32];
185     int smt = kvmppc_smt_threads();
186     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
187 
188     assert(spapr->cpu_model);
189 
190     CPU_FOREACH(cpu) {
191         uint32_t associativity[] = {cpu_to_be32(0x5),
192                                     cpu_to_be32(0x0),
193                                     cpu_to_be32(0x0),
194                                     cpu_to_be32(0x0),
195                                     cpu_to_be32(cpu->numa_node),
196                                     cpu_to_be32(cpu->cpu_index)};
197 
198         if ((cpu->cpu_index % smt) != 0) {
199             continue;
200         }
201 
202         snprintf(cpu_model, 32, "/cpus/%s@%x", spapr->cpu_model,
203                  cpu->cpu_index);
204 
205         offset = fdt_path_offset(fdt, cpu_model);
206         if (offset < 0) {
207             return offset;
208         }
209 
210         if (nb_numa_nodes > 1) {
211             ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
212                               sizeof(associativity));
213             if (ret < 0) {
214                 return ret;
215             }
216         }
217 
218         ret = fdt_setprop(fdt, offset, "ibm,pft-size",
219                           pft_size_prop, sizeof(pft_size_prop));
220         if (ret < 0) {
221             return ret;
222         }
223     }
224     return ret;
225 }
226 
227 
228 static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
229                                      size_t maxsize)
230 {
231     size_t maxcells = maxsize / sizeof(uint32_t);
232     int i, j, count;
233     uint32_t *p = prop;
234 
235     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
236         struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
237 
238         if (!sps->page_shift) {
239             break;
240         }
241         for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
242             if (sps->enc[count].page_shift == 0) {
243                 break;
244             }
245         }
246         if ((p - prop) >= (maxcells - 3 - count * 2)) {
247             break;
248         }
249         *(p++) = cpu_to_be32(sps->page_shift);
250         *(p++) = cpu_to_be32(sps->slb_enc);
251         *(p++) = cpu_to_be32(count);
252         for (j = 0; j < count; j++) {
253             *(p++) = cpu_to_be32(sps->enc[j].page_shift);
254             *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
255         }
256     }
257 
258     return (p - prop) * sizeof(uint32_t);
259 }
260 
/*
 * Evaluate a libfdt call once and treat a negative result as fatal: the
 * stringified expression and fdt_strerror() text are printed to stderr
 * and the process exits with status 1.
 *
 * The expansion declares its own local named "ret" inside the do-block,
 * which shadows any "ret" of the caller while (exp) is evaluated.
 */
#define _FDT(exp) \
    do { \
        int ret = (exp);                                           \
        if (ret < 0) {                                             \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(ret));                      \
            exit(1);                                               \
        }                                                          \
    } while (0)
270 
271 
272 static void *spapr_create_fdt_skel(const char *cpu_model,
273                                    hwaddr initrd_base,
274                                    hwaddr initrd_size,
275                                    hwaddr kernel_size,
276                                    const char *boot_device,
277                                    const char *kernel_cmdline,
278                                    uint32_t epow_irq)
279 {
280     void *fdt;
281     CPUState *cs;
282     uint32_t start_prop = cpu_to_be32(initrd_base);
283     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
284     char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
285         "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
286     char qemu_hypertas_prop[] = "hcall-memop1";
287     uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
288     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
289     char *modelname;
290     int i, smt = kvmppc_smt_threads();
291     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
292 
293     fdt = g_malloc0(FDT_MAX_SIZE);
294     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
295 
296     if (kernel_size) {
297         _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
298     }
299     if (initrd_size) {
300         _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
301     }
302     _FDT((fdt_finish_reservemap(fdt)));
303 
304     /* Root node */
305     _FDT((fdt_begin_node(fdt, "")));
306     _FDT((fdt_property_string(fdt, "device_type", "chrp")));
307     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
308     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
309 
310     _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
311     _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
312 
313     /* /chosen */
314     _FDT((fdt_begin_node(fdt, "chosen")));
315 
316     /* Set Form1_affinity */
317     _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
318 
319     _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
320     _FDT((fdt_property(fdt, "linux,initrd-start",
321                        &start_prop, sizeof(start_prop))));
322     _FDT((fdt_property(fdt, "linux,initrd-end",
323                        &end_prop, sizeof(end_prop))));
324     if (kernel_size) {
325         uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
326                               cpu_to_be64(kernel_size) };
327 
328         _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
329     }
330     if (boot_device) {
331         _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
332     }
333     _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
334     _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
335     _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
336 
337     _FDT((fdt_end_node(fdt)));
338 
339     /* cpus */
340     _FDT((fdt_begin_node(fdt, "cpus")));
341 
342     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
343     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
344 
345     modelname = g_strdup(cpu_model);
346 
347     for (i = 0; i < strlen(modelname); i++) {
348         modelname[i] = toupper(modelname[i]);
349     }
350 
351     /* This is needed during FDT finalization */
352     spapr->cpu_model = g_strdup(modelname);
353 
354     CPU_FOREACH(cs) {
355         PowerPCCPU *cpu = POWERPC_CPU(cs);
356         CPUPPCState *env = &cpu->env;
357         PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
358         int index = cs->cpu_index;
359         uint32_t servers_prop[smp_threads];
360         uint32_t gservers_prop[smp_threads * 2];
361         char *nodename;
362         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
363                            0xffffffff, 0xffffffff};
364         uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
365         uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
366         uint32_t page_sizes_prop[64];
367         size_t page_sizes_prop_size;
368 
369         if ((index % smt) != 0) {
370             continue;
371         }
372 
373         nodename = g_strdup_printf("%s@%x", modelname, index);
374 
375         _FDT((fdt_begin_node(fdt, nodename)));
376 
377         g_free(nodename);
378 
379         _FDT((fdt_property_cell(fdt, "reg", index)));
380         _FDT((fdt_property_string(fdt, "device_type", "cpu")));
381 
382         _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
383         _FDT((fdt_property_cell(fdt, "d-cache-block-size",
384                                 env->dcache_line_size)));
385         _FDT((fdt_property_cell(fdt, "d-cache-line-size",
386                                 env->dcache_line_size)));
387         _FDT((fdt_property_cell(fdt, "i-cache-block-size",
388                                 env->icache_line_size)));
389         _FDT((fdt_property_cell(fdt, "i-cache-line-size",
390                                 env->icache_line_size)));
391 
392         if (pcc->l1_dcache_size) {
393             _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
394         } else {
395             fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
396         }
397         if (pcc->l1_icache_size) {
398             _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
399         } else {
400             fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
401         }
402 
403         _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
404         _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
405         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
406         _FDT((fdt_property_string(fdt, "status", "okay")));
407         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
408 
409         /* Build interrupt servers and gservers properties */
410         for (i = 0; i < smp_threads; i++) {
411             servers_prop[i] = cpu_to_be32(index + i);
412             /* Hack, direct the group queues back to cpu 0 */
413             gservers_prop[i*2] = cpu_to_be32(index + i);
414             gservers_prop[i*2 + 1] = 0;
415         }
416         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
417                            servers_prop, sizeof(servers_prop))));
418         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
419                            gservers_prop, sizeof(gservers_prop))));
420 
421         if (env->mmu_model & POWERPC_MMU_1TSEG) {
422             _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
423                                segs, sizeof(segs))));
424         }
425 
426         /* Advertise VMX/VSX (vector extensions) if available
427          *   0 / no property == no vector extensions
428          *   1               == VMX / Altivec available
429          *   2               == VSX available */
430         if (env->insns_flags & PPC_ALTIVEC) {
431             uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
432 
433             _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
434         }
435 
436         /* Advertise DFP (Decimal Floating Point) if available
437          *   0 / no property == no DFP
438          *   1               == DFP available */
439         if (env->insns_flags2 & PPC2_DFP) {
440             _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
441         }
442 
443         page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
444                                                       sizeof(page_sizes_prop));
445         if (page_sizes_prop_size) {
446             _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
447                                page_sizes_prop, page_sizes_prop_size)));
448         }
449 
450         _FDT((fdt_end_node(fdt)));
451     }
452 
453     g_free(modelname);
454 
455     _FDT((fdt_end_node(fdt)));
456 
457     /* RTAS */
458     _FDT((fdt_begin_node(fdt, "rtas")));
459 
460     _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
461                        sizeof(hypertas_prop))));
462     _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
463                        sizeof(qemu_hypertas_prop))));
464 
465     _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
466         refpoints, sizeof(refpoints))));
467 
468     _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
469 
470     _FDT((fdt_end_node(fdt)));
471 
472     /* interrupt controller */
473     _FDT((fdt_begin_node(fdt, "interrupt-controller")));
474 
475     _FDT((fdt_property_string(fdt, "device_type",
476                               "PowerPC-External-Interrupt-Presentation")));
477     _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
478     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
479     _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
480                        interrupt_server_ranges_prop,
481                        sizeof(interrupt_server_ranges_prop))));
482     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
483     _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
484     _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
485 
486     _FDT((fdt_end_node(fdt)));
487 
488     /* vdevice */
489     _FDT((fdt_begin_node(fdt, "vdevice")));
490 
491     _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
492     _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
493     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
494     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
495     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
496     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
497 
498     _FDT((fdt_end_node(fdt)));
499 
500     /* event-sources */
501     spapr_events_fdt_skel(fdt, epow_irq);
502 
503     _FDT((fdt_end_node(fdt))); /* close root node */
504     _FDT((fdt_finish(fdt)));
505 
506     return fdt;
507 }
508 
509 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
510 {
511     uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
512                                 cpu_to_be32(0x0), cpu_to_be32(0x0),
513                                 cpu_to_be32(0x0)};
514     char mem_name[32];
515     hwaddr node0_size, mem_start;
516     uint64_t mem_reg_property[2];
517     int i, off;
518 
519     /* memory node(s) */
520     node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
521     if (spapr->rma_size > node0_size) {
522         spapr->rma_size = node0_size;
523     }
524 
525     /* RMA */
526     mem_reg_property[0] = 0;
527     mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
528     off = fdt_add_subnode(fdt, 0, "memory@0");
529     _FDT(off);
530     _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
531     _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
532                       sizeof(mem_reg_property))));
533     _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
534                       sizeof(associativity))));
535 
536     /* RAM: Node 0 */
537     if (node0_size > spapr->rma_size) {
538         mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
539         mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
540 
541         sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
542         off = fdt_add_subnode(fdt, 0, mem_name);
543         _FDT(off);
544         _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
545         _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
546                           sizeof(mem_reg_property))));
547         _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
548                           sizeof(associativity))));
549     }
550 
551     /* RAM: Node 1 and beyond */
552     mem_start = node0_size;
553     for (i = 1; i < nb_numa_nodes; i++) {
554         mem_reg_property[0] = cpu_to_be64(mem_start);
555         mem_reg_property[1] = cpu_to_be64(node_mem[i]);
556         associativity[3] = associativity[4] = cpu_to_be32(i);
557         sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
558         off = fdt_add_subnode(fdt, 0, mem_name);
559         _FDT(off);
560         _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
561         _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
562                           sizeof(mem_reg_property))));
563         _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
564                           sizeof(associativity))));
565         mem_start += node_mem[i];
566     }
567 
568     return 0;
569 }
570 
571 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
572                                hwaddr fdt_addr,
573                                hwaddr rtas_addr,
574                                hwaddr rtas_size)
575 {
576     int ret;
577     void *fdt;
578     sPAPRPHBState *phb;
579 
580     fdt = g_malloc(FDT_MAX_SIZE);
581 
582     /* open out the base tree into a temp buffer for the final tweaks */
583     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
584 
585     ret = spapr_populate_memory(spapr, fdt);
586     if (ret < 0) {
587         fprintf(stderr, "couldn't setup memory nodes in fdt\n");
588         exit(1);
589     }
590 
591     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
592     if (ret < 0) {
593         fprintf(stderr, "couldn't setup vio devices in fdt\n");
594         exit(1);
595     }
596 
597     QLIST_FOREACH(phb, &spapr->phbs, list) {
598         ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
599     }
600 
601     if (ret < 0) {
602         fprintf(stderr, "couldn't setup PCI devices in fdt\n");
603         exit(1);
604     }
605 
606     /* RTAS */
607     ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
608     if (ret < 0) {
609         fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
610     }
611 
612     /* Advertise NUMA via ibm,associativity */
613     ret = spapr_fixup_cpu_dt(fdt, spapr);
614     if (ret < 0) {
615         fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
616     }
617 
618     if (!spapr->has_graphics) {
619         spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
620     }
621 
622     _FDT((fdt_pack(fdt)));
623 
624     if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
625         hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
626                  fdt_totalsize(fdt), FDT_MAX_SIZE);
627         exit(1);
628     }
629 
630     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
631 
632     g_free(fdt);
633 }
634 
635 static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
636 {
637     return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
638 }
639 
640 static void emulate_spapr_hypercall(PowerPCCPU *cpu)
641 {
642     CPUPPCState *env = &cpu->env;
643 
644     if (msr_pr) {
645         hcall_dprintf("Hypercall made with MSR[PR]=1\n");
646         env->gpr[3] = H_PRIVILEGE;
647     } else {
648         env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
649     }
650 }
651 
652 static void spapr_reset_htab(sPAPREnvironment *spapr)
653 {
654     long shift;
655 
656     /* allocate hash page table.  For now we always make this 16mb,
657      * later we should probably make it scale to the size of guest
658      * RAM */
659 
660     shift = kvmppc_reset_htab(spapr->htab_shift);
661 
662     if (shift > 0) {
663         /* Kernel handles htab, we don't need to allocate one */
664         spapr->htab_shift = shift;
665     } else {
666         if (!spapr->htab) {
667             /* Allocate an htab if we don't yet have one */
668             spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
669         }
670 
671         /* And clear it */
672         memset(spapr->htab, 0, HTAB_SIZE(spapr));
673     }
674 
675     /* Update the RMA size if necessary */
676     if (spapr->vrma_adjust) {
677         spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
678     }
679 }
680 
681 static void ppc_spapr_reset(void)
682 {
683     PowerPCCPU *first_ppc_cpu;
684 
685     /* Reset the hash table & recalc the RMA */
686     spapr_reset_htab(spapr);
687 
688     qemu_devices_reset();
689 
690     /* Load the fdt */
691     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
692                        spapr->rtas_size);
693 
694     /* Set up the entry state */
695     first_ppc_cpu = POWERPC_CPU(first_cpu);
696     first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
697     first_ppc_cpu->env.gpr[5] = 0;
698     first_cpu->halted = 0;
699     first_ppc_cpu->env.nip = spapr->entry_point;
700 
701 }
702 
703 static void spapr_cpu_reset(void *opaque)
704 {
705     PowerPCCPU *cpu = opaque;
706     CPUState *cs = CPU(cpu);
707     CPUPPCState *env = &cpu->env;
708 
709     cpu_reset(cs);
710 
711     /* All CPUs start halted.  CPU0 is unhalted from the machine level
712      * reset code and the rest are explicitly started up by the guest
713      * using an RTAS call */
714     cs->halted = 1;
715 
716     env->spr[SPR_HIOR] = 0;
717 
718     env->external_htab = (uint8_t *)spapr->htab;
719     env->htab_base = -1;
720     env->htab_mask = HTAB_SIZE(spapr) - 1;
721     env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
722         (spapr->htab_shift - 18);
723 }
724 
725 static void spapr_create_nvram(sPAPREnvironment *spapr)
726 {
727     DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
728     const char *drivename = qemu_opt_get(qemu_get_machine_opts(), "nvram");
729 
730     if (drivename) {
731         BlockDriverState *bs;
732 
733         bs = bdrv_find(drivename);
734         if (!bs) {
735             fprintf(stderr, "No such block device \"%s\" for nvram\n",
736                     drivename);
737             exit(1);
738         }
739         qdev_prop_set_drive_nofail(dev, "drive", bs);
740     }
741 
742     qdev_init_nofail(dev);
743 
744     spapr->nvram = (struct sPAPRNVRAM *)dev;
745 }
746 
747 /* Returns whether we want to use VGA or not */
748 static int spapr_vga_init(PCIBus *pci_bus)
749 {
750     switch (vga_interface_type) {
751     case VGA_NONE:
752     case VGA_STD:
753         return pci_vga_init(pci_bus) != NULL;
754     default:
755         fprintf(stderr, "This vga model is not supported,"
756                 "currently it only supports -vga std\n");
757         exit(0);
758         break;
759     }
760 }
761 
/*
 * Migration description of the machine-level state: the IRQ allocation
 * cursor and the RTC offset.  The order of the fields is part of the
 * migration stream layout.
 */
static const VMStateDescription vmstate_spapr = {
    .name = "spapr",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields      = (VMStateField []) {
        /* Next interrupt number handed out by spapr_allocate_irq() */
        VMSTATE_UINT32(next_irq, sPAPREnvironment),

        /* RTC offset */
        VMSTATE_UINT64(rtc_offset, sPAPREnvironment),

        VMSTATE_END_OF_LIST()
    },
};
776 
/*
 * Helpers for walking the hash page table during migration.
 *
 * HPTE(_table, _i)   pointer to entry _i; every entry is two uint64_t
 * HPTE_VALID(_hpte)  non-zero if HPTE64_V_VALID is set in the first word
 * HPTE_DIRTY(_hpte)  non-zero if HPTE64_V_HPTE_DIRTY is set in the first word
 * CLEAN_HPTE(_hpte)  clear HPTE64_V_HPTE_DIRTY in the first word, in place
 *
 * The first word is passed through tswap64() before its bits are tested.
 */
#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
781 
782 static int htab_save_setup(QEMUFile *f, void *opaque)
783 {
784     sPAPREnvironment *spapr = opaque;
785 
786     /* "Iteration" header */
787     qemu_put_be32(f, spapr->htab_shift);
788 
789     if (spapr->htab) {
790         spapr->htab_save_index = 0;
791         spapr->htab_first_pass = true;
792     } else {
793         assert(kvm_enabled());
794 
795         spapr->htab_fd = kvmppc_get_htab_fd(false);
796         if (spapr->htab_fd < 0) {
797             fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
798                     strerror(errno));
799             return -1;
800         }
801     }
802 
803 
804     return 0;
805 }
806 
807 static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
808                                  int64_t max_ns)
809 {
810     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
811     int index = spapr->htab_save_index;
812     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
813 
814     assert(spapr->htab_first_pass);
815 
816     do {
817         int chunkstart;
818 
819         /* Consume invalid HPTEs */
820         while ((index < htabslots)
821                && !HPTE_VALID(HPTE(spapr->htab, index))) {
822             index++;
823             CLEAN_HPTE(HPTE(spapr->htab, index));
824         }
825 
826         /* Consume valid HPTEs */
827         chunkstart = index;
828         while ((index < htabslots)
829                && HPTE_VALID(HPTE(spapr->htab, index))) {
830             index++;
831             CLEAN_HPTE(HPTE(spapr->htab, index));
832         }
833 
834         if (index > chunkstart) {
835             int n_valid = index - chunkstart;
836 
837             qemu_put_be32(f, chunkstart);
838             qemu_put_be16(f, n_valid);
839             qemu_put_be16(f, 0);
840             qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
841                             HASH_PTE_SIZE_64 * n_valid);
842 
843             if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
844                 break;
845             }
846         }
847     } while ((index < htabslots) && !qemu_file_rate_limit(f));
848 
849     if (index >= htabslots) {
850         assert(index == htabslots);
851         index = 0;
852         spapr->htab_first_pass = false;
853     }
854     spapr->htab_save_index = index;
855 }
856 
857 static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
858                                 int64_t max_ns)
859 {
860     bool final = max_ns < 0;
861     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
862     int examined = 0, sent = 0;
863     int index = spapr->htab_save_index;
864     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
865 
866     assert(!spapr->htab_first_pass);
867 
868     do {
869         int chunkstart, invalidstart;
870 
871         /* Consume non-dirty HPTEs */
872         while ((index < htabslots)
873                && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
874             index++;
875             examined++;
876         }
877 
878         chunkstart = index;
879         /* Consume valid dirty HPTEs */
880         while ((index < htabslots)
881                && HPTE_DIRTY(HPTE(spapr->htab, index))
882                && HPTE_VALID(HPTE(spapr->htab, index))) {
883             CLEAN_HPTE(HPTE(spapr->htab, index));
884             index++;
885             examined++;
886         }
887 
888         invalidstart = index;
889         /* Consume invalid dirty HPTEs */
890         while ((index < htabslots)
891                && HPTE_DIRTY(HPTE(spapr->htab, index))
892                && !HPTE_VALID(HPTE(spapr->htab, index))) {
893             CLEAN_HPTE(HPTE(spapr->htab, index));
894             index++;
895             examined++;
896         }
897 
898         if (index > chunkstart) {
899             int n_valid = invalidstart - chunkstart;
900             int n_invalid = index - invalidstart;
901 
902             qemu_put_be32(f, chunkstart);
903             qemu_put_be16(f, n_valid);
904             qemu_put_be16(f, n_invalid);
905             qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
906                             HASH_PTE_SIZE_64 * n_valid);
907             sent += index - chunkstart;
908 
909             if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
910                 break;
911             }
912         }
913 
914         if (examined >= htabslots) {
915             break;
916         }
917 
918         if (index >= htabslots) {
919             assert(index == htabslots);
920             index = 0;
921         }
922     } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));
923 
924     if (index >= htabslots) {
925         assert(index == htabslots);
926         index = 0;
927     }
928 
929     spapr->htab_save_index = index;
930 
931     return (examined >= htabslots) && (sent == 0) ? 1 : 0;
932 }
933 
934 #define MAX_ITERATION_NS    5000000 /* 5 ms */
935 #define MAX_KVM_BUF_SIZE    2048
936 
937 static int htab_save_iterate(QEMUFile *f, void *opaque)
938 {
939     sPAPREnvironment *spapr = opaque;
940     int rc = 0;
941 
942     /* Iteration header */
943     qemu_put_be32(f, 0);
944 
945     if (!spapr->htab) {
946         assert(kvm_enabled());
947 
948         rc = kvmppc_save_htab(f, spapr->htab_fd,
949                               MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
950         if (rc < 0) {
951             return rc;
952         }
953     } else  if (spapr->htab_first_pass) {
954         htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
955     } else {
956         rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
957     }
958 
959     /* End marker */
960     qemu_put_be32(f, 0);
961     qemu_put_be16(f, 0);
962     qemu_put_be16(f, 0);
963 
964     return rc;
965 }
966 
967 static int htab_save_complete(QEMUFile *f, void *opaque)
968 {
969     sPAPREnvironment *spapr = opaque;
970 
971     /* Iteration header */
972     qemu_put_be32(f, 0);
973 
974     if (!spapr->htab) {
975         int rc;
976 
977         assert(kvm_enabled());
978 
979         rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
980         if (rc < 0) {
981             return rc;
982         }
983         close(spapr->htab_fd);
984         spapr->htab_fd = -1;
985     } else {
986         htab_save_later_pass(f, spapr, -1);
987     }
988 
989     /* End marker */
990     qemu_put_be32(f, 0);
991     qemu_put_be16(f, 0);
992     qemu_put_be16(f, 0);
993 
994     return 0;
995 }
996 
997 static int htab_load(QEMUFile *f, void *opaque, int version_id)
998 {
999     sPAPREnvironment *spapr = opaque;
1000     uint32_t section_hdr;
1001     int fd = -1;
1002 
1003     if (version_id < 1 || version_id > 1) {
1004         fprintf(stderr, "htab_load() bad version\n");
1005         return -EINVAL;
1006     }
1007 
1008     section_hdr = qemu_get_be32(f);
1009 
1010     if (section_hdr) {
1011         /* First section, just the hash shift */
1012         if (spapr->htab_shift != section_hdr) {
1013             return -EINVAL;
1014         }
1015         return 0;
1016     }
1017 
1018     if (!spapr->htab) {
1019         assert(kvm_enabled());
1020 
1021         fd = kvmppc_get_htab_fd(true);
1022         if (fd < 0) {
1023             fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
1024                     strerror(errno));
1025         }
1026     }
1027 
1028     while (true) {
1029         uint32_t index;
1030         uint16_t n_valid, n_invalid;
1031 
1032         index = qemu_get_be32(f);
1033         n_valid = qemu_get_be16(f);
1034         n_invalid = qemu_get_be16(f);
1035 
1036         if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
1037             /* End of Stream */
1038             break;
1039         }
1040 
1041         if ((index + n_valid + n_invalid) >
1042             (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
1043             /* Bad index in stream */
1044             fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
1045                     "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
1046                     spapr->htab_shift);
1047             return -EINVAL;
1048         }
1049 
1050         if (spapr->htab) {
1051             if (n_valid) {
1052                 qemu_get_buffer(f, HPTE(spapr->htab, index),
1053                                 HASH_PTE_SIZE_64 * n_valid);
1054             }
1055             if (n_invalid) {
1056                 memset(HPTE(spapr->htab, index + n_valid), 0,
1057                        HASH_PTE_SIZE_64 * n_invalid);
1058             }
1059         } else {
1060             int rc;
1061 
1062             assert(fd >= 0);
1063 
1064             rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
1065             if (rc < 0) {
1066                 return rc;
1067             }
1068         }
1069     }
1070 
1071     if (!spapr->htab) {
1072         assert(fd >= 0);
1073         close(fd);
1074     }
1075 
1076     return 0;
1077 }
1078 
1079 static SaveVMHandlers savevm_htab_handlers = {
1080     .save_live_setup = htab_save_setup,
1081     .save_live_iterate = htab_save_iterate,
1082     .save_live_complete = htab_save_complete,
1083     .load_state = htab_load,
1084 };
1085 
1086 /* pSeries LPAR / sPAPR hardware init */
1087 static void ppc_spapr_init(QEMUMachineInitArgs *args)
1088 {
1089     ram_addr_t ram_size = args->ram_size;
1090     const char *cpu_model = args->cpu_model;
1091     const char *kernel_filename = args->kernel_filename;
1092     const char *kernel_cmdline = args->kernel_cmdline;
1093     const char *initrd_filename = args->initrd_filename;
1094     const char *boot_device = args->boot_order;
1095     PowerPCCPU *cpu;
1096     CPUPPCState *env;
1097     PCIHostState *phb;
1098     int i;
1099     MemoryRegion *sysmem = get_system_memory();
1100     MemoryRegion *ram = g_new(MemoryRegion, 1);
1101     hwaddr rma_alloc_size;
1102     uint32_t initrd_base = 0;
1103     long kernel_size = 0, initrd_size = 0;
1104     long load_limit, rtas_limit, fw_size;
1105     char *filename;
1106 
1107     msi_supported = true;
1108 
1109     spapr = g_malloc0(sizeof(*spapr));
1110     QLIST_INIT(&spapr->phbs);
1111 
1112     cpu_ppc_hypercall = emulate_spapr_hypercall;
1113 
1114     /* Allocate RMA if necessary */
1115     rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
1116 
1117     if (rma_alloc_size == -1) {
1118         hw_error("qemu: Unable to create RMA\n");
1119         exit(1);
1120     }
1121 
1122     if (rma_alloc_size && (rma_alloc_size < ram_size)) {
1123         spapr->rma_size = rma_alloc_size;
1124     } else {
1125         spapr->rma_size = ram_size;
1126 
1127         /* With KVM, we don't actually know whether KVM supports an
1128          * unbounded RMA (PR KVM) or is limited by the hash table size
1129          * (HV KVM using VRMA), so we always assume the latter
1130          *
1131          * In that case, we also limit the initial allocations for RTAS
1132          * etc... to 256M since we have no way to know what the VRMA size
1133          * is going to be as it depends on the size of the hash table
1134          * isn't determined yet.
1135          */
1136         if (kvm_enabled()) {
1137             spapr->vrma_adjust = 1;
1138             spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
1139         }
1140     }
1141 
1142     /* We place the device tree and RTAS just below either the top of the RMA,
1143      * or just below 2GB, whichever is lowere, so that it can be
1144      * processed with 32-bit real mode code if necessary */
1145     rtas_limit = MIN(spapr->rma_size, 0x80000000);
1146     spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
1147     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
1148     load_limit = spapr->fdt_addr - FW_OVERHEAD;
1149 
1150     /* We aim for a hash table of size 1/128 the size of RAM.  The
1151      * normal rule of thumb is 1/64 the size of RAM, but that's much
1152      * more than needed for the Linux guests we support. */
1153     spapr->htab_shift = 18; /* Minimum architected size */
1154     while (spapr->htab_shift <= 46) {
1155         if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
1156             break;
1157         }
1158         spapr->htab_shift++;
1159     }
1160 
1161     /* Set up Interrupt Controller before we create the VCPUs */
1162     spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
1163                                   XICS_IRQS);
1164     spapr->next_irq = XICS_IRQ_BASE;
1165 
1166     /* init CPUs */
1167     if (cpu_model == NULL) {
1168         cpu_model = kvm_enabled() ? "host" : "POWER7";
1169     }
1170     for (i = 0; i < smp_cpus; i++) {
1171         cpu = cpu_ppc_init(cpu_model);
1172         if (cpu == NULL) {
1173             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
1174             exit(1);
1175         }
1176         env = &cpu->env;
1177 
1178         xics_cpu_setup(spapr->icp, cpu);
1179 
1180         /* Set time-base frequency to 512 MHz */
1181         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
1182 
1183         /* PAPR always has exception vectors in RAM not ROM. To ensure this,
1184          * MSR[IP] should never be set.
1185          */
1186         env->msr_mask &= ~(1 << 6);
1187 
1188         /* Tell KVM that we're in PAPR mode */
1189         if (kvm_enabled()) {
1190             kvmppc_set_papr(cpu);
1191         }
1192 
1193         qemu_register_reset(spapr_cpu_reset, cpu);
1194     }
1195 
1196     /* allocate RAM */
1197     spapr->ram_limit = ram_size;
1198     if (spapr->ram_limit > rma_alloc_size) {
1199         ram_addr_t nonrma_base = rma_alloc_size;
1200         ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
1201 
1202         memory_region_init_ram(ram, NULL, "ppc_spapr.ram", nonrma_size);
1203         vmstate_register_ram_global(ram);
1204         memory_region_add_subregion(sysmem, nonrma_base, ram);
1205     }
1206 
1207     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
1208     spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
1209                                            rtas_limit - spapr->rtas_addr);
1210     if (spapr->rtas_size < 0) {
1211         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
1212         exit(1);
1213     }
1214     if (spapr->rtas_size > RTAS_MAX_SIZE) {
1215         hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
1216                  spapr->rtas_size, RTAS_MAX_SIZE);
1217         exit(1);
1218     }
1219     g_free(filename);
1220 
1221     /* Set up EPOW events infrastructure */
1222     spapr_events_init(spapr);
1223 
1224     /* Set up VIO bus */
1225     spapr->vio_bus = spapr_vio_bus_init();
1226 
1227     for (i = 0; i < MAX_SERIAL_PORTS; i++) {
1228         if (serial_hds[i]) {
1229             spapr_vty_create(spapr->vio_bus, serial_hds[i]);
1230         }
1231     }
1232 
1233     /* We always have at least the nvram device on VIO */
1234     spapr_create_nvram(spapr);
1235 
1236     /* Set up PCI */
1237     spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
1238     spapr_pci_rtas_init();
1239 
1240     phb = spapr_create_phb(spapr, 0);
1241 
1242     for (i = 0; i < nb_nics; i++) {
1243         NICInfo *nd = &nd_table[i];
1244 
1245         if (!nd->model) {
1246             nd->model = g_strdup("ibmveth");
1247         }
1248 
1249         if (strcmp(nd->model, "ibmveth") == 0) {
1250             spapr_vlan_create(spapr->vio_bus, nd);
1251         } else {
1252             pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
1253         }
1254     }
1255 
1256     for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
1257         spapr_vscsi_create(spapr->vio_bus);
1258     }
1259 
1260     /* Graphics */
1261     if (spapr_vga_init(phb->bus)) {
1262         spapr->has_graphics = true;
1263     }
1264 
1265     if (usb_enabled(spapr->has_graphics)) {
1266         pci_create_simple(phb->bus, -1, "pci-ohci");
1267         if (spapr->has_graphics) {
1268             usbdevice_create("keyboard");
1269             usbdevice_create("mouse");
1270         }
1271     }
1272 
1273     if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
1274         fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
1275                 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
1276         exit(1);
1277     }
1278 
1279     if (kernel_filename) {
1280         uint64_t lowaddr = 0;
1281 
1282         kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
1283                                NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
1284         if (kernel_size < 0) {
1285             kernel_size = load_image_targphys(kernel_filename,
1286                                               KERNEL_LOAD_ADDR,
1287                                               load_limit - KERNEL_LOAD_ADDR);
1288         }
1289         if (kernel_size < 0) {
1290             fprintf(stderr, "qemu: could not load kernel '%s'\n",
1291                     kernel_filename);
1292             exit(1);
1293         }
1294 
1295         /* load initrd */
1296         if (initrd_filename) {
1297             /* Try to locate the initrd in the gap between the kernel
1298              * and the firmware. Add a bit of space just in case
1299              */
1300             initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
1301             initrd_size = load_image_targphys(initrd_filename, initrd_base,
1302                                               load_limit - initrd_base);
1303             if (initrd_size < 0) {
1304                 fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
1305                         initrd_filename);
1306                 exit(1);
1307             }
1308         } else {
1309             initrd_base = 0;
1310             initrd_size = 0;
1311         }
1312     }
1313 
1314     if (bios_name == NULL) {
1315         bios_name = FW_FILE_NAME;
1316     }
1317     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
1318     fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
1319     if (fw_size < 0) {
1320         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
1321         exit(1);
1322     }
1323     g_free(filename);
1324 
1325     spapr->entry_point = 0x100;
1326 
1327     vmstate_register(NULL, 0, &vmstate_spapr, spapr);
1328     register_savevm_live(NULL, "spapr/htab", -1, 1,
1329                          &savevm_htab_handlers, spapr);
1330 
1331     /* Prepare the device tree */
1332     spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
1333                                             initrd_base, initrd_size,
1334                                             kernel_size,
1335                                             boot_device, kernel_cmdline,
1336                                             spapr->epow_irq);
1337     assert(spapr->fdt_skel != NULL);
1338 }
1339 
1340 static QEMUMachine spapr_machine = {
1341     .name = "pseries",
1342     .desc = "pSeries Logical Partition (PAPR compliant)",
1343     .is_default = 1,
1344     .init = ppc_spapr_init,
1345     .reset = ppc_spapr_reset,
1346     .block_default_type = IF_SCSI,
1347     .max_cpus = MAX_CPUS,
1348     .no_parallel = 1,
1349     .default_boot_order = NULL,
1350 };
1351 
1352 static void spapr_machine_init(void)
1353 {
1354     qemu_register_machine(&spapr_machine);
1355 }
1356 
1357 machine_init(spapr_machine_init);
1358