xref: /openbmc/qemu/hw/ppc/spapr.c (revision 8ffe04ed2ed44b32f97575bc3cb7c29eefdd70da)
1 /*
2  * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
3  *
4  * Copyright (c) 2004-2007 Fabrice Bellard
5  * Copyright (c) 2007 Jocelyn Mayer
6  * Copyright (c) 2010 David Gibson, IBM Corporation.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24  * THE SOFTWARE.
25  *
26  */
27 #include "sysemu/sysemu.h"
28 #include "hw/hw.h"
29 #include "elf.h"
30 #include "net/net.h"
31 #include "sysemu/blockdev.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/kvm.h"
34 #include "kvm_ppc.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/boards.h"
38 #include "hw/ppc/ppc.h"
39 #include "hw/loader.h"
40 
41 #include "hw/ppc/spapr.h"
42 #include "hw/ppc/spapr_vio.h"
43 #include "hw/pci-host/spapr.h"
44 #include "hw/ppc/xics.h"
45 #include "hw/pci/msi.h"
46 
47 #include "hw/pci/pci.h"
48 
49 #include "exec/address-spaces.h"
50 #include "hw/usb.h"
51 #include "qemu/config-file.h"
52 
53 #include <libfdt.h>
54 
55 /* SLOF memory layout:
56  *
57  * The SLOF raw image is loaded at 0, copies its romfs right below the
58  * flat device-tree, then positions SLOF itself 31M below that.
59  *
60  * So we set FW_OVERHEAD to 40MB, which should account for all of that
61  * and more.
62  *
63  * We load our kernel at 4M, leaving space for the SLOF initial image.
64  */
65 #define FDT_MAX_SIZE            0x40000
66 #define RTAS_MAX_SIZE           0x10000
67 #define FW_MAX_SIZE             0x400000
68 #define FW_FILE_NAME            "slof.bin"
69 #define FW_OVERHEAD             0x2800000
70 #define KERNEL_LOAD_ADDR        FW_MAX_SIZE
71 
72 #define MIN_RMA_SLOF            128UL
73 
74 #define TIMEBASE_FREQ           512000000ULL
75 
76 #define MAX_CPUS                256
77 #define XICS_IRQS               1024
78 
79 #define PHANDLE_XICP            0x00001111
80 
81 #define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
82 
83 sPAPREnvironment *spapr;
84 
85 int spapr_allocate_irq(int hint, bool lsi)
86 {
87     int irq;
88 
89     if (hint) {
90         irq = hint;
91         if (hint >= spapr->next_irq) {
92             spapr->next_irq = hint + 1;
93         }
94         /* FIXME: we should probably check for collisions somehow */
95     } else {
96         irq = spapr->next_irq++;
97     }
98 
99     /* Configure irq type */
100     if (!xics_get_qirq(spapr->icp, irq)) {
101         return 0;
102     }
103 
104     xics_set_irq_type(spapr->icp, irq, lsi);
105 
106     return irq;
107 }
108 
109 /*
110  * Allocate a block of consecutive IRQs; returns the number of the first.
111  * If msi==true, the first IRQ number is aligned to num.
112  */
113 int spapr_allocate_irq_block(int num, bool lsi, bool msi)
114 {
115     int first = -1;
116     int i, hint = 0;
117 
118     /*
119      * MSIMessage::data is used for storing the VIRQ, so it has to be
120      * aligned to num to support multiple MSI vectors.  MSI-X is not
121      * affected by this.  The hint (next_irq rounded up to a multiple of
122      * num) is used for the first IRQ; the rest are allocated
123      * consecutively.
124      */
125     if (msi) {
126         assert((num == 1) || (num == 2) || (num == 4) ||
127                (num == 8) || (num == 16) || (num == 32));
128         hint = (spapr->next_irq + num - 1) & ~(num - 1);
129     }
130 
131     for (i = 0; i < num; ++i) {
132         int irq;
133 
134         irq = spapr_allocate_irq(hint, lsi);
135         if (!irq) {
136             return -1;
137         }
138 
139         if (i == 0) {
140             first = irq;
141             hint = 0;
142         }
143 
144         /* If the above doesn't create a consecutive block then that's
145          * an internal bug */
146         assert(irq == (first + i));
147     }
148 
149     return first;
150 }
151 
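/* Instantiate the XICS interrupt controller device with the given number
 * of servers (presentation entities) and global IRQ numbers.  Returns NULL
 * if the device fails to initialize. */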
152 static XICSState *try_create_xics(const char *type, int nr_servers,
153                                   int nr_irqs)
154 {
155     DeviceState *dev;
156 
157     dev = qdev_create(NULL, type);
158     qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
159     qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
160     if (qdev_init(dev) < 0) {
161         return NULL;
162     }
163 
164     return XICS(dev);
165 }
166 
167 static XICSState *xics_system_init(int nr_servers, int nr_irqs)
168 {
169     XICSState *icp = NULL;
170 
171     icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
172     if (!icp) {
173         perror("Failed to create XICS\n");
174         abort();
175     }
176 
177     return icp;
178 }
179 
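/* Patch per-CPU properties into an already-built device tree: the hash
 * page table size ("ibm,pft-size") and, when more than one NUMA node is
 * configured, the "ibm,associativity" list for each CPU node. */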
180 static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
181 {
182     int ret = 0, offset;
183     CPUState *cpu;
184     char cpu_model[32];
185     int smt = kvmppc_smt_threads();
186     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
187 
188     assert(spapr->cpu_model);
189 
190     CPU_FOREACH(cpu) {
191         uint32_t associativity[] = {cpu_to_be32(0x5),
192                                     cpu_to_be32(0x0),
193                                     cpu_to_be32(0x0),
194                                     cpu_to_be32(0x0),
195                                     cpu_to_be32(cpu->numa_node),
196                                     cpu_to_be32(cpu->cpu_index)};
197 
198         if ((cpu->cpu_index % smt) != 0) {
199             continue;
200         }
201 
202         snprintf(cpu_model, 32, "/cpus/%s@%x", spapr->cpu_model,
203                  cpu->cpu_index);
204 
205         offset = fdt_path_offset(fdt, cpu_model);
206         if (offset < 0) {
207             return offset;
208         }
209 
210         if (nb_numa_nodes > 1) {
211             ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
212                               sizeof(associativity));
213             if (ret < 0) {
214                 return ret;
215             }
216         }
217 
218         ret = fdt_setprop(fdt, offset, "ibm,pft-size",
219                           pft_size_prop, sizeof(pft_size_prop));
220         if (ret < 0) {
221             return ret;
222         }
223     }
224     return ret;
225 }
226 
227 
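/* Encode the CPU's supported segment/page size combinations in the layout
 * used by the "ibm,segment-page-sizes" property: for each base page size,
 * a (page_shift, slb_enc, count) header followed by count
 * (page_shift, pte_enc) pairs.  Stops before overflowing maxsize bytes and
 * returns the number of bytes written to prop. */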
228 static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
229                                      size_t maxsize)
230 {
231     size_t maxcells = maxsize / sizeof(uint32_t);
232     int i, j, count;
233     uint32_t *p = prop;
234 
235     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
236         struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
237 
238         if (!sps->page_shift) {
239             break;
240         }
241         for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
242             if (sps->enc[count].page_shift == 0) {
243                 break;
244             }
245         }
246         if ((p - prop) >= (maxcells - 3 - count * 2)) {
247             break;
248         }
249         *(p++) = cpu_to_be32(sps->page_shift);
250         *(p++) = cpu_to_be32(sps->slb_enc);
251         *(p++) = cpu_to_be32(count);
252         for (j = 0; j < count; j++) {
253             *(p++) = cpu_to_be32(sps->enc[j].page_shift);
254             *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
255         }
256     }
257 
258     return (p - prop) * sizeof(uint32_t);
259 }
260 
261 #define _FDT(exp) \
262     do { \
263         int ret = (exp);                                           \
264         if (ret < 0) {                                             \
265             fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
266                     #exp, fdt_strerror(ret));                      \
267             exit(1);                                               \
268         }                                                          \
269     } while (0)
270 
271 
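/* Build the static skeleton of the guest device tree: the root node,
 * /chosen (boot arguments, initrd location, boot device), the /cpus nodes,
 * /rtas, the XICS interrupt controller, the /vdevice bus and the event
 * sources.  Nodes that depend on runtime state (memory, VIO and PCI
 * devices) are filled in later by spapr_finalize_fdt() at reset time. */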
272 static void *spapr_create_fdt_skel(const char *cpu_model,
273                                    hwaddr initrd_base,
274                                    hwaddr initrd_size,
275                                    hwaddr kernel_size,
276                                    bool little_endian,
277                                    const char *boot_device,
278                                    const char *kernel_cmdline,
279                                    uint32_t epow_irq)
280 {
281     void *fdt;
282     CPUState *cs;
283     uint32_t start_prop = cpu_to_be32(initrd_base);
284     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
285     char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
286         "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
287     char qemu_hypertas_prop[] = "hcall-memop1";
288     uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
289     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
290     char *modelname;
291     int i, smt = kvmppc_smt_threads();
292     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
293 
294     fdt = g_malloc0(FDT_MAX_SIZE);
295     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
296 
297     if (kernel_size) {
298         _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
299     }
300     if (initrd_size) {
301         _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
302     }
303     _FDT((fdt_finish_reservemap(fdt)));
304 
305     /* Root node */
306     _FDT((fdt_begin_node(fdt, "")));
307     _FDT((fdt_property_string(fdt, "device_type", "chrp")));
308     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
309     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
310 
311     _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
312     _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
313 
314     /* /chosen */
315     _FDT((fdt_begin_node(fdt, "chosen")));
316 
317     /* Set Form1_affinity */
318     _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
319 
320     _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
321     _FDT((fdt_property(fdt, "linux,initrd-start",
322                        &start_prop, sizeof(start_prop))));
323     _FDT((fdt_property(fdt, "linux,initrd-end",
324                        &end_prop, sizeof(end_prop))));
325     if (kernel_size) {
326         uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
327                               cpu_to_be64(kernel_size) };
328 
329         _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
330         if (little_endian) {
331             _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
332         }
333     }
334     if (boot_device) {
335         _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
336     }
337     _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
338     _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
339     _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
340 
341     _FDT((fdt_end_node(fdt)));
342 
343     /* cpus */
344     _FDT((fdt_begin_node(fdt, "cpus")));
345 
346     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
347     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
348 
349     modelname = g_strdup(cpu_model);
350 
351     for (i = 0; i < strlen(modelname); i++) {
352         modelname[i] = toupper(modelname[i]);
353     }
354 
355     /* This is needed during FDT finalization */
356     spapr->cpu_model = g_strdup(modelname);
357 
358     CPU_FOREACH(cs) {
359         PowerPCCPU *cpu = POWERPC_CPU(cs);
360         CPUPPCState *env = &cpu->env;
361         PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
362         int index = cs->cpu_index;
363         uint32_t servers_prop[smp_threads];
364         uint32_t gservers_prop[smp_threads * 2];
365         char *nodename;
366         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
367                            0xffffffff, 0xffffffff};
368         uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
369         uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
370         uint32_t page_sizes_prop[64];
371         size_t page_sizes_prop_size;
372 
373         if ((index % smt) != 0) {
374             continue;
375         }
376 
377         nodename = g_strdup_printf("%s@%x", modelname, index);
378 
379         _FDT((fdt_begin_node(fdt, nodename)));
380 
381         g_free(nodename);
382 
383         _FDT((fdt_property_cell(fdt, "reg", index)));
384         _FDT((fdt_property_string(fdt, "device_type", "cpu")));
385 
386         _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
387         _FDT((fdt_property_cell(fdt, "d-cache-block-size",
388                                 env->dcache_line_size)));
389         _FDT((fdt_property_cell(fdt, "d-cache-line-size",
390                                 env->dcache_line_size)));
391         _FDT((fdt_property_cell(fdt, "i-cache-block-size",
392                                 env->icache_line_size)));
393         _FDT((fdt_property_cell(fdt, "i-cache-line-size",
394                                 env->icache_line_size)));
395 
396         if (pcc->l1_dcache_size) {
397             _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
398         } else {
399             fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
400         }
401         if (pcc->l1_icache_size) {
402             _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
403         } else {
404             fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
405         }
406 
407         _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
408         _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
409         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
410         _FDT((fdt_property_string(fdt, "status", "okay")));
411         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
412 
413         /* Build interrupt servers and gservers properties */
414         for (i = 0; i < smp_threads; i++) {
415             servers_prop[i] = cpu_to_be32(index + i);
416             /* Hack, direct the group queues back to cpu 0 */
417             gservers_prop[i*2] = cpu_to_be32(index + i);
418             gservers_prop[i*2 + 1] = 0;
419         }
420         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
421                            servers_prop, sizeof(servers_prop))));
422         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
423                            gservers_prop, sizeof(gservers_prop))));
424 
425         if (env->spr_cb[SPR_PURR].oea_read) {
426             _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
427         }
428 
429         if (env->mmu_model & POWERPC_MMU_1TSEG) {
430             _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
431                                segs, sizeof(segs))));
432         }
433 
434         /* Advertise VMX/VSX (vector extensions) if available
435          *   0 / no property == no vector extensions
436          *   1               == VMX / Altivec available
437          *   2               == VSX available */
438         if (env->insns_flags & PPC_ALTIVEC) {
439             uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
440 
441             _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
442         }
443 
444         /* Advertise DFP (Decimal Floating Point) if available
445          *   0 / no property == no DFP
446          *   1               == DFP available */
447         if (env->insns_flags2 & PPC2_DFP) {
448             _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
449         }
450 
451         page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
452                                                       sizeof(page_sizes_prop));
453         if (page_sizes_prop_size) {
454             _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
455                                page_sizes_prop, page_sizes_prop_size)));
456         }
457 
458         _FDT((fdt_end_node(fdt)));
459     }
460 
461     g_free(modelname);
462 
463     _FDT((fdt_end_node(fdt)));
464 
465     /* RTAS */
466     _FDT((fdt_begin_node(fdt, "rtas")));
467 
468     _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
469                        sizeof(hypertas_prop))));
470     _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
471                        sizeof(qemu_hypertas_prop))));
472 
473     _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
474         refpoints, sizeof(refpoints))));
475 
476     _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
477 
478     _FDT((fdt_end_node(fdt)));
479 
480     /* interrupt controller */
481     _FDT((fdt_begin_node(fdt, "interrupt-controller")));
482 
483     _FDT((fdt_property_string(fdt, "device_type",
484                               "PowerPC-External-Interrupt-Presentation")));
485     _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
486     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
487     _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
488                        interrupt_server_ranges_prop,
489                        sizeof(interrupt_server_ranges_prop))));
490     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
491     _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
492     _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
493 
494     _FDT((fdt_end_node(fdt)));
495 
496     /* vdevice */
497     _FDT((fdt_begin_node(fdt, "vdevice")));
498 
499     _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
500     _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
501     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
502     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
503     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
504     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
505 
506     _FDT((fdt_end_node(fdt)));
507 
508     /* event-sources */
509     spapr_events_fdt_skel(fdt, epow_irq);
510 
511     _FDT((fdt_end_node(fdt))); /* close root node */
512     _FDT((fdt_finish(fdt)));
513 
514     return fdt;
515 }
516 
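/* Add the memory@... nodes to the device tree: one node for the RMA at
 * address 0, one for the rest of NUMA node 0, and one per additional NUMA
 * node, each carrying "reg" and "ibm,associativity" properties. */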
517 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
518 {
519     uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
520                                 cpu_to_be32(0x0), cpu_to_be32(0x0),
521                                 cpu_to_be32(0x0)};
522     char mem_name[32];
523     hwaddr node0_size, mem_start;
524     uint64_t mem_reg_property[2];
525     int i, off;
526 
527     /* memory node(s) */
528     node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
529     if (spapr->rma_size > node0_size) {
530         spapr->rma_size = node0_size;
531     }
532 
533     /* RMA */
534     mem_reg_property[0] = 0;
535     mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
536     off = fdt_add_subnode(fdt, 0, "memory@0");
537     _FDT(off);
538     _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
539     _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
540                       sizeof(mem_reg_property))));
541     _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
542                       sizeof(associativity))));
543 
544     /* RAM: Node 0 */
545     if (node0_size > spapr->rma_size) {
546         mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
547         mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
548 
549         sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
550         off = fdt_add_subnode(fdt, 0, mem_name);
551         _FDT(off);
552         _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
553         _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
554                           sizeof(mem_reg_property))));
555         _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
556                           sizeof(associativity))));
557     }
558 
559     /* RAM: Node 1 and beyond */
560     mem_start = node0_size;
561     for (i = 1; i < nb_numa_nodes; i++) {
562         mem_reg_property[0] = cpu_to_be64(mem_start);
563         mem_reg_property[1] = cpu_to_be64(node_mem[i]);
564         associativity[3] = associativity[4] = cpu_to_be32(i);
565         sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
566         off = fdt_add_subnode(fdt, 0, mem_name);
567         _FDT(off);
568         _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
569         _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
570                           sizeof(mem_reg_property))));
571         _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
572                           sizeof(associativity))));
573         mem_start += node_mem[i];
574     }
575 
576     return 0;
577 }
578 
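/* Produce the final device tree at reset time: open the skeleton into a
 * fresh buffer, add the memory, VIO, PCI and RTAS nodes, fix up the CPU
 * nodes, then pack the result and copy it into guest memory at fdt_addr. */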
579 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
580                                hwaddr fdt_addr,
581                                hwaddr rtas_addr,
582                                hwaddr rtas_size)
583 {
584     int ret;
585     void *fdt;
586     sPAPRPHBState *phb;
587 
588     fdt = g_malloc(FDT_MAX_SIZE);
589 
590     /* open out the base tree into a temp buffer for the final tweaks */
591     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
592 
593     ret = spapr_populate_memory(spapr, fdt);
594     if (ret < 0) {
595         fprintf(stderr, "couldn't setup memory nodes in fdt\n");
596         exit(1);
597     }
598 
599     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
600     if (ret < 0) {
601         fprintf(stderr, "couldn't setup vio devices in fdt\n");
602         exit(1);
603     }
604 
605     QLIST_FOREACH(phb, &spapr->phbs, list) {
606         ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
607     }
608 
609     if (ret < 0) {
610         fprintf(stderr, "couldn't setup PCI devices in fdt\n");
611         exit(1);
612     }
613 
614     /* RTAS */
615     ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
616     if (ret < 0) {
617         fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
618     }
619 
620     /* Advertise NUMA via ibm,associativity */
621     ret = spapr_fixup_cpu_dt(fdt, spapr);
622     if (ret < 0) {
623         fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
624     }
625 
626     if (!spapr->has_graphics) {
627         spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
628     }
629 
630     _FDT((fdt_pack(fdt)));
631 
632     if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
633         hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
634                  fdt_totalsize(fdt), FDT_MAX_SIZE);
635         exit(1);
636     }
637 
638     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
639 
640     g_free(fdt);
641 }
642 
643 static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
644 {
645     return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
646 }
647 
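/* Entry point for hypercalls emulated by QEMU: calls made from problem
 * state (MSR[PR]=1) fail with H_PRIVILEGE, otherwise dispatch to
 * spapr_hypercall() with the opcode in r3 and arguments starting at r4;
 * the result is returned in r3. */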
648 static void emulate_spapr_hypercall(PowerPCCPU *cpu)
649 {
650     CPUPPCState *env = &cpu->env;
651 
652     if (msr_pr) {
653         hcall_dprintf("Hypercall made with MSR[PR]=1\n");
654         env->gpr[3] = H_PRIVILEGE;
655     } else {
656         env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
657     }
658 }
659 
660 static void spapr_reset_htab(sPAPREnvironment *spapr)
661 {
662     long shift;
663 
664     /* Allocate the hash page table.  Its size (htab_shift) was chosen at
665      * init time to scale with the size of guest RAM (roughly 1/128 of it,
666      * see ppc_spapr_init()). */
667 
668     shift = kvmppc_reset_htab(spapr->htab_shift);
669 
670     if (shift > 0) {
671         /* Kernel handles htab, we don't need to allocate one */
672         spapr->htab_shift = shift;
673     } else {
674         if (!spapr->htab) {
675             /* Allocate an htab if we don't yet have one */
676             spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
677         }
678 
679         /* And clear it */
680         memset(spapr->htab, 0, HTAB_SIZE(spapr));
681     }
682 
683     /* Update the RMA size if necessary */
684     if (spapr->vrma_adjust) {
685         spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
686     }
687 }
688 
689 static void ppc_spapr_reset(void)
690 {
691     PowerPCCPU *first_ppc_cpu;
692 
693     /* Reset the hash table & recalc the RMA */
694     spapr_reset_htab(spapr);
695 
696     qemu_devices_reset();
697 
698     /* Load the fdt */
699     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
700                        spapr->rtas_size);
701 
702     /* Set up the entry state */
703     first_ppc_cpu = POWERPC_CPU(first_cpu);
704     first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
705     first_ppc_cpu->env.gpr[5] = 0;
706     first_cpu->halted = 0;
707     first_ppc_cpu->env.nip = spapr->entry_point;
708 
709 }
710 
711 static void spapr_cpu_reset(void *opaque)
712 {
713     PowerPCCPU *cpu = opaque;
714     CPUState *cs = CPU(cpu);
715     CPUPPCState *env = &cpu->env;
716 
717     cpu_reset(cs);
718 
719     /* All CPUs start halted.  CPU0 is unhalted from the machine level
720      * reset code and the rest are explicitly started up by the guest
721      * using an RTAS call */
722     cs->halted = 1;
723 
724     env->spr[SPR_HIOR] = 0;
725 
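    /* Tell the CPU where the hash page table lives.  When QEMU manages the
     * table itself (e.g. with TCG), spapr->htab points at it in QEMU memory;
     * SDR1 encodes the table address and its size as htab_shift - 18
     * (18 being the minimum architected table size, 256KB). */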
726     env->external_htab = (uint8_t *)spapr->htab;
727     env->htab_base = -1;
728     env->htab_mask = HTAB_SIZE(spapr) - 1;
729     env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
730         (spapr->htab_shift - 18);
731 }
732 
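/* Create the PAPR NVRAM device on the VIO bus, optionally backed by the
 * block device named by the "nvram" machine option. */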
733 static void spapr_create_nvram(sPAPREnvironment *spapr)
734 {
735     DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
736     const char *drivename = qemu_opt_get(qemu_get_machine_opts(), "nvram");
737 
738     if (drivename) {
739         BlockDriverState *bs;
740 
741         bs = bdrv_find(drivename);
742         if (!bs) {
743             fprintf(stderr, "No such block device \"%s\" for nvram\n",
744                     drivename);
745             exit(1);
746         }
747         qdev_prop_set_drive_nofail(dev, "drive", bs);
748     }
749 
750     qdev_init_nofail(dev);
751 
752     spapr->nvram = (struct sPAPRNVRAM *)dev;
753 }
754 
755 /* Returns whether we want to use VGA or not */
756 static int spapr_vga_init(PCIBus *pci_bus)
757 {
758     switch (vga_interface_type) {
759     case VGA_NONE:
760     case VGA_STD:
761         return pci_vga_init(pci_bus) != NULL;
762     default:
763         fprintf(stderr, "This vga model is not supported,"
764                 "currently it only supports -vga std\n");
765         exit(0);
766         break;
767     }
768 }
769 
770 static const VMStateDescription vmstate_spapr = {
771     .name = "spapr",
772     .version_id = 1,
773     .minimum_version_id = 1,
774     .minimum_version_id_old = 1,
775     .fields      = (VMStateField []) {
776         VMSTATE_UINT32(next_irq, sPAPREnvironment),
777 
778         /* RTC offset */
779         VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
780 
781         VMSTATE_END_OF_LIST()
782     },
783 };
784 
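/* Helpers for walking the hash page table during migration: each HPTE is
 * two 64-bit words (16 bytes); the HPTE64_V_HPTE_DIRTY bit in the first
 * word marks entries that still need to be sent to the destination. */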
785 #define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
786 #define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
787 #define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
788 #define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
789 
790 static int htab_save_setup(QEMUFile *f, void *opaque)
791 {
792     sPAPREnvironment *spapr = opaque;
793 
794     /* "Iteration" header */
795     qemu_put_be32(f, spapr->htab_shift);
796 
797     if (spapr->htab) {
798         spapr->htab_save_index = 0;
799         spapr->htab_first_pass = true;
800     } else {
801         assert(kvm_enabled());
802 
803         spapr->htab_fd = kvmppc_get_htab_fd(false);
804         if (spapr->htab_fd < 0) {
805             fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
806                     strerror(errno));
807             return -1;
808         }
809     }
810 
811 
812     return 0;
813 }
814 
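/* First pass of hash table migration: scan the whole table and transmit
 * runs of valid HPTEs as (index, n_valid, n_invalid=0, data) chunks,
 * clearing the dirty bit as we go.  Stops early once max_ns of wall-clock
 * time has been spent or the migration rate limit is hit. */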
815 static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
816                                  int64_t max_ns)
817 {
818     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
819     int index = spapr->htab_save_index;
820     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
821 
822     assert(spapr->htab_first_pass);
823 
824     do {
825         int chunkstart;
826 
827         /* Consume invalid HPTEs */
828         while ((index < htabslots)
829                && !HPTE_VALID(HPTE(spapr->htab, index))) {
830             CLEAN_HPTE(HPTE(spapr->htab, index));
831             index++;
832         }
833 
834         /* Consume valid HPTEs */
835         chunkstart = index;
836         while ((index < htabslots)
837                && HPTE_VALID(HPTE(spapr->htab, index))) {
838             CLEAN_HPTE(HPTE(spapr->htab, index));
839             index++;
840         }
841 
842         if (index > chunkstart) {
843             int n_valid = index - chunkstart;
844 
845             qemu_put_be32(f, chunkstart);
846             qemu_put_be16(f, n_valid);
847             qemu_put_be16(f, 0);
848             qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
849                             HASH_PTE_SIZE_64 * n_valid);
850 
851             if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
852                 break;
853             }
854         }
855     } while ((index < htabslots) && !qemu_file_rate_limit(f));
856 
857     if (index >= htabslots) {
858         assert(index == htabslots);
859         index = 0;
860         spapr->htab_first_pass = false;
861     }
862     spapr->htab_save_index = index;
863 }
864 
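/* Subsequent passes only send HPTEs dirtied since the last pass.  Each
 * chunk is a run of dirty valid entries followed by a run of dirty invalid
 * ones, so the destination can both update and invalidate entries.  When
 * max_ns is negative (final pass) the rate limit is ignored.  Returns 1
 * once the whole table has been examined with nothing left to send. */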
865 static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
866                                 int64_t max_ns)
867 {
868     bool final = max_ns < 0;
869     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
870     int examined = 0, sent = 0;
871     int index = spapr->htab_save_index;
872     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
873 
874     assert(!spapr->htab_first_pass);
875 
876     do {
877         int chunkstart, invalidstart;
878 
879         /* Consume non-dirty HPTEs */
880         while ((index < htabslots)
881                && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
882             index++;
883             examined++;
884         }
885 
886         chunkstart = index;
887         /* Consume valid dirty HPTEs */
888         while ((index < htabslots)
889                && HPTE_DIRTY(HPTE(spapr->htab, index))
890                && HPTE_VALID(HPTE(spapr->htab, index))) {
891             CLEAN_HPTE(HPTE(spapr->htab, index));
892             index++;
893             examined++;
894         }
895 
896         invalidstart = index;
897         /* Consume invalid dirty HPTEs */
898         while ((index < htabslots)
899                && HPTE_DIRTY(HPTE(spapr->htab, index))
900                && !HPTE_VALID(HPTE(spapr->htab, index))) {
901             CLEAN_HPTE(HPTE(spapr->htab, index));
902             index++;
903             examined++;
904         }
905 
906         if (index > chunkstart) {
907             int n_valid = invalidstart - chunkstart;
908             int n_invalid = index - invalidstart;
909 
910             qemu_put_be32(f, chunkstart);
911             qemu_put_be16(f, n_valid);
912             qemu_put_be16(f, n_invalid);
913             qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
914                             HASH_PTE_SIZE_64 * n_valid);
915             sent += index - chunkstart;
916 
917             if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
918                 break;
919             }
920         }
921 
922         if (examined >= htabslots) {
923             break;
924         }
925 
926         if (index >= htabslots) {
927             assert(index == htabslots);
928             index = 0;
929         }
930     } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));
931 
932     if (index >= htabslots) {
933         assert(index == htabslots);
934         index = 0;
935     }
936 
937     spapr->htab_save_index = index;
938 
939     return (examined >= htabslots) && (sent == 0) ? 1 : 0;
940 }
941 
942 #define MAX_ITERATION_NS    5000000 /* 5 ms */
943 #define MAX_KVM_BUF_SIZE    2048
944 
945 static int htab_save_iterate(QEMUFile *f, void *opaque)
946 {
947     sPAPREnvironment *spapr = opaque;
948     int rc = 0;
949 
950     /* Iteration header */
951     qemu_put_be32(f, 0);
952 
953     if (!spapr->htab) {
954         assert(kvm_enabled());
955 
956         rc = kvmppc_save_htab(f, spapr->htab_fd,
957                               MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
958         if (rc < 0) {
959             return rc;
960         }
961     } else  if (spapr->htab_first_pass) {
962         htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
963     } else {
964         rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
965     }
966 
967     /* End marker */
968     qemu_put_be32(f, 0);
969     qemu_put_be16(f, 0);
970     qemu_put_be16(f, 0);
971 
972     return rc;
973 }
974 
975 static int htab_save_complete(QEMUFile *f, void *opaque)
976 {
977     sPAPREnvironment *spapr = opaque;
978 
979     /* Iteration header */
980     qemu_put_be32(f, 0);
981 
982     if (!spapr->htab) {
983         int rc;
984 
985         assert(kvm_enabled());
986 
987         rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
988         if (rc < 0) {
989             return rc;
990         }
991         close(spapr->htab_fd);
992         spapr->htab_fd = -1;
993     } else {
994         htab_save_later_pass(f, spapr, -1);
995     }
996 
997     /* End marker */
998     qemu_put_be32(f, 0);
999     qemu_put_be16(f, 0);
1000     qemu_put_be16(f, 0);
1001 
1002     return 0;
1003 }
1004 
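/* Load the hash table stream produced by the htab_save_* handlers: the
 * first section carries only the hash shift in its header; later sections
 * are sequences of (index, n_valid, n_invalid, data) chunks terminated by
 * an all-zeroes end marker.  With a KVM-managed table the chunks are
 * pushed back into the kernel via the HTAB fd. */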
1005 static int htab_load(QEMUFile *f, void *opaque, int version_id)
1006 {
1007     sPAPREnvironment *spapr = opaque;
1008     uint32_t section_hdr;
1009     int fd = -1;
1010 
1011     if (version_id < 1 || version_id > 1) {
1012         fprintf(stderr, "htab_load() bad version\n");
1013         return -EINVAL;
1014     }
1015 
1016     section_hdr = qemu_get_be32(f);
1017 
1018     if (section_hdr) {
1019         /* First section, just the hash shift */
1020         if (spapr->htab_shift != section_hdr) {
1021             return -EINVAL;
1022         }
1023         return 0;
1024     }
1025 
1026     if (!spapr->htab) {
1027         assert(kvm_enabled());
1028 
1029         fd = kvmppc_get_htab_fd(true);
1030         if (fd < 0) {
1031             fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
1032                     strerror(errno));
1033         }
1034     }
1035 
1036     while (true) {
1037         uint32_t index;
1038         uint16_t n_valid, n_invalid;
1039 
1040         index = qemu_get_be32(f);
1041         n_valid = qemu_get_be16(f);
1042         n_invalid = qemu_get_be16(f);
1043 
1044         if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
1045             /* End of Stream */
1046             break;
1047         }
1048 
1049         if ((index + n_valid + n_invalid) >
1050             (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
1051             /* Bad index in stream */
1052             fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
1053                     "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
1054                     spapr->htab_shift);
1055             return -EINVAL;
1056         }
1057 
1058         if (spapr->htab) {
1059             if (n_valid) {
1060                 qemu_get_buffer(f, HPTE(spapr->htab, index),
1061                                 HASH_PTE_SIZE_64 * n_valid);
1062             }
1063             if (n_invalid) {
1064                 memset(HPTE(spapr->htab, index + n_valid), 0,
1065                        HASH_PTE_SIZE_64 * n_invalid);
1066             }
1067         } else {
1068             int rc;
1069 
1070             assert(fd >= 0);
1071 
1072             rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
1073             if (rc < 0) {
1074                 return rc;
1075             }
1076         }
1077     }
1078 
1079     if (!spapr->htab) {
1080         assert(fd >= 0);
1081         close(fd);
1082     }
1083 
1084     return 0;
1085 }
1086 
1087 static SaveVMHandlers savevm_htab_handlers = {
1088     .save_live_setup = htab_save_setup,
1089     .save_live_iterate = htab_save_iterate,
1090     .save_live_complete = htab_save_complete,
1091     .load_state = htab_load,
1092 };
1093 
1094 /* pSeries LPAR / sPAPR hardware init */
1095 static void ppc_spapr_init(QEMUMachineInitArgs *args)
1096 {
1097     ram_addr_t ram_size = args->ram_size;
1098     const char *cpu_model = args->cpu_model;
1099     const char *kernel_filename = args->kernel_filename;
1100     const char *kernel_cmdline = args->kernel_cmdline;
1101     const char *initrd_filename = args->initrd_filename;
1102     const char *boot_device = args->boot_order;
1103     PowerPCCPU *cpu;
1104     CPUPPCState *env;
1105     PCIHostState *phb;
1106     int i;
1107     MemoryRegion *sysmem = get_system_memory();
1108     MemoryRegion *ram = g_new(MemoryRegion, 1);
1109     hwaddr rma_alloc_size;
1110     uint32_t initrd_base = 0;
1111     long kernel_size = 0, initrd_size = 0;
1112     long load_limit, rtas_limit, fw_size;
1113     bool kernel_le = false;
1114     char *filename;
1115 
1116     msi_supported = true;
1117 
1118     spapr = g_malloc0(sizeof(*spapr));
1119     QLIST_INIT(&spapr->phbs);
1120 
1121     cpu_ppc_hypercall = emulate_spapr_hypercall;
1122 
1123     /* Allocate RMA if necessary */
1124     rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
1125 
1126     if (rma_alloc_size == -1) {
1127         hw_error("qemu: Unable to create RMA\n");
1128         exit(1);
1129     }
1130 
1131     if (rma_alloc_size && (rma_alloc_size < ram_size)) {
1132         spapr->rma_size = rma_alloc_size;
1133     } else {
1134         spapr->rma_size = ram_size;
1135 
1136         /* With KVM, we don't actually know whether KVM supports an
1137          * unbounded RMA (PR KVM) or is limited by the hash table size
1138          * (HV KVM using VRMA), so we always assume the latter
1139          *
1140          * In that case, we also limit the initial allocations for RTAS
1141          * etc... to 256M since we have no way to know what the VRMA size
1142          * is going to be: it depends on the size of the hash table, which
1143          * isn't determined yet.
1144          */
1145         if (kvm_enabled()) {
1146             spapr->vrma_adjust = 1;
1147             spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
1148         }
1149     }
1150 
1151     /* We place the device tree and RTAS just below either the top of the RMA,
1152      * or just below 2GB, whichever is lower, so that it can be
1153      * processed with 32-bit real mode code if necessary */
1154     rtas_limit = MIN(spapr->rma_size, 0x80000000);
1155     spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
1156     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
1157     load_limit = spapr->fdt_addr - FW_OVERHEAD;
1158 
1159     /* We aim for a hash table of size 1/128 the size of RAM.  The
1160      * normal rule of thumb is 1/64 the size of RAM, but that's much
1161      * more than needed for the Linux guests we support. */
1162     spapr->htab_shift = 18; /* Minimum architected size */
1163     while (spapr->htab_shift <= 46) {
1164         if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
1165             break;
1166         }
1167         spapr->htab_shift++;
1168     }
1169 
1170     /* Set up Interrupt Controller before we create the VCPUs */
1171     spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
1172                                   XICS_IRQS);
1173     spapr->next_irq = XICS_IRQ_BASE;
1174 
1175     /* init CPUs */
1176     if (cpu_model == NULL) {
1177         cpu_model = kvm_enabled() ? "host" : "POWER7";
1178     }
1179     for (i = 0; i < smp_cpus; i++) {
1180         cpu = cpu_ppc_init(cpu_model);
1181         if (cpu == NULL) {
1182             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
1183             exit(1);
1184         }
1185         env = &cpu->env;
1186 
1187         xics_cpu_setup(spapr->icp, cpu);
1188 
1189         /* Set time-base frequency to 512 MHz */
1190         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
1191 
1192         /* PAPR always has exception vectors in RAM not ROM. To ensure this,
1193          * MSR[IP] should never be set.
1194          */
1195         env->msr_mask &= ~(1 << 6);
1196 
1197         /* Tell KVM that we're in PAPR mode */
1198         if (kvm_enabled()) {
1199             kvmppc_set_papr(cpu);
1200         }
1201 
1202         qemu_register_reset(spapr_cpu_reset, cpu);
1203     }
1204 
1205     /* allocate RAM */
1206     spapr->ram_limit = ram_size;
1207     if (spapr->ram_limit > rma_alloc_size) {
1208         ram_addr_t nonrma_base = rma_alloc_size;
1209         ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
1210 
1211         memory_region_init_ram(ram, NULL, "ppc_spapr.ram", nonrma_size);
1212         vmstate_register_ram_global(ram);
1213         memory_region_add_subregion(sysmem, nonrma_base, ram);
1214     }
1215 
1216     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
1217     spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
1218                                            rtas_limit - spapr->rtas_addr);
1219     if (spapr->rtas_size < 0) {
1220         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
1221         exit(1);
1222     }
1223     if (spapr->rtas_size > RTAS_MAX_SIZE) {
1224         hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
1225                  spapr->rtas_size, RTAS_MAX_SIZE);
1226         exit(1);
1227     }
1228     g_free(filename);
1229 
1230     /* Set up EPOW events infrastructure */
1231     spapr_events_init(spapr);
1232 
1233     /* Set up VIO bus */
1234     spapr->vio_bus = spapr_vio_bus_init();
1235 
1236     for (i = 0; i < MAX_SERIAL_PORTS; i++) {
1237         if (serial_hds[i]) {
1238             spapr_vty_create(spapr->vio_bus, serial_hds[i]);
1239         }
1240     }
1241 
1242     /* We always have at least the nvram device on VIO */
1243     spapr_create_nvram(spapr);
1244 
1245     /* Set up PCI */
1246     spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
1247     spapr_pci_rtas_init();
1248 
1249     phb = spapr_create_phb(spapr, 0);
1250 
1251     for (i = 0; i < nb_nics; i++) {
1252         NICInfo *nd = &nd_table[i];
1253 
1254         if (!nd->model) {
1255             nd->model = g_strdup("ibmveth");
1256         }
1257 
1258         if (strcmp(nd->model, "ibmveth") == 0) {
1259             spapr_vlan_create(spapr->vio_bus, nd);
1260         } else {
1261             pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
1262         }
1263     }
1264 
1265     for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
1266         spapr_vscsi_create(spapr->vio_bus);
1267     }
1268 
1269     /* Graphics */
1270     if (spapr_vga_init(phb->bus)) {
1271         spapr->has_graphics = true;
1272     }
1273 
1274     if (usb_enabled(spapr->has_graphics)) {
1275         pci_create_simple(phb->bus, -1, "pci-ohci");
1276         if (spapr->has_graphics) {
1277             usbdevice_create("keyboard");
1278             usbdevice_create("mouse");
1279         }
1280     }
1281 
1282     if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
1283         fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
1284                 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
1285         exit(1);
1286     }
1287 
1288     if (kernel_filename) {
1289         uint64_t lowaddr = 0;
1290 
1291         kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
1292                                NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
1293         if (kernel_size < 0) {
1294             kernel_size = load_elf(kernel_filename,
1295                                    translate_kernel_address, NULL,
1296                                    NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0);
1297             kernel_le = kernel_size > 0;
1298         }
1299         if (kernel_size < 0) {
1300             kernel_size = load_image_targphys(kernel_filename,
1301                                               KERNEL_LOAD_ADDR,
1302                                               load_limit - KERNEL_LOAD_ADDR);
1303         }
1304         if (kernel_size < 0) {
1305             fprintf(stderr, "qemu: could not load kernel '%s'\n",
1306                     kernel_filename);
1307             exit(1);
1308         }
1309 
1310         /* load initrd */
1311         if (initrd_filename) {
1312             /* Try to locate the initrd in the gap between the kernel
1313              * and the firmware. Add a bit of space just in case
1314              */
1315             initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
1316             initrd_size = load_image_targphys(initrd_filename, initrd_base,
1317                                               load_limit - initrd_base);
1318             if (initrd_size < 0) {
1319                 fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
1320                         initrd_filename);
1321                 exit(1);
1322             }
1323         } else {
1324             initrd_base = 0;
1325             initrd_size = 0;
1326         }
1327     }
1328 
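    /* Load the SLOF firmware image at address 0; the boot CPU starts
     * executing it at the system reset vector (0x100). */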
1329     if (bios_name == NULL) {
1330         bios_name = FW_FILE_NAME;
1331     }
1332     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
1333     fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
1334     if (fw_size < 0) {
1335         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
1336         exit(1);
1337     }
1338     g_free(filename);
1339 
1340     spapr->entry_point = 0x100;
1341 
1342     vmstate_register(NULL, 0, &vmstate_spapr, spapr);
1343     register_savevm_live(NULL, "spapr/htab", -1, 1,
1344                          &savevm_htab_handlers, spapr);
1345 
1346     /* Prepare the device tree */
1347     spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
1348                                             initrd_base, initrd_size,
1349                                             kernel_size, kernel_le,
1350                                             boot_device, kernel_cmdline,
1351                                             spapr->epow_irq);
1352     assert(spapr->fdt_skel != NULL);
1353 }
1354 
1355 static QEMUMachine spapr_machine = {
1356     .name = "pseries",
1357     .desc = "pSeries Logical Partition (PAPR compliant)",
1358     .is_default = 1,
1359     .init = ppc_spapr_init,
1360     .reset = ppc_spapr_reset,
1361     .block_default_type = IF_SCSI,
1362     .max_cpus = MAX_CPUS,
1363     .no_parallel = 1,
1364     .default_boot_order = NULL,
1365 };
1366 
1367 static void spapr_machine_init(void)
1368 {
1369     qemu_register_machine(&spapr_machine);
1370 }
1371 
1372 machine_init(spapr_machine_init);
1373